Merge branch 'master' into nextgenv2 Manually resolved the following conflicts: vp10/common/blockd.h vp10/common/entropy.h vp10/common/entropymode.c vp10/common/entropymode.h vp10/common/enums.h vp10/common/thread_common.c vp10/decoder/decodeframe.c vp10/decoder/decodemv.c vp10/encoder/bitstream.c vp10/encoder/encodeframe.c vp10/encoder/rd.c vp10/encoder/rdopt.c Change-Id: I15d20ce5292b70f0c2b4ba55c1f1318181481596
diff --git a/configure b/configure index c3c0f40..29b1da8 100755 --- a/configure +++ b/configure
@@ -272,7 +272,15 @@ spatial_svc fp_mb_stats emulate_hardware + var_tx + ref_mv + ext_tx misc_fixes + ext_intra + ext_inter + ext_interp + ext_refs + supertx " CONFIG_LIST=" dependency_tracking
diff --git a/test/test.mk b/test/test.mk index 80b57e5..471f870 100644 --- a/test/test.mk +++ b/test/test.mk
@@ -175,6 +175,11 @@ endif LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_txfm_test.h +LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm1d_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm2d_test.cc TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
diff --git a/test/vp10_fwd_txfm1d_test.cc b/test/vp10_fwd_txfm1d_test.cc new file mode 100644 index 0000000..a39e0ef --- /dev/null +++ b/test/vp10_fwd_txfm1d_test.cc
@@ -0,0 +1,130 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "test/vp10_txfm_test.h" +#include "vp10/common/vp10_fwd_txfm1d.h" + +using libvpx_test::ACMRandom; + +namespace { +static int txfm_type_num = 2; +static TYPE_TXFM txfm_type_ls[2] = {TYPE_DCT, TYPE_ADST}; + +static int txfm_size_num = 4; +static int txfm_size_ls[4] = {4, 8, 16, 32}; + +static TxfmFunc fwd_txfm_func_ls[2][4] = { + {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new}, + {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}}; + +// the maximum stage number of fwd/inv 1d dct/adst txfm is 12 +static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}; +static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}; + +TEST(vp10_fwd_txfm1d, round_shift) { + EXPECT_EQ(round_shift(7, 1), 3); + EXPECT_EQ(round_shift(-7, 1), -3); + + EXPECT_EQ(round_shift(7, 2), 2); + EXPECT_EQ(round_shift(-7, 2), -2); + + EXPECT_EQ(round_shift(8, 2), 2); + EXPECT_EQ(round_shift(-8, 2), -2); +} + +TEST(vp10_fwd_txfm1d, get_max_bit) { + int max_bit = get_max_bit(8); + EXPECT_EQ(max_bit, 3); +} + +TEST(vp10_fwd_txfm1d, half_btf) { + int32_t max = (1 << 15) - 1; + int32_t w0 = max; + int32_t in0 = max; + int32_t w1 = max; + int32_t in1 = max; + int32_t result_32 = half_btf(w0, in0, w1, in1, 0); + int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1; + EXPECT_EQ(result_32, result_64); +} + +TEST(vp10_fwd_txfm1d, cospi_arr) { + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 64; j++) { + EXPECT_EQ(cospi_arr[i][j], + (int32_t)round(cos(M_PI * j / 128) * (1 
<< (cos_bit_min + i)))); + } + } +} + +TEST(vp10_fwd_txfm1d, clamp_block) { + int16_t block[5][5] = {{7, -5, 6, -3, 9}, + {7, -5, 6, -3, 9}, + {7, -5, 6, -3, 9}, + {7, -5, 6, -3, 9}, + {7, -5, 6, -3, 9}}; + + int16_t ref_block[5][5] = {{7, -5, 6, -3, 9}, + {7, -5, 6, -3, 9}, + {7, -4, 2, -3, 9}, + {7, -4, 2, -3, 9}, + {7, -4, 2, -3, 9}}; + + int row = 2; + int col = 1; + int block_size = 3; + int stride = 5; + clamp_block(block[row] + col, block_size, stride, -4, 2); + for (int r = 0; r < stride; r++) { + for (int c = 0; c < stride; c++) { + EXPECT_EQ(block[r][c], ref_block[r][c]); + } + } +} + +TEST(vp10_fwd_txfm1d, accuracy) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int si = 0; si < txfm_size_num; ++si) { + int txfm_size = txfm_size_ls[si]; + int32_t *input = new int32_t[txfm_size]; + int32_t *output = new int32_t[txfm_size]; + double *ref_input = new double[txfm_size]; + double *ref_output = new double[txfm_size]; + + for (int ti = 0; ti < txfm_type_num; ++ti) { + TYPE_TXFM txfm_type = txfm_type_ls[ti]; + TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si]; + int max_error = 7; + + const int count_test_block = 5000; + for (int ti = 0; ti < count_test_block; ++ti) { + for (int ni = 0; ni < txfm_size; ++ni) { + input[ni] = rnd.Rand16() % base - rnd.Rand16() % base; + ref_input[ni] = static_cast<double>(input[ni]); + } + + fwd_txfm_func(input, output, cos_bit, range_bit); + reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type); + + for (int ni = 0; ni < txfm_size; ++ni) { + EXPECT_LE( + abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))), + max_error); + } + } + } + + delete[] input; + delete[] output; + delete[] ref_input; + delete[] ref_output; + } +} +} // namespace
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc new file mode 100644 index 0000000..e6416cc --- /dev/null +++ b/test/vp10_fwd_txfm2d_test.cc
@@ -0,0 +1,104 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "test/vp10_txfm_test.h" +#include "vp10/common/vp10_fwd_txfm2d.h" +#include "vp10/common/vp10_fwd_txfm2d_cfg.h" + +using libvpx_test::ACMRandom; + +namespace { + +const int txfm_size_num = 4; +const int txfm_size_ls[4] = {4, 8, 16, 32}; +const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = { + {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4, + fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4}, + {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8, + fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8}, + {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16, + fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16}, + {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32, + fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}}; + +const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = { + vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16, + vp10_fwd_txfm2d_32x32}; + +const int txfm_type_num = 4; +const TYPE_TXFM type_ls_0[4] = {TYPE_DCT, TYPE_DCT, TYPE_ADST, TYPE_ADST}; +const TYPE_TXFM type_ls_1[4] = {TYPE_DCT, TYPE_ADST, TYPE_ADST, TYPE_DCT}; + +TEST(vp10_fwd_txfm2d, accuracy) { + for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) { + int txfm_size = txfm_size_ls[txfm_size_idx]; + int sqr_txfm_size = txfm_size * txfm_size; + int16_t* input = new int16_t[sqr_txfm_size]; + int32_t* output = new int32_t[sqr_txfm_size]; + 
double* ref_input = new double[sqr_txfm_size]; + double* ref_output = new double[sqr_txfm_size]; + + for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num; + ++txfm_type_idx) { + TXFM_2D_CFG fwd_txfm_cfg = fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx]; + Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx]; + TYPE_TXFM type0 = type_ls_0[txfm_type_idx]; + TYPE_TXFM type1 = type_ls_1[txfm_type_idx]; + int amplify_bit = + fwd_txfm_cfg.shift[0] + fwd_txfm_cfg.shift[1] + fwd_txfm_cfg.shift[2]; + double amplify_factor = + amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit)); + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int count = 5000; + double avg_abs_error = 0; + for (int ci = 0; ci < count; ci++) { + for (int ni = 0; ni < sqr_txfm_size; ++ni) { + input[ni] = rnd.Rand16() % base; + ref_input[ni] = static_cast<double>(input[ni]); + output[ni] = 0; + ref_output[ni] = 0; + } + + fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd); + reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1); + + for (int ni = 0; ni < sqr_txfm_size; ++ni) { + ref_output[ni] = round(ref_output[ni] * amplify_factor); + EXPECT_LE(fabs(output[ni] - ref_output[ni]) / amplify_factor, 30); + } + avg_abs_error += compute_avg_abs_error<int32_t, double>( + output, ref_output, sqr_txfm_size); + } + + avg_abs_error /= amplify_factor; + avg_abs_error /= count; + // max_abs_avg_error comes from upper bound of avg_abs_error + // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error: + // %f\n", type0, type1, txfm_size, avg_abs_error); + double max_abs_avg_error = 1.5; + EXPECT_LE(avg_abs_error, max_abs_avg_error); + } + + delete[] input; + delete[] output; + delete[] ref_input; + delete[] ref_output; + } +} + +} // anonymous namespace
diff --git a/test/vp10_inv_txfm1d_test.cc b/test/vp10_inv_txfm1d_test.cc new file mode 100644 index 0000000..3b716c8 --- /dev/null +++ b/test/vp10_inv_txfm1d_test.cc
@@ -0,0 +1,69 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "test/vp10_txfm_test.h" +#include "vp10/common/vp10_fwd_txfm1d.h" +#include "vp10/common/vp10_inv_txfm1d.h" + +using libvpx_test::ACMRandom; + +namespace { +static int txfm_type_num = 2; +static int txfm_size_num = 4; +static int txfm_size_ls[4] = {4, 8, 16, 32}; + +static TxfmFunc fwd_txfm_func_ls[2][4] = { + {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new}, + {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}}; + +static TxfmFunc inv_txfm_func_ls[2][4] = { + {vp10_idct4_new, vp10_idct8_new, vp10_idct16_new, vp10_idct32_new}, + {vp10_iadst4_new, vp10_iadst8_new, vp10_iadst16_new, vp10_iadst32_new}}; + +// the maximum stage number of fwd/inv 1d dct/adst txfm is 12 +static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}; +static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}; + +TEST(vp10_inv_txfm1d, round_trip) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int si = 0; si < txfm_size_num; ++si) { + int txfm_size = txfm_size_ls[si]; + int32_t *input = new int32_t[txfm_size]; + int32_t *output = new int32_t[txfm_size]; + int32_t *round_trip_output = new int32_t[txfm_size]; + + for (int ti = 0; ti < txfm_type_num; ++ti) { + TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si]; + TxfmFunc inv_txfm_func = inv_txfm_func_ls[ti][si]; + int max_error = 2; + + const int count_test_block = 5000; + for (int ci = 0; ci < count_test_block; ++ci) { + for (int ni = 0; ni < txfm_size; ++ni) { + input[ni] = rnd.Rand16() % base - rnd.Rand16() % base; + } + + 
fwd_txfm_func(input, output, cos_bit, range_bit); + inv_txfm_func(output, round_trip_output, cos_bit, range_bit); + + for (int ni = 0; ni < txfm_size; ++ni) { + EXPECT_LE(abs(input[ni] - round_shift(round_trip_output[ni], + get_max_bit(txfm_size) - 1)), + max_error); + } + } + } + delete[] input; + delete[] output; + delete[] round_trip_output; + } +} + +} // namespace
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc new file mode 100644 index 0000000..603821e --- /dev/null +++ b/test/vp10_inv_txfm2d_test.cc
@@ -0,0 +1,115 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "test/vp10_txfm_test.h" +#include "vp10/common/vp10_fwd_txfm2d.h" +#include "vp10/common/vp10_fwd_txfm2d_cfg.h" +#include "vp10/common/vp10_inv_txfm2d.h" +#include "vp10/common/vp10_inv_txfm2d_cfg.h" + +using libvpx_test::ACMRandom; + +namespace { + +const int txfm_size_num = 4; +const int txfm_size_ls[4] = {4, 8, 16, 32}; +const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = { + {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4, + fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4}, + {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8, + fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8}, + {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16, + fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16}, + {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32, + fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}}; + +const TXFM_2D_CFG inv_txfm_cfg_ls[4][4] = { + {inv_txfm_2d_cfg_dct_dct_4, inv_txfm_2d_cfg_dct_adst_4, + inv_txfm_2d_cfg_adst_adst_4, inv_txfm_2d_cfg_adst_dct_4}, + {inv_txfm_2d_cfg_dct_dct_8, inv_txfm_2d_cfg_dct_adst_8, + inv_txfm_2d_cfg_adst_adst_8, inv_txfm_2d_cfg_adst_dct_8}, + {inv_txfm_2d_cfg_dct_dct_16, inv_txfm_2d_cfg_dct_adst_16, + inv_txfm_2d_cfg_adst_adst_16, inv_txfm_2d_cfg_adst_dct_16}, + {inv_txfm_2d_cfg_dct_dct_32, inv_txfm_2d_cfg_dct_adst_32, + inv_txfm_2d_cfg_adst_adst_32, inv_txfm_2d_cfg_adst_dct_32}}; + +const 
Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = { + vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16, + vp10_fwd_txfm2d_32x32}; +const Inv_Txfm2d_Func inv_txfm_func_ls[4] = { + vp10_inv_txfm2d_add_4x4, vp10_inv_txfm2d_add_8x8, vp10_inv_txfm2d_add_16x16, + vp10_inv_txfm2d_add_32x32}; + +const int txfm_type_num = 4; + +TEST(vp10_inv_txfm2d, round_trip) { + for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) { + const int txfm_size = txfm_size_ls[txfm_size_idx]; + const int sqr_txfm_size = txfm_size * txfm_size; + int16_t* input = new int16_t[sqr_txfm_size]; + uint16_t* ref_input = new uint16_t[sqr_txfm_size]; + int32_t* output = new int32_t[sqr_txfm_size]; + + for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num; + ++txfm_type_idx) { + const TXFM_2D_CFG fwd_txfm_cfg = + fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx]; + const TXFM_2D_CFG inv_txfm_cfg = + inv_txfm_cfg_ls[txfm_size_idx][txfm_type_idx]; + const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx]; + const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx]; + const int count = 5000; + double avg_abs_error = 0; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int ci = 0; ci < count; ci++) { + for (int ni = 0; ni < sqr_txfm_size; ++ni) { + if (ci == 0) { + int extreme_input = base - 1; + input[ni] = extreme_input; // extreme case + ref_input[ni] = 0; + } else { + input[ni] = rnd.Rand16() % base; + ref_input[ni] = 0; + } + } + + fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd); + inv_txfm_func(output, ref_input, txfm_size, &inv_txfm_cfg, bd); + + for (int ni = 0; ni < sqr_txfm_size; ++ni) { + EXPECT_LE(abs(input[ni] - ref_input[ni]), 2); + } + avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>( + input, ref_input, sqr_txfm_size); + } + + avg_abs_error /= count; + // max_abs_avg_error comes from upper bound of + // printf("txfm_size: %d accuracy_avg_abs_error: %f\n", txfm_size, + // avg_abs_error); + // TODO(angiebird): this 
upper bound is from adst_adst_8 + const double max_abs_avg_error = 0.024; + EXPECT_LE(avg_abs_error, max_abs_avg_error); + } + + delete[] input; + delete[] ref_input; + delete[] output; + } +} + +} // anonymous namespace
diff --git a/test/vp10_inv_txfm_test.cc b/test/vp10_inv_txfm_test.cc index c49081e..6c0a3d2 100644 --- a/test/vp10_inv_txfm_test.cc +++ b/test/vp10_inv_txfm_test.cc
@@ -203,7 +203,7 @@ // quantization with maximum allowed step sizes test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336; for (int j = 1; j < last_nonzero_; ++j) - test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] + test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]] = (output_ref_block[j] / 1828) * 1828; } @@ -265,7 +265,7 @@ max_energy_leftover = 0; coef = 0; } - test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef; + test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]] = coef; } memcpy(test_coef_block2, test_coef_block1,
diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h new file mode 100644 index 0000000..967d38b --- /dev/null +++ b/test/vp10_txfm_test.h
@@ -0,0 +1,113 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_TXFM_TEST_H_ +#define VP10_TXFM_TEST_H_ + +#include <stdio.h> +#include <stdlib.h> +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#endif +#include <math.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "vp10/common/vp10_txfm.h" + +typedef enum { + TYPE_DCT = 0, + TYPE_ADST, + TYPE_IDCT, + TYPE_IADST, + TYPE_LAST +} TYPE_TXFM; + +static double invSqrt2 = 1 / pow(2, 0.5); + +static void reference_dct_1d(const double* in, double* out, int size) { + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (2 * size)); + } + if (k == 0) out[k] = out[k] * invSqrt2; + } +} + +static void reference_adst_1d(const double* in, double* out, int size) { + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size)); + } + } +} + +static void reference_hybrid_1d(double* in, double* out, int size, int type) { + if (type == TYPE_DCT) + reference_dct_1d(in, out, size); + else + reference_adst_1d(in, out, size); +} + +static void reference_hybrid_2d(double* in, double* out, int size, int type0, + int type1) { + double* tempOut = new double[size * size]; + + for (int r = 0; r < size; r++) { + // out ->tempOut + for (int c = 0; c < size; c++) { + tempOut[r * size + c] = in[c * size + r]; + } + } + + // dct each row: in -> out + for (int r = 0; r < size; r++) { + reference_hybrid_1d(tempOut + r * size, out + r * size, size, 
type0); + } + + for (int r = 0; r < size; r++) { + // out ->tempOut + for (int c = 0; c < size; c++) { + tempOut[r * size + c] = out[c * size + r]; + } + } + + for (int r = 0; r < size; r++) { + reference_hybrid_1d(tempOut + r * size, out + r * size, size, type1); + } + delete[] tempOut; +} + +template <typename Type1, typename Type2> +static double compute_avg_abs_error(const Type1* a, const Type2* b, + const int size) { + double error = 0; + for (int i = 0; i < size; i++) { + error += fabs(static_cast<double>(a[i]) - static_cast<double>(b[i])); + } + error = error / size; + return error; +} + +typedef void (*TxfmFunc)(const int32_t* in, int32_t* out, const int8_t* cos_bit, + const int8_t* range_bit); + +typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, const int, + const TXFM_2D_CFG*, const int); +typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, const int, + const TXFM_2D_CFG*, const int); + +static const int bd = 10; +static const int base = (1 << bd); + +#endif // VP10_TXFM_TEST_H_
diff --git a/test/vp9_arf_freq_test.cc b/test/vp9_arf_freq_test.cc index 89200d4..670529c 100644 --- a/test/vp9_arf_freq_test.cc +++ b/test/vp9_arf_freq_test.cc
@@ -78,19 +78,19 @@ return !strcmp(dot, ".y4m"); } -class ArfFreqTest +class ArfFreqTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith3Params<TestVideoParam, \ TestEncodeParam, int> { protected: - ArfFreqTest() + ArfFreqTestLarge() : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), test_encode_param_(GET_PARAM(2)), min_arf_requested_(GET_PARAM(3)) { } - virtual ~ArfFreqTest() {} + virtual ~ArfFreqTestLarge() {} virtual void SetUp() { InitializeConfig(); @@ -190,7 +190,7 @@ int run_of_visible_frames_; }; -TEST_P(ArfFreqTest, MinArfFreqTest) { +TEST_P(ArfFreqTestLarge, MinArfFreqTest) { cfg_.rc_target_bitrate = kBitrate; cfg_.g_error_resilient = 0; cfg_.g_profile = test_video_param_.profile; @@ -225,26 +225,26 @@ } VP9_INSTANTIATE_TEST_CASE( - ArfFreqTest, + ArfFreqTestLarge, ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors)); #if CONFIG_VP9_HIGHBITDEPTH -# if CONFIG_VP10_ENCODER +#if CONFIG_VP10_ENCODER // TODO(angiebird): 25-29 fail in high bitdepth mode. INSTANTIATE_TEST_CASE_P( - DISABLED_VP10, ArfFreqTest, + DISABLED_VP10, ArfFreqTestLarge, ::testing::Combine( ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( &libvpx_test::kVP10)), ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors))); -# endif // CONFIG_VP10_ENCODER +#endif // CONFIG_VP10_ENCODER #else VP10_INSTANTIATE_TEST_CASE( - ArfFreqTest, + ArfFreqTestLarge, ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors));
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc index 63f6dfe..8ac5c33 100644 --- a/test/vp9_ethread_test.cc +++ b/test/vp9_ethread_test.cc
@@ -108,7 +108,7 @@ TEST_P(VPxEncoderThreadTest, EncoderResultTest) { std::vector<std::string> single_thr_md5, multi_thr_md5; - ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20); + ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 18); cfg_.rc_target_bitrate = 1000; @@ -138,5 +138,5 @@ VP10_INSTANTIATE_TEST_CASE( VPxEncoderThreadTest, ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood), - ::testing::Range(1, 3)); + ::testing::Range(1, 2)); } // namespace
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c index 9ca86e5..364afde 100644 --- a/vp10/common/alloccommon.c +++ b/vp10/common/alloccommon.c
@@ -97,6 +97,10 @@ cm->above_context = NULL; vpx_free(cm->above_seg_context); cm->above_seg_context = NULL; +#if CONFIG_VAR_TX + vpx_free(cm->above_txfm_context); + cm->above_txfm_context = NULL; +#endif } int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) { @@ -128,6 +132,14 @@ cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); if (!cm->above_seg_context) goto fail; + +#if CONFIG_VAR_TX + vpx_free(cm->above_txfm_context); + cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc( + mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_txfm_context)); + if (!cm->above_txfm_context) goto fail; +#endif + cm->above_context_alloc_cols = cm->mi_cols; }
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index fce1767..dd5c2d1 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h
@@ -38,6 +38,15 @@ FRAME_TYPES, } FRAME_TYPE; +#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS +#define IsInterpolatingFilter(filter) \ + (vp10_filter_kernels[filter][0][SUBPEL_TAPS / 2 - 1] == 128) +#else +#define IsInterpolatingFilter(filter) (1) +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS + +#define MAXTXLEN 32 + static INLINE int is_inter_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } @@ -58,34 +67,76 @@ #define NONE -1 #define INTRA_FRAME 0 #define LAST_FRAME 1 +#if CONFIG_EXT_REFS +#define LAST2_FRAME 2 +#define LAST3_FRAME 3 +#define LAST4_FRAME 4 +#define GOLDEN_FRAME 5 +#define ALTREF_FRAME 6 +#define MAX_REF_FRAMES 7 +#define LAST_REF_FRAMES (LAST4_FRAME - LAST_FRAME + 1) +#else #define GOLDEN_FRAME 2 #define ALTREF_FRAME 3 #define MAX_REF_FRAMES 4 +#endif // CONFIG_EXT_REFS + typedef int8_t MV_REFERENCE_FRAME; +typedef struct { + // Number of base colors for Y (0) and UV (1) + uint8_t palette_size[2]; + // Value of base colors for Y, U, and V +#if CONFIG_VP9_HIGHBITDEPTH + uint16_t palette_colors[3 * PALETTE_MAX_SIZE]; +#else + uint8_t palette_colors[3 * PALETTE_MAX_SIZE]; +#endif // CONFIG_VP9_HIGHBITDEPTH + // Only used by encoder to store the color index of the top left pixel. + // TODO(huisu): move this to encoder + uint8_t palette_first_color_idx[2]; +} PALETTE_MODE_INFO; + +#if CONFIG_EXT_INTRA +typedef struct { + // 1: an ext intra mode is used; 0: otherwise. + uint8_t use_ext_intra_mode[PLANE_TYPES]; + EXT_INTRA_MODE ext_intra_mode[PLANE_TYPES]; +} EXT_INTRA_MODE_INFO; +#endif // CONFIG_EXT_INTRA + // This structure now relates to 8x8 block regions. typedef struct { // Common for both INTER and INTRA blocks BLOCK_SIZE sb_type; PREDICTION_MODE mode; TX_SIZE tx_size; - int8_t skip; -#if CONFIG_MISC_FIXES - int8_t has_no_coeffs; +#if CONFIG_VAR_TX + // TODO(jingning): This effectively assigned 64 entries for each 8x8 block. + // Apparently it takes much more space than needed. 
+ TX_SIZE inter_tx_size[64]; #endif + int8_t skip; + int8_t has_no_coeffs; int8_t segment_id; int8_t seg_id_predicted; // valid only when temporal_update is enabled // Only for INTRA blocks PREDICTION_MODE uv_mode; + PALETTE_MODE_INFO palette_mode_info; // Only for INTER blocks INTERP_FILTER interp_filter; MV_REFERENCE_FRAME ref_frame[2]; TX_TYPE tx_type; - // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. +#if CONFIG_EXT_INTRA + EXT_INTRA_MODE_INFO ext_intra_mode_info; + int8_t angle_delta[2]; +#endif // CONFIG_EXT_INTRA + int_mv mv[2]; + int_mv pred_mv[2]; } MB_MODE_INFO; typedef struct MODE_INFO { @@ -122,7 +173,7 @@ int stride; }; -struct macroblockd_plane { +typedef struct macroblockd_plane { tran_low_t *dqcoeff; PLANE_TYPE plane_type; int subsampling_x; @@ -141,7 +192,7 @@ // encoder const int16_t *dequant; -}; +} MACROBLOCKD_PLANE; #define BLOCK_OFFSET(x, i) ((x) + (i) * 16) @@ -172,6 +223,8 @@ int up_available; int left_available; + const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; + /* Distance of MB away from frame edges */ int mb_to_left_edge; int mb_to_right_edge; @@ -192,6 +245,23 @@ PARTITION_CONTEXT *above_seg_context; PARTITION_CONTEXT left_seg_context[8]; +#if CONFIG_VAR_TX + TXFM_CONTEXT *above_txfm_context; + TXFM_CONTEXT *left_txfm_context; + TXFM_CONTEXT left_txfm_context_buffer[8]; + + TX_SIZE max_tx_size; +#endif + + // dimension in the unit of 8x8 block of the current block + uint8_t n8_w, n8_h; + +#if CONFIG_REF_MV + uint8_t ref_mv_count[MAX_REF_FRAMES]; + CANDIDATE_MV ref_mv_stack[MAX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; + uint8_t is_sec_rect; +#endif + #if CONFIG_VP9_HIGHBITDEPTH /* Bit depth: 8, 10, 12 */ int bd; @@ -221,17 +291,177 @@ ADST_ADST, // TM }; -static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd, - int block_idx) { +#if CONFIG_SUPERTX +static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) { + return (int)mbmi->tx_size > + VPXMIN(b_width_log2_lookup[mbmi->sb_type], + 
b_height_log2_lookup[mbmi->sb_type]); +} +#endif // CONFIG_SUPERTX + +#if CONFIG_EXT_TX +#define ALLOW_INTRA_EXT_TX 1 + +static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = { + 1, 17, 10, 2 +}; +static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = { + 1, 17, 10 +}; + +#define USE_IDTX_FOR_32X32 0 +static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, + int is_inter) { + (void) is_inter; + if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0; +#if USE_IDTX_FOR_32X32 + if (tx_size == TX_32X32) return is_inter ? 3 : 0; +#else + if (tx_size == TX_32X32) return 0; +#endif + return tx_size == TX_16X16 ? 2 : 1; +} + +static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs, + int is_inter) { + const int set = get_ext_tx_set(tx_size, bs, is_inter); + return is_inter ? num_ext_tx_set_inter[set] : num_ext_tx_set_intra[set]; +} + +static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = { + { 0, 0, 0, 0, }, // unused + { 1, 1, 0, 0, }, + { 0, 0, 1, 0, }, +}; + +static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = { + { 0, 0, 0, 0, }, // unused + { 1, 1, 0, 0, }, + { 0, 0, 1, 0, }, + { 0, 0, 0, USE_IDTX_FOR_32X32, }, +}; + +// Transform types used in each intra set +static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = { + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, }, +}; + +// Transform types used in each inter set +static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = { + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, }, + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, }, +}; +#endif // CONFIG_EXT_TX + +#if CONFIG_EXT_INTRA +#define ALLOW_FILTER_INTRA_MODES 1 +#define ANGLE_STEP 3 +#define MAX_ANGLE_DELTAS 3 +#define 
ANGLE_FAST_SEARCH 1 +#define ANGLE_SKIP_THRESH 0.10 + +static uint8_t mode_to_angle_map[INTRA_MODES] = { + 0, 90, 180, 45, 135, 111, 157, 203, 67, 0, +}; + +static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = { + DCT_DCT, // FILTER_DC + ADST_DCT, // FILTER_V + DCT_ADST, // FILTER_H + DCT_DCT, // FILTER_D45 + ADST_ADST, // FILTER_D135 + ADST_DCT, // FILTER_D117 + DCT_ADST, // FILTER_D153 + DCT_ADST, // FILTER_D207 + ADST_DCT, // FILTER_D63 + ADST_ADST, // FILTER_TM +}; +#endif // CONFIG_EXT_INTRA + +static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, + const MACROBLOCKD *xd, + int block_idx, TX_SIZE tx_size) { const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; +#if CONFIG_EXT_INTRA + if (!is_inter_block(mbmi)) { + const int use_ext_intra_mode_info = + mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type]; + const EXT_INTRA_MODE ext_intra_mode = + mbmi->ext_intra_mode_info.ext_intra_mode[plane_type]; + const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y) ? 
+ get_y_mode(mi, block_idx) : mbmi->uv_mode; + + if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) + return DCT_DCT; + +#if CONFIG_EXT_TX + if (mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y && + ALLOW_INTRA_EXT_TX) + return mbmi->tx_type; +#endif // CONFIG_EXT_TX + + if (use_ext_intra_mode_info) + return filter_intra_mode_to_tx_type_lookup[ext_intra_mode]; + + if (mode == DC_PRED) { + return DCT_DCT; + } else if (mode == TM_PRED) { + return ADST_ADST; + } else { + int angle = mode_to_angle_map[mode]; + if (mbmi->sb_type >= BLOCK_8X8) + angle += mbmi->angle_delta[plane_type] * ANGLE_STEP; + assert(angle > 0 && angle < 270); + if (angle == 135) + return ADST_ADST; + else if (angle < 45 || angle > 225) + return DCT_DCT; + else if (angle < 135) + return ADST_DCT; + else + return DCT_ADST; + } + } +#endif // CONFIG_EXT_INTRA + +#if CONFIG_EXT_TX +#if USE_IDTX_FOR_32X32 + if (xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 || + (tx_size >= TX_32X32 && !is_inter_block(mbmi))) +#else + if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) +#endif + return DCT_DCT; + if (mbmi->sb_type >= BLOCK_8X8) { + if (plane_type == PLANE_TYPE_Y) { + if (is_inter_block(mbmi) || ALLOW_INTRA_EXT_TX) + return mbmi->tx_type; + } + if (is_inter_block(mbmi)) + // UV Inter only + return (mbmi->tx_type == IDTX && tx_size == TX_32X32 ? + DCT_DCT : mbmi->tx_type); + } + + // Sub8x8-Inter/Intra OR UV-Intra + if (is_inter_block(mbmi)) // Sub8x8-Inter + return DCT_DCT; + else // Sub8x8 Intra OR UV-Intra + return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ? 
+ get_y_mode(mi, block_idx) : mbmi->uv_mode]; +#else (void) block_idx; if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] || - mbmi->tx_size >= TX_32X32) + tx_size >= TX_32X32) return DCT_DCT; - return mbmi->tx_type; +#endif // CONFIG_EXT_TX } void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); @@ -248,8 +478,18 @@ static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi, const struct macroblockd_plane *pd) { +#if CONFIG_SUPERTX + if (!supertx_enabled(mbmi)) { + return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x, + pd->subsampling_y); + } else { + return uvsupertx_size_lookup[mbmi->tx_size][pd->subsampling_x] + [pd->subsampling_y]; + } +#else return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); +#endif // CONFIG_SUPERTX } static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, @@ -279,7 +519,6 @@ const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg); - void vp10_foreach_transformed_block( const MACROBLOCKD* const xd, BLOCK_SIZE bsize, foreach_transformed_block_visitor visit, void *arg);
diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h index 334489c..84476fa 100644 --- a/vp10/common/common_data.h +++ b/vp10/common/common_data.h
@@ -170,6 +170,21 @@ {0, 0 }, // 64X64 - {0b0000, 0b0000} }; +#if CONFIG_SUPERTX +static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = { + // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 + // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + {{TX_4X4, TX_4X4}, {TX_4X4, TX_4X4}}, + {{TX_8X8, TX_4X4}, {TX_4X4, TX_4X4}}, + {{TX_16X16, TX_8X8}, {TX_8X8, TX_8X8}}, + {{TX_32X32, TX_16X16}, {TX_16X16, TX_16X16}}, +}; // uvsupertx_size_lookup[tx_size][ss_x][ss_y] + +static const int partition_supertx_context_lookup[PARTITION_TYPES] = { + -1, 0, 0, 1 // NOTE(review): -1 looks like a "no context" marker; confirm +}; +#endif // CONFIG_SUPERTX + #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h index 9a471c8..747d1ad 100644 --- a/vp10/common/entropy.h +++ b/vp10/common/entropy.h
@@ -21,8 +21,8 @@ extern "C" { #endif -#define DIFF_UPDATE_PROB 252 -#define GROUP_DIFF_UPDATE_PROB 252 +#define DIFF_UPDATE_PROB 252 +#define GROUP_DIFF_UPDATE_PROB 252 // Coefficient token alphabet #define ZERO_TOKEN 0 // 0 Extra Bits 0+0
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index 78f3650..1b4fd26 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c
@@ -127,21 +127,6 @@ } }; -#if !CONFIG_MISC_FIXES -const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { - { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc - { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v - { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h - { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45 - { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135 - { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117 - { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153 - { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207 - { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63 - { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm -}; -#endif - static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 @@ -162,32 +147,6 @@ { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm }; -#if !CONFIG_MISC_FIXES -const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1] = { - // 8x8 -> 4x4 - { 158, 97, 94 }, // a/l both not split - { 93, 24, 99 }, // a split, l not split - { 85, 119, 44 }, // l split, a not split - { 62, 59, 67 }, // a/l both split - // 16x16 -> 8x8 - { 149, 53, 53 }, // a/l both not split - { 94, 20, 48 }, // a split, l not split - { 83, 53, 24 }, // l split, a not split - { 52, 18, 18 }, // a/l both split - // 32x32 -> 16x16 - { 150, 40, 39 }, // a/l both not split - { 78, 12, 26 }, // a split, l not split - { 67, 33, 11 }, // l split, a not split - { 24, 7, 5 }, // a/l both split - // 64x64 -> 32x32 - { 174, 35, 49 }, // a/l both not split - { 68, 11, 27 }, // a split, l not split - { 57, 15, 9 }, // l split, a not split - { 12, 3, 3 }, // a/l both split -}; -#endif - static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] [PARTITION_TYPES - 1] = { // 8x8 -> 4x4 @@ -212,6 +171,20 @@ { 10, 7, 6 }, // a/l both split }; 
+#if CONFIG_REF_MV +static const vpx_prob default_newmv_prob[NEWMV_MODE_CONTEXTS] = { + 200, 180, 150, 150, 110, 70, 60, +}; + +static const vpx_prob default_zeromv_prob[ZEROMV_MODE_CONTEXTS] = { + 192, 64, +}; + +static const vpx_prob default_refmv_prob[REFMV_MODE_CONTEXTS] = { + 220, 220, 200, 200, 180, 128, 30, 220, 30, +}; +#endif + static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] [INTER_MODES - 1] = { {2, 173, 34}, // 0 = both zero mv @@ -256,16 +229,33 @@ 239, 183, 119, 96, 41 }; -static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { - 50, 126, 123, 221, 226 +static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = { +#if CONFIG_EXT_REFS + // TODO(zoeliu): To adjust the initial prob values. + { 33, 16, 16, 16 }, + { 77, 74, 74, 74 }, + { 142, 142, 142, 142 }, + { 172, 170, 170, 170 }, + { 238, 247, 247, 247 } +#else + { 50 }, { 126 }, { 123 }, { 221 }, { 226 } +#endif // CONFIG_EXT_REFS }; -static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = { +static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = { +#if CONFIG_EXT_REFS + { 33, 16, 16, 16, 16 }, + { 77, 74, 74, 74, 74 }, + { 142, 142, 142, 142, 142 }, + { 172, 170, 170, 170, 170 }, + { 238, 247, 247, 247, 247 } +#else { 33, 16 }, { 77, 74 }, { 142, 142 }, { 172, 170 }, { 238, 247 } +#endif // CONFIG_EXT_REFS }; static const struct tx_probs default_tx_probs = { @@ -279,6 +269,442 @@ { 66 } } }; +const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)] = { + -TWO_COLORS, 2, + -THREE_COLORS, 4, + -FOUR_COLORS, 6, + -FIVE_COLORS, 8, + -SIX_COLORS, 10, + -SEVEN_COLORS, -EIGHT_COLORS, +}; + +// TODO(huisu): tune these probs +const vpx_prob +vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { + { 96, 89, 100, 64, 77, 130}, + { 22, 15, 44, 16, 34, 82}, + { 30, 19, 57, 18, 38, 86}, + { 94, 36, 104, 23, 43, 92}, + { 116, 76, 107, 46, 65, 105}, + { 112, 82, 94, 40, 70, 112}, + { 147, 124, 123, 58, 69, 103}, + { 
180, 113, 136, 49, 45, 114}, + { 107, 70, 87, 49, 154, 156}, + { 98, 105, 142, 63, 64, 152}, +}; + +const vpx_prob +vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { + { 160, 196, 228, 213, 175, 230}, + { 87, 148, 208, 141, 166, 163}, + { 72, 151, 204, 139, 155, 161}, + { 78, 135, 171, 104, 120, 173}, + { 59, 92, 131, 78, 92, 142}, + { 75, 118, 149, 84, 90, 128}, + { 89, 87, 92, 66, 66, 128}, + { 67, 53, 54, 55, 66, 93}, + { 120, 130, 83, 171, 75, 214}, + { 72, 55, 66, 68, 79, 107}, +}; + +const vpx_prob +vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] + = { + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, +}; + + +const vpx_prob default_uv_palette_mode_prob[2] = { + 253, 229 +}; + +const vpx_tree_index +vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)] = { + { // 2 colors + -PALETTE_COLOR_ONE, -PALETTE_COLOR_TWO, + }, + { // 3 colors + -PALETTE_COLOR_ONE, 2, + -PALETTE_COLOR_TWO, -PALETTE_COLOR_THREE, + }, + { // 4 colors + -PALETTE_COLOR_ONE, 2, + -PALETTE_COLOR_TWO, 4, + -PALETTE_COLOR_THREE, -PALETTE_COLOR_FOUR, + }, + { // 5 colors + -PALETTE_COLOR_ONE, 2, + -PALETTE_COLOR_TWO, 4, + -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, -PALETTE_COLOR_FIVE, + }, + { // 6 colors + -PALETTE_COLOR_ONE, 2, + -PALETTE_COLOR_TWO, 4, + -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, 8, + -PALETTE_COLOR_FIVE, -PALETTE_COLOR_SIX, + }, + { // 7 colors + -PALETTE_COLOR_ONE, 2, + -PALETTE_COLOR_TWO, 4, + -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, 8, + -PALETTE_COLOR_FIVE, 10, + -PALETTE_COLOR_SIX, -PALETTE_COLOR_SEVEN, + }, + { // 8 colors + -PALETTE_COLOR_ONE, 2, + -PALETTE_COLOR_TWO, 4, + -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, 8, + -PALETTE_COLOR_FIVE, 10, + -PALETTE_COLOR_SIX, 12, + -PALETTE_COLOR_SEVEN, 
-PALETTE_COLOR_EIGHT, + }, +}; + +const vpx_prob vp10_default_palette_y_color_prob +[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = { + { // 2 colors + { 230, 255, 128, 128, 128, 128, 128 }, + { 214, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 240, 255, 128, 128, 128, 128, 128 }, + { 73, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 130, 255, 128, 128, 128, 128, 128 }, + { 227, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 188, 255, 128, 128, 128, 128, 128 }, + { 75, 255, 128, 128, 128, 128, 128 }, + { 250, 255, 128, 128, 128, 128, 128 }, + { 223, 255, 128, 128, 128, 128, 128 }, + { 252, 255, 128, 128, 128, 128, 128 }, + }, { // 3 colors + { 229, 137, 255, 128, 128, 128, 128 }, + { 197, 120, 255, 128, 128, 128, 128 }, + { 107, 195, 255, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 27, 151, 255, 128, 128, 128, 128 }, + { 230, 130, 255, 128, 128, 128, 128 }, + { 37, 230, 255, 128, 128, 128, 128 }, + { 67, 221, 255, 128, 128, 128, 128 }, + { 124, 230, 255, 128, 128, 128, 128 }, + { 195, 109, 255, 128, 128, 128, 128 }, + { 99, 122, 255, 128, 128, 128, 128 }, + { 205, 208, 255, 128, 128, 128, 128 }, + { 40, 235, 255, 128, 128, 128, 128 }, + { 251, 132, 255, 128, 128, 128, 128 }, + { 237, 186, 255, 128, 128, 128, 128 }, + { 253, 112, 255, 128, 128, 128, 128 }, + }, { // 4 colors + { 195, 87, 128, 255, 128, 128, 128 }, + { 143, 100, 123, 255, 128, 128, 128 }, + { 94, 124, 119, 255, 128, 128, 128 }, + { 77, 91, 130, 255, 128, 128, 128 }, + { 39, 114, 178, 255, 128, 128, 128 }, + { 222, 94, 125, 255, 128, 128, 128 }, + { 44, 203, 132, 255, 128, 128, 128 }, + { 68, 175, 122, 255, 128, 128, 128 }, + { 110, 187, 124, 255, 128, 128, 128 }, + { 152, 91, 128, 255, 128, 128, 128 }, + { 70, 109, 181, 255, 128, 128, 128 }, + { 133, 113, 164, 255, 128, 128, 128 }, + { 
47, 205, 133, 255, 128, 128, 128 }, + { 247, 94, 136, 255, 128, 128, 128 }, + { 205, 122, 146, 255, 128, 128, 128 }, + { 251, 100, 141, 255, 128, 128, 128 }, + }, { // 5 colors + { 195, 65, 84, 125, 255, 128, 128 }, + { 150, 76, 84, 121, 255, 128, 128 }, + { 94, 110, 81, 117, 255, 128, 128 }, + { 79, 85, 91, 139, 255, 128, 128 }, + { 26, 102, 139, 127, 255, 128, 128 }, + { 220, 73, 91, 119, 255, 128, 128 }, + { 38, 203, 86, 127, 255, 128, 128 }, + { 61, 186, 72, 124, 255, 128, 128 }, + { 132, 199, 84, 128, 255, 128, 128 }, + { 172, 52, 62, 120, 255, 128, 128 }, + { 102, 89, 121, 122, 255, 128, 128 }, + { 182, 48, 69, 186, 255, 128, 128 }, + { 36, 206, 87, 126, 255, 128, 128 }, + { 249, 55, 67, 122, 255, 128, 128 }, + { 218, 88, 75, 122, 255, 128, 128 }, + { 253, 64, 80, 119, 255, 128, 128 }, + }, { // 6 colors + { 182, 54, 64, 75, 118, 255, 128 }, + { 126, 67, 70, 76, 116, 255, 128 }, + { 79, 92, 67, 85, 120, 255, 128 }, + { 63, 61, 81, 118, 132, 255, 128 }, + { 21, 80, 105, 83, 119, 255, 128 }, + { 215, 72, 74, 74, 111, 255, 128 }, + { 50, 176, 63, 79, 120, 255, 128 }, + { 72, 148, 66, 77, 120, 255, 128 }, + { 105, 177, 57, 78, 130, 255, 128 }, + { 150, 66, 66, 80, 127, 255, 128 }, + { 81, 76, 109, 85, 116, 255, 128 }, + { 113, 81, 62, 96, 148, 255, 128 }, + { 54, 179, 69, 82, 121, 255, 128 }, + { 244, 47, 48, 67, 118, 255, 128 }, + { 198, 83, 53, 65, 121, 255, 128 }, + { 250, 42, 51, 69, 110, 255, 128 }, + }, { // 7 colors + { 182, 45, 54, 62, 74, 113, 255 }, + { 124, 63, 57, 62, 77, 114, 255 }, + { 77, 80, 56, 66, 76, 117, 255 }, + { 63, 57, 69, 98, 85, 131, 255 }, + { 19, 81, 98, 63, 80, 116, 255 }, + { 215, 56, 60, 63, 68, 105, 255 }, + { 50, 174, 50, 60, 79, 118, 255 }, + { 68, 151, 50, 58, 73, 117, 255 }, + { 104, 182, 53, 57, 79, 127, 255 }, + { 156, 50, 51, 63, 77, 111, 255 }, + { 88, 67, 97, 59, 82, 120, 255 }, + { 114, 81, 46, 65, 103, 132, 255 }, + { 55, 166, 57, 66, 82, 120, 255 }, + { 245, 34, 38, 43, 63, 114, 255 }, + { 203, 68, 45, 47, 60, 118, 255 
}, + { 250, 35, 37, 47, 66, 110, 255 }, + }, { // 8 colors + { 180, 43, 46, 50, 56, 69, 109 }, + { 116, 53, 51, 49, 57, 73, 115 }, + { 79, 70, 49, 50, 59, 74, 117 }, + { 60, 54, 57, 70, 62, 83, 129 }, + { 20, 73, 85, 52, 66, 81, 119 }, + { 213, 56, 52, 49, 53, 62, 104 }, + { 48, 161, 41, 45, 56, 77, 116 }, + { 68, 139, 40, 47, 54, 71, 116 }, + { 123, 166, 42, 43, 52, 76, 130 }, + { 153, 44, 44, 47, 54, 79, 129 }, + { 87, 64, 83, 49, 60, 75, 127 }, + { 131, 68, 43, 48, 73, 96, 130 }, + { 55, 152, 45, 51, 64, 77, 113 }, + { 243, 30, 28, 33, 41, 65, 114 }, + { 202, 56, 35, 36, 42, 63, 123 }, + { 249, 31, 29, 32, 45, 68, 111 }, + } +}; + +const vpx_prob vp10_default_palette_uv_color_prob +[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = { + { // 2 colors + { 228, 255, 128, 128, 128, 128, 128 }, + { 195, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 228, 255, 128, 128, 128, 128, 128 }, + { 71, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 129, 255, 128, 128, 128, 128, 128 }, + { 206, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 136, 255, 128, 128, 128, 128, 128 }, + { 98, 255, 128, 128, 128, 128, 128 }, + { 236, 255, 128, 128, 128, 128, 128 }, + { 222, 255, 128, 128, 128, 128, 128 }, + { 249, 255, 128, 128, 128, 128, 128 }, + }, { // 3 colors + { 198, 136, 255, 128, 128, 128, 128 }, + { 178, 105, 255, 128, 128, 128, 128 }, + { 100, 206, 255, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 12, 136, 255, 128, 128, 128, 128 }, + { 219, 134, 255, 128, 128, 128, 128 }, + { 50, 198, 255, 128, 128, 128, 128 }, + { 61, 231, 255, 128, 128, 128, 128 }, + { 110, 209, 255, 128, 128, 128, 128 }, + { 173, 106, 255, 128, 128, 128, 128 }, + { 145, 166, 255, 128, 128, 128, 128 }, + { 156, 175, 255, 128, 128, 128, 128 }, + { 69, 183, 255, 128, 128, 128, 128 }, + { 241, 163, 
255, 128, 128, 128, 128 }, + { 224, 160, 255, 128, 128, 128, 128 }, + { 246, 154, 255, 128, 128, 128, 128 }, + }, { // 4 colors + { 173, 88, 143, 255, 128, 128, 128 }, + { 146, 81, 127, 255, 128, 128, 128 }, + { 84, 134, 102, 255, 128, 128, 128 }, + { 69, 138, 140, 255, 128, 128, 128 }, + { 31, 103, 200, 255, 128, 128, 128 }, + { 217, 101, 139, 255, 128, 128, 128 }, + { 51, 174, 121, 255, 128, 128, 128 }, + { 64, 177, 109, 255, 128, 128, 128 }, + { 96, 179, 145, 255, 128, 128, 128 }, + { 164, 77, 114, 255, 128, 128, 128 }, + { 87, 94, 156, 255, 128, 128, 128 }, + { 105, 57, 173, 255, 128, 128, 128 }, + { 63, 158, 137, 255, 128, 128, 128 }, + { 236, 102, 156, 255, 128, 128, 128 }, + { 197, 115, 153, 255, 128, 128, 128 }, + { 245, 106, 154, 255, 128, 128, 128 }, + }, { // 5 colors + { 179, 64, 97, 129, 255, 128, 128 }, + { 137, 56, 88, 125, 255, 128, 128 }, + { 82, 107, 61, 118, 255, 128, 128 }, + { 59, 113, 86, 115, 255, 128, 128 }, + { 23, 88, 118, 130, 255, 128, 128 }, + { 213, 66, 90, 125, 255, 128, 128 }, + { 37, 181, 103, 121, 255, 128, 128 }, + { 47, 188, 61, 131, 255, 128, 128 }, + { 104, 185, 103, 144, 255, 128, 128 }, + { 163, 39, 76, 112, 255, 128, 128 }, + { 94, 74, 131, 126, 255, 128, 128 }, + { 142, 42, 103, 163, 255, 128, 128 }, + { 53, 162, 99, 149, 255, 128, 128 }, + { 239, 54, 84, 108, 255, 128, 128 }, + { 203, 84, 110, 147, 255, 128, 128 }, + { 248, 70, 105, 151, 255, 128, 128 }, + }, { // 6 colors + { 189, 50, 67, 90, 130, 255, 128 }, + { 114, 50, 55, 90, 123, 255, 128 }, + { 66, 76, 54, 82, 128, 255, 128 }, + { 43, 69, 69, 80, 129, 255, 128 }, + { 22, 59, 87, 88, 141, 255, 128 }, + { 203, 49, 68, 87, 122, 255, 128 }, + { 43, 157, 74, 104, 146, 255, 128 }, + { 54, 138, 51, 95, 138, 255, 128 }, + { 82, 171, 58, 102, 146, 255, 128 }, + { 129, 38, 59, 64, 168, 255, 128 }, + { 56, 67, 119, 92, 112, 255, 128 }, + { 96, 62, 53, 132, 82, 255, 128 }, + { 60, 147, 77, 108, 145, 255, 128 }, + { 238, 76, 73, 93, 148, 255, 128 }, + { 189, 86, 73, 103, 157, 
255, 128 }, + { 246, 62, 75, 83, 167, 255, 128 }, + }, { // 7 colors + { 179, 42, 51, 73, 99, 134, 255 }, + { 119, 52, 52, 61, 64, 114, 255 }, + { 53, 77, 35, 65, 71, 131, 255 }, + { 38, 70, 51, 68, 89, 144, 255 }, + { 23, 65, 128, 73, 97, 131, 255 }, + { 210, 47, 52, 63, 81, 143, 255 }, + { 42, 159, 57, 68, 98, 143, 255 }, + { 49, 153, 45, 82, 93, 143, 255 }, + { 81, 169, 52, 72, 113, 151, 255 }, + { 136, 46, 35, 56, 75, 96, 255 }, + { 57, 84, 109, 47, 107, 131, 255 }, + { 128, 78, 57, 36, 128, 85, 255 }, + { 54, 149, 68, 77, 94, 153, 255 }, + { 243, 58, 50, 71, 81, 167, 255 }, + { 189, 92, 64, 70, 121, 173, 255 }, + { 248, 35, 38, 51, 82, 201, 255 }, + }, { // 8 colors + { 201, 40, 36, 42, 64, 92, 123 }, + { 116, 43, 33, 43, 73, 102, 128 }, + { 46, 77, 37, 69, 62, 78, 150 }, + { 40, 65, 52, 50, 76, 89, 133 }, + { 28, 48, 91, 17, 64, 77, 133 }, + { 218, 43, 43, 37, 56, 72, 163 }, + { 41, 155, 44, 83, 82, 129, 180 }, + { 44, 141, 29, 55, 64, 89, 147 }, + { 92, 166, 48, 45, 59, 126, 179 }, + { 169, 35, 49, 41, 36, 99, 139 }, + { 55, 77, 77, 56, 60, 75, 156 }, + { 155, 81, 51, 64, 57, 182, 255 }, + { 60, 134, 49, 49, 93, 128, 174 }, + { 244, 98, 51, 46, 22, 73, 238 }, + { 189, 70, 40, 87, 93, 79, 201 }, + { 248, 54, 49, 40, 29, 42, 227 }, + } +}; + +static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = { + // (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2), + 3993, 4235, 4378, 4380, + // (4, 3, 3, 0), (5, 0, 0, 0), (5, 3, 0, 0), (5, 3, 2, 0), + 5720, 6655, 7018, 7040, + // (5, 5, 0, 0), (6, 2, 0, 0), (6, 2, 2, 0), (6, 4, 0, 0), + 7260, 8228, 8250, 8470, + // (7, 3, 0, 0), (8, 0, 0, 0), (8, 2, 0, 0), (10, 0, 0, 0) + 9680, 10648, 10890, 13310 +}; + +int vp10_get_palette_color_context(const uint8_t *color_map, int cols, + int r, int c, int n, int *color_order) { + int i, j, max, max_idx, temp; + int scores[PALETTE_MAX_SIZE + 10]; + int weights[4] = {3, 2, 3, 2}; + int color_ctx = 0; + int color_neighbors[4]; + + assert(n <= PALETTE_MAX_SIZE); + 
+ if (c - 1 >= 0) + color_neighbors[0] = color_map[r * cols + c - 1]; + else + color_neighbors[0] = -1; + if (c - 1 >= 0 && r - 1 >= 0) + color_neighbors[1] = color_map[(r - 1) * cols + c - 1]; + else + color_neighbors[1] = -1; + if (r - 1 >= 0) + color_neighbors[2] = color_map[(r - 1) * cols + c]; + else + color_neighbors[2] = -1; + if (r - 1 >= 0 && c + 1 <= cols - 1) + color_neighbors[3] = color_map[(r - 1) * cols + c + 1]; + else + color_neighbors[3] = -1; + + for (i = 0; i < PALETTE_MAX_SIZE; ++i) + color_order[i] = i; + memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0])); + for (i = 0; i < 4; ++i) { + if (color_neighbors[i] >= 0) + scores[color_neighbors[i]] += weights[i]; + } + + for (i = 0; i < 4; ++i) { + max = scores[i]; + max_idx = i; + j = i + 1; + while (j < n) { + if (scores[j] > max) { + max = scores[j]; + max_idx = j; + } + ++j; + } + + if (max_idx != i) { + temp = scores[i]; + scores[i] = scores[max_idx]; + scores[max_idx] = temp; + + temp = color_order[i]; + color_order[i] = color_order[max_idx]; + color_order[max_idx] = temp; + } + } + + for (i = 0; i < 4; ++i) + color_ctx = color_ctx * 11 + scores[i]; + + for (i = 0; i < PALETTE_COLOR_CONTEXTS; ++i) + if (color_ctx == palette_color_context_lookup[i]) { + color_ctx = i; + break; + } + + if (color_ctx >= PALETTE_COLOR_CONTEXTS) + color_ctx = 0; + + return color_ctx; +} + void vp10_tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, unsigned int (*ct_32x32p)[2]) { ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; @@ -306,10 +732,26 @@ ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; } +#if CONFIG_VAR_TX +static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = { + 192, 128, 64, 192, 128, 64, 192, 128, 64, +}; +#endif + static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 }; +#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 +static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS - 1] = { + { 235, 192, 128}, 
+ { 36, 243, 208}, + { 34, 16, 128}, + { 36, 243, 48}, + { 149, 160, 128}, +}; +#else static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] [SWITCHABLE_FILTERS - 1] = { { 235, 162, }, @@ -317,15 +759,440 @@ { 34, 3, }, { 149, 144, }, }; +#endif -#if CONFIG_MISC_FIXES +#if CONFIG_EXT_TX +const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER] + [TREE_SIZE(TX_TYPES)] = { + { // ToDo(yaowu): remove unused entry 0. + -IDTX, 2, + -DCT_DCT, 4, + -DST_DST, 6, + 8, 18, + 10, 12, + -DST_DCT, -DCT_DST, + 14, 16, + -ADST_DCT, -DCT_ADST, + -FLIPADST_DCT, -DCT_FLIPADST, + 20, 26, + 22, 24, + -DST_ADST, -ADST_DST, + -DST_FLIPADST, -FLIPADST_DST, + 28, 30, + -ADST_ADST, -FLIPADST_FLIPADST, + -ADST_FLIPADST, -FLIPADST_ADST, + }, { + -IDTX, 2, + -DCT_DCT, 4, + -DST_DST, 6, + 8, 18, + 10, 12, + -DST_DCT, -DCT_DST, + 14, 16, + -ADST_DCT, -DCT_ADST, + -FLIPADST_DCT, -DCT_FLIPADST, + 20, 26, + 22, 24, + -DST_ADST, -ADST_DST, + -DST_FLIPADST, -FLIPADST_DST, + 28, 30, + -ADST_ADST, -FLIPADST_FLIPADST, + -ADST_FLIPADST, -FLIPADST_ADST, + }, { + -IDTX, 2, + -DCT_DCT, 4, + 6, 12, + 8, 10, + -ADST_DCT, -DCT_ADST, + -FLIPADST_DCT, -DCT_FLIPADST, + 14, 16, + -ADST_ADST, -FLIPADST_FLIPADST, + -ADST_FLIPADST, -FLIPADST_ADST + }, { + -IDTX, -DCT_DCT, + } +}; + +const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA] + [TREE_SIZE(TX_TYPES)] = { + { // ToDo(yaowu): remove unused entry 0.
+ -IDTX, 2, + -DCT_DCT, 4, + -DST_DST, 6, + 8, 18, + 10, 12, + -DST_DCT, -DCT_DST, + 14, 16, + -ADST_DCT, -DCT_ADST, + -FLIPADST_DCT, -DCT_FLIPADST, + 20, 26, + 22, 24, + -DST_ADST, -ADST_DST, + -DST_FLIPADST, -FLIPADST_DST, + 28, 30, + -ADST_ADST, -FLIPADST_FLIPADST, + -ADST_FLIPADST, -FLIPADST_ADST, + }, { + -IDTX, 2, + -DCT_DCT, 4, + -DST_DST, 6, + 8, 18, + 10, 12, + -DST_DCT, -DCT_DST, + 14, 16, + -ADST_DCT, -DCT_ADST, + -FLIPADST_DCT, -DCT_FLIPADST, + 20, 26, + 22, 24, + -DST_ADST, -ADST_DST, + -DST_FLIPADST, -FLIPADST_DST, + 28, 30, + -ADST_ADST, -FLIPADST_FLIPADST, + -ADST_FLIPADST, -FLIPADST_ADST, + }, { + -IDTX, 2, + -DCT_DCT, 4, + 6, 12, + 8, 10, + -ADST_DCT, -DCT_ADST, + -FLIPADST_DCT, -DCT_FLIPADST, + 14, 16, + -ADST_ADST, -FLIPADST_FLIPADST, + -ADST_FLIPADST, -FLIPADST_ADST + } +}; + +static const vpx_prob +default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = { + { // ToDo(yaowu): remove unused entry 0. + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, +#if EXT_TX_SIZES == 4 + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, +#endif + }, { + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, +#if EXT_TX_SIZES == 4 + { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128 }, +#endif + }, { + { 12, 112, 128, 128, 128, 128, 128, 128, 128 }, + { 12, 112, 128, 128, 128, 128, 128, 128, 128 }, + { 12, 112, 128, 128, 128, 128, 128, 128, 128 }, +#if EXT_TX_SIZES == 4 + { 12, 112, 128, 128, 128, 128, 128, 128, 128 }, +#endif + }, { + { 12, }, + { 12, }, + { 12, }, +#if 
EXT_TX_SIZES == 4 + { 12, }, +#endif + } +}; + +static const vpx_prob +default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES] + [INTRA_MODES][TX_TYPES - 1] = { + { // ToDo(yaowu): remove unused entry 0. + { + { 8, 11, 24, 112, 87, 137, 127, 134, + 128, 86, 128, 124, 125, 133, 176, 123, }, + { 10, 9, 39, 106, 73, 155, 163, 228, + 35, 62, 129, 127, 133, 114, 213, 234, }, + { 10, 9, 14, 88, 91, 127, 151, 51, + 210, 89, 126, 58, 52, 116, 217, 24, }, + { 9, 6, 29, 113, 98, 131, 149, 210, + 119, 60, 124, 93, 90, 143, 170, 197, }, + { 8, 8, 38, 101, 111, 166, 167, 141, + 130, 105, 128, 75, 75, 118, 197, 117, }, + { 7, 8, 39, 91, 101, 153, 166, 200, + 99, 77, 123, 90, 83, 144, 224, 192, }, + { 7, 10, 26, 86, 119, 154, 130, 101, + 152, 91, 129, 75, 79, 137, 219, 77, }, + { 10, 13, 20, 86, 102, 162, 112, 76, + 171, 86, 134, 122, 106, 124, 196, 44, }, + { 8, 9, 33, 108, 100, 144, 148, 215, + 77, 60, 125, 125, 128, 126, 198, 220, }, + { 3, 10, 29, 111, 69, 141, 204, 141, + 139, 93, 120, 75, 77, 163, 242, 124, }, + }, { + { 2, 53, 18, 147, 96, 98, 136, 133, + 131, 120, 153, 163, 169, 137, 173, 124, }, + { 4, 18, 34, 133, 54, 130, 179, 228, + 28, 72, 153, 164, 168, 118, 227, 239, }, + { 4, 18, 13, 125, 72, 110, 176, 36, + 221, 104, 148, 75, 72, 117, 225, 19, }, + { 8, 33, 24, 162, 113, 99, 147, 226, + 103, 85, 153, 143, 153, 124, 155, 210, }, + { 2, 15, 35, 107, 127, 158, 192, 128, + 126, 116, 151, 95, 88, 182, 241, 119, }, + { 3, 15, 36, 112, 100, 146, 194, 189, + 90, 98, 152, 99, 100, 165, 235, 175, }, + { 3, 16, 29, 109, 103, 140, 182, 76, + 173, 104, 147, 82, 85, 159, 235, 70, }, + { 9, 24, 14, 120, 86, 156, 161, 34, + 177, 121, 142, 128, 128, 126, 185, 37, }, + { 5, 24, 29, 152, 98, 99, 174, 228, + 82, 76, 147, 149, 128, 132, 191, 225, }, + { 2, 15, 29, 111, 77, 126, 200, 135, + 117, 93, 152, 96, 84, 191, 245, 135, }, + }, { + { 2, 69, 13, 173, 111, 69, 137, 159, + 159, 146, 151, 193, 203, 131, 180, 123, }, + { 1, 12, 33, 164, 32, 98, 204, 242, + 23, 99, 149, 215, 232, 
110, 239, 245, }, + { 1, 17, 9, 136, 82, 83, 171, 28, + 231, 128, 135, 76, 64, 118, 235, 17, }, + { 4, 41, 17, 195, 131, 58, 161, 237, + 141, 97, 153, 189, 191, 117, 182, 202, }, + { 2, 17, 36, 104, 149, 137, 217, 139, + 191, 119, 125, 107, 115, 223, 249, 110, }, + { 2, 14, 24, 127, 91, 135, 219, 198, + 113, 91, 164, 125, 173, 211, 250, 116, }, + { 3, 19, 24, 120, 102, 130, 209, 81, + 187, 95, 143, 102, 50, 190, 244, 56, }, + { 4, 27, 10, 128, 91, 157, 181, 33, + 181, 150, 141, 141, 166, 114, 215, 25, }, + { 2, 34, 27, 187, 102, 77, 210, 245, + 113, 107, 136, 184, 188, 121, 210, 234, }, + { 1, 15, 22, 141, 59, 94, 208, 133, + 154, 95, 152, 112, 105, 191, 242, 111, }, +#if EXT_TX_SIZES == 4 + }, { + { 2, 69, 13, 173, 111, 69, 137, 159, + 159, 146, 151, 193, 203, 131, 180, 123, }, + { 1, 12, 33, 164, 32, 98, 204, 242, + 23, 99, 149, 215, 232, 110, 239, 245, }, + { 1, 17, 9, 136, 82, 83, 171, 28, + 231, 128, 135, 76, 64, 118, 235, 17, }, + { 4, 41, 17, 195, 131, 58, 161, 237, + 141, 97, 153, 189, 191, 117, 182, 202, }, + { 2, 17, 36, 104, 149, 137, 217, 139, + 191, 119, 125, 107, 115, 223, 249, 110, }, + { 2, 14, 24, 127, 91, 135, 219, 198, + 113, 91, 164, 125, 173, 211, 250, 116, }, + { 3, 19, 24, 120, 102, 130, 209, 81, + 187, 95, 143, 102, 50, 190, 244, 56, }, + { 4, 27, 10, 128, 91, 157, 181, 33, + 181, 150, 141, 141, 166, 114, 215, 25, }, + { 2, 34, 27, 187, 102, 77, 210, 245, + 113, 107, 136, 184, 188, 121, 210, 234, }, + { 1, 15, 22, 141, 59, 94, 208, 133, + 154, 95, 152, 112, 105, 191, 242, 111, }, +#endif + }, + }, { + { + { 8, 11, 24, 112, 87, 137, 127, 134, + 128, 86, 128, 124, 125, 133, 176, 123, }, + { 10, 9, 39, 106, 73, 155, 163, 228, + 35, 62, 129, 127, 133, 114, 213, 234, }, + { 10, 9, 14, 88, 91, 127, 151, 51, + 210, 89, 126, 58, 52, 116, 217, 24, }, + { 9, 6, 29, 113, 98, 131, 149, 210, + 119, 60, 124, 93, 90, 143, 170, 197, }, + { 8, 8, 38, 101, 111, 166, 167, 141, + 130, 105, 128, 75, 75, 118, 197, 117, }, + { 7, 8, 39, 91, 101, 153, 166, 200, + 
99, 77, 123, 90, 83, 144, 224, 192, }, + { 7, 10, 26, 86, 119, 154, 130, 101, + 152, 91, 129, 75, 79, 137, 219, 77, }, + { 10, 13, 20, 86, 102, 162, 112, 76, + 171, 86, 134, 122, 106, 124, 196, 44, }, + { 8, 9, 33, 108, 100, 144, 148, 215, + 77, 60, 125, 125, 128, 126, 198, 220, }, + { 3, 10, 29, 111, 69, 141, 204, 141, + 139, 93, 120, 75, 77, 163, 242, 124, }, + }, { + { 2, 53, 18, 147, 96, 98, 136, 133, + 131, 120, 153, 163, 169, 137, 173, 124, }, + { 4, 18, 34, 133, 54, 130, 179, 228, + 28, 72, 153, 164, 168, 118, 227, 239, }, + { 4, 18, 13, 125, 72, 110, 176, 36, + 221, 104, 148, 75, 72, 117, 225, 19, }, + { 8, 33, 24, 162, 113, 99, 147, 226, + 103, 85, 153, 143, 153, 124, 155, 210, }, + { 2, 15, 35, 107, 127, 158, 192, 128, + 126, 116, 151, 95, 88, 182, 241, 119, }, + { 3, 15, 36, 112, 100, 146, 194, 189, + 90, 98, 152, 99, 100, 165, 235, 175, }, + { 3, 16, 29, 109, 103, 140, 182, 76, + 173, 104, 147, 82, 85, 159, 235, 70, }, + { 9, 24, 14, 120, 86, 156, 161, 34, + 177, 121, 142, 128, 128, 126, 185, 37, }, + { 5, 24, 29, 152, 98, 99, 174, 228, + 82, 76, 147, 149, 128, 132, 191, 225, }, + { 2, 15, 29, 111, 77, 126, 200, 135, + 117, 93, 152, 96, 84, 191, 245, 135, }, + }, { + { 2, 69, 13, 173, 111, 69, 137, 159, + 159, 146, 151, 193, 203, 131, 180, 123, }, + { 1, 12, 33, 164, 32, 98, 204, 242, + 23, 99, 149, 215, 232, 110, 239, 245, }, + { 1, 17, 9, 136, 82, 83, 171, 28, + 231, 128, 135, 76, 64, 118, 235, 17, }, + { 4, 41, 17, 195, 131, 58, 161, 237, + 141, 97, 153, 189, 191, 117, 182, 202, }, + { 2, 17, 36, 104, 149, 137, 217, 139, + 191, 119, 125, 107, 115, 223, 249, 110, }, + { 2, 14, 24, 127, 91, 135, 219, 198, + 113, 91, 164, 125, 173, 211, 250, 116, }, + { 3, 19, 24, 120, 102, 130, 209, 81, + 187, 95, 143, 102, 50, 190, 244, 56, }, + { 4, 27, 10, 128, 91, 157, 181, 33, + 181, 150, 141, 141, 166, 114, 215, 25, }, + { 2, 34, 27, 187, 102, 77, 210, 245, + 113, 107, 136, 184, 188, 121, 210, 234, }, + { 1, 15, 22, 141, 59, 94, 208, 133, + 154, 95, 152, 112, 
105, 191, 242, 111, }, +#if EXT_TX_SIZES == 4 + }, { + { 2, 69, 13, 173, 111, 69, 137, 159, + 159, 146, 151, 193, 203, 131, 180, 123, }, + { 1, 12, 33, 164, 32, 98, 204, 242, + 23, 99, 149, 215, 232, 110, 239, 245, }, + { 1, 17, 9, 136, 82, 83, 171, 28, + 231, 128, 135, 76, 64, 118, 235, 17, }, + { 4, 41, 17, 195, 131, 58, 161, 237, + 141, 97, 153, 189, 191, 117, 182, 202, }, + { 2, 17, 36, 104, 149, 137, 217, 139, + 191, 119, 125, 107, 115, 223, 249, 110, }, + { 2, 14, 24, 127, 91, 135, 219, 198, + 113, 91, 164, 125, 173, 211, 250, 116, }, + { 3, 19, 24, 120, 102, 130, 209, 81, + 187, 95, 143, 102, 50, 190, 244, 56, }, + { 4, 27, 10, 128, 91, 157, 181, 33, + 181, 150, 141, 141, 166, 114, 215, 25, }, + { 2, 34, 27, 187, 102, 77, 210, 245, + 113, 107, 136, 184, 188, 121, 210, 234, }, + { 1, 15, 22, 141, 59, 94, 208, 133, + 154, 95, 152, 112, 105, 191, 242, 111, }, +#endif + }, + }, { + { + { 8, 176, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 10, 28, 176, 192, 208, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 10, 28, 176, 192, 48, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 9, 160, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 8, 28, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 7, 28, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 7, 20, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 10, 23, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 8, 29, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 3, 20, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, + }, { + { 2, 176, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 28, 176, 192, 208, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 28, 176, 192, 48, 128, 128, 128, + 128, 128, 128, 
128, 128, 128, 128, 128, }, + { 8, 160, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 28, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 3, 28, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 3, 26, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 9, 24, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 5, 24, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 25, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, + }, { + { 2, 176, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 28, 176, 192, 208, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 28, 176, 192, 48, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 160, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 28, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 28, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 3, 29, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 27, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 34, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 25, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, +#if EXT_TX_SIZES == 4 + }, { + { 2, 176, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 12, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 17, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 41, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 17, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 14, 160, 176, 192, 128, 128, 128, + 128, 
128, 128, 128, 128, 128, 128, 128, }, + { 3, 19, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 27, 160, 176, 64, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 34, 160, 176, 192, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 15, 96, 128, 128, 128, 160, 192, + 128, 128, 128, 128, 128, 128, 128, 128, }, +#endif + }, + }, +}; +#endif // CONFIG_EXT_TX + +#if CONFIG_SUPERTX +static const vpx_prob default_supertx_prob[PARTITION_SUPERTX_CONTEXTS] + [TX_SIZES] = { + { 1, 160, 160, 170 }, + { 1, 200, 200, 210 }, +}; +#endif // CONFIG_SUPERTX + // FIXME(someone) need real defaults here static const struct segmentation_probs default_seg_probs = { { 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128 }, }; -#endif +#if CONFIG_EXT_INTRA +static const vpx_prob default_ext_intra_probs[2] = {230, 230}; +#endif // CONFIG_EXT_INTRA + +#if !CONFIG_EXT_TX const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = { -DCT_DCT, 2, -ADST_ADST, 4, @@ -345,6 +1212,7 @@ {176, 85, 128}, {192, 85, 128}, }; +#endif static void init_mode_probs(FRAME_CONTEXT *fc) { vp10_copy(fc->uv_mode_prob, default_uv_probs); @@ -356,21 +1224,42 @@ vp10_copy(fc->comp_ref_prob, default_comp_ref_p); vp10_copy(fc->single_ref_prob, default_single_ref_p); fc->tx_probs = default_tx_probs; +#if CONFIG_VAR_TX + vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs); +#endif vp10_copy(fc->skip_probs, default_skip_probs); +#if CONFIG_REF_MV + vp10_copy(fc->newmv_prob, default_newmv_prob); + vp10_copy(fc->zeromv_prob, default_zeromv_prob); + vp10_copy(fc->refmv_prob, default_refmv_prob); +#endif vp10_copy(fc->inter_mode_probs, default_inter_mode_probs); -#if CONFIG_MISC_FIXES +#if CONFIG_SUPERTX + vp10_copy(fc->supertx_prob, default_supertx_prob); +#endif // CONFIG_SUPERTX vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs); vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs); -#endif - vp10_copy(fc->intra_ext_tx_prob, 
default_intra_ext_tx_prob); +#if CONFIG_EXT_INTRA + vp10_copy(fc->ext_intra_probs, default_ext_intra_probs); +#endif // CONFIG_EXT_INTRA vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob); + vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob); } +#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 const vpx_tree_index vp10_switchable_interp_tree - [TREE_SIZE(SWITCHABLE_FILTERS)] = { +[TREE_SIZE(SWITCHABLE_FILTERS)] = { + -EIGHTTAP, 2, + 4, -EIGHTTAP_SHARP, + -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2, +}; +#else +const vpx_tree_index vp10_switchable_interp_tree +[TREE_SIZE(SWITCHABLE_FILTERS)] = { -EIGHTTAP, 2, -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; +#endif // CONFIG_EXT_INTERP void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { int i, j; @@ -385,31 +1274,34 @@ fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i], counts->comp_inter[i]); for (i = 0; i < REF_CONTEXTS; i++) - fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i], - counts->comp_ref[i]); + for (j = 0; j < (COMP_REFS - 1); j++) + fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j], + counts->comp_ref[i][j]); for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) + for (j = 0; j < (SINGLE_REFS - 1); j++) fc->single_ref_prob[i][j] = mode_mv_merge_probs( pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); +#if CONFIG_REF_MV + for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) + fc->newmv_prob[i] = mode_mv_merge_probs(pre_fc->newmv_prob[i], + counts->newmv_mode[i]); + for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) + fc->zeromv_prob[i] = mode_mv_merge_probs(pre_fc->zeromv_prob[i], + counts->zeromv_mode[i]); + for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) + fc->refmv_prob[i] = mode_mv_merge_probs(pre_fc->refmv_prob[i], + counts->refmv_mode[i]); +#else for (i = 0; i < INTER_MODE_CONTEXTS; i++) vpx_tree_merge_probs(vp10_inter_mode_tree, pre_fc->inter_mode_probs[i], counts->inter_mode[i], fc->inter_mode_probs[i]); +#endif for (i = 0; i < BLOCK_SIZE_GROUPS; 
i++) vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->y_mode_prob[i], counts->y_mode[i], fc->y_mode_prob[i]); -#if !CONFIG_MISC_FIXES - for (i = 0; i < INTRA_MODES; ++i) - vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i], - counts->uv_mode[i], fc->uv_mode_prob[i]); - - for (i = 0; i < PARTITION_CONTEXTS; i++) - vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i], - counts->partition[i], fc->partition_prob[i]); -#endif - if (cm->interp_filter == SWITCHABLE) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) vpx_tree_merge_probs(vp10_switchable_interp_tree, @@ -449,10 +1341,41 @@ } } +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT) + for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) + fc->txfm_partition_prob[i] = + mode_mv_merge_probs(pre_fc->txfm_partition_prob[i], + counts->txfm_partition[i]); +#endif + for (i = 0; i < SKIP_CONTEXTS; ++i) fc->skip_probs[i] = mode_mv_merge_probs( pre_fc->skip_probs[i], counts->skip[i]); +#if CONFIG_EXT_TX + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (use_inter_ext_tx_for_txsize[s][i]) { + vpx_tree_merge_probs(vp10_ext_tx_inter_tree[s], + pre_fc->inter_ext_tx_prob[s][i], + counts->inter_ext_tx[s][i], + fc->inter_ext_tx_prob[s][i]); + } + } + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (use_intra_ext_tx_for_txsize[s][i]) { + int j; + for (j = 0; j < INTRA_MODES; ++j) + vpx_tree_merge_probs(vp10_ext_tx_intra_tree[s], + pre_fc->intra_ext_tx_prob[s][i][j], + counts->intra_ext_tx[s][i][j], + fc->intra_ext_tx_prob[s][i][j]); + } + } + } +#else for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { int j; for (j = 0; j < TX_TYPES; ++j) @@ -467,8 +1390,18 @@ counts->inter_ext_tx[i], fc->inter_ext_tx_prob[i]); } +#endif // CONFIG_EXT_TX -#if CONFIG_MISC_FIXES +#if CONFIG_SUPERTX + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + int j; + for (j = 1; j < TX_SIZES; ++j) { + fc->supertx_prob[i][j] = mode_mv_merge_probs(pre_fc->supertx_prob[i][j], + 
counts->supertx[i][j]); + } + } +#endif // CONFIG_SUPERTX + if (cm->seg.temporal_update) { for (i = 0; i < PREDICTION_PROBS; i++) fc->seg.pred_probs[i] = mode_mv_merge_probs(pre_fc->seg.pred_probs[i], @@ -488,7 +1421,13 @@ for (i = 0; i < PARTITION_CONTEXTS; i++) vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i], counts->partition[i], fc->partition_prob[i]); -#endif + +#if CONFIG_EXT_INTRA + for (i = 0; i < PLANE_TYPES; ++i) { + fc->ext_intra_probs[i] = mode_mv_merge_probs( + pre_fc->ext_intra_probs[i], counts->ext_intra[i]); + } +#endif // CONFIG_EXT_INTRA } static void set_default_lf_deltas(struct loopfilter *lf) { @@ -497,6 +1436,11 @@ lf->ref_deltas[INTRA_FRAME] = 1; lf->ref_deltas[LAST_FRAME] = 0; +#if CONFIG_EXT_REFS + lf->ref_deltas[LAST2_FRAME] = lf->ref_deltas[LAST_FRAME]; + lf->ref_deltas[LAST3_FRAME] = lf->ref_deltas[LAST_FRAME]; + lf->ref_deltas[LAST4_FRAME] = lf->ref_deltas[LAST_FRAME]; +#endif // CONFIG_EXT_REFS lf->ref_deltas[GOLDEN_FRAME] = -1; lf->ref_deltas[ALTREF_FRAME] = -1;
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index 611d3ad..a1ad2c4 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h
@@ -27,6 +27,11 @@ #define INTER_OFFSET(mode) ((mode) - NEARESTMV) +#define PALETTE_COLOR_CONTEXTS 16 +#define PALETTE_MAX_SIZE 8 +#define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1) +#define PALETTE_Y_MODE_CONTEXTS 3 + struct VP10Common; struct tx_probs { @@ -55,20 +60,40 @@ vp10_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] [SWITCHABLE_FILTERS - 1]; + +#if CONFIG_REF_MV + vpx_prob newmv_prob[NEWMV_MODE_CONTEXTS]; + vpx_prob zeromv_prob[ZEROMV_MODE_CONTEXTS]; + vpx_prob refmv_prob[REFMV_MODE_CONTEXTS]; +#endif + vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vpx_prob single_ref_prob[REF_CONTEXTS][2]; - vpx_prob comp_ref_prob[REF_CONTEXTS]; + vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1]; + vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS-1]; struct tx_probs tx_probs; +#if CONFIG_VAR_TX + vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS]; +#endif vpx_prob skip_probs[SKIP_CONTEXTS]; nmv_context nmvc; -#if CONFIG_MISC_FIXES - struct segmentation_probs seg; -#endif + int initialized; +#if CONFIG_EXT_TX + vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1]; + vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] + [TX_TYPES - 1]; +#else vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1]; vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1]; - int initialized; +#endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + vpx_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES]; +#endif // CONFIG_SUPERTX + struct segmentation_probs seg; +#if CONFIG_EXT_INTRA + vpx_prob ext_intra_probs[PLANE_TYPES]; +#endif // CONFIG_EXT_INTRA } FRAME_CONTEXT; typedef struct FRAME_COUNTS { @@ -81,34 +106,62 @@ [COEF_BANDS][COEFF_CONTEXTS]; unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] [SWITCHABLE_FILTERS]; +#if CONFIG_REF_MV + unsigned 
int newmv_mode[NEWMV_MODE_CONTEXTS][2]; + unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2]; + unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2]; +#endif + unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int single_ref[REF_CONTEXTS][2][2]; - unsigned int comp_ref[REF_CONTEXTS][2]; + unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2]; + unsigned int comp_ref[REF_CONTEXTS][COMP_REFS-1][2]; struct tx_counts tx; +#if CONFIG_VAR_TX + unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2]; +#endif unsigned int skip[SKIP_CONTEXTS][2]; nmv_context_counts mv; -#if CONFIG_MISC_FIXES - struct seg_counts seg; -#endif +#if CONFIG_EXT_TX + unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; + unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] + [TX_TYPES]; +#else unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES]; unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES]; +#endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2]; + unsigned int supertx_size[TX_SIZES]; +#endif // CONFIG_SUPERTX + struct seg_counts seg; +#if CONFIG_EXT_INTRA + unsigned int ext_intra[PLANE_TYPES][2]; +#endif // CONFIG_EXT_INTRA } FRAME_COUNTS; extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] [INTRA_MODES - 1]; -#if !CONFIG_MISC_FIXES -extern const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; -extern const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1]; -#endif +extern const vpx_prob +vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]; +extern const vpx_prob +vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1]; +extern const vpx_prob +vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1]; +extern const vpx_prob vp10_default_palette_y_color_prob 
+[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1]; +extern const vpx_prob vp10_default_palette_uv_color_prob +[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1]; extern const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; extern const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)]; extern const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)]; extern const vpx_tree_index vp10_switchable_interp_tree [TREE_SIZE(SWITCHABLE_FILTERS)]; +extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)]; +extern const vpx_tree_index +vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)]; void vp10_setup_past_independence(struct VP10Common *cm); @@ -123,8 +176,15 @@ void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); +#if CONFIG_EXT_TX +extern const vpx_tree_index + vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER][TREE_SIZE(TX_TYPES)]; +extern const vpx_tree_index + vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)]; +#else extern const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)]; +#endif // CONFIG_EXT_TX static INLINE int vp10_ceil_log2(int n) { int i = 1, p = 2; @@ -135,6 +195,9 @@ return i; } +int vp10_get_palette_color_context(const uint8_t *color_map, int cols, + int r, int c, int n, int *color_order); + #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/enums.h b/vp10/common/enums.h index 18c7d16..f0d1ba2 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h
@@ -94,15 +94,44 @@ ADST_DCT = 1, // ADST in vertical, DCT in horizontal DCT_ADST = 2, // DCT in vertical, ADST in horizontal ADST_ADST = 3, // ADST in both directions - TX_TYPES = 4 +#if CONFIG_EXT_TX + FLIPADST_DCT = 4, + DCT_FLIPADST = 5, + FLIPADST_FLIPADST = 6, + ADST_FLIPADST = 7, + FLIPADST_ADST = 8, + DST_DCT = 9, + DCT_DST = 10, + DST_ADST = 11, + ADST_DST = 12, + DST_FLIPADST = 13, + FLIPADST_DST = 14, + DST_DST = 15, + IDTX = 16, +#endif // CONFIG_EXT_TX + TX_TYPES, } TX_TYPE; #define EXT_TX_SIZES 3 // number of sizes that use extended transforms +#if CONFIG_EXT_TX +#define USE_DST2 1 +#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER +#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA +#endif // CONFIG_EXT_TX + typedef enum { VP9_LAST_FLAG = 1 << 0, +#if CONFIG_EXT_REFS + VP9_LAST2_FLAG = 1 << 1, + VP9_LAST3_FLAG = 1 << 2, + VP9_LAST4_FLAG = 1 << 3, + VP9_GOLD_FLAG = 1 << 4, + VP9_ALT_FLAG = 1 << 5, +#else VP9_GOLD_FLAG = 1 << 1, VP9_ALT_FLAG = 1 << 2, +#endif // CONFIG_EXT_REFS } VP9_REFFRAME; typedef enum { @@ -111,6 +140,29 @@ PLANE_TYPES } PLANE_TYPE; +typedef enum { + TWO_COLORS, + THREE_COLORS, + FOUR_COLORS, + FIVE_COLORS, + SIX_COLORS, + SEVEN_COLORS, + EIGHT_COLORS, + PALETTE_SIZES +} PALETTE_SIZE; + +typedef enum { + PALETTE_COLOR_ONE, + PALETTE_COLOR_TWO, + PALETTE_COLOR_THREE, + PALETTE_COLOR_FOUR, + PALETTE_COLOR_FIVE, + PALETTE_COLOR_SIX, + PALETTE_COLOR_SEVEN, + PALETTE_COLOR_EIGHT, + PALETTE_COLORS +} PALETTE_COLOR; + #define DC_PRED 0 // Average of above and left pixels #define V_PRED 1 // Vertical #define H_PRED 2 // Horizontal @@ -130,18 +182,77 @@ #define INTRA_MODES (TM_PRED + 1) +#if CONFIG_EXT_INTRA +typedef enum { + FILTER_DC_PRED, + FILTER_V_PRED, + FILTER_H_PRED, + FILTER_D45_PRED, + FILTER_D135_PRED, + FILTER_D117_PRED, + FILTER_D153_PRED, + FILTER_D207_PRED, + FILTER_D63_PRED, + FILTER_TM_PRED, + EXT_INTRA_MODES, +} EXT_INTRA_MODE; + +#define FILTER_INTRA_MODES (FILTER_TM_PRED + 1) 
+#define DIRECTIONAL_MODES (INTRA_MODES - 2) +#endif // CONFIG_EXT_INTRA + #define INTER_MODES (1 + NEWMV - NEARESTMV) #define SKIP_CONTEXTS 3 + +#if CONFIG_REF_MV +#define NEWMV_MODE_CONTEXTS 7 +#define ZEROMV_MODE_CONTEXTS 2 +#define REFMV_MODE_CONTEXTS 9 + +#define ZEROMV_OFFSET 3 +#define REFMV_OFFSET 4 + +#define NEWMV_CTX_MASK ((1 << ZEROMV_OFFSET) - 1) +#define ZEROMV_CTX_MASK ((1 << (REFMV_OFFSET - ZEROMV_OFFSET)) - 1) +#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1) + +#define ALL_ZERO_FLAG_OFFSET 8 +#define SKIP_NEARESTMV_OFFSET 9 +#define SKIP_NEARMV_OFFSET 10 +#define SKIP_NEARESTMV_SUB8X8_OFFSET 11 +#endif + #define INTER_MODE_CONTEXTS 7 /* Segment Feature Masks */ #define MAX_MV_REF_CANDIDATES 2 +#if CONFIG_REF_MV +#define MAX_REF_MV_STACK_SIZE 16 +#endif #define INTRA_INTER_CONTEXTS 4 #define COMP_INTER_CONTEXTS 5 #define REF_CONTEXTS 5 +#if CONFIG_VAR_TX +#define TXFM_PARTITION_CONTEXTS 9 +typedef TX_SIZE TXFM_CONTEXT; +#endif + +#if CONFIG_EXT_REFS +#define SINGLE_REFS 6 +#define COMP_REFS 5 +#else +#define SINGLE_REFS 3 +#define COMP_REFS 2 +#endif // CONFIG_EXT_REFS + +#if CONFIG_SUPERTX +#define PARTITION_SUPERTX_CONTEXTS 2 +#define MAX_SUPERTX_BLOCK_SIZE BLOCK_32X32 +#endif // CONFIG_SUPERTX + #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/filter.c b/vp10/common/filter.c index dda279f..a9225b6 100644 --- a/vp10/common/filter.c +++ b/vp10/common/filter.c
@@ -32,9 +32,28 @@ { 0, 0, 0, 8, 120, 0, 0, 0 } }; -// Lagrangian interpolation filter DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8[SUBPEL_SHIFTS]) = { +#if CONFIG_EXT_INTERP + // intfilt 0.575 + {0, 0, 0, 128, 0, 0, 0, 0}, + {0, 1, -5, 126, 8, -3, 1, 0}, + {-1, 3, -10, 123, 18, -6, 2, -1}, + {-1, 4, -14, 118, 27, -9, 3, 0}, + {-1, 5, -16, 112, 37, -12, 4, -1}, + {-1, 5, -18, 105, 48, -14, 4, -1}, + {-1, 6, -19, 97, 58, -17, 5, -1}, + {-1, 6, -20, 88, 68, -18, 6, -1}, + {-1, 6, -19, 78, 78, -19, 6, -1}, + {-1, 6, -18, 68, 88, -20, 6, -1}, + {-1, 5, -17, 58, 97, -19, 6, -1}, + {-1, 4, -14, 48, 105, -18, 5, -1}, + {-1, 4, -12, 37, 112, -16, 5, -1}, + {0, 3, -9, 27, 118, -14, 4, -1}, + {-1, 2, -6, 18, 123, -10, 3, -1}, + {0, 1, -3, 8, 126, -5, 1, 0}, +#else + // Lagrangian interpolation filter { 0, 0, 0, 128, 0, 0, 0, 0}, { 0, 1, -5, 126, 8, -3, 1, 0}, { -1, 3, -10, 122, 18, -6, 2, 0}, @@ -51,11 +70,31 @@ { -1, 3, -9, 27, 118, -13, 4, -1}, { 0, 2, -6, 18, 122, -10, 3, -1}, { 0, 1, -3, 8, 126, -5, 1, 0} +#endif // CONFIG_EXT_INTERP }; -// DCT based filter DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8s[SUBPEL_SHIFTS]) = { + sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = { +#if CONFIG_EXT_INTERP + // intfilt 0.8 + {0, 0, 0, 128, 0, 0, 0, 0}, + {-1, 2, -6, 127, 9, -4, 2, -1}, + {-2, 5, -12, 124, 18, -7, 4, -2}, + {-2, 7, -16, 119, 28, -11, 5, -2}, + {-3, 8, -19, 114, 38, -14, 7, -3}, + {-3, 9, -22, 107, 49, -17, 8, -3}, + {-4, 10, -23, 99, 60, -20, 10, -4}, + {-4, 11, -23, 90, 70, -22, 10, -4}, + {-4, 11, -23, 80, 80, -23, 11, -4}, + {-4, 10, -22, 70, 90, -23, 11, -4}, + {-4, 10, -20, 60, 99, -23, 10, -4}, + {-3, 8, -17, 49, 107, -22, 9, -3}, + {-3, 7, -14, 38, 114, -19, 8, -3}, + {-2, 5, -11, 28, 119, -16, 7, -2}, + {-2, 4, -7, 18, 124, -12, 5, -2}, + {-1, 2, -4, 9, 127, -6, 2, -1}, +#else + // DCT based filter {0, 0, 0, 128, 0, 0, 0, 0}, {-1, 3, -7, 127, 8, -3, 1, 0}, {-2, 5, -13, 125, 17, -6, 3, -1}, @@ -72,11 +111,58 @@ {-2, 5, 
-10, 27, 121, -17, 7, -3}, {-1, 3, -6, 17, 125, -13, 5, -2}, {0, 1, -3, 8, 127, -7, 3, -1} +#endif // CONFIG_EXT_INTERP }; -// freqmultiplier = 0.5 +#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 + DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { + sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = { +// freqmultiplier = 0.35 + {0, 0, 0, 128, 0, 0, 0, 0}, + {-1, 8, 31, 47, 34, 10, 0, -1}, + {-1, 7, 29, 46, 36, 12, 0, -1}, + {-1, 6, 28, 46, 37, 13, 0, -1}, + {-1, 5, 26, 46, 38, 14, 1, -1}, + {-1, 4, 25, 45, 39, 16, 1, -1}, + {-1, 4, 23, 44, 41, 17, 1, -1}, + {-1, 3, 21, 44, 42, 18, 2, -1}, + {-1, 2, 20, 43, 43, 20, 2, -1}, + {-1, 2, 18, 42, 44, 21, 3, -1}, + {-1, 1, 17, 41, 44, 23, 4, -1}, + {-1, 1, 16, 39, 45, 25, 4, -1}, + {-1, 1, 14, 38, 46, 26, 5, -1}, + {-1, 0, 13, 37, 46, 28, 6, -1}, + {-1, 0, 12, 36, 46, 29, 7, -1}, + {-1, 0, 10, 34, 47, 31, 8, -1}, +}; + +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = { +// freqmultiplier = 0.75 + {0, 0, 0, 128, 0, 0, 0, 0}, + {2, -10, 19, 95, 31, -11, 2, 0}, + {2, -9, 14, 94, 37, -12, 2, 0}, + {2, -8, 9, 92, 43, -12, 1, 1}, + {2, -7, 5, 90, 49, -12, 1, 0}, + {2, -5, 1, 86, 55, -12, 0, 1}, + {1, -4, -2, 82, 61, -11, 0, 1}, + {1, -3, -5, 77, 67, -9, -1, 1}, + {1, -2, -7, 72, 72, -7, -2, 1}, + {1, -1, -9, 67, 77, -5, -3, 1}, + {1, 0, -11, 61, 82, -2, -4, 1}, + {1, 0, -12, 55, 86, 1, -5, 2}, + {0, 1, -12, 49, 90, 5, -7, 2}, + {1, 1, -12, 43, 92, 9, -8, 2}, + {0, 2, -12, 37, 94, 14, -9, 2}, + {0, 2, -11, 31, 95, 19, -10, 2}, +}; + +#else + +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = { +// freqmultiplier = 0.5 { 0, 0, 0, 128, 0, 0, 0, 0}, {-3, -1, 32, 64, 38, 1, -3, 0}, {-2, -2, 29, 63, 41, 2, -3, 0}, @@ -95,10 +181,14 @@ { 0, -3, 1, 38, 64, 32, -1, -3} }; +#endif // CONFIG_EXT_INTERP -const InterpKernel *vp10_filter_kernels[4] = { +const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1] = { 
sub_pel_filters_8, - sub_pel_filters_8lp, - sub_pel_filters_8s, + sub_pel_filters_8smooth, + sub_pel_filters_8sharp, +#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 + sub_pel_filters_8smooth2, +#endif bilinear_filters };
diff --git a/vp10/common/filter.h b/vp10/common/filter.h index 826cd03..de26b76 100644 --- a/vp10/common/filter.h +++ b/vp10/common/filter.h
@@ -24,16 +24,24 @@ #define EIGHTTAP 0 #define EIGHTTAP_SMOOTH 1 #define EIGHTTAP_SHARP 2 + +#if CONFIG_EXT_INTERP +#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn it on for experimentation */ +#define EIGHTTAP_SMOOTH2 3 +#define SWITCHABLE_FILTERS 4 /* Number of switchable filters */ +#else #define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ -#define BILINEAR 3 +#endif // CONFIG_EXT_INTERP // The codec can operate in four possible inter prediction filter mode: // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. + +#define BILINEAR (SWITCHABLE_FILTERS) +#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */ #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) -#define SWITCHABLE 4 /* should be the last one */ typedef uint8_t INTERP_FILTER; -extern const InterpKernel *vp10_filter_kernels[4]; +extern const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1]; #ifdef __cplusplus } // extern "C"
diff --git a/vp10/common/idct.c b/vp10/common/idct.c index 5ee15c8..d42f5f5 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c
@@ -13,107 +13,1138 @@ #include "./vp10_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vp10/common/blockd.h" +#include "vp10/common/enums.h" #include "vp10/common/idct.h" #include "vpx_dsp/inv_txfm.h" #include "vpx_ports/mem.h" +#if CONFIG_EXT_TX +void idst4_c(const tran_low_t *input, tran_low_t *output) { +#if USE_DST2 + tran_low_t step[4]; + tran_high_t temp1, temp2; + // stage 1 + temp1 = (input[3] + input[1]) * cospi_16_64; + temp2 = (input[3] - input[1]) * cospi_16_64; + step[0] = WRAPLOW(dct_const_round_shift(temp1), 8); + step[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64; + temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64; + step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); + step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); + + // stage 2 + output[0] = WRAPLOW(step[0] + step[3], 8); + output[1] = WRAPLOW(-step[1] - step[2], 8); + output[2] = WRAPLOW(step[1] - step[2], 8); + output[3] = WRAPLOW(step[3] - step[0], 8); +#else + // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2) + static const int32_t sinvalue_lookup[] = { + 141124871, 228344838, + }; + int64_t sum; + int64_t s03 = (input[0] + input[3]); + int64_t d03 = (input[0] - input[3]); + int64_t s12 = (input[1] + input[2]); + int64_t d12 = (input[1] - input[2]); + sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); +#endif // USE_DST2 +} + +void idst8_c(const tran_low_t *input, tran_low_t *output) { +#if USE_DST2 + // vp9_igentx8(input, output, Tx8); + tran_low_t 
step1[8], step2[8]; + tran_high_t temp1, temp2; + // stage 1 + step1[0] = input[7]; + step1[2] = input[3]; + step1[1] = input[5]; + step1[3] = input[1]; + temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64; + temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64; + step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); + step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64; + temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + + // stage 2 + temp1 = (step1[0] + step1[2]) * cospi_16_64; + temp2 = (step1[0] - step1[2]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[4] = WRAPLOW(step1[4] + step1[5], 8); + step2[5] = WRAPLOW(step1[4] - step1[5], 8); + step2[6] = WRAPLOW(-step1[6] + step1[7], 8); + step2[7] = WRAPLOW(step1[6] + step1[7], 8); + + // stage 3 + step1[0] = WRAPLOW(step2[0] + step2[3], 8); + step1[1] = WRAPLOW(step2[1] + step2[2], 8); + step1[2] = WRAPLOW(step2[1] - step2[2], 8); + step1[3] = WRAPLOW(step2[0] - step2[3], 8); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[7] = step2[7]; + + // stage 4 + output[0] = WRAPLOW(step1[0] + step1[7], 8); + output[1] = WRAPLOW(-step1[1] - step1[6], 8); + output[2] = WRAPLOW(step1[2] + step1[5], 8); + output[3] = WRAPLOW(-step1[3] - step1[4], 8); + output[4] = WRAPLOW(step1[3] - step1[4], 8); + output[5] = WRAPLOW(-step1[2] + step1[5], 8); + output[6] = 
WRAPLOW(step1[1] - step1[6], 8); + output[7] = WRAPLOW(-step1[0] + step1[7], 8); +#else + // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2 + static const int32_t sinvalue_lookup[] = { + 86559612, 162678858, 219176632, 249238470 + }; + int64_t sum; + int64_t s07 = (input[0] + input[7]); + int64_t d07 = (input[0] - input[7]); + int64_t s16 = (input[1] + input[6]); + int64_t d16 = (input[1] - input[6]); + int64_t s25 = (input[2] + input[5]); + int64_t d25 = (input[2] - input[5]); + int64_t s34 = (input[3] + input[4]); + int64_t d34 = (input[3] - input[4]); + sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] + + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] + + d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = (s07 + s16 - s34)* sinvalue_lookup[2]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] - + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] - + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = (d07 - d16 + d34)* sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] + + s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] + + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); +#endif // USE_DST2 +} + +void idst16_c(const 
tran_low_t *input, tran_low_t *output) { +#if USE_DST2 + tran_low_t step1[16], step2[16]; + tran_high_t temp1, temp2; + + // stage 1 + step1[0] = input[15]; + step1[1] = input[7]; + step1[2] = input[11]; + step1[3] = input[3]; + step1[4] = input[13]; + step1[5] = input[5]; + step1[6] = input[9]; + step1[7] = input[1]; + step1[8] = input[14]; + step1[9] = input[6]; + step1[10] = input[10]; + step1[11] = input[2]; + step1[12] = input[12]; + step1[13] = input[4]; + step1[14] = input[8]; + step1[15] = input[0]; + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; + temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; + step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); + + temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; + temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + + temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; + temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + + temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; + temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; + temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; + step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); + step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = step2[5] 
* cospi_12_64 - step2[6] * cospi_20_64; + temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + + step1[8] = WRAPLOW(step2[8] + step2[9], 8); + step1[9] = WRAPLOW(step2[8] - step2[9], 8); + step1[10] = WRAPLOW(-step2[10] + step2[11], 8); + step1[11] = WRAPLOW(step2[10] + step2[11], 8); + step1[12] = WRAPLOW(step2[12] + step2[13], 8); + step1[13] = WRAPLOW(step2[12] - step2[13], 8); + step1[14] = WRAPLOW(-step2[14] + step2[15], 8); + step1[15] = WRAPLOW(step2[14] + step2[15], 8); + + // stage 4 + temp1 = (step1[0] + step1[1]) * cospi_16_64; + temp2 = (step1[0] - step1[1]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[4] = WRAPLOW(step1[4] + step1[5], 8); + step2[5] = WRAPLOW(step1[4] - step1[5], 8); + step2[6] = WRAPLOW(-step1[6] + step1[7], 8); + step2[7] = WRAPLOW(step1[6] + step1[7], 8); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; + temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; + temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[11] = step1[11]; + step2[12] = step1[12]; + + // stage 5 + step1[0] = WRAPLOW(step2[0] + step2[3], 8); + step1[1] = WRAPLOW(step2[1] + step2[2], 8); + step1[2] = WRAPLOW(step2[1] - step2[2], 8); + step1[3] = WRAPLOW(step2[0] - step2[3], 8); + 
step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[7] = step2[7]; + + step1[8] = WRAPLOW(step2[8] + step2[11], 8); + step1[9] = WRAPLOW(step2[9] + step2[10], 8); + step1[10] = WRAPLOW(step2[9] - step2[10], 8); + step1[11] = WRAPLOW(step2[8] - step2[11], 8); + step1[12] = WRAPLOW(-step2[12] + step2[15], 8); + step1[13] = WRAPLOW(-step2[13] + step2[14], 8); + step1[14] = WRAPLOW(step2[13] + step2[14], 8); + step1[15] = WRAPLOW(step2[12] + step2[15], 8); + + // stage 6 + step2[0] = WRAPLOW(step1[0] + step1[7], 8); + step2[1] = WRAPLOW(step1[1] + step1[6], 8); + step2[2] = WRAPLOW(step1[2] + step1[5], 8); + step2[3] = WRAPLOW(step1[3] + step1[4], 8); + step2[4] = WRAPLOW(step1[3] - step1[4], 8); + step2[5] = WRAPLOW(step1[2] - step1[5], 8); + step2[6] = WRAPLOW(step1[1] - step1[6], 8); + step2[7] = WRAPLOW(step1[0] - step1[7], 8); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * cospi_16_64; + temp2 = (step1[10] + step1[13]) * cospi_16_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = (-step1[11] + step1[12]) * cospi_16_64; + temp2 = (step1[11] + step1[12]) * cospi_16_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[14] = step1[14]; + step2[15] = step1[15]; + + // stage 7 + output[0] = WRAPLOW(step2[0] + step2[15], 8); + output[1] = WRAPLOW(-step2[1] - step2[14], 8); + output[2] = WRAPLOW(step2[2] + step2[13], 8); + output[3] = WRAPLOW(-step2[3] - step2[12], 8); + output[4] = WRAPLOW(step2[4] + step2[11], 8); + output[5] = WRAPLOW(-step2[5] - step2[10], 8); + output[6] = WRAPLOW(step2[6] + step2[9], 8); + output[7] = WRAPLOW(-step2[7] - step2[8], 8); + output[8] = WRAPLOW(step2[7] - step2[8], 8); + output[9] 
= WRAPLOW(-step2[6] + step2[9], 8); + output[10] = WRAPLOW(step2[5] - step2[10], 8); + output[11] = WRAPLOW(-step2[4] + step2[11], 8); + output[12] = WRAPLOW(step2[3] - step2[12], 8); + output[13] = WRAPLOW(-step2[2] + step2[13], 8); + output[14] = WRAPLOW(step2[1] - step2[14], 8); + output[15] = WRAPLOW(-step2[0] + step2[15], 8); +#else + // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2) + static const int32_t sinvalue_lookup[] = { + 47852167, 94074787, 137093803, 175444254, + 207820161, 233119001, 250479254, 259309736 + }; + int64_t sum; + int64_t s015 = (input[0] + input[15]); + int64_t d015 = (input[0] - input[15]); + int64_t s114 = (input[1] + input[14]); + int64_t d114 = (input[1] - input[14]); + int64_t s213 = (input[2] + input[13]); + int64_t d213 = (input[2] - input[13]); + int64_t s312 = (input[3] + input[12]); + int64_t d312 = (input[3] - input[12]); + int64_t s411 = (input[4] + input[11]); + int64_t d411 = (input[4] - input[11]); + int64_t s510 = (input[5] + input[10]); + int64_t d510 = (input[5] - input[10]); + int64_t s69 = (input[6] + input[9]); + int64_t d69 = (input[6] - input[9]); + int64_t s78 = (input[7] + input[8]); + int64_t d78 = (input[7] - input[8]); + sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] + + s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] + + s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] + + s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] + + d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] + + d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] + + d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] + + s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] + + s411 * sinvalue_lookup[1] - s510 * 
sinvalue_lookup[0] - + s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] + + d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] - + d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] - + d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] + + s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] - + s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] + + s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] - + d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] - + d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] + + d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] - + s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] + + s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] + + s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] - + d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] + + d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] - + d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] - + s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] + + s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] - + s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3]; + output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + 
sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] - + d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] + + d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] + + d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4]; + output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] - + s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] - + s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] + + s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2]; + output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] + + d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] - + d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] + + d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5]; + output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] + + s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] - + s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] - + s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1]; + output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] + + d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] + + d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] - + d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6]; + output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] + + s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] + + s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] + + s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0]; + output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] + + d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] + + d411 * 
sinvalue_lookup[4] - d510 * sinvalue_lookup[5] + + d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7]; + output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); +#endif // USE_DST2 +} + +// Inverse identity transform and add. +static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int bs) { + int r, c; + const int shift = bs < 32 ? 3 : 2; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) + dest[c] = clip_pixel_add(dest[c], input[c] >> shift); + dest += stride; + input += bs; + } +} + +#define FLIPUD_PTR(dest, stride, size) do { \ + (dest) = (dest) + ((size) - 1) * (stride); \ + (stride) = - (stride); \ +} while (0) + +static void maybe_flip_strides(uint8_t **dst, int *dstride, + tran_low_t **src, int *sstride, + int tx_type, int size) { + // Note that the transpose of src will be added to dst. In order to LR + // flip the addends (in dst coordinates), we UD flip the src. To UD flip + // the addends, we UD flip the dst. + switch (tx_type) { + case DCT_DCT: + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + break; + case FLIPADST_DCT: + case FLIPADST_ADST: + case FLIPADST_DST: + // flip UD + FLIPUD_PTR(*dst, *dstride, size); + break; + case DCT_FLIPADST: + case ADST_FLIPADST: + case DST_FLIPADST: + // flip LR + FLIPUD_PTR(*src, *sstride, size); + break; + case FLIPADST_FLIPADST: + // flip UD + FLIPUD_PTR(*dst, *dstride, size); + // flip LR + FLIPUD_PTR(*src, *sstride, size); + break; + default: + assert(0); + break; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) { +#if USE_DST2 + tran_low_t step[4]; + tran_high_t temp1, temp2; + (void) bd; + // stage 1 + temp1 = (input[3] + input[1]) * cospi_16_64; + temp2 = (input[3] - input[1]) * cospi_16_64; + step[0] = WRAPLOW(dct_const_round_shift(temp1), bd); + step[1] = WRAPLOW(dct_const_round_shift(temp2), bd); + temp1 = input[2] * 
cospi_24_64 - input[0] * cospi_8_64; + temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64; + step[2] = WRAPLOW(dct_const_round_shift(temp1), bd); + step[3] = WRAPLOW(dct_const_round_shift(temp2), bd); + + // stage 2 + output[0] = WRAPLOW(step[0] + step[3], bd); + output[1] = WRAPLOW(-step[1] - step[2], bd); + output[2] = WRAPLOW(step[1] - step[2], bd); + output[3] = WRAPLOW(step[3] - step[0], bd); +#else + // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2) + static const int32_t sinvalue_lookup[] = { + 141124871, 228344838, + }; + int64_t sum; + int64_t s03 = (input[0] + input[3]); + int64_t d03 = (input[0] - input[3]); + int64_t s12 = (input[1] + input[2]); + int64_t d12 = (input[1] - input[2]); + +#if !CONFIG_EMULATE_HARDWARE + (void)bd; +#endif + + sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); +#endif // USE_DST2 +} + +void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) { +#if USE_DST2 + tran_low_t step1[8], step2[8]; + tran_high_t temp1, temp2; + (void) bd; + // stage 1 + step1[0] = input[7]; + step1[2] = input[3]; + step1[1] = input[5]; + step1[3] = input[1]; + temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64; + temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64; + step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); + step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); + temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64; + temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); 
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); + + // stage 2 + temp1 = (step1[0] + step1[2]) * cospi_16_64; + temp2 = (step1[0] - step1[2]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); + temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); + step2[4] = WRAPLOW(step1[4] + step1[5], bd); + step2[5] = WRAPLOW(step1[4] - step1[5], bd); + step2[6] = WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = WRAPLOW(step1[6] + step1[7], bd); + + // stage 3 + step1[0] = WRAPLOW(step2[0] + step2[3], bd); + step1[1] = WRAPLOW(step2[1] + step2[2], bd); + step1[2] = WRAPLOW(step2[1] - step2[2], bd); + step1[3] = WRAPLOW(step2[0] - step2[3], bd); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); + step1[7] = step2[7]; + + // stage 4 + output[0] = WRAPLOW(step1[0] + step1[7], bd); + output[1] = WRAPLOW(-step1[1] - step1[6], bd); + output[2] = WRAPLOW(step1[2] + step1[5], bd); + output[3] = WRAPLOW(-step1[3] - step1[4], bd); + output[4] = WRAPLOW(step1[3] - step1[4], bd); + output[5] = WRAPLOW(-step1[2] + step1[5], bd); + output[6] = WRAPLOW(step1[1] - step1[6], bd); + output[7] = WRAPLOW(-step1[0] + step1[7], bd); +#else + // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2 + static const int32_t sinvalue_lookup[] = { + 86559612, 162678858, 219176632, 249238470 + }; + int64_t sum; + int64_t s07 = (input[0] + input[7]); + int64_t d07 = (input[0] - input[7]); + int64_t s16 = (input[1] + input[6]); + int64_t d16 = (input[1] - input[6]); + int64_t s25 = (input[2] + input[5]); + int64_t d25 = (input[2] - input[5]); + int64_t s34 = (input[3] + 
input[4]); + int64_t d34 = (input[3] - input[4]); + +#if !CONFIG_EMULATE_HARDWARE + (void)bd; +#endif + + sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] + + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] + + d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = (s07 + s16 - s34)* sinvalue_lookup[2]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] - + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] - + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = (d07 - d16 + d34)* sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] + + s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] + + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); +#endif // USE_DST2 +} + +void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) { +#if USE_DST2 + // vp9_highbd_igentx16(input, output, bd, Tx16); + tran_low_t step1[16], step2[16]; + tran_high_t temp1, temp2; + (void) bd; + + // stage 1 + step1[0] = input[15]; + step1[1] = input[7]; + step1[2] = input[11]; + step1[3] = input[3]; + step1[4] = input[13]; + step1[5] = input[5]; + step1[6] = input[9]; + step1[7] = input[1]; + step1[8] = input[14]; + step1[9] = input[6]; + step1[10] = 
input[10]; + step1[11] = input[2]; + step1[12] = input[12]; + step1[13] = input[4]; + step1[14] = input[8]; + step1[15] = input[0]; + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; + temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; + step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); + + temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; + temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); + + temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; + temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); + + temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; + temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; + temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; + step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); + step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); + temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; + temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); + + step1[8] = WRAPLOW(step2[8] + step2[9], bd); + step1[9] = WRAPLOW(step2[8] - step2[9], bd); + step1[10] = WRAPLOW(-step2[10] + step2[11], bd); + step1[11] 
= WRAPLOW(step2[10] + step2[11], bd); + step1[12] = WRAPLOW(step2[12] + step2[13], bd); + step1[13] = WRAPLOW(step2[12] - step2[13], bd); + step1[14] = WRAPLOW(-step2[14] + step2[15], bd); + step1[15] = WRAPLOW(step2[14] + step2[15], bd); + + // stage 4 + temp1 = (step1[0] + step1[1]) * cospi_16_64; + temp2 = (step1[0] - step1[1]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); + temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); + step2[4] = WRAPLOW(step1[4] + step1[5], bd); + step2[5] = WRAPLOW(step1[4] - step1[5], bd); + step2[6] = WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = WRAPLOW(step1[6] + step1[7], bd); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; + temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); + temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; + temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); + step2[11] = step1[11]; + step2[12] = step1[12]; + + // stage 5 + step1[0] = WRAPLOW(step2[0] + step2[3], bd); + step1[1] = WRAPLOW(step2[1] + step2[2], bd); + step1[2] = WRAPLOW(step2[1] - step2[2], bd); + step1[3] = WRAPLOW(step2[0] - step2[3], bd); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); + step1[7] = step2[7]; + + step1[8] = WRAPLOW(step2[8] + step2[11], bd); + step1[9] = WRAPLOW(step2[9] + step2[10], bd); + 
step1[10] = WRAPLOW(step2[9] - step2[10], bd); + step1[11] = WRAPLOW(step2[8] - step2[11], bd); + step1[12] = WRAPLOW(-step2[12] + step2[15], bd); + step1[13] = WRAPLOW(-step2[13] + step2[14], bd); + step1[14] = WRAPLOW(step2[13] + step2[14], bd); + step1[15] = WRAPLOW(step2[12] + step2[15], bd); + + // stage 6 + step2[0] = WRAPLOW(step1[0] + step1[7], bd); + step2[1] = WRAPLOW(step1[1] + step1[6], bd); + step2[2] = WRAPLOW(step1[2] + step1[5], bd); + step2[3] = WRAPLOW(step1[3] + step1[4], bd); + step2[4] = WRAPLOW(step1[3] - step1[4], bd); + step2[5] = WRAPLOW(step1[2] - step1[5], bd); + step2[6] = WRAPLOW(step1[1] - step1[6], bd); + step2[7] = WRAPLOW(step1[0] - step1[7], bd); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * cospi_16_64; + temp2 = (step1[10] + step1[13]) * cospi_16_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); + temp1 = (-step1[11] + step1[12]) * cospi_16_64; + temp2 = (step1[11] + step1[12]) * cospi_16_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); + step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); + step2[14] = step1[14]; + step2[15] = step1[15]; + + // stage 7 + output[0] = WRAPLOW(step2[0] + step2[15], bd); + output[1] = WRAPLOW(-step2[1] - step2[14], bd); + output[2] = WRAPLOW(step2[2] + step2[13], bd); + output[3] = WRAPLOW(-step2[3] - step2[12], bd); + output[4] = WRAPLOW(step2[4] + step2[11], bd); + output[5] = WRAPLOW(-step2[5] - step2[10], bd); + output[6] = WRAPLOW(step2[6] + step2[9], bd); + output[7] = WRAPLOW(-step2[7] - step2[8], bd); + output[8] = WRAPLOW(step2[7] - step2[8], bd); + output[9] = WRAPLOW(-step2[6] + step2[9], bd); + output[10] = WRAPLOW(step2[5] - step2[10], bd); + output[11] = WRAPLOW(-step2[4] + step2[11], bd); + output[12] = WRAPLOW(step2[3] - step2[12], bd); + output[13] = WRAPLOW(-step2[2] + step2[13], bd); + output[14] = WRAPLOW(step2[1] - step2[14], bd); + output[15] = 
WRAPLOW(-step2[0] + step2[15], bd); +#else + // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2) + static const int32_t sinvalue_lookup[] = { + 47852167, 94074787, 137093803, 175444254, + 207820161, 233119001, 250479254, 259309736 + }; + int64_t sum; + int64_t s015 = (input[0] + input[15]); + int64_t d015 = (input[0] - input[15]); + int64_t s114 = (input[1] + input[14]); + int64_t d114 = (input[1] - input[14]); + int64_t s213 = (input[2] + input[13]); + int64_t d213 = (input[2] - input[13]); + int64_t s312 = (input[3] + input[12]); + int64_t d312 = (input[3] - input[12]); + int64_t s411 = (input[4] + input[11]); + int64_t d411 = (input[4] - input[11]); + int64_t s510 = (input[5] + input[10]); + int64_t d510 = (input[5] - input[10]); + int64_t s69 = (input[6] + input[9]); + int64_t d69 = (input[6] - input[9]); + int64_t s78 = (input[7] + input[8]); + int64_t d78 = (input[7] - input[8]); + +#if !CONFIG_EMULATE_HARDWARE + (void)bd; +#endif + + sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] + + s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] + + s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] + + s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] + + d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] + + d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] + + d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] + + s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] + + s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] - + s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] + + d213 * sinvalue_lookup[4] + 
d312 * sinvalue_lookup[0] - + d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] - + d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] + + s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] - + s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] + + s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] - + d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] - + d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] + + d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] - + s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] + + s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] + + s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] - + d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] + + d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] - + d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] - + s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] + + s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] - + s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3]; + output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] - + d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] + + d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] + + d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4]; + 
output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] - + s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] - + s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] + + s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2]; + output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] + + d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] - + d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] + + d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5]; + output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] + + s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] - + s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] - + s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1]; + output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] + + d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] + + d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] - + d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6]; + output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] + + s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] + + s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] + + s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0]; + output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] + + d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] + + d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] + + d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7]; + output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); +#endif // USE_DST2 +} + +static void highbd_inv_idtx_add_c(const 
tran_low_t *input, uint8_t *dest8, + int stride, int bs, int bd) { + int r, c; + const int shift = bs < 32 ? 3 : 2; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) + dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd); + dest += stride; + input += bs; + } +} + +static void maybe_flip_strides16(uint16_t **dst, int *dstride, + tran_low_t **src, int *sstride, + int tx_type, int size) { + // Note that the transpose of src will be added to dst. In order to LR + // flip the addends (in dst coordinates), we UD flip the src. To UD flip + // the addends, we UD flip the dst. + switch (tx_type) { + case DCT_DCT: + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + break; + case FLIPADST_DCT: + case FLIPADST_ADST: + case FLIPADST_DST: + // flip UD + FLIPUD_PTR(*dst, *dstride, size); + break; + case DCT_FLIPADST: + case ADST_FLIPADST: + case DST_FLIPADST: + // flip LR + FLIPUD_PTR(*src, *sstride, size); + break; + case FLIPADST_FLIPADST: + // flip UD + FLIPUD_PTR(*dst, *dstride, size); + // flip LR + FLIPUD_PTR(*src, *sstride, size); + break; + default: + assert(0); + break; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_EXT_TX + void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - const transform_2d IHT_4[] = { - { idct4_c, idct4_c }, // DCT_DCT = 0 - { iadst4_c, idct4_c }, // ADST_DCT = 1 - { idct4_c, iadst4_c }, // DCT_ADST = 2 - { iadst4_c, iadst4_c } // ADST_ADST = 3 + int tx_type) { + static const transform_2d IHT_4[] = { + { idct4_c, idct4_c }, // DCT_DCT = 0, + { iadst4_c, idct4_c }, // ADST_DCT = 1, + { idct4_c, iadst4_c }, // DCT_ADST = 2, + { iadst4_c, iadst4_c }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { iadst4_c, idct4_c }, // FLIPADST_DCT = 4, + { idct4_c, iadst4_c }, // DCT_FLIPADST = 5, + { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6, + { iadst4_c, 
iadst4_c }, // ADST_FLIPADST = 7, + { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8, + { idst4_c, idct4_c }, // DST_DCT = 9, + { idct4_c, idst4_c }, // DCT_DST = 10, + { idst4_c, iadst4_c }, // DST_ADST = 11, + { iadst4_c, idst4_c }, // ADST_DST = 12, + { idst4_c, iadst4_c }, // DST_FLIPADST = 13, + { iadst4_c, idst4_c }, // FLIPADST_DST = 14, + { idst4_c, idst4_c }, // DST_DST = 15 +#endif // CONFIG_EXT_TX }; int i, j; - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - tran_low_t temp_in[4], temp_out[4]; + tran_low_t tmp; + tran_low_t out[4][4]; + tran_low_t *outp = &out[0][0]; + int outstride = 4; // inverse transform row vectors for (i = 0; i < 4; ++i) { - IHT_4[tx_type].rows(input, outptr); + IHT_4[tx_type].rows(input, out[i]); input += 4; - outptr += 4; + } + + // transpose + for (i = 1 ; i < 4; i++) { + for (j = 0; j < i; j++) { + tmp = out[i][j]; + out[i][j] = out[j][i]; + out[j][i] = tmp; + } } // inverse transform column vectors for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - IHT_4[tx_type].cols(temp_in, temp_out); + IHT_4[tx_type].cols(out[i], out[i]); + } + +#if CONFIG_EXT_TX + maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4); +#endif + + // Sum with the destination + for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 4)); + int d = i * stride + j; + int s = j * outstride + i; + dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4)); } } } -static const transform_2d IHT_8[] = { - { idct8_c, idct8_c }, // DCT_DCT = 0 - { iadst8_c, idct8_c }, // ADST_DCT = 1 - { idct8_c, iadst8_c }, // DCT_ADST = 2 - { iadst8_c, iadst8_c } // ADST_ADST = 3 -}; - void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { + static const transform_2d IHT_8[] = { + { idct8_c, idct8_c }, // DCT_DCT = 0, + { iadst8_c, idct8_c }, // ADST_DCT = 1, + { idct8_c, iadst8_c }, // DCT_ADST = 
2, + { iadst8_c, iadst8_c }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { iadst8_c, idct8_c }, // FLIPADST_DCT = 4, + { idct8_c, iadst8_c }, // DCT_FLIPADST = 5, + { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6, + { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7, + { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8, + { idst8_c, idct8_c }, // DST_DCT = 9, + { idct8_c, idst8_c }, // DCT_DST = 10, + { idst8_c, iadst8_c }, // DST_ADST = 11, + { iadst8_c, idst8_c }, // ADST_DST = 12, + { idst8_c, iadst8_c }, // DST_FLIPADST = 13, + { iadst8_c, idst8_c }, // FLIPADST_DST = 14, + { idst8_c, idst8_c }, // DST_DST = 15 +#endif // CONFIG_EXT_TX + }; + int i, j; - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - tran_low_t temp_in[8], temp_out[8]; - const transform_2d ht = IHT_8[tx_type]; + tran_low_t tmp; + tran_low_t out[8][8]; + tran_low_t *outp = &out[0][0]; + int outstride = 8; // inverse transform row vectors for (i = 0; i < 8; ++i) { - ht.rows(input, outptr); - input += 8; - outptr += 8; + IHT_8[tx_type].rows(input, out[i]); + input += 8; + } + + // transpose + for (i = 1 ; i < 8; i++) { + for (j = 0; j < i; j++) { + tmp = out[i][j]; + out[i][j] = out[j][i]; + out[j][i] = tmp; + } } // inverse transform column vectors for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - ht.cols(temp_in, temp_out); + IHT_8[tx_type].cols(out[i], out[i]); + } + +#if CONFIG_EXT_TX + maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8); +#endif + + // Sum with the destination + for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 5)); + int d = i * stride + j; + int s = j * outstride + i; + dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); } } } -static const transform_2d IHT_16[] = { - { idct16_c, idct16_c }, // DCT_DCT = 0 - { iadst16_c, idct16_c }, // ADST_DCT = 1 - { idct16_c, iadst16_c }, // DCT_ADST = 2 - { iadst16_c, iadst16_c } // 
ADST_ADST = 3 -}; - void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { - int i, j; - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - tran_low_t temp_in[16], temp_out[16]; - const transform_2d ht = IHT_16[tx_type]; + static const transform_2d IHT_16[] = { + { idct16_c, idct16_c }, // DCT_DCT = 0, + { iadst16_c, idct16_c }, // ADST_DCT = 1, + { idct16_c, iadst16_c }, // DCT_ADST = 2, + { iadst16_c, iadst16_c }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { iadst16_c, idct16_c }, // FLIPADST_DCT = 4, + { idct16_c, iadst16_c }, // DCT_FLIPADST = 5, + { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6, + { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7, + { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8, + { idst16_c, idct16_c }, // DST_DCT = 9, + { idct16_c, idst16_c }, // DCT_DST = 10, + { idst16_c, iadst16_c }, // DST_ADST = 11, + { iadst16_c, idst16_c }, // ADST_DST = 12, + { idst16_c, iadst16_c }, // DST_FLIPADST = 13, + { iadst16_c, idst16_c }, // FLIPADST_DST = 14, + { idst16_c, idst16_c }, // DST_DST = 15 +#endif // CONFIG_EXT_TX + }; - // Rows + int i, j; + tran_low_t tmp; + tran_low_t out[16][16]; + tran_low_t *outp = &out[0][0]; + int outstride = 16; + + // inverse transform row vectors for (i = 0; i < 16; ++i) { - ht.rows(input, outptr); - input += 16; - outptr += 16; + IHT_16[tx_type].rows(input, out[i]); + input += 16; } - // Columns + // transpose + for (i = 1 ; i < 16; i++) { + for (j = 0; j < i; j++) { + tmp = out[i][j]; + out[i][j] = out[j][i]; + out[j][i] = tmp; + } + } + + // inverse transform column vectors for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - ht.cols(temp_in, temp_out); + IHT_16[tx_type].cols(out[i], out[i]); + } + +#if CONFIG_EXT_TX + maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16); +#endif + + // Sum with the destination + for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + 
i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); + int d = i * stride + j; + int s = j * outstride + i; + dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); } } } @@ -183,20 +1214,43 @@ if (lossless) { assert(tx_type == DCT_DCT); vp10_iwht4x4_add(input, dest, stride, eob); - } else { - switch (tx_type) { - case DCT_DCT: - vp10_idct4x4_add(input, dest, stride, eob); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_iht4x4_16_add(input, dest, stride, tx_type); - break; - default: - assert(0); - break; - } + return; + } + + switch (tx_type) { + case DCT_DCT: + vp10_idct4x4_add(input, dest, stride, eob); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vp10_iht4x4_16_add(input, dest, stride, tx_type); + break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + case DCT_FLIPADST: + case FLIPADST_FLIPADST: + case ADST_FLIPADST: + case FLIPADST_ADST: + vp10_iht4x4_16_add(input, dest, stride, tx_type); + break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + case FLIPADST_DST: + case DST_FLIPADST: + // Use C version since DST only exists in C code + vp10_iht4x4_16_add_c(input, dest, stride, tx_type); + break; + case IDTX: + inv_idtx_add_c(input, dest, stride, 4); + break; +#endif // CONFIG_EXT_TX + default: + assert(0); + break; } } @@ -211,6 +1265,28 @@ case ADST_ADST: vp10_iht8x8_64_add(input, dest, stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + case DCT_FLIPADST: + case FLIPADST_FLIPADST: + case ADST_FLIPADST: + case FLIPADST_ADST: + vp10_iht8x8_64_add(input, dest, stride, tx_type); + break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + case FLIPADST_DST: + case DST_FLIPADST: + // Use C version since DST only exists in C code + vp10_iht8x8_64_add_c(input, dest, stride, tx_type); + break; + case IDTX: + inv_idtx_add_c(input, dest, stride, 8); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -228,6 +1304,28 @@ case ADST_ADST: 
vp10_iht16x16_256_add(input, dest, stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + case DCT_FLIPADST: + case FLIPADST_FLIPADST: + case ADST_FLIPADST: + case FLIPADST_ADST: + vp10_iht16x16_256_add(input, dest, stride, tx_type); + break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + case FLIPADST_DST: + case DST_FLIPADST: + // Use C version since DST only exists in C code + vp10_iht16x16_256_add_c(input, dest, stride, tx_type); + break; + case IDTX: + inv_idtx_add_c(input, dest, stride, 16); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -240,6 +1338,11 @@ case DCT_DCT: vp10_idct32x32_add(input, dest, stride, eob); break; +#if CONFIG_EXT_TX + case IDTX: + inv_idtx_add_c(input, dest, stride, 32); + break; +#endif // CONFIG_EXT_TX case ADST_DCT: case DCT_ADST: case ADST_ADST: @@ -254,104 +1357,198 @@ #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { - const highbd_transform_2d IHT_4[] = { - { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 + static const highbd_transform_2d HIGH_IHT_4[] = { + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0, + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1, + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2, + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4, + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5, + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6, + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7, + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8, + { 
highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 9, + { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 10, + { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 11, + { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 12, + { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13, + { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14, + { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15 +#endif // CONFIG_EXT_TX }; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); int i, j; - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - tran_low_t temp_in[4], temp_out[4]; + tran_low_t tmp; + tran_low_t out[4][4]; + tran_low_t *outp = &out[0][0]; + int outstride = 4; - // Inverse transform row vectors. + // inverse transform row vectors for (i = 0; i < 4; ++i) { - IHT_4[tx_type].rows(input, outptr, bd); + HIGH_IHT_4[tx_type].rows(input, out[i], bd); input += 4; - outptr += 4; } - // Inverse transform column vectors. + // transpose + for (i = 1 ; i < 4; i++) { + for (j = 0; j < i; j++) { + tmp = out[i][j]; + out[i][j] = out[j][i]; + out[j][i] = tmp; + } + } + + // inverse transform column vectors for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - IHT_4[tx_type].cols(temp_in, temp_out, bd); + HIGH_IHT_4[tx_type].cols(out[i], out[i], bd); + } + +#if CONFIG_EXT_TX + maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4); +#endif + + // Sum with the destination + for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); + int d = i * stride + j; + int s = j * outstride + i; + dest[d] = highbd_clip_pixel_add(dest[d], + ROUND_POWER_OF_TWO(outp[s], 4), bd); } } } -static const highbd_transform_2d HIGH_IHT_8[] = { - { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 - { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 - { 
vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 -}; - void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { - int i, j; - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - tran_low_t temp_in[8], temp_out[8]; - const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; + static const highbd_transform_2d HIGH_IHT_8[] = { + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0, + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1, + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2, + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4, + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5, + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6, + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7, + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8, + { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 9, + { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 10, + { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 11, + { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 12, + { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13, + { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14, + { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15 +#endif // CONFIG_EXT_TX + }; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - // Inverse transform row vectors. + int i, j; + tran_low_t tmp; + tran_low_t out[8][8]; + tran_low_t *outp = &out[0][0]; + int outstride = 8; + + // inverse transform row vectors for (i = 0; i < 8; ++i) { - ht.rows(input, outptr, bd); - input += 8; - outptr += 8; + HIGH_IHT_8[tx_type].rows(input, out[i], bd); + input += 8; } - // Inverse transform column vectors. 
+ // transpose + for (i = 1 ; i < 8; i++) { + for (j = 0; j < i; j++) { + tmp = out[i][j]; + out[i][j] = out[j][i]; + out[j][i] = tmp; + } + } + + // inverse transform column vectors for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - ht.cols(temp_in, temp_out, bd); + HIGH_IHT_8[tx_type].cols(out[i], out[i], bd); + } + +#if CONFIG_EXT_TX + maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8); +#endif + + // Sum with the destination + for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + int d = i * stride + j; + int s = j * outstride + i; + dest[d] = highbd_clip_pixel_add(dest[d], + ROUND_POWER_OF_TWO(outp[s], 5), bd); } } } -static const highbd_transform_2d HIGH_IHT_16[] = { - { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 -}; - void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { - int i, j; - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - tran_low_t temp_in[16], temp_out[16]; - const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; + static const highbd_transform_2d HIGH_IHT_16[] = { + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0, + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1, + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2, + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4, + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5, + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6, + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // 
ADST_FLIPADST = 7, + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8, + { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 9, + { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 10, + { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 11, + { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 12, + { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13, + { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14, + { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15 +#endif // CONFIG_EXT_TX + }; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - // Rows + int i, j; + tran_low_t tmp; + tran_low_t out[16][16]; + tran_low_t *outp = &out[0][0]; + int outstride = 16; + + // inverse transform row vectors for (i = 0; i < 16; ++i) { - ht.rows(input, outptr, bd); - input += 16; - outptr += 16; + HIGH_IHT_16[tx_type].rows(input, out[i], bd); + input += 16; } - // Columns + // transpose + for (i = 1 ; i < 16; i++) { + for (j = 0; j < i; j++) { + tmp = out[i][j]; + out[i][j] = out[j][i]; + out[j][i] = tmp; + } + } + + // inverse transform column vectors for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - ht.cols(temp_in, temp_out, bd); + HIGH_IHT_16[tx_type].cols(out[i], out[i], bd); + } + +#if CONFIG_EXT_TX + maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16); +#endif + + // Sum with the destination + for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + int d = i * stride + j; + int s = j * outstride + i; + dest[d] = highbd_clip_pixel_add(dest[d], + ROUND_POWER_OF_TWO(outp[s], 6), bd); } } } @@ -425,20 +1622,43 @@ if (lossless) { assert(tx_type == DCT_DCT); vp10_highbd_iwht4x4_add(input, dest, stride, eob, bd); - } else { - switch (tx_type) { - case DCT_DCT: - vp10_highbd_idct4x4_add(input, dest, stride, eob, bd); - break; - case ADST_DCT: - case 
DCT_ADST: - case ADST_ADST: - vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); - break; - default: - assert(0); - break; - } + return; + } + + switch (tx_type) { + case DCT_DCT: + vp10_highbd_idct4x4_add(input, dest, stride, eob, bd); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); + break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + case DCT_FLIPADST: + case FLIPADST_FLIPADST: + case ADST_FLIPADST: + case FLIPADST_ADST: + vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); + break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + case FLIPADST_DST: + case DST_FLIPADST: + // Use C version since DST only exists in C code + vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd); + break; + case IDTX: + highbd_inv_idtx_add_c(input, dest, stride, 4, bd); + break; +#endif // CONFIG_EXT_TX + default: + assert(0); + break; } } @@ -454,6 +1674,28 @@ case ADST_ADST: vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + case DCT_FLIPADST: + case FLIPADST_FLIPADST: + case ADST_FLIPADST: + case FLIPADST_ADST: + vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); + break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + case FLIPADST_DST: + case DST_FLIPADST: + // Use C version since DST only exists in C code + vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd); + break; + case IDTX: + highbd_inv_idtx_add_c(input, dest, stride, 8, bd); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -472,6 +1714,28 @@ case ADST_ADST: vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + case DCT_FLIPADST: + case FLIPADST_FLIPADST: + case ADST_FLIPADST: + case FLIPADST_ADST: + vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); + break; + case DST_DST: + case 
DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + case FLIPADST_DST: + case DST_FLIPADST: + // Use C version since DST only exists in C code + vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd); + break; + case IDTX: + highbd_inv_idtx_add_c(input, dest, stride, 16, bd); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -485,6 +1749,11 @@ case DCT_DCT: vp10_highbd_idct32x32_add(input, dest, stride, eob, bd); break; +#if CONFIG_EXT_TX + case IDTX: + highbd_inv_idtx_add_c(input, dest, stride, 32, bd); + break; +#endif // CONFIG_EXT_TX case ADST_DCT: case DCT_ADST: case ADST_ADST: @@ -496,3 +1765,66 @@ } } #endif // CONFIG_VP9_HIGHBITDEPTH + +void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride, + INV_TXFM_PARAM *inv_txfm_param) { + const TX_TYPE tx_type = inv_txfm_param->tx_type; + const TX_SIZE tx_size = inv_txfm_param->tx_size; + const int eob = inv_txfm_param->eob; + const int lossless = inv_txfm_param->lossless; + + switch (tx_size) { + case TX_32X32: + vp10_inv_txfm_add_32x32(input, dest, stride, eob, tx_type); + break; + case TX_16X16: + vp10_inv_txfm_add_16x16(input, dest, stride, eob, tx_type); + break; + case TX_8X8: + vp10_inv_txfm_add_8x8(input, dest, stride, eob, tx_type); + break; + case TX_4X4: + // this is like vp10_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. 
+ vp10_inv_txfm_add_4x4(input, dest, stride, eob, tx_type, + lossless); + break; + default: + assert(0 && "Invalid transform size"); + break; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride, + INV_TXFM_PARAM *inv_txfm_param) { + const TX_TYPE tx_type = inv_txfm_param->tx_type; + const TX_SIZE tx_size = inv_txfm_param->tx_size; + const int eob = inv_txfm_param->eob; + const int bd = inv_txfm_param->bd; + const int lossless = inv_txfm_param->lossless; + + switch (tx_size) { + case TX_32X32: + vp10_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type); + break; + case TX_16X16: + vp10_highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type); + break; + case TX_8X8: + vp10_highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type); + break; + case TX_4X4: + // this is like vp10_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. + vp10_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type, + lossless); + break; + default: + assert(0 && "Invalid transform size"); + break; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/common/idct.h b/vp10/common/idct.h index 0883398..31b26b8 100644 --- a/vp10/common/idct.h +++ b/vp10/common/idct.h
@@ -24,6 +24,16 @@ extern "C" { #endif +typedef struct INV_TXFM_PARAM { + TX_TYPE tx_type; + TX_SIZE tx_size; + int eob; + int lossless; +#if CONFIG_VP9_HIGHBITDEPTH + int bd; +#endif +} INV_TXFM_PARAM; + typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); typedef struct { @@ -51,7 +61,8 @@ int stride, int eob, TX_TYPE tx_type); void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride, int eob, TX_TYPE tx_type); - +void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride, + INV_TXFM_PARAM *inv_txfm_param); #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd); @@ -74,6 +85,8 @@ void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd, TX_TYPE tx_type); +void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride, + INV_TXFM_PARAM *inv_txfm_param); #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus } // extern "C"
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index a1925de..20d724d 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c
@@ -719,11 +719,7 @@ uint64_t *const int_4x4_y = &lfm->int_4x4_y; uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; -#if CONFIG_MISC_FIXES uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv; -#else - uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; -#endif int i; // If filter level is 0 we don't loop filter. @@ -758,13 +754,8 @@ // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. -#if CONFIG_MISC_FIXES if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return; -#else - if (mbmi->skip && is_inter_block(mbmi)) - return; -#endif // Here we are adding a mask for the transform size. The transform // size mask is set to be correct for a 64x64 prediction block size. We @@ -796,10 +787,18 @@ // we only update u and v masks on the first block. static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX LOOP_FILTER_MASK *lfm) { const MB_MODE_INFO *mbmi = &mi->mbmi; - const BLOCK_SIZE block_size = mbmi->sb_type; const TX_SIZE tx_size_y = mbmi->tx_size; +#if CONFIG_SUPERTX + const BLOCK_SIZE block_size = + supertx_enabled ? 
(BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type; +#else + const BLOCK_SIZE block_size = mbmi->sb_type; +#endif const int filter_level = get_filter_level(lfi_n, mbmi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; @@ -821,13 +820,8 @@ *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= left_prediction_mask[block_size] << shift_y; -#if CONFIG_MISC_FIXES if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return; -#else - if (mbmi->skip && is_inter_block(mbmi)) - return; -#endif *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) << shift_y; @@ -913,6 +907,10 @@ break; case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif if (mi_32_row_offset + 2 >= max_rows) continue; mip2 = mip + mode_info_stride * 2; @@ -920,12 +918,22 @@ break; case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif if (mi_32_col_offset + 2 >= max_cols) continue; mip2 = mip + 2; build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); break; default: +#if CONFIG_SUPERTX + if (mip[0]->mbmi.tx_size == TX_32X32) { + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + } +#endif for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; @@ -942,23 +950,45 @@ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case BLOCK_16X8: +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_row_offset + 1 >= max_rows) continue; mip2 = mip + mode_info_stride; - build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); + build_y_mask(lfi_n, mip2[0], shift_y+8, +#if CONFIG_SUPERTX + 0, +#endif + lfm); break; case 
BLOCK_8X16: +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_col_offset +1 >= max_cols) continue; mip2 = mip + 1; - build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); + build_y_mask(lfi_n, mip2[0], shift_y+1, +#if CONFIG_SUPERTX + 0, +#endif + lfm); break; default: { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0]; +#if CONFIG_SUPERTX + if (mip[0]->mbmi.tx_size == TX_16X16) { + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + } +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); mip += offset[0]; for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { @@ -973,7 +1003,11 @@ if (mi_8_col_offset >= max_cols || mi_8_row_offset >= max_rows) continue; - build_y_mask(lfi_n, mip[0], shift_y, lfm); + build_y_mask(lfi_n, mip[0], shift_y, +#if CONFIG_SUPERTX + supertx_enabled(&mip[0]->mbmi), +#endif + lfm); } break; } @@ -1019,11 +1053,7 @@ lfm->above_uv[i] &= mask_uv; } lfm->int_4x4_y &= mask_y; -#if CONFIG_MISC_FIXES lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv; -#else - lfm->int_4x4_uv &= mask_uv; -#endif // We don't apply a wide loop filter on the last uv block row. If set // apply the shorter one instead. @@ -1057,11 +1087,7 @@ lfm->above_uv[i] &= mask_uv; } lfm->int_4x4_y &= mask_y; -#if CONFIG_MISC_FIXES lfm->left_int_4x4_uv &= mask_uv_int; -#else - lfm->int_4x4_uv &= mask_uv_int; -#endif // We don't apply a wide loop filter on the last uv column. If set // apply the shorter one instead. 
@@ -1091,11 +1117,7 @@ assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); -#if CONFIG_MISC_FIXES assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16])); -#else - assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); -#endif assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); @@ -1103,11 +1125,7 @@ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); -#if CONFIG_MISC_FIXES assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16])); -#else - assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); -#endif } static void filter_selectively_vert(uint8_t *s, int pitch, @@ -1183,9 +1201,9 @@ #endif // CONFIG_VP9_HIGHBITDEPTH void vp10_filter_block_plane_non420(VP10_COMMON *cm, - struct macroblockd_plane *plane, - MODE_INFO **mi_8x8, - int mi_row, int mi_col) { + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { const int ss_x = plane->subsampling_x; const int ss_y = plane->subsampling_y; const int row_step = 1 << ss_y; @@ -1209,49 +1227,103 @@ // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { const MODE_INFO *mi = mi_8x8[c]; - const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; - const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); + const MB_MODE_INFO *mbmi = &mi[0].mbmi; + const BLOCK_SIZE sb_type = mbmi->sb_type; + const int skip_this = mbmi->skip && is_inter_block(mbmi); + const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1); + const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1); + // left edge of current unit is block/partition edge -> no skip const int 
block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? - !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; + !blk_col : 1; const int skip_this_c = skip_this && !block_edge_left; // top edge of current unit is block/partition edge -> no skip const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? - !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; + !blk_row : 1; const int skip_this_r = skip_this && !block_edge_above; + +#if CONFIG_VAR_TX + TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) + ? get_uv_tx_size(mbmi, plane) : mbmi->tx_size; +#else const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) - ? get_uv_tx_size(&mi[0].mbmi, plane) - : mi[0].mbmi.tx_size; + ? get_uv_tx_size(mbmi, plane) + : mbmi->tx_size; +#endif + const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; + TX_SIZE tx_size_c = tx_size; + TX_SIZE tx_size_r = tx_size; + + int tx_size_mask = 0; // Filter level can vary per MI if (!(lfl[(r << 3) + (c >> ss_x)] = - get_filter_level(&cm->lf_info, &mi[0].mbmi))) + get_filter_level(&cm->lf_info, mbmi))) continue; + if (tx_size == TX_32X32) + tx_size_mask = 3; + else if (tx_size == TX_16X16) + tx_size_mask = 1; + else + tx_size_mask = 0; + +#if CONFIG_VAR_TX + if (is_inter_block(mbmi) && !mbmi->skip) + tx_size = (plane->plane_type == PLANE_TYPE_UV) ? 
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row * 8 + blk_col], + sb_type, ss_x, ss_y) : + mbmi->inter_tx_size[blk_row * 8 + blk_col]; + + tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]); + tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]); + + cm->above_txfm_context[mi_col + c] = tx_size; + cm->left_txfm_context[(mi_row + r) & 0x07] = tx_size; +#endif + // Build masks based on the transform size of each block - if (tx_size == TX_32X32) { - if (!skip_this_c && ((c >> ss_x) & 3) == 0) { + // handle vertical mask + if (tx_size_c == TX_32X32) { + if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) { if (!skip_border_4x4_c) mask_16x16_c |= 1 << (c >> ss_x); else mask_8x8_c |= 1 << (c >> ss_x); } - if (!skip_this_r && ((r >> ss_y) & 3) == 0) { + } else if (tx_size_c == TX_16X16) { + if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) { + if (!skip_border_4x4_c) + mask_16x16_c |= 1 << (c >> ss_x); + else + mask_8x8_c |= 1 << (c >> ss_x); + } + } else { + // force 8x8 filtering on 32x32 boundaries + if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) { + if (tx_size_c == TX_8X8 || ((c >> ss_x) & 3) == 0) + mask_8x8_c |= 1 << (c >> ss_x); + else + mask_4x4_c |= 1 << (c >> ss_x); + } + + if (!skip_this && tx_size_c < TX_8X8 && !skip_border_4x4_c && + ((c >> ss_x) & tx_size_mask) == 0) + mask_4x4_int[r] |= 1 << (c >> ss_x); + } + + // set horizontal mask + if (tx_size_r == TX_32X32) { + if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) { if (!skip_border_4x4_r) mask_16x16[r] |= 1 << (c >> ss_x); else mask_8x8[r] |= 1 << (c >> ss_x); } - } else if (tx_size == TX_16X16) { - if (!skip_this_c && ((c >> ss_x) & 1) == 0) { - if (!skip_border_4x4_c) - mask_16x16_c |= 1 << (c >> ss_x); - else - mask_8x8_c |= 1 << (c >> ss_x); - } - if (!skip_this_r && ((r >> ss_y) & 1) == 0) { + } else if (tx_size_r == TX_16X16) { + if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) { if (!skip_border_4x4_r) mask_16x16[r] |= 1 << (c >> ss_x); 
else @@ -1259,21 +1331,15 @@ } } else { // force 8x8 filtering on 32x32 boundaries - if (!skip_this_c) { - if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0) - mask_8x8_c |= 1 << (c >> ss_x); - else - mask_4x4_c |= 1 << (c >> ss_x); - } - - if (!skip_this_r) { - if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0) + if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) { + if (tx_size_r == TX_8X8 || ((r >> ss_y) & 3) == 0) mask_8x8[r] |= 1 << (c >> ss_x); else mask_4x4[r] |= 1 << (c >> ss_x); } - if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c) + if (!skip_this && tx_size_r < TX_8X8 && !skip_border_4x4_c && + ((r >> ss_y) & tx_size_mask) == 0) mask_4x4_int[r] |= 1 << (c >> ss_x); } } @@ -1462,11 +1528,7 @@ uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; -#if CONFIG_MISC_FIXES uint16_t mask_4x4_int = lfm->left_int_4x4_uv; -#else - uint16_t mask_4x4_int = lfm->int_4x4_uv; -#endif assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); @@ -1518,11 +1580,7 @@ mask_16x16 = lfm->above_uv[TX_16X16]; mask_8x8 = lfm->above_uv[TX_8X8]; mask_4x4 = lfm->above_uv[TX_4X4]; -#if CONFIG_MISC_FIXES mask_4x4_int = lfm->above_int_4x4_uv; -#else - mask_4x4_int = lfm->int_4x4_uv; -#endif for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; @@ -1568,13 +1626,14 @@ } void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, - VP10_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only) { + VP10_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only) { const int num_planes = y_only ? 
1 : MAX_MB_PLANE; + int mi_row, mi_col; +#if !CONFIG_VAR_TX enum lf_path path; LOOP_FILTER_MASK lfm; - int mi_row, mi_col; if (y_only) path = LF_PATH_444; @@ -1584,19 +1643,29 @@ path = LF_PATH_444; else path = LF_PATH_SLOW; +#endif +#if CONFIG_VAR_TX + memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols); +#endif for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - +#if CONFIG_VAR_TX + memset(cm->left_txfm_context, TX_SIZES, 8); +#endif for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); +#if CONFIG_VAR_TX + for (plane = 0; plane < num_planes; ++plane) + vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); +#else // TODO(JBB): Make setup_mask work for non 420. vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); - vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); for (plane = 1; plane < num_planes; ++plane) { switch (path) { @@ -1612,6 +1681,7 @@ break; } } +#endif } } }
diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h index 8db705a..3d76439 100644 --- a/vp10/common/loopfilter.h +++ b/vp10/common/loopfilter.h
@@ -43,7 +43,8 @@ uint8_t mode_ref_delta_enabled; uint8_t mode_ref_delta_update; - // 0 = Intra, Last, GF, ARF + // 0 = Intra, Last, Last2+Last3+LAST4(CONFIG_EXT_REFS), + // GF, ARF signed char ref_deltas[MAX_REF_FRAMES]; signed char last_ref_deltas[MAX_REF_FRAMES]; @@ -80,12 +81,8 @@ uint64_t int_4x4_y; uint16_t left_uv[TX_SIZES]; uint16_t above_uv[TX_SIZES]; -#if CONFIG_MISC_FIXES uint16_t left_int_4x4_uv; uint16_t above_int_4x4_uv; -#else - uint16_t int_4x4_uv; -#endif uint8_t lfl_y[64]; uint8_t lfl_uv[16]; } LOOP_FILTER_MASK;
diff --git a/vp10/common/mv.h b/vp10/common/mv.h index b4971a5..289c591 100644 --- a/vp10/common/mv.h +++ b/vp10/common/mv.h
@@ -34,6 +34,13 @@ int32_t col; } MV32; +#if CONFIG_REF_MV +typedef struct candidate_mv { + int_mv this_mv; + int weight; +} CANDIDATE_MV; +#endif + static INLINE int is_zero_mv(const MV *mv) { return *((const uint32_t *)mv) == 0; } @@ -48,6 +55,9 @@ mv->row = clamp(mv->row, min_row, max_row); } +static INLINE int mv_has_subpel(const MV *mv) { + return (mv->row & 0x0F) || (mv->col & 0x0F); +} #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c index 1ef80c2..319ef4a 100644 --- a/vp10/common/mvref_common.c +++ b/vp10/common/mvref_common.c
@@ -11,6 +11,427 @@ #include "vp10/common/mvref_common.h" +#if CONFIG_REF_MV +static uint8_t scan_row_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, + const int mi_row, const int mi_col, int block, + const MV_REFERENCE_FRAME ref_frame, + int row_offset, + CANDIDATE_MV *ref_mv_stack, + uint8_t *refmv_count) { + const TileInfo *const tile = &xd->tile; + int i; + uint8_t newmv_count = 0; + + for (i = 0; i < xd->n8_w && *refmv_count < MAX_REF_MV_STACK_SIZE;) { + POSITION mi_pos; + mi_pos.row = row_offset; + mi_pos.col = i; + + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) { + const MODE_INFO *const candidate_mi = + xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; + const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; + const int len = VPXMIN(xd->n8_w, + num_8x8_blocks_wide_lookup[candidate->sb_type]); + const int weight = len; + int index = 0, ref; + + for (ref = 0; ref < 2; ++ref) { + if (candidate->ref_frame[ref] == ref_frame) { + int_mv this_refmv = + get_sub_block_mv(candidate_mi, ref, mi_pos.col, block); + for (index = 0; index < *refmv_count; ++index) + if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) + break; + + if (index < *refmv_count) + ref_mv_stack[index].weight += weight; + + // Add a new item to the list. 
+ if (index == *refmv_count) { + ref_mv_stack[index].this_mv = this_refmv; + ref_mv_stack[index].weight = weight; + ++(*refmv_count); + + if (candidate->mode == NEWMV) + ++newmv_count; + } + } + } + i += len; + } else { + ++i; + } + } + + return newmv_count; +} + +static uint8_t scan_col_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, + const int mi_row, const int mi_col, int block, + const MV_REFERENCE_FRAME ref_frame, + int col_offset, + CANDIDATE_MV *ref_mv_stack, + uint8_t *refmv_count) { + const TileInfo *const tile = &xd->tile; + int i; + uint8_t newmv_count = 0; + + for (i = 0; i < xd->n8_h && *refmv_count < MAX_REF_MV_STACK_SIZE;) { + POSITION mi_pos; + mi_pos.row = i; + mi_pos.col = col_offset; + + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) { + const MODE_INFO *const candidate_mi = + xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; + const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; + const int len = VPXMIN(xd->n8_h, + num_8x8_blocks_high_lookup[candidate->sb_type]); + const int weight = len; + int index = 0, ref; + + for (ref = 0; ref < 2; ++ref) { + if (candidate->ref_frame[ref] == ref_frame) { + int_mv this_refmv = + get_sub_block_mv(candidate_mi, ref, mi_pos.col, block); + for (index = 0; index < *refmv_count; ++index) + if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) + break; + + if (index < *refmv_count) + ref_mv_stack[index].weight += weight; + + if (index == *refmv_count) { + ref_mv_stack[index].this_mv = this_refmv; + ref_mv_stack[index].weight = weight; + ++(*refmv_count); + + if (candidate->mode == NEWMV) + ++newmv_count; + } + } + } + i += len; + } else { + ++i; + } + } + + return newmv_count; +} + +static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, + const int mi_row, const int mi_col, int block, + const MV_REFERENCE_FRAME ref_frame, + int row_offset, int col_offset, + CANDIDATE_MV *ref_mv_stack, + uint8_t *refmv_count) { + const TileInfo *const tile = &xd->tile; + POSITION 
mi_pos; + uint8_t newmv_count = 0; + + mi_pos.row = row_offset; + mi_pos.col = col_offset; + + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos) && + *refmv_count < MAX_REF_MV_STACK_SIZE) { + const MODE_INFO *const candidate_mi = + xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; + const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; + const int len = 1; + const int weight = len; + int index = 0, ref; + + for (ref = 0; ref < 2; ++ref) { + if (candidate->ref_frame[ref] == ref_frame) { + int_mv this_refmv = + get_sub_block_mv(candidate_mi, ref, mi_pos.col, block); + for (index = 0; index < *refmv_count; ++index) + if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) + break; + + if (index < *refmv_count) + ref_mv_stack[index].weight += weight; + + if (index == *refmv_count) { + ref_mv_stack[index].this_mv = this_refmv; + ref_mv_stack[index].weight = weight; + ++(*refmv_count); + + if (candidate->mode == NEWMV) + ++newmv_count; + } + + if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) { + int alt_block = 3 - block; + this_refmv = + get_sub_block_mv(candidate_mi, ref, mi_pos.col, alt_block); + for (index = 0; index < *refmv_count; ++index) + if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) + break; + + if (index < *refmv_count) + ref_mv_stack[index].weight += weight; + + // Add a new item to the list. + if (index == *refmv_count) { + ref_mv_stack[index].this_mv = this_refmv; + ref_mv_stack[index].weight = weight; + ++(*refmv_count); + } + } + } + } + } // Analyze a single 8x8 block motion information. 
+ return newmv_count; +} + +static int has_top_right(const MACROBLOCKD *xd, + int mi_row, int mi_col, int bs) { + int has_tr = !((mi_row & bs) & (bs * 2 - 1)) || + !((mi_col & bs) & (bs * 2 - 1)); + + // Filter out partial right-most boundaries + if ((mi_col & bs) & (bs * 2 - 1)) { + if (((mi_col & (2 * bs)) & (bs * 4 - 1)) && + ((mi_row & (2 * bs)) & (bs * 4 - 1))) + has_tr = 0; + } + + if (has_tr) + if (((mi_col + xd->n8_w) & 0x07) == 0) + if ((mi_row & 0x07) > 0) + has_tr = 0; + + if (xd->n8_w < xd->n8_h) + if (!xd->is_sec_rect) + has_tr = 1; + + if (xd->n8_w > xd->n8_h) + if (xd->is_sec_rect) + has_tr = 0; + + return has_tr; +} + +static void handle_sec_rect_block(const MB_MODE_INFO * const candidate, + uint8_t refmv_count, + CANDIDATE_MV *ref_mv_stack, + MV_REFERENCE_FRAME ref_frame, + int16_t *mode_context) { + int rf, idx; + + for (rf = 0; rf < 2; ++rf) { + if (candidate->ref_frame[rf] == ref_frame) { + const int list_range = VPXMIN(refmv_count, MAX_MV_REF_CANDIDATES); + + const int_mv pred_mv = candidate->mv[rf]; + for (idx = 0; idx < list_range; ++idx) + if (pred_mv.as_int == ref_mv_stack[idx].this_mv.as_int) + break; + + if (idx < list_range) { + if (idx == 0) + mode_context[ref_frame] |= (1 << SKIP_NEARESTMV_OFFSET); + else if (idx == 1) + mode_context[ref_frame] |= (1 << SKIP_NEARMV_OFFSET); + } + } + } +} + +static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref_frame, + uint8_t *refmv_count, + CANDIDATE_MV *ref_mv_stack, + int_mv *mv_ref_list, + int block, int mi_row, int mi_col, + int16_t *mode_context) { + int idx, nearest_refmv_count = 0; + uint8_t newmv_count = 0; + + CANDIDATE_MV tmp_mv; + int len, nr_len; + + const MV_REF *const prev_frame_mvs_base = cm->use_prev_frame_mvs ? 
+ cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; + + int bs = VPXMAX(xd->n8_w, xd->n8_h); + int has_tr = has_top_right(xd, mi_row, mi_col, bs); + + mode_context[ref_frame] = 0; + *refmv_count = 0; + + // Scan the first above row mode info. + newmv_count = scan_row_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -1, ref_mv_stack, refmv_count); + // Scan the first left column mode info. + newmv_count += scan_col_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -1, ref_mv_stack, refmv_count); + + // Check top-right boundary + if (has_tr) + newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -1, 1, ref_mv_stack, refmv_count); + + nearest_refmv_count = *refmv_count; + + if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame) { + int ref; + int blk_row, blk_col; + + for (blk_row = 0; blk_row < xd->n8_h; ++blk_row) { + for (blk_col = 0; blk_col < xd->n8_w; ++blk_col) { + const MV_REF *prev_frame_mvs = + prev_frame_mvs_base + blk_row * cm->mi_cols + blk_col; + + POSITION mi_pos; + mi_pos.row = blk_row; + mi_pos.col = blk_col; + + if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) + continue; + + for (ref = 0; ref < 2; ++ref) { + if (prev_frame_mvs->ref_frame[ref] == ref_frame) { + for (idx = 0; idx < *refmv_count; ++idx) + if (prev_frame_mvs->mv[ref].as_int == + ref_mv_stack[idx].this_mv.as_int) + break; + + if (idx < *refmv_count) + ref_mv_stack[idx].weight += 1; + + if (idx == *refmv_count && + *refmv_count < MAX_REF_MV_STACK_SIZE) { + ref_mv_stack[idx].this_mv.as_int = prev_frame_mvs->mv[ref].as_int; + ref_mv_stack[idx].weight = 1; + ++(*refmv_count); + + if (abs(ref_mv_stack[idx].this_mv.as_mv.row) >= 8 || + abs(ref_mv_stack[idx].this_mv.as_mv.col) >= 8) + mode_context[ref_frame] |= (1 << ZEROMV_OFFSET); + } + } + } + } + } + } + + if (*refmv_count == nearest_refmv_count) + mode_context[ref_frame] |= (1 << ZEROMV_OFFSET); + + // Analyze the top-left corner block mode info. 
+// scan_blk_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, +// -1, -1, ref_mv_stack, refmv_count); + + // Scan the second outer area. + scan_row_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -2, ref_mv_stack, refmv_count); + scan_col_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -2, ref_mv_stack, refmv_count); + + // Scan the third outer area. + scan_row_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -3, ref_mv_stack, refmv_count); + scan_col_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -3, ref_mv_stack, refmv_count); + + // Scan the fourth outer area. + scan_row_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -4, ref_mv_stack, refmv_count); + // Scan the third left row mode info. + scan_col_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + -4, ref_mv_stack, refmv_count); + + switch (nearest_refmv_count) { + case 0: + mode_context[ref_frame] |= 0; + if (*refmv_count >= 1) + mode_context[ref_frame] |= 1; + + if (*refmv_count == 1) + mode_context[ref_frame] |= (1 << REFMV_OFFSET); + else if (*refmv_count >= 2) + mode_context[ref_frame] |= (2 << REFMV_OFFSET); + break; + case 1: + mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3; + + if (*refmv_count == 1) + mode_context[ref_frame] |= (3 << REFMV_OFFSET); + else if (*refmv_count >= 2) + mode_context[ref_frame] |= (4 << REFMV_OFFSET); + break; + + case 2: + default: + if (newmv_count >= 2) + mode_context[ref_frame] |= 4; + else if (newmv_count == 1) + mode_context[ref_frame] |= 5; + else + mode_context[ref_frame] |= 6; + + mode_context[ref_frame] |= (5 << REFMV_OFFSET); + break; + } + + // Rank the likelihood and assign nearest and near mvs. 
+ len = nearest_refmv_count; + while (len > 0) { + nr_len = 0; + for (idx = 1; idx < len; ++idx) { + if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) { + tmp_mv = ref_mv_stack[idx - 1]; + ref_mv_stack[idx - 1] = ref_mv_stack[idx]; + ref_mv_stack[idx] = tmp_mv; + nr_len = idx; + } + } + len = nr_len; + } + + len = *refmv_count; + while (len > nearest_refmv_count) { + nr_len = nearest_refmv_count; + for (idx = nearest_refmv_count + 1; idx < len; ++idx) { + if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) { + tmp_mv = ref_mv_stack[idx - 1]; + ref_mv_stack[idx - 1] = ref_mv_stack[idx]; + ref_mv_stack[idx] = tmp_mv; + nr_len = idx; + } + } + len = nr_len; + } + + // TODO(jingning): Clean-up needed. + if (xd->is_sec_rect) { + if (xd->n8_w < xd->n8_h) { + const MODE_INFO *const candidate_mi = xd->mi[-1]; + const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; + handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack, + ref_frame, mode_context); + } + + if (xd->n8_w > xd->n8_h) { + const MODE_INFO *const candidate_mi = xd->mi[-xd->mi_stride]; + const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; + handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack, + ref_frame, mode_context); + } + } + + for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) { + mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int; + clamp_mv_ref(&mv_ref_list[idx].as_mv, + xd->n8_w << 3, xd->n8_h << 3, xd); + } +} +#endif + // This function searches the neighbourhood of a given MB/SB // to try and find candidate reference vectors. 
static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd, @@ -18,7 +439,7 @@ int_mv *mv_ref_list, int block, int mi_row, int mi_col, find_mv_refs_sync sync, void *const data, - uint8_t *mode_context) { + int16_t *mode_context) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; @@ -30,11 +451,6 @@ const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3; const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3; -#if !CONFIG_MISC_FIXES - // Blank the reference vector list - memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); -#endif - // The nearest 2 blocks are treated differently // if the size < 8x8 we get the mv from the bmi substructure, // and we also need to keep a mode count. @@ -133,9 +549,6 @@ } if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && -#if !CONFIG_MISC_FIXES - prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int && -#endif prev_frame_mvs->ref_frame[1] != ref_frame) { int_mv mv = prev_frame_mvs->mv[1]; if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != @@ -147,28 +560,40 @@ } } - Done: - - mode_context[ref_frame] = counter_to_context[context_counter]; - -#if CONFIG_MISC_FIXES +Done: + if (mode_context) + mode_context[ref_frame] = counter_to_context[context_counter]; for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i) mv_ref_list[i].as_int = 0; -#else - // Clamp vectors - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) - clamp_mv_ref(&mv_ref_list[i].as_mv, bw, bh, xd); -#endif } void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, +#if CONFIG_REF_MV + uint8_t *ref_mv_count, + CANDIDATE_MV *ref_mv_stack, +#endif int_mv *mv_ref_list, int mi_row, int mi_col, find_mv_refs_sync sync, void *const data, - uint8_t *mode_context) { + int16_t *mode_context) { +#if CONFIG_REF_MV + int idx, all_zero = 1; +#endif find_mv_refs_idx(cm, xd, mi, 
ref_frame, mv_ref_list, -1, mi_row, mi_col, sync, data, mode_context); + +#if CONFIG_REF_MV + setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, + mv_ref_list, -1, mi_row, mi_col, mode_context); + + for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) + if (mv_ref_list[idx].as_int != 0) + all_zero = 0; + + if (all_zero) + mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET); +#endif } static void lower_mv_precision(MV *mv, int allow_hp) { @@ -194,18 +619,45 @@ } void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, - int block, int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv, - uint8_t *mode_context) { + int block, int ref, int mi_row, int mi_col, + int_mv *nearest_mv, int_mv *near_mv) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; int n; +#if CONFIG_REF_MV + CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE]; + CANDIDATE_MV tmp_mv; + uint8_t ref_mv_count = 0, idx; + uint8_t above_count = 0, left_count = 0; +#endif assert(MAX_MV_REF_CANDIDATES == 2); find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, - mi_row, mi_col, NULL, NULL, mode_context); + mi_row, mi_col, NULL, NULL, NULL); + +#if CONFIG_REF_MV + scan_blk_mbmi(cm, xd, mi_row, mi_col, block, mi->mbmi.ref_frame[ref], + -1, 0, ref_mv_stack, &ref_mv_count); + above_count = ref_mv_count; + + scan_blk_mbmi(cm, xd, mi_row, mi_col, block, mi->mbmi.ref_frame[ref], + 0, -1, ref_mv_stack, &ref_mv_count); + left_count = ref_mv_count - above_count; + + if (above_count > 1 && left_count > 0) { + tmp_mv = ref_mv_stack[1]; + ref_mv_stack[1] = ref_mv_stack[above_count]; + ref_mv_stack[above_count] = tmp_mv; + } + + for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, ref_mv_count); ++idx) { + mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int; + clamp_mv_ref(&mv_list[idx].as_mv, + xd->n8_w << 3, xd->n8_h << 3, xd); + } +#endif near_mv->as_int = 0; switch (block) {
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h index 0a98866..3968469 100644 --- a/vp10/common/mvref_common.h +++ b/vp10/common/mvref_common.h
@@ -119,26 +119,13 @@ }; // clamp_mv_ref -#if CONFIG_MISC_FIXES #define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units -#else -#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units -#endif static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) { -#if CONFIG_MISC_FIXES clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER, xd->mb_to_right_edge + bw * 8 + MV_BORDER, xd->mb_to_top_edge - bh * 8 - MV_BORDER, xd->mb_to_bottom_edge + bh * 8 + MV_BORDER); -#else - (void) bw; - (void) bh; - clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, - xd->mb_to_right_edge + MV_BORDER, - xd->mb_to_top_edge - MV_BORDER, - xd->mb_to_bottom_edge + MV_BORDER); -#endif } // This function returns either the appropriate sub block or block's mv @@ -164,11 +151,7 @@ return mv; } -#if CONFIG_MISC_FIXES #define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd) -#else -#define CLIP_IN_ADD(mv, bw, bh, xd) do {} while (0) -#endif // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. 
If it's the second motion vector it will also @@ -194,8 +177,6 @@ ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, bw, bh, xd, Done); \ if (has_second_ref(mbmi) && \ - (CONFIG_MISC_FIXES || \ - (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) && \ (mbmi)->ref_frame[1] != ref_frame) \ ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, bw, bh, xd, Done); \ @@ -214,12 +195,39 @@ mi_col + mi_pos->col >= tile->mi_col_end); } +#if CONFIG_REF_MV +static int16_t vp10_mode_context_analyzer(const int16_t *const mode_context, + const MV_REFERENCE_FRAME *const rf, + BLOCK_SIZE bsize, int block) { + int16_t mode_ctx = 0; + if (block >= 0) { + mode_ctx = mode_context[rf[0]] & 0x00ff; + + if (block > 0 && bsize < BLOCK_8X8 && bsize > BLOCK_4X4) + mode_ctx |= (1 << SKIP_NEARESTMV_SUB8X8_OFFSET); + + return mode_ctx; + } + + if (rf[1] > INTRA_FRAME) + return mode_context[rf[0]] & (mode_context[rf[1]] | 0x00ff); + else if (rf[0] != ALTREF_FRAME) + return mode_context[rf[0]] & ~(mode_context[ALTREF_FRAME] & 0xfe00); + else + return mode_context[rf[0]]; +} +#endif + typedef void (*find_mv_refs_sync)(void *const data, int mi_row); void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, - uint8_t *mode_context); + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, +#if CONFIG_REF_MV + uint8_t *ref_mv_count, + CANDIDATE_MV *ref_mv_stack, +#endif + int_mv *mv_ref_list, int mi_row, int mi_col, + find_mv_refs_sync sync, void *const data, + int16_t *mode_context); // check a list of motion vectors by sad score using a number rows of pixels // above and a number cols of pixels in the left to select the one with best @@ -228,9 +236,8 @@ int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv); void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, - int block, 
int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv, - uint8_t *mode_context); + int block, int ref, int mi_row, int mi_col, + int_mv *nearest_mv, int_mv *near_mv); #ifdef __cplusplus } // extern "C"
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index ffef733..9b7a729 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h
@@ -20,6 +20,7 @@ #include "vp10/common/entropymv.h" #include "vp10/common/entropy.h" #include "vp10/common/entropymode.h" +#include "vp10/common/mv.h" #include "vp10/common/frame_buffers.h" #include "vp10/common/quant_common.h" #include "vp10/common/tile_common.h" @@ -173,6 +174,12 @@ #endif FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ +#if CONFIG_EXT_REFS + // frame type of the frame before last frame + FRAME_TYPE last2_frame_type; + // frame type of the frame two frames before last frame + FRAME_TYPE last3_frame_type; +#endif // CONFIG_EXT_REFS FRAME_TYPE frame_type; int show_frame; @@ -185,6 +192,8 @@ int allow_high_precision_mv; + int allow_screen_content_tools; + // Flag signaling which frame contexts should be reset to default values. RESET_FRAME_CONTEXT_MODE reset_frame_context; @@ -252,15 +261,12 @@ struct loopfilter lf; struct segmentation seg; -#if !CONFIG_MISC_FIXES - struct segmentation_probs segp; -#endif int frame_parallel_decode; // frame-based threading. 
// Context probabilities for reference frame prediction MV_REFERENCE_FRAME comp_fixed_ref; - MV_REFERENCE_FRAME comp_var_ref[2]; + MV_REFERENCE_FRAME comp_var_ref[COMP_REFS]; REFERENCE_MODE reference_mode; FRAME_CONTEXT *fc; /* this frame entropy */ @@ -299,6 +305,10 @@ PARTITION_CONTEXT *above_seg_context; ENTROPY_CONTEXT *above_context; +#if CONFIG_VAR_TX + TXFM_CONTEXT *above_txfm_context; + TXFM_CONTEXT left_txfm_context[8]; +#endif int above_context_alloc_cols; // scratch memory for intraonly/keyframe forward updates from default tables @@ -395,6 +405,9 @@ } xd->above_seg_context = cm->above_seg_context; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context; +#endif xd->mi_stride = cm->mi_stride; xd->error_info = &cm->error; } @@ -444,6 +457,19 @@ xd->left_mi = NULL; xd->left_mbmi = NULL; } + + xd->n8_h = bh; + xd->n8_w = bw; +#if CONFIG_REF_MV + xd->is_sec_rect = 0; + if (xd->n8_w < xd->n8_h) + if (mi_col & (xd->n8_h - 1)) + xd->is_sec_rect = 1; + + if (xd->n8_w > xd->n8_h) + if (mi_row & (xd->n8_w - 1)) + xd->is_sec_rect = 1; +#endif } static INLINE const vpx_prob *get_y_mode_probs(const VP10_COMMON *cm, @@ -487,6 +513,84 @@ return (left * 2 + above) + bsl * PARTITION_PLOFFSET; } +#if CONFIG_VAR_TX +static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, + TX_SIZE tx_size, + int len) { + int i; + for (i = 0; i < len; ++i) + txfm_ctx[i] = tx_size; +} + +static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx, + TXFM_CONTEXT *left_ctx, + TX_SIZE tx_size) { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bs = num_8x8_blocks_high_lookup[bsize]; + int i; + for (i = 0; i < bs; ++i) { + above_ctx[i] = tx_size; + left_ctx[i] = tx_size; + } +} + +static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx, + TXFM_CONTEXT *left_ctx, + TX_SIZE tx_size) { + int above = *above_ctx < tx_size; + int left = *left_ctx < tx_size; + return (tx_size - 1) * 3 + above + left; +} +#endif + +#if CONFIG_EXT_INTERP +static INLINE int 
vp10_is_interp_needed(const MACROBLOCKD *const xd) { + MODE_INFO *const mi = xd->mi[0]; + MB_MODE_INFO *const mbmi = &mi->mbmi; + const BLOCK_SIZE bsize = mbmi->sb_type; + const int is_compound = has_second_ref(mbmi); + int intpel_mv; + +#if SUPPORT_NONINTERPOLATING_FILTERS + // TODO(debargha): This is is currently only for experimentation + // with non-interpolating filters. Remove later. + // If any of the filters are non-interpolating, then indicate the + // interpolation filter always. + int i; + for (i = 0; i < SWITCHABLE_FILTERS; ++i) { + if (!IsInterpolatingFilter(i)) return 1; + } +#endif + + // For scaled references, interpolation filter is indicated all the time. + if (vp10_is_scaled(&xd->block_refs[0]->sf)) + return 1; + if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf)) + return 1; + + if (bsize < BLOCK_8X8) { + intpel_mv = + !mv_has_subpel(&mi->bmi[0].as_mv[0].as_mv) && + !mv_has_subpel(&mi->bmi[1].as_mv[0].as_mv) && + !mv_has_subpel(&mi->bmi[2].as_mv[0].as_mv) && + !mv_has_subpel(&mi->bmi[3].as_mv[0].as_mv); + if (is_compound && intpel_mv) { + intpel_mv &= + !mv_has_subpel(&mi->bmi[0].as_mv[1].as_mv) && + !mv_has_subpel(&mi->bmi[1].as_mv[1].as_mv) && + !mv_has_subpel(&mi->bmi[2].as_mv[1].as_mv) && + !mv_has_subpel(&mi->bmi[3].as_mv[1].as_mv); + } + } else { + intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv); + if (is_compound && intpel_mv) { + intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv); + } + } + return !intpel_mv; +} +#endif // CONFIG_EXT_INTERP + #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/pred_common.c b/vp10/common/pred_common.c index 2e79e0d..9c42794 100644 --- a/vp10/common/pred_common.c +++ b/vp10/common/pred_common.c
@@ -103,9 +103,424 @@ return ctx; } +#if CONFIG_EXT_REFS + +// TODO(zoeliu): Future work will be conducted to optimize the context design +// for the coding of the reference frames. + +#define CHECK_LAST_OR_LAST2(ref_frame) \ + ((ref_frame == LAST_FRAME) || (ref_frame == LAST2_FRAME)) + +#define CHECK_GOLDEN_LAST3_LAST4(ref_frame) \ + ((ref_frame == GOLDEN_FRAME) || (ref_frame == LAST3_FRAME) || \ + (ref_frame == LAST4_FRAME)) + +// Returns a context number for the given MB prediction signal +// Signal the first reference frame for a compound mode is either +// GOLDEN/LAST3/LAST4, or LAST/LAST2. +// +// NOTE(zoeliu): The probability of ref_frame[0] is either +// GOLDEN_FRAME/LAST3_FRAME/LAST4_FRAME. +int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int above_in_image = xd->up_available; + const int left_in_image = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. + // The prediction flags in these dummy entries are initialised to 0. + const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra (2) + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter + const MB_MODE_INFO *edge_mbmi = above_intra ? 
left_mbmi : above_mbmi; + + if (!has_second_ref(edge_mbmi)) // single pred (1/3) + pred_context = 1 + + 2 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[0])); + else // comp pred (1/3) + pred_context = 1 + + 2 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[var_ref_idx])); + } else { // inter/inter + const int l_sg = !has_second_ref(left_mbmi); + const int a_sg = !has_second_ref(above_mbmi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[var_ref_idx]; + + if (vrfa == vrfl && CHECK_GOLDEN_LAST3_LAST4(vrfa)) { + pred_context = 0; + } else if (l_sg && a_sg) { // single/single + if ((vrfa == ALTREF_FRAME && CHECK_LAST_OR_LAST2(vrfl)) || + (vrfl == ALTREF_FRAME && CHECK_LAST_OR_LAST2(vrfa))) { + pred_context = 4; + } else if (vrfa == vrfl || (CHECK_LAST_OR_LAST2(vrfa) && + CHECK_LAST_OR_LAST2(vrfl))) { + pred_context = 3; + } else { // Either vrfa or vrfl is GOLDEN / LAST3 / LAST4 + // NOTE(zoeliu): Following assert may be removed once confirmed. + assert(CHECK_GOLDEN_LAST3_LAST4(vrfa) || + CHECK_GOLDEN_LAST3_LAST4(vrfl)); + pred_context = 1; + } + } else if (l_sg || a_sg) { // single/comp + const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; + const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl; + + if (CHECK_GOLDEN_LAST3_LAST4(vrfc) && !CHECK_GOLDEN_LAST3_LAST4(rfs)) + pred_context = 1; + else if (CHECK_GOLDEN_LAST3_LAST4(rfs) && + !CHECK_GOLDEN_LAST3_LAST4(vrfc)) + pred_context = 2; + else + pred_context = 4; + } else { // comp/comp + if ((CHECK_LAST_OR_LAST2(vrfa) && CHECK_LAST_OR_LAST2(vrfl))) { + pred_context = 4; + } else { + // NOTE(zoeliu): Following assert may be removed once confirmed. 
+ assert(CHECK_GOLDEN_LAST3_LAST4(vrfa) || + CHECK_GOLDEN_LAST3_LAST4(vrfl)); + pred_context = 2; + } + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi)) { + pred_context = 2; + } else { + if (has_second_ref(edge_mbmi)) + pred_context = + 4 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[var_ref_idx])); + else + pred_context = 3 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[0])); + } + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + + return pred_context; +} + +// Returns a context number for the given MB prediction signal +// Signal the first reference frame for a compound mode is LAST, +// conditioning on that it is known either LAST/LAST2. +// +// NOTE(zoeliu): The probability of ref_frame[0] is LAST_FRAME, +// conditioning on it is either LAST_FRAME or LAST2_FRAME. +int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int above_in_image = xd->up_available; + const int left_in_image = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. + // The prediction flags in these dummy entries are initialised to 0. 
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra (2) + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + + if (!has_second_ref(edge_mbmi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST_FRAME); + else // comp pred (1/3) + pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] + != LAST_FRAME); + } else { // inter/inter + const int l_sg = !has_second_ref(left_mbmi); + const int a_sg = !has_second_ref(above_mbmi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[var_ref_idx]; + + if (vrfa == vrfl && vrfa == LAST_FRAME) + pred_context = 0; + else if (l_sg && a_sg) { // single/single + if (vrfa == LAST_FRAME || vrfl == LAST_FRAME) + pred_context = 1; + else if (CHECK_GOLDEN_LAST3_LAST4(vrfa) || + CHECK_GOLDEN_LAST3_LAST4(vrfl)) + pred_context = 2 + (vrfa != vrfl); + else if (vrfa == vrfl) + pred_context = 3; + else + pred_context = 4; + } else if (l_sg || a_sg) { // single/comp + const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; + const MV_REFERENCE_FRAME rfs = a_sg ? 
vrfa : vrfl; + + if (vrfc == LAST_FRAME && rfs != LAST_FRAME) + pred_context = 1; + else if (rfs == LAST_FRAME && vrfc != LAST_FRAME) + pred_context = 2; + else + pred_context = 3 + + (vrfc == LAST2_FRAME || CHECK_GOLDEN_LAST3_LAST4(rfs)); + } else { // comp/comp + if (vrfa == LAST_FRAME || vrfl == LAST_FRAME) + pred_context = 2; + else + pred_context = 3 + (CHECK_GOLDEN_LAST3_LAST4(vrfa) || + CHECK_GOLDEN_LAST3_LAST4(vrfl)); + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi)) { + pred_context = 2; + } else { + if (has_second_ref(edge_mbmi)) { + pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] != LAST_FRAME); + } else { + if (edge_mbmi->ref_frame[0] == LAST_FRAME) + pred_context = 0; + else + pred_context = 2 + CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[0]); + } + } + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + + return pred_context; +} + +#define CHECK_LAST3_OR_LAST4(ref_frame) \ + ((ref_frame == LAST3_FRAME) || (ref_frame == LAST4_FRAME)) + +// Returns a context number for the given MB prediction signal +// Signal the first reference frame for a compound mode is GOLDEN, +// conditioning on that it is known either GOLDEN/LAST3/LAST4. +// +// NOTE(zoeliu): The probability of ref_frame[0] is GOLDEN_FRAME, +// conditioning on it is either GOLDEN / LAST3 / LAST4. +int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int above_in_image = xd->up_available; + const int left_in_image = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. 
+ // The prediction flags in these dummy entries are initialised to 0. + const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra (2) + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + + if (!has_second_ref(edge_mbmi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != GOLDEN_FRAME); + else // comp pred (1/3) + pred_context = 1 + + 2 * (edge_mbmi->ref_frame[var_ref_idx] != GOLDEN_FRAME); + } else { // inter/inter + const int l_sg = !has_second_ref(left_mbmi); + const int a_sg = !has_second_ref(above_mbmi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[var_ref_idx]; + + if (vrfa == vrfl && vrfa == GOLDEN_FRAME) + pred_context = 0; + else if (l_sg && a_sg) { // single/single + if (vrfa == GOLDEN_FRAME || vrfl == GOLDEN_FRAME) + pred_context = 1; + else if (CHECK_LAST_OR_LAST2(vrfa) || CHECK_LAST_OR_LAST2(vrfl)) + pred_context = 2 + (vrfa != vrfl); + else if (vrfa == vrfl) + pred_context = 3; + else + pred_context = 4; + } else if (l_sg || a_sg) { // single/comp + const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; + const MV_REFERENCE_FRAME rfs = a_sg ? 
vrfa : vrfl; + + if (vrfc == GOLDEN_FRAME && rfs != GOLDEN_FRAME) + pred_context = 1; + else if (rfs == GOLDEN_FRAME && vrfc != GOLDEN_FRAME) + pred_context = 2; + else + pred_context = 3 + + (CHECK_LAST3_OR_LAST4(vrfc) || CHECK_LAST_OR_LAST2(rfs)); + } else { // comp/comp + if (vrfa == GOLDEN_FRAME || vrfl == GOLDEN_FRAME) + pred_context = 2; + else + pred_context = 3 + + (CHECK_LAST_OR_LAST2(vrfa) || CHECK_LAST_OR_LAST2(vrfl)); + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi)) { + pred_context = 2; + } else { + if (has_second_ref(edge_mbmi)) { + pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] != GOLDEN_FRAME); + } else { + if (edge_mbmi->ref_frame[0] == GOLDEN_FRAME) + pred_context = 0; + else + pred_context = 2 + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]); + } + } + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + + return pred_context; +} + +#define CHECK_LAST_LAST2_GOLDEN(ref_frame) \ + ((ref_frame == LAST_FRAME) || (ref_frame == LAST2_FRAME) || \ + (ref_frame == GOLDEN_FRAME)) + +// Returns a context number for the given MB prediction signal +// Signal the first reference frame for a compound mode is LAST3, +// conditioning on that it is known either LAST3/LAST4. +// +// NOTE(zoeliu): The probability of ref_frame[0] is LAST3_FRAME, +// conditioning on it is either LAST3 / LAST4. +int vp10_get_pred_context_comp_ref_p3(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int above_in_image = xd->up_available; + const int left_in_image = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. 
+ // The prediction flags in these dummy entries are initialised to 0. + const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra (2) + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + + if (!has_second_ref(edge_mbmi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST3_FRAME); + else // comp pred (1/3) + pred_context = 1 + + 2 * (edge_mbmi->ref_frame[var_ref_idx] != LAST3_FRAME); + } else { // inter/inter + const int l_sg = !has_second_ref(left_mbmi); + const int a_sg = !has_second_ref(above_mbmi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[var_ref_idx]; + + if (vrfa == vrfl && vrfa == LAST3_FRAME) + pred_context = 0; + else if (l_sg && a_sg) { // single/single + if (vrfa == LAST3_FRAME || vrfl == LAST3_FRAME) + pred_context = 1; + else if (CHECK_LAST_LAST2_GOLDEN(vrfa) || CHECK_LAST_LAST2_GOLDEN(vrfl)) + pred_context = 2 + (vrfa != vrfl); + else if (vrfa == vrfl) + pred_context = 3; + else + pred_context = 4; + } else if (l_sg || a_sg) { // single/comp + const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; + const MV_REFERENCE_FRAME rfs = a_sg ? 
vrfa : vrfl; + + if (vrfc == LAST3_FRAME && rfs != LAST3_FRAME) + pred_context = 1; + else if (rfs == LAST3_FRAME && vrfc != LAST3_FRAME) + pred_context = 2; + else + pred_context = 3 + + (vrfc == LAST4_FRAME || CHECK_LAST_LAST2_GOLDEN(rfs)); + } else { // comp/comp + if (vrfa == LAST3_FRAME || vrfl == LAST3_FRAME) + pred_context = 2; + else + pred_context = 3 + + (CHECK_LAST_LAST2_GOLDEN(vrfa) || CHECK_LAST_LAST2_GOLDEN(vrfl)); + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi)) { + pred_context = 2; + } else { + if (has_second_ref(edge_mbmi)) { + pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] != LAST3_FRAME); + } else { + if (edge_mbmi->ref_frame[0] == LAST3_FRAME) + pred_context = 0; + else + pred_context = 2 + CHECK_LAST_LAST2_GOLDEN(edge_mbmi->ref_frame[0]); + } + } + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + + return pred_context; +} + +#else // CONFIG_EXT_REFS + // Returns a context number for the given MB prediction signal int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { int pred_context; const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; @@ -186,6 +601,472 @@ return pred_context; } +#endif // CONFIG_EXT_REFS + +#if CONFIG_EXT_REFS + +#define CHECK_GOLDEN_OR_ALTREF(ref_frame) \ + ((ref_frame == GOLDEN_FRAME) || (ref_frame == ALTREF_FRAME)) + +// For the bit to signal whether the single reference is a ALTREF_FRAME +// or a GOLDEN_FRAME. +// +// NOTE(zoeliu): The probability of ref_frame[0] is ALTREF/GOLDEN. 
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int has_above = xd->up_available; + const int has_left = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. + // The prediction flags in these dummy entries are initialised to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + + if (!has_second_ref(edge_mbmi)) + pred_context = 4 * (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0])); + else + pred_context = 1 + (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) || + !CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[1])); + } else { // inter/inter + const int above_has_second = has_second_ref(above_mbmi); + const int left_has_second = has_second_ref(left_mbmi); + + const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + + if (above_has_second && left_has_second) { + pred_context = 1 + (!CHECK_GOLDEN_OR_ALTREF(above0) || + !CHECK_GOLDEN_OR_ALTREF(above1) || + !CHECK_GOLDEN_OR_ALTREF(left0) || + !CHECK_GOLDEN_OR_ALTREF(left1)); + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? 
above1 : left1; + + if (!CHECK_GOLDEN_OR_ALTREF(rfs)) + pred_context = 3 + (!CHECK_GOLDEN_OR_ALTREF(crf1) || + !CHECK_GOLDEN_OR_ALTREF(crf2)); + else + pred_context = !CHECK_GOLDEN_OR_ALTREF(crf1) || + !CHECK_GOLDEN_OR_ALTREF(crf2); + } else { + pred_context = 2 * (!CHECK_GOLDEN_OR_ALTREF(above0)) + + 2 * (!CHECK_GOLDEN_OR_ALTREF(left0)); + } + } + } else if (has_above || has_left) { // one edge available + const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + if (!is_inter_block(edge_mbmi)) { // intra + pred_context = 2; + } else { // inter + if (!has_second_ref(edge_mbmi)) + pred_context = 4 * (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0])); + else + pred_context = 1 + (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) || + !CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[1])); + } + } else { // no edges available + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} + +// For the bit to signal whether the single reference is ALTREF_FRAME or +// GOLDEN_FRAME, knowing that it shall be either of these 2 choices. +// +// NOTE(zoeliu): The probability of ref_frame[0] is ALTREF_FRAME, conditioning +// on it is either ALTREF_FRAME/GOLDEN_FRAME. +int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int has_above = xd->up_available; + const int has_left = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. + // The prediction flags in these dummy entries are initialised to 0. 
+ if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + if (!has_second_ref(edge_mbmi)) { + if (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0])) + pred_context = 3; + else + pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME); + } else { + pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || + edge_mbmi->ref_frame[1] == GOLDEN_FRAME); + } + } else { // inter/inter + const int above_has_second = has_second_ref(above_mbmi); + const int left_has_second = has_second_ref(left_mbmi); + const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + + if (above_has_second && left_has_second) { + if (above0 == left0 && above1 == left1) + pred_context = 3 * (above0 == GOLDEN_FRAME || + above1 == GOLDEN_FRAME || + left0 == GOLDEN_FRAME || + left1 == GOLDEN_FRAME); + else + pred_context = 2; + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? 
above1 : left1; + + if (rfs == GOLDEN_FRAME) + pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + else if (rfs == ALTREF_FRAME) + pred_context = (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + else + pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + } else { + if (!CHECK_GOLDEN_OR_ALTREF(above0) && !CHECK_GOLDEN_OR_ALTREF(left0)) { + pred_context = 2 + (above0 == left0); + } else if (!CHECK_GOLDEN_OR_ALTREF(above0) || + !CHECK_GOLDEN_OR_ALTREF(left0)) { + const MV_REFERENCE_FRAME edge0 = + !CHECK_GOLDEN_OR_ALTREF(above0) ? left0 : above0; + pred_context = 4 * (edge0 == GOLDEN_FRAME); + } else { + pred_context = 2 * (above0 == GOLDEN_FRAME) + + 2 * (left0 == GOLDEN_FRAME); + } + } + } + } else if (has_above || has_left) { // one edge available + const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi) || + (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) && + !has_second_ref(edge_mbmi))) + pred_context = 2; + else if (!has_second_ref(edge_mbmi)) + pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME); + else + pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || + edge_mbmi->ref_frame[1] == GOLDEN_FRAME); + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} + +// For the bit to signal whether the single reference is LAST3/LAST4 or +// LAST2/LAST, knowing that it shall be either of these 2 choices. +// +// NOTE(zoeliu): The probability of ref_frame[0] is LAST3/LAST4, conditioning +// on it is either LAST3/LAST4/LAST2/LAST. 
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int has_above = xd->up_available; + const int has_left = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. + // The prediction flags in these dummy entries are initialised to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + if (!has_second_ref(edge_mbmi)) { + if (CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0])) + pred_context = 3; + else + pred_context = 4 * CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]); + } else { + pred_context = 1 + + 2 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) || + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1])); + } + } else { // inter/inter + const int above_has_second = has_second_ref(above_mbmi); + const int left_has_second = has_second_ref(left_mbmi); + const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + + if (above_has_second && left_has_second) { + if (above0 == left0 && above1 == left1) + pred_context = 3 * (CHECK_LAST_OR_LAST2(above0) || + CHECK_LAST_OR_LAST2(above1) || + CHECK_LAST_OR_LAST2(left0) || + CHECK_LAST_OR_LAST2(left1)); + else + pred_context = 2; + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? 
above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (CHECK_LAST_OR_LAST2(rfs)) + pred_context = 3 + (CHECK_LAST_OR_LAST2(crf1) || + CHECK_LAST_OR_LAST2(crf2)); + else if (rfs == LAST3_FRAME || rfs == LAST4_FRAME) + pred_context = (CHECK_LAST_OR_LAST2(crf1) || + CHECK_LAST_OR_LAST2(crf2)); + else + pred_context = 1 + 2 * (CHECK_LAST_OR_LAST2(crf1) || + CHECK_LAST_OR_LAST2(crf2)); + } else { + if (CHECK_GOLDEN_OR_ALTREF(above0) && CHECK_GOLDEN_OR_ALTREF(left0)) { + pred_context = 2 + (above0 == left0); + } else if (CHECK_GOLDEN_OR_ALTREF(above0) || + CHECK_GOLDEN_OR_ALTREF(left0)) { + const MV_REFERENCE_FRAME edge0 = + CHECK_GOLDEN_OR_ALTREF(above0) ? left0 : above0; + pred_context = 4 * CHECK_LAST_OR_LAST2(edge0); + } else { + pred_context = 2 * CHECK_LAST_OR_LAST2(above0) + + 2 * CHECK_LAST_OR_LAST2(left0); + } + } + } + } else if (has_above || has_left) { // one edge available + const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi) || + (CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) && + !has_second_ref(edge_mbmi))) + pred_context = 2; + else if (!has_second_ref(edge_mbmi)) + pred_context = 4 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0])); + else + pred_context = 3 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) || + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1])); + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} + +// For the bit to signal whether the single reference is LAST2_FRAME or +// LAST_FRAME, knowing that it shall be either of these 2 choices. +// +// NOTE(zoeliu): The probability of ref_frame[0] is LAST2_FRAME, conditioning +// on it is either LAST2_FRAME/LAST_FRAME. 
+int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int has_above = xd->up_available; + const int has_left = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. + // The prediction flags in these dummy entries are initialised to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + if (!has_second_ref(edge_mbmi)) { + if (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0])) + pred_context = 3; + else + pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME); + } else { + pred_context = 1 + + 2 * (edge_mbmi->ref_frame[0] == LAST_FRAME || + edge_mbmi->ref_frame[1] == LAST_FRAME); + } + } else { // inter/inter + const int above_has_second = has_second_ref(above_mbmi); + const int left_has_second = has_second_ref(left_mbmi); + const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + + if (above_has_second && left_has_second) { + if (above0 == left0 && above1 == left1) + pred_context = 3 * (above0 == LAST_FRAME || above1 == LAST_FRAME || + left0 == LAST_FRAME || left1 == LAST_FRAME); + else + pred_context = 2; + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? 
above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (rfs == LAST_FRAME) + pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + else if (rfs == LAST2_FRAME) + pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + else + pred_context = 1 + 2 * (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + } else { + if (!CHECK_LAST_OR_LAST2(above0) && + !CHECK_LAST_OR_LAST2(left0)) { + pred_context = 2 + (above0 == left0); + } else if (!CHECK_LAST_OR_LAST2(above0) || + !CHECK_LAST_OR_LAST2(left0)) { + const MV_REFERENCE_FRAME edge0 = + !CHECK_LAST_OR_LAST2(above0) ? left0 : above0; + pred_context = 4 * (edge0 == LAST_FRAME); + } else { + pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME); + } + } + } + } else if (has_above || has_left) { // one edge available + const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi) || + (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) && + !has_second_ref(edge_mbmi))) + pred_context = 2; + else if (!has_second_ref(edge_mbmi)) + pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME); + else + pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST_FRAME || + edge_mbmi->ref_frame[1] == LAST_FRAME); + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} + +// For the bit to signal whether the single reference is LAST4_FRAME or +// LAST3_FRAME, knowing that it shall be either of these 2 choices. +// +// NOTE(zoeliu): The probability of ref_frame[0] is LAST4_FRAME, conditioning +// on it is either LAST4_FRAME/LAST3_FRAME. 
+int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) { + int pred_context; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; + const int has_above = xd->up_available; + const int has_left = xd->left_available; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries correpsonding to real macroblocks. + // The prediction flags in these dummy entries are initialised to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mbmi); + const int left_intra = !is_inter_block(left_mbmi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + if (!has_second_ref(edge_mbmi)) { + if (!CHECK_LAST3_OR_LAST4(edge_mbmi->ref_frame[0])) + pred_context = 3; + else + pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME); + } else { + pred_context = 1 + + 2 * (edge_mbmi->ref_frame[0] == LAST3_FRAME || + edge_mbmi->ref_frame[1] == LAST3_FRAME); + } + } else { // inter/inter + const int above_has_second = has_second_ref(above_mbmi); + const int left_has_second = has_second_ref(left_mbmi); + const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + + if (above_has_second && left_has_second) { + if (above0 == left0 && above1 == left1) + pred_context = 3 * (above0 == LAST3_FRAME || above1 == LAST3_FRAME || + left0 == LAST3_FRAME || left1 == LAST3_FRAME); + else + pred_context = 2; + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? 
above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (rfs == LAST3_FRAME) + pred_context = 3 + (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME); + else if (rfs == LAST4_FRAME) + pred_context = (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME); + else + pred_context = 1 + 2 * (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME); + } else { + if (!CHECK_LAST3_OR_LAST4(above0) && + !CHECK_LAST3_OR_LAST4(left0)) { + pred_context = 2 + (above0 == left0); + } else if (!CHECK_LAST3_OR_LAST4(above0) || + !CHECK_LAST3_OR_LAST4(left0)) { + const MV_REFERENCE_FRAME edge0 = + !CHECK_LAST3_OR_LAST4(above0) ? left0 : above0; + pred_context = 4 * (edge0 == LAST3_FRAME); + } else { + pred_context = 2 * (above0 == LAST3_FRAME) + + 2 * (left0 == LAST3_FRAME); + } + } + } + } else if (has_above || has_left) { // one edge available + const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + + if (!is_inter_block(edge_mbmi) || + (!CHECK_LAST3_OR_LAST4(edge_mbmi->ref_frame[0]) && + !has_second_ref(edge_mbmi))) + pred_context = 2; + else if (!has_second_ref(edge_mbmi)) + pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME); + else + pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST3_FRAME || + edge_mbmi->ref_frame[1] == LAST3_FRAME); + } else { // no edges available (2) + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} + +#else // CONFIG_EXT_REFS + int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { int pred_context; const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; @@ -337,3 +1218,5 @@ assert(pred_context >= 0 && pred_context < REF_CONTEXTS); return pred_context; } + +#endif // CONFIG_EXT_REFS
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h index d6d7146..4ebfcdb 100644 --- a/vp10/common/pred_common.h +++ b/vp10/common/pred_common.h
@@ -87,25 +87,77 @@ const MACROBLOCKD *xd); static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { const int pred_context = vp10_get_pred_context_comp_ref_p(cm, xd); - return cm->fc->comp_ref_prob[pred_context]; + return cm->fc->comp_ref_prob[pred_context][0]; } +#if CONFIG_EXT_REFS +int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm, + const MACROBLOCKD *xd); + +static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p1(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + const int pred_context = vp10_get_pred_context_comp_ref_p1(cm, xd); + return cm->fc->comp_ref_prob[pred_context][1]; +} + +int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm, + const MACROBLOCKD *xd); + +static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p2(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + const int pred_context = vp10_get_pred_context_comp_ref_p2(cm, xd); + return cm->fc->comp_ref_prob[pred_context][2]; +} + +int vp10_get_pred_context_comp_ref_p3(const VP10_COMMON *cm, + const MACROBLOCKD *xd); + +static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p3(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + const int pred_context = vp10_get_pred_context_comp_ref_p3(cm, xd); + return cm->fc->comp_ref_prob[pred_context][3]; +} +#endif // CONFIG_EXT_REFS + int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); static INLINE vpx_prob vp10_get_pred_prob_single_ref_p1(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p1(xd)][0]; } int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); static INLINE vpx_prob vp10_get_pred_prob_single_ref_p2(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p2(xd)][1]; } +#if CONFIG_EXT_REFS +int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd); + +static INLINE 
vpx_prob vp10_get_pred_prob_single_ref_p3(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p3(xd)][2]; +} + +int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp10_get_pred_prob_single_ref_p4(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p4(xd)][3]; +} + +int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp10_get_pred_prob_single_ref_p5(const VP10_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p5(xd)][4]; +} +#endif // CONFIG_EXT_REFS + // Returns a context number for the given MB prediction signal // The mode info data structure has a one element border above and to the // left of the entries corresponding to real blocks. @@ -165,6 +217,71 @@ } } +#if CONFIG_VAR_TX +static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd, + MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, int blk_row, int blk_col, + TX_SIZE max_tx_size, int ctx, + struct tx_counts *tx_counts) { + const struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx]; + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + ++get_tx_counts(max_tx_size, ctx, tx_counts)[tx_size]; + mbmi->tx_size = tx_size; + } else { + int bsl = 
b_width_log2_lookup[bsize]; + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + const int offsetr = blk_row + ((i >> 1) << bsl); + const int offsetc = blk_col + ((i & 0x01) << bsl); + + if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) + continue; + update_tx_counts(cm, xd, mbmi, plane_bsize, + tx_size - 1, offsetr, offsetc, + max_tx_size, ctx, tx_counts); + } + } +} + +static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, + MACROBLOCKD *xd, + MB_MODE_INFO *mbmi, + BLOCK_SIZE plane_bsize, + int ctx, + struct tx_counts *tx_counts) { + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + + for (idy = 0; idy < mi_height; idy += bh) + for (idx = 0; idx < mi_width; idx += bh) + update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx, + max_tx_size, ctx, tx_counts); +} +#endif + #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index fdcb967..241b9aa 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c
@@ -64,9 +64,9 @@ } void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y) { + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); @@ -264,3 +264,227 @@ } } } + +#if CONFIG_SUPERTX +static const uint8_t mask_8[8] = { + 64, 64, 62, 52, 12, 2, 0, 0 +}; + +static const uint8_t mask_16[16] = { + 63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1 +}; + +static const uint8_t mask_32[32] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36, + 28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const uint8_t mask_8_uv[8] = { + 64, 64, 62, 52, 12, 2, 0, 0 +}; + +static const uint8_t mask_16_uv[16] = { + 64, 64, 64, 64, 61, 53, 45, 36, 28, 19, 11, 3, 0, 0, 0, 0 +}; + +static const uint8_t mask_32_uv[32] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 60, 54, 46, 36, + 28, 18, 10, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static void generate_1dmask(int length, uint8_t *mask, int plane) { + switch (length) { + case 8: + memcpy(mask, plane ? mask_8_uv : mask_8, length); + break; + case 16: + memcpy(mask, plane ? mask_16_uv : mask_16, length); + break; + case 32: + memcpy(mask, plane ? 
mask_32_uv : mask_32, length); + break; + default: + assert(0); + } +} + + +void vp10_build_masked_inter_predictor_complex( + MACROBLOCKD *xd, + uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride, + const struct macroblockd_plane *pd, int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PARTITION_TYPE partition, int plane) { + int i, j; + uint8_t mask[MAXTXLEN]; + int top_w = 4 << b_width_log2_lookup[top_bsize], + top_h = 4 << b_height_log2_lookup[top_bsize]; + int w = 4 << b_width_log2_lookup[bsize], h = 4 << b_height_log2_lookup[bsize]; + int w_offset = (mi_col - mi_col_ori) << 3, + h_offset = (mi_row - mi_row_ori) << 3; + +#if CONFIG_VP9_HIGHBITDEPTH + uint16_t *dst16= CONVERT_TO_SHORTPTR(dst); + uint16_t *dst216 = CONVERT_TO_SHORTPTR(dst2); + int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; +#endif // CONFIG_VP9_HIGHBITDEPTH + + top_w >>= pd->subsampling_x; + top_h >>= pd->subsampling_y; + w >>= pd->subsampling_x; + h >>= pd->subsampling_y; + w_offset >>= pd->subsampling_x; + h_offset >>= pd->subsampling_y; + + switch (partition) { + case PARTITION_HORZ: + { +#if CONFIG_VP9_HIGHBITDEPTH + if (b_hdb) { + uint16_t *dst_tmp = dst16 + h_offset * dst_stride; + uint16_t *dst2_tmp = dst216 + h_offset * dst2_stride; + generate_1dmask(h, mask + h_offset, + plane && xd->plane[plane].subsampling_y); + + for (i = h_offset; i < h_offset + h; i++) { + for (j = 0; j < top_w; j++) { + const int m = mask[i]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + + for (; i < top_h; i ++) { + memcpy(dst_tmp, dst2_tmp, top_w * sizeof(uint16_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + uint8_t *dst_tmp = dst + h_offset * dst_stride; + uint8_t *dst2_tmp = dst2 + 
h_offset * dst2_stride; + generate_1dmask(h, mask + h_offset, + plane && xd->plane[plane].subsampling_y); + + for (i = h_offset; i < h_offset + h; i++) { + for (j = 0; j < top_w; j++) { + const int m = mask[i]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + + for (; i < top_h; i ++) { + memcpy(dst_tmp, dst2_tmp, top_w * sizeof(uint8_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + + break; + case PARTITION_VERT: + { +#if CONFIG_VP9_HIGHBITDEPTH + if (b_hdb) { + uint16_t *dst_tmp = dst16; + uint16_t *dst2_tmp = dst216; + generate_1dmask(w, mask + w_offset, + plane && xd->plane[plane].subsampling_x); + + for (i = 0; i < top_h; i++) { + for (j = w_offset; j < w_offset + w; j++) { + const int m = mask[j]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + memcpy(dst_tmp + j, dst2_tmp + j, + (top_w - w_offset - w) * sizeof(uint16_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + uint8_t *dst_tmp = dst; + uint8_t *dst2_tmp = dst2; + generate_1dmask(w, mask + w_offset, + plane && xd->plane[plane].subsampling_x); + + for (i = 0; i < top_h; i++) { + for (j = w_offset; j < w_offset + w; j++) { + const int m = mask[j]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + memcpy(dst_tmp + j, dst2_tmp + j, + (top_w - w_offset - w) * sizeof(uint8_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + break; + default: + 
assert(0); + } + (void) xd; +} + +void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block) { + // Prediction function used in supertx: + // Use the mv at current block (which is less than 8x8) + // to get prediction of a block located at (mi_row, mi_col) at size of bsize + // bsize can be larger than 8x8. + // block (0-3): the sub8x8 location of current block + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + + // For sub8x8 uv: + // Skip uv prediction in supertx except the first block (block = 0) + int max_plane = block ? 1 : MAX_MB_PLANE; + + for (plane = 0; plane < max_plane; plane++) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + build_inter_predictors(xd, plane, block, bw, bh, + 0, 0, bw, bh, + mi_x, mi_y); + } +} +#endif // CONFIG_SUPERTX
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index 5678f47..bc2df9e 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h
@@ -28,9 +28,22 @@ int w, int h, int ref, const InterpKernel *kernel, int xs, int ys) { +#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS + if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) { + // Interpolating filter + sf->predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); + } else { + sf->predict_ni[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); + } +#else sf->predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } #if CONFIG_VP9_HIGHBITDEPTH @@ -42,9 +55,22 @@ int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) { +#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS + if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) { + // Interpolating filter + sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); + } else { + sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); + } +#else sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -127,25 +153,39 @@ } void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y); + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y); void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i, int ir, int ic, int mi_row, int mi_col); void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - 
-void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, int plane); - -void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); +void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, int plane); + +void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); + BLOCK_SIZE bsize); + +#if CONFIG_SUPERTX +void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block); +struct macroblockd_plane; +void vp10_build_masked_inter_predictor_complex( + MACROBLOCKD *xd, + uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride, + const struct macroblockd_plane *pd, int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PARTITION_TYPE partition, int plane); + +#endif // CONFIG_SUPERTX void vp10_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, @@ -192,7 +232,6 @@ void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const struct scale_factors *sf); - #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c index e9e3949..4be5394 100644 --- a/vp10/common/reconintra.c +++ b/vp10/common/reconintra.c
@@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <math.h> + #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" @@ -21,7 +23,6 @@ #include "vp10/common/reconintra.h" #include "vp10/common/onyxc_int.h" -#if CONFIG_MISC_FIXES enum { NEED_LEFT = 1 << 1, NEED_ABOVE = 1 << 2, @@ -42,28 +43,7 @@ NEED_ABOVE | NEED_ABOVERIGHT, // D63 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM }; -#else -enum { - NEED_LEFT = 1 << 1, - NEED_ABOVE = 1 << 2, - NEED_ABOVERIGHT = 1 << 3, -}; -static const uint8_t extend_modes[INTRA_MODES] = { - NEED_ABOVE | NEED_LEFT, // DC - NEED_ABOVE, // V - NEED_LEFT, // H - NEED_ABOVERIGHT, // D45 - NEED_LEFT | NEED_ABOVE, // D135 - NEED_LEFT | NEED_ABOVE, // D117 - NEED_LEFT | NEED_ABOVE, // D153 - NEED_LEFT, // D207 - NEED_ABOVERIGHT, // D63 - NEED_LEFT | NEED_ABOVE, // TM -}; -#endif - -#if CONFIG_MISC_FIXES static const uint8_t orders_64x64[1] = { 0 }; static const uint8_t orders_64x32[2] = { 0, 1 }; static const uint8_t orders_32x64[2] = { 0, 1 }; @@ -123,11 +103,12 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, int right_available, TX_SIZE txsz, int y, int x, int ss_x) { + const int wl = mi_width_log2_lookup[bsize]; + const int w = VPXMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1); + const int step = 1 << txsz; + if (y == 0) { - int wl = mi_width_log2_lookup[bsize]; - int hl = mi_height_log2_lookup[bsize]; - int w = 1 << (wl + 1 - ss_x); - int step = 1 << txsz; + const int hl = mi_height_log2_lookup[bsize]; const uint8_t *order = orders[bsize]; int my_order, tr_order; @@ -148,10 +129,6 @@ return my_order > tr_order && right_available; } else { - int wl = mi_width_log2_lookup[bsize]; - int w = 1 << (wl + 1 - ss_x); - int step = 1 << txsz; - return x + step < w; } } @@ -160,10 +137,10 @@ int bottom_available, TX_SIZE txsz, int y, int x, int ss_y) { if (x == 0) { - int wl = mi_width_log2_lookup[bsize]; - int hl = mi_height_log2_lookup[bsize]; - int h = 1 << (hl + 1 - ss_y); - int 
step = 1 << txsz; + const int wl = mi_width_log2_lookup[bsize]; + const int hl = mi_height_log2_lookup[bsize]; + const int h = 1 << (hl + 1 - ss_y); + const int step = 1 << txsz; const uint8_t *order = orders[bsize]; int my_order, bl_order; @@ -188,7 +165,6 @@ return 0; } } -#endif typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left); @@ -216,15 +192,9 @@ INIT_ALL_SIZES(pred[V_PRED], v); INIT_ALL_SIZES(pred[H_PRED], h); -#if CONFIG_MISC_FIXES INIT_ALL_SIZES(pred[D207_PRED], d207e); INIT_ALL_SIZES(pred[D45_PRED], d45e); INIT_ALL_SIZES(pred[D63_PRED], d63e); -#else - INIT_ALL_SIZES(pred[D207_PRED], d207); - INIT_ALL_SIZES(pred[D45_PRED], d45); - INIT_ALL_SIZES(pred[D63_PRED], d63); -#endif INIT_ALL_SIZES(pred[D117_PRED], d117); INIT_ALL_SIZES(pred[D135_PRED], d135); INIT_ALL_SIZES(pred[D153_PRED], d153); @@ -238,15 +208,9 @@ #if CONFIG_VP9_HIGHBITDEPTH INIT_ALL_SIZES(pred_high[V_PRED], highbd_v); INIT_ALL_SIZES(pred_high[H_PRED], highbd_h); -#if CONFIG_MISC_FIXES INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e); INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e); - INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63); -#else - INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207); - INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45); - INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63); -#endif + INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63e); INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117); INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135); INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153); @@ -261,12 +225,598 @@ #undef intra_pred_allsizes } -#if CONFIG_MISC_FIXES static INLINE void memset16(uint16_t *dst, int val, int n) { while (n--) *dst++ = val; } -#endif + +#if CONFIG_EXT_INTRA +#define PI 3.14159265 +#define FILTER_INTRA_PREC_BITS 10 +#define FILTER_INTRA_ROUND_VAL 511 + +static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = { + NEED_LEFT | NEED_ABOVE, // FILTER_DC + NEED_LEFT | NEED_ABOVE, // FILTER_V + 
NEED_LEFT | NEED_ABOVE, // FILTER_H + NEED_LEFT | NEED_ABOVE, // FILTER_D45 + NEED_LEFT | NEED_ABOVE, // FILTER_D135 + NEED_LEFT | NEED_ABOVE, // FILTER_D117 + NEED_LEFT | NEED_ABOVE, // FILTER_D153 + NEED_LEFT | NEED_ABOVE, // FILTER_D207 + NEED_LEFT | NEED_ABOVE, // FILTER_D63 + NEED_LEFT | NEED_ABOVE, // FILTER_TM +}; + +// Directional prediction, zone 1: 0 < angle < 90 +static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left, + int dx, int dy) { + int r, c, x, y, base, shift, val; + + (void)left; + (void)dy; + assert(dy == 1); + assert(dx < 0); + + for (r = 0; r < bs; ++r) { + y = r + 1; + for (c = 0; c < bs; ++c) { + x = c * 256 - y * dx; + base = x >> 8; + shift = x - base * 256; + if (base < 2 * bs - 1) { + val = + (above[base] * (256 - shift) + above[base + 1] * shift + 128) >> 8; + dst[c] = clip_pixel(val); + } else { + dst[c] = above[2 * bs - 1]; + } + } + dst += stride; + } +} + +// Directional prediction, zone 2: 90 < angle < 180 +static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left, + int dx, int dy) { + int r, c, x, y, val1, val2, shift, val, base; + + assert(dx > 0); + assert(dy > 0); + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + y = r + 1; + x = c * 256 - y * dx; + if (x >= -256) { + if (x <= 0) { + val1 = above[-1]; + val2 = above[0]; + shift = x + 256; + } else { + base = x >> 8; + val1 = above[base]; + val2 = above[base + 1]; + shift = x - base * 256; + } + } else { + x = c + 1; + y = r * 256 - x * dy; + base = y >> 8; + if (base >= 0) { + val1 = left[base]; + val2 = left[base + 1]; + shift = y - base * 256; + } else { + val1 = val2 = left[0]; + shift = 0; + } + } + val = (val1 * (256 - shift) + val2 * shift + 128) >> 8; + dst[c] = clip_pixel(val); + } + dst += stride; + } +} + +// Directional prediction, zone 3: 180 < angle < 270 +static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t 
*above, const uint8_t *left, + int dx, int dy) { + int r, c, x, y, base, shift, val; + + (void)above; + (void)dx; + assert(dx == 1); + assert(dy < 0); + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + x = c + 1; + y = r * 256 - x * dy; + base = y >> 8; + shift = y - base * 256; + if (base < 2 * bs - 1) { + val = + (left[base] * (256 - shift) + left[base + 1] * shift + 128) >> 8; + dst[c] = clip_pixel(val); + } else { + dst[c] = left[bs - 1]; + } + } + dst += stride; + } +} + +static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, + const uint8_t *above, const uint8_t *left, int angle) { + double t = 0; + int dx, dy; + int bs = 4 << tx_size; + + if (angle != 90 && angle != 180) + t = tan(angle * PI / 180.0); + if (angle > 0 && angle < 90) { + dx = -((int)(256 / t)); + dy = 1; + dr_prediction_z1(dst, stride, bs, above, left, dx, dy); + } else if (angle > 90 && angle < 180) { + t = -t; + dx = (int)(256 / t); + dy = (int)(256 * t); + dr_prediction_z2(dst, stride, bs, above, left, dx, dy); + } else if (angle > 180 && angle < 270) { + dx = 1; + dy = -((int)(256 * t)); + dr_prediction_z3(dst, stride, bs, above, left, dx, dy); + } else if (angle == 90) { + pred[V_PRED][tx_size](dst, stride, above, left); + } else if (angle == 180) { + pred[H_PRED][tx_size](dst, stride, above, left); + } +} + +static int filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = { + { + {735, 881, -537, -54}, + {1005, 519, -488, -11}, + {383, 990, -343, -6}, + {442, 805, -542, 319}, + {658, 616, -133, -116}, + {875, 442, -141, -151}, + {386, 741, -23, -80}, + {390, 1027, -446, 51}, + {679, 606, -523, 262}, + {903, 922, -778, -23}, + }, + { + {648, 803, -444, 16}, + {972, 620, -576, 7}, + {561, 967, -499, -5}, + {585, 762, -468, 144}, + {596, 619, -182, -9}, + {895, 459, -176, -153}, + {557, 722, -126, -129}, + {601, 839, -523, 105}, + {562, 709, -499, 251}, + {803, 872, -695, 43}, + }, + { + {423, 728, -347, 111}, + {963, 685, -665, 23}, + {281, 1024, -480, 216}, + {640, 
596, -437, 78}, + {429, 669, -259, 99}, + {740, 646, -415, 23}, + {568, 771, -346, 40}, + {404, 833, -486, 209}, + {398, 712, -423, 307}, + {939, 935, -887, 17}, + }, + { + {477, 737, -393, 150}, + {881, 630, -546, 67}, + {506, 984, -443, -20}, + {114, 459, -270, 528}, + {433, 528, 14, 3}, + {837, 470, -301, -30}, + {181, 777, 89, -107}, + {-29, 716, -232, 259}, + {589, 646, -495, 255}, + {740, 884, -728, 77}, + }, +}; + +static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, + const uint8_t *left, + int mode) { + int k, r, c; + int pred[33][65]; + int mean, ipred; + const TX_SIZE tx_size = (bs == 32) ? TX_32X32 : + ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); + const int c0 = filter_intra_taps_4[tx_size][mode][0]; + const int c1 = filter_intra_taps_4[tx_size][mode][1]; + const int c2 = filter_intra_taps_4[tx_size][mode][2]; + const int c3 = filter_intra_taps_4[tx_size][mode][3]; + + k = 0; + mean = 0; + while (k < bs) { + mean = mean + (int)left[k]; + mean = mean + (int)above[k]; + k++; + } + mean = (mean + bs) / (2 * bs); + + for (r = 0; r < bs; ++r) + pred[r + 1][0] = (int)left[r] - mean; + + for (c = 0; c < 2 * bs + 1; ++c) + pred[0][c] = (int)above[c - 1] - mean; + + for (r = 1; r < bs + 1; ++r) + for (c = 1; c < 2 * bs + 1 - r; ++c) { + ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] + + c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1]; + pred[r][c] = ipred < 0 ? 
+ -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) : + ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS); + } + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + ipred = pred[r + 1][c + 1] + mean; + dst[c] = clip_pixel(ipred); + } + dst += stride; + } +} + +static void dc_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED); +} + +static void v_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED); +} + +static void h_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED); +} + +static void d45_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED); +} + +static void d135_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED); +} + +static void d117_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED); +} + +static void d153_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED); +} + +static void d207_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED); +} + +static void d63_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + 
filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED); +} + +static void tm_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED); +} + +static void (*filter_intra_predictors[EXT_INTRA_MODES])(uint8_t *dst, + ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) = { + dc_filter_predictor, v_filter_predictor, h_filter_predictor, + d45_filter_predictor, d135_filter_predictor, d117_filter_predictor, + d153_filter_predictor, d207_filter_predictor, d63_filter_predictor, + tm_filter_predictor, +}; + +#if CONFIG_VP9_HIGHBITDEPTH +// Directional prediction, zone 1: 0 < angle < 90 +static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, + int dx, int dy, int bd) { + int r, c, x, y, base, shift, val; + + (void)left; + (void)dy; + assert(dy == 1); + assert(dx < 0); + + for (r = 0; r < bs; ++r) { + y = r + 1; + for (c = 0; c < bs; ++c) { + x = c * 256 - y * dx; + base = x >> 8; + shift = x - base * 256; + if (base < 2 * bs - 1) { + val = + (above[base] * (256 - shift) + above[base + 1] * shift + 128) >> 8; + dst[c] = clip_pixel_highbd(val, bd); + } else { + dst[c] = above[2 * bs - 1]; + } + } + dst += stride; + } +} + +// Directional prediction, zone 2: 90 < angle < 180 +static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, + int dx, int dy, int bd) { + int r, c, x, y, val1, val2, shift, val, base; + + assert(dx > 0); + assert(dy > 0); + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + y = r + 1; + x = c * 256 - y * dx; + if (x >= -256) { + if (x <= 0) { + val1 = above[-1]; + val2 = above[0]; + shift = x + 256; + } else { + base = x >> 8; + val1 = above[base]; + val2 = above[base + 1]; + shift = x - base * 256; + } + } else { + x = c + 1; + y = r * 256 - x * dy; + base = y >> 8; + if 
(base >= 0) { + val1 = left[base]; + val2 = left[base + 1]; + shift = y - base * 256; + } else { + val1 = val2 = left[0]; + shift = 0; + } + } + val = (val1 * (256 - shift) + val2 * shift + 128) >> 8; + dst[c] = clip_pixel_highbd(val, bd); + } + dst += stride; + } +} + +// Directional prediction, zone 3: 180 < angle < 270 +static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, + int dx, int dy, int bd) { + int r, c, x, y, base, shift, val; + + (void)above; + (void)dx; + assert(dx == 1); + assert(dy < 0); + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + x = c + 1; + y = r * 256 - x * dy; + base = y >> 8; + shift = y - base * 256; + if (base < 2 * bs - 1) { + val = + (left[base] * (256 - shift) + left[base + 1] * shift + 128) >> 8; + dst[c] = clip_pixel_highbd(val, bd); + } else { + dst[c] = left[bs - 1]; + } + } + dst += stride; + } +} + +static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) left; + (void) bd; + for (r = 0; r < bs; r++) { + memcpy(dst, above, bs * sizeof(uint16_t)); + dst += stride; + } +} + +static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) above; + (void) bd; + for (r = 0; r < bs; r++) { + vpx_memset16(dst, left[r], bs); + dst += stride; + } +} + +static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, + int angle, int bd) { + double t = 0; + int dx, dy; + + if (angle != 90 && angle != 180) + t = tan(angle * PI / 180.0); + if (angle > 0 && angle < 90) { + dx = -((int)(256 / t)); + dy = 1; + highbd_dr_prediction_z1(dst, stride, bs, above, left, dx, dy, bd); + } else if (angle > 90 && angle < 180) { + t = -t; + dx = (int)(256 / t); + dy = (int)(256 * t); + highbd_dr_prediction_z2(dst, stride, bs, 
above, left, dx, dy, bd); + } else if (angle > 180 && angle < 270) { + dx = 1; + dy = -((int)(256 * t)); + highbd_dr_prediction_z3(dst, stride, bs, above, left, dx, dy, bd); + } else if (angle == 90) { + highbd_v_predictor(dst, stride, bs, above, left, bd); + } else if (angle == 180) { + highbd_h_predictor(dst, stride, bs, above, left, bd); + } +} + +static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int mode, + int bd) { + int k, r, c; + int pred[33][65]; + int mean, ipred; + const TX_SIZE tx_size = (bs == 32) ? TX_32X32 : + ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); + const int c0 = filter_intra_taps_4[tx_size][mode][0]; + const int c1 = filter_intra_taps_4[tx_size][mode][1]; + const int c2 = filter_intra_taps_4[tx_size][mode][2]; + const int c3 = filter_intra_taps_4[tx_size][mode][3]; + + k = 0; + mean = 0; + while (k < bs) { + mean = mean + (int)left[k]; + mean = mean + (int)above[k]; + k++; + } + mean = (mean + bs) / (2 * bs); + + for (r = 0; r < bs; ++r) + pred[r + 1][0] = (int)left[r] - mean; + + for (c = 0; c < 2 * bs + 1; ++c) + pred[0][c] = (int)above[c - 1] - mean; + + for (r = 1; r < bs + 1; ++r) + for (c = 1; c < 2 * bs + 1 - r; ++c) { + ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] + + c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1]; + pred[r][c] = ipred < 0 ? 
+ -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) : + ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS); + } + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + ipred = pred[r + 1][c + 1] + mean; + dst[c] = clip_pixel_highbd(ipred, bd); + } + dst += stride; + } +} + +static void highbd_dc_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED, + bd); +} + +static void highbd_v_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED, + bd); +} + +static void highbd_h_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED, + bd); +} + +static void highbd_d45_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED, + bd); +} + +static void highbd_d135_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED, + bd); +} + +static void highbd_d117_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED, + bd); +} + +static void highbd_d153_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED, + bd); +} + +static void highbd_d207_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int 
bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED, + bd); +} + +static void highbd_d63_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED, + bd); +} + +static void highbd_tm_filter_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED, + bd); +} + +static void (*highbd_filter_intra_predictors[EXT_INTRA_MODES])(uint16_t *dst, + ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, + int bd) = { + highbd_dc_filter_predictor, highbd_v_filter_predictor, + highbd_h_filter_predictor, highbd_d45_filter_predictor, + highbd_d135_filter_predictor, highbd_d117_filter_predictor, + highbd_d153_filter_predictor, highbd_d207_filter_predictor, + highbd_d63_filter_predictor, highbd_tm_filter_predictor, +}; +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_EXT_INTRA #if CONFIG_VP9_HIGHBITDEPTH static void build_intra_predictors_high(const MACROBLOCKD *xd, @@ -276,56 +826,71 @@ int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, -#if CONFIG_MISC_FIXES int n_top_px, int n_topright_px, int n_left_px, int n_bottomleft_px, -#else - int up_available, - int left_available, - int right_available, -#endif - int x, int y, - int plane, int bd) { + int plane) { int i; uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); -#if CONFIG_MISC_FIXES - DECLARE_ALIGNED(16, uint16_t, left_col[32]); -#else DECLARE_ALIGNED(16, uint16_t, left_col[64]); -#endif DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); uint16_t *above_row = above_data + 16; const uint16_t *const_above_row = above_row; const int bs = 4 << tx_size; -#if CONFIG_MISC_FIXES + int need_left = extend_modes[mode] & NEED_LEFT; + 
int need_above = extend_modes[mode] & NEED_ABOVE; const uint16_t *above_ref = ref - ref_stride; -#else - int frame_width, frame_height; - int x0, y0; - const struct macroblockd_plane *const pd = &xd->plane[plane]; -#endif - const int need_left = extend_modes[mode] & NEED_LEFT; - const int need_above = extend_modes[mode] & NEED_ABOVE; - const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; - int base = 128 << (bd - 8); + int base = 128 << (xd->bd - 8); // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z // 129 C D .. W X // 129 E F .. U V // 129 G H .. S T T T T T -#if CONFIG_MISC_FIXES - (void) x; - (void) y; +#if CONFIG_EXT_INTRA + const EXT_INTRA_MODE_INFO *ext_intra_mode_info = + &xd->mi[0]->mbmi.ext_intra_mode_info; + const EXT_INTRA_MODE ext_intra_mode = + ext_intra_mode_info->ext_intra_mode[plane != 0]; + int p_angle = 0; + + if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + p_angle = mode_to_angle_map[mode] + + xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP; + if (p_angle <= 90) + need_above = 1, need_left = 0; + else if (p_angle < 180) + need_above = 1, need_left = 1; + else + need_above = 0, need_left = 1; + } + + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + EXT_INTRA_MODE ext_intra_mode = + ext_intra_mode_info->ext_intra_mode[plane != 0]; + need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT; + need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE; + } +#endif // CONFIG_EXT_INTRA + (void) plane; - (void) need_left; - (void) need_above; - (void) need_aboveright; // NEED_LEFT - if (extend_modes[mode] & NEED_LEFT) { + if (need_left) { +#if CONFIG_EXT_INTRA + int need_bottom; + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + need_bottom = 0; + } else if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + need_bottom = p_angle > 180; + } else { + need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); + } +#else const 
int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); +#endif // CONFIG_EXT_INTRA i = 0; if (n_left_px > 0) { for (; i < n_left_px; i++) @@ -343,8 +908,20 @@ } // NEED_ABOVE - if (extend_modes[mode] & NEED_ABOVE) { + if (need_above) { +#if CONFIG_EXT_INTRA + int need_right; + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + need_right = 1; + } else if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + need_right = p_angle < 90; + } else { + need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); + } +#else const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); +#endif // CONFIG_EXT_INTRA if (n_top_px > 0) { memcpy(above_row, above_ref, n_top_px * 2); i = n_top_px; @@ -360,142 +937,41 @@ } } - if (extend_modes[mode] & NEED_ABOVELEFT) { +#if CONFIG_EXT_INTRA + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] || + (extend_modes[mode] & NEED_ABOVELEFT) || + (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) { above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1; } #else - // Get current frame pointer, width and height. - if (plane == 0) { - frame_width = xd->cur_buf->y_width; - frame_height = xd->cur_buf->y_height; - } else { - frame_width = xd->cur_buf->uv_width; - frame_height = xd->cur_buf->uv_height; + if ((extend_modes[mode] & NEED_ABOVELEFT)) { + above_row[-1] = n_top_px > 0 ? + (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1; + } +#endif // CONFIG_EXT_INTRA + +#if CONFIG_EXT_INTRA + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + highbd_filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs, + const_above_row, left_col, xd->bd); + return; } - // Get block position in current frame. 
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // NEED_LEFT - if (need_left) { - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) - left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; - } - } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } - } else { - // TODO(Peter): this value should probably change for high bitdepth - vpx_memset16(left_col, base + 1, bs); - } + if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col, + p_angle, xd->bd); + return; } - - // NEED_ABOVE - if (need_above) { - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + bs <= frame_width) { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - } - } - above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); - } else { - vpx_memset16(above_row, base - 1, bs); - above_row[-1] = base - 1; - } - } - - // NEED_ABOVERIGHT - if (need_aboveright) { - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); - } - // TODO(Peter) this value should probably change for high bitdepth - above_row[-1] = left_available ? above_ref[-1] : (base + 1); - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); - else - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - // TODO(Peter): this value should probably change for high bitdepth - above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); - } - } - } else { - vpx_memset16(above_row, base - 1, bs * 2); - // TODO(Peter): this value should probably change for high bitdepth - above_row[-1] = base - 1; - } - } -#endif +#endif // CONFIG_EXT_INTRA // predict if (mode == DC_PRED) { -#if CONFIG_MISC_FIXES dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, const_above_row, left_col, xd->bd); -#else - dc_pred_high[left_available][up_available][tx_size](dst, dst_stride, - const_above_row, - left_col, xd->bd); -#endif } else { pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, xd->bd); @@ -506,28 +982,44 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, -#if CONFIG_MISC_FIXES int n_top_px, int n_topright_px, int n_left_px, int n_bottomleft_px, -#else - int up_available, int left_available, - int right_available, -#endif - int x, int y, int plane) { + int plane) { int i; -#if CONFIG_MISC_FIXES DECLARE_ALIGNED(16, uint8_t, left_col[64]); const uint8_t *above_ref = ref - ref_stride; -#else - DECLARE_ALIGNED(16, uint8_t, left_col[32]); - int frame_width, frame_height; - int x0, y0; - const struct macroblockd_plane *const pd = &xd->plane[plane]; -#endif DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); uint8_t *above_row = above_data + 16; const uint8_t *const_above_row = above_row; const int bs = 4 << tx_size; + int need_left = extend_modes[mode] & NEED_LEFT; + int need_above = extend_modes[mode] & NEED_ABOVE; +#if CONFIG_EXT_INTRA + const EXT_INTRA_MODE_INFO *ext_intra_mode_info = + &xd->mi[0]->mbmi.ext_intra_mode_info; + const EXT_INTRA_MODE ext_intra_mode = + ext_intra_mode_info->ext_intra_mode[plane != 0]; + int p_angle = 0; + + if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + p_angle = mode_to_angle_map[mode] + + xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP; + if (p_angle <= 90) + 
need_above = 1, need_left = 0; + else if (p_angle < 180) + need_above = 1, need_left = 1; + else + need_above = 0, need_left = 1; + } + + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + EXT_INTRA_MODE ext_intra_mode = + ext_intra_mode_info->ext_intra_mode[plane != 0]; + need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT; + need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE; + } +#endif // CONFIG_EXT_INTRA // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z @@ -536,34 +1028,28 @@ // 129 G H .. S T T T T T // .. -#if CONFIG_MISC_FIXES (void) xd; - (void) x; - (void) y; (void) plane; assert(n_top_px >= 0); assert(n_topright_px >= 0); assert(n_left_px >= 0); assert(n_bottomleft_px >= 0); -#else - // Get current frame pointer, width and height. - if (plane == 0) { - frame_width = xd->cur_buf->y_width; - frame_height = xd->cur_buf->y_height; - } else { - frame_width = xd->cur_buf->uv_width; - frame_height = xd->cur_buf->uv_height; - } - - // Get block position in current frame. 
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; -#endif // NEED_LEFT - if (extend_modes[mode] & NEED_LEFT) { -#if CONFIG_MISC_FIXES + if (need_left) { +#if CONFIG_EXT_INTRA + int need_bottom; + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + need_bottom = 0; + } else if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + need_bottom = p_angle > 180; + } else { + need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); + } +#else const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); +#endif // CONFIG_EXT_INTRA i = 0; if (n_left_px > 0) { for (; i < n_left_px; i++) @@ -578,35 +1064,23 @@ } else { memset(left_col, 129, bs << need_bottom); } -#else - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) - left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; - } - } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } - } else { - memset(left_col, 129, bs); - } -#endif } // NEED_ABOVE - if (extend_modes[mode] & NEED_ABOVE) { -#if CONFIG_MISC_FIXES + if (need_above) { +#if CONFIG_EXT_INTRA + int need_right; + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + need_right = 1; + } else if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + need_right = p_angle < 90; + } else { + need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); + } +#else const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); +#endif // CONFIG_EXT_INTRA if (n_top_px > 0) { memcpy(above_row, above_ref, n_top_px); i = n_top_px; 
@@ -620,115 +1094,58 @@ } else { memset(above_row, 127, bs << need_right); } -#else - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + bs <= frame_width) { - memcpy(above_row, above_ref, bs); - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs); - } - } - above_row[-1] = left_available ? above_ref[-1] : 129; - } else { - memset(above_row, 127, bs); - above_row[-1] = 127; - } -#endif } -#if CONFIG_MISC_FIXES - if (extend_modes[mode] & NEED_ABOVELEFT) { +#if CONFIG_EXT_INTRA + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] || + (extend_modes[mode] & NEED_ABOVELEFT) || + (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) { above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? 
above_ref[-1] : 129) : 127; } #else - // NEED_ABOVERIGHT - if (extend_modes[mode] & NEED_ABOVERIGHT) { - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs); - } else { - memcpy(above_row, above_ref, bs); - memset(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); - } else { - memcpy(above_row, above_ref, bs); - memset(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs); - else - memset(above_row + bs, above_row[bs - 1], bs); - } - } - above_row[-1] = left_available ? above_ref[-1] : 129; - } else { - memset(above_row, 127, bs * 2); - above_row[-1] = 127; - } + if ((extend_modes[mode] & NEED_ABOVELEFT)) { + above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? 
above_ref[-1] : 129) : 127; } -#endif +#endif // CONFIG_EXT_INTRA + +#if CONFIG_EXT_INTRA + if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { + filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs, + const_above_row, left_col); + return; + } + + if (mode != DC_PRED && mode != TM_PRED && + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + dr_predictor(dst, dst_stride, tx_size, const_above_row, left_col, p_angle); + return; + } +#endif // CONFIG_EXT_INTRA // predict if (mode == DC_PRED) { -#if CONFIG_MISC_FIXES dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, const_above_row, left_col); -#else - dc_pred[left_available][up_available][tx_size](dst, dst_stride, - const_above_row, left_col); -#endif } else { pred[mode][tx_size](dst, dst_stride, const_above_row, left_col); } } void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in, - TX_SIZE tx_size, PREDICTION_MODE mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - int aoff, int loff, int plane) { + TX_SIZE tx_size, PREDICTION_MODE mode, + const uint8_t *ref, int ref_stride, + uint8_t *dst, int dst_stride, + int aoff, int loff, int plane) { const int txw = (1 << tx_size); const int have_top = loff || xd->up_available; const int have_left = aoff || xd->left_available; const int x = aoff * 4; const int y = loff * 4; -#if CONFIG_MISC_FIXES const int bw = VPXMAX(2, 1 << bwl_in); const int bh = VPXMAX(2, 1 << bhl_in); - const int mi_row = -xd->mb_to_top_edge >> 6; - const int mi_col = -xd->mb_to_left_edge >> 6; + const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); + const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const struct macroblockd_plane *const pd = &xd->plane[plane]; const int right_available = @@ -744,16 +1161,49 @@ const int wpx = 4 * bw; const int hpx = 4 * bh; const int txpx = 4 * txw; + // Distance between the right edge of this prediction block to + // the frame right 
edge + const int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + + (wpx - x - txpx); + // Distance between the bottom edge of this prediction block to + // the frame bottom edge + const int yd = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + + (hpx - y - txpx); - int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + (wpx - x - txpx); - int yd = - (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + (hpx - y - txpx); + if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) { + const int bs = 4 * (1 << tx_size); + const int stride = 4 * (1 << bwl_in); + int r, c; + uint8_t *map = NULL; +#if CONFIG_VP9_HIGHBITDEPTH + uint16_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors + + plane * PALETTE_MAX_SIZE; #else - const int bw = (1 << bwl_in); - const int have_right = (aoff + txw) < bw; -#endif // CONFIG_MISC_FIXES + uint8_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors + + plane * PALETTE_MAX_SIZE; +#endif // CONFIG_VP9_HIGHBITDEPTH -#if CONFIG_MISC_FIXES + map = xd->plane[plane != 0].color_index_map; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); + for (r = 0; r < bs; ++r) + for (c = 0; c < bs; ++c) + dst16[r * dst_stride + c] = + palette[map[(r + y) * stride + c + x]]; + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + for (r = 0; r < bs; ++r) + for (c = 0; c < bs; ++c) + dst[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]]; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + return; + } + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, @@ -762,7 +1212,7 @@ have_top && have_right ? VPXMIN(txpx, xr) : 0, have_left ? VPXMIN(txpx, yd + txpx) : 0, have_bottom && have_left ? VPXMIN(txpx, yd) : 0, - x, y, plane, xd->bd); + plane); return; } #endif @@ -772,20 +1222,7 @@ have_top && have_right ? 
VPXMIN(txpx, xr) : 0, have_left ? VPXMIN(txpx, yd + txpx) : 0, have_bottom && have_left ? VPXMIN(txpx, yd) : 0, - x, y, plane); -#else // CONFIG_MISC_FIXES - (void) bhl_in; -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, - tx_size, have_top, have_left, have_right, - x, y, plane, xd->bd); - return; - } -#endif - build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, - have_top, have_left, have_right, x, y, plane); -#endif // CONFIG_MISC_FIXES + plane); } void vp10_init_intra_predictors(void) {
diff --git a/vp10/common/scale.c b/vp10/common/scale.c index ce6062c..65e14a9 100644 --- a/vp10/common/scale.c +++ b/vp10/common/scale.c
@@ -46,15 +46,15 @@ #if CONFIG_VP9_HIGHBITDEPTH void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h, - int use_highbd) { + int other_w, int other_h, + int this_w, int this_h, + int use_highbd) { #else void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h) { + int other_w, int other_h, + int this_w, int this_h) { #endif - if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { + if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { sf->x_scale_fp = REF_INVALID_SCALE; sf->y_scale_fp = REF_INVALID_SCALE; return; @@ -79,6 +79,16 @@ // applied in one direction only, and not at all for 0,0, seems to give the // best quality, but it may be worth trying an additional mode that does // do the filtering on full-pel. +#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS + sf->predict_ni[0][0][0] = vpx_convolve8_c; + sf->predict_ni[0][0][1] = vpx_convolve8_avg_c; + sf->predict_ni[0][1][0] = vpx_convolve8_c; + sf->predict_ni[0][1][1] = vpx_convolve8_avg_c; + sf->predict_ni[1][0][0] = vpx_convolve8_c; + sf->predict_ni[1][0][1] = vpx_convolve8_avg_c; + sf->predict_ni[1][1][0] = vpx_convolve8; + sf->predict_ni[1][1][1] = vpx_convolve8_avg; +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS if (sf->x_step_q4 == 16) { if (sf->y_step_q4 == 16) { // No scaling in either direction. 
@@ -119,8 +129,19 @@ // 2D subpel motion always gets filtered in both directions sf->predict[1][1][0] = vpx_convolve8; sf->predict[1][1][1] = vpx_convolve8_avg; + #if CONFIG_VP9_HIGHBITDEPTH if (use_highbd) { +#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS + sf->highbd_predict_ni[0][0][0] = vpx_highbd_convolve8_c; + sf->highbd_predict_ni[0][0][1] = vpx_highbd_convolve8_avg_c; + sf->highbd_predict_ni[0][1][0] = vpx_highbd_convolve8_c; + sf->highbd_predict_ni[0][1][1] = vpx_highbd_convolve8_avg_c; + sf->highbd_predict_ni[1][0][0] = vpx_highbd_convolve8_c; + sf->highbd_predict_ni[1][0][1] = vpx_highbd_convolve8_avg_c; + sf->highbd_predict_ni[1][1][0] = vpx_highbd_convolve8; + sf->highbd_predict_ni[1][1][1] = vpx_highbd_convolve8_avg; +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS if (sf->x_step_q4 == 16) { if (sf->y_step_q4 == 16) { // No scaling in either direction. @@ -162,5 +183,5 @@ sf->highbd_predict[1][1][0] = vpx_highbd_convolve8; sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg; } -#endif +#endif // CONFIG_VP9_HIGHBITDEPTH }
diff --git a/vp10/common/scale.h b/vp10/common/scale.h index 833f6c4..604b9d2 100644 --- a/vp10/common/scale.h +++ b/vp10/common/scale.h
@@ -34,7 +34,15 @@ convolve_fn_t predict[2][2][2]; // horiz, vert, avg #if CONFIG_VP9_HIGHBITDEPTH highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg -#endif +#endif // CONFIG_VP9_HIGHBITDEPTH + +// Functions for non-interpolating filters (those that filter zero offsets) +#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS + convolve_fn_t predict_ni[2][2][2]; // horiz, vert, avg +#if CONFIG_VP9_HIGHBITDEPTH + highbd_convolve_fn_t highbd_predict_ni[2][2][2]; // horiz, vert, avg +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS }; MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); @@ -48,7 +56,7 @@ void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h); -#endif +#endif // CONFIG_VP9_HIGHBITDEPTH static INLINE int vp10_is_valid_scale(const struct scale_factors *sf) { return sf->x_scale_fp != REF_INVALID_SCALE &&
diff --git a/vp10/common/scan.c b/vp10/common/scan.c index 7217f6d..23a7b98 100644 --- a/vp10/common/scan.c +++ b/vp10/common/scan.c
@@ -702,7 +702,228 @@ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, }; -const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES] = { +#if CONFIG_EXT_TX +const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { + { // TX_4X4 + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, + {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + }, { // TX_8X8 + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, + {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, 
default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + }, { // TX_16X16 + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, + {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + 
{default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + }, { // TX_32X32 + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + } +}; + +const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { + { // TX_4X4 + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, 
vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + }, { // TX_8X8 + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + 
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + }, { // TX_16X16 + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, + }, { // TX_32X32 + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + 
{default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + } +}; + +#else // CONFIG_EXT_TX + +const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { { // TX_4X4 {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, @@ -714,14 +935,21 @@ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors} }, { // TX_16X16 - {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors}, {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors} + {default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors} }, { // TX_32X32 - {default_scan_32x32, vp10_default_iscan_32x32, 
default_scan_32x32_neighbors}, - {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors}, } }; +#endif // CONFIG_EXT_TX
diff --git a/vp10/common/scan.h b/vp10/common/scan.h index f5a020f..aadae40 100644 --- a/vp10/common/scan.h +++ b/vp10/common/scan.h
@@ -30,7 +30,7 @@ } scan_order; extern const scan_order vp10_default_scan_orders[TX_SIZES]; -extern const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES]; +extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES]; static INLINE int get_coef_context(const int16_t *neighbors, const uint8_t *token_cache, int c) { @@ -38,8 +38,31 @@ token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } -static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) { - return &vp10_scan_orders[tx_size][tx_type]; +static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size, + TX_TYPE tx_type) { + return &vp10_intra_scan_orders[tx_size][tx_type]; +} + +#if CONFIG_EXT_TX +extern const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES]; + +static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size, + TX_TYPE tx_type) { + return &vp10_inter_scan_orders[tx_size][tx_type]; +} +#endif // CONFIG_EXT_TX + +static INLINE const scan_order *get_scan(TX_SIZE tx_size, + TX_TYPE tx_type, + int is_inter) { +#if CONFIG_EXT_TX + return + is_inter ? &vp10_inter_scan_orders[tx_size][tx_type] : + &vp10_intra_scan_orders[tx_size][tx_type]; +#else + (void) is_inter; + return &vp10_intra_scan_orders[tx_size][tx_type]; +#endif // CONFIG_EXT_TX } #ifdef __cplusplus
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c index 0c7a1c2..a1f17e9 100644 --- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c
@@ -366,6 +366,20 @@ for (j = 0; j < SWITCHABLE_FILTERS; j++) cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j]; +#if CONFIG_REF_MV + for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) + for (j = 0; j < 2; ++j) + cm->counts.newmv_mode[i][j] += counts->newmv_mode[i][j]; + + for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) + for (j = 0; j < 2; ++j) + cm->counts.zeromv_mode[i][j] += counts->zeromv_mode[i][j]; + + for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) + for (j = 0; j < 2; ++j) + cm->counts.refmv_mode[i][j] += counts->refmv_mode[i][j]; +#endif + for (i = 0; i < INTER_MODE_CONTEXTS; i++) for (j = 0; j < INTER_MODES; j++) cm->counts.inter_mode[i][j] += counts->inter_mode[i][j]; @@ -379,13 +393,14 @@ cm->counts.comp_inter[i][j] += counts->comp_inter[i][j]; for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) + for (j = 0; j < (SINGLE_REFS - 1); j++) for (k = 0; k < 2; k++) - cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k]; + cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k]; for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.comp_ref[i][j] += counts->comp_ref[i][j]; + for (j = 0; j < (COMP_REFS - 1); j++) + for (k = 0; k < 2; k++) + cm->counts.comp_ref[i][j][k] += counts->comp_ref[i][j][k]; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { for (j = 0; j < TX_SIZES; j++) @@ -401,6 +416,12 @@ for (i = 0; i < TX_SIZES; i++) cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i]; +#if CONFIG_VAR_TX + for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) + for (j = 0; j < 2; ++j) + cm->counts.txfm_partition[i][j] += counts->txfm_partition[i][j]; +#endif + for (i = 0; i < SKIP_CONTEXTS; i++) for (j = 0; j < 2; j++) cm->counts.skip[i][j] += counts->skip[i][j]; @@ -435,6 +456,26 @@ comps->fp[i] += comps_t->fp[i]; } +#if CONFIG_EXT_TX + for (i = 0; i < EXT_TX_SIZES; i++) { + int s, k; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (use_inter_ext_tx_for_txsize[s][i]) { + for (k = 0; k < TX_TYPES; k++) + 
cm->counts.inter_ext_tx[s][i][k] += counts->inter_ext_tx[s][i][k]; + } + } + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (use_intra_ext_tx_for_txsize[s][i]) { + int j; + for (j = 0; j < INTRA_MODES; ++j) + for (k = 0; k < TX_TYPES; k++) + cm->counts.intra_ext_tx[s][i][j][k] += + counts->intra_ext_tx[s][i][j][k]; + } + } + } +#else for (i = 0; i < EXT_TX_SIZES; i++) { int j; for (j = 0; j < TX_TYPES; ++j) @@ -445,8 +486,17 @@ for (k = 0; k < TX_TYPES; k++) cm->counts.inter_ext_tx[i][k] += counts->inter_ext_tx[i][k]; } +#endif // CONFIG_EXT_TX -#if CONFIG_MISC_FIXES +#if CONFIG_SUPERTX + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; i++) + for (j = 0; j < TX_SIZES; j++) + for (k = 0; k < 2; k++) + cm->counts.supertx[i][j][k] += counts->supertx[i][j][k]; + for (i = 0; i < TX_SIZES; i++) + cm->counts.supertx_size[i] += counts->supertx_size[i]; +#endif // CONFIG_SUPERTX + for (i = 0; i < PREDICTION_PROBS; i++) for (j = 0; j < 2; j++) cm->counts.seg.pred[i][j] += counts->seg.pred[i][j]; @@ -455,5 +505,10 @@ cm->counts.seg.tree_total[i] += counts->seg.tree_total[i]; cm->counts.seg.tree_mispred[i] += counts->seg.tree_mispred[i]; } -#endif + +#if CONFIG_EXT_INTRA + for (i = 0; i < PLANE_TYPES; ++i) + for (j = 0; j < 2; j++) + cm->counts.ext_intra[i][j] += counts->ext_intra[i][j]; +#endif // CONFIG_EXT_INTRA }
diff --git a/vp10/common/vp10_fwd_txfm1d.c b/vp10/common/vp10_fwd_txfm1d.c new file mode 100644 index 0000000..6e19e27 --- /dev/null +++ b/vp10/common/vp10_fwd_txfm1d.c
@@ -0,0 +1,1530 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp10/common/vp10_fwd_txfm1d.h" +#if CONFIG_COEFFICIENT_RANGE_CHECKING +#define range_check(stage, input, buf, size, bit) \ + { \ + int i, j; \ + for (i = 0; i < size; ++i) { \ + int buf_bit = get_max_bit(abs(buf[i])) + 1; \ + if (buf_bit > bit) { \ + printf("======== %s overflow ========\n", __func__); \ + printf("stage: %d node: %d\n", stage, i); \ + printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \ + printf("input:\n"); \ + for (j = 0; j < size; j++) { \ + printf("%d,", input[j]); \ + } \ + printf("\n"); \ + /* assert() takes a single expression; the string only labels the failure */ assert(0 && "vp10_fwd_txfm1d.c: range_check overflow"); \ + } \ + } \ + } +#else +#define range_check(stage, input, buf, size, bit) \ + { \ + (void) stage; \ + (void) input; \ + (void) buf; \ + (void) size; \ + (void) bit; \ + } +#endif + +void vp10_fdct4_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 4; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[4]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0] + input[3]; + bf1[1] = input[1] + input[2]; + bf1[2] = -input[2] + input[1]; + bf1[3] = -input[3] + input[0]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], 
cos_bit[stage]); + bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[2]; + bf1[2] = bf0[1]; + bf1[3] = bf0[3]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_fdct8_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 8; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[8]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0] + input[7]; + bf1[1] = input[1] + input[6]; + bf1[2] = input[2] + input[5]; + bf1[3] = input[3] + input[4]; + bf1[4] = -input[4] + input[3]; + bf1[5] = -input[5] + input[2]; + bf1[6] = -input[6] + input[1]; + bf1[7] = -input[7] + input[0]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[3]; + bf1[1] = bf0[1] + bf0[2]; + bf1[2] = -bf0[2] + bf0[1]; + bf1[3] = -bf0[3] + bf0[0]; + bf1[4] = bf0[4]; + bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]); + bf1[7] = bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]); + bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]); + bf1[4] = bf0[4] + 
bf0[5]; + bf1[5] = -bf0[5] + bf0[4]; + bf1[6] = -bf0[6] + bf0[7]; + bf1[7] = bf0[7] + bf0[6]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]); + bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]); + bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[4]; + bf1[2] = bf0[2]; + bf1[3] = bf0[6]; + bf1[4] = bf0[1]; + bf1[5] = bf0[5]; + bf1[6] = bf0[3]; + bf1[7] = bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_fdct16_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 16; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[16]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0] + input[15]; + bf1[1] = input[1] + input[14]; + bf1[2] = input[2] + input[13]; + bf1[3] = input[3] + input[12]; + bf1[4] = input[4] + input[11]; + bf1[5] = input[5] + input[10]; + bf1[6] = input[6] + input[9]; + bf1[7] = input[7] + input[8]; + bf1[8] = -input[8] + input[7]; + bf1[9] = -input[9] + input[6]; + bf1[10] = -input[10] + input[5]; + bf1[11] = -input[11] + input[4]; + bf1[12] = -input[12] + input[3]; + bf1[13] = -input[13] + input[2]; + bf1[14] = -input[14] + input[1]; + bf1[15] = -input[15] + input[0]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + 
bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[7]; + bf1[1] = bf0[1] + bf0[6]; + bf1[2] = bf0[2] + bf0[5]; + bf1[3] = bf0[3] + bf0[4]; + bf1[4] = -bf0[4] + bf0[3]; + bf1[5] = -bf0[5] + bf0[2]; + bf1[6] = -bf0[6] + bf0[1]; + bf1[7] = -bf0[7] + bf0[0]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]); + bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]); + bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]); + bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]); + bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[3]; + bf1[1] = bf0[1] + bf0[2]; + bf1[2] = -bf0[2] + bf0[1]; + bf1[3] = -bf0[3] + bf0[0]; + bf1[4] = bf0[4]; + bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]); + bf1[7] = bf0[7]; + bf1[8] = bf0[8] + bf0[11]; + bf1[9] = bf0[9] + bf0[10]; + bf1[10] = -bf0[10] + bf0[9]; + bf1[11] = -bf0[11] + bf0[8]; + bf1[12] = -bf0[12] + bf0[15]; + bf1[13] = -bf0[13] + bf0[14]; + bf1[14] = bf0[14] + bf0[13]; + bf1[15] = bf0[15] + bf0[12]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]); + bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]); + bf1[4] = bf0[4] + bf0[5]; + bf1[5] = -bf0[5] + bf0[4]; + bf1[6] = -bf0[6] + bf0[7]; + bf1[7] = bf0[7] + bf0[6]; + bf1[8] = bf0[8]; + bf1[9] = half_btf(-cospi[16], 
bf0[9], cospi[48], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]); + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]); + bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]); + bf1[15] = bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]); + bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]); + bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]); + bf1[8] = bf0[8] + bf0[9]; + bf1[9] = -bf0[9] + bf0[8]; + bf1[10] = -bf0[10] + bf0[11]; + bf1[11] = bf0[11] + bf0[10]; + bf1[12] = bf0[12] + bf0[13]; + bf1[13] = -bf0[13] + bf0[12]; + bf1[14] = -bf0[14] + bf0[15]; + bf1[15] = bf0[15] + bf0[14]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]); + bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]); + bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]); + bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]); + bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]); + bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]); + 
bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[8]; + bf1[2] = bf0[4]; + bf1[3] = bf0[12]; + bf1[4] = bf0[2]; + bf1[5] = bf0[10]; + bf1[6] = bf0[6]; + bf1[7] = bf0[14]; + bf1[8] = bf0[1]; + bf1[9] = bf0[9]; + bf1[10] = bf0[5]; + bf1[11] = bf0[13]; + bf1[12] = bf0[3]; + bf1[13] = bf0[11]; + bf1[14] = bf0[7]; + bf1[15] = bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_fdct32_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 32; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[32]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0] + input[31]; + bf1[1] = input[1] + input[30]; + bf1[2] = input[2] + input[29]; + bf1[3] = input[3] + input[28]; + bf1[4] = input[4] + input[27]; + bf1[5] = input[5] + input[26]; + bf1[6] = input[6] + input[25]; + bf1[7] = input[7] + input[24]; + bf1[8] = input[8] + input[23]; + bf1[9] = input[9] + input[22]; + bf1[10] = input[10] + input[21]; + bf1[11] = input[11] + input[20]; + bf1[12] = input[12] + input[19]; + bf1[13] = input[13] + input[18]; + bf1[14] = input[14] + input[17]; + bf1[15] = input[15] + input[16]; + bf1[16] = -input[16] + input[15]; + bf1[17] = -input[17] + input[14]; + bf1[18] = -input[18] + input[13]; + bf1[19] = -input[19] + input[12]; + bf1[20] = -input[20] + input[11]; + bf1[21] = -input[21] + input[10]; + bf1[22] = -input[22] + input[9]; + bf1[23] = -input[23] + input[8]; + bf1[24] = -input[24] + input[7]; + bf1[25] = -input[25] + input[6]; + bf1[26] = -input[26] + input[5]; + bf1[27] = -input[27] + input[4]; + bf1[28] = -input[28] + input[3]; + bf1[29] = -input[29] + input[2]; + bf1[30] = -input[30] + input[1]; + 
bf1[31] = -input[31] + input[0]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[15]; + bf1[1] = bf0[1] + bf0[14]; + bf1[2] = bf0[2] + bf0[13]; + bf1[3] = bf0[3] + bf0[12]; + bf1[4] = bf0[4] + bf0[11]; + bf1[5] = bf0[5] + bf0[10]; + bf1[6] = bf0[6] + bf0[9]; + bf1[7] = bf0[7] + bf0[8]; + bf1[8] = -bf0[8] + bf0[7]; + bf1[9] = -bf0[9] + bf0[6]; + bf1[10] = -bf0[10] + bf0[5]; + bf1[11] = -bf0[11] + bf0[4]; + bf1[12] = -bf0[12] + bf0[3]; + bf1[13] = -bf0[13] + bf0[2]; + bf1[14] = -bf0[14] + bf0[1]; + bf1[15] = -bf0[15] + bf0[0]; + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = bf0[18]; + bf1[19] = bf0[19]; + bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]); + bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]); + bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]); + bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]); + bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]); + bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]); + bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]); + bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]); + bf1[28] = bf0[28]; + bf1[29] = bf0[29]; + bf1[30] = bf0[30]; + bf1[31] = bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[7]; + bf1[1] = bf0[1] + bf0[6]; + bf1[2] = bf0[2] + bf0[5]; + bf1[3] = bf0[3] + bf0[4]; + bf1[4] = -bf0[4] + bf0[3]; + bf1[5] = -bf0[5] + bf0[2]; + bf1[6] = -bf0[6] + bf0[1]; + bf1[7] = -bf0[7] + bf0[0]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]); + bf1[11] = 
half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]); + bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]); + bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]); + bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + bf1[16] = bf0[16] + bf0[23]; + bf1[17] = bf0[17] + bf0[22]; + bf1[18] = bf0[18] + bf0[21]; + bf1[19] = bf0[19] + bf0[20]; + bf1[20] = -bf0[20] + bf0[19]; + bf1[21] = -bf0[21] + bf0[18]; + bf1[22] = -bf0[22] + bf0[17]; + bf1[23] = -bf0[23] + bf0[16]; + bf1[24] = -bf0[24] + bf0[31]; + bf1[25] = -bf0[25] + bf0[30]; + bf1[26] = -bf0[26] + bf0[29]; + bf1[27] = -bf0[27] + bf0[28]; + bf1[28] = bf0[28] + bf0[27]; + bf1[29] = bf0[29] + bf0[26]; + bf1[30] = bf0[30] + bf0[25]; + bf1[31] = bf0[31] + bf0[24]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[3]; + bf1[1] = bf0[1] + bf0[2]; + bf1[2] = -bf0[2] + bf0[1]; + bf1[3] = -bf0[3] + bf0[0]; + bf1[4] = bf0[4]; + bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]); + bf1[7] = bf0[7]; + bf1[8] = bf0[8] + bf0[11]; + bf1[9] = bf0[9] + bf0[10]; + bf1[10] = -bf0[10] + bf0[9]; + bf1[11] = -bf0[11] + bf0[8]; + bf1[12] = -bf0[12] + bf0[15]; + bf1[13] = -bf0[13] + bf0[14]; + bf1[14] = bf0[14] + bf0[13]; + bf1[15] = bf0[15] + bf0[12]; + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]); + bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]); + bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]); + bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]); + bf1[22] = bf0[22]; + bf1[23] = bf0[23]; + bf1[24] = bf0[24]; + bf1[25] = bf0[25]; + bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], 
cos_bit[stage]); + bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]); + bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]); + bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]); + bf1[30] = bf0[30]; + bf1[31] = bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]); + bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]); + bf1[4] = bf0[4] + bf0[5]; + bf1[5] = -bf0[5] + bf0[4]; + bf1[6] = -bf0[6] + bf0[7]; + bf1[7] = bf0[7] + bf0[6]; + bf1[8] = bf0[8]; + bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]); + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]); + bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]); + bf1[15] = bf0[15]; + bf1[16] = bf0[16] + bf0[19]; + bf1[17] = bf0[17] + bf0[18]; + bf1[18] = -bf0[18] + bf0[17]; + bf1[19] = -bf0[19] + bf0[16]; + bf1[20] = -bf0[20] + bf0[23]; + bf1[21] = -bf0[21] + bf0[22]; + bf1[22] = bf0[22] + bf0[21]; + bf1[23] = bf0[23] + bf0[20]; + bf1[24] = bf0[24] + bf0[27]; + bf1[25] = bf0[25] + bf0[26]; + bf1[26] = -bf0[26] + bf0[25]; + bf1[27] = -bf0[27] + bf0[24]; + bf1[28] = -bf0[28] + bf0[31]; + bf1[29] = -bf0[29] + bf0[30]; + bf1[30] = bf0[30] + bf0[29]; + bf1[31] = bf0[31] + bf0[28]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; 
+ bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]); + bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]); + bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]); + bf1[8] = bf0[8] + bf0[9]; + bf1[9] = -bf0[9] + bf0[8]; + bf1[10] = -bf0[10] + bf0[11]; + bf1[11] = bf0[11] + bf0[10]; + bf1[12] = bf0[12] + bf0[13]; + bf1[13] = -bf0[13] + bf0[12]; + bf1[14] = -bf0[14] + bf0[15]; + bf1[15] = bf0[15] + bf0[14]; + bf1[16] = bf0[16]; + bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]); + bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]); + bf1[19] = bf0[19]; + bf1[20] = bf0[20]; + bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]); + bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]); + bf1[23] = bf0[23]; + bf1[24] = bf0[24]; + bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]); + bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]); + bf1[27] = bf0[27]; + bf1[28] = bf0[28]; + bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]); + bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]); + bf1[31] = bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]); + bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]); + bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]); + bf1[12] = 
half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]); + bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]); + bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]); + bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]); + bf1[16] = bf0[16] + bf0[17]; + bf1[17] = -bf0[17] + bf0[16]; + bf1[18] = -bf0[18] + bf0[19]; + bf1[19] = bf0[19] + bf0[18]; + bf1[20] = bf0[20] + bf0[21]; + bf1[21] = -bf0[21] + bf0[20]; + bf1[22] = -bf0[22] + bf0[23]; + bf1[23] = bf0[23] + bf0[22]; + bf1[24] = bf0[24] + bf0[25]; + bf1[25] = -bf0[25] + bf0[24]; + bf1[26] = -bf0[26] + bf0[27]; + bf1[27] = bf0[27] + bf0[26]; + bf1[28] = bf0[28] + bf0[29]; + bf1[29] = -bf0[29] + bf0[28]; + bf1[30] = -bf0[30] + bf0[31]; + bf1[31] = bf0[31] + bf0[30]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 8 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]); + bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]); + bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]); + bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]); + bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]); + bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]); + bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]); + bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]); + bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]); + bf1[25] = 
half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]); + bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]); + bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]); + bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]); + bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]); + bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]); + bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 9 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[16]; + bf1[2] = bf0[8]; + bf1[3] = bf0[24]; + bf1[4] = bf0[4]; + bf1[5] = bf0[20]; + bf1[6] = bf0[12]; + bf1[7] = bf0[28]; + bf1[8] = bf0[2]; + bf1[9] = bf0[18]; + bf1[10] = bf0[10]; + bf1[11] = bf0[26]; + bf1[12] = bf0[6]; + bf1[13] = bf0[22]; + bf1[14] = bf0[14]; + bf1[15] = bf0[30]; + bf1[16] = bf0[1]; + bf1[17] = bf0[17]; + bf1[18] = bf0[9]; + bf1[19] = bf0[25]; + bf1[20] = bf0[5]; + bf1[21] = bf0[21]; + bf1[22] = bf0[13]; + bf1[23] = bf0[29]; + bf1[24] = bf0[3]; + bf1[25] = bf0[19]; + bf1[26] = bf0[11]; + bf1[27] = bf0[27]; + bf1[28] = bf0[7]; + bf1[29] = bf0[23]; + bf1[30] = bf0[15]; + bf1[31] = bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_fadst4_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 4; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[4]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[3]; + bf1[1] = input[0]; + bf1[2] = input[1]; + bf1[3] = input[2]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = 
half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]); + bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = -bf0[2] + bf0[0]; + bf1[3] = -bf0[3] + bf0[1]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = -bf0[2]; + bf1[2] = bf0[3]; + bf1[3] = -bf0[1]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_fadst8_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 8; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[8]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[7]; + bf1[1] = input[0]; + bf1[2] = input[5]; + bf1[3] = input[2]; + bf1[4] = input[3]; + bf1[5] = input[4]; + bf1[6] = input[1]; + bf1[7] = input[6]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], 
bf0[0], cos_bit[stage]); + bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]); + bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]); + bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[4]; + bf1[1] = bf0[1] + bf0[5]; + bf1[2] = bf0[2] + bf0[6]; + bf1[3] = bf0[3] + bf0[7]; + bf1[4] = -bf0[4] + bf0[0]; + bf1[5] = -bf0[5] + bf0[1]; + bf1[6] = -bf0[6] + bf0[2]; + bf1[7] = -bf0[7] + bf0[3]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]); + bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = -bf0[2] + bf0[0]; + bf1[3] = -bf0[3] + bf0[1]; + bf1[4] = bf0[4] + bf0[6]; + bf1[5] = bf0[5] + bf0[7]; + bf1[6] = -bf0[6] + bf0[4]; + bf1[7] = -bf0[7] + bf0[5]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; 
+ bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]); + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = -bf0[4]; + bf1[2] = bf0[6]; + bf1[3] = -bf0[2]; + bf1[4] = bf0[3]; + bf1[5] = -bf0[7]; + bf1[6] = bf0[5]; + bf1[7] = -bf0[1]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_fadst16_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 16; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[16]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[15]; + bf1[1] = input[0]; + bf1[2] = input[13]; + bf1[3] = input[2]; + bf1[4] = input[11]; + bf1[5] = input[4]; + bf1[6] = input[9]; + bf1[7] = input[6]; + bf1[8] = input[7]; + bf1[9] = input[8]; + bf1[10] = input[5]; + bf1[11] = input[10]; + bf1[12] = input[3]; + bf1[13] = input[12]; + bf1[14] = input[1]; + bf1[15] = input[14]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[2], bf0[1], cospi[62], bf0[0], cos_bit[stage]); + bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[10], bf0[3], cospi[54], bf0[2], cos_bit[stage]); + bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(-cospi[18], bf0[5], cospi[46], bf0[4], 
cos_bit[stage]); + bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(-cospi[26], bf0[7], cospi[38], bf0[6], cos_bit[stage]); + bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(-cospi[34], bf0[9], cospi[30], bf0[8], cos_bit[stage]); + bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(-cospi[42], bf0[11], cospi[22], bf0[10], cos_bit[stage]); + bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(-cospi[50], bf0[13], cospi[14], bf0[12], cos_bit[stage]); + bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(-cospi[58], bf0[15], cospi[6], bf0[14], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[8]; + bf1[1] = bf0[1] + bf0[9]; + bf1[2] = bf0[2] + bf0[10]; + bf1[3] = bf0[3] + bf0[11]; + bf1[4] = bf0[4] + bf0[12]; + bf1[5] = bf0[5] + bf0[13]; + bf1[6] = bf0[6] + bf0[14]; + bf1[7] = bf0[7] + bf0[15]; + bf1[8] = -bf0[8] + bf0[0]; + bf1[9] = -bf0[9] + bf0[1]; + bf1[10] = -bf0[10] + bf0[2]; + bf1[11] = -bf0[11] + bf0[3]; + bf1[12] = -bf0[12] + bf0[4]; + bf1[13] = -bf0[13] + bf0[5]; + bf1[14] = -bf0[14] + bf0[6]; + bf1[15] = -bf0[15] + bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]); + bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], 
cos_bit[stage]); + bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]); + bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[4]; + bf1[1] = bf0[1] + bf0[5]; + bf1[2] = bf0[2] + bf0[6]; + bf1[3] = bf0[3] + bf0[7]; + bf1[4] = -bf0[4] + bf0[0]; + bf1[5] = -bf0[5] + bf0[1]; + bf1[6] = -bf0[6] + bf0[2]; + bf1[7] = -bf0[7] + bf0[3]; + bf1[8] = bf0[8] + bf0[12]; + bf1[9] = bf0[9] + bf0[13]; + bf1[10] = bf0[10] + bf0[14]; + bf1[11] = bf0[11] + bf0[15]; + bf1[12] = -bf0[12] + bf0[8]; + bf1[13] = -bf0[13] + bf0[9]; + bf1[14] = -bf0[14] + bf0[10]; + bf1[15] = -bf0[15] + bf0[11]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]); + bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]); + bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + 
bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = -bf0[2] + bf0[0]; + bf1[3] = -bf0[3] + bf0[1]; + bf1[4] = bf0[4] + bf0[6]; + bf1[5] = bf0[5] + bf0[7]; + bf1[6] = -bf0[6] + bf0[4]; + bf1[7] = -bf0[7] + bf0[5]; + bf1[8] = bf0[8] + bf0[10]; + bf1[9] = bf0[9] + bf0[11]; + bf1[10] = -bf0[10] + bf0[8]; + bf1[11] = -bf0[11] + bf0[9]; + bf1[12] = bf0[12] + bf0[14]; + bf1[13] = bf0[13] + bf0[15]; + bf1[14] = -bf0[14] + bf0[12]; + bf1[15] = -bf0[15] + bf0[13]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 8 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]); + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]); + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 9 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = -bf0[8]; + bf1[2] = bf0[12]; + bf1[3] = -bf0[4]; + bf1[4] = bf0[6]; + bf1[5] = -bf0[14]; + bf1[6] = bf0[10]; + bf1[7] = -bf0[2]; + bf1[8] = bf0[3]; + bf1[9] = -bf0[11]; + bf1[10] = bf0[15]; + bf1[11] = -bf0[7]; + bf1[12] = bf0[5]; + bf1[13] = -bf0[13]; + bf1[14] = bf0[9]; + bf1[15] = -bf0[1]; + range_check(stage, input, bf1, size, stage_range[stage]); + +} + +void vp10_fadst32_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t 
*stage_range) { + const int32_t size = 32; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[32]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[31]; + bf1[1] = input[0]; + bf1[2] = input[29]; + bf1[3] = input[2]; + bf1[4] = input[27]; + bf1[5] = input[4]; + bf1[6] = input[25]; + bf1[7] = input[6]; + bf1[8] = input[23]; + bf1[9] = input[8]; + bf1[10] = input[21]; + bf1[11] = input[10]; + bf1[12] = input[19]; + bf1[13] = input[12]; + bf1[14] = input[17]; + bf1[15] = input[14]; + bf1[16] = input[15]; + bf1[17] = input[16]; + bf1[18] = input[13]; + bf1[19] = input[18]; + bf1[20] = input[11]; + bf1[21] = input[20]; + bf1[22] = input[9]; + bf1[23] = input[22]; + bf1[24] = input[7]; + bf1[25] = input[24]; + bf1[26] = input[5]; + bf1[27] = input[26]; + bf1[28] = input[3]; + bf1[29] = input[28]; + bf1[30] = input[1]; + bf1[31] = input[30]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit[stage]); + bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit[stage]); + bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit[stage]); + bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit[stage]); + bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit[stage]); + bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(-cospi[21], 
bf0[11], cospi[43], bf0[10], cos_bit[stage]); + bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit[stage]); + bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit[stage]); + bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]); + bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit[stage]); + bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]); + bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit[stage]); + bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]); + bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit[stage]); + bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit[stage]); + bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]); + bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit[stage]); + bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]); + bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit[stage]); + bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit[stage]); + bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[16]; + bf1[1] = bf0[1] + bf0[17]; + bf1[2] = bf0[2] + bf0[18]; + bf1[3] = bf0[3] + bf0[19]; + bf1[4] = bf0[4] + bf0[20]; + bf1[5] = bf0[5] + bf0[21]; + bf1[6] = bf0[6] + bf0[22]; + bf1[7] = bf0[7] + bf0[23]; + bf1[8] = bf0[8] + bf0[24]; + bf1[9] = 
bf0[9] + bf0[25]; + bf1[10] = bf0[10] + bf0[26]; + bf1[11] = bf0[11] + bf0[27]; + bf1[12] = bf0[12] + bf0[28]; + bf1[13] = bf0[13] + bf0[29]; + bf1[14] = bf0[14] + bf0[30]; + bf1[15] = bf0[15] + bf0[31]; + bf1[16] = -bf0[16] + bf0[0]; + bf1[17] = -bf0[17] + bf0[1]; + bf1[18] = -bf0[18] + bf0[2]; + bf1[19] = -bf0[19] + bf0[3]; + bf1[20] = -bf0[20] + bf0[4]; + bf1[21] = -bf0[21] + bf0[5]; + bf1[22] = -bf0[22] + bf0[6]; + bf1[23] = -bf0[23] + bf0[7]; + bf1[24] = -bf0[24] + bf0[8]; + bf1[25] = -bf0[25] + bf0[9]; + bf1[26] = -bf0[26] + bf0[10]; + bf1[27] = -bf0[27] + bf0[11]; + bf1[28] = -bf0[28] + bf0[12]; + bf1[29] = -bf0[29] + bf0[13]; + bf1[30] = -bf0[30] + bf0[14]; + bf1[31] = -bf0[31] + bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]); + bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit[stage]); + bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]); + bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit[stage]); + bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]); + bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit[stage]); + bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit[stage]); + bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]); + bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit[stage]); + bf1[26] = 
half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]); + bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit[stage]); + bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit[stage]); + bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[8]; + bf1[1] = bf0[1] + bf0[9]; + bf1[2] = bf0[2] + bf0[10]; + bf1[3] = bf0[3] + bf0[11]; + bf1[4] = bf0[4] + bf0[12]; + bf1[5] = bf0[5] + bf0[13]; + bf1[6] = bf0[6] + bf0[14]; + bf1[7] = bf0[7] + bf0[15]; + bf1[8] = -bf0[8] + bf0[0]; + bf1[9] = -bf0[9] + bf0[1]; + bf1[10] = -bf0[10] + bf0[2]; + bf1[11] = -bf0[11] + bf0[3]; + bf1[12] = -bf0[12] + bf0[4]; + bf1[13] = -bf0[13] + bf0[5]; + bf1[14] = -bf0[14] + bf0[6]; + bf1[15] = -bf0[15] + bf0[7]; + bf1[16] = bf0[16] + bf0[24]; + bf1[17] = bf0[17] + bf0[25]; + bf1[18] = bf0[18] + bf0[26]; + bf1[19] = bf0[19] + bf0[27]; + bf1[20] = bf0[20] + bf0[28]; + bf1[21] = bf0[21] + bf0[29]; + bf1[22] = bf0[22] + bf0[30]; + bf1[23] = bf0[23] + bf0[31]; + bf1[24] = -bf0[24] + bf0[16]; + bf1[25] = -bf0[25] + bf0[17]; + bf1[26] = -bf0[26] + bf0[18]; + bf1[27] = -bf0[27] + bf0[19]; + bf1[28] = -bf0[28] + bf0[20]; + bf1[29] = -bf0[29] + bf0[21]; + bf1[30] = -bf0[30] + bf0[22]; + bf1[31] = -bf0[31] + bf0[23]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], 
bf0[8], cos_bit[stage]); + bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]); + bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]); + bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]); + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = bf0[18]; + bf1[19] = bf0[19]; + bf1[20] = bf0[20]; + bf1[21] = bf0[21]; + bf1[22] = bf0[22]; + bf1[23] = bf0[23]; + bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]); + bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit[stage]); + bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]); + bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit[stage]); + bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit[stage]); + bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[4]; + bf1[1] = bf0[1] + bf0[5]; + bf1[2] = bf0[2] + bf0[6]; + bf1[3] = bf0[3] + bf0[7]; + bf1[4] = -bf0[4] + bf0[0]; + bf1[5] = -bf0[5] + bf0[1]; + bf1[6] = -bf0[6] + bf0[2]; + bf1[7] = -bf0[7] + bf0[3]; + bf1[8] = bf0[8] + bf0[12]; + bf1[9] = bf0[9] + bf0[13]; + bf1[10] = bf0[10] + bf0[14]; + bf1[11] = bf0[11] + bf0[15]; + bf1[12] = -bf0[12] + bf0[8]; + bf1[13] = -bf0[13] + bf0[9]; + bf1[14] = -bf0[14] + bf0[10]; + bf1[15] = -bf0[15] + bf0[11]; + bf1[16] = bf0[16] + bf0[20]; + bf1[17] = bf0[17] + bf0[21]; + bf1[18] = bf0[18] + bf0[22]; + bf1[19] = bf0[19] + bf0[23]; 
+ bf1[20] = -bf0[20] + bf0[16]; + bf1[21] = -bf0[21] + bf0[17]; + bf1[22] = -bf0[22] + bf0[18]; + bf1[23] = -bf0[23] + bf0[19]; + bf1[24] = bf0[24] + bf0[28]; + bf1[25] = bf0[25] + bf0[29]; + bf1[26] = bf0[26] + bf0[30]; + bf1[27] = bf0[27] + bf0[31]; + bf1[28] = -bf0[28] + bf0[24]; + bf1[29] = -bf0[29] + bf0[25]; + bf1[30] = -bf0[30] + bf0[26]; + bf1[31] = -bf0[31] + bf0[27]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 8 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]); + bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]); + bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]); + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = bf0[18]; + bf1[19] = bf0[19]; + bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]); + bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit[stage]); + bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit[stage]); + bf1[24] = bf0[24]; + bf1[25] = bf0[25]; + bf1[26] = bf0[26]; + bf1[27] = bf0[27]; + bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit[stage]); + bf1[30] = half_btf(-cospi[48], bf0[30], 
cospi[16], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 9 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = -bf0[2] + bf0[0]; + bf1[3] = -bf0[3] + bf0[1]; + bf1[4] = bf0[4] + bf0[6]; + bf1[5] = bf0[5] + bf0[7]; + bf1[6] = -bf0[6] + bf0[4]; + bf1[7] = -bf0[7] + bf0[5]; + bf1[8] = bf0[8] + bf0[10]; + bf1[9] = bf0[9] + bf0[11]; + bf1[10] = -bf0[10] + bf0[8]; + bf1[11] = -bf0[11] + bf0[9]; + bf1[12] = bf0[12] + bf0[14]; + bf1[13] = bf0[13] + bf0[15]; + bf1[14] = -bf0[14] + bf0[12]; + bf1[15] = -bf0[15] + bf0[13]; + bf1[16] = bf0[16] + bf0[18]; + bf1[17] = bf0[17] + bf0[19]; + bf1[18] = -bf0[18] + bf0[16]; + bf1[19] = -bf0[19] + bf0[17]; + bf1[20] = bf0[20] + bf0[22]; + bf1[21] = bf0[21] + bf0[23]; + bf1[22] = -bf0[22] + bf0[20]; + bf1[23] = -bf0[23] + bf0[21]; + bf1[24] = bf0[24] + bf0[26]; + bf1[25] = bf0[25] + bf0[27]; + bf1[26] = -bf0[26] + bf0[24]; + bf1[27] = -bf0[27] + bf0[25]; + bf1[28] = bf0[28] + bf0[30]; + bf1[29] = bf0[29] + bf0[31]; + bf1[30] = -bf0[30] + bf0[28]; + bf1[31] = -bf0[31] + bf0[29]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 10 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]); + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]); + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + 
bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]); + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]); + bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit[stage]); + bf1[20] = bf0[20]; + bf1[21] = bf0[21]; + bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit[stage]); + bf1[24] = bf0[24]; + bf1[25] = bf0[25]; + bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]); + bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit[stage]); + bf1[28] = bf0[28]; + bf1[29] = bf0[29]; + bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 11 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = -bf0[16]; + bf1[2] = bf0[24]; + bf1[3] = -bf0[8]; + bf1[4] = bf0[12]; + bf1[5] = -bf0[28]; + bf1[6] = bf0[20]; + bf1[7] = -bf0[4]; + bf1[8] = bf0[6]; + bf1[9] = -bf0[22]; + bf1[10] = bf0[30]; + bf1[11] = -bf0[14]; + bf1[12] = bf0[10]; + bf1[13] = -bf0[26]; + bf1[14] = bf0[18]; + bf1[15] = -bf0[2]; + bf1[16] = bf0[3]; + bf1[17] = -bf0[19]; + bf1[18] = bf0[27]; + bf1[19] = -bf0[11]; + bf1[20] = bf0[15]; + bf1[21] = -bf0[31]; + bf1[22] = bf0[23]; + bf1[23] = -bf0[7]; + bf1[24] = bf0[5]; + bf1[25] = -bf0[21]; + bf1[26] = bf0[29]; + bf1[27] = -bf0[13]; + bf1[28] = bf0[9]; + bf1[29] = -bf0[25]; + bf1[30] = bf0[17]; + bf1[31] = -bf0[1]; + range_check(stage, input, bf1, size, stage_range[stage]); +}
diff --git a/vp10/common/vp10_fwd_txfm1d.h b/vp10/common/vp10_fwd_txfm1d.h new file mode 100644 index 0000000..d5b9f40 --- /dev/null +++ b/vp10/common/vp10_fwd_txfm1d.h
@@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_FWD_TXFM1D_H_ +#define VP10_FWD_TXFM1D_H_ + +#include "vp10/common/vp10_txfm.h" + +#ifdef __cplusplus +extern "C" { +#endif +/* Forward 1-D DCT kernels for transform lengths 4, 8, 16 and 32. */ +void vp10_fdct4_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fdct8_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fdct16_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fdct32_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); +/* Forward 1-D ADST kernels; cos_bit[] and stage_range[] are read per stage. */ +void vp10_fadst4_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fadst8_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fadst16_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fadst32_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range); + +#ifdef __cplusplus +} +#endif + +#endif // VP10_FWD_TXFM1D_H_
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c new file mode 100644 index 0000000..67449ec --- /dev/null +++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_fwd_txfm2d.h"
+#include "vp10/common/vp10_txfm.h"
+
+// Generic 2-D forward transform shared by all block sizes.
+//
+// Applies cfg->txfm_func_col to every column of the input block and then
+// cfg->txfm_func_row to every row of the intermediate result, calling
+// round_shift_array() with -shift[0] before the column kernel, -shift[1]
+// after it and -shift[2] after the row kernel.
+//
+//   input    - txfm_size x txfm_size block, `stride` elements between rows.
+//   output   - receives txfm_size * txfm_size coefficients, row by row.
+//   cfg      - transform size, 1-D kernels, shifts and per-stage
+//              cos_bit / stage_range tables.
+//   txfm_buf - scratch space holding at least
+//              txfm_size * txfm_size + 2 * txfm_size int32_t elements.
+static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
+                                const int stride, const TXFM_2D_CFG *cfg,
+                                int32_t *txfm_buf) {
+  int i, j;
+  const int txfm_size = cfg->txfm_size;
+  const int8_t *shift = cfg->shift;
+  const int8_t *stage_range_col = cfg->stage_range_col;
+  const int8_t *stage_range_row = cfg->stage_range_row;
+  const int8_t *cos_bit_col = cfg->cos_bit_col;
+  const int8_t *cos_bit_row = cfg->cos_bit_row;
+  const TxfmFunc txfm_func_col = cfg->txfm_func_col;
+  const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+
+  // Carve the scratch buffer into one 1-D input line, one 1-D output line and
+  // the txfm_size x txfm_size intermediate block written by the column pass.
+  int32_t *temp_in = txfm_buf;
+  int32_t *temp_out = temp_in + txfm_size;
+  int32_t *buf = temp_out + txfm_size;
+
+  // Columns: gather column i, transform it, store it back as column i of buf.
+  for (i = 0; i < txfm_size; ++i) {
+    for (j = 0; j < txfm_size; ++j)
+      temp_in[j] = input[j * stride + i];
+    round_shift_array(temp_in, txfm_size, -shift[0]);
+    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+    round_shift_array(temp_out, txfm_size, -shift[1]);
+    for (j = 0; j < txfm_size; ++j)
+      buf[j * txfm_size + i] = temp_out[j];
+  }
+
+  // Rows: transform row i of buf and write it to row i of output.
+  for (i = 0; i < txfm_size; ++i) {
+    for (j = 0; j < txfm_size; ++j)
+      temp_in[j] = buf[j + i * txfm_size];
+    txfm_func_row(temp_in, temp_out, cos_bit_row, stage_range_row);
+    round_shift_array(temp_out, txfm_size, -shift[2]);
+    for (j = 0; j < txfm_size; ++j)
+      output[j + i * txfm_size] = temp_out[j];
+  }
+}
+
+// Size-specific entry points. Each one supplies stack scratch of
+// N * N + 2 * N int32_t elements, matching the int32_t *txfm_buf parameter of
+// fwd_txfm2d_c(). `bd` is accepted but not used.
+void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output,
+                         const int stride, const TXFM_2D_CFG *cfg,
+                         const int bd) {
+  int32_t txfm_buf[4 * 4 + 4 + 4];
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output,
+                         const int stride, const TXFM_2D_CFG *cfg,
+                         const int bd) {
+  int32_t txfm_buf[8 * 8 + 8 + 8];
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output,
+                           const int stride, const TXFM_2D_CFG *cfg,
+                           const int bd) {
+  int32_t txfm_buf[16 * 16 + 16 + 16];
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output,
+                           const int stride, const TXFM_2D_CFG *cfg,
+                           const int bd) {
+  int32_t txfm_buf[32 * 32 + 32 + 32];
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
diff --git a/vp10/common/vp10_fwd_txfm2d.h b/vp10/common/vp10_fwd_txfm2d.h new file mode 100644 index 0000000..64e6f56 --- /dev/null +++ b/vp10/common/vp10_fwd_txfm2d.h
@@ -0,0 +1,33 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_FWD_TXFM2D_H_ +#define VP10_FWD_TXFM2D_H_ +/* 2-D forward transforms for square blocks; cfg supplies the 1-D kernels. */ +#include "vp10/common/vp10_txfm.h" +#ifdef __cplusplus +extern "C" { +#endif +void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output, + const int stride, const TXFM_2D_CFG *cfg, + const int bd); +void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output, + const int stride, const TXFM_2D_CFG *cfg, + const int bd); +void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output, + const int stride, const TXFM_2D_CFG *cfg, + const int bd); +void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output, + const int stride, const TXFM_2D_CFG *cfg, + const int bd); +#ifdef __cplusplus +} +#endif +#endif // VP10_FWD_TXFM2D_H_
diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h new file mode 100644 index 0000000..93fee6f --- /dev/null +++ b/vp10/common/vp10_fwd_txfm2d_cfg.h
@@ -0,0 +1,367 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_FWD_TXFM2D_CFG_H_ +#define VP10_FWD_TXFM2D_CFG_H_ +#include "vp10/common/vp10_fwd_txfm1d.h" + +// ---------------- config fwd_dct_dct_4 ---------------- +static int8_t fwd_shift_dct_dct_4[3] = {4, 0, -2}; +static int8_t fwd_stage_range_col_dct_dct_4[4] = {15, 16, 17, 17}; +static int8_t fwd_stage_range_row_dct_dct_4[4] = {17, 18, 18, 18}; +static int8_t fwd_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15}; +static int8_t fwd_cos_bit_row_dct_dct_4[4] = {15, 14, 14, 14}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = { + .txfm_size = 4, + .stage_num_col = 4, + .stage_num_row = 4, + + .shift = fwd_shift_dct_dct_4, + .stage_range_col = fwd_stage_range_col_dct_dct_4, + .stage_range_row = fwd_stage_range_row_dct_dct_4, + .cos_bit_col = fwd_cos_bit_col_dct_dct_4, + .cos_bit_row = fwd_cos_bit_row_dct_dct_4, + .txfm_func_col = vp10_fdct4_new, + .txfm_func_row = vp10_fdct4_new}; + +// ---------------- config fwd_dct_dct_8 ---------------- +static int8_t fwd_shift_dct_dct_8[3] = {5, -3, -1}; +static int8_t fwd_stage_range_col_dct_dct_8[6] = {16, 17, 18, 19, 19, 19}; +static int8_t fwd_stage_range_row_dct_dct_8[6] = {16, 17, 18, 18, 18, 18}; +static int8_t fwd_cos_bit_col_dct_dct_8[6] = {15, 15, 14, 13, 13, 13}; +static int8_t fwd_cos_bit_row_dct_dct_8[6] = {15, 15, 14, 14, 14, 14}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = { + .txfm_size = 8, + .stage_num_col = 6, + .stage_num_row = 6, + + .shift = fwd_shift_dct_dct_8, + .stage_range_col = fwd_stage_range_col_dct_dct_8, + .stage_range_row = 
fwd_stage_range_row_dct_dct_8, + .cos_bit_col = fwd_cos_bit_col_dct_dct_8, + .cos_bit_row = fwd_cos_bit_row_dct_dct_8, + .txfm_func_col = vp10_fdct8_new, + .txfm_func_row = vp10_fdct8_new}; + +// ---------------- config fwd_dct_dct_16 ---------------- +static int8_t fwd_shift_dct_dct_16[3] = {4, -3, -1}; +static int8_t fwd_stage_range_col_dct_dct_16[8] = {15, 16, 17, 18, + 19, 19, 19, 19}; +static int8_t fwd_stage_range_row_dct_dct_16[8] = {16, 17, 18, 19, + 19, 19, 19, 19}; +static int8_t fwd_cos_bit_col_dct_dct_16[8] = {15, 15, 15, 14, 13, 13, 13, 13}; +static int8_t fwd_cos_bit_row_dct_dct_16[8] = {15, 15, 14, 13, 13, 13, 13, 13}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = { + .txfm_size = 16, + .stage_num_col = 8, + .stage_num_row = 8, + + .shift = fwd_shift_dct_dct_16, + .stage_range_col = fwd_stage_range_col_dct_dct_16, + .stage_range_row = fwd_stage_range_row_dct_dct_16, + .cos_bit_col = fwd_cos_bit_col_dct_dct_16, + .cos_bit_row = fwd_cos_bit_row_dct_dct_16, + .txfm_func_col = vp10_fdct16_new, + .txfm_func_row = vp10_fdct16_new}; + +// ---------------- config fwd_dct_dct_32 ---------------- +static int8_t fwd_shift_dct_dct_32[3] = {3, -3, -1}; +static int8_t fwd_stage_range_col_dct_dct_32[10] = {14, 15, 16, 17, 18, + 19, 19, 19, 19, 19}; +static int8_t fwd_stage_range_row_dct_dct_32[10] = {16, 17, 18, 19, 20, + 20, 20, 20, 20, 20}; +static int8_t fwd_cos_bit_col_dct_dct_32[10] = {15, 15, 15, 15, 14, + 13, 13, 13, 13, 13}; +static int8_t fwd_cos_bit_row_dct_dct_32[10] = {15, 15, 14, 13, 12, + 12, 12, 12, 12, 12}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = { + .txfm_size = 32, + .stage_num_col = 10, + .stage_num_row = 10, + + .shift = fwd_shift_dct_dct_32, + .stage_range_col = fwd_stage_range_col_dct_dct_32, + .stage_range_row = fwd_stage_range_row_dct_dct_32, + .cos_bit_col = fwd_cos_bit_col_dct_dct_32, + .cos_bit_row = fwd_cos_bit_row_dct_dct_32, + .txfm_func_col = vp10_fdct32_new, + .txfm_func_row = vp10_fdct32_new}; + +// 
---------------- config fwd_dct_adst_4 ---------------- +static int8_t fwd_shift_dct_adst_4[3] = {5, -2, -1}; +static int8_t fwd_stage_range_col_dct_adst_4[4] = {16, 17, 18, 18}; +static int8_t fwd_stage_range_row_dct_adst_4[6] = {16, 16, 16, 17, 17, 17}; +static int8_t fwd_cos_bit_col_dct_adst_4[4] = {15, 15, 14, 14}; +static int8_t fwd_cos_bit_row_dct_adst_4[6] = {15, 15, 15, 15, 15, 15}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = { + .txfm_size = 4, + .stage_num_col = 4, + .stage_num_row = 6, + + .shift = fwd_shift_dct_adst_4, + .stage_range_col = fwd_stage_range_col_dct_adst_4, + .stage_range_row = fwd_stage_range_row_dct_adst_4, + .cos_bit_col = fwd_cos_bit_col_dct_adst_4, + .cos_bit_row = fwd_cos_bit_row_dct_adst_4, + .txfm_func_col = vp10_fdct4_new, + .txfm_func_row = vp10_fadst4_new}; + +// ---------------- config fwd_dct_adst_8 ---------------- +static int8_t fwd_shift_dct_adst_8[3] = {7, -3, -3}; +static int8_t fwd_stage_range_col_dct_adst_8[6] = {18, 19, 20, 21, 21, 21}; +static int8_t fwd_stage_range_row_dct_adst_8[8] = {18, 18, 18, 19, + 19, 20, 20, 20}; +static int8_t fwd_cos_bit_col_dct_adst_8[6] = {14, 13, 12, 11, 11, 11}; +static int8_t fwd_cos_bit_row_dct_adst_8[8] = {14, 14, 14, 13, 13, 12, 12, 12}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = { + .txfm_size = 8, + .stage_num_col = 6, + .stage_num_row = 8, + + .shift = fwd_shift_dct_adst_8, + .stage_range_col = fwd_stage_range_col_dct_adst_8, + .stage_range_row = fwd_stage_range_row_dct_adst_8, + .cos_bit_col = fwd_cos_bit_col_dct_adst_8, + .cos_bit_row = fwd_cos_bit_row_dct_adst_8, + .txfm_func_col = vp10_fdct8_new, + .txfm_func_row = vp10_fadst8_new}; + +// ---------------- config fwd_dct_adst_16 ---------------- +static int8_t fwd_shift_dct_adst_16[3] = {4, -1, -3}; +static int8_t fwd_stage_range_col_dct_adst_16[8] = {15, 16, 17, 18, + 19, 19, 19, 19}; +static int8_t fwd_stage_range_row_dct_adst_16[10] = {18, 18, 18, 19, 19, + 20, 20, 21, 21, 21}; +static int8_t 
fwd_cos_bit_col_dct_adst_16[8] = {15, 15, 15, 14, 13, 13, 13, 13}; +static int8_t fwd_cos_bit_row_dct_adst_16[10] = {14, 14, 14, 13, 13, + 12, 12, 11, 11, 11}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = { + .txfm_size = 16, + .stage_num_col = 8, + .stage_num_row = 10, + + .shift = fwd_shift_dct_adst_16, + .stage_range_col = fwd_stage_range_col_dct_adst_16, + .stage_range_row = fwd_stage_range_row_dct_adst_16, + .cos_bit_col = fwd_cos_bit_col_dct_adst_16, + .cos_bit_row = fwd_cos_bit_row_dct_adst_16, + .txfm_func_col = vp10_fdct16_new, + .txfm_func_row = vp10_fadst16_new}; + +// ---------------- config fwd_dct_adst_32 ---------------- +static int8_t fwd_shift_dct_adst_32[3] = {3, -1, -3}; +static int8_t fwd_stage_range_col_dct_adst_32[10] = {14, 15, 16, 17, 18, + 19, 19, 19, 19, 19}; +static int8_t fwd_stage_range_row_dct_adst_32[12] = {18, 18, 18, 19, 19, 20, + 20, 21, 21, 22, 22, 22}; +static int8_t fwd_cos_bit_col_dct_adst_32[10] = {15, 15, 15, 15, 14, + 13, 13, 13, 13, 13}; +static int8_t fwd_cos_bit_row_dct_adst_32[12] = {14, 14, 14, 13, 13, 12, + 12, 11, 11, 10, 10, 10}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = { + .txfm_size = 32, + .stage_num_col = 10, + .stage_num_row = 12, + + .shift = fwd_shift_dct_adst_32, + .stage_range_col = fwd_stage_range_col_dct_adst_32, + .stage_range_row = fwd_stage_range_row_dct_adst_32, + .cos_bit_col = fwd_cos_bit_col_dct_adst_32, + .cos_bit_row = fwd_cos_bit_row_dct_adst_32, + .txfm_func_col = vp10_fdct32_new, + .txfm_func_row = vp10_fadst32_new}; + +// ---------------- config fwd_adst_adst_4 ---------------- +static int8_t fwd_shift_adst_adst_4[3] = {6, 1, -5}; +static int8_t fwd_stage_range_col_adst_adst_4[6] = {17, 17, 18, 19, 19, 19}; +static int8_t fwd_stage_range_row_adst_adst_4[6] = {20, 20, 20, 21, 21, 21}; +static int8_t fwd_cos_bit_col_adst_adst_4[6] = {15, 15, 14, 13, 13, 13}; +static int8_t fwd_cos_bit_row_adst_adst_4[6] = {12, 12, 12, 11, 11, 11}; + +static const TXFM_2D_CFG 
fwd_txfm_2d_cfg_adst_adst_4 = { + .txfm_size = 4, + .stage_num_col = 6, + .stage_num_row = 6, + + .shift = fwd_shift_adst_adst_4, + .stage_range_col = fwd_stage_range_col_adst_adst_4, + .stage_range_row = fwd_stage_range_row_adst_adst_4, + .cos_bit_col = fwd_cos_bit_col_adst_adst_4, + .cos_bit_row = fwd_cos_bit_row_adst_adst_4, + .txfm_func_col = vp10_fadst4_new, + .txfm_func_row = vp10_fadst4_new}; + +// ---------------- config fwd_adst_adst_8 ---------------- +static int8_t fwd_shift_adst_adst_8[3] = {3, -1, -1}; +static int8_t fwd_stage_range_col_adst_adst_8[8] = {14, 14, 15, 16, + 16, 17, 17, 17}; +static int8_t fwd_stage_range_row_adst_adst_8[8] = {16, 16, 16, 17, + 17, 18, 18, 18}; +static int8_t fwd_cos_bit_col_adst_adst_8[8] = {15, 15, 15, 15, 15, 15, 15, 15}; +static int8_t fwd_cos_bit_row_adst_adst_8[8] = {15, 15, 15, 15, 15, 14, 14, 14}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = { + .txfm_size = 8, + .stage_num_col = 8, + .stage_num_row = 8, + + .shift = fwd_shift_adst_adst_8, + .stage_range_col = fwd_stage_range_col_adst_adst_8, + .stage_range_row = fwd_stage_range_row_adst_adst_8, + .cos_bit_col = fwd_cos_bit_col_adst_adst_8, + .cos_bit_row = fwd_cos_bit_row_adst_adst_8, + .txfm_func_col = vp10_fadst8_new, + .txfm_func_row = vp10_fadst8_new}; + +// ---------------- config fwd_adst_adst_16 ---------------- +static int8_t fwd_shift_adst_adst_16[3] = {2, 0, -2}; +static int8_t fwd_stage_range_col_adst_adst_16[10] = {13, 13, 14, 15, 15, + 16, 16, 17, 17, 17}; +static int8_t fwd_stage_range_row_adst_adst_16[10] = {17, 17, 17, 18, 18, + 19, 19, 20, 20, 20}; +static int8_t fwd_cos_bit_col_adst_adst_16[10] = {15, 15, 15, 15, 15, + 15, 15, 15, 15, 15}; +static int8_t fwd_cos_bit_row_adst_adst_16[10] = {15, 15, 15, 14, 14, + 13, 13, 12, 12, 12}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = { + .txfm_size = 16, + .stage_num_col = 10, + .stage_num_row = 10, + + .shift = fwd_shift_adst_adst_16, + .stage_range_col = 
fwd_stage_range_col_adst_adst_16, + .stage_range_row = fwd_stage_range_row_adst_adst_16, + .cos_bit_col = fwd_cos_bit_col_adst_adst_16, + .cos_bit_row = fwd_cos_bit_row_adst_adst_16, + .txfm_func_col = vp10_fadst16_new, + .txfm_func_row = vp10_fadst16_new}; + +// ---------------- config fwd_adst_adst_32 ---------------- +static int8_t fwd_shift_adst_adst_32[3] = {4, -2, -3}; +static int8_t fwd_stage_range_col_adst_adst_32[12] = {15, 15, 16, 17, 17, 18, + 18, 19, 19, 20, 20, 20}; +static int8_t fwd_stage_range_row_adst_adst_32[12] = {18, 18, 18, 19, 19, 20, + 20, 21, 21, 22, 22, 22}; +static int8_t fwd_cos_bit_col_adst_adst_32[12] = {15, 15, 15, 15, 15, 14, + 14, 13, 13, 12, 12, 12}; +static int8_t fwd_cos_bit_row_adst_adst_32[12] = {14, 14, 14, 13, 13, 12, + 12, 11, 11, 10, 10, 10}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = { + .txfm_size = 32, + .stage_num_col = 12, + .stage_num_row = 12, + + .shift = fwd_shift_adst_adst_32, + .stage_range_col = fwd_stage_range_col_adst_adst_32, + .stage_range_row = fwd_stage_range_row_adst_adst_32, + .cos_bit_col = fwd_cos_bit_col_adst_adst_32, + .cos_bit_row = fwd_cos_bit_row_adst_adst_32, + .txfm_func_col = vp10_fadst32_new, + .txfm_func_row = vp10_fadst32_new}; + +// ---------------- config fwd_adst_dct_4 ---------------- +static int8_t fwd_shift_adst_dct_4[3] = {5, -4, 1}; +static int8_t fwd_stage_range_col_adst_dct_4[6] = {16, 16, 17, 18, 18, 18}; +static int8_t fwd_stage_range_row_adst_dct_4[4] = {14, 15, 15, 15}; +static int8_t fwd_cos_bit_col_adst_dct_4[6] = {15, 15, 15, 14, 14, 14}; +static int8_t fwd_cos_bit_row_adst_dct_4[4] = {15, 15, 15, 15}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = { + .txfm_size = 4, + .stage_num_col = 6, + .stage_num_row = 4, + + .shift = fwd_shift_adst_dct_4, + .stage_range_col = fwd_stage_range_col_adst_dct_4, + .stage_range_row = fwd_stage_range_row_adst_dct_4, + .cos_bit_col = fwd_cos_bit_col_adst_dct_4, + .cos_bit_row = fwd_cos_bit_row_adst_dct_4, + 
.txfm_func_col = vp10_fadst4_new, + .txfm_func_row = vp10_fdct4_new}; + +// ---------------- config fwd_adst_dct_8 ---------------- +static int8_t fwd_shift_adst_dct_8[3] = {5, 1, -5}; +static int8_t fwd_stage_range_col_adst_dct_8[8] = {16, 16, 17, 18, + 18, 19, 19, 19}; +static int8_t fwd_stage_range_row_adst_dct_8[6] = {20, 21, 22, 22, 22, 22}; +static int8_t fwd_cos_bit_col_adst_dct_8[8] = {15, 15, 15, 14, 14, 13, 13, 13}; +static int8_t fwd_cos_bit_row_adst_dct_8[6] = {12, 11, 10, 10, 10, 10}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = { + .txfm_size = 8, + .stage_num_col = 8, + .stage_num_row = 6, + + .shift = fwd_shift_adst_dct_8, + .stage_range_col = fwd_stage_range_col_adst_dct_8, + .stage_range_row = fwd_stage_range_row_adst_dct_8, + .cos_bit_col = fwd_cos_bit_col_adst_dct_8, + .cos_bit_row = fwd_cos_bit_row_adst_dct_8, + .txfm_func_col = vp10_fadst8_new, + .txfm_func_row = vp10_fdct8_new}; + +// ---------------- config fwd_adst_dct_16 ---------------- +static int8_t fwd_shift_adst_dct_16[3] = {4, -3, -1}; +static int8_t fwd_stage_range_col_adst_dct_16[10] = {15, 15, 16, 17, 17, + 18, 18, 19, 19, 19}; +static int8_t fwd_stage_range_row_adst_dct_16[8] = {16, 17, 18, 19, + 19, 19, 19, 19}; +static int8_t fwd_cos_bit_col_adst_dct_16[10] = {15, 15, 15, 15, 15, + 14, 14, 13, 13, 13}; +static int8_t fwd_cos_bit_row_adst_dct_16[8] = {15, 15, 14, 13, 13, 13, 13, 13}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = { + .txfm_size = 16, + .stage_num_col = 10, + .stage_num_row = 8, + + .shift = fwd_shift_adst_dct_16, + .stage_range_col = fwd_stage_range_col_adst_dct_16, + .stage_range_row = fwd_stage_range_row_adst_dct_16, + .cos_bit_col = fwd_cos_bit_col_adst_dct_16, + .cos_bit_row = fwd_cos_bit_row_adst_dct_16, + .txfm_func_col = vp10_fadst16_new, + .txfm_func_row = vp10_fdct16_new}; + +// ---------------- config fwd_adst_dct_32 ---------------- +static int8_t fwd_shift_adst_dct_32[3] = {5, -4, -2}; +static int8_t 
fwd_stage_range_col_adst_dct_32[12] = {16, 16, 17, 18, 18, 19, + 19, 20, 20, 21, 21, 21}; +static int8_t fwd_stage_range_row_adst_dct_32[10] = {17, 18, 19, 20, 21, + 21, 21, 21, 21, 21}; +static int8_t fwd_cos_bit_col_adst_dct_32[12] = {15, 15, 15, 14, 14, 13, + 13, 12, 12, 11, 11, 11}; +static int8_t fwd_cos_bit_row_adst_dct_32[10] = {15, 14, 13, 12, 11, + 11, 11, 11, 11, 11}; + +static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = { + .txfm_size = 32, + .stage_num_col = 12, + .stage_num_row = 10, + + .shift = fwd_shift_adst_dct_32, + .stage_range_col = fwd_stage_range_col_adst_dct_32, + .stage_range_row = fwd_stage_range_row_adst_dct_32, + .cos_bit_col = fwd_cos_bit_col_adst_dct_32, + .cos_bit_row = fwd_cos_bit_row_adst_dct_32, + .txfm_func_col = vp10_fadst32_new, + .txfm_func_row = vp10_fdct32_new}; + +#endif // VP10_FWD_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_inv_txfm1d.c b/vp10/common/vp10_inv_txfm1d.c new file mode 100644 index 0000000..b64b601 --- /dev/null +++ b/vp10/common/vp10_inv_txfm1d.c
@@ -0,0 +1,1536 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp10/common/vp10_inv_txfm1d.h" +#if CONFIG_COEFFICIENT_RANGE_CHECKING /* checking build: range_check() scans buf[0..size-1]; when get_max_bit(abs(buf[i])) + 1 exceeds bit it prints the function, stage, node and the whole input vector, then asserts */ +#define range_check(stage, input, buf, size, bit) \ + { \ + int i, j; \ + for (i = 0; i < size; ++i) { \ + int buf_bit = get_max_bit(abs(buf[i])) + 1; \ + if (buf_bit > bit) { \ + printf("======== %s overflow ========\n", __func__); \ + printf("stage: %d node: %d\n", stage, i); \ + printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \ + printf("input:\n"); \ + for (j = 0; j < size; j++) { \ + printf("%d,", input[j]); \ + } \ + printf("\n"); \ + assert(0 && "vp10_inv_txfm1d.c: range_check overflow"); /* assert() takes a single expression; && keeps the message in the failure text */ \ + } \ + } \ + } +#else /* checking disabled: range_check() only marks its arguments as used */ +#define range_check(stage, input, buf, size, bit) \ + { \ + (void) stage; \ + (void) input; \ + (void) buf; \ + (void) size; \ + (void) bit; \ + } +#endif + +void vp10_idct4_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 4; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[4]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = input[2]; + bf1[2] = input[1]; + bf1[3] = input[3]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[48],
bf0[2], -cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[3]; + bf1[1] = bf0[1] + bf0[2]; + bf1[2] = bf0[1] - bf0[2]; + bf1[3] = bf0[0] - bf0[3]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_idct8_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 8; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[8]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = input[4]; + bf1[2] = input[2]; + bf1[3] = input[6]; + bf1[4] = input[1]; + bf1[5] = input[5]; + bf1[6] = input[3]; + bf1[7] = input[7]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]); + bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]); + bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]); + bf1[4] = bf0[4] + bf0[5]; + bf1[5] = bf0[4] - bf0[5]; + 
bf1[6] = -bf0[6] + bf0[7]; + bf1[7] = bf0[6] + bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[3]; + bf1[1] = bf0[1] + bf0[2]; + bf1[2] = bf0[1] - bf0[2]; + bf1[3] = bf0[0] - bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[7] = bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[7]; + bf1[1] = bf0[1] + bf0[6]; + bf1[2] = bf0[2] + bf0[5]; + bf1[3] = bf0[3] + bf0[4]; + bf1[4] = bf0[3] - bf0[4]; + bf1[5] = bf0[2] - bf0[5]; + bf1[6] = bf0[1] - bf0[6]; + bf1[7] = bf0[0] - bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_idct16_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 16; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[16]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = input[8]; + bf1[2] = input[4]; + bf1[3] = input[12]; + bf1[4] = input[2]; + bf1[5] = input[10]; + bf1[6] = input[6]; + bf1[7] = input[14]; + bf1[8] = input[1]; + bf1[9] = input[9]; + bf1[10] = input[5]; + bf1[11] = input[13]; + bf1[12] = input[3]; + bf1[13] = input[11]; + bf1[14] = input[7]; + bf1[15] = input[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], 
bf0[15], cos_bit[stage]); + bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]); + bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]); + bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]); + bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]); + bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]); + bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]); + bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]); + bf1[8] = bf0[8] + bf0[9]; + bf1[9] = bf0[8] - bf0[9]; + bf1[10] = -bf0[10] + bf0[11]; + bf1[11] = bf0[10] + bf0[11]; + bf1[12] = bf0[12] + bf0[13]; + bf1[13] = bf0[12] - bf0[13]; + bf1[14] = -bf0[14] + bf0[15]; + bf1[15] = bf0[14] + bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]); + bf1[4] = bf0[4] + bf0[5]; + bf1[5] = bf0[4] - bf0[5]; + bf1[6] = -bf0[6] + bf0[7]; + bf1[7] = bf0[6] + bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = half_btf(-cospi[16], bf0[9], 
cospi[48], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]); + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]); + bf1[15] = bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[3]; + bf1[1] = bf0[1] + bf0[2]; + bf1[2] = bf0[1] - bf0[2]; + bf1[3] = bf0[0] - bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[7] = bf0[7]; + bf1[8] = bf0[8] + bf0[11]; + bf1[9] = bf0[9] + bf0[10]; + bf1[10] = bf0[9] - bf0[10]; + bf1[11] = bf0[8] - bf0[11]; + bf1[12] = -bf0[12] + bf0[15]; + bf1[13] = -bf0[13] + bf0[14]; + bf1[14] = bf0[13] + bf0[14]; + bf1[15] = bf0[12] + bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[7]; + bf1[1] = bf0[1] + bf0[6]; + bf1[2] = bf0[2] + bf0[5]; + bf1[3] = bf0[3] + bf0[4]; + bf1[4] = bf0[3] - bf0[4]; + bf1[5] = bf0[2] - bf0[5]; + bf1[6] = bf0[1] - bf0[6]; + bf1[7] = bf0[0] - bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]); + bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]); + bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]); + bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]); + bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[15]; + bf1[1] = bf0[1] + 
bf0[14]; + bf1[2] = bf0[2] + bf0[13]; + bf1[3] = bf0[3] + bf0[12]; + bf1[4] = bf0[4] + bf0[11]; + bf1[5] = bf0[5] + bf0[10]; + bf1[6] = bf0[6] + bf0[9]; + bf1[7] = bf0[7] + bf0[8]; + bf1[8] = bf0[7] - bf0[8]; + bf1[9] = bf0[6] - bf0[9]; + bf1[10] = bf0[5] - bf0[10]; + bf1[11] = bf0[4] - bf0[11]; + bf1[12] = bf0[3] - bf0[12]; + bf1[13] = bf0[2] - bf0[13]; + bf1[14] = bf0[1] - bf0[14]; + bf1[15] = bf0[0] - bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_idct32_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 32; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[32]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = input[16]; + bf1[2] = input[8]; + bf1[3] = input[24]; + bf1[4] = input[4]; + bf1[5] = input[20]; + bf1[6] = input[12]; + bf1[7] = input[28]; + bf1[8] = input[2]; + bf1[9] = input[18]; + bf1[10] = input[10]; + bf1[11] = input[26]; + bf1[12] = input[6]; + bf1[13] = input[22]; + bf1[14] = input[14]; + bf1[15] = input[30]; + bf1[16] = input[1]; + bf1[17] = input[17]; + bf1[18] = input[9]; + bf1[19] = input[25]; + bf1[20] = input[5]; + bf1[21] = input[21]; + bf1[22] = input[13]; + bf1[23] = input[29]; + bf1[24] = input[3]; + bf1[25] = input[19]; + bf1[26] = input[11]; + bf1[27] = input[27]; + bf1[28] = input[7]; + bf1[29] = input[23]; + bf1[30] = input[15]; + bf1[31] = input[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + 
bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]); + bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]); + bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]); + bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]); + bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]); + bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]); + bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]); + bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]); + bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]); + bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]); + bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]); + bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]); + bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]); + bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]); + bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]); + bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]); + bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]); + bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]); + bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]); 
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]); + bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]); + bf1[16] = bf0[16] + bf0[17]; + bf1[17] = bf0[16] - bf0[17]; + bf1[18] = -bf0[18] + bf0[19]; + bf1[19] = bf0[18] + bf0[19]; + bf1[20] = bf0[20] + bf0[21]; + bf1[21] = bf0[20] - bf0[21]; + bf1[22] = -bf0[22] + bf0[23]; + bf1[23] = bf0[22] + bf0[23]; + bf1[24] = bf0[24] + bf0[25]; + bf1[25] = bf0[24] - bf0[25]; + bf1[26] = -bf0[26] + bf0[27]; + bf1[27] = bf0[26] + bf0[27]; + bf1[28] = bf0[28] + bf0[29]; + bf1[29] = bf0[28] - bf0[29]; + bf1[30] = -bf0[30] + bf0[31]; + bf1[31] = bf0[30] + bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]); + bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]); + bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]); + bf1[8] = bf0[8] + bf0[9]; + bf1[9] = bf0[8] - bf0[9]; + bf1[10] = -bf0[10] + bf0[11]; + bf1[11] = bf0[10] + bf0[11]; + bf1[12] = bf0[12] + bf0[13]; + bf1[13] = bf0[12] - bf0[13]; + bf1[14] = -bf0[14] + bf0[15]; + bf1[15] = bf0[14] + bf0[15]; + bf1[16] = bf0[16]; + bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]); + bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]); + bf1[19] = bf0[19]; + bf1[20] = bf0[20]; + bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]); + bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]); + bf1[23] = bf0[23]; + bf1[24] = bf0[24]; + bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], 
bf0[25], cos_bit[stage]); + bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]); + bf1[27] = bf0[27]; + bf1[28] = bf0[28]; + bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]); + bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]); + bf1[31] = bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]); + bf1[4] = bf0[4] + bf0[5]; + bf1[5] = bf0[4] - bf0[5]; + bf1[6] = -bf0[6] + bf0[7]; + bf1[7] = bf0[6] + bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]); + bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]); + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]); + bf1[15] = bf0[15]; + bf1[16] = bf0[16] + bf0[19]; + bf1[17] = bf0[17] + bf0[18]; + bf1[18] = bf0[17] - bf0[18]; + bf1[19] = bf0[16] - bf0[19]; + bf1[20] = -bf0[20] + bf0[23]; + bf1[21] = -bf0[21] + bf0[22]; + bf1[22] = bf0[21] + bf0[22]; + bf1[23] = bf0[20] + bf0[23]; + bf1[24] = bf0[24] + bf0[27]; + bf1[25] = bf0[25] + bf0[26]; + bf1[26] = bf0[25] - bf0[26]; + bf1[27] = bf0[24] - bf0[27]; + bf1[28] = -bf0[28] + bf0[31]; + bf1[29] = -bf0[29] + bf0[30]; + bf1[30] = bf0[29] + bf0[30]; + bf1[31] = bf0[28] + bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[3]; + 
bf1[1] = bf0[1] + bf0[2]; + bf1[2] = bf0[1] - bf0[2]; + bf1[3] = bf0[0] - bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]); + bf1[7] = bf0[7]; + bf1[8] = bf0[8] + bf0[11]; + bf1[9] = bf0[9] + bf0[10]; + bf1[10] = bf0[9] - bf0[10]; + bf1[11] = bf0[8] - bf0[11]; + bf1[12] = -bf0[12] + bf0[15]; + bf1[13] = -bf0[13] + bf0[14]; + bf1[14] = bf0[13] + bf0[14]; + bf1[15] = bf0[12] + bf0[15]; + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]); + bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]); + bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]); + bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]); + bf1[22] = bf0[22]; + bf1[23] = bf0[23]; + bf1[24] = bf0[24]; + bf1[25] = bf0[25]; + bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]); + bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]); + bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]); + bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]); + bf1[30] = bf0[30]; + bf1[31] = bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[7]; + bf1[1] = bf0[1] + bf0[6]; + bf1[2] = bf0[2] + bf0[5]; + bf1[3] = bf0[3] + bf0[4]; + bf1[4] = bf0[3] - bf0[4]; + bf1[5] = bf0[2] - bf0[5]; + bf1[6] = bf0[1] - bf0[6]; + bf1[7] = bf0[0] - bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]); + bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]); + bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]); + bf1[13] = 
half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]); + bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + bf1[16] = bf0[16] + bf0[23]; + bf1[17] = bf0[17] + bf0[22]; + bf1[18] = bf0[18] + bf0[21]; + bf1[19] = bf0[19] + bf0[20]; + bf1[20] = bf0[19] - bf0[20]; + bf1[21] = bf0[18] - bf0[21]; + bf1[22] = bf0[17] - bf0[22]; + bf1[23] = bf0[16] - bf0[23]; + bf1[24] = -bf0[24] + bf0[31]; + bf1[25] = -bf0[25] + bf0[30]; + bf1[26] = -bf0[26] + bf0[29]; + bf1[27] = -bf0[27] + bf0[28]; + bf1[28] = bf0[27] + bf0[28]; + bf1[29] = bf0[26] + bf0[29]; + bf1[30] = bf0[25] + bf0[30]; + bf1[31] = bf0[24] + bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 8 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0] + bf0[15]; + bf1[1] = bf0[1] + bf0[14]; + bf1[2] = bf0[2] + bf0[13]; + bf1[3] = bf0[3] + bf0[12]; + bf1[4] = bf0[4] + bf0[11]; + bf1[5] = bf0[5] + bf0[10]; + bf1[6] = bf0[6] + bf0[9]; + bf1[7] = bf0[7] + bf0[8]; + bf1[8] = bf0[7] - bf0[8]; + bf1[9] = bf0[6] - bf0[9]; + bf1[10] = bf0[5] - bf0[10]; + bf1[11] = bf0[4] - bf0[11]; + bf1[12] = bf0[3] - bf0[12]; + bf1[13] = bf0[2] - bf0[13]; + bf1[14] = bf0[1] - bf0[14]; + bf1[15] = bf0[0] - bf0[15]; + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = bf0[18]; + bf1[19] = bf0[19]; + bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]); + bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]); + bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]); + bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]); + bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]); + bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]); + bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]); + bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]); + bf1[28] = bf0[28]; + bf1[29] = bf0[29]; + 
bf1[30] = bf0[30]; + bf1[31] = bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 9 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[31]; + bf1[1] = bf0[1] + bf0[30]; + bf1[2] = bf0[2] + bf0[29]; + bf1[3] = bf0[3] + bf0[28]; + bf1[4] = bf0[4] + bf0[27]; + bf1[5] = bf0[5] + bf0[26]; + bf1[6] = bf0[6] + bf0[25]; + bf1[7] = bf0[7] + bf0[24]; + bf1[8] = bf0[8] + bf0[23]; + bf1[9] = bf0[9] + bf0[22]; + bf1[10] = bf0[10] + bf0[21]; + bf1[11] = bf0[11] + bf0[20]; + bf1[12] = bf0[12] + bf0[19]; + bf1[13] = bf0[13] + bf0[18]; + bf1[14] = bf0[14] + bf0[17]; + bf1[15] = bf0[15] + bf0[16]; + bf1[16] = bf0[15] - bf0[16]; + bf1[17] = bf0[14] - bf0[17]; + bf1[18] = bf0[13] - bf0[18]; + bf1[19] = bf0[12] - bf0[19]; + bf1[20] = bf0[11] - bf0[20]; + bf1[21] = bf0[10] - bf0[21]; + bf1[22] = bf0[9] - bf0[22]; + bf1[23] = bf0[8] - bf0[23]; + bf1[24] = bf0[7] - bf0[24]; + bf1[25] = bf0[6] - bf0[25]; + bf1[26] = bf0[5] - bf0[26]; + bf1[27] = bf0[4] - bf0[27]; + bf1[28] = bf0[3] - bf0[28]; + bf1[29] = bf0[2] - bf0[29]; + bf1[30] = bf0[1] - bf0[30]; + bf1[31] = bf0[0] - bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_iadst4_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 4; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[4]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = -input[3]; + bf1[2] = -input[1]; + bf1[3] = input[2]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]); + range_check(stage, 
input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = bf0[0] - bf0[2]; + bf1[3] = bf0[1] - bf0[3]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[56], bf0[0], -cospi[8], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[24], bf0[2], -cospi[40], bf0[3], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[1]; + bf1[1] = bf0[2]; + bf1[2] = bf0[3]; + bf1[3] = bf0[0]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_iadst8_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 8; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[8]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = -input[7]; + bf1[2] = -input[3]; + bf1[3] = input[4]; + bf1[4] = -input[1]; + bf1[5] = input[6]; + bf1[6] = input[2]; + bf1[7] = -input[5]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]); + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]); + bf1[7] = 
half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = bf0[0] - bf0[2]; + bf1[3] = bf0[1] - bf0[3]; + bf1[4] = bf0[4] + bf0[6]; + bf1[5] = bf0[5] + bf0[7]; + bf1[6] = bf0[4] - bf0[6]; + bf1[7] = bf0[5] - bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]); + bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[4]; + bf1[1] = bf0[1] + bf0[5]; + bf1[2] = bf0[2] + bf0[6]; + bf1[3] = bf0[3] + bf0[7]; + bf1[4] = bf0[0] - bf0[4]; + bf1[5] = bf0[1] - bf0[5]; + bf1[6] = bf0[2] - bf0[6]; + bf1[7] = bf0[3] - bf0[7]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit[stage]); + bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], 
cos_bit[stage]); + bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[1]; + bf1[1] = bf0[6]; + bf1[2] = bf0[3]; + bf1[3] = bf0[4]; + bf1[4] = bf0[5]; + bf1[5] = bf0[2]; + bf1[6] = bf0[7]; + bf1[7] = bf0[0]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_iadst16_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 16; + const int32_t *cospi; + + int32_t stage = 0; + int32_t *bf0, *bf1; + int32_t step[16]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = -input[15]; + bf1[2] = -input[7]; + bf1[3] = input[8]; + bf1[4] = -input[3]; + bf1[5] = input[12]; + bf1[6] = input[4]; + bf1[7] = -input[11]; + bf1[8] = -input[1]; + bf1[9] = input[14]; + bf1[10] = input[6]; + bf1[11] = -input[9]; + bf1[12] = input[2]; + bf1[13] = -input[13]; + bf1[14] = -input[5]; + bf1[15] = input[10]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]); + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]); + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + 
bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = bf0[0] - bf0[2]; + bf1[3] = bf0[1] - bf0[3]; + bf1[4] = bf0[4] + bf0[6]; + bf1[5] = bf0[5] + bf0[7]; + bf1[6] = bf0[4] - bf0[6]; + bf1[7] = bf0[5] - bf0[7]; + bf1[8] = bf0[8] + bf0[10]; + bf1[9] = bf0[9] + bf0[11]; + bf1[10] = bf0[8] - bf0[10]; + bf1[11] = bf0[9] - bf0[11]; + bf1[12] = bf0[12] + bf0[14]; + bf1[13] = bf0[13] + bf0[15]; + bf1[14] = bf0[12] - bf0[14]; + bf1[15] = bf0[13] - bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]); + bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[4]; + bf1[1] = bf0[1] + bf0[5]; + bf1[2] = bf0[2] + bf0[6]; + bf1[3] = bf0[3] + 
bf0[7]; + bf1[4] = bf0[0] - bf0[4]; + bf1[5] = bf0[1] - bf0[5]; + bf1[6] = bf0[2] - bf0[6]; + bf1[7] = bf0[3] - bf0[7]; + bf1[8] = bf0[8] + bf0[12]; + bf1[9] = bf0[9] + bf0[13]; + bf1[10] = bf0[10] + bf0[14]; + bf1[11] = bf0[11] + bf0[15]; + bf1[12] = bf0[8] - bf0[12]; + bf1[13] = bf0[9] - bf0[13]; + bf1[14] = bf0[10] - bf0[14]; + bf1[15] = bf0[11] - bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]); + bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]); + bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[8]; + bf1[1] = bf0[1] + bf0[9]; + bf1[2] = bf0[2] + bf0[10]; + bf1[3] = bf0[3] + bf0[11]; + bf1[4] = bf0[4] + bf0[12]; + bf1[5] = bf0[5] + bf0[13]; + bf1[6] = bf0[6] + bf0[14]; + bf1[7] = bf0[7] + bf0[15]; + bf1[8] = bf0[0] - bf0[8]; + bf1[9] = bf0[1] - bf0[9]; + bf1[10] = bf0[2] - bf0[10]; + bf1[11] = bf0[3] - bf0[11]; + bf1[12] = bf0[4] - bf0[12]; + bf1[13] = bf0[5] - bf0[13]; + bf1[14] = bf0[6] - bf0[14]; + bf1[15] = bf0[7] - bf0[15]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // 
stage 8 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit[stage]); + bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit[stage]); + bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit[stage]); + bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit[stage]); + bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit[stage]); + bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 9 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[1]; + bf1[1] = bf0[14]; + bf1[2] = bf0[3]; + bf1[3] = bf0[12]; + bf1[4] = bf0[5]; + bf1[5] = bf0[10]; + bf1[6] = bf0[7]; + bf1[7] = bf0[8]; + bf1[8] = bf0[9]; + bf1[9] = bf0[6]; + bf1[10] = bf0[11]; + bf1[11] = bf0[4]; + bf1[12] = bf0[13]; + bf1[13] = bf0[2]; + bf1[14] = bf0[15]; + bf1[15] = bf0[0]; + range_check(stage, input, bf1, size, stage_range[stage]); +} + +void vp10_iadst32_new(const int32_t *input, int32_t *output, + const int8_t *cos_bit, const int8_t *stage_range) { + const int32_t size = 32; + const int32_t *cospi; + + int32_t 
stage = 0; + int32_t *bf0, *bf1; + int32_t step[32]; + + // stage 0; + range_check(stage, input, input, size, stage_range[stage]); + + // stage 1; + stage++; + bf1 = output; + bf1[0] = input[0]; + bf1[1] = -input[31]; + bf1[2] = -input[15]; + bf1[3] = input[16]; + bf1[4] = -input[7]; + bf1[5] = input[24]; + bf1[6] = input[8]; + bf1[7] = -input[23]; + bf1[8] = -input[3]; + bf1[9] = input[28]; + bf1[10] = input[12]; + bf1[11] = -input[19]; + bf1[12] = input[4]; + bf1[13] = -input[27]; + bf1[14] = -input[11]; + bf1[15] = input[20]; + bf1[16] = -input[1]; + bf1[17] = input[30]; + bf1[18] = input[14]; + bf1[19] = -input[17]; + bf1[20] = input[6]; + bf1[21] = -input[25]; + bf1[22] = -input[9]; + bf1[23] = input[22]; + bf1[24] = input[2]; + bf1[25] = -input[29]; + bf1[26] = -input[13]; + bf1[27] = input[18]; + bf1[28] = -input[5]; + bf1[29] = input[26]; + bf1[30] = input[10]; + bf1[31] = -input[21]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]); + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]); + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]); + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]); + bf1[19] = half_btf(cospi[32], 
bf0[18], -cospi[32], bf0[19], cos_bit[stage]); + bf1[20] = bf0[20]; + bf1[21] = bf0[21]; + bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit[stage]); + bf1[24] = bf0[24]; + bf1[25] = bf0[25]; + bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]); + bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit[stage]); + bf1[28] = bf0[28]; + bf1[29] = bf0[29]; + bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 3 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[2]; + bf1[1] = bf0[1] + bf0[3]; + bf1[2] = bf0[0] - bf0[2]; + bf1[3] = bf0[1] - bf0[3]; + bf1[4] = bf0[4] + bf0[6]; + bf1[5] = bf0[5] + bf0[7]; + bf1[6] = bf0[4] - bf0[6]; + bf1[7] = bf0[5] - bf0[7]; + bf1[8] = bf0[8] + bf0[10]; + bf1[9] = bf0[9] + bf0[11]; + bf1[10] = bf0[8] - bf0[10]; + bf1[11] = bf0[9] - bf0[11]; + bf1[12] = bf0[12] + bf0[14]; + bf1[13] = bf0[13] + bf0[15]; + bf1[14] = bf0[12] - bf0[14]; + bf1[15] = bf0[13] - bf0[15]; + bf1[16] = bf0[16] + bf0[18]; + bf1[17] = bf0[17] + bf0[19]; + bf1[18] = bf0[16] - bf0[18]; + bf1[19] = bf0[17] - bf0[19]; + bf1[20] = bf0[20] + bf0[22]; + bf1[21] = bf0[21] + bf0[23]; + bf1[22] = bf0[20] - bf0[22]; + bf1[23] = bf0[21] - bf0[23]; + bf1[24] = bf0[24] + bf0[26]; + bf1[25] = bf0[25] + bf0[27]; + bf1[26] = bf0[24] - bf0[26]; + bf1[27] = bf0[25] - bf0[27]; + bf1[28] = bf0[28] + bf0[30]; + bf1[29] = bf0[29] + bf0[31]; + bf1[30] = bf0[28] - bf0[30]; + bf1[31] = bf0[29] - bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + 
bf1[3] = bf0[3]; + bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]); + bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]); + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]); + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = bf0[18]; + bf1[19] = bf0[19]; + bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]); + bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit[stage]); + bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit[stage]); + bf1[24] = bf0[24]; + bf1[25] = bf0[25]; + bf1[26] = bf0[26]; + bf1[27] = bf0[27]; + bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit[stage]); + bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[4]; + bf1[1] = bf0[1] + bf0[5]; + bf1[2] = bf0[2] + bf0[6]; + bf1[3] = bf0[3] + bf0[7]; + bf1[4] = bf0[0] - bf0[4]; + bf1[5] = bf0[1] - bf0[5]; + bf1[6] = bf0[2] - bf0[6]; + bf1[7] = bf0[3] - bf0[7]; + bf1[8] = bf0[8] + bf0[12]; + bf1[9] = bf0[9] + bf0[13]; + bf1[10] = bf0[10] + bf0[14]; + bf1[11] 
= bf0[11] + bf0[15]; + bf1[12] = bf0[8] - bf0[12]; + bf1[13] = bf0[9] - bf0[13]; + bf1[14] = bf0[10] - bf0[14]; + bf1[15] = bf0[11] - bf0[15]; + bf1[16] = bf0[16] + bf0[20]; + bf1[17] = bf0[17] + bf0[21]; + bf1[18] = bf0[18] + bf0[22]; + bf1[19] = bf0[19] + bf0[23]; + bf1[20] = bf0[16] - bf0[20]; + bf1[21] = bf0[17] - bf0[21]; + bf1[22] = bf0[18] - bf0[22]; + bf1[23] = bf0[19] - bf0[23]; + bf1[24] = bf0[24] + bf0[28]; + bf1[25] = bf0[25] + bf0[29]; + bf1[26] = bf0[26] + bf0[30]; + bf1[27] = bf0[27] + bf0[31]; + bf1[28] = bf0[24] - bf0[28]; + bf1[29] = bf0[25] - bf0[29]; + bf1[30] = bf0[26] - bf0[30]; + bf1[31] = bf0[27] - bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]); + bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]); + bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]); + bf1[16] = bf0[16]; + bf1[17] = bf0[17]; + bf1[18] = bf0[18]; + bf1[19] = bf0[19]; + bf1[20] = bf0[20]; + bf1[21] = bf0[21]; + bf1[22] = bf0[22]; + bf1[23] = bf0[23]; + bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]); + bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit[stage]); + bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]); + bf1[27] = 
half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit[stage]); + bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit[stage]); + bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[8]; + bf1[1] = bf0[1] + bf0[9]; + bf1[2] = bf0[2] + bf0[10]; + bf1[3] = bf0[3] + bf0[11]; + bf1[4] = bf0[4] + bf0[12]; + bf1[5] = bf0[5] + bf0[13]; + bf1[6] = bf0[6] + bf0[14]; + bf1[7] = bf0[7] + bf0[15]; + bf1[8] = bf0[0] - bf0[8]; + bf1[9] = bf0[1] - bf0[9]; + bf1[10] = bf0[2] - bf0[10]; + bf1[11] = bf0[3] - bf0[11]; + bf1[12] = bf0[4] - bf0[12]; + bf1[13] = bf0[5] - bf0[13]; + bf1[14] = bf0[6] - bf0[14]; + bf1[15] = bf0[7] - bf0[15]; + bf1[16] = bf0[16] + bf0[24]; + bf1[17] = bf0[17] + bf0[25]; + bf1[18] = bf0[18] + bf0[26]; + bf1[19] = bf0[19] + bf0[27]; + bf1[20] = bf0[20] + bf0[28]; + bf1[21] = bf0[21] + bf0[29]; + bf1[22] = bf0[22] + bf0[30]; + bf1[23] = bf0[23] + bf0[31]; + bf1[24] = bf0[16] - bf0[24]; + bf1[25] = bf0[17] - bf0[25]; + bf1[26] = bf0[18] - bf0[26]; + bf1[27] = bf0[19] - bf0[27]; + bf1[28] = bf0[20] - bf0[28]; + bf1[29] = bf0[21] - bf0[29]; + bf1[30] = bf0[22] - bf0[30]; + bf1[31] = bf0[23] - bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 8 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = bf0[0]; + bf1[1] = bf0[1]; + bf1[2] = bf0[2]; + bf1[3] = bf0[3]; + bf1[4] = bf0[4]; + bf1[5] = bf0[5]; + bf1[6] = bf0[6]; + bf1[7] = bf0[7]; + bf1[8] = bf0[8]; + bf1[9] = bf0[9]; + bf1[10] = bf0[10]; + bf1[11] = bf0[11]; + bf1[12] = bf0[12]; + bf1[13] = bf0[13]; + bf1[14] = bf0[14]; + bf1[15] = bf0[15]; + 
bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]); + bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit[stage]); + bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]); + bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit[stage]); + bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]); + bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit[stage]); + bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit[stage]); + bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]); + bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit[stage]); + bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]); + bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit[stage]); + bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit[stage]); + bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 9 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = step; + bf1 = output; + bf1[0] = bf0[0] + bf0[16]; + bf1[1] = bf0[1] + bf0[17]; + bf1[2] = bf0[2] + bf0[18]; + bf1[3] = bf0[3] + bf0[19]; + bf1[4] = bf0[4] + bf0[20]; + bf1[5] = bf0[5] + bf0[21]; + bf1[6] = bf0[6] + bf0[22]; + bf1[7] = bf0[7] + bf0[23]; + bf1[8] = bf0[8] + bf0[24]; + bf1[9] = bf0[9] + bf0[25]; + bf1[10] = bf0[10] + bf0[26]; + bf1[11] = bf0[11] + bf0[27]; + bf1[12] = bf0[12] + bf0[28]; + bf1[13] = bf0[13] + bf0[29]; + bf1[14] = bf0[14] + bf0[30]; + bf1[15] = bf0[15] + bf0[31]; + bf1[16] = bf0[0] - bf0[16]; + bf1[17] = bf0[1] - bf0[17]; + bf1[18] = bf0[2] - bf0[18]; + bf1[19] = 
bf0[3] - bf0[19]; + bf1[20] = bf0[4] - bf0[20]; + bf1[21] = bf0[5] - bf0[21]; + bf1[22] = bf0[6] - bf0[22]; + bf1[23] = bf0[7] - bf0[23]; + bf1[24] = bf0[8] - bf0[24]; + bf1[25] = bf0[9] - bf0[25]; + bf1[26] = bf0[10] - bf0[26]; + bf1[27] = bf0[11] - bf0[27]; + bf1[28] = bf0[12] - bf0[28]; + bf1[29] = bf0[13] - bf0[29]; + bf1[30] = bf0[14] - bf0[30]; + bf1[31] = bf0[15] - bf0[31]; + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 10 + stage++; + cospi = cospi_arr[cos_bit[stage] - cos_bit_min]; + bf0 = output; + bf1 = step; + bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]); + bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit[stage]); + bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]); + bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit[stage]); + bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]); + bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit[stage]); + bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]); + bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit[stage]); + bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]); + bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit[stage]); + bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]); + bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit[stage]); + bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]); + bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit[stage]); + bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]); + bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit[stage]); + bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]); + bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit[stage]); + bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], 
cos_bit[stage]); + bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit[stage]); + bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]); + bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit[stage]); + bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]); + bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit[stage]); + bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]); + bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit[stage]); + bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]); + bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit[stage]); + bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]); + bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit[stage]); + bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]); + bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit[stage]); + range_check(stage, input, bf1, size, stage_range[stage]); + + // stage 11 + stage++; + bf0 = step; + bf1 = output; + bf1[0] = bf0[1]; + bf1[1] = bf0[30]; + bf1[2] = bf0[3]; + bf1[3] = bf0[28]; + bf1[4] = bf0[5]; + bf1[5] = bf0[26]; + bf1[6] = bf0[7]; + bf1[7] = bf0[24]; + bf1[8] = bf0[9]; + bf1[9] = bf0[22]; + bf1[10] = bf0[11]; + bf1[11] = bf0[20]; + bf1[12] = bf0[13]; + bf1[13] = bf0[18]; + bf1[14] = bf0[15]; + bf1[15] = bf0[16]; + bf1[16] = bf0[17]; + bf1[17] = bf0[14]; + bf1[18] = bf0[19]; + bf1[19] = bf0[12]; + bf1[20] = bf0[21]; + bf1[21] = bf0[10]; + bf1[22] = bf0[23]; + bf1[23] = bf0[8]; + bf1[24] = bf0[25]; + bf1[25] = bf0[6]; + bf1[26] = bf0[27]; + bf1[27] = bf0[4]; + bf1[28] = bf0[29]; + bf1[29] = bf0[2]; + bf1[30] = bf0[31]; + bf1[31] = bf0[0]; + range_check(stage, input, bf1, size, stage_range[stage]); +}
diff --git a/vp10/common/vp10_inv_txfm1d.h b/vp10/common/vp10_inv_txfm1d.h new file mode 100644 index 0000000..0609b65 --- /dev/null +++ b/vp10/common/vp10_inv_txfm1d.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM1D_H_
+#define VP10_INV_TXFM1D_H_
+
+#include "vp10/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Inverse DCT kernels; all functions here match the TxfmFunc signature.
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+// Inverse ADST kernels.
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // VP10_INV_TXFM1D_H_
diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c new file mode 100644 index 0000000..c894a42 --- /dev/null +++ b/vp10/common/vp10_inv_txfm2d.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_txfm.h"
+
+// Adds the 2-D inverse transform of |input| to the block at |output|.
+//
+// input:    txfm_size x txfm_size coefficients, one row after another.
+// output:   destination block with a row pitch of |stride| elements; the
+//           transform result is accumulated into it.
+// cfg:      1-D kernels and per-stage parameters for the row and column pass.
+// txfm_buf: scratch space of txfm_size * txfm_size + 2 * txfm_size elements.
+static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
+                                    int stride, const TXFM_2D_CFG *cfg,
+                                    int32_t *txfm_buf) {
+  const int txfm_size = cfg->txfm_size;
+  const int8_t *shift = cfg->shift;
+  const int8_t *stage_range_col = cfg->stage_range_col;
+  const int8_t *stage_range_row = cfg->stage_range_row;
+  const int8_t *cos_bit_col = cfg->cos_bit_col;
+  const int8_t *cos_bit_row = cfg->cos_bit_row;
+  const TxfmFunc txfm_func_col = cfg->txfm_func_col;
+  const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+
+  // Scratch layout: one column of input, one column of output, then the
+  // txfm_size x txfm_size block of row-transformed coefficients.
+  int32_t *temp_in = txfm_buf;
+  int32_t *temp_out = temp_in + txfm_size;
+  int32_t *buf = temp_out + txfm_size;
+  int32_t *buf_ptr = buf;
+  int i, j;
+
+  // Rows: transform each input row into buf and rescale it.
+  for (i = 0; i < txfm_size; ++i) {
+    txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
+    round_shift_array(buf_ptr, txfm_size, -shift[0]);
+    input += txfm_size;
+    buf_ptr += txfm_size;
+  }
+
+  // Columns: gather column i, transform and rescale it, then add it to the
+  // destination.
+  for (i = 0; i < txfm_size; ++i) {
+    for (j = 0; j < txfm_size; ++j)
+      temp_in[j] = buf[j * txfm_size + i];
+    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+    round_shift_array(temp_out, txfm_size, -shift[1]);
+    for (j = 0; j < txfm_size; ++j)
+      output[j * stride + i] += temp_out[j];
+  }
+}
+
+// Shared body of the public entry points below: runs inv_txfm2d_add_c() and
+// clamps the size x size result to the valid pixel range [0, (1 << bd) - 1].
+//
+// |output| holds the prediction signal, which is never negative and never
+// larger than (1 << bd) - 1. Because bd < 16 - 1 those values also fit in
+// int16_t, so the uint16_t buffer can be treated as int16_t while the signed
+// transform result is added and before the sum is clamped.
+static INLINE void inv_txfm2d_add_clamp(const int32_t *input, uint16_t *output,
+                                        int stride, const TXFM_2D_CFG *cfg,
+                                        int32_t *txfm_buf, int size, int bd) {
+  int16_t *const dst = (int16_t *)output;
+  inv_txfm2d_add_c(input, dst, stride, cfg, txfm_buf);
+  clamp_block(dst, size, stride, 0, (1 << bd) - 1);
+}
+
+// Public entry points. cfg->txfm_size must match the block size in the
+// function name because the int32_t scratch buffer is sized for exactly that
+// block.
+void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd) {
+  int32_t txfm_buf[4 * 4 + 4 + 4];
+  inv_txfm2d_add_clamp(input, output, stride, cfg, txfm_buf, 4, bd);
+}
+
+void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd) {
+  int32_t txfm_buf[8 * 8 + 8 + 8];
+  inv_txfm2d_add_clamp(input, output, stride, cfg, txfm_buf, 8, bd);
+}
+
+void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd) {
+  int32_t txfm_buf[16 * 16 + 16 + 16];
+  inv_txfm2d_add_clamp(input, output, stride, cfg, txfm_buf, 16, bd);
+}
+
+void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd) {
+  int32_t txfm_buf[32 * 32 + 32 + 32];
+  inv_txfm2d_add_clamp(input, output, stride, cfg, txfm_buf, 32, bd);
+}
diff --git a/vp10/common/vp10_inv_txfm2d.h b/vp10/common/vp10_inv_txfm2d.h new file mode 100644 index 0000000..1b570ef --- /dev/null +++ b/vp10/common/vp10_inv_txfm2d.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_H_
+#define VP10_INV_TXFM2D_H_
+
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Each function adds the 2-D inverse transform of |input| to the block at
+// |output|, whose rows are |stride| elements apart, and clamps the result to
+// [0, (1 << bd) - 1]. |cfg| supplies the row and column kernels (for example
+// one of the inv_txfm_2d_cfg_* tables); its txfm_size must match the block
+// size in the function name.
+void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd);
+void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd);
+void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd);
+void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // VP10_INV_TXFM2D_H_
diff --git a/vp10/common/vp10_inv_txfm2d_cfg.h b/vp10/common/vp10_inv_txfm2d_cfg.h new file mode 100644 index 0000000..8cd76b5 --- /dev/null +++ b/vp10/common/vp10_inv_txfm2d_cfg.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_CFG_H_
+#define VP10_INV_TXFM2D_CFG_H_
+#include "vp10/common/vp10_inv_txfm1d.h"
+
+// Parameter sets for the 2-D inverse transforms. The set named
+// inv_txfm_2d_cfg_<col>_<row>_<size> runs the 1-D <col> kernel on the columns
+// and the 1-D <row> kernel on the rows of a <size> x <size> block.
+//
+// The TXFM_2D_CFG initializers are positional rather than C99 designated
+// initializers: this header is reachable from C++ code through
+// vp10_inv_txfm2d.h, and designated initializers are not part of C++ before
+// C++20. The trailing comment on each line names the field being set, and the
+// values must stay in the member order of TXFM_2D_CFG.
+
+// ---------------- config inv_dct_dct_4 ----------------
+static const int8_t inv_shift_dct_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_dct_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
+    4,  // .txfm_size
+    4,  // .stage_num_col
+    4,  // .stage_num_row
+    inv_shift_dct_dct_4,  // .shift
+    inv_stage_range_col_dct_dct_4,  // .stage_range_col
+    inv_stage_range_row_dct_dct_4,  // .stage_range_row
+    inv_cos_bit_col_dct_dct_4,  // .cos_bit_col
+    inv_cos_bit_row_dct_dct_4,  // .cos_bit_row
+    vp10_idct4_new,  // .txfm_func_col
+    vp10_idct4_new  // .txfm_func_row
+};
+
+// ---------------- config inv_dct_dct_8 ----------------
+static const int8_t inv_shift_dct_dct_8[2] = {0, -5};
+static const int8_t inv_stage_range_col_dct_dct_8[6] = {17, 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_8[6] = {17, 17, 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
+    8,  // .txfm_size
+    6,  // .stage_num_col
+    6,  // .stage_num_row
+    inv_shift_dct_dct_8,  // .shift
+    inv_stage_range_col_dct_dct_8,  // .stage_range_col
+    inv_stage_range_row_dct_dct_8,  // .stage_range_row
+    inv_cos_bit_col_dct_dct_8,  // .cos_bit_col
+    inv_cos_bit_row_dct_dct_8,  // .cos_bit_row
+    vp10_idct8_new,  // .txfm_func_col
+    vp10_idct8_new  // .txfm_func_row
+};
+
+// ---------------- config inv_dct_dct_16 ----------------
+static const int8_t inv_shift_dct_dct_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_dct_dct_16[8] = {18, 18, 18, 18,
+    18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_16[8] = {18, 18, 18, 18,
+    18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_dct_16[8] = {14, 14, 14, 14,
+    14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_16[8] = {14, 14, 14, 14,
+    14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
+    16,  // .txfm_size
+    8,  // .stage_num_col
+    8,  // .stage_num_row
+    inv_shift_dct_dct_16,  // .shift
+    inv_stage_range_col_dct_dct_16,  // .stage_range_col
+    inv_stage_range_row_dct_dct_16,  // .stage_range_row
+    inv_cos_bit_col_dct_dct_16,  // .cos_bit_col
+    inv_cos_bit_row_dct_dct_16,  // .cos_bit_row
+    vp10_idct16_new,  // .txfm_func_col
+    vp10_idct16_new  // .txfm_func_row
+};
+
+// ---------------- config inv_dct_dct_32 ----------------
+static const int8_t inv_shift_dct_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_dct_32[10] = {18, 18, 18, 18, 18,
+    18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_32[10] = {19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_dct_32[10] = {14, 14, 14, 14, 14,
+    14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_32[10] = {13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
+    32,  // .txfm_size
+    10,  // .stage_num_col
+    10,  // .stage_num_row
+    inv_shift_dct_dct_32,  // .shift
+    inv_stage_range_col_dct_dct_32,  // .stage_range_col
+    inv_stage_range_row_dct_dct_32,  // .stage_range_row
+    inv_cos_bit_col_dct_dct_32,  // .cos_bit_col
+    inv_cos_bit_row_dct_dct_32,  // .cos_bit_row
+    vp10_idct32_new,  // .txfm_func_col
+    vp10_idct32_new  // .txfm_func_row
+};
+
+// ---------------- config inv_dct_adst_4 ----------------
+static const int8_t inv_shift_dct_adst_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_adst_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_adst_4[6] = {16, 16, 16,
+    16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_adst_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
+    4,  // .txfm_size
+    4,  // .stage_num_col
+    6,  // .stage_num_row
+    inv_shift_dct_adst_4,  // .shift
+    inv_stage_range_col_dct_adst_4,  // .stage_range_col
+    inv_stage_range_row_dct_adst_4,  // .stage_range_row
+    inv_cos_bit_col_dct_adst_4,  // .cos_bit_col
+    inv_cos_bit_row_dct_adst_4,  // .cos_bit_row
+    vp10_idct4_new,  // .txfm_func_col
+    vp10_iadst4_new  // .txfm_func_row
+};
+
+// ---------------- config inv_dct_adst_8 ----------------
+static const int8_t inv_shift_dct_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_dct_adst_8[6] = {16, 16, 16,
+    16, 15, 15};
+static const int8_t inv_stage_range_row_dct_adst_8[8] = {17, 17, 17, 17,
+    17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_adst_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_8[8] = {15, 15, 15, 15,
+    15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
+    8,  // .txfm_size
+    6,  // .stage_num_col
+    8,  // .stage_num_row
+    inv_shift_dct_adst_8,  // .shift
+    inv_stage_range_col_dct_adst_8,  // .stage_range_col
+    inv_stage_range_row_dct_adst_8,  // .stage_range_row
+    inv_cos_bit_col_dct_adst_8,  // .cos_bit_col
+    inv_cos_bit_row_dct_adst_8,  // .cos_bit_row
+    vp10_idct8_new,  // .txfm_func_col
+    vp10_iadst8_new  // .txfm_func_row
+};
+
+// ---------------- config inv_dct_adst_16 ----------------
+static const int8_t inv_shift_dct_adst_16[2] = {1, -7};
+static const int8_t inv_stage_range_col_dct_adst_16[8] = {19, 19, 19, 19,
+    19, 19, 18, 18};
+static const int8_t inv_stage_range_row_dct_adst_16[10] = {18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_adst_16[8] = {13, 13, 13, 13,
+    13, 13, 13, 14};
+static const int8_t inv_cos_bit_row_dct_adst_16[10] = {14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
+    16,  // .txfm_size
+    8,  // .stage_num_col
+    10,  // .stage_num_row
+    inv_shift_dct_adst_16,  // .shift
+    inv_stage_range_col_dct_adst_16,  // .stage_range_col
+    inv_stage_range_row_dct_adst_16,  // .stage_range_row
+    inv_cos_bit_col_dct_adst_16,  // .cos_bit_col
+    inv_cos_bit_row_dct_adst_16,  // .cos_bit_row
+    vp10_idct16_new,  // .txfm_func_col
+    vp10_iadst16_new  // .txfm_func_row
+};
+
+// ---------------- config inv_dct_adst_32 ----------------
+static const int8_t inv_shift_dct_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_adst_32[10] = {18, 18, 18, 18, 18,
+    18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_adst_32[12] = {
+    19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_adst_32[10] = {14, 14, 14, 14, 14,
+    14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_adst_32[12] = {13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
+    32,  // .txfm_size
+    10,  // .stage_num_col
+    12,  // .stage_num_row
+    inv_shift_dct_adst_32,  // .shift
+    inv_stage_range_col_dct_adst_32,  // .stage_range_col
+    inv_stage_range_row_dct_adst_32,  // .stage_range_row
+    inv_cos_bit_col_dct_adst_32,  // .cos_bit_col
+    inv_cos_bit_row_dct_adst_32,  // .cos_bit_row
+    vp10_idct32_new,  // .txfm_func_col
+    vp10_iadst32_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_adst_4 ----------------
+static const int8_t inv_shift_adst_adst_4[2] = {0, -4};
+static const int8_t inv_stage_range_col_adst_adst_4[6] = {16, 16, 16,
+    16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_4[6] = {16, 16, 16,
+    16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
+    4,  // .txfm_size
+    6,  // .stage_num_col
+    6,  // .stage_num_row
+    inv_shift_adst_adst_4,  // .shift
+    inv_stage_range_col_adst_adst_4,  // .stage_range_col
+    inv_stage_range_row_adst_adst_4,  // .stage_range_row
+    inv_cos_bit_col_adst_adst_4,  // .cos_bit_col
+    inv_cos_bit_row_adst_adst_4,  // .cos_bit_row
+    vp10_iadst4_new,  // .txfm_func_col
+    vp10_iadst4_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_adst_8 ----------------
+static const int8_t inv_shift_adst_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_adst_8[8] = {16, 16, 16, 16,
+    16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_8[8] = {17, 17, 17, 17,
+    17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_adst_8[8] = {15, 15, 15, 15,
+    15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_8[8] = {15, 15, 15, 15,
+    15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
+    8,  // .txfm_size
+    8,  // .stage_num_col
+    8,  // .stage_num_row
+    inv_shift_adst_adst_8,  // .shift
+    inv_stage_range_col_adst_adst_8,  // .stage_range_col
+    inv_stage_range_row_adst_adst_8,  // .stage_range_row
+    inv_cos_bit_col_adst_adst_8,  // .cos_bit_col
+    inv_cos_bit_row_adst_adst_8,  // .cos_bit_row
+    vp10_iadst8_new,  // .txfm_func_col
+    vp10_iadst8_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_adst_16 ----------------
+static const int8_t inv_shift_adst_adst_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_adst_adst_16[10] = {18, 18, 18, 18, 18,
+    18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_16[10] = {18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_adst_16[10] = {14, 14, 14, 14, 14,
+    14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_16[10] = {14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
+    16,  // .txfm_size
+    10,  // .stage_num_col
+    10,  // .stage_num_row
+    inv_shift_adst_adst_16,  // .shift
+    inv_stage_range_col_adst_adst_16,  // .stage_range_col
+    inv_stage_range_row_adst_adst_16,  // .stage_range_row
+    inv_cos_bit_col_adst_adst_16,  // .cos_bit_col
+    inv_cos_bit_row_adst_adst_16,  // .cos_bit_row
+    vp10_iadst16_new,  // .txfm_func_col
+    vp10_iadst16_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_adst_32 ----------------
+static const int8_t inv_shift_adst_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_adst_32[12] = {
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_32[12] = {
+    19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_adst_32[12] = {14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_32[12] = {13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
+    32,  // .txfm_size
+    12,  // .stage_num_col
+    12,  // .stage_num_row
+    inv_shift_adst_adst_32,  // .shift
+    inv_stage_range_col_adst_adst_32,  // .stage_range_col
+    inv_stage_range_row_adst_adst_32,  // .stage_range_row
+    inv_cos_bit_col_adst_adst_32,  // .cos_bit_col
+    inv_cos_bit_row_adst_adst_32,  // .cos_bit_row
+    vp10_iadst32_new,  // .txfm_func_col
+    vp10_iadst32_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_dct_4 ----------------
+static const int8_t inv_shift_adst_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_adst_dct_4[6] = {17, 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_dct_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
+    4,  // .txfm_size
+    6,  // .stage_num_col
+    4,  // .stage_num_row
+    inv_shift_adst_dct_4,  // .shift
+    inv_stage_range_col_adst_dct_4,  // .stage_range_col
+    inv_stage_range_row_adst_dct_4,  // .stage_range_row
+    inv_cos_bit_col_adst_dct_4,  // .cos_bit_col
+    inv_cos_bit_row_adst_dct_4,  // .cos_bit_row
+    vp10_iadst4_new,  // .txfm_func_col
+    vp10_idct4_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_dct_8 ----------------
+static const int8_t inv_shift_adst_dct_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_dct_8[8] = {16, 16, 16, 16,
+    16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_dct_8[6] = {17, 17, 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_dct_8[8] = {15, 15, 15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
+    8,  // .txfm_size
+    8,  // .stage_num_col
+    6,  // .stage_num_row
+    inv_shift_adst_dct_8,  // .shift
+    inv_stage_range_col_adst_dct_8,  // .stage_range_col
+    inv_stage_range_row_adst_dct_8,  // .stage_range_row
+    inv_cos_bit_col_adst_dct_8,  // .cos_bit_col
+    inv_cos_bit_row_adst_dct_8,  // .cos_bit_row
+    vp10_iadst8_new,  // .txfm_func_col
+    vp10_idct8_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_dct_16 ----------------
+static const int8_t inv_shift_adst_dct_16[2] = {-1, -5};
+static const int8_t inv_stage_range_col_adst_dct_16[10] = {17, 17, 17, 17, 17,
+    17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_16[8] = {18, 18, 18, 18,
+    18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_dct_16[10] = {15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_16[8] = {14, 14, 14, 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
+    16,  // .txfm_size
+    10,  // .stage_num_col
+    8,  // .stage_num_row
+    inv_shift_adst_dct_16,  // .shift
+    inv_stage_range_col_adst_dct_16,  // .stage_range_col
+    inv_stage_range_row_adst_dct_16,  // .stage_range_row
+    inv_cos_bit_col_adst_dct_16,  // .cos_bit_col
+    inv_cos_bit_row_adst_dct_16,  // .cos_bit_row
+    vp10_iadst16_new,  // .txfm_func_col
+    vp10_idct16_new  // .txfm_func_row
+};
+
+// ---------------- config inv_adst_dct_32 ----------------
+static const int8_t inv_shift_adst_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_dct_32[12] = {18, 18, 18, 18, 18, 18,
+    18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_dct_32[10] = {19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_dct_32[12] = {14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_dct_32[10] = {13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
+    32,  // .txfm_size
+    12,  // .stage_num_col
+    10,  // .stage_num_row
+    inv_shift_adst_dct_32,  // .shift
+    inv_stage_range_col_adst_dct_32,  // .stage_range_col
+    inv_stage_range_row_adst_dct_32,  // .stage_range_row
+    inv_cos_bit_col_adst_dct_32,  // .cos_bit_col
+    inv_cos_bit_row_adst_dct_32,  // .cos_bit_row
+    vp10_iadst32_new,  // .txfm_func_col
+    vp10_idct32_new  // .txfm_func_row
+};
+
+#endif  // VP10_INV_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h new file mode 100644 index 0000000..b4fd753 --- /dev/null +++ b/vp10/common/vp10_txfm.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_TXFM_H_
+#define VP10_TXFM_H_
+
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+// Range of cosine precisions (in fractional bits) covered by cospi_arr.
+static const int cos_bit_min = 10;
+static const int cos_bit_max = 16;
+
+// cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
+static const int32_t cospi_arr[7][64] = {
+    {1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009,
+     1004, 999, 993, 987, 980, 972, 964, 955,
+     946, 936, 926, 915, 903, 891, 878, 865,
+     851, 837, 822, 807, 792, 775, 759, 742,
+     724, 706, 688, 669, 650, 630, 610, 590,
+     569, 548, 526, 505, 483, 460, 438, 415,
+     392, 369, 345, 321, 297, 273, 249, 224,
+     200, 175, 150, 125, 100, 75, 50, 25},
+    {2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018,
+     2009, 1998, 1987, 1974, 1960, 1945, 1928, 1911,
+     1892, 1872, 1851, 1829, 1806, 1782, 1757, 1730,
+     1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483,
+     1448, 1412, 1375, 1338, 1299, 1260, 1220, 1179,
+     1138, 1096, 1053, 1009, 965, 921, 876, 830,
+     784, 737, 690, 642, 595, 546, 498, 449,
+     400, 350, 301, 251, 201, 151, 100, 50},
+    {4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036,
+     4017, 3996, 3973, 3948, 3920, 3889, 3857, 3822,
+     3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
+     3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967,
+     2896, 2824, 2751, 2675, 2598, 2520, 2440, 2359,
+     2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660,
+     1567, 1474, 1380, 1285, 1189, 1092, 995, 897,
+     799, 700, 601, 501, 401, 301, 201, 101},
+    {8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071,
+     8035, 7993, 7946, 7895, 7839, 7779, 7713, 7643,
+     7568, 7489, 7405, 7317, 7225, 7128, 7027, 6921,
+     6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933,
+     5793, 5649, 5501, 5351, 5197, 5040, 4880, 4717,
+     4551, 4383, 4212, 4038, 3862, 3683, 3503, 3320,
+     3135, 2948, 2760, 2570, 2378, 2185, 1990, 1795,
+     1598, 1401, 1202, 1003, 803, 603, 402, 201},
+    {16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143,
+     16069, 15986, 15893, 15791, 15679, 15557, 15426, 15286,
+     15137, 14978, 14811, 14635, 14449, 14256, 14053, 13842,
+     13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866,
+     11585, 11297, 11003, 10702, 10394, 10080, 9760, 9434,
+     9102, 8765, 8423, 8076, 7723, 7366, 7005, 6639,
+     6270, 5897, 5520, 5139, 4756, 4370, 3981, 3590,
+     3196, 2801, 2404, 2006, 1606, 1205, 804, 402},
+    {32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286,
+     32138, 31972, 31786, 31581, 31357, 31114, 30853, 30572,
+     30274, 29957, 29622, 29269, 28899, 28511, 28106, 27684,
+     27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732,
+     23170, 22595, 22006, 21403, 20788, 20160, 19520, 18868,
+     18205, 17531, 16846, 16151, 15447, 14733, 14010, 13279,
+     12540, 11793, 11039, 10279, 9512, 8740, 7962, 7180,
+     6393, 5602, 4808, 4011, 3212, 2411, 1608, 804},
+    {65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571,
+     64277, 63944, 63572, 63162, 62714, 62228, 61705, 61145,
+     60547, 59914, 59244, 58538, 57798, 57022, 56212, 55368,
+     54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464,
+     46341, 45190, 44011, 42806, 41576, 40320, 39040, 37736,
+     36410, 35062, 33692, 32303, 30893, 29466, 28020, 26558,
+     25080, 23586, 22078, 20557, 19024, 17479, 15924, 14359,
+     12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608}};
+
+// Scales value by 2^(-bit): a rounding division by 2^bit when bit > 0, a
+// multiplication by 2^(-bit) otherwise.
+//
+// For value >= 0 there are two versions of rounding:
+//   1) (value + (1 << (bit - 1)) - 1) >> bit
+//   2) (value + (1 << (bit - 1))) >> bit
+// Both methods are close to unbiased; the first one is used because it has
+// the slight advantage of rounding ties toward zero. Negative values are
+// rounded by magnitude, so they round ties toward zero as well.
+//
+// Intermediate and final results have to fit in int32_t.
+static INLINE int32_t round_shift(int32_t value, int bit) {
+  if (bit > 0) {
+    const int32_t bias = (1 << (bit - 1)) - 1;
+    if (value >= 0) {
+      return (value + bias) >> bit;
+    } else {
+      // Shift the magnitude instead of the negative value: right shifting a
+      // negative number is implementation-defined in C.
+      return -((-value + bias) >> bit);
+    }
+  } else {
+    // Multiply instead of shifting: left shifting a negative value is
+    // undefined behavior in C.
+    return value * (1 << (-bit));
+  }
+}
+
+// Applies round_shift() in place to the first size elements of arr.
+static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
+  int i;
+  if (bit == 0) {
+    return;
+  } else {
+    for (i = 0; i < size; i++) {
+      arr[i] = round_shift(arr[i], bit);
+    }
+  }
+}
+
+// Half butterfly: returns round_shift(w0 * in0 + w1 * in1, bit).
+// The weighted sum is formed in 32 bits and has to fit in int32_t. When
+// CONFIG_COEFFICIENT_RANGE_CHECKING is enabled the sum is checked against a
+// 64-bit reference; a mismatch is printed and trips an assert.
+static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
+                               int bit) {
+  int32_t result_32 = w0 * in0 + w1 * in1;
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+  if (result_32 != result_64) {
+    // int64_t is not long on every target, so it is printed as long long.
+    printf(
+        "%s overflow result_32: %d result_64: %lld w0: %d in0: %d w1: %d in1: "
+        "%d\n",
+        __func__, result_32, (long long)result_64, w0, in0, w1, in1);
+    assert(0 && "half_btf overflow");
+  }
+#endif
+  return round_shift(result_32, bit);
+}
+
+// Returns the position of the highest set bit of x (floor(log2(x))) for
+// x > 0, and -1 when x is zero or negative.
+static INLINE int get_max_bit(int x) {
+  int max_bit = -1;
+  // Loop on x > 0 rather than x != 0: an arithmetic right shift never turns a
+  // negative value into zero, so a negative x would otherwise loop forever.
+  while (x > 0) {
+    x = x >> 1;
+    max_bit++;
+  }
+  return max_bit;
+}
+
+// Passes every element of a block_size x block_size block, whose rows are
+// stride elements apart, through clamp(value, low, high).
+// TODO(angiebird): implement SSE
+static INLINE void clamp_block(int16_t *block, int block_size, int stride,
+                               int low, int high) {
+  int i, j;
+  for (i = 0; i < block_size; ++i) {
+    for (j = 0; j < block_size; ++j) {
+      block[i * stride + j] = clamp(block[i * stride + j], low, high);
+    }
+  }
+}
+
+// Signature of a 1-D transform kernel. cos_bit and stage_range are arrays
+// with one entry per transform stage.
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+                         const int8_t *cos_bit, const int8_t *stage_range);
+
+// Parameters of one 2-D transform: the 1-D kernels used for the columns and
+// the rows together with their per-stage settings.
+typedef struct TXFM_2D_CFG {
+  const int txfm_size;      // side length of the square block
+  const int stage_num_col;  // number of stages of txfm_func_col
+  const int stage_num_row;  // number of stages of txfm_func_row
+
+  const int8_t *shift;            // scaling shifts used by the 2-D driver
+  const int8_t *stage_range_col;  // per-stage range for txfm_func_col
+  const int8_t *stage_range_row;  // per-stage range for txfm_func_row
+  const int8_t *cos_bit_col;      // per-stage cosine bits for txfm_func_col
+  const int8_t *cos_bit_row;      // per-stage cosine bits for txfm_func_row
+  const TxfmFunc txfm_func_col;   // 1-D kernel applied to columns
+  const TxfmFunc txfm_func_row;   // 1-D kernel applied to rows
+} TXFM_2D_CFG;
+
+#endif  // VP10_TXFM_H_
diff --git a/vp10/common/x86/idct_intrin_sse2.c b/vp10/common/x86/idct_intrin_sse2.c index a2c674b..900f091 100644 --- a/vp10/common/x86/idct_intrin_sse2.c +++ b/vp10/common/x86/idct_intrin_sse2.c
@@ -11,6 +11,54 @@ #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" +#include "vp10/common/enums.h" + +#if CONFIG_EXT_TX +// Reverse the 8 16 bit words in __m128i +static INLINE __m128i mm_reverse_epi16(const __m128i x) { + const __m128i a = _mm_shufflelo_epi16(x, 0x1b); + const __m128i b = _mm_shufflehi_epi16(a, 0x1b); + return _mm_shuffle_epi32(b, 0x4e); +} + +static INLINE void fliplr_4x4(__m128i in[2]) { + in[0] = _mm_shufflelo_epi16(in[0], 0x1b); + in[0] = _mm_shufflehi_epi16(in[0], 0x1b); + in[1] = _mm_shufflelo_epi16(in[1], 0x1b); + in[1] = _mm_shufflehi_epi16(in[1], 0x1b); +} + +static INLINE void fliplr_8x8(__m128i in[8]) { + in[0] = mm_reverse_epi16(in[0]); + in[1] = mm_reverse_epi16(in[1]); + in[2] = mm_reverse_epi16(in[2]); + in[3] = mm_reverse_epi16(in[3]); + + in[4] = mm_reverse_epi16(in[4]); + in[5] = mm_reverse_epi16(in[5]); + in[6] = mm_reverse_epi16(in[6]); + in[7] = mm_reverse_epi16(in[7]); +} + +static INLINE void fliplr_16x8(__m128i in[16]) { + fliplr_8x8(&in[0]); + fliplr_8x8(&in[8]); +} + +#define FLIPLR_16x16(in0, in1) do { \ + __m128i *tmp; \ + fliplr_16x8(in0); \ + fliplr_16x8(in1); \ + tmp = (in0); \ + (in0) = (in1); \ + (in1) = tmp; \ +} while (0) + +#define FLIPUD_PTR(dest, stride, size) do { \ + (dest) = (dest) + ((size) - 1) * (stride); \ + (stride) = - (stride); \ +} while (0) +#endif void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { @@ -22,22 +70,50 @@ in[1] = load_input_data(input + 8); switch (tx_type) { - case 0: // DCT_DCT + case DCT_DCT: idct4_sse2(in); idct4_sse2(in); break; - case 1: // ADST_DCT + case ADST_DCT: idct4_sse2(in); iadst4_sse2(in); break; - case 2: // DCT_ADST + case DCT_ADST: iadst4_sse2(in); idct4_sse2(in); break; - case 3: // ADST_ADST + case ADST_ADST: iadst4_sse2(in); iadst4_sse2(in); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + idct4_sse2(in); + iadst4_sse2(in); + FLIPUD_PTR(dest, stride, 4); + 
break; + case DCT_FLIPADST: + iadst4_sse2(in); + idct4_sse2(in); + fliplr_4x4(in); + break; + case FLIPADST_FLIPADST: + iadst4_sse2(in); + iadst4_sse2(in); + FLIPUD_PTR(dest, stride, 4); + fliplr_4x4(in); + break; + case ADST_FLIPADST: + iadst4_sse2(in); + iadst4_sse2(in); + fliplr_4x4(in); + break; + case FLIPADST_ADST: + iadst4_sse2(in); + iadst4_sse2(in); + FLIPUD_PTR(dest, stride, 4); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -52,12 +128,12 @@ // Reconstruction and Store { - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0)); + __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1)); __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi32(d0, - _mm_cvtsi32_si128(*(const int *)(dest + stride))); - d2 = _mm_unpacklo_epi32( - d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3))); + __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)); + d0 = _mm_unpacklo_epi32(d0, d1); + d2 = _mm_unpacklo_epi32(d2, d3); d0 = _mm_unpacklo_epi8(d0, zero); d2 = _mm_unpacklo_epi8(d2, zero); d0 = _mm_add_epi16(d0, in[0]); @@ -94,22 +170,50 @@ in[7] = load_input_data(input + 8 * 7); switch (tx_type) { - case 0: // DCT_DCT + case DCT_DCT: idct8_sse2(in); idct8_sse2(in); break; - case 1: // ADST_DCT + case ADST_DCT: idct8_sse2(in); iadst8_sse2(in); break; - case 2: // DCT_ADST + case DCT_ADST: iadst8_sse2(in); idct8_sse2(in); break; - case 3: // ADST_ADST + case ADST_ADST: iadst8_sse2(in); iadst8_sse2(in); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + idct8_sse2(in); + iadst8_sse2(in); + FLIPUD_PTR(dest, stride, 8); + break; + case DCT_FLIPADST: + iadst8_sse2(in); + idct8_sse2(in); + fliplr_8x8(in); + break; + case FLIPADST_FLIPADST: + iadst8_sse2(in); + iadst8_sse2(in); + FLIPUD_PTR(dest, stride, 8); + fliplr_8x8(in); + break; + case ADST_FLIPADST: + iadst8_sse2(in); + iadst8_sse2(in); + fliplr_8x8(in); + break; + case 
FLIPADST_ADST: + iadst8_sse2(in); + iadst8_sse2(in); + FLIPUD_PTR(dest, stride, 8); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -146,29 +250,59 @@ void vp10_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { - __m128i in0[16], in1[16]; + __m128i in[32]; + __m128i *in0 = &in[0]; + __m128i *in1 = &in[16]; load_buffer_8x16(input, in0); input += 8; load_buffer_8x16(input, in1); switch (tx_type) { - case 0: // DCT_DCT + case DCT_DCT: idct16_sse2(in0, in1); idct16_sse2(in0, in1); break; - case 1: // ADST_DCT + case ADST_DCT: idct16_sse2(in0, in1); iadst16_sse2(in0, in1); break; - case 2: // DCT_ADST + case DCT_ADST: iadst16_sse2(in0, in1); idct16_sse2(in0, in1); break; - case 3: // ADST_ADST + case ADST_ADST: iadst16_sse2(in0, in1); iadst16_sse2(in0, in1); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + idct16_sse2(in0, in1); + iadst16_sse2(in0, in1); + FLIPUD_PTR(dest, stride, 16); + break; + case DCT_FLIPADST: + iadst16_sse2(in0, in1); + idct16_sse2(in0, in1); + FLIPLR_16x16(in0, in1); + break; + case FLIPADST_FLIPADST: + iadst16_sse2(in0, in1); + iadst16_sse2(in0, in1); + FLIPUD_PTR(dest, stride, 16); + FLIPLR_16x16(in0, in1); + break; + case ADST_FLIPADST: + iadst16_sse2(in0, in1); + iadst16_sse2(in0, in1); + FLIPLR_16x16(in0, in1); + break; + case FLIPADST_ADST: + iadst16_sse2(in0, in1); + iadst16_sse2(in0, in1); + FLIPUD_PTR(dest, stride, 16); + break; +#endif // CONFIG_EXT_TX default: assert(0); break;
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 1c3f182..1bb569d 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c
@@ -61,13 +61,26 @@ cm->ref_frame_sign_bias[GOLDEN_FRAME]) { cm->comp_fixed_ref = ALTREF_FRAME; cm->comp_var_ref[0] = LAST_FRAME; +#if CONFIG_EXT_REFS + cm->comp_var_ref[1] = LAST2_FRAME; + cm->comp_var_ref[2] = LAST3_FRAME; + cm->comp_var_ref[3] = LAST4_FRAME; + cm->comp_var_ref[4] = GOLDEN_FRAME; +#else cm->comp_var_ref[1] = GOLDEN_FRAME; +#endif // CONFIG_EXT_REFS } else if (cm->ref_frame_sign_bias[LAST_FRAME] == cm->ref_frame_sign_bias[ALTREF_FRAME]) { +#if CONFIG_EXT_REFS + assert(0); +#endif // CONFIG_EXT_REFS cm->comp_fixed_ref = GOLDEN_FRAME; cm->comp_var_ref[0] = LAST_FRAME; cm->comp_var_ref[1] = ALTREF_FRAME; } else { +#if CONFIG_EXT_REFS + assert(0); +#endif // CONFIG_EXT_REFS cm->comp_fixed_ref = LAST_FRAME; cm->comp_var_ref[0] = GOLDEN_FRAME; cm->comp_var_ref[1] = ALTREF_FRAME; @@ -83,18 +96,9 @@ return data > max ? max : data; } -#if CONFIG_MISC_FIXES static TX_MODE read_tx_mode(struct vpx_read_bit_buffer *rb) { return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2); } -#else -static TX_MODE read_tx_mode(vpx_reader *r) { - TX_MODE tx_mode = vpx_read_literal(r, 2); - if (tx_mode == ALLOW_32X32) - tx_mode += vpx_read_bit(r); - return tx_mode; -} -#endif static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) { int i, j; @@ -120,13 +124,22 @@ } static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { - int i, j; + int i; +#if CONFIG_REF_MV + for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) + vp10_diff_update_prob(r, &fc->newmv_prob[i]); + for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) + vp10_diff_update_prob(r, &fc->zeromv_prob[i]); + for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) + vp10_diff_update_prob(r, &fc->refmv_prob[i]); +#else + int j; for (i = 0; i < INTER_MODE_CONTEXTS; ++i) for (j = 0; j < INTER_MODES - 1; ++j) vp10_diff_update_prob(r, &fc->inter_mode_probs[i][j]); +#endif } -#if CONFIG_MISC_FIXES static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { if 
(is_compound_reference_allowed(cm)) { @@ -137,47 +150,36 @@ return SINGLE_REFERENCE; } } -#else -static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm, - vpx_reader *r) { - if (is_compound_reference_allowed(cm)) { - return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT - : COMPOUND_REFERENCE) - : SINGLE_REFERENCE; - } else { - return SINGLE_REFERENCE; - } -} -#endif static void read_frame_reference_mode_probs(VP10_COMMON *cm, vpx_reader *r) { FRAME_CONTEXT *const fc = cm->fc; - int i; + int i, j; if (cm->reference_mode == REFERENCE_MODE_SELECT) for (i = 0; i < COMP_INTER_CONTEXTS; ++i) vp10_diff_update_prob(r, &fc->comp_inter_prob[i]); - if (cm->reference_mode != COMPOUND_REFERENCE) + if (cm->reference_mode != COMPOUND_REFERENCE) { for (i = 0; i < REF_CONTEXTS; ++i) { - vp10_diff_update_prob(r, &fc->single_ref_prob[i][0]); - vp10_diff_update_prob(r, &fc->single_ref_prob[i][1]); + for (j = 0; j < (SINGLE_REFS - 1); ++j) { + vp10_diff_update_prob(r, &fc->single_ref_prob[i][j]); + } } + } - if (cm->reference_mode != SINGLE_REFERENCE) - for (i = 0; i < REF_CONTEXTS; ++i) - vp10_diff_update_prob(r, &fc->comp_ref_prob[i]); + if (cm->reference_mode != SINGLE_REFERENCE) { + for (i = 0; i < REF_CONTEXTS; ++i) { + for (j = 0; j < (COMP_REFS - 1); ++j) { + vp10_diff_update_prob(r, &fc->comp_ref_prob[i][j]); + } + } + } } static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) { int i; for (i = 0; i < n; ++i) -#if CONFIG_MISC_FIXES vp10_diff_update_prob(r, &p[i]); -#else - if (vpx_read(r, MV_UPDATE_PROB)) - p[i] = (vpx_read_literal(r, 7) << 1) | 1; -#endif } static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) { @@ -214,7 +216,7 @@ uint8_t *dst, int stride, int eob, int block) { struct macroblockd_plane *const pd = &xd->plane[plane]; - TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block); + TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size); const int seg_id = xd->mi[0]->mbmi.segment_id; if (eob > 0) { 
tran_low_t *const dqcoeff = pd->dqcoeff; @@ -369,8 +371,8 @@ col, row, plane); if (!mbmi->skip) { - TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx); - const scan_order *sc = get_scan(tx_size, tx_type); + TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size); + const scan_order *sc = get_scan(tx_size, tx_type, 0); const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, mbmi->segment_id); inverse_transform_block_intra(xd, plane, tx_type, tx_size, @@ -378,14 +380,71 @@ } } +#if CONFIG_VAR_TX +static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r, + MB_MODE_INFO *const mbmi, + int plane, BLOCK_SIZE plane_bsize, + int block, int blk_row, int blk_col, + TX_SIZE tx_size, int *eob_total) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = plane ? + get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) : + mbmi->inter_tx_size[tx_idx]; + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + PLANE_TYPE plane_type = (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *sc = get_scan(tx_size, tx_type, 1); + const int eob = vp10_decode_block_tokens(xd, plane, sc, + blk_col, blk_row, tx_size, + r, mbmi->segment_id); + inverse_transform_block_inter(xd, plane, tx_size, + &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col], + pd->dst.stride, eob, block); + *eob_total += eob; + } else { + int bsl = b_width_log2_lookup[bsize]; + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + const int offsetr = blk_row + ((i >> 1) << bsl); + const int offsetc = blk_col + ((i & 0x01) << bsl); + int step = 1 << (2 * (tx_size - 1)); + + if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) + continue; + + decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block + i * step, + offsetr, offsetc, tx_size - 1, eob_total); + } + } +} +#endif // CONFIG_VAR_TX + static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, MB_MODE_INFO *const mbmi, int plane, int row, int col, TX_SIZE tx_size) { struct macroblockd_plane *const pd = &xd->plane[plane]; PLANE_TYPE plane_type = (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; int block_idx = (row << 1) + col; - TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx); - const scan_order *sc = get_scan(tx_size, tx_type); + TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size); + const scan_order *sc = get_scan(tx_size, tx_type, 1); const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, mbmi->segment_id); @@ -517,7 +576,9 @@ subpel_y, sf, w, h, ref, kernel, xs, ys); } } + #else + static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, int x0, int y0, int b_w, int b_h, int frame_width, int frame_height, @@ -555,6 +616,9 @@ int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, subpel_x, subpel_y; uint8_t *ref_frame, *buf_ptr; +#if CONFIG_EXT_INTERP + const int i_filter = IsInterpolatingFilter(xd->mi[0]->mbmi.interp_filter); +#endif // CONFIG_EXT_INTERP // Get reference frame pointer, width and height. if (plane == 0) { @@ -624,6 +688,9 @@ // Do border extension if there is motion or the // width/height is not a multiple of 8 pixels. 
if (is_scaled || scaled_mv.col || scaled_mv.row || +#if CONFIG_EXT_INTERP + !i_filter || +#endif (frame_width & 0x7) || (frame_height & 0x7)) { int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; @@ -631,13 +698,21 @@ int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; int x_pad = 0, y_pad = 0; - if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { + if (subpel_x || +#if CONFIG_EXT_INTERP + !i_filter || +#endif + (sf->x_step_q4 != SUBPEL_SHIFTS)) { x0 -= VP9_INTERP_EXTEND - 1; x1 += VP9_INTERP_EXTEND; x_pad = 1; } - if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { + if (subpel_y || +#if CONFIG_EXT_INTERP + !i_filter || +#endif + (sf->y_step_q4 != SUBPEL_SHIFTS)) { y0 -= VP9_INTERP_EXTEND - 1; y1 += VP9_INTERP_EXTEND; y_pad = 1; @@ -750,6 +825,49 @@ } } +static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi, + MACROBLOCKD *xd, + int mi_row, int mi_col, + int block) { + // Prediction function used in supertx: + // Use the mv at current block (which is less than 8x8) + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + const MODE_INFO *mi = xd->mi[0]; + const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter]; + const int is_compound = has_second_ref(&mi->mbmi); + + // For sub8x8 uv: + // Skip uv prediction in supertx except the first block (block = 0) + int max_plane = block ? 
1 : MAX_MB_PLANE; + + for (plane = 0; plane < max_plane; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + int ref; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + const int idx = xd->block_refs[ref]->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + const int is_scaled = vp10_is_scaled(sf); + const MV mv = average_split_mvs(pd, mi, ref, block); + dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4, + 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); + } + } +} + static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) { // get minimum log2 num4x4s dimension @@ -799,6 +917,11 @@ set_skip_context(xd, mi_row, mi_col); + +#if CONFIG_VAR_TX + xd->max_tx_size = max_txsize_lookup[bsize]; +#endif + // Distance of Mb to the various image edges. 
These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -807,7 +930,588 @@ return &xd->mi[0]->mbmi; } +#if CONFIG_SUPERTX +static MB_MODE_INFO *set_offsets_extend(VP10_COMMON *const cm, + MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize_pred, + int mi_row_pred, int mi_col_pred, + int mi_row_ori, int mi_col_ori) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + const int bw = num_8x8_blocks_wide_lookup[bsize_pred]; + const int bh = num_8x8_blocks_high_lookup[bsize_pred]; + const int offset = mi_row_ori * cm->mi_stride + mi_col_ori; + const int bwl = b_width_log2_lookup[bsize_pred]; + const int bhl = b_height_log2_lookup[bsize_pred]; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + set_mi_row_col(xd, tile, mi_row_pred, bh, mi_col_pred, bw, + cm->mi_rows, cm->mi_cols); + + xd->up_available = (mi_row_ori != 0); + xd->left_available = (mi_col_ori > tile->mi_col_start); + + set_plane_n4(xd, bw, bh, bwl, bhl); + + return &xd->mi[0]->mbmi; +} + +static MB_MODE_INFO *set_mb_offsets(VP10_COMMON *const cm, + MACROBLOCKD *const xd, + BLOCK_SIZE bsize, + int mi_row, int mi_col, + int bw, int bh, + int x_mis, int y_mis) { + const int offset = mi_row * cm->mi_stride + mi_col; + const TileInfo *const tile = &xd->tile; + int x, y; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + xd->mi[0]->mbmi.sb_type = bsize; + for (y = 0; y < y_mis; ++y) + for (x = !y; x < x_mis; ++x) + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; + + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + return &xd->mi[0]->mbmi; +} + +static void set_offsets_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + const int bw = 
num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int offset = mi_row * cm->mi_stride + mi_col; + const int bwl = b_width_log2_lookup[bsize]; + const int bhl = b_height_log2_lookup[bsize]; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + + set_plane_n4(xd, bw, bh, bwl, bhl); + + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); +} + +static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, +#if CONFIG_EXT_TX + int txfm, +#endif + int skip) { + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); + const int offset = mi_row * cm->mi_stride + mi_col; + int x, y; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + + for (y = 0; y < y_mis; ++y) + for (x = 0; x < x_mis; ++x) { + xd->mi[y * cm->mi_stride + x]->mbmi.skip = skip; +#if CONFIG_EXT_TX + xd->mi[y * cm->mi_stride + x]->mbmi.tx_type = txfm; +#endif + } +} + +static void set_ref(VP10_COMMON *const cm, MACROBLOCKD *const xd, + int idx, int mi_row, int mi_col) { + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; + xd->block_refs[idx] = ref_buffer; + if (!vp10_is_valid_scale(&ref_buffer->sf)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid scale factors"); + vp10_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, + &ref_buffer->sf); + xd->corrupted |= ref_buffer->buf->corrupted; +} + +static void dec_predict_b_extend( + VP10Decoder *const pbi, MACROBLOCKD *const xd, + const TileInfo *const tile, int block, + int mi_row_ori, int mi_col_ori, + int mi_row_pred, int mi_col_pred, + int mi_row_top, int 
mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3], + BLOCK_SIZE bsize_top, + BLOCK_SIZE bsize_pred, + int b_sub8x8, int bextend) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + // (mi_row_top, mi_col_top, bsize_top): region of the top partition size + // block: sub location of sub8x8 blocks + // b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8 + // bextend: 1: region to predict is an extension of ori; 0: not + int r = (mi_row_pred - mi_row_top) * MI_SIZE; + int c = (mi_col_pred - mi_col_top) * MI_SIZE; + const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top]; + const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top]; + MB_MODE_INFO *mbmi; + VP10_COMMON *const cm = &pbi->common; + + if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top || + mi_row_pred >= mi_row_top + mi_height_top || + mi_col_pred >= mi_col_top + mi_width_top || + mi_row_pred >= cm->mi_rows || mi_col_pred >= cm->mi_cols) + return; + + mbmi = set_offsets_extend(cm, xd, tile, bsize_pred, + mi_row_pred, mi_col_pred, + mi_row_ori, mi_col_ori); + set_ref(cm, xd, 0, mi_row_pred, mi_col_pred); + if (has_second_ref(&xd->mi[0]->mbmi)) + set_ref(cm, xd, 1, mi_row_pred, mi_col_pred); + + if (!bextend) { + mbmi->tx_size = b_width_log2_lookup[bsize_top]; + } + + xd->plane[0].dst.stride = dst_stride[0]; + xd->plane[1].dst.stride = dst_stride[1]; + xd->plane[2].dst.stride = dst_stride[2]; + xd->plane[0].dst.buf = dst_buf[0] + + (r >> xd->plane[0].subsampling_y) * dst_stride[0] + + (c >> xd->plane[0].subsampling_x); + xd->plane[1].dst.buf = dst_buf[1] + + (r >> xd->plane[1].subsampling_y) * dst_stride[1] + + (c >> xd->plane[1].subsampling_x); + xd->plane[2].dst.buf = dst_buf[2] + + (r >> xd->plane[2].subsampling_y) * dst_stride[2] + + (c >> xd->plane[2].subsampling_x); + + if (!b_sub8x8) + dec_build_inter_predictors_sb(pbi, xd, mi_row_pred, mi_col_pred); + else + dec_build_inter_predictors_sb_sub8x8(pbi, 
xd, mi_row_pred, mi_col_pred, + block); +} + +static void dec_extend_dir(VP10Decoder *const pbi, MACROBLOCKD *const xd, + const TileInfo *const tile, int block, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3], int dir) { + // dir: 0-lower, 1-upper, 2-left, 3-right + // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int xss = xd->plane[1].subsampling_x; + int yss = xd->plane[1].subsampling_y; + int b_sub8x8 = (bsize < BLOCK_8X8) ? 1 : 0; + BLOCK_SIZE extend_bsize; + int unit, mi_row_pred, mi_col_pred; + + if (dir == 0 || dir == 1) { + extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss) ? + BLOCK_8X8 : BLOCK_16X8; + unit = num_8x8_blocks_wide_lookup[extend_bsize]; + mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1); + mi_col_pred = mi_col; + + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + + if (mi_width > unit) { + int i; + assert(!b_sub8x8); + for (i = 0; i < mi_width/unit - 1; i++) { + mi_col_pred += unit; + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + } + } + } else if (dir == 2 || dir == 3) { + extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss) ? + BLOCK_8X8 : BLOCK_8X16; + unit = num_8x8_blocks_high_lookup[extend_bsize]; + mi_row_pred = mi_row; + mi_col_pred = mi_col + ((dir == 3) ? 
mi_width : -1); + + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + + if (mi_height > unit) { + int i; + for (i = 0; i < mi_height/unit - 1; i++) { + mi_row_pred += unit; + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + } + } + } else { + extend_bsize = BLOCK_8X8; + mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1); + mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? mi_width : -1); + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + } +} + +static void dec_extend_all(VP10Decoder *const pbi, MACROBLOCKD *const xd, + const TileInfo *const tile, int block, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3]) { + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 0); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 1); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 2); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 3); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 4); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 5); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 6); + 
dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 7); +} + +static void dec_predict_sb_complex(VP10Decoder *const pbi, + MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3]) { + VP10_COMMON *const cm = &pbi->common; + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + MB_MODE_INFO *mbmi; + int i, offset = mi_row * cm->mi_stride + mi_col; + uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; + + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); + dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); + dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); + } else { +#endif + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; + dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + 
MAXTXLEN * MAXTXLEN; + dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf3[0] = tmp_buf3; + dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; + dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + mbmi = &xd->mi[0]->mbmi; + partition = partition_lookup[bsl][mbmi->sb_type]; + subsize = get_subsize(bsize, partition); + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, bsize, 0, 0); + dec_extend_all(pbi, xd, tile, 0, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + break; + case PARTITION_HORZ: + if (bsize == BLOCK_8X8) { + // For sub8x8, predict in 8x8 unit + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, BLOCK_8X8, 1, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + + // Second half + dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + top_bsize, BLOCK_8X8, 1, 1); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1); + + // weighted average to smooth the boundary + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, 
top_bsize, + PARTITION_HORZ, 0); + } else { + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 0); + + if (mi_row + hbs < cm->mi_rows) { + // Second half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, + mi_row_top, mi_col_top, + dst_buf1, dst_stride1, + top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, + mi_row + hbs, mi_col, + mi_row_top, mi_col_top, + dst_buf1, dst_stride1); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, + mi_row + hbs, mi_col, + mi_row_top, mi_col_top, + dst_buf1, dst_stride1, 1); + + // weighted average to smooth the boundary + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_HORZ, i); + } + } + } + break; + case PARTITION_VERT: + if (bsize == BLOCK_8X8) { + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, BLOCK_8X8, 1, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + + // Second half + dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + top_bsize, BLOCK_8X8, 1, 1); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 1, subsize, 
top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, 0); + } else { + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 3); + + // Second half + if (mi_col + hbs < cm->mi_cols) { + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, 2); + + // Smooth + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_VERT, i); + } + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, BLOCK_8X8, 1, 0); + dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, 
mi_col_top, dst_buf1, dst_stride1, + top_bsize, BLOCK_8X8, 1, 1); + dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf2, dst_stride2, + top_bsize, BLOCK_8X8, 1, 1); + dec_predict_b_extend(pbi, xd, tile, 3, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf3, dst_stride3, + top_bsize, BLOCK_8X8, 1, 1); + if (bsize < top_bsize) { + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + dec_extend_all(pbi, xd, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1); + dec_extend_all(pbi, xd, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf2, dst_stride2); + dec_extend_all(pbi, xd, tile, 3, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf3, dst_stride3); + } + } else { + dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf, dst_stride); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col + hbs, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf1, dst_stride1); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf2, dst_stride2); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col + hbs, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf3, dst_stride3); + } + for (i = 0; i < MAX_MB_PLANE; i++) { + if (bsize == BLOCK_8X8 && i != 0) + continue; // Skip <4x4 chroma smoothing + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], + dst_buf1[i], + dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, 
i); + if (mi_row + hbs < cm->mi_rows) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } + break; + default: + assert(0); + } +} +#endif // CONFIG_SUPERTX + static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX int mi_row, int mi_col, vpx_reader *r, BLOCK_SIZE bsize, int bwl, int bhl) { @@ -818,8 +1522,22 @@ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); +#if CONFIG_SUPERTX + MB_MODE_INFO *mbmi; + if (supertx_enabled) { + mbmi = set_mb_offsets(cm, xd, bsize, mi_row, mi_col, + bw, bh, x_mis, y_mis); + } else { + mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, + bw, bh, x_mis, y_mis, bwl, bhl); + } + vp10_read_mode_info(pbi, xd, supertx_enabled, + mi_row, mi_col, r, x_mis, y_mis); +#else MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, bwl, bhl); + vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); +#endif // CONFIG_SUPERTX if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { const BLOCK_SIZE uv_subsize = @@ -829,70 +1547,92 @@ VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); } - vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - - if (mbmi->skip) { - dec_reset_skip_context(xd); - 
} - - if (!is_inter_block(mbmi)) { - int plane; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) - : mbmi->tx_size; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int step = (1 << tx_size); - int row, col; - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? - 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - for (row = 0; row < max_blocks_high; row += step) - for (col = 0; col < max_blocks_wide; col += step) - predict_and_reconstruct_intra_block(xd, r, mbmi, plane, - row, col, tx_size); +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif + if (mbmi->skip) { + dec_reset_skip_context(xd); } - } else { - // Prediction - dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); - - // Reconstruction - if (!mbmi->skip) { - int eobtotal = 0; + if (!is_inter_block(mbmi)) { int plane; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) - : mbmi->tx_size; + : mbmi->tx_size; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); int row, col; - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? - 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + const int max_blocks_wide = num_4x4_w + + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + + (xd->mb_to_bottom_edge >= 0 ? 
+ 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) - eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col, - tx_size); + predict_and_reconstruct_intra_block(xd, r, mbmi, plane, + row, col, tx_size); } + } else { + // Prediction + dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); - if (!less8x8 && eobtotal == 0) -#if CONFIG_MISC_FIXES - mbmi->has_no_coeffs = 1; // skip loopfilter + // Reconstruction + if (!mbmi->skip) { + int eobtotal = 0; + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + int row, col; +#if CONFIG_VAR_TX + // TODO(jingning): This can be simplified for decoder performance. + const BLOCK_SIZE plane_bsize = + get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd); + const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + int bw = num_4x4_blocks_wide_lookup[txb_size]; + int block = 0; + const int step = 1 << (max_tx_size << 1); + + for (row = 0; row < num_4x4_h; row += bw) { + for (col = 0; col < num_4x4_w; col += bw) { + decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, + block, row, col, max_tx_size, &eobtotal); + block += step; + } + } #else - mbmi->skip = 1; // skip loopfilter + const TX_SIZE tx_size = + plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) + : mbmi->tx_size; + const int step = (1 << tx_size); + const int max_blocks_wide = num_4x4_w + + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + + (xd->mb_to_bottom_edge >= 0 ? 
+ 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col, + tx_size); #endif + } + + if (!less8x8 && eobtotal == 0) + mbmi->has_no_coeffs = 1; // skip loopfilter + } } +#if CONFIG_SUPERTX } +#endif // CONFIG_SUPERTX xd->corrupted |= vpx_reader_has_error(r); } @@ -946,8 +1686,23 @@ return p; } +#if CONFIG_SUPERTX +static int read_skip_without_seg(VP10_COMMON *cm, const MACROBLOCKD *xd, + vpx_reader *r) { + const int ctx = vp10_get_skip_context(xd); + const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->skip[ctx][skip]; + return skip; +} +#endif // CONFIG_SUPERTX + // TODO(slavarnway): eliminate bsize and subsize in future commits static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) { VP10_COMMON *const cm = &pbi->common; @@ -958,6 +1713,15 @@ BLOCK_SIZE subsize; const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; +#if CONFIG_SUPERTX + const int read_token = !supertx_enabled; + int skip = 0; + TX_SIZE supertx_size = b_width_log2_lookup[bsize]; + const TileInfo *const tile = &xd->tile; +#if CONFIG_EXT_TX + int txfm = DCT_DCT; +#endif // CONFIG_EXT_TX +#endif // CONFIG_SUPERTX if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -965,40 +1729,169 @@ partition = read_partition(cm, xd, mi_row, mi_col, r, has_rows, has_cols, n8x8_l2); subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition); +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && + partition != PARTITION_NONE && + bsize <= MAX_SUPERTX_BLOCK_SIZE && + !supertx_enabled && + !xd->lossless[0]) { + const int supertx_context = + 
partition_supertx_context_lookup[partition]; + supertx_enabled = vpx_read( + r, cm->fc->supertx_prob[supertx_context][supertx_size]); + if (xd->counts) + xd->counts->supertx[supertx_context][supertx_size][supertx_enabled]++; + } + if (supertx_enabled && read_token) { + int offset = mi_row * cm->mi_stride + mi_col; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], + mi_col, num_8x8_blocks_wide_lookup[bsize], + cm->mi_rows, cm->mi_cols); + set_skip_context(xd, mi_row, mi_col); + // Here skip is read without using any segment level feature + skip = read_skip_without_seg(cm, xd, r); + if (skip) + reset_skip_context(xd, bsize); +#if CONFIG_EXT_TX + if (!skip) { + if (get_ext_tx_types(supertx_size, bsize, 1) > 1) { + int eset = get_ext_tx_set(supertx_size, bsize, 1); + if (eset > 0) { + txfm = vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][supertx_size]); + if (xd->counts) + ++xd->counts->inter_ext_tx[eset][supertx_size][txfm]; + } + } + } +#endif // CONFIG_EXT_TX + } +#endif // CONFIG_SUPERTX if (!hbs) { // calculate bmode block dimensions (log 2) xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); - decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, 1, 1); } else { switch (partition) { case PARTITION_NONE: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2); break; case PARTITION_HORZ: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); if (has_rows) 
- decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2, - n8x8_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + hbs, mi_col, r, subsize, n4x4_l2, n8x8_l2); break; case PARTITION_VERT: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); if (has_cols) - decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2, - n4x4_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + hbs, r, subsize, n8x8_l2, n4x4_l2); break; case PARTITION_SPLIT: - decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize, - n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + hbs, r, subsize, n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + hbs, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + hbs, mi_col + hbs, r, subsize, n8x8_l2); break; default: assert(0 && "Invalid partition type"); } } +#if CONFIG_SUPERTX + if (supertx_enabled && read_token) { + uint8_t *dst_buf[3]; + int dst_stride[3], i; + + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); + for (i = 0; i < MAX_MB_PLANE; i++) { + dst_buf[i] = xd->plane[i].dst.buf; + dst_stride[i] = xd->plane[i].dst.stride; + } + dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col, 
mi_row, mi_col, + bsize, bsize, dst_buf, dst_stride); + + if (!skip) { + int eobtotal = 0; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + set_offsets_topblock(cm, xd, tile, bsize, mi_row, mi_col); +#if CONFIG_EXT_TX + xd->mi[0]->mbmi.tx_type = txfm; +#endif + for (i = 0; i < MAX_MB_PLANE; ++i) { + const struct macroblockd_plane *const pd = &xd->plane[i]; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + int row, col; + const TX_SIZE tx_size = + i ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) + : mbmi->tx_size; + const int step = (1 << tx_size); + const int max_blocks_wide = num_4x4_w + + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + + (xd->mb_to_bottom_edge >= 0 ? + 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += reconstruct_inter_block(xd, r, mbmi, i, row, col, + tx_size); + } + if (!(subsize < BLOCK_8X8) && eobtotal == 0) + skip = 1; + } + set_param_topblock(cm, xd, bsize, mi_row, mi_col, +#if CONFIG_EXT_TX + txfm, +#endif + skip); + } +#endif // CONFIG_SUPERTX + // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) @@ -1048,9 +1941,6 @@ static void setup_segmentation(VP10_COMMON *const cm, struct vpx_read_bit_buffer *rb) { struct segmentation *const seg = &cm->seg; -#if !CONFIG_MISC_FIXES - struct segmentation_probs *const segp = &cm->segp; -#endif int i, j; seg->update_map = 0; @@ -1067,26 +1957,11 @@ seg->update_map = vpx_rb_read_bit(rb); } if (seg->update_map) { -#if !CONFIG_MISC_FIXES - for (i = 0; i < SEG_TREE_PROBS; i++) - segp->tree_probs[i] = vpx_rb_read_bit(rb) ? 
vpx_rb_read_literal(rb, 8) - : MAX_PROB; -#endif if (frame_is_intra_only(cm) || cm->error_resilient_mode) { seg->temporal_update = 0; } else { seg->temporal_update = vpx_rb_read_bit(rb); } -#if !CONFIG_MISC_FIXES - if (seg->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) - segp->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) - : MAX_PROB; - } else { - for (i = 0; i < PREDICTION_PROBS; i++) - segp->pred_probs[i] = MAX_PROB; - } -#endif } // Segmentation data update @@ -1140,7 +2015,7 @@ static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { return vpx_rb_read_bit(rb) ? - vpx_rb_read_inv_signed_literal(rb, CONFIG_MISC_FIXES ? 6 : 4) : 0; + vpx_rb_read_inv_signed_literal(rb, 6) : 0; } static void setup_quantization(VP10_COMMON *const cm, @@ -1180,7 +2055,8 @@ } static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { - return vpx_rb_read_bit(rb) ? SWITCHABLE : vpx_rb_read_literal(rb, 2); + return vpx_rb_read_bit(rb) ? + SWITCHABLE : vpx_rb_read_literal(rb, 2 + CONFIG_EXT_INTERP); } static void setup_render_size(VP10_COMMON *cm, @@ -1283,10 +2159,8 @@ YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; width = buf->y_crop_width; height = buf->y_crop_height; -#if CONFIG_MISC_FIXES cm->render_width = buf->render_width; cm->render_height = buf->render_height; -#endif found = 1; break; } @@ -1294,9 +2168,7 @@ if (!found) { vp10_read_frame_size(rb, &width, &height); -#if CONFIG_MISC_FIXES setup_render_size(cm, rb); -#endif } if (width <= 0 || height <= 0) @@ -1328,9 +2200,6 @@ } resize_context_buffers(cm, width, height); -#if !CONFIG_MISC_FIXES - setup_render_size(cm, rb); -#endif lock_buffer_pool(pool); if (vpx_realloc_frame_buffer( @@ -1377,14 +2246,10 @@ if (cm->log2_tile_rows) cm->log2_tile_rows += vpx_rb_read_bit(rb); -#if CONFIG_MISC_FIXES // tile size magnitude if (cm->log2_tile_rows > 0 || cm->log2_tile_cols > 0) { cm->tile_sz_mag = vpx_rb_read_literal(rb, 2); } -#else - cm->tile_sz_mag = 3; -#endif } 
typedef struct TileBuffer { @@ -1428,9 +2293,9 @@ if (decrypt_cb) { uint8_t be_data[4]; decrypt_cb(decrypt_state, *data, be_data, tile_sz_mag + 1); - size = mem_get_varsize(be_data, tile_sz_mag) + CONFIG_MISC_FIXES; + size = mem_get_varsize(be_data, tile_sz_mag) + 1; } else { - size = mem_get_varsize(*data, tile_sz_mag) + CONFIG_MISC_FIXES; + size = mem_get_varsize(*data, tile_sz_mag) + 1; } *data += tile_sz_mag + 1; @@ -1508,6 +2373,11 @@ memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_cols); +#if CONFIG_VAR_TX + memset(cm->above_txfm_context, 0, + sizeof(*cm->above_txfm_context) * aligned_cols); +#endif + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); if (pbi->tile_data == NULL || @@ -1554,16 +2424,24 @@ vp10_tile_set_col(&tile, tile_data->cm, col); vp10_zero(tile_data->xd.left_context); vp10_zero(tile_data->xd.left_seg_context); +#if CONFIG_VAR_TX + vp10_zero(tile_data->xd.left_txfm_context_buffer); +#endif for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, mi_row, - mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); + decode_partition(pbi, &tile_data->xd, +#if CONFIG_SUPERTX + 0, +#endif + mi_row, mi_col, &tile_data->bit_reader, + BLOCK_64X64, 4); } pbi->mb.corrupted |= tile_data->xd.corrupted; if (pbi->mb.corrupted) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Failed to decode tile data"); } +#if !CONFIG_VAR_TX // Loopfilter one row. if (cm->lf.filter_level && !cm->skip_loop_filter) { const int lf_start = mi_row - MI_BLOCK_SIZE; @@ -1590,10 +2468,15 @@ if (cm->frame_parallel_decode) vp10_frameworker_broadcast(pbi->cur_buf, mi_row << MI_BLOCK_SIZE_LOG2); +#endif } } // Loopfilter remaining rows in the frame. 
+#if CONFIG_VAR_TX + vp10_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb, + cm->lf.filter_level, 0, 0); +#else if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; winterface->sync(&pbi->lf_worker); @@ -1601,6 +2484,7 @@ lf_data->stop = cm->mi_rows; winterface->execute(&pbi->lf_worker); } +#endif // Get last tile data. tile_data = pbi->tile_data + tile_cols * tile_rows - 1; @@ -1627,9 +2511,15 @@ mi_row += MI_BLOCK_SIZE) { vp10_zero(tile_data->xd.left_context); vp10_zero(tile_data->xd.left_seg_context); +#if CONFIG_VAR_TX + vp10_zero(tile_data->xd.left_txfm_context_buffer); +#endif for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data->pbi, &tile_data->xd, +#if CONFIG_SUPERTX + 0, +#endif mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); } @@ -1704,7 +2594,10 @@ sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_mi_cols); - +#if CONFIG_VAR_TX + memset(cm->above_txfm_context, 0, + sizeof(*cm->above_txfm_context) * aligned_mi_cols); +#endif // Load tile data into tile_buffers get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); @@ -1865,6 +2758,10 @@ int i, mask, ref_index = 0; size_t sz; +#if CONFIG_EXT_REFS + cm->last3_frame_type = cm->last2_frame_type; + cm->last2_frame_type = cm->last_frame_type; +#endif // CONFIG_EXT_REFS cm->last_frame_type = cm->frame_type; cm->last_intra_only = cm->intra_only; @@ -1930,13 +2827,14 @@ memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); pbi->need_resync = 0; } + if (frame_is_intra_only(cm)) + cm->allow_screen_content_tools = vpx_rb_read_bit(rb); } else { cm->intra_only = cm->show_frame ? 
0 : vpx_rb_read_bit(rb); if (cm->error_resilient_mode) { cm->reset_frame_context = RESET_FRAME_CONTEXT_ALL; } else { -#if CONFIG_MISC_FIXES if (cm->intra_only) { cm->reset_frame_context = vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL @@ -1950,40 +2848,14 @@ vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL : RESET_FRAME_CONTEXT_CURRENT; } -#else - static const RESET_FRAME_CONTEXT_MODE reset_frame_context_conv_tbl[4] = { - RESET_FRAME_CONTEXT_NONE, RESET_FRAME_CONTEXT_NONE, - RESET_FRAME_CONTEXT_CURRENT, RESET_FRAME_CONTEXT_ALL - }; - - cm->reset_frame_context = - reset_frame_context_conv_tbl[vpx_rb_read_literal(rb, 2)]; -#endif } if (cm->intra_only) { if (!vp10_read_sync_code(rb)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); -#if CONFIG_MISC_FIXES + read_bitdepth_colorspace_sampling(cm, rb); -#else - if (cm->profile > PROFILE_0) { - read_bitdepth_colorspace_sampling(cm, rb); - } else { - // NOTE: The intra-only frame header does not include the specification - // of either the color format or color sub-sampling in profile 0. VP9 - // specifies that the default color format should be YUV 4:2:0 in this - // case (normative). - cm->color_space = VPX_CS_BT_601; - cm->color_range = 0; - cm->subsampling_y = cm->subsampling_x = 1; - cm->bit_depth = VPX_BITS_8; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = 0; -#endif - } -#endif pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); setup_frame_size(cm, rb); @@ -2046,10 +2918,6 @@ cm->refresh_frame_context = vpx_rb_read_bit(rb) ? REFRESH_FRAME_CONTEXT_FORWARD : REFRESH_FRAME_CONTEXT_BACKWARD; -#if !CONFIG_MISC_FIXES - } else { - vpx_rb_read_bit(rb); // parallel decoding mode flag -#endif } } else { cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_OFF; @@ -2076,6 +2944,7 @@ for (; ref_index < REF_FRAMES; ++ref_index) { cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; + // Current thread holds the reference frame. 
if (cm->ref_frame_map[ref_index] >= 0) ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; @@ -2108,11 +2977,9 @@ } setup_segmentation_dequant(cm); -#if CONFIG_MISC_FIXES cm->tx_mode = (!cm->seg.enabled && xd->lossless[0]) ? ONLY_4X4 : read_tx_mode(rb); cm->reference_mode = read_frame_reference_mode(cm, rb); -#endif setup_tile_info(cm, rb); sz = vpx_rb_read_literal(rb, 16); @@ -2124,6 +2991,32 @@ return sz; } +#if CONFIG_EXT_TX +static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j, k; + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_inter_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < num_ext_tx_set_inter[s] - 1; ++j) + vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[s][i][j]); + } + } + } + + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_intra_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < INTRA_MODES; ++j) + for (k = 0; k < num_ext_tx_set_intra[s] - 1; ++k) + vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[s][i][j][k]); + } + } + } +} +#else static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { int i, j, k; if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { @@ -2141,10 +3034,25 @@ } } +#endif // CONFIG_EXT_TX + +#if CONFIG_SUPERTX +static void read_supertx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + vp10_diff_update_prob(r, &fc->supertx_prob[i][j]); + } + } + } +} +#endif // CONFIG_SUPERTX + static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, size_t partition_size) { VP10_COMMON *const cm = &pbi->common; -#if !CONFIG_MISC_FIXES +#if CONFIG_SUPERTX MACROBLOCKD *const xd = &pbi->mb; #endif FRAME_CONTEXT *const fc = cm->fc; @@ -2156,17 +3064,18 @@ 
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); -#if !CONFIG_MISC_FIXES - cm->tx_mode = xd->lossless[0] ? ONLY_4X4 : read_tx_mode(&r); -#endif if (cm->tx_mode == TX_MODE_SELECT) read_tx_mode_probs(&fc->tx_probs, &r); read_coef_probs(fc, cm->tx_mode, &r); +#if CONFIG_VAR_TX + for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k) + vp10_diff_update_prob(&r, &fc->txfm_partition_prob[k]); +#endif + for (k = 0; k < SKIP_CONTEXTS; ++k) vp10_diff_update_prob(&r, &fc->skip_probs[k]); -#if CONFIG_MISC_FIXES if (cm->seg.enabled) { if (cm->seg.temporal_update) { for (k = 0; k < PREDICTION_PROBS; k++) @@ -2183,16 +3092,13 @@ for (j = 0; j < PARTITION_CONTEXTS; ++j) for (i = 0; i < PARTITION_TYPES - 1; ++i) vp10_diff_update_prob(&r, &fc->partition_prob[j][i]); -#endif if (frame_is_intra_only(cm)) { vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob); -#if CONFIG_MISC_FIXES for (k = 0; k < INTRA_MODES; k++) for (j = 0; j < INTRA_MODES; j++) for (i = 0; i < INTRA_MODES - 1; ++i) vp10_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]); -#endif } else { nmv_context *const nmvc = &fc->nmvc; @@ -2204,9 +3110,6 @@ for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp10_diff_update_prob(&r, &fc->intra_inter_prob[i]); -#if !CONFIG_MISC_FIXES - cm->reference_mode = read_frame_reference_mode(cm, &r); -#endif if (cm->reference_mode != SINGLE_REFERENCE) setup_compound_reference_mode(cm); read_frame_reference_mode_probs(cm, &r); @@ -2215,14 +3118,12 @@ for (i = 0; i < INTRA_MODES - 1; ++i) vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]); -#if !CONFIG_MISC_FIXES - for (j = 0; j < PARTITION_CONTEXTS; ++j) - for (i = 0; i < PARTITION_TYPES - 1; ++i) - vp10_diff_update_prob(&r, &fc->partition_prob[j][i]); -#endif - read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); read_ext_tx_probs(fc, &r); +#if CONFIG_SUPERTX + if (!xd->lossless[0]) + read_supertx_probs(fc, &r); +#endif } return vpx_reader_has_error(&r); @@ -2263,10 +3164,14 @@ assert(!memcmp(&cm->counts.tx, 
&zero_counts.tx, sizeof(cm->counts.tx))); assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip))); assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); - assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx, - sizeof(cm->counts.intra_ext_tx))); +#if CONFIG_EXT_TX assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx, sizeof(cm->counts.inter_ext_tx))); + assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx, + sizeof(cm->counts.intra_ext_tx))); +#else + assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx, +#endif // CONFIG_EXT_TX } #endif // NDEBUG @@ -2407,14 +3312,9 @@ if (!xd->corrupted) { if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { vp10_adapt_coef_probs(cm); -#if CONFIG_MISC_FIXES vp10_adapt_intra_frame_probs(cm); -#endif if (!frame_is_intra_only(cm)) { -#if !CONFIG_MISC_FIXES - vp10_adapt_intra_frame_probs(cm); -#endif vp10_adapt_inter_frame_probs(cm); vp10_adapt_mv_probs(cm, cm->allow_high_precision_mv); }
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index a28ae55..ec93453 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c
@@ -63,7 +63,59 @@ } static PREDICTION_MODE read_inter_mode(VP10_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, int ctx) { + vpx_reader *r, int16_t ctx) { +#if CONFIG_REF_MV + FRAME_COUNTS *counts = xd->counts; + int16_t mode_ctx = ctx & NEWMV_CTX_MASK; + vpx_prob mode_prob = cm->fc->newmv_prob[mode_ctx]; + + if (vpx_read(r, mode_prob) == 0) { + if (counts) + ++counts->newmv_mode[mode_ctx][0]; + return NEWMV; + } + if (counts) + ++counts->newmv_mode[mode_ctx][1]; + + if (ctx & (1 << ALL_ZERO_FLAG_OFFSET)) + return ZEROMV; + + mode_ctx = (ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; + + mode_prob = cm->fc->zeromv_prob[mode_ctx]; + if (vpx_read(r, mode_prob) == 0) { + if (counts) + ++counts->zeromv_mode[mode_ctx][0]; + return ZEROMV; + } + if (counts) + ++counts->zeromv_mode[mode_ctx][1]; + + mode_ctx = (ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; + + if (ctx & (1 << SKIP_NEARESTMV_OFFSET)) + mode_ctx = 6; + if (ctx & (1 << SKIP_NEARMV_OFFSET)) + mode_ctx = 7; + if (ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) + mode_ctx = 8; + + mode_prob = cm->fc->refmv_prob[mode_ctx]; + + if (vpx_read(r, mode_prob) == 0) { + if (counts) + ++counts->refmv_mode[mode_ctx][0]; + + return NEARESTMV; + } else { + if (counts) + ++counts->refmv_mode[mode_ctx][1]; + return NEARMV; + } + + // Invalid prediction mode. 
+ assert(0); +#else const int mode = vpx_read_tree(r, vp10_inter_mode_tree, cm->fc->inter_mode_probs[ctx]); FRAME_COUNTS *counts = xd->counts; @@ -71,6 +123,7 @@ ++counts->inter_mode[ctx][mode]; return NEARESTMV + mode; +#endif } static int read_segment_id(vpx_reader *r, @@ -78,6 +131,68 @@ return vpx_read_tree(r, vp10_segment_tree, segp->tree_probs); } +#if CONFIG_VAR_TX +static void read_tx_size_inter(VP10_COMMON *cm, MACROBLOCKD *xd, + MB_MODE_INFO *mbmi, FRAME_COUNTS *counts, + TX_SIZE tx_size, int blk_row, int blk_col, + vpx_reader *r) { + int is_split = 0; + const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1); + int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; + int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), + tx_size); + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> 5; + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> 5; + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + is_split = vpx_read(r, cm->fc->txfm_partition_prob[ctx]); + + if (is_split) { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bsl = b_width_log2_lookup[bsize]; + int i; + + if (counts) + ++counts->txfm_partition[ctx][1]; + + if (tx_size == TX_8X8) { + mbmi->inter_tx_size[tx_idx] = TX_4X4; + mbmi->tx_size = mbmi->inter_tx_size[tx_idx]; + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), TX_4X4); + return; + } + + assert(bsl > 0); + --bsl; + for (i = 0; i < 4; ++i) { + int offsetr = blk_row + ((i >> 1) << bsl); + int offsetc = blk_col + ((i & 0x01) << bsl); + read_tx_size_inter(cm, xd, mbmi, counts, + tx_size - 1, offsetr, offsetc, r); + } + } else { + int idx, idy; + mbmi->inter_tx_size[tx_idx] = tx_size; + for (idy = 0; idy < (1 << tx_size) / 2; ++idy) + for (idx = 0; idx < (1 << tx_size) / 2; 
++idx) + mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size; + mbmi->tx_size = mbmi->inter_tx_size[tx_idx]; + if (counts) + ++counts->txfm_partition[ctx][0]; + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), tx_size); + } +} +#endif + static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd, TX_SIZE max_tx_size, vpx_reader *r) { FRAME_COUNTS *counts = xd->counts; @@ -136,28 +251,18 @@ int mi_offset, int x_mis, int y_mis, vpx_reader *r) { struct segmentation *const seg = &cm->seg; -#if CONFIG_MISC_FIXES FRAME_COUNTS *counts = xd->counts; struct segmentation_probs *const segp = &cm->fc->seg; -#else - struct segmentation_probs *const segp = &cm->segp; -#endif int segment_id; -#if !CONFIG_MISC_FIXES - (void) xd; -#endif - if (!seg->enabled) return 0; // Default for disabled segmentation assert(seg->update_map && !seg->temporal_update); segment_id = read_segment_id(r, segp); -#if CONFIG_MISC_FIXES if (counts) ++counts->seg.tree_total[segment_id]; -#endif set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); return segment_id; } @@ -177,12 +282,8 @@ static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vpx_reader *r) { struct segmentation *const seg = &cm->seg; -#if CONFIG_MISC_FIXES FRAME_COUNTS *counts = xd->counts; struct segmentation_probs *const segp = &cm->fc->seg; -#else - struct segmentation_probs *const segp = &cm->segp; -#endif MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int predicted_segment_id, segment_id; const int mi_offset = mi_row * cm->mi_cols + mi_col; @@ -210,25 +311,19 @@ const int ctx = vp10_get_pred_context_seg_id(xd); const vpx_prob pred_prob = segp->pred_probs[ctx]; mbmi->seg_id_predicted = vpx_read(r, pred_prob); -#if CONFIG_MISC_FIXES if (counts) ++counts->seg.pred[ctx][mbmi->seg_id_predicted]; -#endif if (mbmi->seg_id_predicted) { segment_id = predicted_segment_id; } else { segment_id = read_segment_id(r, segp); -#if 
CONFIG_MISC_FIXES if (counts) ++counts->seg.tree_mispred[segment_id]; -#endif } } else { segment_id = read_segment_id(r, segp); -#if CONFIG_MISC_FIXES if (counts) ++counts->seg.tree_total[segment_id]; -#endif } set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); return segment_id; @@ -248,6 +343,71 @@ } } +static void read_palette_mode_info(VP10_COMMON *const cm, + MACROBLOCKD *const xd, + vpx_reader *r) { + MODE_INFO *const mi = xd->mi[0]; + MB_MODE_INFO *const mbmi = &mi->mbmi; + const MODE_INFO *above_mi = xd->above_mi; + const MODE_INFO *left_mi = xd->left_mi; + const BLOCK_SIZE bsize = mbmi->sb_type; + int i, palette_ctx = 0; + + if (above_mi) + palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); + if (left_mi) + palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); + if (vpx_read(r, vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8] + [palette_ctx])) { + int n; + PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info; + + pmi->palette_size[0] = + vpx_read_tree(r, vp10_palette_size_tree, + vp10_default_palette_y_size_prob[bsize - BLOCK_8X8]) + 2; + n = pmi->palette_size[0]; + + for (i = 0; i < n; ++i) + pmi->palette_colors[i] = vpx_read_literal(r, cm->bit_depth); + + xd->plane[0].color_index_map[0] = read_uniform(r, n); + assert(xd->plane[0].color_index_map[0] < n); + } +} + +#if CONFIG_EXT_INTRA +static void read_ext_intra_mode_info(VP10_COMMON *const cm, + MACROBLOCKD *const xd, vpx_reader *r) { + MODE_INFO *const mi = xd->mi[0]; + MB_MODE_INFO *const mbmi = &mi->mbmi; + FRAME_COUNTS *counts = xd->counts; + +#if !ALLOW_FILTER_INTRA_MODES + return; +#endif + if (mbmi->mode == DC_PRED) { + mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = + vpx_read(r, cm->fc->ext_intra_probs[0]); + if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) { + mbmi->ext_intra_mode_info.ext_intra_mode[0] = + read_uniform(r, FILTER_INTRA_MODES); + } + if (counts) + ++counts->ext_intra[0][mbmi->ext_intra_mode_info.use_ext_intra_mode[0]]; + } + if 
(mbmi->uv_mode == DC_PRED) { + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = + vpx_read(r, cm->fc->ext_intra_probs[1]); + if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) { + mbmi->ext_intra_mode_info.ext_intra_mode[1] = + read_uniform(r, FILTER_INTRA_MODES); + } + if (counts) + ++counts->ext_intra[1][mbmi->ext_intra_mode_info.use_ext_intra_mode[1]]; + } +} +#endif // CONFIG_EXT_INTRA + static void read_intra_frame_mode_info(VP10_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vpx_reader *r) { @@ -293,10 +453,46 @@ default: mbmi->mode = read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 0)); +#if CONFIG_EXT_INTRA + if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) + mbmi->angle_delta[0] = + read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS; +#endif // CONFIG_EXT_INTRA } mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode); +#if CONFIG_EXT_INTRA + if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED && + bsize >= BLOCK_8X8) + mbmi->angle_delta[1] = + read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS; +#endif + mbmi->palette_mode_info.palette_size[0] = 0; + mbmi->palette_mode_info.palette_size[1] = 0; + if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools && + mbmi->mode == DC_PRED) + read_palette_mode_info(cm, xd, r); + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, 0) > 1 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && + ALLOW_INTRA_EXT_TX) { + FRAME_COUNTS *counts = xd->counts; + int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, 0); + if (eset > 0) { + mbmi->tx_type = vpx_read_tree( + r, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]); + if (counts) + ++counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode] + [mbmi->tx_type]; + } + } else { + mbmi->tx_type = DCT_DCT; + } +#else if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip && 
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { @@ -310,6 +506,14 @@ } else { mbmi->tx_type = DCT_DCT; } +#endif // CONFIG_EXT_TX + +#if CONFIG_EXT_INTRA + mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; + if (bsize >= BLOCK_8X8) + read_ext_intra_mode_info(cm, xd, r); +#endif // CONFIG_EXT_INTRA } static int read_mv_component(vpx_reader *r, @@ -399,12 +603,68 @@ if (mode == COMPOUND_REFERENCE) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ctx = vp10_get_pred_context_comp_ref_p(cm, xd); - const int bit = vpx_read(r, fc->comp_ref_prob[ctx]); + const int bit = vpx_read(r, fc->comp_ref_prob[ctx][0]); if (counts) - ++counts->comp_ref[ctx][bit]; + ++counts->comp_ref[ctx][0][bit]; ref_frame[idx] = cm->comp_fixed_ref; + +#if CONFIG_EXT_REFS + if (!bit) { + const int ctx1 = vp10_get_pred_context_comp_ref_p1(cm, xd); + const int bit1 = vpx_read(r, fc->comp_ref_prob[ctx1][1]); + if (counts) + ++counts->comp_ref[ctx1][1][bit1]; + ref_frame[!idx] = cm->comp_var_ref[bit1 ? 0 : 1]; + } else { + const int ctx2 = vp10_get_pred_context_comp_ref_p2(cm, xd); + const int bit2 = vpx_read(r, fc->comp_ref_prob[ctx2][2]); + if (counts) + ++counts->comp_ref[ctx2][2][bit2]; + if (!bit2) { + const int ctx3 = vp10_get_pred_context_comp_ref_p3(cm, xd); + const int bit3 = vpx_read(r, fc->comp_ref_prob[ctx3][3]); + if (counts) + ++counts->comp_ref[ctx3][3][bit3]; + ref_frame[!idx] = cm->comp_var_ref[bit3 ? 
2 : 3]; + } else { + ref_frame[!idx] = cm->comp_var_ref[4]; + } + } +#else ref_frame[!idx] = cm->comp_var_ref[bit]; +#endif // CONFIG_EXT_REFS } else if (mode == SINGLE_REFERENCE) { +#if CONFIG_EXT_REFS + const int ctx0 = vp10_get_pred_context_single_ref_p1(xd); + const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); + if (counts) + ++counts->single_ref[ctx0][0][bit0]; + if (bit0) { + const int ctx1 = vp10_get_pred_context_single_ref_p2(xd); + const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]); + if (counts) + ++counts->single_ref[ctx1][1][bit1]; + ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; + } else { + const int ctx2 = vp10_get_pred_context_single_ref_p3(xd); + const int bit2 = vpx_read(r, fc->single_ref_prob[ctx2][2]); + if (counts) + ++counts->single_ref[ctx2][2][bit2]; + if (bit2) { + const int ctx4 = vp10_get_pred_context_single_ref_p5(xd); + const int bit4 = vpx_read(r, fc->single_ref_prob[ctx4][4]); + if (counts) + ++counts->single_ref[ctx4][4][bit4]; + ref_frame[0] = bit4 ? LAST4_FRAME : LAST3_FRAME; + } else { + const int ctx3 = vp10_get_pred_context_single_ref_p4(xd); + const int bit3 = vpx_read(r, fc->single_ref_prob[ctx3][3]); + if (counts) + ++counts->single_ref[ctx3][3][bit3]; + ref_frame[0] = bit3 ? 
LAST2_FRAME : LAST_FRAME; + } + } +#else const int ctx0 = vp10_get_pred_context_single_ref_p1(xd); const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); if (counts) @@ -418,6 +678,7 @@ } else { ref_frame[0] = LAST_FRAME; } +#endif // CONFIG_EXT_REFS ref_frame[1] = NONE; } else { @@ -431,10 +692,13 @@ VP10_COMMON *const cm, MACROBLOCKD *const xd, vpx_reader *r) { const int ctx = vp10_get_pred_context_switchable_interp(xd); - const INTERP_FILTER type = - (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree, - cm->fc->switchable_interp_prob[ctx]); FRAME_COUNTS *counts = xd->counts; + INTERP_FILTER type; +#if CONFIG_EXT_INTERP + if (!vp10_is_interp_needed(xd)) return EIGHTTAP; +#endif + type = (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree, + cm->fc->switchable_interp_prob[ctx]); if (counts) ++counts->switchable_interp[ctx][type]; return type; @@ -470,9 +734,30 @@ break; default: mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); +#if CONFIG_EXT_INTRA + mbmi->angle_delta[0] = 0; + if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) + mbmi->angle_delta[0] = + read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS; +#endif // CONFIG_EXT_INTRA } mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode); +#if CONFIG_EXT_INTRA + if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED && + bsize >= BLOCK_8X8) + mbmi->angle_delta[1] = + read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS; +#endif // CONFIG_EXT_INTRA + + mbmi->palette_mode_info.palette_size[0] = 0; + mbmi->palette_mode_info.palette_size[1] = 0; +#if CONFIG_EXT_INTRA + mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; + if (bsize >= BLOCK_8X8) + read_ext_intra_mode_info(cm, xd, r); +#endif // CONFIG_EXT_INTRA } static INLINE int is_mv_valid(const MV *mv) { @@ -555,13 +840,15 @@ int_mv nearestmv[2], nearmv[2]; int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; int ref, is_compound; - uint8_t 
inter_mode_ctx[MAX_REF_FRAMES]; + int16_t inter_mode_ctx[MAX_REF_FRAMES]; + int16_t mode_ctx = 0; + MV_REFERENCE_FRAME ref_frame; read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame); is_compound = has_second_ref(mbmi); for (ref = 0; ref < 1 + is_compound; ++ref) { - const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME]; xd->block_refs[ref] = ref_buf; @@ -570,10 +857,25 @@ "Reference frame has invalid dimensions"); vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf); - vp10_find_mv_refs(cm, xd, mi, frame, ref_mvs[frame], - mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx); } + for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) { + vp10_find_mv_refs(cm, xd, mi, ref_frame, +#if CONFIG_REF_MV + &xd->ref_mv_count[ref_frame], + xd->ref_mv_stack[ref_frame], +#endif + ref_mvs[ref_frame], + mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx); + } + + mode_ctx = inter_mode_ctx[mbmi->ref_frame[0]]; + +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(inter_mode_ctx, + mbmi->ref_frame, bsize, -1); +#endif + if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { mbmi->mode = ZEROMV; if (bsize < BLOCK_8X8) { @@ -583,8 +885,7 @@ } } else { if (bsize >= BLOCK_8X8) - mbmi->mode = read_inter_mode(cm, xd, r, - inter_mode_ctx[mbmi->ref_frame[0]]); + mbmi->mode = read_inter_mode(cm, xd, r, mode_ctx); } if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { @@ -594,9 +895,11 @@ } } +#if !CONFIG_EXT_INTERP mbmi->interp_filter = (cm->interp_filter == SWITCHABLE) - ? read_switchable_interp_filter(cm, xd, r) - : cm->interp_filter; + ? 
read_switchable_interp_filter(cm, xd, r) + : cm->interp_filter; +#endif // !CONFIG_EXT_INTERP if (bsize < BLOCK_8X8) { const int num_4x4_w = 1 << xd->bmode_blocks_wl; @@ -608,15 +911,17 @@ for (idx = 0; idx < 2; idx += num_4x4_w) { int_mv block[2]; const int j = idy * 2 + idx; - b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]); +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame, + bsize, j); +#endif + b_mode = read_inter_mode(cm, xd, r, mode_ctx); if (b_mode == NEARESTMV || b_mode == NEARMV) { - uint8_t dummy_mode_ctx[MAX_REF_FRAMES]; for (ref = 0; ref < 1 + is_compound; ++ref) vp10_append_sub8x8_mvs_for_idx(cm, xd, j, ref, mi_row, mi_col, &nearest_sub8x8[ref], - &near_sub8x8[ref], - dummy_mode_ctx); + &near_sub8x8[ref]); } if (!assign_mv(cm, xd, b_mode, block, nearestmv, @@ -645,28 +950,118 @@ xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv, nearestmv, nearmv, is_compound, allow_hp, r); } +#if CONFIG_EXT_INTERP + mbmi->interp_filter = (cm->interp_filter == SWITCHABLE) + ? 
read_switchable_interp_filter(cm, xd, r) + : cm->interp_filter; +#endif // CONFIG_EXT_INTERP } static void read_inter_frame_mode_info(VP10Decoder *const pbi, MACROBLOCKD *const xd, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX int mi_row, int mi_col, vpx_reader *r) { VP10_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - int inter_block; + int inter_block = 1; +#if CONFIG_VAR_TX + BLOCK_SIZE bsize = mbmi->sb_type; +#endif // CONFIG_VAR_TX +#if CONFIG_SUPERTX + (void) supertx_enabled; +#endif // CONFIG_SUPERTX mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); - mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); - inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif // CONFIG_SUPERTX + mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); + +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && + !mbmi->skip && inter_block) { + const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; + const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + const int bs = num_4x4_blocks_wide_lookup[txb_size]; + const int width = num_4x4_blocks_wide_lookup[bsize]; + const int height = num_4x4_blocks_high_lookup[bsize]; + int idx, idy; + for (idy = 0; idy < height; idy += bs) + for (idx = 0; idx < width; idx += bs) + read_tx_size_inter(cm, xd, mbmi, xd->counts, max_tx_size, + idy, idx, r); + if (xd->counts) { + const int ctx = get_tx_size_context(xd); + ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size]; + } + } else { + mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip 
|| !inter_block, r); + if (inter_block) { + const int width = num_4x4_blocks_wide_lookup[bsize]; + const int height = num_4x4_blocks_high_lookup[bsize]; + int idx, idy; + for (idy = 0; idy < height; ++idy) + for (idx = 0; idx < width; ++idx) + mbmi->inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = mbmi->tx_size; + } + + set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h); + set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w); + } +#else + mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); +#endif // CONFIG_VAR_TX +#if CONFIG_SUPERTX + } +#endif // CONFIG_SUPERTX if (inter_block) - read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); + read_inter_block_mode_info(pbi, xd, + mi, mi_row, mi_col, r); else read_intra_block_mode_info(cm, xd, mi, r); +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, inter_block) > 1 && + cm->base_qindex > 0 && !mbmi->skip && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, + inter_block); + FRAME_COUNTS *counts = xd->counts; + + if (inter_block) { + if (eset > 0) { + mbmi->tx_type = + vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size]); + if (counts) + ++counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type]; + } + } else if (ALLOW_INTRA_EXT_TX) { + if (eset > 0) { + mbmi->tx_type = vpx_read_tree(r, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset] + [mbmi->tx_size][mbmi->mode]); + if (counts) + ++counts->intra_ext_tx[eset][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } + } else { + mbmi->tx_type = DCT_DCT; + } +#else if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip && !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { @@ -688,11 +1083,15 @@ } else { mbmi->tx_type = DCT_DCT; } +#endif // CONFIG_EXT_TX } void vp10_read_mode_info(VP10Decoder 
*const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { VP10_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; @@ -700,9 +1099,22 @@ if (frame_is_intra_only(cm)) { read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); +#if CONFIG_REF_MV + for (h = 0; h < y_mis; ++h) { + MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mv + w; + mv->ref_frame[0] = NONE; + mv->ref_frame[1] = NONE; + } + } +#endif } else { - read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r); - + read_inter_frame_mode_info(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r); for (h = 0; h < y_mis; ++h) { MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) {
diff --git a/vp10/decoder/decodemv.h b/vp10/decoder/decodemv.h index 6653be5..959a001 100644 --- a/vp10/decoder/decodemv.h +++ b/vp10/decoder/decodemv.h
@@ -20,8 +20,12 @@ #endif void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis); +#if CONFIG_SUPERTX + int supertx_enabled, +#endif + + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis); #ifdef __cplusplus } // extern "C"
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c index d8864d2..2dbadb3 100644 --- a/vp10/decoder/decoder.c +++ b/vp10/decoder/decoder.c
@@ -196,10 +196,23 @@ // later commit that adds VP9-specific controls for this functionality. if (ref_frame_flag == VP9_LAST_FLAG) { ref_buf = &cm->frame_refs[0]; +#if CONFIG_EXT_REFS + } else if (ref_frame_flag == VP9_LAST2_FLAG) { + ref_buf = &cm->frame_refs[1]; + } else if (ref_frame_flag == VP9_LAST3_FLAG) { + ref_buf = &cm->frame_refs[2]; + } else if (ref_frame_flag == VP9_LAST4_FLAG) { + ref_buf = &cm->frame_refs[3]; + } else if (ref_frame_flag == VP9_GOLD_FLAG) { + ref_buf = &cm->frame_refs[4]; + } else if (ref_frame_flag == VP9_ALT_FLAG) { + ref_buf = &cm->frame_refs[5]; +#else } else if (ref_frame_flag == VP9_GOLD_FLAG) { ref_buf = &cm->frame_refs[1]; } else if (ref_frame_flag == VP9_ALT_FLAG) { ref_buf = &cm->frame_refs[2]; +#endif // CONFIG_EXT_REFS } else { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); @@ -243,10 +256,10 @@ // Current thread releases the holding of reference frame. decrease_ref_count(old_idx, frame_bufs, pool); - // Release the reference frame in reference map. - if ((mask & 1) && old_idx >= 0) { + // Release the reference frame holding in the reference map for the decoding + // of the next frame. + if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool); - } cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; ++ref_index; } @@ -268,7 +281,7 @@ } // Invalidate these references until the next frame starts. - for (ref_index = 0; ref_index < 3; ref_index++) + for (ref_index = 0; ref_index < REFS_PER_FRAME; ref_index++) cm->frame_refs[ref_index].idx = -1; } @@ -326,7 +339,6 @@ pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; } - if (setjmp(cm->error.jmp)) { const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); int i; @@ -350,10 +362,10 @@ // Current thread releases the holding of reference frame. decrease_ref_count(old_idx, frame_bufs, pool); - // Release the reference frame in reference map. 
- if ((mask & 1) && old_idx >= 0) { + // Release the reference frame holding in the reference map for the + // decoding of the next frame. + if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool); - } ++ref_index; } @@ -459,9 +471,7 @@ // an invalid bitstream and need to return an error. uint8_t marker; -#if CONFIG_MISC_FIXES size_t frame_sz_sum = 0; -#endif assert(data_sz); marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); @@ -470,7 +480,7 @@ if ((marker & 0xe0) == 0xc0) { const uint32_t frames = (marker & 0x7) + 1; const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * (frames - CONFIG_MISC_FIXES); + const size_t index_sz = 2 + mag * (frames - 1); // This chunk is marked as having a superframe index but doesn't have // enough data for it, thus it's an invalid superframe index. @@ -501,20 +511,16 @@ x = clear_buffer; } - for (i = 0; i < frames - CONFIG_MISC_FIXES; ++i) { + for (i = 0; i < frames - 1; ++i) { uint32_t this_sz = 0; for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8); - this_sz += CONFIG_MISC_FIXES; + this_sz += 1; sizes[i] = this_sz; -#if CONFIG_MISC_FIXES frame_sz_sum += this_sz; -#endif } -#if CONFIG_MISC_FIXES sizes[i] = data_sz - index_sz - frame_sz_sum; -#endif *count = frames; } }
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c index d39e3dc..011c45a 100644 --- a/vp10/decoder/detokenize.c +++ b/vp10/decoder/detokenize.c
@@ -164,11 +164,7 @@ val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r); break; case CATEGORY6_TOKEN: { -#if CONFIG_MISC_FIXES const int skip_bits = TX_SIZES - 1 - tx_size; -#else - const int skip_bits = 0; -#endif const uint8_t *cat6p = cat6_prob + skip_bits; #if CONFIG_VP9_HIGHBITDEPTH switch (xd->bd) { @@ -257,6 +253,33 @@ } } +void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane, + vpx_reader *r) { + MODE_INFO *const mi = xd->mi[0]; + MB_MODE_INFO *const mbmi = &mi->mbmi; + const BLOCK_SIZE bsize = mbmi->sb_type; + int rows = 4 * num_4x4_blocks_high_lookup[bsize]; + int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; + int color_idx, color_ctx, color_order[PALETTE_MAX_SIZE]; + int n = mbmi->palette_mode_info.palette_size[plane != 0]; + int i, j; + uint8_t *color_map = xd->plane[plane].color_index_map; + const vpx_prob (* prob)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = + plane ? vp10_default_palette_uv_color_prob : + vp10_default_palette_y_color_prob; + + for (i = 0; i < rows; ++i) { + for (j = (i == 0 ? 1 : 0); j < cols; ++j) { + color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, n, + color_order); + color_idx = vpx_read_tree(r, vp10_palette_color_tree[n - 2], + prob[n - 2][color_ctx]); + assert(color_idx >= 0 && color_idx < n); + color_map[i * cols + j] = color_order[color_idx]; + } + } +} + int vp10_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, int x, int y,
diff --git a/vp10/decoder/detokenize.h b/vp10/decoder/detokenize.h index c3fd90a..d2677f6 100644 --- a/vp10/decoder/detokenize.h +++ b/vp10/decoder/detokenize.h
@@ -20,6 +20,8 @@ extern "C" { #endif +void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane, + vpx_reader *r); int vp10_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, int x, int y,
diff --git a/vp10/decoder/dsubexp.c b/vp10/decoder/dsubexp.c index 36c1917..7d2872e 100644 --- a/vp10/decoder/dsubexp.c +++ b/vp10/decoder/dsubexp.c
@@ -23,13 +23,13 @@ static int decode_uniform(vpx_reader *r) { const int l = 8; - const int m = (1 << l) - 191 + CONFIG_MISC_FIXES; + const int m = (1 << l) - 190; const int v = vpx_read_literal(r, l - 1); return v < m ? v : (v << 1) - m + vpx_read_bit(r); } static int inv_remap_prob(int v, int m) { - static uint8_t inv_map_table[MAX_PROB - CONFIG_MISC_FIXES] = { + static uint8_t inv_map_table[MAX_PROB - 1] = { 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, @@ -47,9 +47,6 @@ 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, -#if !CONFIG_MISC_FIXES - 253 -#endif }; assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0]))); v = inv_map_table[v];
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index ede8bb3..344f63d 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c
@@ -38,12 +38,33 @@ static const struct vp10_token intra_mode_encodings[INTRA_MODES] = { {0, 1}, {6, 3}, {28, 5}, {30, 5}, {58, 6}, {59, 6}, {126, 7}, {127, 7}, {62, 6}, {2, 2}}; +#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 +static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] = + {{0, 1}, {4, 3}, {3, 2}, {5, 3}}; +#else static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] = {{0, 1}, {2, 2}, {3, 2}}; +#endif // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 static const struct vp10_token partition_encodings[PARTITION_TYPES] = {{0, 1}, {2, 2}, {6, 3}, {7, 3}}; +#if !CONFIG_REF_MV static const struct vp10_token inter_mode_encodings[INTER_MODES] = {{2, 2}, {6, 3}, {0, 1}, {7, 3}}; +#endif +static const struct vp10_token palette_size_encodings[] = { + {0, 1}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {63, 6}, +}; +static const struct vp10_token +palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = { + {{0, 1}, {1, 1}}, // 2 colors + {{0, 1}, {2, 2}, {3, 2}}, // 3 colors + {{0, 1}, {2, 2}, {6, 3}, {7, 3}}, // 4 colors + {{0, 1}, {2, 2}, {6, 3}, {14, 4}, {15, 4}}, // 5 colors + {{0, 1}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {31, 5}}, // 6 colors + {{0, 1}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {63, 6}}, // 7 colors + {{0, 1}, {2, 2}, {6, 3}, {14, 4}, + {30, 5}, {62, 6}, {126, 7}, {127, 7}}, // 8 colors +}; static INLINE void write_uniform(vpx_writer *w, int n, int v) { int l = get_unsigned_bits(n); @@ -58,22 +79,82 @@ } } +#if CONFIG_EXT_TX +static struct vp10_token ext_tx_inter_encodings[EXT_TX_SETS_INTER][TX_TYPES]; +static struct vp10_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES]; +#else static struct vp10_token ext_tx_encodings[TX_TYPES]; +#endif // CONFIG_EXT_TX void vp10_encode_token_init() { +#if CONFIG_EXT_TX + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + vp10_tokens_from_tree(ext_tx_inter_encodings[s], vp10_ext_tx_inter_tree[s]); + } + for (s = 1; s < EXT_TX_SETS_INTRA; 
++s) { + vp10_tokens_from_tree(ext_tx_intra_encodings[s], vp10_ext_tx_intra_tree[s]); + } +#else vp10_tokens_from_tree(ext_tx_encodings, vp10_ext_tx_tree); +#endif // CONFIG_EXT_TX } +#if CONFIG_SUPERTX +static int vp10_check_supertx(VP10_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + MODE_INFO *mi; + mi = cm->mi + (mi_row * cm->mi_stride + mi_col); + return mi[0].mbmi.tx_size == max_txsize_lookup[bsize] && + mi[0].mbmi.sb_type < bsize; +} +#endif // CONFIG_SUPERTX + static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode, const vpx_prob *probs) { vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]); } -static void write_inter_mode(vpx_writer *w, PREDICTION_MODE mode, - const vpx_prob *probs) { +static void write_inter_mode(VP10_COMMON *cm, + vpx_writer *w, PREDICTION_MODE mode, + const int16_t mode_ctx) { +#if CONFIG_REF_MV + const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK; + const vpx_prob newmv_prob = cm->fc->newmv_prob[newmv_ctx]; + vpx_write(w, mode != NEWMV, newmv_prob); + + if (mode != NEWMV) { + const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; + const vpx_prob zeromv_prob = cm->fc->zeromv_prob[zeromv_ctx]; + + if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) { + assert(mode == ZEROMV); + return; + } + + vpx_write(w, mode != ZEROMV, zeromv_prob); + + if (mode != ZEROMV) { + int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; + vpx_prob refmv_prob; + + if (mode_ctx & (1 << SKIP_NEARESTMV_OFFSET)) + refmv_ctx = 6; + if (mode_ctx & (1 << SKIP_NEARMV_OFFSET)) + refmv_ctx = 7; + if (mode_ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) + refmv_ctx = 8; + + refmv_prob = cm->fc->refmv_prob[refmv_ctx]; + vpx_write(w, mode != NEARESTMV, refmv_prob); + } + } +#else + const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx]; assert(is_inter_mode(mode)); - vp10_write_token(w, vp10_inter_mode_tree, probs, + vp10_write_token(w, vp10_inter_mode_tree, inter_probs, 
&inter_mode_encodings[INTER_OFFSET(mode)]); +#endif } static void encode_unsigned_max(struct vpx_write_bit_buffer *wb, @@ -114,6 +195,62 @@ return savings; } +#if CONFIG_VAR_TX +static void write_tx_size_inter(const VP10_COMMON *cm, + const MACROBLOCKD *xd, + const MB_MODE_INFO *mbmi, + TX_SIZE tx_size, int blk_row, int blk_col, + vpx_writer *w) { + const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1); + int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; + int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), + tx_size); + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> 5; + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> 5; + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == mbmi->inter_tx_size[tx_idx]) { + vpx_write(w, 0, cm->fc->txfm_partition_prob[ctx]); + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), tx_size); + } else { + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bsl = b_width_log2_lookup[bsize]; + int i; + vpx_write(w, 1, cm->fc->txfm_partition_prob[ctx]); + + if (tx_size == TX_8X8) { + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), TX_4X4); + return; + } + + assert(bsl > 0); + --bsl; + for (i = 0; i < 4; ++i) { + int offsetr = blk_row + ((i >> 1) << bsl); + int offsetc = blk_col + ((i & 0x01) << bsl); + write_tx_size_inter(cm, xd, mbmi, tx_size - 1, offsetr, offsetc, w); + } + } +} + +static void update_txfm_partition_probs(VP10_COMMON *cm, vpx_writer *w, + FRAME_COUNTS *counts) { + int k; + for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k) + vp10_cond_prob_diff_update(w, &cm->fc->txfm_partition_prob[k], + counts->txfm_partition[k]); +} +#endif + static void write_selected_tx_size(const 
VP10_COMMON *cm, const MACROBLOCKD *xd, vpx_writer *w) { TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size; @@ -129,6 +266,22 @@ } } +#if CONFIG_REF_MV +static void update_inter_mode_probs(VP10_COMMON *cm, vpx_writer *w, + FRAME_COUNTS *counts) { + int i; + for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) + vp10_cond_prob_diff_update(w, &cm->fc->newmv_prob[i], + counts->newmv_mode[i]); + for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) + vp10_cond_prob_diff_update(w, &cm->fc->zeromv_prob[i], + counts->zeromv_mode[i]); + for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) + vp10_cond_prob_diff_update(w, &cm->fc->refmv_prob[i], + counts->refmv_mode[i]); +} +#endif + static int write_skip(const VP10_COMMON *cm, const MACROBLOCKD *xd, int segment_id, const MODE_INFO *mi, vpx_writer *w) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { @@ -157,6 +310,60 @@ counts->switchable_interp[j], SWITCHABLE_FILTERS, w); } + +#if CONFIG_EXT_TX +static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) { + const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - + vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); + int i, j; + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + int savings = 0; + int do_update = 0; + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_inter_ext_tx_for_txsize[s][i]) continue; + savings += prob_diff_update_savings( + vp10_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i], + cm->counts.inter_ext_tx[s][i], num_ext_tx_set_inter[s]); + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_inter_ext_tx_for_txsize[s][i]) continue; + prob_diff_update(vp10_ext_tx_inter_tree[s], + cm->fc->inter_ext_tx_prob[s][i], + cm->counts.inter_ext_tx[s][i], + num_ext_tx_set_inter[s], w); + } + } + } + + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + int savings = 0; + int do_update = 0; + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if 
(!use_intra_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < INTRA_MODES; ++j) + savings += prob_diff_update_savings( + vp10_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j], + cm->counts.intra_ext_tx[s][i][j], num_ext_tx_set_intra[s]); + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_intra_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < INTRA_MODES; ++j) + prob_diff_update(vp10_ext_tx_intra_tree[s], + cm->fc->intra_ext_tx_prob[s][i][j], + cm->counts.intra_ext_tx[s][i][j], + num_ext_tx_set_intra[s], w); + } + } + } +} +#else static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) { const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); @@ -199,19 +406,62 @@ } } } +#endif // CONFIG_EXT_TX + +static void pack_palette_tokens(vpx_writer *w, TOKENEXTRA **tp, + BLOCK_SIZE bsize, int n) { + int rows = 4 * num_4x4_blocks_high_lookup[bsize]; + int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; + int i; + TOKENEXTRA *p = *tp; + + for (i = 0; i < rows * cols -1; ++i) { + vp10_write_token(w, vp10_palette_color_tree[n - 2], p->context_tree, + &palette_color_encodings[n - 2][p->token]); + ++p; + } + + *tp = p; +} + +#if CONFIG_SUPERTX +static void update_supertx_probs(VP10_COMMON *cm, vpx_writer *w) { + const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - + vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); + int i, j; + int savings = 0; + int do_update = 0; + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + savings += vp10_cond_prob_diff_update_savings(&cm->fc->supertx_prob[i][j], + cm->counts.supertx[i][j]); + } + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + vp10_cond_prob_diff_update(w, 
&cm->fc->supertx_prob[i][j], + cm->counts.supertx[i][j]); + } + } + } +} +#endif // CONFIG_SUPERTX static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp, const TOKENEXTRA *const stop, vpx_bit_depth_t bit_depth, const TX_SIZE tx) { TOKENEXTRA *p = *tp; -#if !CONFIG_MISC_FIXES - (void) tx; +#if CONFIG_VAR_TX + int count = 0; + const int seg_eob = 16 << (tx << 1); #endif while (p < stop && p->token != EOSB_TOKEN) { const int t = p->token; const struct vp10_token *const a = &vp10_coef_encodings[t]; - int i = 0; int v = a->value; int n = a->len; #if CONFIG_VP9_HIGHBITDEPTH @@ -228,38 +478,30 @@ #endif // CONFIG_VP9_HIGHBITDEPTH /* skip one or two nodes */ - if (p->skip_eob_node) { + if (p->skip_eob_node) n -= p->skip_eob_node; - i = 2 * p->skip_eob_node; - } + else + vpx_write(w, t != EOB_TOKEN, p->context_tree[0]); - // TODO(jbb): expanding this can lead to big gains. It allows - // much better branch prediction and would enable us to avoid numerous - // lookups and compares. + if (t != EOB_TOKEN) { + vpx_write(w, t != ZERO_TOKEN, p->context_tree[1]); - // If we have a token that's in the constrained set, the coefficient tree - // is split into two treed writes. The first treed write takes care of the - // unconstrained nodes. The second treed write takes care of the - // constrained nodes. 
- if (t >= TWO_TOKEN && t < EOB_TOKEN) { - int len = UNCONSTRAINED_NODES - p->skip_eob_node; - int bits = v >> (n - len); - vp10_write_tree(w, vp10_coef_tree, p->context_tree, bits, len, i); - vp10_write_tree(w, vp10_coef_con_tree, - vp10_pareto8_full[p->context_tree[PIVOT_NODE] - 1], - v, n - len, 0); - } else { - vp10_write_tree(w, vp10_coef_tree, p->context_tree, v, n, i); + if (t != ZERO_TOKEN) { + vpx_write(w, t != ONE_TOKEN, p->context_tree[2]); + + if (t != ONE_TOKEN) { + int len = UNCONSTRAINED_NODES - p->skip_eob_node; + vp10_write_tree(w, vp10_coef_con_tree, + vp10_pareto8_full[p->context_tree[PIVOT_NODE] - 1], + v, n - len, 0); + } + } } if (b->base_val) { const int e = p->extra, l = b->len; -#if CONFIG_MISC_FIXES int skip_bits = (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0; -#else - int skip_bits = 0; -#endif if (l) { const unsigned char *pb = b->prob; @@ -282,11 +524,68 @@ vpx_write_bit(w, e & 1); } ++p; + +#if CONFIG_VAR_TX + ++count; + if (t == EOB_TOKEN || count == seg_eob) + break; +#endif } *tp = p; } +#if CONFIG_VAR_TX +static void pack_txb_tokens(vpx_writer *w, + TOKENEXTRA **tp, const TOKENEXTRA *const tok_end, + MACROBLOCKD *xd, MB_MODE_INFO *mbmi, int plane, + BLOCK_SIZE plane_bsize, + vpx_bit_depth_t bit_depth, + int block, + int blk_row, int blk_col, TX_SIZE tx_size) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = plane ? 
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) : + mbmi->inter_tx_size[tx_idx]; + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + pack_mb_tokens(w, tp, tok_end, bit_depth, tx_size); + } else { + int bsl = b_width_log2_lookup[bsize]; + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + const int offsetr = blk_row + ((i >> 1) << bsl); + const int offsetc = blk_col + ((i & 0x01) << bsl); + int step = 1 << (2 * (tx_size - 1)); + + if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) + continue; + + pack_txb_tokens(w, tp, tok_end, xd, mbmi, plane, + plane_bsize, bit_depth, block + i * step, + offsetr, offsetc, tx_size - 1); + } + } +} +#endif + static void write_segment_id(vpx_writer *w, const struct segmentation *seg, const struct segmentation_probs *segp, int segment_id) { @@ -317,31 +616,121 @@ } if (is_compound) { - vpx_write(w, mbmi->ref_frame[0] == GOLDEN_FRAME, - vp10_get_pred_prob_comp_ref_p(cm, xd)); +#if CONFIG_EXT_REFS + const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME || + mbmi->ref_frame[0] == LAST3_FRAME || + mbmi->ref_frame[0] == LAST4_FRAME); +#else + const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME; +#endif // CONFIG_EXT_REFS + vpx_write(w, bit, vp10_get_pred_prob_comp_ref_p(cm, xd)); + +#if CONFIG_EXT_REFS + if (!bit) { + const int bit1 = mbmi->ref_frame[0] == LAST_FRAME; + vpx_write(w, bit1, vp10_get_pred_prob_comp_ref_p1(cm, xd)); + } else { + const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME; + vpx_write(w, bit2, vp10_get_pred_prob_comp_ref_p2(cm, xd)); + if (!bit2) { + const int bit3 = mbmi->ref_frame[0] 
== LAST3_FRAME; + vpx_write(w, bit3, vp10_get_pred_prob_comp_ref_p3(cm, xd)); + } + } +#endif // CONFIG_EXT_REFS } else { +#if CONFIG_EXT_REFS + const int bit0 = (mbmi->ref_frame[0] == GOLDEN_FRAME || + mbmi->ref_frame[0] == ALTREF_FRAME); + vpx_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd)); + + if (bit0) { + const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME; + vpx_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd)); + } else { + const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME || + mbmi->ref_frame[0] == LAST4_FRAME); + vpx_write(w, bit2, vp10_get_pred_prob_single_ref_p3(cm, xd)); + + if (!bit2) { + const int bit3 = mbmi->ref_frame[0] != LAST_FRAME; + vpx_write(w, bit3, vp10_get_pred_prob_single_ref_p4(cm, xd)); + } else { + const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME; + vpx_write(w, bit4, vp10_get_pred_prob_single_ref_p5(cm, xd)); + } + } +#else const int bit0 = mbmi->ref_frame[0] != LAST_FRAME; vpx_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd)); if (bit0) { const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME; vpx_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd)); } +#endif // CONFIG_EXT_REFS } } } +#if CONFIG_EXT_INTRA +static void write_ext_intra_mode_info(const VP10_COMMON *const cm, + const MB_MODE_INFO *const mbmi, + vpx_writer *w) { +#if !ALLOW_FILTER_INTRA_MODES + return; +#endif + if (mbmi->mode == DC_PRED) { + vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[0], + cm->fc->ext_intra_probs[0]); + if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) { + EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[0]; + write_uniform(w, FILTER_INTRA_MODES, mode); + } + } + if (mbmi->uv_mode == DC_PRED) { + vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[1], + cm->fc->ext_intra_probs[1]); + if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) { + EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[1]; + write_uniform(w, FILTER_INTRA_MODES, mode); + } + } +} +#endif // CONFIG_EXT_INTRA 
+ +static void write_switchable_interp_filter(VP10_COMP *cpi, + const MACROBLOCKD *xd, + vpx_writer *w) { + VP10_COMMON *const cm = &cpi->common; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + if (cm->interp_filter == SWITCHABLE) { + const int ctx = vp10_get_pred_context_switchable_interp(xd); +#if CONFIG_EXT_INTERP + if (!vp10_is_interp_needed(xd)) { + // if (mbmi->interp_filter != EIGHTTAP) + // printf("Error [%d]\n", mbmi->sb_type); + assert(mbmi->interp_filter == EIGHTTAP); + return; + } +#endif + vp10_write_token(w, vp10_switchable_interp_tree, + cm->fc->switchable_interp_prob[ctx], + &switchable_interp_encodings[mbmi->interp_filter]); + ++cpi->interp_filter_selected[0][mbmi->interp_filter]; + } +} + static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif vpx_writer *w) { VP10_COMMON *const cm = &cpi->common; const nmv_context *nmvc = &cm->fc->nmvc; - const MACROBLOCK *const x = &cpi->td.mb; - const MACROBLOCKD *const xd = &x->e_mbd; + const MACROBLOCK *x = &cpi->td.mb; + const MACROBLOCKD *xd = &x->e_mbd; const struct segmentation *const seg = &cm->seg; -#if CONFIG_MISC_FIXES const struct segmentation_probs *const segp = &cm->fc->seg; -#else - const struct segmentation_probs *const segp = &cm->segp; -#endif const MB_MODE_INFO *const mbmi = &mi->mbmi; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const PREDICTION_MODE mode = mbmi->mode; @@ -364,19 +753,60 @@ } } +#if CONFIG_SUPERTX + if (supertx_enabled) + skip = mbmi->skip; + else + skip = write_skip(cm, xd, segment_id, mi, w); +#else skip = write_skip(cm, xd, segment_id, mi, w); +#endif // CONFIG_SUPERTX - if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - vpx_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd)); +#if CONFIG_SUPERTX + if (!supertx_enabled) +#endif // CONFIG_SUPERTX + if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + vpx_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd)); if (bsize >= 
BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX !(is_inter && skip) && !xd->lossless[segment_id]) { - write_selected_tx_size(cm, xd, w); +#if CONFIG_VAR_TX + if (is_inter) { // This implies skip flag is 0. + const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; + const int txb_size = txsize_to_bsize[max_tx_size]; + const int bs = num_4x4_blocks_wide_lookup[txb_size]; + const int width = num_4x4_blocks_wide_lookup[bsize]; + const int height = num_4x4_blocks_high_lookup[bsize]; + int idx, idy; + for (idy = 0; idy < height; idy += bs) + for (idx = 0; idx < width; idx += bs) + write_tx_size_inter(cm, xd, mbmi, max_tx_size, idy, idx, w); + } else { + set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h); + set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w); + + write_selected_tx_size(cm, xd, w); + } + } else { + set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h); + set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w); +#else + write_selected_tx_size(cm, xd, w); +#endif } if (!is_inter) { if (bsize >= BLOCK_8X8) { write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]); +#if CONFIG_EXT_INTRA + if (mode != DC_PRED && mode != TM_PRED) { + write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + mbmi->angle_delta[0]); + } +#endif // CONFIG_EXT_INTRA } else { int idx, idy; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; @@ -389,27 +819,34 @@ } } write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]); +#if CONFIG_EXT_INTRA + if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED && + bsize >= BLOCK_8X8) + write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + mbmi->angle_delta[1]); + + if (bsize >= BLOCK_8X8) + write_ext_intra_mode_info(cm, mbmi, w); +#endif // CONFIG_EXT_INTRA } else { - const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; - const vpx_prob *const inter_probs = 
cm->fc->inter_mode_probs[mode_ctx]; + int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; write_ref_frames(cm, xd, w); +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, bsize, -1); +#endif + // If segment skip is not enabled code the mode. if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_8X8) { - write_inter_mode(w, mode, inter_probs); + write_inter_mode(cm, w, mode, mode_ctx); } } - if (cm->interp_filter == SWITCHABLE) { - const int ctx = vp10_get_pred_context_switchable_interp(xd); - vp10_write_token(w, vp10_switchable_interp_tree, - cm->fc->switchable_interp_prob[ctx], - &switchable_interp_encodings[mbmi->interp_filter]); - ++cpi->interp_filter_selected[0][mbmi->interp_filter]; - } else { - assert(mbmi->interp_filter == cm->interp_filter); - } +#if !CONFIG_EXT_INTERP + write_switchable_interp_filter(cpi, xd, w); +#endif // !CONFIG_EXT_INTERP if (bsize < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; @@ -419,7 +856,11 @@ for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; - write_inter_mode(w, b_mode, inter_probs); +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, bsize, j); +#endif + write_inter_mode(cm, w, b_mode, mode_ctx); if (b_mode == NEWMV) { for (ref = 0; ref < 1 + is_compound; ++ref) vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv, @@ -436,9 +877,38 @@ allow_hp); } } +#if CONFIG_EXT_INTERP + write_switchable_interp_filter(cpi, xd, w); +#endif // CONFIG_EXT_INTERP } + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter) > 1 && + cm->base_qindex > 0 && !mbmi->skip && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + int eset = get_ext_tx_set(mbmi->tx_size, bsize, is_inter); + if (is_inter) { + if (eset > 0) + 
vp10_write_token(w, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size], + &ext_tx_inter_encodings[eset][mbmi->tx_type]); + } else if (ALLOW_INTRA_EXT_TX) { + if (eset > 0) + vp10_write_token( + w, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode], + &ext_tx_intra_encodings[eset][mbmi->tx_type]); + } + } +#else if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { if (is_inter) { vp10_write_token( @@ -456,16 +926,43 @@ if (!mbmi->skip) assert(mbmi->tx_type == DCT_DCT); } +#endif // CONFIG_EXT_TX +} + +static void write_palette_mode_info(const VP10_COMMON *cm, + const MACROBLOCKD *xd, + const MODE_INFO *const mi, + vpx_writer *w) { + const MB_MODE_INFO *const mbmi = &mi->mbmi; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const BLOCK_SIZE bsize = mbmi->sb_type; + const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info; + int palette_ctx = 0; + int n, i; + + n = pmi->palette_size[0]; + if (above_mi) + palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); + if (left_mi) + palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); + vpx_write(w, n > 0, + vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx]); + if (n > 0) { + vp10_write_token(w, vp10_palette_size_tree, + vp10_default_palette_y_size_prob[bsize - BLOCK_8X8], + &palette_size_encodings[n - 2]); + for (i = 0; i < n; ++i) + vpx_write_literal(w, pmi->palette_colors[i], + cm->bit_depth); + write_uniform(w, n, pmi->palette_first_color_idx[0]); + } } static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO **mi_8x8, vpx_writer *w) { const struct segmentation *const seg = &cm->seg; -#if CONFIG_MISC_FIXES const struct segmentation_probs *const segp = &cm->fc->seg; -#else - const 
struct segmentation_probs *const segp = &cm->segp; -#endif const MODE_INFO *const mi = mi_8x8[0]; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; @@ -484,6 +981,11 @@ if (bsize >= BLOCK_8X8) { write_intra_mode(w, mbmi->mode, get_y_mode_probs(cm, mi, above_mi, left_mi, 0)); +#if CONFIG_EXT_INTRA + if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) + write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + mbmi->angle_delta[0]); +#endif // CONFIG_EXT_INTRA } else { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; @@ -499,7 +1001,31 @@ } write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]); +#if CONFIG_EXT_INTRA + if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED && + bsize >= BLOCK_8X8) + write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + mbmi->angle_delta[1]); +#endif // CONFIG_EXT_INTRA + if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools && + mbmi->mode == DC_PRED) + write_palette_mode_info(cm, xd, mi, w); + + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bsize, 0) > 1 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && + ALLOW_INTRA_EXT_TX) { + int eset = get_ext_tx_set(mbmi->tx_size, bsize, 0); + if (eset > 0) + vp10_write_token( + w, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode], + &ext_tx_intra_encodings[eset][mbmi->tx_type]); + } +#else if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip && !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { @@ -509,11 +1035,20 @@ [intra_mode_to_tx_type_context[mbmi->mode]], &ext_tx_encodings[mbmi->tx_type]); } +#endif // CONFIG_EXT_TX + +#if CONFIG_EXT_INTRA + if (bsize >= BLOCK_8X8) + write_ext_intra_mode_info(cm, mbmi, w); +#endif // CONFIG_EXT_INTRA } static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, 
vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col) { const VP10_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; @@ -532,15 +1067,70 @@ if (frame_is_intra_only(cm)) { write_mb_modes_kf(cm, xd, xd->mi, w); } else { - pack_inter_mode_mvs(cpi, m, w); +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); +#endif + pack_inter_mode_mvs(cpi, m, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + w); } + if (m->mbmi.palette_mode_info.palette_size[0] > 0) { + assert(*tok < tok_end); + pack_palette_tokens(w, tok, m->mbmi.sb_type, + m->mbmi.palette_mode_info.palette_size[0]); + assert(*tok < tok_end); + } + +#if CONFIG_SUPERTX + if (supertx_enabled) return; +#endif // CONFIG_SUPERTX + if (!m->mbmi.skip) { assert(*tok < tok_end); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { +#if CONFIG_VAR_TX + const struct macroblockd_plane *const pd = &xd->plane[plane]; + MB_MODE_INFO *mbmi = &m->mbmi; + BLOCK_SIZE bsize = mbmi->sb_type; + const BLOCK_SIZE plane_bsize = + get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd); + + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + int row, col; + + if (is_inter_block(mbmi)) { + const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + int bw = num_4x4_blocks_wide_lookup[txb_size]; + int block = 0; + const int step = 1 << (max_tx_size << 1); + for (row = 0; row < num_4x4_h; row += bw) { + for (col = 0; col < num_4x4_w; col += bw) { + pack_txb_tokens(w, tok, tok_end, xd, mbmi, plane, plane_bsize, + cm->bit_depth, block, row, col, max_tx_size); + block += step; + } + } + } else { + TX_SIZE tx = plane ? 
get_uv_tx_size(&m->mbmi, &xd->plane[plane]) + : m->mbmi.tx_size; + BLOCK_SIZE txb_size = txsize_to_bsize[tx]; + int bw = num_4x4_blocks_wide_lookup[txb_size]; + + for (row = 0; row < num_4x4_h; row += bw) + for (col = 0; col < num_4x4_w; col += bw) + pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); + } +#else TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size; pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); +#endif // CONFIG_VAR_TX assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN); (*tok)++; } @@ -572,6 +1162,9 @@ static void write_modes_sb(VP10_COMP *cpi, const TileInfo *const tile, vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP10_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; @@ -580,7 +1173,12 @@ const int bs = (1 << bsl) / 4; PARTITION_TYPE partition; BLOCK_SIZE subsize; - const MODE_INFO *m = NULL; + MODE_INFO *m = NULL; +#if CONFIG_SUPERTX + const int pack_token = !supertx_enabled; + TX_SIZE supertx_size; + int plane; +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -590,36 +1188,118 @@ partition = partition_lookup[bsl][m->mbmi.sb_type]; write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); +#if CONFIG_SUPERTX + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + set_mi_row_col(xd, tile, + mi_row, num_8x8_blocks_high_lookup[bsize], + mi_col, num_8x8_blocks_wide_lookup[bsize], + cm->mi_rows, cm->mi_cols); + if (!supertx_enabled && + !frame_is_intra_only(cm) && + partition != PARTITION_NONE && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + vpx_prob prob; + supertx_size = max_txsize_lookup[bsize]; + prob = cm->fc->supertx_prob[partition_supertx_context_lookup[partition]] + [supertx_size]; + supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size); + vpx_write(w, 
supertx_enabled, prob); + if (supertx_enabled) { + vpx_write(w, xd->mi[0]->mbmi.skip, vp10_get_skip_prob(cm, xd)); +#if CONFIG_EXT_TX + if (supertx_size <= TX_16X16 && !xd->mi[0]->mbmi.skip) { + int eset = get_ext_tx_set(supertx_size, bsize, 1); + if (eset > 0) { + vp10_write_token( + w, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][supertx_size], + &ext_tx_inter_encodings[eset][xd->mi[0]->mbmi.tx_type]); + } + } +#endif // CONFIG_EXT_TX + } + } +#endif // CONFIG_SUPERTX if (subsize < BLOCK_8X8) { - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); } else { switch (partition) { case PARTITION_NONE: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); break; case PARTITION_HORZ: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); if (mi_row + bs < cm->mi_rows) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + bs, mi_col); break; case PARTITION_VERT: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); if (mi_col + bs < cm->mi_cols) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + bs); break; case PARTITION_SPLIT: - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, 
mi_col + bs, - subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col, - subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs, - subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + bs, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + bs, mi_col, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + bs, mi_col + bs, subsize); break; default: assert(0); } } +#if CONFIG_SUPERTX + if (partition != PARTITION_NONE && supertx_enabled && pack_token && + !m->mbmi.skip) { + assert(*tok < tok_end); + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) + : m->mbmi.tx_size; + pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); + assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN); + (*tok)++; + } + } +#endif // CONFIG_SUPERTX // update partition context if (bsize >= BLOCK_8X8 && @@ -636,10 +1316,16 @@ for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { vp10_zero(xd->left_seg_context); +#if CONFIG_VAR_TX + vp10_zero(xd->left_txfm_context_buffer); +#endif for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, - BLOCK_64X64); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + 0, +#endif + mi_row, mi_col, BLOCK_64X64); } } @@ -714,7 +1400,6 @@ } } - // printf("Update %d %d, savings %d\n", update[0], update[1], savings); /* Is coef updated at all */ if (update[1] == 0 || savings < 0) { vpx_write_bit(bc, 0); @@ -875,7 +1560,7 @@ static void 
write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) { if (delta_q != 0) { vpx_wb_write_bit(wb, 1); - vpx_wb_write_inv_signed_literal(wb, delta_q, CONFIG_MISC_FIXES ? 6 : 4); + vpx_wb_write_inv_signed_literal(wb, delta_q, 6); } else { vpx_wb_write_bit(wb, 0); } @@ -892,11 +1577,7 @@ static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd, struct vpx_write_bit_buffer *wb) { int i, j; - const struct segmentation *seg = &cm->seg; -#if !CONFIG_MISC_FIXES - const struct segmentation_probs *segp = &cm->segp; -#endif vpx_wb_write_bit(wb, seg->enabled); if (!seg->enabled) @@ -911,16 +1592,6 @@ if (seg->update_map) { // Select the coding strategy (temporal or spatial) vp10_choose_segmap_coding_method(cm, xd); -#if !CONFIG_MISC_FIXES - // Write out probabilities used to decode unpredicted macro-block segments - for (i = 0; i < SEG_TREE_PROBS; i++) { - const int prob = segp->tree_probs[i]; - const int update = prob != MAX_PROB; - vpx_wb_write_bit(wb, update); - if (update) - vpx_wb_write_literal(wb, prob, 8); - } -#endif // Write out the chosen coding method. 
if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) { @@ -928,18 +1599,6 @@ } else { assert(seg->temporal_update == 0); } - -#if !CONFIG_MISC_FIXES - if (seg->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) { - const int prob = segp->pred_probs[i]; - const int update = prob != MAX_PROB; - vpx_wb_write_bit(wb, update); - if (update) - vpx_wb_write_literal(wb, prob, 8); - } - } -#endif } // Segmentation data @@ -967,7 +1626,6 @@ } } -#if CONFIG_MISC_FIXES static void update_seg_probs(VP10_COMP *cpi, vpx_writer *w) { VP10_COMMON *cm = &cpi->common; @@ -994,18 +1652,10 @@ if (mode != TX_MODE_SELECT) vpx_wb_write_literal(wb, mode, 2); } -#else -static void write_txfm_mode(TX_MODE mode, struct vpx_writer *wb) { - vpx_write_literal(wb, VPXMIN(mode, ALLOW_32X32), 2); - if (mode >= ALLOW_32X32) - vpx_write_bit(wb, mode == TX_MODE_SELECT); -} -#endif static void update_txfm_probs(VP10_COMMON *cm, vpx_writer *w, FRAME_COUNTS *counts) { - if (cm->tx_mode == TX_MODE_SELECT) { int i, j; unsigned int ct_8x8p[TX_SIZES - 3][2]; @@ -1039,7 +1689,7 @@ struct vpx_write_bit_buffer *wb) { vpx_wb_write_bit(wb, filter == SWITCHABLE); if (filter != SWITCHABLE) - vpx_wb_write_literal(wb, filter, 2); + vpx_wb_write_literal(wb, filter, 2 + CONFIG_EXT_INTERP); } static void fix_interp_filter(VP10_COMMON *cm, FRAME_COUNTS *counts) { @@ -1085,6 +1735,17 @@ } static int get_refresh_mask(VP10_COMP *cpi) { + int refresh_mask = 0; +#if CONFIG_EXT_REFS + int ref_frame; + for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) { + refresh_mask |= (cpi->refresh_last_frames[ref_frame - LAST_FRAME] << + cpi->lst_fb_idxes[ref_frame - LAST_FRAME]); + } +#else + refresh_mask = cpi->refresh_last_frame << cpi->lst_fb_idx; +#endif // CONFIG_EXT_REFS + if (vp10_preserve_existing_gf(cpi)) { // We have decided to preserve the previously existing golden frame as our // new ARF frame. 
However, in the short term we leave it in the GF slot and, @@ -1096,15 +1757,14 @@ // Note: This is highly specific to the use of ARF as a forward reference, // and this needs to be generalized as other uses are implemented // (like RTC/temporal scalability). - return (cpi->refresh_last_frame << cpi->lst_fb_idx) | - (cpi->refresh_golden_frame << cpi->alt_fb_idx); + return refresh_mask | (cpi->refresh_golden_frame << cpi->alt_fb_idx); } else { int arf_idx = cpi->alt_fb_idx; if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; arf_idx = gf_group->arf_update_idx[gf_group->index]; } - return (cpi->refresh_last_frame << cpi->lst_fb_idx) | + return refresh_mask | (cpi->refresh_golden_frame << cpi->gld_fb_idx) | (cpi->refresh_alt_ref_frame << arf_idx); } @@ -1123,6 +1783,10 @@ memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); +#if CONFIG_VAR_TX + memset(cm->above_txfm_context, 0, + sizeof(*cm->above_txfm_context) * mi_cols_aligned_to_sb(cm->mi_cols)); +#endif for (tile_row = 0; tile_row < tile_rows; tile_row++) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { @@ -1146,7 +1810,7 @@ // size of this tile assert(residual_bc.pos > 0); - tile_sz = residual_bc.pos - CONFIG_MISC_FIXES; + tile_sz = residual_bc.pos - 1; mem_put_le32(data_ptr + total_size, tile_sz); max_tile = max_tile > tile_sz ? 
max_tile : tile_sz; total_size += 4; @@ -1191,10 +1855,8 @@ if (cfg != NULL) { found = cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height; -#if CONFIG_MISC_FIXES found &= cm->render_width == cfg->render_width && cm->render_height == cfg->render_height; -#endif } vpx_wb_write_bit(wb, found); if (found) { @@ -1205,15 +1867,8 @@ if (!found) { vpx_wb_write_literal(wb, cm->width - 1, 16); vpx_wb_write_literal(wb, cm->height - 1, 16); - -#if CONFIG_MISC_FIXES write_render_size(cm, wb); -#endif } - -#if !CONFIG_MISC_FIXES - write_render_size(cm, wb); -#endif } static void write_sync_code(struct vpx_write_bit_buffer *wb) { @@ -1284,12 +1939,13 @@ write_sync_code(wb); write_bitdepth_colorspace_sampling(cm, wb); write_frame_size(cm, wb); + if (frame_is_intra_only(cm)) + vpx_wb_write_bit(wb, cm->allow_screen_content_tools); } else { if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only); if (!cm->error_resilient_mode) { -#if CONFIG_MISC_FIXES if (cm->intra_only) { vpx_wb_write_bit(wb, cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL); @@ -1300,25 +1956,11 @@ vpx_wb_write_bit(wb, cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL); } -#else - static const int reset_frame_context_conv_tbl[3] = { 0, 2, 3 }; - - vpx_wb_write_literal(wb, - reset_frame_context_conv_tbl[cm->reset_frame_context], 2); -#endif } if (cm->intra_only) { write_sync_code(wb); - -#if CONFIG_MISC_FIXES write_bitdepth_colorspace_sampling(cm, wb); -#else - // Note for profile 0, 420 8bpp is assumed. 
- if (cm->profile > PROFILE_0) { - write_bitdepth_colorspace_sampling(cm, wb); - } -#endif vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES); write_frame_size(cm, wb); @@ -1344,11 +1986,9 @@ if (!cm->error_resilient_mode) { vpx_wb_write_bit(wb, cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF); -#if CONFIG_MISC_FIXES if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF) -#endif vpx_wb_write_bit(wb, cm->refresh_frame_context != - REFRESH_FRAME_CONTEXT_BACKWARD); + REFRESH_FRAME_CONTEXT_BACKWARD); } vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2); @@ -1356,7 +1996,6 @@ encode_loopfilter(&cm->lf, wb); encode_quantization(cm, wb); encode_segmentation(cm, xd, wb); -#if CONFIG_MISC_FIXES if (!cm->seg.enabled && xd->lossless[0]) cm->tx_mode = TX_4X4; else @@ -1369,36 +2008,29 @@ if (!use_hybrid_pred) vpx_wb_write_bit(wb, use_compound_pred); } -#endif write_tile_info(cm, wb); } static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { VP10_COMMON *const cm = &cpi->common; +#if CONFIG_SUPERTX + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; +#endif // CONFIG_SUPERTX FRAME_CONTEXT *const fc = cm->fc; FRAME_COUNTS *counts = cpi->td.counts; vpx_writer header_bc; - int i; -#if CONFIG_MISC_FIXES - int j; -#endif + int i, j; vpx_start_encode(&header_bc, data); - -#if !CONFIG_MISC_FIXES - if (cpi->td.mb.e_mbd.lossless[0]) { - cm->tx_mode = TX_4X4; - } else { - write_txfm_mode(cm->tx_mode, &header_bc); - update_txfm_probs(cm, &header_bc, counts); - } -#else update_txfm_probs(cm, &header_bc, counts); -#endif update_coef_probs(cpi, &header_bc); + +#if CONFIG_VAR_TX + update_txfm_partition_probs(cm, &header_bc, counts); +#endif + update_skip_probs(cm, &header_bc, counts); -#if CONFIG_MISC_FIXES update_seg_probs(cpi, &header_bc); for (i = 0; i < INTRA_MODES; ++i) @@ -1408,20 +2040,21 @@ for (i = 0; i < PARTITION_CONTEXTS; ++i) prob_diff_update(vp10_partition_tree, fc->partition_prob[i], counts->partition[i], PARTITION_TYPES, 
&header_bc); -#endif if (frame_is_intra_only(cm)) { vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob); -#if CONFIG_MISC_FIXES for (i = 0; i < INTRA_MODES; ++i) for (j = 0; j < INTRA_MODES; ++j) prob_diff_update(vp10_intra_mode_tree, cm->kf_y_prob[i][j], counts->kf_y_mode[i][j], INTRA_MODES, &header_bc); -#endif } else { +#if CONFIG_REF_MV + update_inter_mode_probs(cm, &header_bc, counts); +#else for (i = 0; i < INTER_MODE_CONTEXTS; ++i) prob_diff_update(vp10_inter_mode_tree, cm->fc->inter_mode_probs[i], counts->inter_mode[i], INTER_MODES, &header_bc); +#endif if (cm->interp_filter == SWITCHABLE) update_switchable_interp_probs(cm, &header_bc, counts); @@ -1432,52 +2065,43 @@ if (cpi->allow_comp_inter_inter) { const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT; -#if !CONFIG_MISC_FIXES - const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE; - - vpx_write_bit(&header_bc, use_compound_pred); - if (use_compound_pred) { - vpx_write_bit(&header_bc, use_hybrid_pred); - if (use_hybrid_pred) - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], - counts->comp_inter[i]); - } -#else if (use_hybrid_pred) for (i = 0; i < COMP_INTER_CONTEXTS; i++) vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], counts->comp_inter[i]); -#endif } if (cm->reference_mode != COMPOUND_REFERENCE) { for (i = 0; i < REF_CONTEXTS; i++) { - vp10_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0], - counts->single_ref[i][0]); - vp10_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1], - counts->single_ref[i][1]); + for (j = 0; j < (SINGLE_REFS - 1); j ++) { + vp10_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][j], + counts->single_ref[i][j]); + } } } - if (cm->reference_mode != SINGLE_REFERENCE) - for (i = 0; i < REF_CONTEXTS; i++) - vp10_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i], - counts->comp_ref[i]); + if (cm->reference_mode != SINGLE_REFERENCE) { + for (i = 
0; i < REF_CONTEXTS; i++) { + for (j = 0; j < (COMP_REFS - 1); j ++) { + vp10_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i][j], + counts->comp_ref[i][j]); + } + } + } for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) prob_diff_update(vp10_intra_mode_tree, cm->fc->y_mode_prob[i], counts->y_mode[i], INTRA_MODES, &header_bc); -#if !CONFIG_MISC_FIXES - for (i = 0; i < PARTITION_CONTEXTS; ++i) - prob_diff_update(vp10_partition_tree, fc->partition_prob[i], - counts->partition[i], PARTITION_TYPES, &header_bc); -#endif - vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc, &counts->mv); +#if CONFIG_EXT_TX update_ext_tx_probs(cm, &header_bc); +#endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + if (!xd->lossless[0]) + update_supertx_probs(cm, &header_bc); +#endif // CONFIG_SUPERTX } vpx_stop_encode(&header_bc); @@ -1486,7 +2110,6 @@ return header_bc.pos; } -#if CONFIG_MISC_FIXES static int remux_tiles(uint8_t *dest, const int sz, const int n_tiles, const int mag) { int rpos = 0, wpos = 0, n; @@ -1526,7 +2149,6 @@ return wpos; } -#endif void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size) { uint8_t *data = dest; @@ -1534,14 +2156,9 @@ struct vpx_write_bit_buffer wb = {data, 0}; struct vpx_write_bit_buffer saved_wb; unsigned int max_tile; -#if CONFIG_MISC_FIXES VP10_COMMON *const cm = &cpi->common; const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols; const int have_tiles = n_log2_tiles > 0; -#else - const int have_tiles = 0; // we have tiles, but we don't want to write a - // tile size marker in the header -#endif write_uncompressed_header(cpi, &wb); saved_wb = wb; @@ -1557,7 +2174,6 @@ data += first_part_size; data_sz = encode_tiles(cpi, data, &max_tile); -#if CONFIG_MISC_FIXES if (max_tile > 0) { int mag; unsigned int mask; @@ -1576,7 +2192,6 @@ } else { assert(n_log2_tiles == 0); } -#endif data += data_sz; // TODO(jbb): Figure out what to do if first_part_size > 16 bits.
diff --git a/vp10/encoder/bitstream.h b/vp10/encoder/bitstream.h index b1da89f..9df03da 100644 --- a/vp10/encoder/bitstream.h +++ b/vp10/encoder/bitstream.h
@@ -21,6 +21,8 @@ void vp10_encode_token_init(); void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size); +void vp10_encode_token_init(); + static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) { return !cpi->multi_arf_allowed && cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref;
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index ab0252b..1383c19 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h
@@ -13,6 +13,9 @@ #include "vp10/common/entropymv.h" #include "vp10/common/entropy.h" +#if CONFIG_REF_MV +#include "vp10/common/mvref_common.h" +#endif #ifdef __cplusplus extern "C" { @@ -24,7 +27,7 @@ unsigned int var; } diff; -struct macroblock_plane { +typedef struct macroblock_plane { DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]); tran_low_t *qcoeff; tran_low_t *coeff; @@ -40,7 +43,7 @@ int16_t *round; int64_t quant_thred[2]; -}; +} MACROBLOCK_PLANE; /* The [2] dimension is for whether we skip the EOB node (i.e. if previous * coefficient in this block was zero) or not. */ @@ -49,9 +52,20 @@ typedef struct { int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; - uint8_t mode_context[MAX_REF_FRAMES]; + int16_t mode_context[MAX_REF_FRAMES]; +#if CONFIG_REF_MV + uint8_t ref_mv_count[MAX_REF_FRAMES]; + CANDIDATE_MV ref_mv_stack[MAX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; +#endif } MB_MODE_INFO_EXT; +typedef struct { + uint8_t best_palette_color_map[4096]; + double kmeans_data_buf[4096]; + uint8_t kmeans_indices_buf[4096]; + uint8_t kmeans_pre_indices_buf[4096]; +} PALETTE_BUFFER; + typedef struct macroblock MACROBLOCK; struct macroblock { struct macroblock_plane plane[MAX_MB_PLANE]; @@ -94,6 +108,8 @@ int *nmvsadcost_hp[2]; int **mvsadcost; + PALETTE_BUFFER *palette_buffer; + // These define limits to motion vector components to prevent them // from extending outside the UMV borders int mv_col_min; @@ -104,6 +120,9 @@ // Notes transform blocks where no coefficents are coded. // Set during mode selection. Read during block encoding. uint8_t zcoeff_blk[TX_SIZES][256]; +#if CONFIG_VAR_TX + uint8_t blk_skip[MAX_MB_PLANE][256]; +#endif int skip;
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c index 6c056d2..1ac802f 100644 --- a/vp10/encoder/context_tree.c +++ b/vp10/encoder/context_tree.c
@@ -28,6 +28,10 @@ CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_blk, sizeof(uint8_t))); for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_VAR_TX + CHECK_MEM_ERROR(cm, ctx->blk_skip[i], + vpx_calloc(num_blk, sizeof(uint8_t))); +#endif for (k = 0; k < 3; ++k) { CHECK_MEM_ERROR(cm, ctx->coeff[i][k], vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k]))); @@ -50,6 +54,10 @@ vpx_free(ctx->zcoeff_blk); ctx->zcoeff_blk = 0; for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_VAR_TX + vpx_free(ctx->blk_skip[i]); + ctx->blk_skip[i] = 0; +#endif for (k = 0; k < 3; ++k) { vpx_free(ctx->coeff[i][k]); ctx->coeff[i][k] = 0;
diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h index 2a0fffb..55ae471 100644 --- a/vp10/encoder/context_tree.h +++ b/vp10/encoder/context_tree.h
@@ -28,6 +28,9 @@ MB_MODE_INFO_EXT mbmi_ext; uint8_t *zcoeff_blk; uint8_t *color_index_map[2]; +#if CONFIG_VAR_TX + uint8_t *blk_skip[MAX_MB_PLANE]; +#endif tran_low_t *coeff[MAX_MB_PLANE][3]; tran_low_t *qcoeff[MAX_MB_PLANE][3]; tran_low_t *dqcoeff[MAX_MB_PLANE][3];
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c index 132a141..5602753 100644 --- a/vp10/encoder/dct.c +++ b/vp10/encoder/dct.c
@@ -37,6 +37,382 @@ #endif } +#if CONFIG_EXT_TX +void fdst4(const tran_low_t *input, tran_low_t *output) { /* 4-point forward transform: reads input[0..3] and writes output[0..3]; USE_DST2 selects between the two implementations below */ +#if USE_DST2 + tran_high_t step[4]; + tran_high_t temp1, temp2; + + step[0] = input[0] - input[3]; + step[1] = -input[1] + input[2]; + step[2] = -input[1] - input[2]; + step[3] = input[0] + input[3]; + + temp1 = (step[0] + step[1]) * cospi_16_64; + temp2 = (step[0] - step[1]) * cospi_16_64; + output[3] = fdct_round_shift(temp1); + output[1] = fdct_round_shift(temp2); + temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; + temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; + output[2] = fdct_round_shift(temp1); + output[0] = fdct_round_shift(temp2); +#else /* table-driven variant: each output is a 64-bit weighted sum of the mirrored input sums and differences, passed through ROUND_POWER_OF_TWO with a shift of 2 * DCT_CONST_BITS */ + // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2) + static const int32_t sinvalue_lookup[] = { + 141124871, 228344838, + }; + int64_t sum; + int64_t s03 = (input[0] + input[3]); + int64_t d03 = (input[0] - input[3]); + int64_t s12 = (input[1] + input[2]); + int64_t d12 = (input[1] - input[2]); + sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1]; + output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0]; + output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0]; + output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1]; + output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); +#endif // USE_DST2 +} + +void fdst8(const tran_low_t *input, tran_low_t *output) { /* 8-point counterpart of fdst4: reads input[0..7] and writes output[0..7] */ +#if USE_DST2 + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 + tran_high_t t0, t1, t2, t3; // needs32 + tran_high_t x0, x1, x2, x3; // canbe16 + + // stage 1 + s0 = input[0] - input[7]; + s1 = -input[1] + input[6]; + s2 = input[2] - input[5]; + s3 = -input[3] + input[4]; + s4 = -input[3] - input[4]; + s5 = input[2] + input[5]; + s6 = -input[1] - input[6]; + s7 = input[0] + input[7]; + + x0 = s0 + s3; + x1 = s1 + s2; + x2 = s1 - s2; + x3 =
s0 - s3; + t0 = (x0 + x1) * cospi_16_64; + t1 = (x0 - x1) * cospi_16_64; + t2 = x2 * cospi_24_64 + x3 * cospi_8_64; + t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; + output[7] = fdct_round_shift(t0); + output[5] = fdct_round_shift(t2); + output[3] = fdct_round_shift(t1); + output[1] = fdct_round_shift(t3); + + // Stage 2 + t0 = (s6 - s5) * cospi_16_64; + t1 = (s6 + s5) * cospi_16_64; + t2 = fdct_round_shift(t0); + t3 = fdct_round_shift(t1); + + // Stage 3 + x0 = s4 + t2; + x1 = s4 - t2; + x2 = s7 - t3; + x3 = s7 + t3; + + // Stage 4 + t0 = x0 * cospi_28_64 + x3 * cospi_4_64; + t1 = x1 * cospi_12_64 + x2 * cospi_20_64; + t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; + t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; + output[6] = fdct_round_shift(t0); + output[4] = fdct_round_shift(t2); + output[2] = fdct_round_shift(t1); + output[0] = fdct_round_shift(t3); +#else /* table-driven variant: even-indexed outputs use the input sums, odd-indexed outputs use the input differences */ + // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2 + static const int sinvalue_lookup[] = { + 86559612, 162678858, 219176632, 249238470 + }; + int64_t sum; + int64_t s07 = (input[0] + input[7]); + int64_t d07 = (input[0] - input[7]); + int64_t s16 = (input[1] + input[6]); + int64_t d16 = (input[1] - input[6]); + int64_t s25 = (input[2] + input[5]); + int64_t d25 = (input[2] - input[5]); + int64_t s34 = (input[3] + input[4]); + int64_t d34 = (input[3] - input[4]); + sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] + + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3]; + output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] + + d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0]; + output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = (s07 + s16 - s34)* sinvalue_lookup[2]; + output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] - + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1]; + output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s07 *
sinvalue_lookup[3] - s16 * sinvalue_lookup[0] - + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1]; + output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = (d07 - d16 + d34)* sinvalue_lookup[2]; + output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] + + s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0]; + output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] + + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3]; + output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); +#endif // USE_DST2 +} + +void fdst16(const tran_low_t *input, tran_low_t *output) { /* 16-point counterpart: reads input[0..15] and writes output[0..15] */ +#if USE_DST2 + tran_high_t step1[8]; // canbe16 + tran_high_t step2[8]; // canbe16 + tran_high_t step3[8]; // canbe16 + tran_high_t in[8]; // canbe16 + tran_high_t temp1, temp2; // needs32 + + // step 1 + in[0] = input[0] - input[15]; + in[1] = -input[1] + input[14]; + in[2] = input[2] - input[13]; + in[3] = -input[3] + input[12]; + in[4] = input[4] - input[11]; + in[5] = -input[5] + input[10]; + in[6] = input[6] - input[ 9]; + in[7] = -input[7] + input[ 8]; + + step1[0] = -input[7] - input[ 8]; + step1[1] = input[6] + input[ 9]; + step1[2] = -input[5] - input[10]; + step1[3] = input[4] + input[11]; + step1[4] = -input[3] - input[12]; + step1[5] = input[2] + input[13]; + step1[6] = -input[1] - input[14]; + step1[7] = input[0] + input[15]; + + // fdct8(step, step); + { + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 + tran_high_t t0, t1, t2, t3; // needs32 + tran_high_t x0, x1, x2, x3; // canbe16 + + // stage 1 + s0 = in[0] + in[7]; + s1 = in[1] + in[6]; + s2 = in[2] + in[5]; + s3 = in[3] + in[4]; + s4 = in[3] - in[4]; + s5 = in[2] - in[5]; + s6 = in[1] - in[6]; + s7 = in[0] - in[7]; + + // fdct4(step, step); + x0 = s0 + s3; + x1 = s1 + s2; + x2 = s1 - s2; + x3 = s0 - s3; + t0 = (x0 + x1) * cospi_16_64; + t1 = (x0 - x1) * cospi_16_64; + t2 = x3 * cospi_8_64
+ x2 * cospi_24_64; + t3 = x3 * cospi_24_64 - x2 * cospi_8_64; + output[15] = fdct_round_shift(t0); + output[11] = fdct_round_shift(t2); + output[7] = fdct_round_shift(t1); + output[3] = fdct_round_shift(t3); + + // Stage 2 + t0 = (s6 - s5) * cospi_16_64; + t1 = (s6 + s5) * cospi_16_64; + t2 = fdct_round_shift(t0); + t3 = fdct_round_shift(t1); + + // Stage 3 + x0 = s4 + t2; + x1 = s4 - t2; + x2 = s7 - t3; + x3 = s7 + t3; + + // Stage 4 + t0 = x0 * cospi_28_64 + x3 * cospi_4_64; + t1 = x1 * cospi_12_64 + x2 * cospi_20_64; + t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; + t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; + output[13] = fdct_round_shift(t0); + output[9] = fdct_round_shift(t2); + output[5] = fdct_round_shift(t1); + output[1] = fdct_round_shift(t3); + } + + // step 2 + temp1 = (step1[5] - step1[2]) * cospi_16_64; + temp2 = (step1[4] - step1[3]) * cospi_16_64; + step2[2] = fdct_round_shift(temp1); + step2[3] = fdct_round_shift(temp2); + temp1 = (step1[4] + step1[3]) * cospi_16_64; + temp2 = (step1[5] + step1[2]) * cospi_16_64; + step2[4] = fdct_round_shift(temp1); + step2[5] = fdct_round_shift(temp2); + + // step 3 + step3[0] = step1[0] + step2[3]; + step3[1] = step1[1] + step2[2]; + step3[2] = step1[1] - step2[2]; + step3[3] = step1[0] - step2[3]; + step3[4] = step1[7] - step2[4]; + step3[5] = step1[6] - step2[5]; + step3[6] = step1[6] + step2[5]; + step3[7] = step1[7] + step2[4]; + + // step 4 + temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; + temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; + step2[1] = fdct_round_shift(temp1); + step2[2] = fdct_round_shift(temp2); + temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; + temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; + step2[5] = fdct_round_shift(temp1); + step2[6] = fdct_round_shift(temp2); + + // step 5 + step1[0] = step3[0] + step2[1]; + step1[1] = step3[0] - step2[1]; + step1[2] = step3[3] + step2[2]; + step1[3] = step3[3] - step2[2]; + step1[4] = step3[4] - step2[5]; + step1[5]
= step3[4] + step2[5]; + step1[6] = step3[7] - step2[6]; + step1[7] = step3[7] + step2[6]; + + // step 6 + temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; + temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; + output[14] = fdct_round_shift(temp1); + output[6] = fdct_round_shift(temp2); + + temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; + temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; + output[10] = fdct_round_shift(temp1); + output[2] = fdct_round_shift(temp2); + + temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; + temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; + output[12] = fdct_round_shift(temp1); + output[4] = fdct_round_shift(temp2); + + temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; + temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; + output[8] = fdct_round_shift(temp1); + output[0] = fdct_round_shift(temp2); +#else + // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2) + static const int sinvalue_lookup[] = { + 47852167, 94074787, 137093803, 175444254, + 207820161, 233119001, 250479254, 259309736 + }; + int64_t sum; + int64_t s015 = (input[0] + input[15]); + int64_t d015 = (input[0] - input[15]); + int64_t s114 = (input[1] + input[14]); + int64_t d114 = (input[1] - input[14]); + int64_t s213 = (input[2] + input[13]); + int64_t d213 = (input[2] - input[13]); + int64_t s312 = (input[3] + input[12]); + int64_t d312 = (input[3] - input[12]); + int64_t s411 = (input[4] + input[11]); + int64_t d411 = (input[4] - input[11]); + int64_t s510 = (input[5] + input[10]); + int64_t d510 = (input[5] - input[10]); + int64_t s69 = (input[6] + input[9]); + int64_t d69 = (input[6] - input[9]); + int64_t s78 = (input[7] + input[8]); + int64_t d78 = (input[7] - input[8]); + sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] + + s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] + + s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] + + s69 * sinvalue_lookup[6] + s78 *
sinvalue_lookup[7]; + output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] + + d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] + + d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] + + d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0]; + output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] + + s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] + + s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] - + s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6]; + output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] + + d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] - + d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] - + d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1]; + output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] + + s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] - + s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] + + s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5]; + output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] - + d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] - + d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] + + d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2]; + output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] - + s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] + + s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] + + s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4]; + output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] - + d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] + + d411 *
sinvalue_lookup[5] + d510 * sinvalue_lookup[2] - + d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3]; + output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] - + s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] + + s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] - + s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3]; + output[8] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] - + d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] + + d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] + + d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4]; + output[9] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] - + s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] - + s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] + + s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2]; + output[10] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] + + d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] - + d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] + + d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5]; + output[11] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] + + s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] - + s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] - + s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1]; + output[12] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] + + d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] + + d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] - + d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6]; + output[13] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[1] - s114 *
sinvalue_lookup[3] + + s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] + + s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] + + s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0]; + output[14] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] + + d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] + + d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] + + d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7]; + output[15] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); +#endif // USE_DST2 +} +#endif // CONFIG_EXT_TX + static void fdct4(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; tran_low_t step[4]; @@ -999,29 +1375,171 @@ output[15] = (tran_low_t)-x1; } +#if CONFIG_EXT_TX +static void copy_block(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { /* copies an l x l block of int16_t from src to dest one row at a time, honouring each side's stride */ + int i; + for (i = 0; i < l; ++i) { + memcpy(dest + dest_stride * i, src + src_stride * i, + l * sizeof(int16_t)); + } +} + +static void fliplr(int16_t *dest, int stride, int l) { /* in-place horizontal mirror: within every row, column j is swapped with column l - 1 - j */ + int i, j; + for (i = 0; i < l; ++i) { + for (j = 0; j < l / 2; ++j) { + const int16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[i * stride + l - 1 - j]; + dest[i * stride + l - 1 - j] = tmp; + } + } +} + +static void flipud(int16_t *dest, int stride, int l) { /* in-place vertical mirror: within every column, row i is swapped with row l - 1 - i */ + int i, j; + for (j = 0; j < l; ++j) { + for (i = 0; i < l / 2; ++i) { + const int16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + j]; + dest[(l - 1 - i) * stride + j] = tmp; + } + } +} + +static void fliplrud(int16_t *dest, int stride, int l) { /* in-place flip in both directions: element (i, j) is swapped with (l - 1 - i, l - 1 - j) for the top l / 2 rows; an odd l would leave the middle row unmirrored, but the callers in this file pass 4, 8 and 16 */ + int i, j; + for (i = 0; i < l / 2; ++i) { + for (j = 0; j < l; ++j) { + const int16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j]; + dest[(l - 1 - i) * stride + l - 1 - j] = tmp; + } + } +} + +static void copy_fliplr(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { + copy_block(src,
src_stride, l, dest, dest_stride); + fliplr(dest, dest_stride, l); +} + +static void copy_flipud(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { + copy_block(src, src_stride, l, dest, dest_stride); + flipud(dest, dest_stride, l); +} + +static void copy_fliplrud(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { + copy_block(src, src_stride, l, dest, dest_stride); + fliplrud(dest, dest_stride, l); +} + +static void maybe_flip_input(const int16_t **src, int *src_stride, int l, + int16_t *buff, int tx_type) { /* for the FLIPADST tx types, writes a flipped l x l copy of the input into buff and redirects *src and *src_stride (set to l) at it; every other listed tx type leaves both untouched, and an unlisted value hits assert(0) */ + switch (tx_type) { + case DCT_DCT: + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + break; + case FLIPADST_DCT: + case FLIPADST_ADST: + case FLIPADST_DST: + copy_flipud(*src, *src_stride, l, buff, l); + *src = buff; + *src_stride = l; + break; + case DCT_FLIPADST: + case ADST_FLIPADST: + case DST_FLIPADST: + copy_fliplr(*src, *src_stride, l, buff, l); + *src = buff; + *src_stride = l; + break; + case FLIPADST_FLIPADST: + copy_fliplrud(*src, *src_stride, l, buff, l); + *src = buff; + *src_stride = l; + break; + default: + assert(0); + break; + } +} +#endif // CONFIG_EXT_TX + static const transform_2d FHT_4[] = { - { fdct4, fdct4 }, // DCT_DCT = 0 - { fadst4, fdct4 }, // ADST_DCT = 1 - { fdct4, fadst4 }, // DCT_ADST = 2 - { fadst4, fadst4 } // ADST_ADST = 3 + { fdct4, fdct4 }, // DCT_DCT = 0, + { fadst4, fdct4 }, // ADST_DCT = 1, + { fdct4, fadst4 }, // DCT_ADST = 2, + { fadst4, fadst4 }, // ADST_ADST = 3, +#if CONFIG_EXT_TX /* FLIPADST entries reuse the plain fadst kernels; the flip is applied to the input beforehand by maybe_flip_input */ + { fadst4, fdct4 }, // FLIPADST_DCT = 4, + { fdct4, fadst4 }, // DCT_FLIPADST = 5, + { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6, + { fadst4, fadst4 }, // ADST_FLIPADST = 7, + { fadst4, fadst4 }, // FLIPADST_ADST = 8, + { fdst4, fdct4 }, // DST_DCT = 9, + { fdct4, fdst4 }, // DCT_DST = 10, + { fdst4, fadst4 }, // DST_ADST = 11, + { fadst4, fdst4 }, // ADST_DST = 12, + { fdst4, fadst4 }, //
DST_FLIPADST = 13, + { fadst4, fdst4 }, // FLIPADST_DST = 14, + { fdst4, fdst4 }, // DST_DST = 15 +#endif // CONFIG_EXT_TX }; static const transform_2d FHT_8[] = { - { fdct8, fdct8 }, // DCT_DCT = 0 - { fadst8, fdct8 }, // ADST_DCT = 1 - { fdct8, fadst8 }, // DCT_ADST = 2 - { fadst8, fadst8 } // ADST_ADST = 3 + { fdct8, fdct8 }, // DCT_DCT = 0, + { fadst8, fdct8 }, // ADST_DCT = 1, + { fdct8, fadst8 }, // DCT_ADST = 2, + { fadst8, fadst8 }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { fadst8, fdct8 }, // FLIPADST_DCT = 4, + { fdct8, fadst8 }, // DCT_FLIPADST = 5, + { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6, + { fadst8, fadst8 }, // ADST_FLIPADST = 7, + { fadst8, fadst8 }, // FLIPADST_ADST = 8, + { fdst8, fdct8 }, // DST_DCT = 9, + { fdct8, fdst8 }, // DCT_DST = 10, + { fdst8, fadst8 }, // DST_ADST = 11, + { fadst8, fdst8 }, // ADST_DST = 12, + { fdst8, fadst8 }, // DST_FLIPADST = 13, + { fadst8, fdst8 }, // FLIPADST_DST = 14, + { fdst8, fdst8 }, // DST_DST = 15 +#endif // CONFIG_EXT_TX }; static const transform_2d FHT_16[] = { - { fdct16, fdct16 }, // DCT_DCT = 0 - { fadst16, fdct16 }, // ADST_DCT = 1 - { fdct16, fadst16 }, // DCT_ADST = 2 - { fadst16, fadst16 } // ADST_ADST = 3 + { fdct16, fdct16 }, // DCT_DCT = 0, + { fadst16, fdct16 }, // ADST_DCT = 1, + { fdct16, fadst16 }, // DCT_ADST = 2, + { fadst16, fadst16 }, // ADST_ADST = 3, +#if CONFIG_EXT_TX + { fadst16, fdct16 }, // FLIPADST_DCT = 4, + { fdct16, fadst16 }, // DCT_FLIPADST = 5, + { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6, + { fadst16, fadst16 }, // ADST_FLIPADST = 7, + { fadst16, fadst16 }, // FLIPADST_ADST = 8, + { fdst16, fdct16 }, // DST_DCT = 9, + { fdct16, fdst16 }, // DCT_DST = 10, + { fdst16, fadst16 }, // DST_ADST = 11, + { fadst16, fdst16 }, // ADST_DST = 12, + { fdst16, fadst16 }, // DST_FLIPADST = 13, + { fadst16, fdst16 }, // FLIPADST_DST = 14, + { fdst16, fdst16 }, // DST_DST = 15 +#endif // CONFIG_EXT_TX }; void vp10_fht4x4_c(const int16_t *input, tran_low_t *output, - int stride, int
tx_type) { + int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct4x4_c(input, output, stride); } else { @@ -1030,6 +1548,11 @@ tran_low_t temp_in[4], temp_out[4]; const transform_2d ht = FHT_4[tx_type]; +#if CONFIG_EXT_TX + int16_t flipped_input[4 * 4]; /* scratch block that maybe_flip_input fills, and points input at, only for the FLIPADST tx types */ + maybe_flip_input(&input, &stride, 4, flipped_input, tx_type); +#endif + // Columns for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) @@ -1053,15 +1576,15 @@ } void vp10_fdct8x8_quant_c(const int16_t *input, int stride, - tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, - uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { + tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, + uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { int eob = -1; int i, j; @@ -1165,7 +1688,7 @@ } void vp10_fht8x8_c(const int16_t *input, tran_low_t *output, - int stride, int tx_type) { + int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct8x8_c(input, output, stride); } else { @@ -1174,6 +1697,11 @@ tran_low_t temp_in[8], temp_out[8]; const transform_2d ht = FHT_8[tx_type]; +#if CONFIG_EXT_TX + int16_t flipped_input[8 * 8]; + maybe_flip_input(&input, &stride, 8, flipped_input, tx_type); +#endif + // Columns for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) @@ -1251,7 +1779,7 @@ } void vp10_fht16x16_c(const int16_t *input, tran_low_t *output, - int stride, int tx_type) { + int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct16x16_c(input, output, stride); } else { @@ -1260,6 +1788,11 @@ tran_low_t temp_in[16], temp_out[16]; const transform_2d ht = FHT_16[tx_type]; +#if
CONFIG_EXT_TX + int16_t flipped_input[16 * 16]; + maybe_flip_input(&input, &stride, 16, flipped_input, tx_type); +#endif + // Columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j)
diff --git a/vp10/encoder/denoiser.c b/vp10/encoder/denoiser.c index e5d8157..43647b0 100644 --- a/vp10/encoder/denoiser.c +++ b/vp10/encoder/denoiser.c
@@ -377,9 +377,17 @@ void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, +#if CONFIG_EXT_REFS + int refresh_last_frames[LAST_REF_FRAMES], /* one refresh flag per LAST_FRAME..LAST4_FRAME reference, indexed by ref_frame - LAST_FRAME in the loop further down */ +#else + int refresh_last_frame, /* NOTE: the last-frame flag now precedes refresh_alt_ref_frame instead of being the final parameter, so existing call sites must pass their arguments in the new order */ +#endif // CONFIG_EXT_REFS int refresh_alt_ref_frame, - int refresh_golden_frame, - int refresh_last_frame) { + int refresh_golden_frame) { +#if CONFIG_EXT_REFS + int ref_frame; +#endif // CONFIG_EXT_REFS + if (frame_type == KEY_FRAME) { int i; // Start at 1 so as not to overwrite the INTRA_FRAME @@ -397,10 +405,19 @@ swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME], &denoiser->running_avg_y[INTRA_FRAME]); } +#if CONFIG_EXT_REFS + for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) { /* every refreshed LAST-type reference is swapped with the INTRA_FRAME slot of running_avg_y, the same operation the single-flag branch applies to LAST_FRAME */ + if (refresh_last_frames[ref_frame - LAST_FRAME]) { + swap_frame_buffer(&denoiser->running_avg_y[ref_frame], + &denoiser->running_avg_y[INTRA_FRAME]); + } + } +#else if (refresh_last_frame) { swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME], &denoiser->running_avg_y[INTRA_FRAME]); } +#endif // CONFIG_EXT_REFS } void vp10_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
diff --git a/vp10/encoder/denoiser.h b/vp10/encoder/denoiser.h index e543fb0..f48cbb0 100644 --- a/vp10/encoder/denoiser.h +++ b/vp10/encoder/denoiser.h
@@ -35,9 +35,13 @@ void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, +#if CONFIG_EXT_REFS + int refresh_last_frames[LAST_REF_FRAMES], /* array of LAST_REF_FRAMES refresh flags when CONFIG_EXT_REFS is set */ +#else + int refresh_last_frame, /* NOTE: moved ahead of refresh_alt_ref_frame; it used to be the last parameter, so callers must follow the new argument order */ +#endif // CONFIG_EXT_REFS int refresh_alt_ref_frame, - int refresh_golden_frame, - int refresh_last_frame); + int refresh_golden_frame); void vp10_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs,
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 26ce5a1..c1301f9 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c
@@ -36,6 +36,9 @@ #include "vp10/encoder/aq_complexity.h" #include "vp10/encoder/aq_cyclicrefresh.h" #include "vp10/encoder/aq_variance.h" +#if CONFIG_SUPERTX +#include "vp10/encoder/cost.h" /* pulled in only for CONFIG_SUPERTX builds */ +#endif #include "vp10/encoder/encodeframe.h" #include "vp10/encoder/encodemb.h" #include "vp10/encoder/encodemv.h" @@ -51,6 +54,40 @@ int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +#if CONFIG_SUPERTX +static int check_intra_b(PICK_MODE_CONTEXT *ctx); /* forward declarations of the SUPERTX helpers start here; most of their definitions are not visible in this excerpt */ + +static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree); +static void predict_superblock(VP10_COMP *cpi, ThreadData *td, + int mi_row_pred, int mi_col_pred, + BLOCK_SIZE bsize_pred, int b_sub8x8, int block); +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree); +static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree); +static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree); +static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, +#if CONFIG_EXT_TX + TX_TYPE *best_tx, +#endif + PC_TREE *pc_tree); +#endif // CONFIG_SUPERTX + // This is used as a reference when computing the source variance for the // purposes of activity masking. // Eventually this should be replaced by custom no-reference routines, @@ -170,11 +207,11 @@ // Lighter version of set_offsets that only sets the mode info // pointers.
-static INLINE void set_mode_info_offsets(VP10_COMP *const cpi, - MACROBLOCK *const x, - MACROBLOCKD *const xd, - int mi_row, - int mi_col) { +static void set_mode_info_offsets(VP10_COMP *const cpi, /* the INLINE qualifier is dropped by this change */ + MACROBLOCK *const x, + MACROBLOCKD *const xd, + int mi_row, + int mi_col) { VP10_COMMON *const cm = &cpi->common; const int idx_str = xd->mi_stride * mi_row + mi_col; xd->mi = cm->mi_grid_visible + idx_str; @@ -196,6 +233,12 @@ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); /* the left context is indexed by the low three bits of mi_row, so the index wraps every 8 mi rows */ + xd->max_tx_size = max_txsize_lookup[bsize]; +#endif + mbmi = &xd->mi[0]->mbmi; // Set up destination pointers. @@ -239,6 +282,80 @@ xd->tile = *tile; } +#if CONFIG_SUPERTX +static void set_offsets_supertx(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize) { /* sets the mode-info pointers and the block-to-frame-edge distances for bsize; unlike set_offsets_extend it does not touch MV limits, RD constants or the segment id */ + MACROBLOCK *const x = &td->mb; + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); + + // Set up distance of MB to edge of frame in 1/8th pel units.
+ assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, + cm->mi_rows, cm->mi_cols); +} + +static void set_offsets_extend(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row_pred, int mi_col_pred, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize_pred, BLOCK_SIZE bsize_ori) { + // Used in supertx + // (mi_row_ori, mi_col_ori, bsize_ori): region for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + MACROBLOCK *const x = &td->mb; + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; + const int mi_width = num_8x8_blocks_wide_lookup[bsize_pred]; + const int mi_height = num_8x8_blocks_high_lookup[bsize_pred]; + const struct segmentation *const seg = &cm->seg; + + set_mode_info_offsets(cpi, x, xd, mi_row_ori, mi_col_ori); + + mbmi = &xd->mi[0]->mbmi; + + // Set up limit values for MV components. + // Mv beyond the range do not produce new/different prediction block. + x->mv_row_min = -(((mi_row_pred + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_col_min = -(((mi_col_pred + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_row_max = (cm->mi_rows - mi_row_pred) * MI_SIZE + VP9_INTERP_EXTEND; + x->mv_col_max = (cm->mi_cols - mi_col_pred) * MI_SIZE + VP9_INTERP_EXTEND; + + // Set up distance of MB to edge of frame in 1/8th pel units. + assert(!(mi_col_pred & (mi_width - 1)) && !(mi_row_pred & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row_pred, mi_height, mi_col_pred, mi_width, + cm->mi_rows, cm->mi_cols); + xd->up_available = (mi_row_ori != 0); + xd->left_available = (mi_col_ori > tile->mi_col_start); /* both availability flags are assigned after set_mi_row_col and come from the origin (mv) position rather than the predicted one */ + + // R/D setup. + x->rddiv = cpi->rd.RDDIV; + x->rdmult = cpi->rd.RDMULT; + + // Setup segment ID. + if (seg->enabled) { + if (cpi->oxcf.aq_mode != VARIANCE_AQ) { + const uint8_t *const map = seg->update_map ?
cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = get_segment_id(cm, map, bsize_ori, + mi_row_ori, mi_col_ori); + } + vp10_init_plane_quantizers(cpi, x); + + x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; + } else { + mbmi->segment_id = 0; + x->encode_breakout = cpi->encode_breakout; + } +} +#endif // CONFIG_SUPERTX + static void set_block_size(VP10_COMP * const cpi, MACROBLOCK *const x, MACROBLOCKD *const xd, @@ -967,7 +1084,9 @@ const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; +#if !CONFIG_SUPERTX /* the sb_type == bsize check is compiled out under CONFIG_SUPERTX; presumably supertx can reach this point with a differing block size - TODO confirm */ assert(mi->mbmi.sb_type == bsize); +#endif *mi_addr = *mi; *x->mbmi_ext = ctx->mbmi_ext; @@ -985,8 +1104,8 @@ // and then update the quantizer. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row, - mi_col, bsize, ctx->rate, ctx->dist, - x->skip); + mi_col, bsize, ctx->rate, ctx->dist, + x->skip); } } @@ -1026,8 +1145,15 @@ } x->skip = ctx->skip; + +#if CONFIG_VAR_TX /* under VAR_TX the per-plane blk_skip flags are copied in place of the per-tx-size zcoeff_blk copy */ + for (i = 0; i < MAX_MB_PLANE; ++i) + memcpy(x->blk_skip[i], ctx->blk_skip[i], + sizeof(uint8_t) * ctx->num_4x4_blk); +#else memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); +#endif if (!output_enabled) return; @@ -1056,7 +1182,11 @@ if (is_inter_block(mbmi)) { vp10_update_mv_count(td); - if (cm->interp_filter == SWITCHABLE) { + if (cm->interp_filter == SWITCHABLE +#if CONFIG_EXT_INTERP + && vp10_is_interp_needed(xd) +#endif + ) { const int ctx = vp10_get_pred_context_switchable_interp(xd); ++td->counts->switchable_interp[ctx][mbmi->interp_filter]; } @@ -1082,6 +1212,285 @@ } } +#if CONFIG_SUPERTX +static void update_state_supertx(VP10_COMP *cpi, ThreadData *td, + PICK_MODE_CONTEXT *ctx, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int output_enabled) { /* SUPERTX variant of update_state: asserts the block is inter, installs ctx->mic into the visible mode-info grid, and records reference frames and mvs into cm->cur_frame->mvs */ + int i, y, x_idx; + VP10_COMMON *const cm = &cpi->common; + RD_COUNTS *const rdc = &td->rd_counts; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; +
MODE_INFO *mi = &ctx->mic; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; + const struct segmentation *const seg = &cm->seg; + const int mis = cm->mi_stride; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int x_mis = VPXMIN(mi_width, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(mi_height, cm->mi_rows - mi_row); + MV_REF *const frame_mvs = + cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; + int w, h; + + *mi_addr = *mi; + *x->mbmi_ext = ctx->mbmi_ext; + assert(is_inter_block(mbmi)); + + // If segmentation in use + if (seg->enabled && output_enabled) { + // For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + get_segment_id(cm, map, bsize, mi_row, mi_col); + } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer.
+ vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, + ctx->rate, ctx->dist, 1); /* a constant 1 is passed as the final argument here, where update_state passes x->skip */ + vp10_init_plane_quantizers(cpi, x); + } + } + + // Restore the coding context of the MB to that that was in place + // when the mode was picked for it + for (y = 0; y < mi_height; y++) + for (x_idx = 0; x_idx < mi_width; x_idx++) + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { + xd->mi[x_idx + y * mis] = mi_addr; + } + + if (cpi->oxcf.aq_mode) + vp10_init_plane_quantizers(cpi, x); + + if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { + mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + } + + x->skip = ctx->skip; + memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, + sizeof(uint8_t) * ctx->num_4x4_blk); /* NOTE(review): unlike update_state, this copy is not switched to blk_skip under CONFIG_VAR_TX - confirm that is intended */ + + if (!output_enabled) + return; + + if (!frame_is_intra_only(cm)) { + vp10_update_mv_count(td); + + if (cm->interp_filter == SWITCHABLE +#if CONFIG_EXT_INTERP + && vp10_is_interp_needed(xd) +#endif + ) { + const int ctx = vp10_get_pred_context_switchable_interp(xd); /* NOTE: this local shadows the PICK_MODE_CONTEXT *ctx parameter, but only inside these braces */ + ++td->counts->switchable_interp[ctx][mbmi->interp_filter]; + } + + rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + rdc->filter_diff[i] += ctx->best_filter_diff[i]; + } + + for (h = 0; h < y_mis; ++h) { /* x_mis and y_mis clamp the block to the frame dimensions, so only in-frame MV_REF entries are written */ + MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mv + w; + mv->ref_frame[0] = mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->mbmi.mv[1].as_int; + } + } +} + +static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, +
BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + int i; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->none, mi_row, mi_col, + subsize, output_enabled); + break; + case PARTITION_VERT: + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, + subsize, output_enabled); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize); + update_state_supertx(cpi, td, &pc_tree->vertical[1], + mi_row, mi_col + hbs, subsize, output_enabled); + } + break; + case PARTITION_HORZ: + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->horizontal[0], mi_row, mi_col, + subsize, output_enabled); + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->horizontal[1], mi_row + hbs, + mi_col, subsize, output_enabled); + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, pc_tree->leaf_split[0], mi_row, mi_col, + subsize, output_enabled); + } else { + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, subsize, + output_enabled, pc_tree->split[0]); + 
set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize, + output_enabled, pc_tree->split[1]); + set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize, + output_enabled, pc_tree->split[2]); + set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, + subsize, output_enabled, pc_tree->split[3]); + } + break; + default: + assert(0); + } + + for (i = 0; i < MAX_MB_PLANE; ++i) { + p[i].coeff = (&pc_tree->none)->coeff_pbuf[i][1]; + p[i].qcoeff = (&pc_tree->none)->qcoeff_pbuf[i][1]; + pd[i].dqcoeff = (&pc_tree->none)->dqcoeff_pbuf[i][1]; + p[i].eobs = (&pc_tree->none)->eobs_pbuf[i][1]; + } +} + +static void update_supertx_param(ThreadData *td, + PICK_MODE_CONTEXT *ctx, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size) { + MACROBLOCK *const x = &td->mb; + + ctx->mic.mbmi.tx_size = supertx_size; + memcpy(ctx->zcoeff_blk, x->zcoeff_blk[supertx_size], + sizeof(uint8_t) * ctx->num_4x4_blk); + ctx->skip = x->skip; +#if CONFIG_EXT_TX + ctx->mic.mbmi.tx_type = best_tx; +#endif // CONFIG_EXT_TX +} + +static void update_supertx_param_sb(VP10_COMP *cpi, ThreadData *td, + int mi_row, int mi_col, + BLOCK_SIZE bsize, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size, PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + update_supertx_param(td, &pc_tree->none, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_VERT: + update_supertx_param(td, &pc_tree->vertical[0], +#if CONFIG_EXT_TX + best_tx, 
+#endif + supertx_size); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) + update_supertx_param(td, &pc_tree->vertical[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_HORZ: + update_supertx_param(td, &pc_tree->horizontal[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) + update_supertx_param(td, &pc_tree->horizontal[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + update_supertx_param(td, pc_tree->leaf_split[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + } else { + update_supertx_param_sb(cpi, td, mi_row, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[0]); + update_supertx_param_sb(cpi, td, mi_row, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[1]); + update_supertx_param_sb(cpi, td, mi_row + hbs, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[2]); + update_supertx_param_sb(cpi, td, mi_row + hbs, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[3]); + } + break; + default: + assert(0); + } +} +#endif // CONFIG_SUPERTX + void vp10_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { uint8_t *const buffers[3] = {src->y_buffer, src->u_buffer, src->v_buffer }; @@ -1113,6 +1522,9 @@ TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *totalrate_nocoef, +#endif BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { VP10_COMMON *const cm = &cpi->common; @@ -1140,6 +1552,15 @@ p[i].eobs = ctx->eobs_pbuf[i][0]; } + if (cm->current_video_frame == 0 && cm->allow_screen_content_tools) { + for (i = 0; i < 2; ++i) { + if (ctx->color_index_map[i] == 0) { + CHECK_MEM_ERROR(cm, ctx->color_index_map[i], + vpx_memalign(16, 
(ctx->num_4x4_blk << 4) * + sizeof(*ctx->color_index_map[i]))); + } + } + } for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; @@ -1196,17 +1617,30 @@ // as a predictor for MBs that follow in the SB if (frame_is_intra_only(cm)) { vp10_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = 0; +#endif // CONFIG_SUPERTX } else { if (bsize >= BLOCK_8X8) { - if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { vp10_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, ctx, best_rd); - else - vp10_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, - rd_cost, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = rd_cost->rate; +#endif // CONFIG_SUPERTX + } else { + vp10_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif // CONFIG_SUPERTX + bsize, ctx, best_rd); + } } else { - vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, - rd_cost, bsize, ctx, best_rd); + vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif // CONFIG_SUPERTX + bsize, ctx, best_rd); } } @@ -1231,7 +1665,47 @@ ctx->dist = rd_cost->dist; } -static void update_stats(VP10_COMMON *cm, ThreadData *td) { +#if CONFIG_REF_MV +static void update_inter_mode_stats(FRAME_COUNTS *counts, + PREDICTION_MODE mode, + int16_t mode_context) { + int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; + if (mode == NEWMV) { + ++counts->newmv_mode[mode_ctx][0]; + return; + } else { + ++counts->newmv_mode[mode_ctx][1]; + + if (mode_context & (1 << ALL_ZERO_FLAG_OFFSET)) { + return; + } + + mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; + if (mode == ZEROMV) { + ++counts->zeromv_mode[mode_ctx][0]; + return; + } else { + ++counts->zeromv_mode[mode_ctx][1]; + mode_ctx = (mode_context >> 
REFMV_OFFSET) & REFMV_CTX_MASK; + + if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) + mode_ctx = 6; + if (mode_context & (1 << SKIP_NEARMV_OFFSET)) + mode_ctx = 7; + if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) + mode_ctx = 8; + + ++counts->refmv_mode[mode_ctx][mode != NEARESTMV]; + } + } +} +#endif + +static void update_stats(VP10_COMMON *cm, ThreadData *td +#if CONFIG_SUPERTX + , int supertx_enabled +#endif + ) { const MACROBLOCK *x = &td->mb; const MACROBLOCKD *const xd = &x->e_mbd; const MODE_INFO *const mi = xd->mi[0]; @@ -1245,6 +1719,9 @@ const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); if (!seg_ref_active) { +#if CONFIG_SUPERTX + if (!supertx_enabled) +#endif counts->intra_inter[vp10_get_intra_inter_context(xd)][inter_block]++; // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from @@ -1256,23 +1733,66 @@ [has_second_ref(mbmi)]++; if (has_second_ref(mbmi)) { - counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)] +#if CONFIG_EXT_REFS + const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME || + ref0 == LAST4_FRAME); + counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)][0][bit]++; + if (!bit) { + counts->comp_ref[vp10_get_pred_context_comp_ref_p1(cm, xd)][1] + [ref0 == LAST_FRAME]++; + } else { + counts->comp_ref[vp10_get_pred_context_comp_ref_p2(cm, xd)][2] + [ref0 == GOLDEN_FRAME]++; + if (ref0 != GOLDEN_FRAME) { + counts->comp_ref[vp10_get_pred_context_comp_ref_p3(cm, xd)][3] + [ref0 == LAST3_FRAME]++; + } + } +#else + counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)][0] [ref0 == GOLDEN_FRAME]++; +#endif // CONFIG_EXT_REFS } else { +#if CONFIG_EXT_REFS + const int bit = (ref0 == ALTREF_FRAME || ref0 == GOLDEN_FRAME); + counts->single_ref[vp10_get_pred_context_single_ref_p1(xd)][0][bit]++; + if (bit) { + counts->single_ref[vp10_get_pred_context_single_ref_p2(xd)][1] + [ref0 != GOLDEN_FRAME]++; + } 
else { + const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME); + counts->single_ref[vp10_get_pred_context_single_ref_p3(xd)][2] + [bit1]++; + if (!bit1) { + counts->single_ref[vp10_get_pred_context_single_ref_p4(xd)][3] + [ref0 != LAST_FRAME]++; + } else { + counts->single_ref[vp10_get_pred_context_single_ref_p5(xd)][4] + [ref0 != LAST3_FRAME]++; + } + } +#else counts->single_ref[vp10_get_pred_context_single_ref_p1(xd)][0] [ref0 != LAST_FRAME]++; if (ref0 != LAST_FRAME) counts->single_ref[vp10_get_pred_context_single_ref_p2(xd)][1] [ref0 != GOLDEN_FRAME]++; +#endif // CONFIG_EXT_REFS } } } if (inter_block && !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; + int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; if (bsize >= BLOCK_8X8) { const PREDICTION_MODE mode = mbmi->mode; +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, bsize, -1); + update_inter_mode_stats(counts, mode, mode_ctx); +#else ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; +#endif } else { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; @@ -1281,7 +1801,13 @@ for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, bsize, j); + update_inter_mode_stats(counts, b_mode, mode_ctx); +#else ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; +#endif } } } @@ -1293,6 +1819,9 @@ ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], +#if CONFIG_VAR_TX + TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8], +#endif BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; int p; @@ -1317,12 +1846,21 @@ sizeof(*xd->above_seg_context) * mi_width); 
memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, sizeof(xd->left_seg_context[0]) * mi_height); +#if CONFIG_VAR_TX + memcpy(xd->above_txfm_context, ta, + sizeof(*xd->above_txfm_context) * mi_width); + memcpy(xd->left_txfm_context, tl, + sizeof(*xd->left_txfm_context) * mi_height); +#endif } static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], +#if CONFIG_VAR_TX + TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8], +#endif BLOCK_SIZE bsize) { const MACROBLOCKD *const xd = &x->e_mbd; int p; @@ -1349,6 +1887,12 @@ sizeof(*xd->above_seg_context) * mi_width); memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), sizeof(xd->left_seg_context[0]) * mi_height); +#if CONFIG_VAR_TX + memcpy(ta, xd->above_txfm_context, + sizeof(*xd->above_txfm_context) * mi_width); + memcpy(tl, xd->left_txfm_context, + sizeof(*xd->left_txfm_context) * mi_height); +#endif } static void encode_b(VP10_COMP *cpi, const TileInfo *const tile, @@ -1362,7 +1906,11 @@ encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); if (output_enabled) { +#if CONFIG_SUPERTX + update_stats(&cpi->common, td, 0); +#else update_stats(&cpi->common, td); +#endif } } @@ -1395,6 +1943,82 @@ if (output_enabled && bsize != BLOCK_4X4) td->counts->partition[ctx][partition]++; +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && + bsize <= MAX_SUPERTX_BLOCK_SIZE && + partition != PARTITION_NONE && + !xd->lossless[0]) { + int supertx_enabled; + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + supertx_enabled = check_supertx_sb(bsize, supertx_size, pc_tree); + if (supertx_enabled) { + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int x_idx, y_idx, i; + uint8_t *dst_buf[3]; + int dst_stride[3]; + set_skip_context(xd, mi_row, mi_col); + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); + 
update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize, + output_enabled, pc_tree); + + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (i = 0; i < MAX_MB_PLANE; i++) { + dst_buf[i] = xd->plane[i].dst.buf; + dst_stride[i] = xd->plane[i].dst.stride; + } + predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col, + output_enabled, bsize, bsize, + dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, x, mi_row, mi_col, bsize); + if (!x->skip) { + xd->mi[0]->mbmi.skip = 1; + vp10_encode_sb_supertx(x, bsize); + vp10_tokenize_sb_supertx(cpi, td, tp, !output_enabled, bsize); + } else { + xd->mi[0]->mbmi.skip = 1; + if (output_enabled) + td->counts->skip[vp10_get_skip_context(xd)][1]++; + reset_skip_context(xd, bsize); + } + if (output_enabled) { + for (y_idx = 0; y_idx < mi_height; y_idx++) + for (x_idx = 0; x_idx < mi_width; x_idx++) { + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height + > y_idx) { + xd->mi[x_idx + y_idx * cm->mi_stride]->mbmi.skip = + xd->mi[0]->mbmi.skip; + } + } + td->counts->supertx + [partition_supertx_context_lookup[partition]][supertx_size][1]++; + td->counts->supertx_size[supertx_size]++; +#if CONFIG_EXT_TX + if (get_ext_tx_types(supertx_size, bsize, 1) > 1 && + !xd->mi[0]->mbmi.skip) { + int eset = get_ext_tx_set(supertx_size, bsize, 1); + if (eset > 0) { + ++td->counts->inter_ext_tx[eset][supertx_size] + [xd->mi[0]->mbmi.tx_type]; + } + } +#endif // CONFIG_EXT_TX + } + if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); + return; + } else { + if (output_enabled) { + td->counts->supertx + [partition_supertx_context_lookup[partition]][supertx_size][0]++; + } + } + } +#endif // CONFIG_SUPERTX + switch (partition) { case PARTITION_NONE: encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, @@ -1519,6 +2143,9 @@ int mi_row, int 
mi_col, BLOCK_SIZE bsize, int *rate, int64_t *dist, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif int do_recon, PC_TREE *pc_tree) { VP10_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; @@ -1533,12 +2160,20 @@ BLOCK_SIZE subsize; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; +#if CONFIG_VAR_TX + TXFM_CONTEXT tl[8], ta[8]; +#endif RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx = &pc_tree->none; +#if CONFIG_SUPERTX + int last_part_rate_nocoef = INT_MAX; + int none_rate_nocoef = INT_MAX; + int chosen_rate_nocoef = INT_MAX; +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1553,8 +2188,16 @@ partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); +#endif pc_tree->partitioning = partition; - save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); + save_context(x, mi_row, mi_col, a, l, sa, sl, +#if CONFIG_VAR_TX + ta, tl, +#endif + bsize); if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) { set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); @@ -1583,8 +2226,11 @@ mi_row + (mi_step >> 1) < cm->mi_rows && mi_col + (mi_step >> 1) < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, - ctx, INT64_MAX); + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, +#if CONFIG_SUPERTX + &none_rate_nocoef, +#endif + bsize, ctx, INT64_MAX); pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -1592,9 +2238,16 @@ none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); +#if 
CONFIG_SUPERTX + none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } - restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, +#if CONFIG_VAR_TX + ta, tl, +#endif + bsize); mi_8x8[0]->mbmi.sb_type = bs_type; pc_tree->partitioning = partition; } @@ -1603,68 +2256,110 @@ switch (partition) { case PARTITION_NONE: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif bsize, ctx, INT64_MAX); break; case PARTITION_HORZ: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif subsize, &pc_tree->horizontal[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; vp10_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif subsize, &pc_tree->horizontal[1], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp10_rd_cost_reset(&last_part_rdc); +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; last_part_rdc.rdcost += tmp_rdc.rdcost; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif } break; case PARTITION_VERT: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif PICK_MODE_CONTEXT *ctx = 
&pc_tree->vertical[0]; vp10_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp10_rd_cost_reset(&last_part_rdc); +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; last_part_rdc.rdcost += tmp_rdc.rdcost; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif } break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif subsize, pc_tree->leaf_split[0], INT64_MAX); break; } last_part_rdc.rate = 0; last_part_rdc.dist = 0; last_part_rdc.rdcost = 0; +#if CONFIG_SUPERTX + last_part_rate_nocoef = 0; +#endif for (i = 0; i < 4; i++) { int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef; +#endif if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; @@ -1673,13 +2368,22 @@ mi_8x8 + jj * bss * mis + ii * bss, tp, mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, &tmp_rdc.dist, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif i != 3, pc_tree->split[i]); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp10_rd_cost_reset(&last_part_rdc); +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif } break; default: @@ -1692,6 +2396,9 @@ last_part_rdc.rate += cpi->partition_cost[pl][partition]; last_part_rdc.rdcost = RDCOST(x->rdmult, 
x->rddiv, last_part_rdc.rate, last_part_rdc.dist); +#if CONFIG_SUPERTX + last_part_rate_nocoef += cpi->partition_cost[pl][partition]; +#endif } if (do_partition_search @@ -1705,7 +2412,14 @@ BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rdc.rate = 0; chosen_rdc.dist = 0; - restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#if CONFIG_SUPERTX + chosen_rate_nocoef = 0; +#endif + restore_context(x, mi_row, mi_col, a, l, sa, sl, +#if CONFIG_VAR_TX + ta, tl, +#endif + bsize); pc_tree->partitioning = PARTITION_SPLIT; // Split partition. @@ -1713,27 +2427,50 @@ int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; +#if CONFIG_VAR_TX + TXFM_CONTEXT tl[8], ta[8]; +#endif if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; - save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); + save_context(x, mi_row, mi_col, a, l, sa, sl, +#if CONFIG_VAR_TX + ta, tl, +#endif + bsize); pc_tree->split[i]->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif split_subsize, &pc_tree->split[i]->none, INT64_MAX); - restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, +#if CONFIG_VAR_TX + ta, tl, +#endif + bsize); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp10_rd_cost_reset(&chosen_rdc); +#if CONFIG_SUPERTX + chosen_rate_nocoef = INT_MAX; +#endif break; } chosen_rdc.rate += tmp_rdc.rate; chosen_rdc.dist += tmp_rdc.dist; +#if CONFIG_SUPERTX + chosen_rate_nocoef += rt_nocoef; +#endif if (i != 3) encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, @@ -1742,12 +2479,18 @@ pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rdc.rate += 
cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif } pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rdc.rate < INT_MAX) { chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } } @@ -1757,15 +2500,29 @@ if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rdc = last_part_rdc; +#if CONFIG_SUPERTX + chosen_rate_nocoef = last_part_rate_nocoef; +#endif } // If none was better set the partitioning to that. if (none_rdc.rdcost < chosen_rdc.rdcost) { if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; chosen_rdc = none_rdc; +#if CONFIG_SUPERTX + chosen_rate_nocoef = none_rate_nocoef; +#endif } - restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); +#endif + restore_context(x, mi_row, mi_col, a, l, sa, sl, +#if CONFIG_VAR_TX + ta, tl, +#endif + bsize); // We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. 
@@ -1780,6 +2537,9 @@ *rate = chosen_rdc.rate; *dist = chosen_rdc.dist; +#if CONFIG_SUPERTX + *rate_nocoef = chosen_rate_nocoef; +#endif } static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { @@ -2029,6 +2789,9 @@ TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif int64_t best_rd, PC_TREE *pc_tree) { VP10_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; @@ -2037,11 +2800,21 @@ const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; +#if CONFIG_VAR_TX + TXFM_CONTEXT tl[8], ta[8]; +#endif TOKENEXTRA *tp_orig = *tp; PICK_MODE_CONTEXT *ctx = &pc_tree->none; int i, pl; BLOCK_SIZE subsize; RD_COST this_rdc, sum_rdc, best_rdc; +#if CONFIG_SUPERTX + int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX; + int tmp_rate; + int abort_flag; + int64_t tmp_dist, tmp_rd; + PARTITION_TYPE best_partition; +#endif // CONFIG_SUPERTX int do_split = bsize >= BLOCK_8X8; int do_rect = 1; @@ -2102,7 +2875,13 @@ partition_vert_allowed &= force_vert_split; } +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -2165,14 +2944,20 @@ // PARTITION_NONE if (partition_none_allowed) { - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, - &this_rdc, bsize, ctx, best_rdc.rdcost); + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, +#if CONFIG_SUPERTX + &this_rate_nocoef, +#endif + bsize, ctx, best_rdc.rdcost); if (this_rdc.rate != INT_MAX) { if (bsize >= BLOCK_8X8) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rdc.rate += 
cpi->partition_cost[pl][PARTITION_NONE]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); +#if CONFIG_SUPERTX + this_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } if (this_rdc.rdcost < best_rdc.rdcost) { @@ -2180,6 +2965,10 @@ int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; best_rdc = this_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = this_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; @@ -2248,7 +3037,13 @@ #endif } } +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX } // store estimated motion vector @@ -2265,14 +3060,82 @@ if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if CONFIG_SUPERTX + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, + &sum_rate_nocoef, subsize, pc_tree->leaf_split[0], + INT64_MAX); +#else rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, pc_tree->leaf_split[0], best_rdc.rdcost); - if (sum_rdc.rate == INT_MAX) +#endif // CONFIG_SUPERTX + if (sum_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif + } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && sum_rdc.rdcost < INT64_MAX && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 0); + sum_rdc.rdcost = + RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + if 
(is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) { +#if CONFIG_EXT_TX + TX_TYPE best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX } else { +#if CONFIG_SUPERTX + for (i = 0; i < 4 && sum_rdc.rdcost < INT64_MAX; ++i) { +#else for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { - const int x_idx = (i & 1) * mi_step; - const int y_idx = (i >> 1) * mi_step; +#endif // CONFIG_SUPERTX + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; @@ -2281,20 +3144,90 @@ load_pred_mv(x, ctx); pc_tree->split[i]->index = i; +#if CONFIG_SUPERTX + rd_pick_partition(cpi, td, tile_data, tp, + mi_row + y_idx, mi_col + x_idx, + subsize, &this_rdc, &this_rate_nocoef, + INT64_MAX - sum_rdc.rdcost, pc_tree->split[i]); +#else rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); +#endif // CONFIG_SUPERTX if (this_rdc.rate == 
INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX break; } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif // CONFIG_SUPERTX } } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && + sum_rdc.rdcost < INT64_MAX && + i == 4 && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 0); + sum_rdc.rdcost = + RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + + if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { +#if CONFIG_EXT_TX + TX_TYPE best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX } if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) 
{ @@ -2302,9 +3235,16 @@ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif // CONFIG_SUPERTX pc_tree->partitioning = PARTITION_SPLIT; } } else { @@ -2313,23 +3253,39 @@ if (cpi->sf.less_rectangular_check) do_rect &= !partition_none_allowed; } +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); - } +#endif + } // if (do_split) // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp10_active_h_edge(cpi, mi_row, mi_step))) { - subsize = get_subsize(bsize, PARTITION_HORZ); + subsize = get_subsize(bsize, PARTITION_HORZ); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, - &pc_tree->horizontal[0], best_rdc.rdcost); + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif // CONFIG_SUPERTX + subsize, &pc_tree->horizontal[0], best_rdc.rdcost); - if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && +#if CONFIG_SUPERTX + abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) || + (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8); + if (sum_rdc.rdcost < INT64_MAX && +#else + if (sum_rdc.rdcost < best_rdc.rdcost && +#endif // CONFIG_SUPERTX + mi_row + mi_step < cm->mi_rows && bsize 
> BLOCK_8X8) { PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); @@ -2341,33 +3297,113 @@ partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if CONFIG_SUPERTX + rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, + &this_rdc, &this_rate_nocoef, + subsize, &pc_tree->horizontal[1], + INT64_MAX); +#else rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1], best_rdc.rdcost - sum_rdc.rdcost); +#endif // CONFIG_SUPERTX if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif // CONFIG_SUPERTX } } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && !abort_flag && + sum_rdc.rdcost < INT64_MAX && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_HORZ; + + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]] + [supertx_size], 0); + sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + + if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { +#if CONFIG_EXT_TX + TX_TYPE best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if 
CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_HORZ]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX + if (sum_rdc.rdcost < best_rdc.rdcost) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_HORZ]; +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif // CONFIG_SUPERTX pc_tree->partitioning = PARTITION_HORZ; } } +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif } // PARTITION_VERT if (partition_vert_allowed && (do_rect || vp10_active_v_edge(cpi, mi_col, mi_step))) { - subsize = get_subsize(bsize, PARTITION_VERT); + subsize = get_subsize(bsize, PARTITION_VERT); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); @@ -2375,9 +3411,19 @@ partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, - &pc_tree->vertical[0], best_rdc.rdcost); - if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && + rd_pick_sb_modes(cpi, 
tile_data, x, mi_row, mi_col, &sum_rdc, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif // CONFIG_SUPERTX + subsize, &pc_tree->vertical[0], best_rdc.rdcost); +#if CONFIG_SUPERTX + abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) || + (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8); + if (sum_rdc.rdcost < INT64_MAX && +#else + if (sum_rdc.rdcost < best_rdc.rdcost && +#endif // CONFIG_SUPERTX + mi_col + mi_step < cm->mi_cols && bsize > BLOCK_8X8) { update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, @@ -2389,29 +3435,106 @@ partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if CONFIG_SUPERTX + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, + &this_rate_nocoef, subsize, &pc_tree->vertical[1], + INT64_MAX - sum_rdc.rdcost); +#else rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, subsize, &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost); +#endif // CONFIG_SUPERTX if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif // CONFIG_SUPERTX } } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && !abort_flag && + sum_rdc.rdcost < INT64_MAX && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_VERT; + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]] + [supertx_size], 0); + sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + + if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { +#if CONFIG_EXT_TX + TX_TYPE 
best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_VERT]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_VERT]; +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif // CONFIG_SUPERTX pc_tree->partitioning = PARTITION_VERT; } } +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif } // TODO(jbb): This code added so that we avoid static analysis @@ -2420,7 +3543,9 @@ // checks occur in some sub function 
and thus are used... (void) best_rd; *rd_cost = best_rdc; - +#if CONFIG_SUPERTX + *rate_nocoef = best_rate_nocoef; +#endif // CONFIG_SUPERTX if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && pc_tree->index != 3) { @@ -2453,7 +3578,10 @@ // Initialize the left context for the new SB row memset(&xd->left_context, 0, sizeof(xd->left_context)); memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); - +#if CONFIG_VAR_TX + memset(xd->left_txfm_context_buffer, 0, + sizeof(xd->left_txfm_context_buffer)); +#endif // Code each SB in the row for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end; mi_col += MI_BLOCK_SIZE) { @@ -2461,6 +3589,9 @@ int dummy_rate; int64_t dummy_dist; RD_COST dummy_rdc; +#if CONFIG_SUPERTX + int dummy_rate_nocoef; +#endif // CONFIG_SUPERTX int i; int seg_skip = 0; @@ -2496,19 +3627,31 @@ set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); + BLOCK_64X64, &dummy_rate, &dummy_dist, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + 1, td->pc_root); } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); + BLOCK_64X64, &dummy_rate, &dummy_dist, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + 1, td->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION && cm->frame_type != KEY_FRAME) { choose_partitioning(cpi, tile_info, x, mi_row, mi_col); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, 
td->pc_root); + BLOCK_64X64, &dummy_rate, &dummy_dist, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + 1, td->pc_root); } else { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { @@ -2518,7 +3661,11 @@ &x->max_partition_size); } rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rdc, INT64_MAX, td->pc_root); + &dummy_rdc, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + INT64_MAX, td->pc_root); } } } @@ -2541,6 +3688,10 @@ 2 * aligned_mi_cols * MAX_MB_PLANE); memset(xd->above_seg_context, 0, sizeof(*xd->above_seg_context) * aligned_mi_cols); +#if CONFIG_VAR_TX + memset(cm->above_txfm_context, 0, + sizeof(*xd->above_txfm_context) * aligned_mi_cols); +#endif } static int check_dual_ref_flags(VP10_COMP *cpi) { @@ -2549,8 +3700,14 @@ if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { return 0; } else { - return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) - + !!(ref_flags & VP9_ALT_FLAG)) >= 2; + return (!!(ref_flags & VP9_GOLD_FLAG) + + !!(ref_flags & VP9_LAST_FLAG) + +#if CONFIG_EXT_REFS + !!(ref_flags & VP9_LAST2_FLAG) + + !!(ref_flags & VP9_LAST3_FLAG) + + !!(ref_flags & VP9_LAST4_FLAG) + +#endif // CONFIG_EXT_REFS + !!(ref_flags & VP9_ALT_FLAG)) >= 2; } } @@ -2575,6 +3732,8 @@ else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) return GOLDEN_FRAME; else + // TODO(zoeliu): To investigate whether a frame_type other than + // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately. 
return LAST_FRAME; } @@ -2734,6 +3893,9 @@ x->quant_fp = cpi->sf.use_quant_fp; vp10_zero(x->skip_txfm); +#if CONFIG_VAR_TX + vp10_zero(x->blk_skip); +#endif { struct vpx_usec_timer emr_timer; @@ -2798,7 +3960,14 @@ cpi->allow_comp_inter_inter = 1; cm->comp_fixed_ref = ALTREF_FRAME; cm->comp_var_ref[0] = LAST_FRAME; +#if CONFIG_EXT_REFS + cm->comp_var_ref[1] = LAST2_FRAME; + cm->comp_var_ref[2] = LAST3_FRAME; + cm->comp_var_ref[3] = LAST4_FRAME; + cm->comp_var_ref[4] = GOLDEN_FRAME; +#else cm->comp_var_ref[1] = GOLDEN_FRAME; +#endif // CONFIG_EXT_REFS } } else { cpi->allow_comp_inter_inter = 0; @@ -2814,9 +3983,12 @@ // either compound, single or hybrid prediction as per whatever has // worked best for that type of frame in the past. // It also predicts whether another coding mode would have worked - // better that this coding mode. If that is the case, it remembers + // better than this coding mode. If that is the case, it remembers // that for subsequent frames. // It does the same analysis for transform size selection also. + // + // TODO(zoeliu): To investigate whether a frame_type other than + // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately. 
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type]; @@ -2836,8 +4008,9 @@ else cm->reference_mode = REFERENCE_MODE_SELECT; - if (cm->interp_filter == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) { cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref); + } encode_frame_internal(cpi); @@ -2865,6 +4038,7 @@ } } +#if !CONFIG_VAR_TX if (cm->tx_mode == TX_MODE_SELECT) { int count4x4 = 0; int count8x8_lp = 0, count8x8_8x8p = 0; @@ -2885,20 +4059,36 @@ count32x32 += counts->tx.p32x32[i][TX_32X32]; } if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && +#if CONFIG_SUPERTX + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0 && +#endif // CONFIG_SUPERTX count32x32 == 0) { cm->tx_mode = ALLOW_8X8; reset_skip_tx_size(cm, TX_8X8); } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && - count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { + count8x8_lp == 0 && count16x16_lp == 0 && +#if CONFIG_SUPERTX + cm->counts.supertx_size[TX_8X8] == 0 && + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0 && +#endif // CONFIG_SUPERTX + count32x32 == 0) { cm->tx_mode = ONLY_4X4; reset_skip_tx_size(cm, TX_4X4); - } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { + } else if (count8x8_lp == 0 && count16x16_lp == 0 && + count4x4 == 0) { cm->tx_mode = ALLOW_32X32; - } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { + } else if (count32x32 == 0 && count8x8_lp == 0 && +#if CONFIG_SUPERTX + cm->counts.supertx_size[TX_32X32] == 0 && +#endif // CONFIG_SUPERTX + count4x4 == 0) { cm->tx_mode = ALLOW_16X16; reset_skip_tx_size(cm, TX_16X16); } } +#endif } else { cm->reference_mode = SINGLE_REFERENCE; encode_frame_internal(cpi); @@ -2941,6 +4131,140 @@ ++counts->uv_mode[y_mode][uv_mode]; } +#if CONFIG_VAR_TX +static 
void update_txfm_count(MACROBLOCKD *xd, + FRAME_COUNTS *counts, + TX_SIZE tx_size, int blk_row, int blk_col) { + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1); + int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; + int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), + tx_size); + TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> 5; + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> 5; + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + ++counts->txfm_partition[ctx][0]; + mbmi->tx_size = tx_size; + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), tx_size); + } else { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bh = num_4x4_blocks_high_lookup[bsize]; + int i; + ++counts->txfm_partition[ctx][1]; + + if (tx_size == TX_8X8) { + mbmi->inter_tx_size[tx_idx] = TX_4X4; + mbmi->tx_size = TX_4X4; + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), TX_4X4); + return; + } + + for (i = 0; i < 4; ++i) { + int offsetr = (i >> 1) * bh / 2; + int offsetc = (i & 0x01) * bh / 2; + update_txfm_count(xd, counts, tx_size - 1, + blk_row + offsetr, blk_col + offsetc); + } + } +} + +static void tx_partition_count_update(VP10_COMMON *cm, + MACROBLOCKD *xd, + BLOCK_SIZE plane_bsize, + int mi_row, int mi_col, + FRAME_COUNTS *td_counts) { + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + 
int idx, idy; + + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + + for (idy = 0; idy < mi_height; idy += bh) + for (idx = 0; idx < mi_width; idx += bh) + update_txfm_count(xd, td_counts, max_tx_size, idy, idx); +} + +static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, + int blk_row, int blk_col) { + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1); + int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; + TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> 5; + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> 5; + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + mbmi->tx_size = tx_size; + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), tx_size); + + } else { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bsl = b_width_log2_lookup[bsize]; + int i; + + if (tx_size == TX_8X8) { + mbmi->inter_tx_size[tx_idx] = TX_4X4; + mbmi->tx_size = TX_4X4; + txfm_partition_update(xd->above_txfm_context + (blk_col >> 1), + xd->left_txfm_context + (blk_row >> 1), TX_4X4); + return; + } + + assert(bsl > 0); + --bsl; + for (i = 0; i < 4; ++i) { + int offsetr = (i >> 1) << bsl; + int offsetc = (i & 0x01) << bsl; + set_txfm_context(xd, tx_size - 1, + blk_row + offsetr, blk_col + offsetc); + } + } +} + +static void tx_partition_set_contexts(VP10_COMMON *cm, + MACROBLOCKD *xd, + BLOCK_SIZE plane_bsize, + int mi_row, int mi_col) { + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + BLOCK_SIZE txb_size = 
txsize_to_bsize[max_tx_size]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + + for (idy = 0; idy < mi_height; idy += bh) + for (idx = 0; idx < mi_width; idx += bh) + set_txfm_context(xd, max_tx_size, idy, idx); +} +#endif + static void encode_superblock(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -2977,6 +4301,16 @@ if (output_enabled) sum_intra_stats(td->counts, mi, xd->above_mi, xd->left_mi, frame_is_intra_only(cm)); + + if (bsize >= BLOCK_8X8 && output_enabled) { + if (mbmi->palette_mode_info.palette_size[0] > 0) { + mbmi->palette_mode_info.palette_first_color_idx[0] = + xd->plane[0].color_index_map[0]; + // TODO(huisu): this increases the use of token buffer. Needs stretch + // test to verify. + vp10_tokenize_palette_sb(td, bsize, 0, t); + } + } vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8)); } else { int ref; @@ -2997,25 +4331,33 @@ VPXMAX(bsize, BLOCK_8X8)); vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); +#if CONFIG_VAR_TX + vp10_tokenize_sb_inter(cpi, td, t, !output_enabled, + mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8)); +#else vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8)); +#endif } if (output_enabled) { if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 && !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) { +#if CONFIG_VAR_TX + if (is_inter_block(mbmi)) + tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts); +#endif ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), &td->counts->tx)[mbmi->tx_size]; } else { int x, y; TX_SIZE tx_size; // The new intra coding scheme requires no change of transform size - if (is_inter_block(&mi->mbmi)) { + if (is_inter_block(&mi->mbmi)) tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], 
max_txsize_lookup[bsize]); - } else { + else tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4; - } for (y = 0; y < mi_height; y++) for (x = 0; x < mi_width; x++) @@ -3024,6 +4366,22 @@ } ++td->counts->tx.tx_totals[mbmi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + int eset = get_ext_tx_set(mbmi->tx_size, bsize, + is_inter_block(mbmi)); + if (eset > 0) { + if (is_inter_block(mbmi)) { + ++td->counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type]; + } else { + ++td->counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode] + [mbmi->tx_type]; + } + } + } +#else if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip && !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { @@ -3035,5 +4393,822 @@ [mbmi->tx_type]; } } +#endif // CONFIG_EXT_TX +#if CONFIG_EXT_INTRA + if (bsize >= BLOCK_8X8 && !is_inter_block(mbmi)) { + if (mbmi->mode == DC_PRED) + ++td->counts->ext_intra[0] + [mbmi->ext_intra_mode_info.use_ext_intra_mode[0]]; + if (mbmi->uv_mode == DC_PRED) + ++td->counts->ext_intra[1] + [mbmi->ext_intra_mode_info.use_ext_intra_mode[1]]; + } +#endif // CONFIG_EXT_INTRA + } + +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 && + is_inter_block(mbmi) && !(mbmi->skip || seg_skip)) { + if (!output_enabled) + tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col); + } else { + TX_SIZE tx_size; + // The new intra coding scheme requires no change of transform size + if (is_inter_block(mbmi)) + tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], + max_txsize_lookup[bsize]); + else + tx_size = (bsize >= BLOCK_8X8) ? 
mbmi->tx_size : TX_4X4; + mbmi->tx_size = tx_size; + set_txfm_ctx(xd->left_txfm_context, tx_size, xd->n8_h); + set_txfm_ctx(xd->above_txfm_context, tx_size, xd->n8_w); + } +#endif +} + +#if CONFIG_SUPERTX +static int check_intra_b(PICK_MODE_CONTEXT *ctx) { + return !is_inter_mode((&ctx->mic)->mbmi.mode); +} + +static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize = bsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return 1; + + if (bsize >= BLOCK_8X8) + subsize = get_subsize(bsize, pc_tree->partitioning); + else + subsize = BLOCK_4X4; + + partition = partition_lookup[bsl][subsize]; + + switch (partition) { + case PARTITION_NONE: + return check_intra_b(&pc_tree->none); + break; + case PARTITION_VERT: + if (check_intra_b(&pc_tree->vertical[0])) + return 1; + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + if (check_intra_b(&pc_tree->vertical[1])) + return 1; + } + break; + case PARTITION_HORZ: + if (check_intra_b(&pc_tree->horizontal[0])) + return 1; + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + if (check_intra_b(&pc_tree->horizontal[1])) + return 1; + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + if (check_intra_b(pc_tree->leaf_split[0])) + return 1; + } else { + if (check_intra_sb(cpi, tile, mi_row, mi_col, subsize, + pc_tree->split[0])) + return 1; + if (check_intra_sb(cpi, tile, mi_row, mi_col + hbs, subsize, + pc_tree->split[1])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col, subsize, + pc_tree->split[2])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col + hbs, subsize, + pc_tree->split[3])) + return 1; + } + break; + default: + assert(0); + } + return 0; +} + +static int check_supertx_b(TX_SIZE supertx_size, PICK_MODE_CONTEXT *ctx) { + 
return ctx->mic.mbmi.tx_size == supertx_size; +} + +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree) { + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + partition = pc_tree->partitioning; + subsize = get_subsize(bsize, partition); + switch (partition) { + case PARTITION_NONE: + return check_supertx_b(supertx_size, &pc_tree->none); + case PARTITION_VERT: + return check_supertx_b(supertx_size, &pc_tree->vertical[0]); + case PARTITION_HORZ: + return check_supertx_b(supertx_size, &pc_tree->horizontal[0]); + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) + return check_supertx_b(supertx_size, pc_tree->leaf_split[0]); + else + return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]); + default: + assert(0); + return 0; } } + +static void predict_superblock(VP10_COMP *cpi, ThreadData *td, + int mi_row_pred, int mi_col_pred, + BLOCK_SIZE bsize_pred, int b_sub8x8, int block) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi_8x8 = xd->mi[0]; + MODE_INFO *mi = mi_8x8; + MB_MODE_INFO *mbmi = &mi->mbmi; + int ref; + const int is_compound = has_second_ref(mbmi); + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, + mbmi->ref_frame[ref]); + vp10_setup_pre_planes(xd, ref, cfg, mi_row_pred, mi_col_pred, + &xd->block_refs[ref]->sf); + } + + if (!b_sub8x8) + vp10_build_inter_predictors_sb(xd, mi_row_pred, mi_col_pred, bsize_pred); + else + vp10_build_inter_predictors_sb_sub8x8(xd, mi_row_pred, mi_col_pred, + bsize_pred, block); +} + +static void predict_b_extend(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int block, + int mi_row_ori, int mi_col_ori, + int mi_row_pred, int mi_col_pred, + int 
mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3], + BLOCK_SIZE bsize_ori, BLOCK_SIZE bsize_top, + BLOCK_SIZE bsize_pred, int output_enabled, + int b_sub8x8, int bextend) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + // (mi_row_top, mi_col_top, bsize_top): region of the top partition size + // block: sub location of sub8x8 blocks + // b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8 + // bextend: 1: region to predict is an extension of ori; 0: not + + MACROBLOCK *const x = &td->mb; + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + int r = (mi_row_pred - mi_row_top) * MI_SIZE; + int c = (mi_col_pred - mi_col_top) * MI_SIZE; + const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top]; + const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top]; + + if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top || + mi_row_pred >= mi_row_top + mi_height_top || + mi_col_pred >= mi_col_top + mi_width_top || + mi_row_pred >= cm->mi_rows || mi_col_pred >= cm->mi_cols) + return; + + set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred, + mi_row_ori, mi_col_ori, bsize_pred, bsize_ori); + xd->plane[0].dst.stride = dst_stride[0]; + xd->plane[1].dst.stride = dst_stride[1]; + xd->plane[2].dst.stride = dst_stride[2]; + xd->plane[0].dst.buf = dst_buf[0] + + (r >> xd->plane[0].subsampling_y) * dst_stride[0] + + (c >> xd->plane[0].subsampling_x); + xd->plane[1].dst.buf = dst_buf[1] + + (r >> xd->plane[1].subsampling_y) * dst_stride[1] + + (c >> xd->plane[1].subsampling_x); + xd->plane[2].dst.buf = dst_buf[2] + + (r >> xd->plane[2].subsampling_y) * dst_stride[2] + + (c >> xd->plane[2].subsampling_x); + + predict_superblock(cpi, td, + mi_row_pred, mi_col_pred, bsize_pred, + b_sub8x8, block); + + if (output_enabled && !bextend) + update_stats(&cpi->common, td, 1); +} + +static void extend_dir(VP10_COMP *cpi, ThreadData *td, + const 
TileInfo *const tile, + int block, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, + uint8_t * dst_buf[3], int dst_stride[3], int dir) { + // dir: 0-lower, 1-upper, 2-left, 3-right + // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright + MACROBLOCKD *xd = &td->mb.e_mbd; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int xss = xd->plane[1].subsampling_x; + int yss = xd->plane[1].subsampling_y; + int b_sub8x8 = (bsize < BLOCK_8X8) ? 1 : 0; + + BLOCK_SIZE extend_bsize; + int unit, mi_row_pred, mi_col_pred; + + if (dir == 0 || dir == 1) { // lower and upper + extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss) ? + BLOCK_8X8 : BLOCK_16X8; + unit = num_8x8_blocks_wide_lookup[extend_bsize]; + mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1); + mi_col_pred = mi_col; + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, dst_buf, dst_stride, + bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + + if (mi_width > unit) { + int i; + for (i = 0; i < mi_width/unit - 1; i++) { + mi_col_pred += unit; + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } + } + } else if (dir == 2 || dir == 3) { // left and right + extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss) ? + BLOCK_8X8 : BLOCK_8X16; + unit = num_8x8_blocks_high_lookup[extend_bsize]; + mi_row_pred = mi_row; + mi_col_pred = mi_col + ((dir == 3) ? 
mi_width : -1); + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + + if (mi_height > unit) { + int i; + for (i = 0; i < mi_height/unit - 1; i++) { + mi_row_pred += unit; + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } + } + } else { + extend_bsize = BLOCK_8X8; + mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1); + mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? mi_width : -1); + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } +} + +static void extend_all(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int block, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, + uint8_t * dst_buf[3], int dst_stride[3]) { + assert(block >= 0 && block < 4); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 0); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 1); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 2); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 3); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 4); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, 
dst_buf, dst_stride, 5); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 6); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 7); +} + + +// This function generates prediction for multiple blocks, between which +// discontinuity around boundary is reduced by smoothing masks. The basic +// smoothing mask is a soft step function along horz/vert direction. In more +// complicated case when a block is split into 4 subblocks, the basic mask is +// first applied to neighboring subblocks (2 pairs) in horizontal direction and +// then applied to the 2 masked prediction mentioned above in vertical direction +// If the block is split into more than one level, at every stage, masked +// prediction is stored in dst_buf[] passed from higher level. +static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + int i, ctx; + uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int 
len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); + dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); + dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; + dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; + dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf3[0] = tmp_buf3; + dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; + dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + if (bsize >= BLOCK_8X8) { + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + subsize = get_subsize(bsize, pc_tree->partitioning); + } else { + ctx = 0; + subsize = BLOCK_4X4; + } + partition = partition_lookup[bsl][subsize]; + if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize) + cm->counts.partition[ctx][partition]++; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + bsize, top_bsize, bsize, output_enabled, 0, 0); + extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, 
dst_buf, dst_stride); + break; + case PARTITION_HORZ: + if (bsize == BLOCK_8X8) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + + // Second half + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, 0); + } else { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, subsize, output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride, 0); + + if (mi_row + hbs < cm->mi_rows) { + // Second half + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, subsize, top_bsize, subsize, + output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + else + 
extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1, 1); + + // Smooth + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_HORZ, i); + } + } + } + break; + case PARTITION_VERT: + if (bsize == BLOCK_8X8) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + + // Second half + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, 0); + } else { + // bsize: not important, not useful + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, subsize, output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, 
mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride, 3); + + + if (mi_col + hbs < cm->mi_cols) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, subsize, top_bsize, subsize, + output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1, 2); + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_VERT, i); + } + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf2, dst_stride2, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf3, dst_stride3, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + + if (bsize < top_bsize) { + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, 
dst_stride1); + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf2, dst_stride2); + extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf3, dst_stride3); + } + } else { + predict_sb_complex(cpi, td, tile, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf, dst_stride, + pc_tree->split[0]); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row, mi_col + hbs, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf1, dst_stride1, + pc_tree->split[1]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf2, dst_stride2, + pc_tree->split[2]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col + hbs, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf3, dst_stride3, + pc_tree->split[3]); + } + for (i = 0; i < MAX_MB_PLANE; i++) { + if (bsize == BLOCK_8X8 && i != 0) + continue; // Skip <4x4 chroma smoothing + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf1[i], + dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + if (mi_row + hbs < cm->mi_rows) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + 
PARTITION_HORZ, i); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } + break; + default: + assert(0); + } + + + if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); +} + +static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, +#if CONFIG_EXT_TX + TX_TYPE *best_tx, +#endif // CONFIG_EXT_TX + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + int plane, pnskip, skippable, skippable_uv, rate_uv, this_rate, + base_rate = *tmp_rate; + int64_t sse, pnsse, sse_uv, this_dist, dist_uv; + uint8_t *dst_buf[3]; + int dst_stride[3]; + TX_SIZE tx_size; +#if CONFIG_EXT_TX + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + TX_TYPE tx_type, best_tx_nostx = xd->mi[0]->mbmi.tx_type; + int ext_tx_set; + int tmp_rate_tx = 0, skip_tx = 0; + int64_t tmp_dist_tx = 0, rd_tx, bestrd_tx = INT64_MAX; + uint8_t tmp_zcoeff_blk = 0; +#endif // CONFIG_EXT_TX + + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize, 0, pc_tree); + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + dst_buf[plane] = xd->plane[plane].dst.buf; + dst_stride[plane] = xd->plane[plane].dst.stride; + } + predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, x, mi_row, mi_col, bsize); +#if CONFIG_EXT_TX + *best_tx = DCT_DCT; +#endif + + // chroma + skippable_uv = 1; + rate_uv = 0; + dist_uv = 0; + sse_uv = 0; + for (plane = 1; plane < 
MAX_MB_PLANE; ++plane) { + tx_size = max_txsize_lookup[bsize]; + tx_size = get_uv_tx_size_impl(tx_size, bsize, + cm->subsampling_x, cm->subsampling_y); + vp10_subtract_plane(x, bsize, plane); + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, plane, bsize, tx_size, 0); + rate_uv += this_rate; + dist_uv += this_dist; + sse_uv += pnsse; + skippable_uv &= pnskip; + } + + // luma + tx_size = max_txsize_lookup[bsize]; + vp10_subtract_plane(x, bsize, 0); +#if CONFIG_EXT_TX + ext_tx_set = get_ext_tx_set(tx_size, bsize, 1); + for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + mbmi->tx_type = tx_type; + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + *best_tx == DCT_DCT) { + tx_type = IDTX - 1; + break; + } + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, + &pnsse, INT64_MAX, 0, bsize, tx_size, 0); + if (get_ext_tx_types(tx_size, bsize, 1) > 1 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + this_rate != INT_MAX) { + if (ext_tx_set > 0) + this_rate += cpi->inter_tx_type_costs[ext_tx_set] + [mbmi->tx_size][mbmi->tx_type]; + } + *tmp_rate = rate_uv + this_rate; + *tmp_dist = dist_uv + this_dist; + sse = sse_uv + pnsse; + skippable = skippable_uv && pnskip; + if (skippable) { + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, sse)) { + *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0); + x->skip = 0; + } else { + *tmp_dist = sse; + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } + } + *tmp_rate += base_rate; + rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist); + if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) { + *best_tx = tx_type; + bestrd_tx = rd_tx; + tmp_rate_tx = 
*tmp_rate; + tmp_dist_tx = *tmp_dist; + skip_tx = x->skip; + tmp_zcoeff_blk = x->zcoeff_blk[tx_size][0]; + } + } + x->zcoeff_blk[tx_size][0] = tmp_zcoeff_blk; + *tmp_rate = tmp_rate_tx; + *tmp_dist = tmp_dist_tx; + x->skip = skip_tx; + xd->mi[0]->mbmi.tx_type = best_tx_nostx; + +#else // CONFIG_EXT_TX + + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, 0, bsize, tx_size, 0); + *tmp_rate = rate_uv + this_rate; + *tmp_dist = dist_uv + this_dist; + sse = sse_uv + pnsse; + skippable = skippable_uv && pnskip; + if (skippable) { + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, sse)) { + *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0); + x->skip = 0; + } else { + *tmp_dist = sse; + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } + } + *tmp_rate += base_rate; +#endif // CONFIG_EXT_TX +} +#endif // CONFIG_SUPERTX
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 92ba4dd..68cf932 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c
@@ -23,6 +23,8 @@ #include "vp10/common/scan.h" #include "vp10/encoder/encodemb.h" +#include "vp10/encoder/hybrid_fwd_txfm.h" +#include "vp10/encoder/quantize.h" #include "vp10/encoder/rd.h" #include "vp10/encoder/tokenize.h" @@ -104,8 +106,9 @@ const int mul = 1 + (tx_size == TX_32X32); const int16_t *dequant_ptr = pd->dequant; const uint8_t *const band_translate = get_band_translate(tx_size); - TX_TYPE tx_type = get_tx_type(type, xd, block); - const scan_order *const so = get_scan(tx_size, tx_type); + TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size); + const scan_order *const so = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); const int16_t *const scan = so->scan; const int16_t *const nb = so->neighbors; int next = eob, sz = 0; @@ -301,451 +304,107 @@ final_eob++; mb->plane[plane].eobs[block] = final_eob; + assert(final_eob <= default_eob); return final_eob; } -static INLINE void fdct32x32(int rd_transform, - const int16_t *src, tran_low_t *dst, - int src_stride) { - if (rd_transform) - vpx_fdct32x32_rd(src, dst, src_stride); - else - vpx_fdct32x32(src, dst, src_stride); -} - #if CONFIG_VP9_HIGHBITDEPTH -static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, - tran_low_t *dst, int src_stride) { - if (rd_transform) - vpx_highbd_fdct32x32_rd(src, dst, src_stride); - else - vpx_highbd_fdct32x32(src, dst, src_stride); -} -#endif // CONFIG_VP9_HIGHBITDEPTH +typedef enum QUANT_FUNC { + QUANT_FUNC_LOWBD = 0, + QUANT_FUNC_LOWBD_32 = 1, + QUANT_FUNC_HIGHBD = 2, + QUANT_FUNC_HIGHBD_32 = 3, + QUANT_FUNC_LAST = 4 +} QUANT_FUNC; -void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless) { - if (lossless) { - vp10_fwht4x4(src_diff, coeff, diff_stride); - } else { - switch (tx_type) { - case DCT_DCT: - vpx_fdct4x4(src_diff, coeff, diff_stride); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); - break; - default: 
- assert(0); - break; - } - } -} +static VP10_QUANT_FACADE + quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = { + {vp10_quantize_fp_facade, vp10_quantize_fp_32x32_facade, + vp10_highbd_quantize_fp_facade, vp10_highbd_quantize_fp_32x32_facade}, + {vp10_quantize_b_facade, vp10_quantize_b_32x32_facade, + vp10_highbd_quantize_b_facade, vp10_highbd_quantize_b_32x32_facade}, + {vp10_quantize_dc_facade, vp10_quantize_dc_32x32_facade, + vp10_highbd_quantize_dc_facade, vp10_highbd_quantize_dc_32x32_facade}, + {NULL, NULL, NULL, NULL}}; -static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type) { - switch (tx_type) { - case DCT_DCT: - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); - break; - default: - assert(0); - break; - } -} +#else +typedef enum QUANT_FUNC { + QUANT_FUNC_LOWBD = 0, + QUANT_FUNC_LOWBD_32 = 1, + QUANT_FUNC_LAST = 2 +} QUANT_FUNC; -static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type) { - switch (tx_type) { - case DCT_DCT: - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_fht16x16(src_diff, coeff, diff_stride, tx_type); - break; - default: - assert(0); - break; - } -} +static VP10_QUANT_FACADE + quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = { + {vp10_quantize_fp_facade, vp10_quantize_fp_32x32_facade}, + {vp10_quantize_b_facade, vp10_quantize_b_32x32_facade}, + {vp10_quantize_dc_facade, vp10_quantize_dc_32x32_facade}, + {NULL, NULL}}; +#endif -static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, - tran_low_t *coeff, int diff_stride, - TX_TYPE tx_type) { - switch (tx_type) { - case DCT_DCT: - fdct32x32(rd_transform, src_diff, coeff, diff_stride); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - assert(0); - break; - default: - assert(0); - break; - } -} +static FWD_TXFM_OPT fwd_txfm_opt_list[VP10_XFORM_QUANT_LAST] = { + FWD_TXFM_OPT_NORMAL, 
FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC, + FWD_TXFM_OPT_NORMAL}; -#if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless) { - if (lossless) { - assert(tx_type == DCT_DCT); - vp10_highbd_fwht4x4(src_diff, coeff, diff_stride); - } else { - switch (tx_type) { - case DCT_DCT: - vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); - break; - default: - assert(0); - break; - } - } -} - -static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type) { - switch (tx_type) { - case DCT_DCT: - vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); - break; - default: - assert(0); - break; - } -} - -static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type) { - switch (tx_type) { - case DCT_DCT: - vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); - break; - default: - assert(0); - break; - } -} - -static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, - tran_low_t *coeff, int diff_stride, - TX_TYPE tx_type) { - switch (tx_type) { - case DCT_DCT: - highbd_fdct32x32(rd_transform, src_diff, coeff, diff_stride); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - assert(0); - break; - default: - assert(0); - break; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { +void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE 
plane_bsize, TX_SIZE tx_size, + VP10_XFORM_QUANT xform_quant_idx) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; - TX_TYPE tx_type = get_tx_type(plane_type, xd, block); - const scan_order *const scan_order = get_scan(tx_size, tx_type); + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int16_t *src_diff; + const int tx1d_size = get_tx1d_size(tx_size); + const int tx2d_size = tx1d_size * tx1d_size; + + FWD_TXFM_PARAM fwd_txfm_param; + fwd_txfm_param.tx_type = get_tx_type(plane_type, xd, block, tx_size); + fwd_txfm_param.tx_size = tx_size; + fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[xform_quant_idx]; + fwd_txfm_param.rd_transform = x->use_lp32x32fdct; + fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (tx_size) { - case TX_32X32: - highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); - vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, - p->round_fp, p->quant_fp, p->quant_shift, - qcoeff, dqcoeff, pd->dequant, - eob, scan_order->scan, - scan_order->iscan); - break; - case TX_16X16: - vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); - vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - 
scan_order->scan, scan_order->iscan); - break; - case TX_8X8: - vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); - vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { - vp10_highbd_fwht4x4(src_diff, coeff, diff_stride); - } else { - vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); - } - vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - default: - assert(0); - } - return; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - switch (tx_size) { - case TX_32X32: - fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); - vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - break; - case TX_16X16: - vpx_fdct16x16(src_diff, coeff, diff_stride); - vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_8X8: - vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64, - x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { - vp10_fwht4x4(src_diff, coeff, diff_stride); + highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) { + if (x->skip_block) { + vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob); } else { - vpx_fdct4x4(src_diff, coeff, diff_stride); + if (tx_size == TX_32X32) + quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD_32]( + coeff, tx2d_size, 
p, qcoeff, pd, dqcoeff, eob, scan_order); + else + quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD]( + coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order); } - vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - default: - assert(0); - break; - } -} - -void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - uint16_t *const eob = &p->eobs[block]; - const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - const int16_t *src_diff; - src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; - -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (tx_size) { - case TX_32X32: - vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride); - vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_16X16: - vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride); - vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_8X8: - vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride); - vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { - vp10_highbd_fwht4x4(src_diff, coeff, diff_stride); - } else { - vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); - } - 
vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - default: - assert(0); } return; } #endif // CONFIG_VP9_HIGHBITDEPTH - switch (tx_size) { - case TX_32X32: - vpx_fdct32x32_1(src_diff, coeff, diff_stride); - vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_16X16: - vpx_fdct16x16_1(src_diff, coeff, diff_stride); - vpx_quantize_dc(coeff, 256, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_8X8: - vpx_fdct8x8_1(src_diff, coeff, diff_stride); - vpx_quantize_dc(coeff, 64, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { - vp10_fwht4x4(src_diff, coeff, diff_stride); - } else { - vpx_fdct4x4(src_diff, coeff, diff_stride); - } - vpx_quantize_dc(coeff, 16, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - default: - assert(0); - break; - } -} - - - -void vp10_xform_quant(MACROBLOCK *x, int plane, int block, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - PLANE_TYPE plane_type = (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; - TX_TYPE tx_type = get_tx_type(plane_type, xd, block); - const scan_order *const scan_order = get_scan(tx_size, tx_type); - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - uint16_t *const eob = &p->eobs[block]; - const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - const int16_t *src_diff; - src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; - -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (tx_size) { - case TX_32X32: - highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, - tx_type); - vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, - p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_16X16: - highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); - vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_8X8: - highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); - vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_4X4: - vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[xd->mi[0]->mbmi.segment_id]); - vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - default: - assert(0); + fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) { + if (x->skip_block) { + vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob); + } 
else { + if (tx_size == TX_32X32) + quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD_32]( + coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order); + else + quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD]( + coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order); } - return; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - switch (tx_size) { - case TX_32X32: - fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, tx_type); - vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - break; - case TX_16X16: - fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); - vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_8X8: - fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); - vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_4X4: - vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[xd->mi[0]->mbmi.segment_id]); - vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - default: - assert(0); - break; } } @@ -761,20 +420,31 @@ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint8_t *dst; ENTROPY_CONTEXT *a, *l; - TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block); + INV_TXFM_PARAM inv_txfm_param; +#if CONFIG_VAR_TX + int i; + const int bwl = b_width_log2_lookup[plane_bsize]; +#endif dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; a = &ctx->ta[plane][blk_col]; l = &ctx->tl[plane][blk_row]; // TODO(jingning): per transformed block zero forcing only enabled for // luma component. 
will integrate chroma components as well. - if (x->zcoeff_blk[tx_size][block] && plane == 0) { - p->eobs[block] = 0; - *a = *l = 0; - return; - } + // Turn this back on when the rate-distortion loop is synchronized with + // the recursive transform block coding. +// if (x->zcoeff_blk[tx_size][block] && plane == 0) { +// p->eobs[block] = 0; +// *a = *l = 0; +// return; +// } +#if CONFIG_VAR_TX + if (!x->skip_recode && + x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) { +#else if (!x->skip_recode) { +#endif if (x->quant_fp) { // Encoding process for rtc mode if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) { @@ -783,102 +453,151 @@ *a = *l = 0; return; } else { - vp10_xform_quant_fp(x, plane, block, blk_row, blk_col, - plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size, VP10_XFORM_QUANT_FP); } } else { if (max_txsize_lookup[plane_bsize] == tx_size) { int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1)); if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) { // full forward transform and quantization - vp10_xform_quant(x, plane, block, blk_row, blk_col, - plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size, VP10_XFORM_QUANT_B); } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) { // fast path forward transform and quantization - vp10_xform_quant_dc(x, plane, block, blk_row, blk_col, - plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size, VP10_XFORM_QUANT_DC); } else { // skip forward transform p->eobs[block] = 0; *a = *l = 0; +#if !CONFIG_VAR_TX return; +#endif } } else { - vp10_xform_quant(x, plane, block, blk_row, blk_col, - plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size, VP10_XFORM_QUANT_B); } } } +#if CONFIG_VAR_TX + else { + if (!x->skip_recode) + p->eobs[block] = 0; + } +#endif if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { - 
const int ctx = combine_entropy_contexts(*a, *l); + int ctx; +#if CONFIG_VAR_TX + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + a[0] = !!*(const uint16_t *)&a[0]; + l[0] = !!*(const uint16_t *)&l[0]; + break; + case TX_16X16: + a[0] = !!*(const uint32_t *)&a[0]; + l[0] = !!*(const uint32_t *)&l[0]; + break; + case TX_32X32: + a[0] = !!*(const uint64_t *)&a[0]; + l[0] = !!*(const uint64_t *)&l[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } +#endif + ctx = combine_entropy_contexts(*a, *l); *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0; } else { *a = *l = p->eobs[block] > 0; } +#if CONFIG_VAR_TX + for (i = 0; i < (1 << tx_size); ++i) { + a[i] = a[0]; + l[i] = l[0]; + } +#endif + if (p->eobs[block]) *(args->skip) = 0; if (p->eobs[block] == 0) return; + + // inverse transform parameters + inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size); + inv_txfm_param.tx_size = tx_size; + inv_txfm_param.eob = p->eobs[block]; + inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (tx_size) { - case TX_32X32: - vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, pd->dst.stride, - p->eobs[block], xd->bd, tx_type); - break; - case TX_16X16: - vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, pd->dst.stride, - p->eobs[block], xd->bd, tx_type); - break; - case TX_8X8: - vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, pd->dst.stride, - p->eobs[block], xd->bd, tx_type); - break; - case TX_4X4: - // this is like vp10_short_idct4x4 but has a special case around eob<=1 - // which is significant (not just an optimization) for the lossless - // case. 
- vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, - p->eobs[block], xd->bd, tx_type, - xd->lossless[xd->mi[0]->mbmi.segment_id]); - break; - default: - assert(0 && "Invalid transform size"); - break; - } - + inv_txfm_param.bd = xd->bd; + highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param); return; } #endif // CONFIG_VP9_HIGHBITDEPTH + inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param); +} - switch (tx_size) { - case TX_32X32: - vp10_inv_txfm_add_32x32(dqcoeff, dst, pd->dst.stride, p->eobs[block], - tx_type); - break; - case TX_16X16: - vp10_inv_txfm_add_16x16(dqcoeff, dst, pd->dst.stride, p->eobs[block], - tx_type); - break; - case TX_8X8: - vp10_inv_txfm_add_8x8(dqcoeff, dst, pd->dst.stride, p->eobs[block], - tx_type); - break; - case TX_4X4: - // this is like vp10_short_idct4x4 but has a special case around eob<=1 - // which is significant (not just an optimization) for the lossless - // case. - vp10_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block], - tx_type, xd->lossless[xd->mi[0]->mbmi.segment_id]); - break; - default: - assert(0 && "Invalid transform size"); - break; +#if CONFIG_VAR_TX +static void encode_block_inter(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + void *arg) { + struct encode_b_args *const args = arg; + MACROBLOCK *const x = args->x; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = plane ? 
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, + 0, 0) : + mbmi->inter_tx_size[blk_idx]; + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + encode_block(plane, block, blk_row, blk_col, plane_bsize, + tx_size, arg); + } else { + int bsl = b_width_log2_lookup[bsize]; + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + const int offsetr = blk_row + ((i >> 1) << bsl); + const int offsetc = blk_col + ((i & 0x01) << bsl); + int step = 1 << (2 * (tx_size - 1)); + + if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) + continue; + + encode_block_inter(plane, block + i * step, offsetr, offsetc, + plane_bsize, tx_size - 1, arg); + } } } +#endif static void encode_block_pass1(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, @@ -891,12 +610,13 @@ uint8_t *dst; dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; - vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size, VP10_XFORM_QUANT_B); if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless[0]) { + if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd); } else { @@ -906,7 +626,7 @@ return; } #endif // CONFIG_VP9_HIGHBITDEPTH - if (xd->lossless[0]) { + if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); } else { vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, 
p->eobs[block]); @@ -917,7 +637,7 @@ void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { vp10_subtract_plane(x, bsize, 0); vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, - encode_block_pass1, x); + encode_block_pass1, x); } void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { @@ -933,20 +653,72 @@ return; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { +#if CONFIG_VAR_TX + // TODO(jingning): Clean this up. + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + const int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_tx_size * 2); +#endif if (!x->skip_recode) vp10_subtract_plane(x, bsize, plane); if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { +#if CONFIG_VAR_TX + vp10_get_entropy_contexts(bsize, TX_4X4, pd, + ctx.ta[plane], ctx.tl[plane]); +#else const struct macroblockd_plane* const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? 
get_uv_tx_size(mbmi, pd) : mbmi->tx_size; vp10_get_entropy_contexts(bsize, tx_size, pd, - ctx.ta[plane], ctx.tl[plane]); + ctx.ta[plane], ctx.tl[plane]); +#endif } +#if CONFIG_VAR_TX + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + encode_block_inter(plane, block, idy, idx, plane_bsize, + max_tx_size, &arg); + block += step; + } + } +#else + vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, + &arg); +#endif + } +} + +#if CONFIG_SUPERTX +void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + struct optimize_ctx ctx; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + struct encode_b_args arg = {x, &ctx, &mbmi->skip}; + int plane; + + mbmi->skip = 1; + if (x->skip) + return; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane* const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size; + vp10_subtract_plane(x, bsize, plane); + vp10_get_entropy_contexts(bsize, tx_size, pd, + ctx.ta[plane], ctx.tl[plane]); vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, &arg); } } +#endif // CONFIG_SUPERTX void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, @@ -957,12 +729,9 @@ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); PLANE_TYPE plane_type = (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; - TX_TYPE tx_type = get_tx_type(plane_type, xd, block); - const scan_order *const scan_order = get_scan(tx_size, tx_type); + const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); PREDICTION_MODE mode; const int bwl = b_width_log2_lookup[plane_bsize]; const int bhl = b_height_log2_lookup[plane_bsize]; @@ -972,156 +741,57 @@ uint16_t *eob = &p->eobs[block]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; + + const int tx1d_size = get_tx1d_size(tx_size); + + INV_TXFM_PARAM inv_txfm_param; + dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; - vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, - dst, dst_stride, blk_col, blk_row, plane); - + vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, dst, + dst_stride, blk_col, blk_row, plane); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (tx_size) { - case TX_32X32: - if (!x->skip_recode) { - vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, - src, src_stride, dst, dst_stride, xd->bd); - highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, - diff_stride, tx_type); - vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, - p->round, p->quant, p->quant_shift, - qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); - } - if (*eob) - vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, xd->bd, - tx_type); - break; - case TX_16X16: - if (!x->skip_recode) { - vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, - src, src_stride, dst, dst_stride, xd->bd); - highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); - vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, 
- pd->dequant, eob, - scan_order->scan, scan_order->iscan); - } - if (*eob) - vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, xd->bd, - tx_type); - break; - case TX_8X8: - if (!x->skip_recode) { - vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, - src, src_stride, dst, dst_stride, xd->bd); - highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); - vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - } - if (*eob) - vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, xd->bd, - tx_type); - break; - case TX_4X4: - if (!x->skip_recode) { - vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, - src, src_stride, dst, dst_stride, xd->bd); - vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[mbmi->segment_id]); - vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - } - - if (*eob) - // this is like vp10_short_idct4x4 but has a special case around - // eob<=1 which is significant (not just an optimization) for the - // lossless case. 
- vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, xd->bd, - tx_type, xd->lossless[mbmi->segment_id]); - break; - default: - assert(0); - return; - } - if (*eob) - *(args->skip) = 0; - return; + vpx_highbd_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src, + src_stride, dst, dst_stride, xd->bd); + } else { + vpx_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src, + src_stride, dst, dst_stride); } +#else + vpx_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src, + src_stride, dst, dst_stride); #endif // CONFIG_VP9_HIGHBITDEPTH - switch (tx_size) { - case TX_32X32: - if (!x->skip_recode) { - vpx_subtract_block(32, 32, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, - tx_type); - vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - } - if (*eob) - vp10_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type); - break; - case TX_16X16: - if (!x->skip_recode) { - vpx_subtract_block(16, 16, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); - vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - } - if (*eob) - vp10_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type); - break; - case TX_8X8: - if (!x->skip_recode) { - vpx_subtract_block(8, 8, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); - vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, - p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - } - if (*eob) - vp10_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type); - break; - case TX_4X4: - if 
(!x->skip_recode) { - vpx_subtract_block(4, 4, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[mbmi->segment_id]); - vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, - p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - } + if (!x->skip_recode) + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + VP10_XFORM_QUANT_B); + else + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + VP10_XFORM_QUANT_SKIP_QUANT); - if (*eob) { - // this is like vp10_short_idct4x4 but has a special case around eob<=1 - // which is significant (not just an optimization) for the lossless - // case. - vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type, - xd->lossless[mbmi->segment_id]); - } - break; - default: - assert(0); - break; - } - if (*eob) + if (*eob) { + // inverse transform + inv_txfm_param.tx_type = tx_type; + inv_txfm_param.tx_size = tx_size; + inv_txfm_param.eob = *eob; + inv_txfm_param.lossless = xd->lossless[mbmi->segment_id]; +#if CONFIG_VP9_HIGHBITDEPTH + inv_txfm_param.bd = xd->bd; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param); + } else { + inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param); + } +#else + inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param); +#endif // CONFIG_VP9_HIGHBITDEPTH + *(args->skip) = 0; + } } void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h index 2e6516e..e208c88 100644 --- a/vp10/encoder/encodemb.h +++ b/vp10/encoder/encodemb.h
@@ -23,17 +23,24 @@ struct optimize_ctx *ctx; int8_t *skip; }; + +typedef enum VP10_XFORM_QUANT { + VP10_XFORM_QUANT_FP = 0, + VP10_XFORM_QUANT_B = 1, + VP10_XFORM_QUANT_DC = 2, + VP10_XFORM_QUANT_SKIP_QUANT = 3, + VP10_XFORM_QUANT_LAST = 4 +} VP10_XFORM_QUANT; + void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); +#if CONFIG_SUPERTX +void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize); +#endif // CONFIG_SUPERTX void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); -void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size); -void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size); + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + VP10_XFORM_QUANT xform_quant_idx); void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); @@ -43,14 +50,6 @@ void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); -void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless); -#endif // CONFIG_VP9_HIGHBITDEPTH - #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c index 0736c65..623e6f6 100644 --- a/vp10/encoder/encodemv.c +++ b/vp10/encoder/encodemv.c
@@ -137,19 +137,8 @@ static void update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p, vpx_prob upd_p) { -#if CONFIG_MISC_FIXES (void) upd_p; vp10_cond_prob_diff_update(w, cur_p, ct); -#else - const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1; - const int update = cost_branch256(ct, *cur_p) + vp10_cost_zero(upd_p) > - cost_branch256(ct, new_p) + vp10_cost_one(upd_p) + 7 * 256; - vpx_write(w, update, upd_p); - if (update) { - *cur_p = new_p; - vpx_write_literal(w, new_p >> 1, 7); - } -#endif } static void write_mv_update(const vpx_tree_index *tree,
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 9e3bec4..a90d0c9 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c
@@ -391,6 +391,9 @@ vp10_free_pc_tree(&cpi->td); + if (cpi->common.allow_screen_content_tools) + vpx_free(cpi->td.mb.palette_buffer); + if (cpi->source_diff_var != NULL) { vpx_free(cpi->source_diff_var); cpi->source_diff_var = NULL; @@ -416,10 +419,6 @@ memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1], MV_VALS * sizeof(*cpi->nmvcosts_hp[1])); -#if !CONFIG_MISC_FIXES - vp10_copy(cc->segment_pred_probs, cm->segp.pred_probs); -#endif - memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); @@ -444,10 +443,6 @@ memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1], MV_VALS * sizeof(*cc->nmvcosts_hp[1])); -#if !CONFIG_MISC_FIXES - vp10_copy(cm->segp.pred_probs, cc->segment_pred_probs); -#endif - memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy, (cm->mi_rows * cm->mi_cols)); @@ -722,9 +717,17 @@ } static void init_buffer_indices(VP10_COMP *cpi) { +#if CONFIG_EXT_REFS + int fb_idx; + for (fb_idx = 0; fb_idx < LAST_REF_FRAMES; ++fb_idx) + cpi->lst_fb_idxes[fb_idx] = fb_idx; + cpi->gld_fb_idx = LAST_REF_FRAMES; + cpi->alt_fb_idx = cpi->gld_fb_idx + 1; +#else cpi->lst_fb_idx = 0; cpi->gld_fb_idx = 1; cpi->alt_fb_idx = 2; +#endif // CONFIG_EXT_REFS } static void init_config(struct VP10_COMP *cpi, VP10EncoderConfig *oxcf) { @@ -749,6 +752,10 @@ cpi->td.counts = &cm->counts; // change includes all joint functionality +#if CONFIG_EXT_REFS + cpi->last_ref_to_refresh = LAST_FRAME; +#endif // CONFIG_EXT_REFS + vp10_change_config(cpi, oxcf); cpi->static_mb_pct = 0; @@ -1406,6 +1413,9 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) { VP10_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; +#if CONFIG_EXT_REFS + int ref_frame; +#endif // CONFIG_EXT_REFS if (cm->profile != oxcf->profile) cm->profile = oxcf->profile; @@ -1430,13 +1440,33 @@ } cpi->refresh_golden_frame = 0; + +#if CONFIG_EXT_REFS + for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) { + if 
(ref_frame == cpi->last_ref_to_refresh) + cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 1; + else + cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 0; + } +#else cpi->refresh_last_frame = 1; +#endif // CONFIG_EXT_REFS + cm->refresh_frame_context = oxcf->error_resilient_mode ? REFRESH_FRAME_CONTEXT_OFF : oxcf->frame_parallel_decoding_mode ? REFRESH_FRAME_CONTEXT_FORWARD : REFRESH_FRAME_CONTEXT_BACKWARD; cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE; + cm->allow_screen_content_tools = (cpi->oxcf.content == VP9E_CONTENT_SCREEN); + if (cm->allow_screen_content_tools) { + MACROBLOCK *x = &cpi->td.mb; + if (x->palette_buffer == 0) { + CHECK_MEM_ERROR(cm, x->palette_buffer, + vpx_memalign(16, sizeof(*x->palette_buffer))); + } + } + vp10_reset_segment_features(cm); vp10_set_high_precision_mv(cpi, 0); @@ -1942,6 +1972,8 @@ // Deallocate allocated thread data. if (t < cpi->num_workers - 1) { + if (cpi->common.allow_screen_content_tools) + vpx_free(thread_data->td->mb.palette_buffer); vpx_free(thread_data->td->counts); vp10_free_pc_tree(thread_data->td); vpx_free(thread_data->td); @@ -2277,7 +2309,7 @@ } int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags) { - if (ref_frame_flags > 7) + if (ref_frame_flags > ((1 << REFS_PER_FRAME) - 1)) return -1; cpi->ref_frame_flags = ref_frame_flags; @@ -2287,7 +2319,14 @@ void vp10_update_reference(VP10_COMP *cpi, int ref_frame_flags) { cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0; cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0; +#if CONFIG_EXT_REFS + cpi->ext_refresh_last_frames[0] = (ref_frame_flags & VP9_LAST_FLAG) != 0; + cpi->ext_refresh_last_frames[1] = (ref_frame_flags & VP9_LAST2_FLAG) != 0; + cpi->ext_refresh_last_frames[2] = (ref_frame_flags & VP9_LAST3_FLAG) != 0; + cpi->ext_refresh_last_frames[3] = (ref_frame_flags & VP9_LAST4_FLAG) != 0; +#else cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0; +#endif // CONFIG_EXT_REFS 
cpi->ext_refresh_frame_flags_pending = 1; } @@ -2296,6 +2335,14 @@ MV_REFERENCE_FRAME ref_frame = NONE; if (ref_frame_flag == VP9_LAST_FLAG) ref_frame = LAST_FRAME; +#if CONFIG_EXT_REFS + else if (ref_frame_flag == VP9_LAST2_FLAG) + ref_frame = LAST2_FRAME; + else if (ref_frame_flag == VP9_LAST3_FLAG) + ref_frame = LAST3_FRAME; + else if (ref_frame_flag == VP9_LAST4_FLAG) + ref_frame = LAST4_FRAME; +#endif // CONFIG_EXT_REFS else if (ref_frame_flag == VP9_GOLD_FLAG) ref_frame = GOLDEN_FRAME; else if (ref_frame_flag == VP9_ALT_FLAG) @@ -2577,6 +2624,9 @@ void vp10_update_reference_frames(VP10_COMP *cpi) { VP10_COMMON * const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; +#if CONFIG_EXT_REFS + int ref_frame; +#endif // CONFIG_EXT_REFS // At this point the new frame has been encoded. // If any buffer copy / swapping is signaled it should be done here. @@ -2631,22 +2681,49 @@ } } +#if CONFIG_EXT_REFS + for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) { + const int ref_idx = ref_frame - LAST_FRAME; + if (cpi->refresh_last_frames[ref_idx]) { + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->lst_fb_idxes[ref_idx]], + cm->new_fb_idx); + if (!cpi->rc.is_src_frame_alt_ref) { + memcpy(cpi->interp_filter_selected[ref_frame], + cpi->interp_filter_selected[0], + sizeof(cpi->interp_filter_selected[0])); + } + } + } + // NOTE: The order for the refreshing of the 4 last reference frames are: + // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME -> LAST4_FRAME -> LAST_FRAME + cpi->last_ref_to_refresh += 1; + if (cpi->last_ref_to_refresh == LAST4_FRAME) + cpi->last_ref_to_refresh = LAST_FRAME; +#else if (cpi->refresh_last_frame) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); - if (!cpi->rc.is_src_frame_alt_ref) + if (!cpi->rc.is_src_frame_alt_ref) { memcpy(cpi->interp_filter_selected[LAST_FRAME], cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); + } } +#endif // CONFIG_EXT_REFS + #if 
CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { vp10_denoiser_update_frame_info(&cpi->denoiser, *cpi->Source, cpi->common.frame_type, +#if CONFIG_EXT_REFS + cpi->refresh_last_frames, +#else + cpi->refresh_last_frame, +#endif // CONFIG_EXT_REFS cpi->refresh_alt_ref_frame, - cpi->refresh_golden_frame, - cpi->refresh_last_frame); + cpi->refresh_golden_frame); } #endif } @@ -2670,6 +2747,9 @@ } if (lf->filter_level > 0) { +#if CONFIG_VAR_TX + vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); +#else if (cpi->num_workers > 1) vp10_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane, lf->filter_level, 0, 0, @@ -2677,6 +2757,7 @@ &cpi->lf_row_sync); else vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); +#endif } vpx_extend_frame_inner_borders(cm->frame_to_show); @@ -2700,7 +2781,16 @@ void vp10_scale_references(VP10_COMP *cpi) { VP10_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; - const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG}; + const VP9_REFFRAME ref_mask[REFS_PER_FRAME] = { + VP9_LAST_FLAG, +#if CONFIG_EXT_REFS + VP9_LAST2_FLAG, + VP9_LAST3_FLAG, + VP9_LAST4_FLAG, +#endif // CONFIG_EXT_REFS + VP9_GOLD_FLAG, + VP9_ALT_FLAG + }; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1). @@ -2785,10 +2875,18 @@ if (cpi->oxcf.pass == 0) { // Only release scaled references under certain conditions: // if reference will be updated, or if scaled reference has same resolution. - int refresh[3]; + int refresh[REFS_PER_FRAME]; +#if CONFIG_EXT_REFS + for (i = LAST_FRAME; i <= LAST4_FRAME; ++i) + refresh[i - LAST_FRAME] = + (cpi->refresh_last_frames[i - LAST_FRAME]) ? 1 : 0; + refresh[4] = (cpi->refresh_golden_frame) ? 1 : 0; + refresh[5] = (cpi->refresh_alt_ref_frame) ? 1 : 0; +#else refresh[0] = (cpi->refresh_last_frame) ? 1 : 0; refresh[1] = (cpi->refresh_golden_frame) ? 
1 : 0; refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0; +#endif // CONFIG_EXT_REFS for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { const int idx = cpi->scaled_ref_idx[i - 1]; RefCntBuffer *const buf = idx != INVALID_IDX ? @@ -3060,7 +3158,7 @@ init_motion_estimation(cpi); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1]; + RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME]; const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); ref_buf->idx = buf_idx; @@ -3242,14 +3340,12 @@ // update_base_skip_probs(cpi); vpx_clear_system_state(); - // Dummy pack of the bitstream using up to date stats to get an // accurate estimate of output frame size to determine if we need // to recode. if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { save_coding_context(cpi); vp10_pack_bitstream(cpi, dest, size); - rc->projected_frame_size = (int)(*size) << 3; restore_coding_context(cpi); @@ -3422,12 +3518,45 @@ static int get_ref_frame_flags(const VP10_COMP *cpi) { const int *const map = cpi->common.ref_frame_map; - const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx]; - const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx]; - const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx]; - int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - if (gold_is_last) +#if CONFIG_EXT_REFS + const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[0]]; + const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]]; + + const int last2_is_last = + map[cpi->lst_fb_idxes[1]] == map[cpi->lst_fb_idxes[0]]; + const int gld_is_last2 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[1]]; + const int alt_is_last2 = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[1]]; + + const int last3_is_last = + map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[0]]; + const int last3_is_last2 = + map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]]; 
+ const int gld_is_last3 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[2]]; + const int alt_is_last3 = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[2]]; + + const int last4_is_last = + map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[0]]; + const int last4_is_last2 = + map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[1]]; + const int last4_is_last3 = + map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[2]]; + const int gld_is_last4 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[3]]; + const int alt_is_last4 = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[3]]; +#else + const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx]; + const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx]; +#endif // CONFIG_EXT_REFS + const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx]; + + int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; +#if CONFIG_EXT_REFS + flags |= VP9_LAST2_FLAG; + flags |= VP9_LAST3_FLAG; + flags |= VP9_LAST4_FLAG; +#endif // CONFIG_EXT_REFS + + if (gld_is_last) flags &= ~VP9_GOLD_FLAG; if (cpi->rc.frames_till_gf_update_due == INT_MAX) @@ -3436,9 +3565,26 @@ if (alt_is_last) flags &= ~VP9_ALT_FLAG; - if (gold_is_alt) + if (gld_is_alt) flags &= ~VP9_ALT_FLAG; +#if CONFIG_EXT_REFS + if (last4_is_last || last4_is_last2 || last4_is_last3) + flags &= ~VP9_LAST4_FLAG; + + if (last3_is_last || last3_is_last2) + flags &= ~VP9_LAST3_FLAG; + + if (last2_is_last) + flags &= ~VP9_LAST2_FLAG; + + if (gld_is_last4 || gld_is_last3 || gld_is_last2) + flags &= ~VP9_GOLD_FLAG; + + if (alt_is_last4 || alt_is_last3 || alt_is_last2) + flags &= ~VP9_ALT_FLAG; +#endif // CONFIG_EXT_REFS + return flags; } @@ -3452,7 +3598,15 @@ cpi->ext_refresh_frame_context_pending = 0; } if (cpi->ext_refresh_frame_flags_pending) { +#if CONFIG_EXT_REFS + int ref_frame; + for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) { + cpi->refresh_last_frames[ref_frame - LAST_FRAME] = + cpi->ext_refresh_last_frames[ref_frame - LAST_FRAME]; 
+ } +#else cpi->refresh_last_frame = cpi->ext_refresh_last_frame; +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame; cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame; cpi->ext_refresh_frame_flags_pending = 0; @@ -3521,6 +3675,17 @@ for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) { if ((ref_total[LAST_FRAME] && cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) && +#if CONFIG_EXT_REFS + (ref_total[LAST2_FRAME] == 0 || + cpi->interp_filter_selected[LAST2_FRAME][ifilter] * 50 + < ref_total[LAST2_FRAME]) && + (ref_total[LAST3_FRAME] == 0 || + cpi->interp_filter_selected[LAST3_FRAME][ifilter] * 50 + < ref_total[LAST3_FRAME]) && + (ref_total[LAST4_FRAME] == 0 || + cpi->interp_filter_selected[LAST4_FRAME][ifilter] * 50 + < ref_total[LAST4_FRAME]) && +#endif // CONFIG_EXT_REFS (ref_total[GOLDEN_FRAME] == 0 || cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 < ref_total[GOLDEN_FRAME]) && @@ -3637,8 +3802,16 @@ } // If the encoder forced a KEY_FRAME decision - if (cm->frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) { +#if CONFIG_EXT_REFS + int ref_frame; + for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) + cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 1; + cpi->last_ref_to_refresh = LAST_FRAME; +#else cpi->refresh_last_frame = 1; +#endif // CONFIG_EXT_REFS + } cm->frame_to_show = get_frame_new_buffer(cm); cm->frame_to_show->color_space = cm->color_space; @@ -3666,12 +3839,7 @@ if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { vp10_adapt_coef_probs(cm); -#if CONFIG_MISC_FIXES vp10_adapt_intra_frame_probs(cm); -#else - if (!frame_is_intra_only(cm)) - vp10_adapt_intra_frame_probs(cm); -#endif } if (!frame_is_intra_only(cm)) { @@ -3693,6 +3861,10 @@ cpi->ref_frame_flags = get_ref_frame_flags(cpi); +#if CONFIG_EXT_REFS + cm->last3_frame_type = cm->last2_frame_type; + cm->last2_frame_type = cm->last_frame_type; +#endif // CONFIG_EXT_REFS cm->last_frame_type 
= cm->frame_type; vp10_rc_postencode_update(cpi, *size); @@ -3855,7 +4027,14 @@ const VP10_COMMON *cm = &cpi->common; return cm->frame_type == KEY_FRAME || +#if CONFIG_EXT_REFS + cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] || + cpi->refresh_last_frames[LAST2_FRAME - LAST_FRAME] || + cpi->refresh_last_frames[LAST3_FRAME - LAST_FRAME] || + cpi->refresh_last_frames[LAST4_FRAME - LAST_FRAME] || +#else cpi->refresh_last_frame || +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame || cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF || @@ -3935,12 +4114,21 @@ } if (rc->is_src_frame_alt_ref) { +#if CONFIG_EXT_REFS + int ref_frame; +#endif // CONFIG_EXT_REFS + // Current frame is an ARF overlay frame. cpi->alt_ref_source = NULL; // Don't refresh the last buffer for an ARF overlay frame. It will // become the GF so preserve last as an alternative prediction option. +#if CONFIG_EXT_REFS + for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) + cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 0; +#else cpi->refresh_last_frame = 0; +#endif // CONFIG_EXT_REFS } } @@ -3991,7 +4179,16 @@ oxcf->frame_parallel_decoding_mode ? 
REFRESH_FRAME_CONTEXT_FORWARD : REFRESH_FRAME_CONTEXT_BACKWARD; +#if CONFIG_EXT_REFS + for (i = LAST_FRAME; i <= LAST4_FRAME; ++i) { + if (i == cpi->last_ref_to_refresh) + cpi->refresh_last_frames[i - LAST_FRAME] = 1; + else + cpi->refresh_last_frames[i - LAST_FRAME] = 0; + } +#else cpi->refresh_last_frame = 1; +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame = 0; cpi->refresh_alt_ref_frame = 0; @@ -4015,12 +4212,15 @@ cm->intra_only = 0; cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; +#if CONFIG_EXT_REFS + for (i = LAST_FRAME; i <= LAST4_FRAME; ++i) + cpi->refresh_last_frames[i - LAST_FRAME] = 0; +#else cpi->refresh_last_frame = 0; +#endif // CONFIG_EXT_REFS rc->is_src_frame_alt_ref = 0; - rc->source_alt_ref_pending = 0; - } else { - rc->source_alt_ref_pending = 0; } + rc->source_alt_ref_pending = 0; } if (!source) {
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index bd6a009..707255d 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h
@@ -55,10 +55,6 @@ int nmvcosts[2][MV_VALS]; int nmvcosts_hp[2][MV_VALS]; -#if !CONFIG_MISC_FIXES - vpx_prob segment_pred_probs[PREDICTION_PROBS]; -#endif - unsigned char *last_frame_seg_map_copy; // 0 = Intra, Last, GF, ARF @@ -308,17 +304,33 @@ // For a still frame, this flag is set to 1 to skip partition search. int partition_search_skippable_frame; +#if CONFIG_EXT_REFS + int last_ref_to_refresh; +#endif // CONFIG_EXT_REFS + int scaled_ref_idx[MAX_REF_FRAMES]; +#if CONFIG_EXT_REFS + int lst_fb_idxes[LAST_REF_FRAMES]; +#else int lst_fb_idx; +#endif // CONFIG_EXT_REFS int gld_fb_idx; int alt_fb_idx; +#if CONFIG_EXT_REFS + int refresh_last_frames[LAST_REF_FRAMES]; +#else int refresh_last_frame; +#endif // CONFIG_EXT_REFS int refresh_golden_frame; int refresh_alt_ref_frame; int ext_refresh_frame_flags_pending; +#if CONFIG_EXT_REFS + int ext_refresh_last_frames[LAST_REF_FRAMES]; +#else int ext_refresh_last_frame; +#endif // CONFIG_EXT_REFS int ext_refresh_golden_frame; int ext_refresh_alt_ref_frame; @@ -456,19 +468,36 @@ search_site_config ss_cfg; - int mbmode_cost[INTRA_MODES]; + int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES]; +#if CONFIG_REF_MV + int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2]; + int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2]; + int refmv_mode_cost[REFMV_MODE_CONTEXTS][2]; +#endif + unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; + int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES]; + int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES]; + int palette_y_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS] + [PALETTE_COLORS]; + int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS] + [PALETTE_COLORS]; int multi_arf_allowed; int multi_arf_enabled; int 
multi_arf_last_grp_enabled; - +#if CONFIG_EXT_TX + int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; + int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] + [TX_TYPES]; +#else int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES]; int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES]; +#endif // CONFIG_EXT_TX #if CONFIG_VP9_TEMPORAL_DENOISING VP9_DENOISER denoiser; #endif @@ -549,13 +578,17 @@ static INLINE int get_ref_frame_map_idx(const VP10_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { - if (ref_frame == LAST_FRAME) { +#if CONFIG_EXT_REFS + if (ref_frame >= LAST_FRAME && ref_frame <= LAST4_FRAME) + return cpi->lst_fb_idxes[ref_frame - 1]; +#else + if (ref_frame == LAST_FRAME) return cpi->lst_fb_idx; - } else if (ref_frame == GOLDEN_FRAME) { +#endif // CONFIG_EXT_REFS + else if (ref_frame == GOLDEN_FRAME) return cpi->gld_fb_idx; - } else { + else return cpi->alt_fb_idx; - } } static INLINE int get_ref_frame_buf_idx(const VP10_COMP *const cpi,
diff --git a/vp10/encoder/ethread.c b/vp10/encoder/ethread.c index ad47ccf..6cb9494 100644 --- a/vp10/encoder/ethread.c +++ b/vp10/encoder/ethread.c
@@ -133,6 +133,13 @@ memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } + + // Allocate buffers used by palette coding mode. + if (cpi->common.allow_screen_content_tools && i < num_workers - 1) { + MACROBLOCK *x = &thread_data->td->mb; + CHECK_MEM_ERROR(cm, x->palette_buffer, + vpx_memalign(16, sizeof(*x->palette_buffer))); + } } // Encode a frame
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c index c41fa3e..c865408 100644 --- a/vp10/encoder/firstpass.c +++ b/vp10/encoder/firstpass.c
@@ -1044,8 +1044,13 @@ ((twopass->this_frame_stats.intra_error / DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { if (gld_yv12 != NULL) { +#if CONFIG_EXT_REFS + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], + cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]]); +#else ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->ref_frame_map[cpi->lst_fb_idx]); +#endif // CONFIG_EXT_REFS } twopass->sr_update_lag = 1; } else { @@ -1055,14 +1060,25 @@ vpx_extend_frame_borders(new_yv12); // The frame we just compressed now becomes the last frame. +#if CONFIG_EXT_REFS + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]], + cm->new_fb_idx); +#else ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); +#endif // CONFIG_EXT_REFS // Special case for the first frame. Copy into the GF buffer as a second // reference. if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) { +#if CONFIG_EXT_REFS + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], + cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]]); +#else ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->ref_frame_map[cpi->lst_fb_idx]); +#endif // CONFIG_EXT_REFS } // Use this to see what the first pass reconstruction looks like. 
@@ -2382,28 +2398,48 @@ cpi->rc.is_src_frame_alt_ref = 0; switch (twopass->gf_group.update_type[twopass->gf_group.index]) { case KF_UPDATE: +#if CONFIG_EXT_REFS + cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 1; +#else cpi->refresh_last_frame = 1; +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; break; case LF_UPDATE: +#if CONFIG_EXT_REFS + cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 1; +#else cpi->refresh_last_frame = 1; +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame = 0; cpi->refresh_alt_ref_frame = 0; break; case GF_UPDATE: +#if CONFIG_EXT_REFS + cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 1; +#else cpi->refresh_last_frame = 1; +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 0; break; case OVERLAY_UPDATE: +#if CONFIG_EXT_REFS + cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 0; +#else cpi->refresh_last_frame = 0; +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 0; cpi->rc.is_src_frame_alt_ref = 1; break; case ARF_UPDATE: +#if CONFIG_EXT_REFS + cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 0; +#else cpi->refresh_last_frame = 0; +#endif // CONFIG_EXT_REFS cpi->refresh_golden_frame = 0; cpi->refresh_alt_ref_frame = 1; break;
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c new file mode 100644 index 0000000..8b48276 --- /dev/null +++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vp10/common/idct.h"
+#include "vp10/encoder/hybrid_fwd_txfm.h"
+
+// Forward 32x32 DCT: calls vpx_fdct32x32_rd() when rd_transform is non-zero
+// and vpx_fdct32x32() otherwise.
+static INLINE void fdct32x32(int rd_transform, const int16_t *src,
+                             tran_low_t *dst, int src_stride) {
+  if (rd_transform)
+    vpx_fdct32x32_rd(src, dst, src_stride);
+  else
+    vpx_fdct32x32(src, dst, src_stride);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth counterpart of fdct32x32().
+static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
+                                    tran_low_t *dst, int src_stride) {
+  if (rd_transform)
+    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
+  else
+    vpx_highbd_fdct32x32(src, dst, src_stride);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_TX
+// Forward identity transform: copies a bs x bs block of residuals (rows
+// `stride` apart) into coeff (rows packed `bs` apart), scaling each sample
+// by 2^shift, where shift is 3 for bs < 32 and 2 otherwise.
+static void fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
+                       int bs) {
+  int r, c;
+  const int shift = bs < 32 ? 3 : 2;
+
+  for (r = 0; r < bs; ++r) {
+    // Multiply instead of '<<': residuals may be negative, and left-shifting
+    // a negative signed value is undefined behaviour in C.
+    for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] * (1 << shift);
+    src_diff += stride;
+    coeff += bs;
+  }
+}
+#endif  // CONFIG_EXT_TX
+
+// Forward 4x4 transform. When lossless is set, tx_type must be DCT_DCT and
+// vp10_fwht4x4() is used; otherwise the transform is selected by tx_type.
+void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+                       int diff_stride, TX_TYPE tx_type, int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_fwht4x4(src_diff, coeff, diff_stride);
+    return;
+  }
+
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+    case DST_FLIPADST:
+    case FLIPADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      assert(0);
+      break;
+  }
+}
+
+// Forward 8x8 transform selected by tx_type. FWD_TXFM_OPT_DC is honoured only
+// for the DCT/ADST combinations, where vpx_fdct8x8_1() is called instead of
+// vp10_fht8x8().
+static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
+                         int diff_stride, TX_TYPE tx_type,
+                         FWD_TXFM_OPT fwd_txfm_opt) {
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+        vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+      else  // FWD_TXFM_OPT_DC
+        vpx_fdct8x8_1(src_diff, coeff, diff_stride);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+    case DST_FLIPADST:
+    case FLIPADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      assert(0);
+      break;
+  }
+}
+
+// Forward 16x16 transform; same structure as fwd_txfm_8x8(), with
+// vpx_fdct16x16_1() serving FWD_TXFM_OPT_DC.
+static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
+                           int diff_stride, TX_TYPE tx_type,
+                           FWD_TXFM_OPT fwd_txfm_opt) {
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+        vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+      else  // FWD_TXFM_OPT_DC
+        vpx_fdct16x16_1(src_diff, coeff, diff_stride);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+    case DST_FLIPADST:
+    case FLIPADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      assert(0);
+      break;
+  }
+}
+
+// Forward 32x32 transform. Only DCT_DCT (plus IDTX under CONFIG_EXT_TX) is
+// handled; every other tx_type trips assert(0).
+static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
+                           tran_low_t *coeff, int diff_stride, TX_TYPE tx_type,
+                           FWD_TXFM_OPT fwd_txfm_opt) {
+  switch (tx_type) {
+    case DCT_DCT:
+      if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+        fdct32x32(rd_transform, src_diff, coeff, diff_stride);
+      else  // FWD_TXFM_OPT_DC
+        vpx_fdct32x32_1(src_diff, coeff, diff_stride);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+      break;
+#endif  // CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      assert(0);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth counterpart of vp10_fwd_txfm_4x4().
+void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+                              int diff_stride, TX_TYPE tx_type, int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+    return;
+  }
+
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+    case DST_FLIPADST:
+    case FLIPADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      assert(0);
+      break;
+  }
+}
+
+// High-bitdepth forward 8x8 transform. fwd_txfm_opt is ignored here: the full
+// transform is computed for every tx_type.
+static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
+                                int diff_stride, TX_TYPE tx_type,
+                                FWD_TXFM_OPT fwd_txfm_opt) {
+  (void)fwd_txfm_opt;
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+    case DST_FLIPADST:
+    case FLIPADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      assert(0);
+      break;
+  }
+}
+
+// High-bitdepth forward 16x16 transform. fwd_txfm_opt is ignored here: the
+// full transform is computed for every tx_type.
+static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
+                                  int diff_stride, TX_TYPE tx_type,
+                                  FWD_TXFM_OPT fwd_txfm_opt) {
+  (void)fwd_txfm_opt;
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+    case DST_FLIPADST:
+    case FLIPADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      assert(0);
+      break;
+  }
+}
+
+// High-bitdepth counterpart of fwd_txfm_32x32().
+static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
+                                  tran_low_t *coeff, int diff_stride,
+                                  TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt) {
+  switch (tx_type) {
+    case DCT_DCT:
+      if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+        highbd_fdct32x32(rd_transform, src_diff, coeff, diff_stride);
+      else  // FWD_TXFM_OPT_DC
+        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+      break;
+#endif  // CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      assert(0);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// Forward transform entry point: dispatches on fwd_txfm_param->tx_size.
+// rd_transform is only consulted for TX_32X32 and lossless only for TX_4X4.
+void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
+              FWD_TXFM_PARAM *fwd_txfm_param) {
+  const FWD_TXFM_OPT fwd_txfm_opt = fwd_txfm_param->fwd_txfm_opt;
+  const TX_TYPE tx_type = fwd_txfm_param->tx_type;
+  const TX_SIZE tx_size = fwd_txfm_param->tx_size;
+  const int rd_transform = fwd_txfm_param->rd_transform;
+  const int lossless = fwd_txfm_param->lossless;
+  switch (tx_size) {
+    case TX_32X32:
+      fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
+                     fwd_txfm_opt);
+      break;
+    case TX_16X16:
+      fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+      break;
+    case TX_8X8:
+      fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+      break;
+    case TX_4X4:
+      vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth counterpart of fwd_txfm().
+void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
+                     int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) {
+  const FWD_TXFM_OPT fwd_txfm_opt = fwd_txfm_param->fwd_txfm_opt;
+  const TX_TYPE tx_type = fwd_txfm_param->tx_type;
+  const TX_SIZE tx_size = fwd_txfm_param->tx_size;
+  const int rd_transform = fwd_txfm_param->rd_transform;
+  const int lossless = fwd_txfm_param->lossless;
+  switch (tx_size) {
+    case TX_32X32:
+      highbd_fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
+                            fwd_txfm_opt);
+      break;
+    case TX_16X16:
+      highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type,
+                            fwd_txfm_opt);
+      break;
+    case TX_8X8:
+      highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+      break;
+    case TX_4X4:
+      vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/encoder/hybrid_fwd_txfm.h b/vp10/encoder/hybrid_fwd_txfm.h new file mode 100644 index 0000000..62b8d5a --- /dev/null +++ b/vp10/encoder/hybrid_fwd_txfm.h
@@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_ENCODER_HYBRID_FWD_TXFM_H_ +#define VP10_ENCODER_HYBRID_FWD_TXFM_H_ + +#include "./vpx_config.h" + +typedef enum FWD_TXFM_OPT { FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC } FWD_TXFM_OPT; + +typedef struct FWD_TXFM_PARAM { + TX_TYPE tx_type; + TX_SIZE tx_size; + FWD_TXFM_OPT fwd_txfm_opt; + int rd_transform; + int lossless; +} FWD_TXFM_PARAM; + +#ifdef __cplusplus +extern "C" { +#endif + +void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, + FWD_TXFM_PARAM *fwd_txfm_param); +void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, int lossless); + +#if CONFIG_VP9_HIGHBITDEPTH +void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param); +void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, int lossless); +#endif // CONFIG_VP9_HIGHBITDEPTH + +static INLINE int get_tx1d_size(TX_SIZE tx_size) { + switch (tx_size) { + case TX_32X32: + return 32; + case TX_16X16: + return 16; + case TX_8X8: + return 8; + case TX_4X4: + return 4; + default: + assert(0); + return -1; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP10_ENCODER_HYBRID_FWD_TXFM_H_
diff --git a/vp10/encoder/palette.c b/vp10/encoder/palette.c new file mode 100644 index 0000000..522e185 --- /dev/null +++ b/vp10/encoder/palette.c
@@ -0,0 +1,194 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> +#include "vp10/encoder/palette.h" + +static double calc_dist(const double *p1, const double *p2, int dim) { + double dist = 0; + int i = 0; + + for (i = 0; i < dim; ++i) { + dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i])); + } + return dist; +} + +void vp10_calc_indices(const double *data, const double *centroids, + uint8_t *indices, int n, int k, int dim) { + int i, j; + double min_dist, this_dist; + + for (i = 0; i < n; ++i) { + min_dist = calc_dist(data + i * dim, centroids, dim); + indices[i] = 0; + for (j = 1; j < k; ++j) { + this_dist = calc_dist(data + i * dim, centroids + j * dim, dim); + if (this_dist < min_dist) { + min_dist = this_dist; + indices[i] = j; + } + } + } +} + +// Generate a random number in the range [0, 32768). 
+static unsigned int lcg_rand16(unsigned int *state) { + *state = *state * 1103515245 + 12345; + return *state / 65536 % 32768; +} + +static void calc_centroids(const double *data, double *centroids, + const uint8_t *indices, int n, int k, int dim) { + int i, j, index; + int count[PALETTE_MAX_SIZE]; + unsigned int rand_state = (unsigned int)data[0]; + + assert(n <= 32768); + + memset(count, 0, sizeof(count[0]) * k); + memset(centroids, 0, sizeof(centroids[0]) * k * dim); + + for (i = 0; i < n; ++i) { + index = indices[i]; + assert(index < k); + ++count[index]; + for (j = 0; j < dim; ++j) { + centroids[index * dim + j] += data[i * dim + j]; + } + } + + for (i = 0; i < k; ++i) { + if (count[i] == 0) { + memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim, + sizeof(centroids[0]) * dim); + } else { + const double norm = 1.0 / count[i]; + for (j = 0; j < dim; ++j) + centroids[i * dim + j] *= norm; + } + } +} + +static double calc_total_dist(const double *data, const double *centroids, + const uint8_t *indices, int n, int k, int dim) { + double dist = 0; + int i; + (void) k; + + for (i = 0; i < n; ++i) + dist += calc_dist(data + i * dim, centroids + indices[i] * dim, dim); + + return dist; +} + +int vp10_k_means(const double *data, double *centroids, uint8_t *indices, + uint8_t *pre_indices, int n, int k, int dim, int max_itr) { + int i = 0; + double pre_dist, this_dist; + double pre_centroids[PALETTE_MAX_SIZE]; + + vp10_calc_indices(data, centroids, indices, n, k, dim); + pre_dist = calc_total_dist(data, centroids, indices, n, k, dim); + memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim); + memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n); + while (i < max_itr) { + calc_centroids(data, centroids, indices, n, k, dim); + vp10_calc_indices(data, centroids, indices, n, k, dim); + this_dist = calc_total_dist(data, centroids, indices, n, k, dim); + + if (this_dist > pre_dist) { + memcpy(centroids, pre_centroids, 
sizeof(pre_centroids[0]) * k * dim); + memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n); + break; + } + if (!memcmp(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim)) + break; + + memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim); + memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n); + pre_dist = this_dist; + ++i; + } + + return i; +} + +void vp10_insertion_sort(double *data, int n) { + int i, j, k; + double val; + + if (n <= 1) + return; + + for (i = 1; i < n; ++i) { + val = data[i]; + j = 0; + while (val > data[j] && j < i) + ++j; + + if (j == i) + continue; + + for (k = i; k > j; --k) + data[k] = data[k - 1]; + data[j] = val; + } +} + +int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols) { + int n = 0, r, c, i, val_count[256]; + uint8_t val; + memset(val_count, 0, sizeof(val_count)); + + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + val = src[r * stride + c]; + ++val_count[val]; + } + } + + for (i = 0; i < 256; ++i) { + if (val_count[i]) { + ++n; + } + } + + return n; +} + +#if CONFIG_VP9_HIGHBITDEPTH +int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows, + int cols, int bit_depth) { + int n = 0, r, c, i; + uint16_t val; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + int val_count[1 << 12]; + + assert(bit_depth <= 12); + memset(val_count, 0, (1 << 12) * sizeof(val_count[0])); + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + val = src[r * stride + c]; + ++val_count[val]; + } + } + + for (i = 0; i < (1 << bit_depth); ++i) { + if (val_count[i]) { + ++n; + } + } + + return n; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +
diff --git a/vp10/encoder/palette.h b/vp10/encoder/palette.h new file mode 100644 index 0000000..124cf74 --- /dev/null +++ b/vp10/encoder/palette.h
@@ -0,0 +1,35 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_ENCODER_PALETTE_H_ +#define VP10_ENCODER_PALETTE_H_ + +#include "vp10/common/blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp10_insertion_sort(double *data, int n); +void vp10_calc_indices(const double *data, const double *centroids, + uint8_t *indices, int n, int k, int dim); +int vp10_k_means(const double *data, double *centroids, uint8_t *indices, + uint8_t *pre_indices, int n, int k, int dim, int max_itr); +int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols); +#if CONFIG_VP9_HIGHBITDEPTH +int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows, + int cols, int bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* VP10_ENCODER_PALETTE_H_ */
diff --git a/vp10/encoder/picklpf.c b/vp10/encoder/picklpf.c index 045e03d..1f5711d 100644 --- a/vp10/encoder/picklpf.c +++ b/vp10/encoder/picklpf.c
@@ -41,6 +41,10 @@ VP10_COMMON *const cm = &cpi->common; int64_t filt_err; +#if CONFIG_VAR_TX + vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, + 1, partial_frame); +#else if (cpi->num_workers > 1) vp10_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane, filt_level, 1, partial_frame, @@ -48,6 +52,7 @@ else vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1, partial_frame); +#endif #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) {
diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c index 86b324f..739a06d 100644 --- a/vp10/encoder/quantize.c +++ b/vp10/encoder/quantize.c
@@ -10,16 +10,194 @@ #include <math.h> #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/quantize.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vp10/common/quant_common.h" +#include "vp10/common/scan.h" #include "vp10/common/seg_common.h" #include "vp10/encoder/encoder.h" #include "vp10/encoder/quantize.h" #include "vp10/encoder/rd.h" +void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) { + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + *eob_ptr = 0; +} + +void vp10_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vp10_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); +} + +void vp10_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vpx_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, p->quant, + p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, + sc->scan, sc->iscan); +} + +void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + (void)sc; + + vpx_quantize_dc(coeff_ptr, n_coeffs, skip_block, p->round, p->quant_fp[0], + qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr); +} + +#if 
CONFIG_VP9_HIGHBITDEPTH +void vp10_highbd_quantize_fp_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vp10_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); +} + +void vp10_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vpx_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); +} + +void vp10_highbd_quantize_dc_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + (void)sc; + + vpx_highbd_quantize_dc(coeff_ptr, n_coeffs, skip_block, p->round, + p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, + pd->dequant[0], eob_ptr); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp10_quantize_fp_32x32_facade(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vp10_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); +} + +void vp10_quantize_b_32x32_facade(const 
tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vpx_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); +} + +void vp10_quantize_dc_32x32_facade(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + (void)sc; + (void)n_coeffs; + + vpx_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0], + qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr); +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp10_highbd_quantize_fp_32x32_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vp10_highbd_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, + sc->scan, sc->iscan); +} + +void vp10_highbd_quantize_b_32x32_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + vpx_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round, p->quant, p->quant_shift, qcoeff_ptr, + dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, + sc->iscan); +} + +void vp10_highbd_quantize_dc_32x32_facade( + const tran_low_t *coeff_ptr, intptr_t 
n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) { + // obsolete skip_block + const int skip_block = 0; + + (void)sc; + (void)n_coeffs; + + vpx_highbd_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0], + qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], + eob_ptr); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
diff --git a/vp10/encoder/quantize.h b/vp10/encoder/quantize.h index b44088e..9c0ab3f 100644 --- a/vp10/encoder/quantize.h +++ b/vp10/encoder/quantize.h
@@ -12,12 +12,20 @@ #define VP10_ENCODER_QUANTIZE_H_ #include "./vpx_config.h" +#include "vp10/common/scan.h" #include "vp10/encoder/block.h" #ifdef __cplusplus extern "C" { #endif +typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); + typedef struct { DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); @@ -38,7 +46,7 @@ } QUANTS; void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, - const int16_t *scan, const int16_t *iscan); + const int16_t *scan, const int16_t *iscan); struct VP10_COMP; struct VP10Common; @@ -55,6 +63,81 @@ int vp10_qindex_to_quantizer(int qindex); +void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr); + +void vp10_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); + +void vp10_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); + +void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); +#if CONFIG_VP9_HIGHBITDEPTH +void vp10_highbd_quantize_fp_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc); + +void vp10_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, 
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); + +void vp10_highbd_quantize_dc_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc); +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp10_quantize_fp_32x32_facade(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); + +void vp10_quantize_b_32x32_facade(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); + +void vp10_quantize_dc_32x32_facade(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, + const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const scan_order *sc); +#if CONFIG_VP9_HIGHBITDEPTH +void vp10_highbd_quantize_fp_32x32_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc); + +void vp10_highbd_quantize_b_32x32_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc); + +void vp10_highbd_quantize_dc_32x32_facade( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, + tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc); +#endif // CONFIG_VP9_HIGHBITDEPTH #ifdef 
__cplusplus } // extern "C" #endif
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c index f4fdb24..5dcfa55 100644 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c
@@ -75,7 +75,10 @@ vp10_cost_tokens(cpi->y_mode_costs[i][j], vp10_kf_y_mode_prob[i][j], vp10_intra_mode_tree); - vp10_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp10_intra_mode_tree); + for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) + vp10_cost_tokens(cpi->mbmode_cost[i], fc->y_mode_prob[i], + vp10_intra_mode_tree); + for (i = 0; i < INTRA_MODES; ++i) vp10_cost_tokens(cpi->intra_uv_mode_cost[i], fc->uv_mode_prob[i], vp10_intra_mode_tree); @@ -84,6 +87,44 @@ vp10_cost_tokens(cpi->switchable_interp_costs[i], fc->switchable_interp_prob[i], vp10_switchable_interp_tree); + for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) { + vp10_cost_tokens(cpi->palette_y_size_cost[i], + vp10_default_palette_y_size_prob[i], + vp10_palette_size_tree); + vp10_cost_tokens(cpi->palette_uv_size_cost[i], + vp10_default_palette_uv_size_prob[i], + vp10_palette_size_tree); + } + + for (i = 0; i < PALETTE_MAX_SIZE - 1; ++i) + for (j = 0; j < PALETTE_COLOR_CONTEXTS; ++j) { + vp10_cost_tokens(cpi->palette_y_color_cost[i][j], + vp10_default_palette_y_color_prob[i][j], + vp10_palette_color_tree[i]); + vp10_cost_tokens(cpi->palette_uv_color_cost[i][j], + vp10_default_palette_uv_color_prob[i][j], + vp10_palette_color_tree[i]); + } +#if CONFIG_EXT_TX + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (use_inter_ext_tx_for_txsize[s][i]) { + vp10_cost_tokens(cpi->inter_tx_type_costs[s][i], + fc->inter_ext_tx_prob[s][i], + vp10_ext_tx_inter_tree[s]); + } + } + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (use_intra_ext_tx_for_txsize[s][i]) { + for (j = 0; j < INTRA_MODES; ++j) + vp10_cost_tokens(cpi->intra_tx_type_costs[s][i][j], + fc->intra_ext_tx_prob[s][i][j], + vp10_ext_tx_intra_tree[s]); + } + } + } +#else for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { for (j = 0; j < TX_TYPES; ++j) vp10_cost_tokens(cpi->intra_tx_type_costs[i][j], @@ -95,6 +136,7 @@ fc->inter_ext_tx_prob[i], vp10_ext_tx_tree); } +#endif // CONFIG_EXT_TX } static void 
fill_token_costs(vp10_coeff_cost *c, @@ -311,10 +353,26 @@ cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc, cm->allow_high_precision_mv); +#if CONFIG_REF_MV + for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) { + cpi->newmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->newmv_prob[i], 0); + cpi->newmv_mode_cost[i][1] = vp10_cost_bit(cm->fc->newmv_prob[i], 1); + } + for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) { + cpi->zeromv_mode_cost[i][0] = vp10_cost_bit(cm->fc->zeromv_prob[i], 0); + cpi->zeromv_mode_cost[i][1] = vp10_cost_bit(cm->fc->zeromv_prob[i], 1); + } + + for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) { + cpi->refmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->refmv_prob[i], 0); + cpi->refmv_mode_cost[i][1] = vp10_cost_bit(cm->fc->refmv_prob[i], 1); + } +#else for (i = 0; i < INTER_MODE_CONTEXTS; ++i) vp10_cost_tokens((int *)cpi->inter_mode_cost[i], cm->fc->inter_mode_probs[i], vp10_inter_mode_tree); +#endif } } @@ -563,8 +621,11 @@ const MACROBLOCKD *const xd) { const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp10_get_pred_context_switchable_interp(xd); +#if CONFIG_EXT_INTERP + if (!vp10_is_interp_needed(xd)) return 0; +#endif // CONFIG_EXT_INTERP return SWITCHABLE_INTERP_RATE_FACTOR * - cpi->switchable_interp_costs[ctx][mbmi->interp_filter]; + cpi->switchable_interp_costs[ctx][mbmi->interp_filter]; } void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) { @@ -578,10 +639,20 @@ if (sf->adaptive_rd_thresh) { rd->thresh_mult[THR_NEARESTMV] = 300; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_NEARESTL2] = 300; + rd->thresh_mult[THR_NEARESTL3] = 300; + rd->thresh_mult[THR_NEARESTL4] = 300; +#endif // CONFIG_EXT_REFS rd->thresh_mult[THR_NEARESTG] = 300; rd->thresh_mult[THR_NEARESTA] = 300; } else { rd->thresh_mult[THR_NEARESTMV] = 0; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_NEARESTL2] = 0; + rd->thresh_mult[THR_NEARESTL3] = 0; + rd->thresh_mult[THR_NEARESTL4] = 0; +#endif // CONFIG_EXT_REFS rd->thresh_mult[THR_NEARESTG] = 0; 
rd->thresh_mult[THR_NEARESTA] = 0; } @@ -589,26 +660,61 @@ rd->thresh_mult[THR_DC] += 1000; rd->thresh_mult[THR_NEWMV] += 1000; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_NEWL2] += 1000; + rd->thresh_mult[THR_NEWL3] += 1000; + rd->thresh_mult[THR_NEWL4] += 1000; +#endif // CONFIG_EXT_REFS rd->thresh_mult[THR_NEWA] += 1000; rd->thresh_mult[THR_NEWG] += 1000; rd->thresh_mult[THR_NEARMV] += 1000; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_NEARL2] += 1000; + rd->thresh_mult[THR_NEARL3] += 1000; + rd->thresh_mult[THR_NEARL4] += 1000; +#endif // CONFIG_EXT_REFS rd->thresh_mult[THR_NEARA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; + rd->thresh_mult[THR_NEARG] += 1000; + + rd->thresh_mult[THR_ZEROMV] += 2000; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_ZEROL2] += 2000; + rd->thresh_mult[THR_ZEROL3] += 2000; + rd->thresh_mult[THR_ZEROL4] += 2000; +#endif // CONFIG_EXT_REFS + rd->thresh_mult[THR_ZEROG] += 2000; + rd->thresh_mult[THR_ZEROA] += 2000; rd->thresh_mult[THR_TM] += 1000; + rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_COMP_NEARESTL2A] += 1000; + rd->thresh_mult[THR_COMP_NEARESTL3A] += 1000; + rd->thresh_mult[THR_COMP_NEARESTL4A] += 1000; +#endif // CONFIG_EXT_REFS + rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; + rd->thresh_mult[THR_COMP_NEARLA] += 1500; rd->thresh_mult[THR_COMP_NEWLA] += 2000; - rd->thresh_mult[THR_NEARG] += 1000; rd->thresh_mult[THR_COMP_NEARGA] += 1500; rd->thresh_mult[THR_COMP_NEWGA] += 2000; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_COMP_NEARL2A] += 1500; + rd->thresh_mult[THR_COMP_NEWL2A] += 2000; + rd->thresh_mult[THR_COMP_NEARL3A] += 1500; + rd->thresh_mult[THR_COMP_NEWL3A] += 2000; + rd->thresh_mult[THR_COMP_NEARL4A] += 1500; + rd->thresh_mult[THR_COMP_NEWL4A] += 2000; +#endif // CONFIG_EXT_REFS - rd->thresh_mult[THR_ZEROMV] += 2000; - rd->thresh_mult[THR_ZEROG] += 2000; - rd->thresh_mult[THR_ZEROA] += 2000; rd->thresh_mult[THR_COMP_ZEROLA] 
+= 2500; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_COMP_ZEROL2A] += 2500; + rd->thresh_mult[THR_COMP_ZEROL3A] += 2500; + rd->thresh_mult[THR_COMP_ZEROL4A] += 2500; +#endif // CONFIG_EXT_REFS rd->thresh_mult[THR_COMP_ZEROGA] += 2500; rd->thresh_mult[THR_H_PRED] += 2000; @@ -622,9 +728,15 @@ } void vp10_set_rd_speed_thresholds_sub8x8(VP10_COMP *cpi) { - static const int thresh_mult[2][MAX_REFS] = - {{2500, 2500, 2500, 4500, 4500, 2500}, - {2000, 2000, 2000, 4000, 4000, 2000}}; + static const int thresh_mult[2][MAX_REFS] = { +#if CONFIG_EXT_REFS + {2500, 2500, 2500, 2500, 2500, 2500, 4500, 4500, 4500, 4500, 4500, 2500}, + {2000, 2000, 2000, 2000, 2000, 2000, 4000, 4000, 4000, 4000, 4000, 2000} +#else + {2500, 2500, 2500, 4500, 4500, 2500}, + {2000, 2000, 2000, 4000, 4000, 2000} +#endif // CONFIG_EXT_REFS + }; RD_OPT *const rd = &cpi->rd; const int idx = cpi->oxcf.mode == BEST; memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h index cd58bf8..42261ac 100644 --- a/vp10/encoder/rd.h +++ b/vp10/encoder/rd.h
@@ -33,8 +33,17 @@ #define INVALID_MV 0x80008000 +#if CONFIG_EXT_REFS +#define MAX_MODES 54 +#else #define MAX_MODES 30 +#endif // CONFIG_EXT_REFS + +#if CONFIG_EXT_REFS +#define MAX_REFS 12 +#else #define MAX_REFS 6 +#endif // CONFIG_EXT_REFS #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 @@ -43,34 +52,72 @@ // const MODE_DEFINITION vp10_mode_order[MAX_MODES] used in the rd code. typedef enum { THR_NEARESTMV, +#if CONFIG_EXT_REFS + THR_NEARESTL2, + THR_NEARESTL3, + THR_NEARESTL4, +#endif // CONFIG_EXT_REFS THR_NEARESTA, THR_NEARESTG, THR_DC, THR_NEWMV, +#if CONFIG_EXT_REFS + THR_NEWL2, + THR_NEWL3, + THR_NEWL4, +#endif // CONFIG_EXT_REFS THR_NEWA, THR_NEWG, THR_NEARMV, +#if CONFIG_EXT_REFS + THR_NEARL2, + THR_NEARL3, + THR_NEARL4, +#endif // CONFIG_EXT_REFS THR_NEARA, THR_NEARG, THR_ZEROMV, +#if CONFIG_EXT_REFS + THR_ZEROL2, + THR_ZEROL3, + THR_ZEROL4, +#endif // CONFIG_EXT_REFS THR_ZEROG, THR_ZEROA, THR_COMP_NEARESTLA, +#if CONFIG_EXT_REFS + THR_COMP_NEARESTL2A, + THR_COMP_NEARESTL3A, + THR_COMP_NEARESTL4A, +#endif // CONFIG_EXT_REFS THR_COMP_NEARESTGA, THR_TM, THR_COMP_NEARLA, THR_COMP_NEWLA, +#if CONFIG_EXT_REFS + THR_COMP_NEARL2A, + THR_COMP_NEWL2A, + THR_COMP_NEARL3A, + THR_COMP_NEWL3A, + THR_COMP_NEARL4A, + THR_COMP_NEWL4A, +#endif // CONFIG_EXT_REFS THR_COMP_NEARGA, THR_COMP_NEWGA, THR_COMP_ZEROLA, +#if CONFIG_EXT_REFS + THR_COMP_ZEROL2A, + THR_COMP_ZEROL3A, + THR_COMP_ZEROL4A, +#endif // CONFIG_EXT_REFS THR_COMP_ZEROGA, THR_H_PRED, @@ -85,9 +132,19 @@ typedef enum { THR_LAST, +#if CONFIG_EXT_REFS + THR_LAST2, + THR_LAST3, + THR_LAST4, +#endif // CONFIG_EXT_REFS THR_GOLD, THR_ALTR, THR_COMP_LA, +#if CONFIG_EXT_REFS + THR_COMP_L2A, + THR_COMP_L3A, + THR_COMP_L4A, +#endif // CONFIG_EXT_REFS THR_COMP_GA, THR_INTRA, } THR_MODES_SUB8X8;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 90a716d..ca978ba 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c
@@ -35,13 +35,38 @@ #include "vp10/encoder/encodemb.h" #include "vp10/encoder/encodemv.h" #include "vp10/encoder/encoder.h" +#include "vp10/encoder/hybrid_fwd_txfm.h" #include "vp10/encoder/mcomp.h" +#include "vp10/encoder/palette.h" #include "vp10/encoder/quantize.h" #include "vp10/encoder/ratectrl.h" #include "vp10/encoder/rd.h" #include "vp10/encoder/rdopt.h" #include "vp10/encoder/aq_variance.h" +#if CONFIG_EXT_REFS + +#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \ + (1 << LAST2_FRAME) | (1 << INTRA_FRAME) | \ + (1 << LAST3_FRAME) | (1 << LAST4_FRAME)) +#define LAST2_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \ + (1 << LAST_FRAME) | (1 << INTRA_FRAME) | \ + (1 << LAST3_FRAME) | (1 << LAST4_FRAME)) +#define LAST3_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \ + (1 << LAST_FRAME) | (1 << INTRA_FRAME) | \ + (1 << LAST2_FRAME) | (1 << LAST4_FRAME)) +#define LAST4_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \ + (1 << LAST_FRAME) | (1 << INTRA_FRAME) | \ + (1 << LAST2_FRAME) | (1 << LAST3_FRAME)) +#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \ + (1 << LAST2_FRAME) | (1 << INTRA_FRAME) | \ + (1 << LAST3_FRAME) | (1 << LAST4_FRAME)) +#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \ + (1 << LAST2_FRAME) | (1 << INTRA_FRAME) | \ + (1 << LAST3_FRAME) | (1 << LAST4_FRAME)) + +#else + #define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \ (1 << INTRA_FRAME)) #define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \ @@ -49,12 +74,19 @@ #define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \ (1 << INTRA_FRAME)) +#endif // CONFIG_EXT_REFS + #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01) #define MIN_EARLY_TERM_INDEX 3 #define NEW_MV_DISCOUNT_FACTOR 8 +#if CONFIG_EXT_TX +const double ext_tx_th = 0.98; +#else const double ext_tx_th = 0.99; +#endif + typedef struct { PREDICTION_MODE 
mode; @@ -66,6 +98,9 @@ } REF_DEFINITION; struct rdcost_block_args { +#if CONFIG_VAR_TX + const VP10_COMP *cpi; +#endif MACROBLOCK *x; ENTROPY_CONTEXT t_above[16]; ENTROPY_CONTEXT t_left[16]; @@ -83,34 +118,72 @@ #define LAST_NEW_MV_INDEX 6 static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = { {NEARESTMV, {LAST_FRAME, NONE}}, +#if CONFIG_EXT_REFS + {NEARESTMV, {LAST2_FRAME, NONE}}, + {NEARESTMV, {LAST3_FRAME, NONE}}, + {NEARESTMV, {LAST4_FRAME, NONE}}, +#endif // CONFIG_EXT_REFS {NEARESTMV, {ALTREF_FRAME, NONE}}, {NEARESTMV, {GOLDEN_FRAME, NONE}}, {DC_PRED, {INTRA_FRAME, NONE}}, {NEWMV, {LAST_FRAME, NONE}}, +#if CONFIG_EXT_REFS + {NEWMV, {LAST2_FRAME, NONE}}, + {NEWMV, {LAST3_FRAME, NONE}}, + {NEWMV, {LAST4_FRAME, NONE}}, +#endif // CONFIG_EXT_REFS {NEWMV, {ALTREF_FRAME, NONE}}, {NEWMV, {GOLDEN_FRAME, NONE}}, {NEARMV, {LAST_FRAME, NONE}}, +#if CONFIG_EXT_REFS + {NEARMV, {LAST2_FRAME, NONE}}, + {NEARMV, {LAST3_FRAME, NONE}}, + {NEARMV, {LAST4_FRAME, NONE}}, +#endif // CONFIG_EXT_REFS {NEARMV, {ALTREF_FRAME, NONE}}, {NEARMV, {GOLDEN_FRAME, NONE}}, {ZEROMV, {LAST_FRAME, NONE}}, +#if CONFIG_EXT_REFS + {ZEROMV, {LAST2_FRAME, NONE}}, + {ZEROMV, {LAST3_FRAME, NONE}}, + {ZEROMV, {LAST4_FRAME, NONE}}, +#endif // CONFIG_EXT_REFS {ZEROMV, {GOLDEN_FRAME, NONE}}, {ZEROMV, {ALTREF_FRAME, NONE}}, {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}}, +#if CONFIG_EXT_REFS + {NEARESTMV, {LAST2_FRAME, ALTREF_FRAME}}, + {NEARESTMV, {LAST3_FRAME, ALTREF_FRAME}}, + {NEARESTMV, {LAST4_FRAME, ALTREF_FRAME}}, +#endif // CONFIG_EXT_REFS {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}}, {TM_PRED, {INTRA_FRAME, NONE}}, {NEARMV, {LAST_FRAME, ALTREF_FRAME}}, {NEWMV, {LAST_FRAME, ALTREF_FRAME}}, +#if CONFIG_EXT_REFS + {NEARMV, {LAST2_FRAME, ALTREF_FRAME}}, + {NEWMV, {LAST2_FRAME, ALTREF_FRAME}}, + {NEARMV, {LAST3_FRAME, ALTREF_FRAME}}, + {NEWMV, {LAST3_FRAME, ALTREF_FRAME}}, + {NEARMV, {LAST4_FRAME, ALTREF_FRAME}}, + {NEWMV, {LAST4_FRAME, ALTREF_FRAME}}, +#endif // CONFIG_EXT_REFS {NEARMV, {GOLDEN_FRAME, 
ALTREF_FRAME}}, {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}}, {ZEROMV, {LAST_FRAME, ALTREF_FRAME}}, +#if CONFIG_EXT_REFS + {ZEROMV, {LAST3_FRAME, ALTREF_FRAME}}, + {ZEROMV, {LAST2_FRAME, ALTREF_FRAME}}, + {ZEROMV, {LAST4_FRAME, ALTREF_FRAME}}, +#endif // CONFIG_EXT_REFS {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}}, {H_PRED, {INTRA_FRAME, NONE}}, @@ -125,9 +198,19 @@ static const REF_DEFINITION vp10_ref_order[MAX_REFS] = { {{LAST_FRAME, NONE}}, +#if CONFIG_EXT_REFS + {{LAST2_FRAME, NONE}}, + {{LAST3_FRAME, NONE}}, + {{LAST4_FRAME, NONE}}, +#endif // CONFIG_EXT_REFS {{GOLDEN_FRAME, NONE}}, {{ALTREF_FRAME, NONE}}, {{LAST_FRAME, ALTREF_FRAME}}, +#if CONFIG_EXT_REFS + {{LAST2_FRAME, ALTREF_FRAME}}, + {{LAST3_FRAME, ALTREF_FRAME}}, + {{LAST4_FRAME, ALTREF_FRAME}}, +#endif // CONFIG_EXT_REFS {{GOLDEN_FRAME, ALTREF_FRAME}}, {{INTRA_FRAME, NONE}}, }; @@ -346,7 +429,11 @@ }; static int cost_coeffs(MACROBLOCK *x, int plane, int block, +#if CONFIG_VAR_TX + int coeff_ctx, +#else ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, +#endif TX_SIZE tx_size, const int16_t *scan, const int16_t *nb, int use_fast_coef_costing) { @@ -361,7 +448,11 @@ unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; uint8_t token_cache[32 * 32]; +#if CONFIG_VAR_TX + int pt = coeff_ctx; +#else int pt = combine_entropy_contexts(*A, *L); +#endif int c, cost; #if CONFIG_VP9_HIGHBITDEPTH const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd); @@ -369,9 +460,11 @@ const int16_t *cat6_high_cost = vp10_get_high_cost_table(8); #endif +#if !CONFIG_VAR_TX && !CONFIG_SUPERTX // Check for consistency of tx_size with mode info assert(type == PLANE_TYPE_Y ? 
mbmi->tx_size == tx_size : get_uv_tx_size(mbmi, pd) == tx_size); +#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX if (eob == 0) { // single eob token @@ -425,8 +518,10 @@ } } +#if !CONFIG_VAR_TX // is eob first coefficient; *A = *L = (c > 0); +#endif return cost; } @@ -454,10 +549,23 @@ static int rate_block(int plane, int block, int blk_row, int blk_col, TX_SIZE tx_size, struct rdcost_block_args* args) { - return cost_coeffs(args->x, plane, block, args->t_above + blk_col, - args->t_left + blk_row, tx_size, - args->so->scan, args->so->neighbors, +#if CONFIG_VAR_TX + int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col), + *(args->t_left + blk_row)); + int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx, + tx_size, args->so->scan, args->so->neighbors, + args->use_fast_coef_costing); + const struct macroblock_plane *p = &args->x->plane[plane]; + *(args->t_above + blk_col) = !(p->eobs[block] == 0); + *(args->t_left + blk_row) = !(p->eobs[block] == 0); + return coeff_cost; +#else + return cost_coeffs(args->x, plane, block, + args->t_above + blk_col, + args->t_left + blk_row, + tx_size, args->so->scan, args->so->neighbors, args->use_fast_coef_costing); +#endif } static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, @@ -476,24 +584,55 @@ return; if (!is_inter_block(mbmi)) { +#if CONFIG_VAR_TX + struct encode_b_args arg = {x, NULL, &mbmi->skip}; +#if CONFIG_VP9_HIGHBITDEPTH + vp10_encode_block_intra(plane, block, blk_row, blk_col, + plane_bsize, tx_size, &arg); + dist_block(x, plane, block, tx_size, &dist, &sse); +#else + uint8_t *dst, *src; + int src_stride = x->plane[plane].src.stride; + int dst_stride = xd->plane[plane].dst.stride; + unsigned int tmp_sse; + PREDICTION_MODE mode = (plane == 0) ? 
+ get_y_mode(xd->mi[0], block) : mbmi->uv_mode; + + src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)]; + dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)]; + vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], + b_height_log2_lookup[plane_bsize], + tx_size, mode, dst, dst_stride, + dst, dst_stride, blk_col, blk_row, plane); + args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride, + dst, dst_stride, &tmp_sse); + sse = (int64_t)tmp_sse * 16; + vp10_encode_block_intra(plane, block, blk_row, blk_col, + plane_bsize, tx_size, &arg); + args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride, + dst, dst_stride, &tmp_sse); + dist = (int64_t)tmp_sse * 16; +#endif // CONFIG_VP9_HIGHBITDEPTH +#else struct encode_b_args arg = {x, NULL, &mbmi->skip}; vp10_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size, &arg); dist_block(x, plane, block, tx_size, &dist, &sse); +#endif } else if (max_txsize_lookup[plane_bsize] == tx_size) { if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_NONE) { // full forward transform and quantization vp10_xform_quant(x, plane, block, blk_row, blk_col, - plane_bsize, tx_size); + plane_bsize, tx_size, VP10_XFORM_QUANT_B); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_AC_ONLY) { // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); - vp10_xform_quant_dc(x, plane, block, blk_row, blk_col, - plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size, VP10_XFORM_QUANT_DC); sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; dist = sse; if (x->plane[plane].eobs[block]) { @@ -517,7 +656,8 @@ } } else { // full forward transform and quantization - vp10_xform_quant(x, plane, block, blk_row, blk_col, 
plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + VP10_XFORM_QUANT_B); dist_block(x, plane, block, tx_size, &dist, &sse); } @@ -551,6 +691,9 @@ } static void txfm_rd_in_plane(MACROBLOCK *x, +#if CONFIG_VAR_TX + const VP10_COMP *cpi, +#endif int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, @@ -562,6 +705,9 @@ struct rdcost_block_args args; vp10_zero(args); args.x = x; +#if CONFIG_VAR_TX + args.cpi = cpi; +#endif args.best_rd = ref_best_rd; args.use_fast_coef_costing = use_fast_coef_casting; args.skippable = 1; @@ -571,11 +717,11 @@ vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); - tx_type = get_tx_type(pd->plane_type, xd, 0); - args.so = get_scan(tx_size, tx_type); + tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size); + args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); vp10_foreach_transformed_block_in_plane(xd, bsize, plane, - block_rd_txfm, &args); + block_rd_txfm, &args); if (args.exit_early) { *rate = INT_MAX; *distortion = INT64_MAX; @@ -589,6 +735,54 @@ } } +#if CONFIG_SUPERTX +void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x, +#if CONFIG_VAR_TX + const VP10_COMP *cpi, +#endif + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + struct rdcost_block_args args; + TX_TYPE tx_type; + + vp10_zero(args); + args.x = x; +#if CONFIG_VAR_TX + args.cpi = cpi; +#endif + args.best_rd = ref_best_rd; + args.use_fast_coef_costing = use_fast_coef_casting; + + if (plane == 0) + xd->mi[0]->mbmi.tx_size = tx_size; + + vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); + + tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size); + args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); 
+ + block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), + tx_size, &args); + + if (args.exit_early) { + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } else { + *distortion = args.this_dist; + *rate = args.this_rate; + *sse = args.this_sse; + *skippable = !x->plane[plane].eobs[0]; + } +} +#endif // CONFIG_SUPERTX + static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *sse, @@ -599,21 +793,92 @@ const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - TX_TYPE tx_type, best_tx_type = DCT_DCT; int r, s; int64_t d, psse, this_rd, best_rd = INT64_MAX; vpx_prob skip_prob = vp10_get_skip_prob(cm, xd); int s0 = vp10_cost_bit(skip_prob, 0); int s1 = vp10_cost_bit(skip_prob, 1); +#if CONFIG_EXT_TX + int ext_tx_set; +#endif // CONFIG_EXT_TX const int is_inter = is_inter_block(mbmi); mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size); + +#if CONFIG_EXT_TX + ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter); + + if (is_inter && + get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 && + !xd->lossless[mbmi->segment_id]) { + for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (is_inter) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + } else { + if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) { + if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) + continue; + } + if (!ext_tx_used_intra[ext_tx_set][tx_type]) + continue; + } + + mbmi->tx_type = tx_type; + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + continue; + } + + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, + &psse, ref_best_rd, 0, bs, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + + if (r == INT_MAX) + continue; + if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) { + if 
(is_inter) { + if (ext_tx_set > 0) + r += cpi->inter_tx_type_costs[ext_tx_set] + [mbmi->tx_size][mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } + + if (s) + this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse); + else + this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d); + if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s) + this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse)); + + if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) { + best_rd = this_rd; + best_tx_type = mbmi->tx_type; + } + } + } + +#else // CONFIG_EXT_TX if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) { for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) { mbmi->tx_type = tx_type; - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &psse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); if (r == INT_MAX) @@ -637,10 +902,33 @@ } } } +#endif // CONFIG_EXT_TX mbmi->tx_type = best_tx_type; - txfm_rd_in_plane(x, rate, distortion, skip, + + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + rate, distortion, skip, sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 && + !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) { + int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter); + if (is_inter) { + if (ext_tx_set > 0) + *rate += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + *rate += + cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } +#else if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) { if (is_inter) @@ -650,6 +938,7 @@ [intra_mode_to_tx_type_context[mbmi->mode]] [mbmi->tx_type]; } +#endif // 
CONFIG_EXT_TX } static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, @@ -662,7 +951,11 @@ mbmi->tx_size = TX_4X4; - txfm_rd_in_plane(x, rate, distortion, skip, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + rate, distortion, skip, sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); } @@ -690,6 +983,9 @@ const int tx_select = cm->tx_mode == TX_MODE_SELECT; TX_TYPE tx_type, best_tx_type = DCT_DCT; const int is_inter = is_inter_block(mbmi); +#if CONFIG_EXT_TX + int ext_tx_set; +#endif // CONFIG_EXT_TX const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); assert(skip_prob > 0); @@ -722,11 +1018,56 @@ r_tx_size += vp10_cost_one(tx_probs[m]); } +#if CONFIG_EXT_TX + ext_tx_set = get_ext_tx_set(n, bs, is_inter); + if (is_inter) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + } else { + if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) { + if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) + continue; + } + if (!ext_tx_used_intra[ext_tx_set][tx_type]) + continue; + } + mbmi->tx_type = tx_type; + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + break; + } + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, + &sse, ref_best_rd, 0, bs, n, + cpi->sf.use_fast_coef_costing); + if (get_ext_tx_types(n, bs, is_inter) > 1 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + r != INT_MAX) { + if (is_inter) { + if (ext_tx_set > 0) + r += cpi->inter_tx_type_costs[ext_tx_set] + [mbmi->tx_size][mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } +#else // CONFIG_EXT_TX if (n >= TX_32X32 && tx_type != DCT_DCT) { continue; } mbmi->tx_type = tx_type; - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &sse, ref_best_rd, 0, bs, n, 
cpi->sf.use_fast_coef_costing); if (n < TX_32X32 && @@ -739,6 +1080,7 @@ [intra_mode_to_tx_type_context[mbmi->mode]] [mbmi->tx_type]; } +#endif // CONFIG_EXT_TX if (r == INT_MAX) continue; @@ -783,9 +1125,11 @@ mbmi->tx_size = best_tx; mbmi->tx_type = best_tx_type; - if (mbmi->tx_size >= TX_32X32) - assert(mbmi->tx_type == DCT_DCT); - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx, cpi->sf.use_fast_coef_costing); } @@ -800,11 +1144,10 @@ assert(bs == xd->mi[0]->mbmi.sb_type); - if (CONFIG_MISC_FIXES && xd->lossless[0]) { + if (xd->lossless[0]) { choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, bs); - } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL || - xd->lossless[xd->mi[0]->mbmi.segment_id]) { + } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, bs); } else { @@ -834,6 +1177,163 @@ return 0; } +void rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, + int palette_ctx, int dc_mode_cost, + PALETTE_MODE_INFO *palette_mode_info, + uint8_t *best_palette_color_map, + TX_SIZE *best_tx, PREDICTION_MODE *mode_selected, + int64_t *best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mic = xd->mi[0]; + int rows = 4 * num_4x4_blocks_high_lookup[bsize]; + int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; + int this_rate, this_rate_tokenonly, s; + int64_t this_distortion, this_rd; + int colors, n; + int src_stride = x->plane[0].src.stride; + uint8_t *src = x->plane[0].src.buf; + +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->common.use_highbitdepth) + colors = vp10_count_colors_highbd(src, src_stride, rows, cols, + cpi->common.bit_depth); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + colors = vp10_count_colors(src, src_stride, rows, cols); + palette_mode_info->palette_size[0] = 0; + + if (colors > 1 && colors <= 64 && 
cpi->common.allow_screen_content_tools) { + int r, c, i, j, k; + int max_itr = 50; + int color_ctx, color_idx = 0; + int color_order[PALETTE_MAX_SIZE]; + double *data = x->palette_buffer->kmeans_data_buf; + uint8_t *indices = x->palette_buffer->kmeans_indices_buf; + uint8_t *pre_indices = x->palette_buffer->kmeans_pre_indices_buf; + double centroids[PALETTE_MAX_SIZE]; + uint8_t *color_map; + double lb, ub, val; + PALETTE_MODE_INFO *pmi = &mic->mbmi.palette_mode_info; +#if CONFIG_VP9_HIGHBITDEPTH + uint16_t *src16 = CONVERT_TO_SHORTPTR(src); + if (cpi->common.use_highbitdepth) + lb = ub = src16[0]; + else +#endif // CONFIG_VP9_HIGHBITDEPTH + lb = ub = src[0]; + +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->common.use_highbitdepth) { + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + val = src16[r * src_stride + c]; + data[r * cols + c] = val; + if (val < lb) + lb = val; + else if (val > ub) + ub = val; + } + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + val = src[r * src_stride + c]; + data[r * cols + c] = val; + if (val < lb) + lb = val; + else if (val > ub) + ub = val; + } + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + mic->mbmi.mode = DC_PRED; + + for (n = colors > PALETTE_MAX_SIZE ? 
PALETTE_MAX_SIZE : colors; + n >= 2; --n) { + for (i = 0; i < n; ++i) + centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2; + vp10_k_means(data, centroids, indices, pre_indices, rows * cols, + n, 1, max_itr); + vp10_insertion_sort(centroids, n); + for (i = 0; i < n; ++i) + centroids[i] = round(centroids[i]); + // remove duplicates + i = 1; + k = n; + while (i < k) { + if (centroids[i] == centroids[i - 1]) { + j = i; + while (j < k - 1) { + centroids[j] = centroids[j + 1]; + ++j; + } + --k; + } else { + ++i; + } + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->common.use_highbitdepth) + for (i = 0; i < k; ++i) + mic->mbmi.palette_mode_info.palette_colors[i] = + clip_pixel_highbd(round(centroids[i]), cpi->common.bit_depth); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + for (i = 0; i < k; ++i) + pmi->palette_colors[i] = clip_pixel((int)round(centroids[i])); + pmi->palette_size[0] = k; + + vp10_calc_indices(data, centroids, indices, rows * cols, k, 1); + for (r = 0; r < rows; ++r) + for (c = 0; c < cols; ++c) + xd->plane[0].color_index_map[r * cols + c] = indices[r * cols + c]; + + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, *best_rd); + if (this_rate_tokenonly == INT_MAX) + continue; + + this_rate = this_rate_tokenonly + dc_mode_cost + + cpi->common.bit_depth * k * vp10_cost_bit(128, 0) + + cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2]; + this_rate += + vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8] + [palette_ctx], 1); + color_map = xd->plane[0].color_index_map; + this_rate += write_uniform_cost(k, xd->plane[0].color_index_map[0]); + for (i = 0; i < rows; ++i) { + for (j = (i == 0 ? 
1 : 0); j < cols; ++j) { + color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, + k, color_order); + for (r = 0; r < k; ++r) + if (color_map[i * cols + j] == color_order[r]) { + color_idx = r; + break; + } + assert(color_idx < k); + this_rate += + cpi->palette_y_color_cost[k - 2][color_ctx][color_idx]; + } + } + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < *best_rd) { + *best_rd = this_rd; + *palette_mode_info = mic->mbmi.palette_mode_info; + memcpy(best_palette_color_map, xd->plane[0].color_index_map, + rows * cols * sizeof(xd->plane[0].color_index_map[0])); + *mode_selected = DC_PRED; + *best_tx = mic->mbmi.tx_size; + } + } + } +} + static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, int row, int col, PREDICTION_MODE *best_mode, @@ -864,6 +1364,7 @@ memcpy(ta, a, sizeof(ta)); memcpy(tl, l, sizeof(tl)); xd->mi[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -880,7 +1381,7 @@ // one of the neighboring directional modes if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { if (conditional_skipintra(mode, *best_mode)) - continue; + continue; } memcpy(tempa, ta, sizeof(ta)); @@ -892,8 +1393,8 @@ const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8, - block, - p->src_diff); + block, + p->src_diff); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); xd->mi[0]->bmi[block].as_mode = mode; vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, @@ -902,11 +1403,21 @@ vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride, xd->bd); if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block); - const scan_order *so = get_scan(TX_4X4, 
tx_type); + TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); +#if CONFIG_VAR_TX + const int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); +#endif vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, + ratey += cost_coeffs(x, 0, block, +#if CONFIG_VAR_TX + coeff_ctx, +#else + tempa + idx, templ + idy, +#endif + TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) @@ -916,11 +1427,21 @@ xd->bd, DCT_DCT, 1); } else { int64_t unused; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block); - const scan_order *so = get_scan(TX_4X4, tx_type); + TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); +#if CONFIG_VAR_TX + const int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); +#endif vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, + ratey += cost_coeffs(x, 0, block, +#if CONFIG_VAR_TX + coeff_ctx, +#else + tempa + idx, templ + idy, +#endif + TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); distortion += vp10_highbd_block_error( @@ -952,7 +1473,7 @@ num_4x4_blocks_wide * 4 * sizeof(uint16_t)); } } - next_highbd: +next_highbd: {} } if (best_rd >= rd_thresh) @@ -981,7 +1502,7 @@ // one of the neighboring directional modes if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { if (conditional_skipintra(mode, *best_mode)) - continue; + continue; } memcpy(tempa, ta, sizeof(ta)); @@ -1001,26 +1522,49 @@ vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride); if 
(xd->lossless[xd->mi[0]->mbmi.segment_id]) { - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block); - const scan_order *so = get_scan(TX_4X4, tx_type); + TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); +#if CONFIG_VAR_TX + int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); +#endif vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, +#if CONFIG_VAR_TX + ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan, + so->neighbors, cpi->sf.use_fast_coef_costing); + *(tempa + idx) = !(p->eobs[block] == 0); + *(templ + idy) = !(p->eobs[block] == 0); +#else + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, + TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); +#endif if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, p->eobs[block], DCT_DCT, 1); } else { int64_t unused; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block); - const scan_order *so = get_scan(TX_4X4, tx_type); + TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); +#if CONFIG_VAR_TX + int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); +#endif vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, - so->scan, so->neighbors, - cpi->sf.use_fast_coef_costing); +#if CONFIG_VAR_TX + ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan, + so->neighbors, cpi->sf.use_fast_coef_costing); + *(tempa + idx) = !(p->eobs[block] == 0); + *(templ + idy) = !(p->eobs[block] == 0); +#else + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, + 
TX_4X4, so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); +#endif distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) @@ -1078,11 +1622,15 @@ int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; - const int *bmode_costs = cpi->mbmode_cost; + const int *bmode_costs = cpi->mbmode_cost[0]; memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); +#if CONFIG_EXT_INTRA + mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0; +#endif // CONFIG_EXT_INTRA + // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { @@ -1127,6 +1675,294 @@ return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); } +#if CONFIG_EXT_INTRA +// Return 1 if an ext intra mode is selected; return 0 otherwise. 
+static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int *rate_tokenonly, + int64_t *distortion, int *skippable, + BLOCK_SIZE bsize, int mode_cost, + int64_t *best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mic = xd->mi[0]; + MB_MODE_INFO *mbmi = &mic->mbmi; + int this_rate, this_rate_tokenonly, s; + int ext_intra_selected_flag = 0; + int64_t this_distortion, this_rd; + EXT_INTRA_MODE mode; + TX_SIZE best_tx_size = TX_4X4; + EXT_INTRA_MODE_INFO ext_intra_mode_info; +#if CONFIG_EXT_TX + TX_TYPE best_tx_type; +#endif // CONFIG_EXT_TX + + vp10_zero(ext_intra_mode_info); + mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1; + mbmi->mode = DC_PRED; + + for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) { + mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode; + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, *best_rd); + if (this_rate_tokenonly == INT_MAX) + continue; + + this_rate = this_rate_tokenonly + + vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) + + write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < *best_rd) { + *best_rd = this_rd; + best_tx_size = mic->mbmi.tx_size; + ext_intra_mode_info = mbmi->ext_intra_mode_info; +#if CONFIG_EXT_TX + best_tx_type = mic->mbmi.tx_type; +#endif // CONFIG_EXT_TX + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + ext_intra_selected_flag = 1; + } + } + + if (ext_intra_selected_flag) { + mbmi->mode = DC_PRED; + mbmi->tx_size = best_tx_size; + mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = + ext_intra_mode_info.use_ext_intra_mode[0]; + mbmi->ext_intra_mode_info.ext_intra_mode[0] = + ext_intra_mode_info.ext_intra_mode[0]; +#if CONFIG_EXT_TX + mbmi->tx_type = best_tx_type; +#endif // CONFIG_EXT_TX + return 1; + } else { + return 0; + } +} + +static int64_t rd_pick_intra_angle_sby(VP10_COMP 
*cpi, MACROBLOCK *x, + int *rate, int *rate_tokenonly, + int64_t *distortion, int *skippable, + BLOCK_SIZE bsize, int rate_overhead, + int64_t best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mic = xd->mi[0]; + MB_MODE_INFO *mbmi = &mic->mbmi; + int this_rate, this_rate_tokenonly, s; + int angle_delta, best_angle_delta = 0; + const double rd_adjust = 1.2; + int64_t this_distortion, this_rd, sse_dummy; + TX_SIZE best_tx_size = mic->mbmi.tx_size; +#if CONFIG_EXT_TX + TX_TYPE best_tx_type = mbmi->tx_type; +#endif // CONFIG_EXT_TX + + if (ANGLE_FAST_SEARCH) { + int deltas_level1[3] = {0, -2, 2}; + int deltas_level2[3][2] = { + {-1, 1}, {-3, -1}, {1, 3}, + }; + const int level1 = 3, level2 = 2; + int i, j, best_i = -1; + + for (i = 0; i < level1; ++i) { + mic->mbmi.angle_delta[0] = deltas_level1[i]; + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, + (i == 0 && best_rd < INT64_MAX) ? best_rd * rd_adjust : + best_rd); + if (this_rate_tokenonly == INT_MAX) { + if (i == 0) + break; + else + continue; + } + this_rate = this_rate_tokenonly + rate_overhead; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust) + break; + if (this_rd < best_rd) { + best_i = i; + best_rd = this_rd; + best_angle_delta = mbmi->angle_delta[0]; + best_tx_size = mbmi->tx_size; +#if CONFIG_EXT_TX + best_tx_type = mbmi->tx_type; +#endif // CONFIG_EXT_TX + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + + if (best_i >= 0) { + for (j = 0; j < level2; ++j) { + mic->mbmi.angle_delta[0] = deltas_level2[best_i][j]; + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, best_rd); + if (this_rate_tokenonly == INT_MAX) + continue; + this_rate = this_rate_tokenonly + rate_overhead; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + if (this_rd < best_rd) 
{ + best_rd = this_rd; + best_angle_delta = mbmi->angle_delta[0]; + best_tx_size = mbmi->tx_size; +#if CONFIG_EXT_TX + best_tx_type = mbmi->tx_type; +#endif // CONFIG_EXT_TX + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + } + } else { + for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS; + ++angle_delta) { + mic->mbmi.angle_delta[0] = angle_delta; + + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, best_rd); + if (this_rate_tokenonly == INT_MAX) + continue; + + this_rate = this_rate_tokenonly + rate_overhead; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < best_rd) { + best_rd = this_rd; + best_angle_delta = mbmi->angle_delta[0]; + best_tx_size = mbmi->tx_size; +#if CONFIG_EXT_TX + best_tx_type = mbmi->tx_type; +#endif // CONFIG_EXT_TX + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + } + + mbmi->tx_size = best_tx_size; + mbmi->angle_delta[0] = best_angle_delta; +#if CONFIG_EXT_TX + mbmi->tx_type = best_tx_type; +#endif // CONFIG_EXT_TX + + if (*rate_tokenonly < INT_MAX) { + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate_tokenonly, &this_distortion, &s, + &sse_dummy, INT64_MAX, 0, bsize, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + } + + return best_rd; +} + +static inline int get_angle_index(double angle) { + const double step = 22.5, base = 45; + return (int)round((angle - base) / step); +} + +static void angle_estimation(const uint8_t *src, int src_stride, + int rows, int cols, double *hist) { + int r, c, i, index; + const double pi = 3.1415; + double angle, dx, dy; + double temp, divisor = 0; + + for (i = 0; i < DIRECTIONAL_MODES; ++i) + hist[i] = 0; + + src += src_stride; + for (r = 1; r < rows; ++r) { + for (c = 1; c < cols; ++c) { + dx = src[c] - src[c - 1]; + dy = src[c] - src[c - src_stride]; + 
temp = dx * dx + dy * dy; + if (dy == 0) + angle = 90; + else + angle = (atan((double)dx / (double)dy)) * 180 / pi; + assert(angle >= -90 && angle <= 90); + index = get_angle_index(angle + 180); + if (index < DIRECTIONAL_MODES) { + hist[index] += temp; + divisor += temp; + } + if (angle > 0) { + index = get_angle_index(angle); + if (index >= 0) { + hist[index] += temp; + divisor += temp; + } + } + } + src += src_stride; + } + + if (divisor < 1) + divisor = 1; + for (i = 0; i < DIRECTIONAL_MODES; ++i) + hist[i] /= divisor; +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_angle_estimation(const uint8_t *src8, int src_stride, + int rows, int cols, double *hist) { + int r, c, i, index; + const double pi = 3.1415; + double angle, dx, dy; + double temp, divisor = 0; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + + for (i = 0; i < DIRECTIONAL_MODES; ++i) + hist[i] = 0; + + src += src_stride; + for (r = 1; r < rows; ++r) { + for (c = 1; c < cols; ++c) { + dx = src[c] - src[c - 1]; + dy = src[c] - src[c - src_stride]; + temp = dx * dx + dy * dy; + if (dy == 0) + angle = 90; + else + angle = (atan((double)dx / (double)dy)) * 180 / pi; + assert(angle >= -90 && angle <= 90); + index = get_angle_index(angle + 180); + if (index < DIRECTIONAL_MODES) { + hist[index] += temp; + divisor += temp; + } + if (angle > 0) { + index = get_angle_index(angle); + if (index >= 0) { + hist[index] += temp; + divisor += temp; + } + } + } + src += src_stride; + } + + if (divisor < 1) + divisor = 1; + for (i = 0; i < DIRECTIONAL_MODES; ++i) + hist[i] /= divisor; +} +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_EXT_INTRA + // This function is used only for intra_only frames static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, @@ -1140,33 +1976,117 @@ int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; +#if CONFIG_EXT_INTRA + EXT_INTRA_MODE_INFO ext_intra_mode_info; + int is_directional_mode, 
rate_overhead, best_angle_delta = 0; + uint8_t directional_mode_skip_mask[INTRA_MODES]; + const int src_stride = x->plane[0].src.stride; + const uint8_t *src = x->plane[0].src.buf; + double hist[DIRECTIONAL_MODES]; +#endif // CONFIG_EXT_INTRA TX_TYPE best_tx_type = DCT_DCT; int *bmode_costs; + PALETTE_MODE_INFO palette_mode_info; + uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ? + x->palette_buffer->best_palette_color_map : NULL; + const int rows = 4 * num_4x4_blocks_high_lookup[bsize]; + const int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; + int palette_ctx = 0; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0); bmode_costs = cpi->y_mode_costs[A][L]; +#if CONFIG_EXT_INTRA + ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mic->mbmi.angle_delta[0] = 0; + memset(directional_mode_skip_mask, 0, + sizeof(directional_mode_skip_mask[0]) * INTRA_MODES); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + highbd_angle_estimation(src, src_stride, rows, cols, hist); + else +#endif + angle_estimation(src, src_stride, rows, cols, hist); + + for (mode = 0; mode < INTRA_MODES; ++mode) { + if (mode != DC_PRED && mode != TM_PRED) { + int index = get_angle_index((double)mode_to_angle_map[mode]); + double score, weight = 1.0; + score = hist[index]; + if (index > 0) { + score += hist[index - 1] * 0.5; + weight += 0.5; + } + if (index < DIRECTIONAL_MODES - 1) { + score += hist[index + 1] * 0.5; + weight += 0.5; + } + score /= weight; + if (score < ANGLE_SKIP_THRESH) + directional_mode_skip_mask[mode] = 1; + } + } +#endif // CONFIG_EXT_INTRA memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); + palette_mode_info.palette_size[0] = 0; + mic->mbmi.palette_mode_info.palette_size[0] = 0; + if (above_mi) + 
palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); + if (left_mi) + palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); /* Y Search for intra prediction mode */ - for (mode = DC_PRED; mode <= TM_PRED; mode++) { + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { mic->mbmi.mode = mode; - +#if CONFIG_EXT_INTRA + is_directional_mode = (mode != DC_PRED && mode != TM_PRED); + if (is_directional_mode && directional_mode_skip_mask[mode]) + continue; + if (is_directional_mode) { + rate_overhead = bmode_costs[mode] + + write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0); + this_rate_tokenonly = INT_MAX; + this_rd = + rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rate_tokenonly, + &this_distortion, &s, bsize, rate_overhead, + best_rd); + } else { + mic->mbmi.angle_delta[0] = 0; + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, best_rd); + } +#endif // CONFIG_EXT_INTRA super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, - &s, NULL, bsize, best_rd); + &s, NULL, bsize, best_rd); if (this_rate_tokenonly == INT_MAX) continue; this_rate = this_rate_tokenonly + bmode_costs[mode]; + if (cpi->common.allow_screen_content_tools && mode == DC_PRED) + this_rate += + vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8] + [palette_ctx], 0); +#if CONFIG_EXT_INTRA + if (mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) + this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0); + if (is_directional_mode) + this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + + mic->mbmi.angle_delta[0]); +#endif // CONFIG_EXT_INTRA this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; best_tx = mic->mbmi.tx_size; +#if CONFIG_EXT_INTRA + best_angle_delta = mic->mbmi.angle_delta[0]; +#endif // CONFIG_EXT_INTRA best_tx_type = mic->mbmi.tx_type; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; @@ 
-1175,13 +2095,685 @@ } } + if (cpi->common.allow_screen_content_tools) + rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED], + &palette_mode_info, best_palette_color_map, + &best_tx, &mode_selected, &best_rd); + +#if CONFIG_EXT_INTRA + if (!palette_mode_info.palette_size[0] > 0 && ALLOW_FILTER_INTRA_MODES) { + if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion, + skippable, bsize, bmode_costs[DC_PRED], + &best_rd)) { + mode_selected = mic->mbmi.mode; + best_tx = mic->mbmi.tx_size; + ext_intra_mode_info = mic->mbmi.ext_intra_mode_info; + best_tx_type = mic->mbmi.tx_type; + } + } + + mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = + ext_intra_mode_info.use_ext_intra_mode[0]; + if (ext_intra_mode_info.use_ext_intra_mode[0]) { + mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] = + ext_intra_mode_info.ext_intra_mode[0]; + } +#endif // CONFIG_EXT_INTRA + mic->mbmi.mode = mode_selected; mic->mbmi.tx_size = best_tx; +#if CONFIG_EXT_INTRA + mic->mbmi.angle_delta[0] = best_angle_delta; +#endif // CONFIG_EXT_INTRA mic->mbmi.tx_type = best_tx_type; + mic->mbmi.palette_mode_info.palette_size[0] = + palette_mode_info.palette_size[0]; + if (palette_mode_info.palette_size[0] > 0) { + memcpy(mic->mbmi.palette_mode_info.palette_colors, + palette_mode_info.palette_colors, + PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0])); + memcpy(xd->plane[0].color_index_map, best_palette_color_map, + rows * cols * sizeof(best_palette_color_map[0])); + } return best_rd; } +#if CONFIG_VAR_TX +static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, + int blk_row, int blk_col, int plane, int block, + int plane_bsize, int coeff_ctx, + int *rate, int64_t *dist, int64_t *bsse, int *skip) { + MACROBLOCKD *xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; +#if CONFIG_VP9_HIGHBITDEPTH + const int ss_txfrm_size = tx_size << 1; + int64_t 
this_sse; + int shift = tx_size == TX_32X32 ? 0 : 2; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); +#endif + unsigned int tmp_sse = 0; + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + + BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size]; + int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize]; + int src_stride = p->src.stride; + uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col]; + uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]); + uint8_t *rec_buffer; +#else + DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); +#endif + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + VP10_XFORM_QUANT_B); + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer_alloc_16); + vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + NULL, 0, NULL, 0, bh, bh, xd->bd); + } else { + rec_buffer = (uint8_t *)rec_buffer_alloc_16; + vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + NULL, 0, NULL, 0, bh, bh); + } +#else + vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + NULL, 0, NULL, 0, bh, bh); +#endif + + if (blk_row + (bh >> 2) > max_blocks_high || + blk_col + (bh >> 2) > max_blocks_wide) { + int idx, idy; + unsigned int this_sse; + int 
blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row); + int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col); + for (idy = 0; idy < blocks_height; idy += 2) { + for (idx = 0; idx < blocks_width; idx += 2) { + cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, + src_stride, + rec_buffer + 4 * idy * 32 + 4 * idx, + 32, &this_sse); + tmp_sse += this_sse; + } + } + } else { + cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse); + } + +#if CONFIG_VP9_HIGHBITDEPTH + *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse, xd->bd) >> shift; + *bsse += this_sse >> shift; +#else + *bsse += (int64_t)tmp_sse * 16; + + if (p->eobs[block] > 0) { + switch (tx_size) { + case TX_32X32: + vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_16X16: + vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_8X8: + vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_4X4: + vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type, + xd->lossless[xd->mi[0]->mbmi.segment_id]); + break; + default: + assert(0 && "Invalid transform size"); + break; + } + + if ((bh >> 2) + blk_col > max_blocks_wide || + (bh >> 2) + blk_row > max_blocks_high) { + int idx, idy; + unsigned int this_sse; + int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row); + int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col); + tmp_sse = 0; + for (idy = 0; idy < blocks_height; idy += 2) { + for (idx = 0; idx < blocks_width; idx += 2) { + cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, + src_stride, + rec_buffer + 4 * idy * 32 + 4 * idx, + 32, &this_sse); + tmp_sse += this_sse; + } + } + } else { + cpi->fn_ptr[txm_bsize].vf(src, src_stride, + rec_buffer, 32, &tmp_sse); + } + } + *dist += (int64_t)tmp_sse * 16; +#endif // CONFIG_VP9_HIGHBITDEPTH + + *rate += cost_coeffs(x, plane, 
block, coeff_ctx, tx_size, + scan_order->scan, scan_order->neighbors, 0); + *skip &= (p->eobs[block] == 0); +} + +static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, + int blk_row, int blk_col, int plane, int block, + TX_SIZE tx_size, BLOCK_SIZE plane_bsize, + ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl, + TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left, + int *rate, int64_t *dist, + int64_t *bsse, int *skip, + int64_t ref_best_rd, int *is_cost_valid) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + int64_t this_rd = INT64_MAX; + ENTROPY_CONTEXT *pta = ta + blk_col; + ENTROPY_CONTEXT *ptl = tl + blk_row; + ENTROPY_CONTEXT stxa = 0, stxl = 0; + int coeff_ctx, i; + int ctx = txfm_partition_context(tx_above + (blk_col >> 1), + tx_left + (blk_row >> 1), tx_size); + + int64_t sum_dist = 0, sum_bsse = 0; + int64_t sum_rd = INT64_MAX; + int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1); + int all_skip = 1; + int tmp_eob = 0; + int zero_blk_rate; + + if (ref_best_rd < 0) { + *is_cost_valid = 0; + return; + } + + switch (tx_size) { + case TX_4X4: + stxa = pta[0]; + stxl = ptl[0]; + break; + case TX_8X8: + stxa = !!*(const uint16_t *)&pta[0]; + stxl = !!*(const uint16_t *)&ptl[0]; + break; + case TX_16X16: + stxa = !!*(const uint32_t *)&pta[0]; + stxl = !!*(const uint32_t *)&ptl[0]; + break; + case TX_32X32: + stxa = !!*(const uint64_t *)&pta[0]; + stxl = !!*(const uint64_t *)&ptl[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + coeff_ctx = combine_entropy_contexts(stxa, stxl); + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += 
xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + *rate = 0; + *dist = 0; + *bsse = 0; + *skip = 1; + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + zero_blk_rate = + x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN]; + + if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) { + mbmi->inter_tx_size[tx_idx] = tx_size; + tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, + plane_bsize, coeff_ctx, rate, dist, bsse, skip); + + if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >= + RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) || *skip == 1) && + !xd->lossless[mbmi->segment_id]) { + *rate = zero_blk_rate; + *dist = *bsse; + *skip = 1; + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1; + p->eobs[block] = 0; + } else { + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0; + *skip = 0; + } + + if (tx_size > TX_4X4) + *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0); + this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist); + tmp_eob = p->eobs[block]; + } + + if (tx_size > TX_4X4) { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bsl = b_height_log2_lookup[bsize]; + int sub_step = 1 << (2 * (tx_size - 1)); + int i; + int this_rate; + int64_t this_dist; + int64_t this_bsse; + int this_skip; + int this_cost_valid = 1; + int64_t tmp_rd = 0; + + --bsl; + for (i = 0; i < 4 && this_cost_valid; ++i) { + int offsetr = (i >> 1) << bsl; + int offsetc = (i & 0x01) << bsl; + select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, + plane, block + i * sub_step, tx_size - 1, + plane_bsize, ta, tl, tx_above, tx_left, + &this_rate, &this_dist, + &this_bsse, &this_skip, + ref_best_rd - tmp_rd, &this_cost_valid); + sum_rate += this_rate; + sum_dist += this_dist; + sum_bsse += this_bsse; + all_skip &= this_skip; + tmp_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, 
sum_dist); + if (this_rd < tmp_rd) + break; + } + if (this_cost_valid) + sum_rd = tmp_rd; + } + + if (this_rd < sum_rd) { + int idx, idy; + for (i = 0; i < (1 << tx_size); ++i) + pta[i] = ptl[i] = !(tmp_eob == 0); + txfm_partition_update(tx_above + (blk_col >> 1), + tx_left + (blk_row >> 1), tx_size); + mbmi->inter_tx_size[tx_idx] = tx_size; + + for (idy = 0; idy < (1 << tx_size) / 2; ++idy) + for (idx = 0; idx < (1 << tx_size) / 2; ++idx) + mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size; + mbmi->tx_size = tx_size; + if (this_rd == INT64_MAX) + *is_cost_valid = 0; + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip; + } else { + *rate = sum_rate; + *dist = sum_dist; + *bsse = sum_bsse; + *skip = all_skip; + if (sum_rd == INT64_MAX) + *is_cost_valid = 0; + } +} + +static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + int is_cost_valid = 1; + int64_t this_rd = 0; + + if (ref_best_rd < 0) + is_cost_valid = 0; + + *rate = 0; + *distortion = 0; + *sse = 0; + *skippable = 1; + + if (is_cost_valid) { + const struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_txsize_lookup[plane_bsize] * 2); + ENTROPY_CONTEXT ctxa[16], ctxl[16]; + TXFM_CONTEXT tx_above[8], tx_left[8]; + + int pnrate = 0, pnskip = 1; + int64_t pndist = 0, pnsse = 0; + + vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl); + memcpy(tx_above, xd->above_txfm_context, + sizeof(TXFM_CONTEXT) * (mi_width >> 1)); + memcpy(tx_left, xd->left_txfm_context, + 
sizeof(TXFM_CONTEXT) * (mi_height >> 1)); + + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + select_tx_block(cpi, x, idy, idx, 0, block, + max_txsize_lookup[plane_bsize], plane_bsize, + ctxa, ctxl, tx_above, tx_left, + &pnrate, &pndist, &pnsse, &pnskip, + ref_best_rd - this_rd, &is_cost_valid); + *rate += pnrate; + *distortion += pndist; + *sse += pnsse; + *skippable &= pnskip; + this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist), + RDCOST(x->rdmult, x->rddiv, 0, pnsse)); + block += step; + } + } + } + + this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion), + RDCOST(x->rdmult, x->rddiv, 0, *sse)); + if (this_rd > ref_best_rd) + is_cost_valid = 0; + + if (!is_cost_valid) { + // reset cost value + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } +} + +#if CONFIG_EXT_TX +static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; + const VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + int64_t rd = INT64_MAX; + int64_t best_rd = INT64_MAX; + TX_TYPE tx_type, best_tx_type = DCT_DCT; + int ext_tx_set; + const int is_inter = is_inter_block(mbmi); + vpx_prob skip_prob = vp10_get_skip_prob(cm, xd); + int s0 = vp10_cost_bit(skip_prob, 0); + int s1 = vp10_cost_bit(skip_prob, 1); + TX_SIZE best_tx_size[64]; + TX_SIZE best_tx = TX_SIZES; + uint8_t best_blk_skip[256]; + const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4); + int idx, idy; + + *distortion = INT64_MAX; + *rate = INT_MAX; + *skippable = 0; + *sse = INT64_MAX; + + ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter); + + for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { + int this_rate = 0; + int this_skip = 1; + int64_t this_dist = 0; + int64_t this_sse = 
0; + + if (is_inter) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + } else { + if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) { + if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) + continue; + } + if (!ext_tx_used_intra[ext_tx_set][tx_type]) + continue; + } + + mbmi->tx_type = tx_type; + + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + break; + } + + inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse, + bsize, ref_best_rd); + + if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + this_rate != INT_MAX) { + if (is_inter) { + if (ext_tx_set > 0) + this_rate += cpi->inter_tx_type_costs[ext_tx_set] + [max_tx_size][mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } + + if (this_rate == INT_MAX) + continue; + + if (this_skip) + rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse); + else + rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist); + + if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip) + rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse)); + + if (rd < + (is_inter && best_tx_type == DCT_DCT ? 
ext_tx_th : 1) * + best_rd) { + best_rd = rd; + *distortion = this_dist; + *rate = this_rate; + *skippable = this_skip; + *sse = this_sse; + best_tx_type = mbmi->tx_type; + best_tx = mbmi->tx_size; + memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4); + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx]; + } + } + + mbmi->tx_type = best_tx_type; + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx]; + mbmi->tx_size = best_tx; + memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4); +} +#endif + +static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, + int blk_row, int blk_col, int plane, int block, + TX_SIZE tx_size, BLOCK_SIZE plane_bsize, + ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx, + int *rate, int64_t *dist, int64_t *bsse, int *skip) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = plane ? 
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, + 0, 0) : + mbmi->inter_tx_size[tx_idx]; + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + int coeff_ctx, i; + ENTROPY_CONTEXT *ta = above_ctx + blk_col; + ENTROPY_CONTEXT *tl = left_ctx + blk_row; + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + ta[0] = !!*(const uint16_t *)&ta[0]; + tl[0] = !!*(const uint16_t *)&tl[0]; + break; + case TX_16X16: + ta[0] = !!*(const uint32_t *)&ta[0]; + tl[0] = !!*(const uint32_t *)&tl[0]; + break; + case TX_32X32: + ta[0] = !!*(const uint64_t *)&ta[0]; + tl[0] = !!*(const uint64_t *)&tl[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + coeff_ctx = combine_entropy_contexts(ta[0], tl[0]); + tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, + plane_bsize, coeff_ctx, rate, dist, bsse, skip); + for (i = 0; i < (1 << tx_size); ++i) { + ta[i] = !(p->eobs[block] == 0); + tl[i] = !(p->eobs[block] == 0); + } + } else { + int bsl = b_width_log2_lookup[bsize]; + int step = 1 << (2 * (tx_size - 1)); + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + int offsetr = (i >> 1) << bsl; + int offsetc = (i & 0x01) << bsl; + tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane, + block + i * step, tx_size - 1, plane_bsize, + above_ctx, left_ctx, rate, dist, bsse, skip); + } + } +} + +// Return value 0: early termination triggered, no valid rd cost available; +// 1: rd cost values are valid. 
+static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + int plane; + int is_cost_valid = 1; + int64_t this_rd; + + if (ref_best_rd < 0) + is_cost_valid = 0; + + if (is_inter_block(mbmi) && is_cost_valid) { + int plane; + for (plane = 1; plane < MAX_MB_PLANE; ++plane) + vp10_subtract_plane(x, bsize, plane); + } + + *rate = 0; + *distortion = 0; + *sse = 0; + *skippable = 1; + + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_txsize_lookup[plane_bsize] * 2); + int pnrate = 0, pnskip = 1; + int64_t pndist = 0, pnsse = 0; + ENTROPY_CONTEXT ta[16], tl[16]; + + vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); + + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + tx_block_rd(cpi, x, idy, idx, plane, block, + max_txsize_lookup[plane_bsize], plane_bsize, ta, tl, + &pnrate, &pndist, &pnsse, &pnskip); + block += step; + } + } + + if (pnrate == INT_MAX) { + is_cost_valid = 0; + break; + } + + *rate += pnrate; + *distortion += pndist; + *sse += pnsse; + *skippable &= pnskip; + + this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion), + RDCOST(x->rdmult, x->rddiv, 0, *sse)); + + if (this_rd > ref_best_rd) { + is_cost_valid = 0; + break; + } + } + + if (!is_cost_valid) { + // reset cost value + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } + + return 
is_cost_valid; +} +#endif + // Return value 0: early termination triggered, no valid rd cost available; // 1: rd cost values are valid. static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, @@ -1211,7 +2803,11 @@ *skippable = 1; for (plane = 1; plane < MAX_MB_PLANE; ++plane) { - txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing); if (pnrate == INT_MAX) { @@ -1235,34 +2831,221 @@ return is_cost_valid; } +#if CONFIG_EXT_INTRA +// Return 1 if an ext intra mode is selected; return 0 otherwise. +static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, + PICK_MODE_CONTEXT *ctx, + int *rate, int *rate_tokenonly, + int64_t *distortion, int *skippable, + BLOCK_SIZE bsize, int64_t *best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + int ext_intra_selected_flag = 0; + int this_rate_tokenonly, this_rate, s; + int64_t this_distortion, this_sse, this_rd; + EXT_INTRA_MODE mode; + EXT_INTRA_MODE_INFO ext_intra_mode_info; + + vp10_zero(ext_intra_mode_info); + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1; + mbmi->uv_mode = DC_PRED; + + for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) { + mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode; + if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, + &this_distortion, &s, &this_sse, bsize, *best_rd)) + continue; + + this_rate = this_rate_tokenonly + + vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) + + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] + + write_uniform_cost(FILTER_INTRA_MODES, mode); + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + if (this_rd < *best_rd) { + *best_rd = this_rd; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + ext_intra_mode_info = mbmi->ext_intra_mode_info; + ext_intra_selected_flag = 
1; + if (!x->select_tx_size) + swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE); + } + } + + + if (ext_intra_selected_flag) { + mbmi->uv_mode = DC_PRED; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = + ext_intra_mode_info.use_ext_intra_mode[1]; + mbmi->ext_intra_mode_info.ext_intra_mode[1] = + ext_intra_mode_info.ext_intra_mode[1]; + return 1; + } else { + return 0; + } +} + +static int rd_pick_intra_angle_sbuv(VP10_COMP *cpi, MACROBLOCK *x, + PICK_MODE_CONTEXT *ctx, + int *rate, int *rate_tokenonly, + int64_t *distortion, int *skippable, + BLOCK_SIZE bsize, int rate_overhead, + int64_t best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + int this_rate_tokenonly, this_rate, s; + int64_t this_distortion, this_sse, this_rd; + int angle_delta, best_angle_delta = 0; + const double rd_adjust = 1.2; + + (void)ctx; + *rate_tokenonly = INT_MAX; + if (ANGLE_FAST_SEARCH) { + int deltas_level1[3] = {0, -2, 2}; + int deltas_level2[3][2] = { + {-1, 1}, {-3, -1}, {1, 3}, + }; + const int level1 = 3, level2 = 2; + int i, j, best_i = -1; + + for (i = 0; i < level1; ++i) { + mbmi->angle_delta[1] = deltas_level1[i]; + if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, + &this_distortion, &s, &this_sse, bsize, + (i == 0 && best_rd < INT64_MAX) ? 
+ best_rd * rd_adjust : best_rd)) { + if (i == 0) + break; + else + continue; + } + this_rate = this_rate_tokenonly + rate_overhead; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust) + break; + if (this_rd < best_rd) { + best_i = i; + best_rd = this_rd; + best_angle_delta = mbmi->angle_delta[1]; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + + if (best_i >= 0) { + for (j = 0; j < level2; ++j) { + mbmi->angle_delta[1] = deltas_level2[best_i][j]; + if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, + &this_distortion, &s, &this_sse, bsize, best_rd)) + continue; + this_rate = this_rate_tokenonly + rate_overhead; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + if (this_rd < best_rd) { + best_rd = this_rd; + best_angle_delta = mbmi->angle_delta[1]; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + } + } else { + for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS; + ++angle_delta) { + mbmi->angle_delta[1] = angle_delta; + if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, + &this_distortion, &s, &this_sse, bsize, best_rd)) + continue; + this_rate = this_rate_tokenonly + rate_overhead; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + if (this_rd < best_rd) { + best_rd = this_rd; + best_angle_delta = mbmi->angle_delta[1]; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + } + + mbmi->angle_delta[1] = best_angle_delta; + if (*rate_tokenonly != INT_MAX) + super_block_uvrd(cpi, x, &this_rate_tokenonly, + &this_distortion, &s, &this_sse, bsize, INT_MAX); + return *rate_tokenonly != INT_MAX; +} +#endif // CONFIG_EXT_INTRA + static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x, 
PICK_MODE_CONTEXT *ctx, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, TX_SIZE max_tx_size) { MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; PREDICTION_MODE mode; PREDICTION_MODE mode_selected = DC_PRED; int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; +#if CONFIG_EXT_INTRA + int is_directional_mode, rate_overhead, best_angle_delta = 0; + EXT_INTRA_MODE_INFO ext_intra_mode_info; + ext_intra_mode_info.use_ext_intra_mode[1] = 0; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; +#endif // CONFIG_EXT_INTRA memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); + xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0; for (mode = DC_PRED; mode <= TM_PRED; ++mode) { if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - xd->mi[0]->mbmi.uv_mode = mode; - + mbmi->uv_mode = mode; +#if CONFIG_EXT_INTRA + is_directional_mode = (mode != DC_PRED && mode != TM_PRED); + rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] + + write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0); + mbmi->angle_delta[1] = 0; + if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode) { + if (!rd_pick_intra_angle_sbuv(cpi, x, ctx, &this_rate, + &this_rate_tokenonly, &this_distortion, &s, + bsize, rate_overhead, best_rd)) + continue; + } else { + if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, + &this_distortion, &s, &this_sse, bsize, best_rd)) + continue; + } + this_rate = this_rate_tokenonly + + cpi->intra_uv_mode_cost[mbmi->mode][mode]; + if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode) + this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + + mbmi->angle_delta[1]); + if (mode == DC_PRED && 0) + this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0); +#else if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd)) continue; this_rate = this_rate_tokenonly + 
cpi->intra_uv_mode_cost[xd->mi[0]->mbmi.mode][mode]; +#endif // CONFIG_EXT_INTRA + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { mode_selected = mode; +#if CONFIG_EXT_INTRA + best_angle_delta = mbmi->angle_delta[1]; +#endif // CONFIG_EXT_INTRA best_rd = this_rd; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; @@ -1273,7 +3056,23 @@ } } - xd->mi[0]->mbmi.uv_mode = mode_selected; +#if CONFIG_EXT_INTRA + if (mbmi->sb_type >= BLOCK_8X8 && ALLOW_FILTER_INTRA_MODES) { + if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion, + skippable, bsize, &best_rd)) { + mode_selected = mbmi->uv_mode; + ext_intra_mode_info = mbmi->ext_intra_mode_info; + } + } + + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = + ext_intra_mode_info.use_ext_intra_mode[1]; + if (ext_intra_mode_info.use_ext_intra_mode[1]) + mbmi->ext_intra_mode_info.ext_intra_mode[1] = + ext_intra_mode_info.ext_intra_mode[1]; + mbmi->angle_delta[1] = best_angle_delta; +#endif // CONFIG_EXT_INTRA + mbmi->uv_mode = mode_selected; return best_rd; } @@ -1314,9 +3113,46 @@ } static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode, - int mode_context) { + int16_t mode_context) { +#if CONFIG_REF_MV + int mode_cost = 0; + int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; + int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET); + + assert(is_inter_mode(mode)); + + if (mode == NEWMV) { + mode_cost = cpi->newmv_mode_cost[mode_ctx][0]; + return mode_cost; + } else { + mode_cost = cpi->newmv_mode_cost[mode_ctx][1]; + mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; + + if (is_all_zero_mv) + return mode_cost; + + if (mode == ZEROMV) { + mode_cost += cpi->zeromv_mode_cost[mode_ctx][0]; + return mode_cost; + } else { + mode_cost += cpi->zeromv_mode_cost[mode_ctx][1]; + mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; + + if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) + mode_ctx = 6; + if (mode_context & (1 << 
SKIP_NEARMV_OFFSET)) + mode_ctx = 7; + if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) + mode_ctx = 8; + + mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV]; + return mode_cost; + } + } +#else assert(is_inter_mode(mode)); return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; +#endif } static int set_and_cost_bmi_mvs(VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, @@ -1334,6 +3170,7 @@ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; const int is_compound = has_second_ref(mbmi); + int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; switch (mode) { case NEWMV: @@ -1371,8 +3208,11 @@ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i])); - return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) + - thismvcost; +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, mbmi->sb_type, i); +#endif + return cost_mv_ref(cpi, mode, mode_ctx) + thismvcost; } static int64_t encode_inter_mb_segment(VP10_COMP *cpi, @@ -1402,8 +3242,8 @@ pd->dst.stride)]; int64_t thisdistortion = 0, thissse = 0; int thisrate = 0; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i); - const scan_order *so = get_scan(TX_4X4, tx_type); + TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4); + const scan_order *so = get_scan(TX_4X4, tx_type, 1); vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col); @@ -1439,8 +3279,14 @@ for (idx = 0; idx < width / 4; ++idx) { int64_t ssz, rd, rd1, rd2; tran_low_t* coeff; - +#if CONFIG_VAR_TX + int coeff_ctx; +#endif k += (idy * 2 + idx); +#if CONFIG_VAR_TX + coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), + *(tl + (k >> 1))); +#endif coeff = BLOCK_OFFSET(p->coeff, k); fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), coeff, 8); @@ -1459,9 +3305,19 @@ 16, 
&ssz); #endif // CONFIG_VP9_HIGHBITDEPTH thissse += ssz; - thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4, +#if CONFIG_VAR_TX + thisrate += cost_coeffs(x, 0, k, coeff_ctx, + TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); + *(ta + (k & 1)) = !(p->eobs[k] == 0); + *(tl + (k >> 1)) = !(p->eobs[k] == 0); +#else + thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), + TX_4X4, + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); +#endif rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); rd = VPXMIN(rd1, rd2); @@ -1534,25 +3390,32 @@ x->e_mbd.plane[0].pre[1] = orig_pre[1]; } -static INLINE int mv_has_subpel(const MV *mv) { - return (mv->row & 0x0F) || (mv->col & 0x0F); -} - // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion. // TODO(aconverse): Find out if this is still productive then clean up or remove static int check_best_zero_mv( - const VP10_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], + const VP10_COMP *cpi, const int16_t mode_context[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode, - const MV_REFERENCE_FRAME ref_frames[2]) { + const MV_REFERENCE_FRAME ref_frames[2], + const BLOCK_SIZE bsize, int block) { if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && frame_mv[this_mode][ref_frames[0]].as_int == 0 && (ref_frames[1] == NONE || frame_mv[this_mode][ref_frames[1]].as_int == 0)) { - int rfc = mode_context[ref_frames[0]]; +#if CONFIG_REF_MV + int16_t rfc = vp10_mode_context_analyzer(mode_context, + ref_frames, bsize, block); +#else + int16_t rfc = mode_context[ref_frames[0]]; +#endif int c1 = cost_mv_ref(cpi, NEARMV, rfc); int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); int c3 = cost_mv_ref(cpi, ZEROMV, rfc); +#if !CONFIG_REF_MV + (void)bsize; + (void)block; +#endif + if (this_mode == NEARMV) { if (c1 > c3) return 0; } else if (this_mode == NEARESTMV) { @@ 
-1630,11 +3493,11 @@ // frame we must use a unit scaling factor during mode selection. #if CONFIG_VP9_HIGHBITDEPTH vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height, - cm->width, cm->height, - cm->use_highbitdepth); + cm->width, cm->height, + cm->use_highbitdepth); #else vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height, - cm->width, cm->height); + cm->width, cm->height); #endif // CONFIG_VP9_HIGHBITDEPTH // Allow joint search multiple times iteratively for each reference frame @@ -1832,8 +3695,7 @@ frame_mv[ZEROMV][frame].as_int = 0; vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame], - &frame_mv[NEARMV][frame], - mbmi_ext->mode_context); + &frame_mv[NEARMV][frame]); } // search for the best motion vector on this segment @@ -1847,7 +3709,7 @@ continue; if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, - this_mode, mbmi->ref_frame)) + this_mode, mbmi->ref_frame, bsize, i)) continue; memcpy(orig_pre, pd->pre, sizeof(orig_pre)); @@ -2151,34 +4013,108 @@ if (cm->reference_mode != COMPOUND_REFERENCE) { vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd); vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd); +#if CONFIG_EXT_REFS + vpx_prob ref_single_p3 = vp10_get_pred_prob_single_ref_p3(cm, xd); + vpx_prob ref_single_p4 = vp10_get_pred_prob_single_ref_p4(cm, xd); + vpx_prob ref_single_p5 = vp10_get_pred_prob_single_ref_p5(cm, xd); +#endif // CONFIG_EXT_REFS unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1); if (cm->reference_mode == REFERENCE_MODE_SELECT) base_cost += vp10_cost_bit(comp_inter_p, 0); - ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = + ref_costs_single[LAST_FRAME] = +#if CONFIG_EXT_REFS + ref_costs_single[LAST2_FRAME] = + ref_costs_single[LAST3_FRAME] = + ref_costs_single[LAST4_FRAME] = +#endif // CONFIG_EXT_REFS + ref_costs_single[GOLDEN_FRAME] = ref_costs_single[ALTREF_FRAME] = base_cost; + +#if CONFIG_EXT_REFS + 
ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0); + ref_costs_single[LAST2_FRAME] += vp10_cost_bit(ref_single_p1, 0); + ref_costs_single[LAST3_FRAME] += vp10_cost_bit(ref_single_p1, 0); + ref_costs_single[LAST4_FRAME] += vp10_cost_bit(ref_single_p1, 0); + ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1); + ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1); + + ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p3, 0); + ref_costs_single[LAST2_FRAME] += vp10_cost_bit(ref_single_p3, 0); + ref_costs_single[LAST3_FRAME] += vp10_cost_bit(ref_single_p3, 1); + ref_costs_single[LAST4_FRAME] += vp10_cost_bit(ref_single_p3, 1); + ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0); + ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1); + + ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p4, 0); + ref_costs_single[LAST2_FRAME] += vp10_cost_bit(ref_single_p4, 1); + ref_costs_single[LAST3_FRAME] += vp10_cost_bit(ref_single_p5, 0); + ref_costs_single[LAST4_FRAME] += vp10_cost_bit(ref_single_p5, 1); +#else ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0); ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1); ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1); ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0); ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1); +#endif // CONFIG_EXT_REFS } else { ref_costs_single[LAST_FRAME] = 512; +#if CONFIG_EXT_REFS + ref_costs_single[LAST2_FRAME] = 512; + ref_costs_single[LAST3_FRAME] = 512; + ref_costs_single[LAST4_FRAME] = 512; +#endif // CONFIG_EXT_REFS ref_costs_single[GOLDEN_FRAME] = 512; ref_costs_single[ALTREF_FRAME] = 512; } + if (cm->reference_mode != SINGLE_REFERENCE) { vpx_prob ref_comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd); +#if CONFIG_EXT_REFS + vpx_prob ref_comp_p1 = vp10_get_pred_prob_comp_ref_p1(cm, xd); + vpx_prob ref_comp_p2 = 
vp10_get_pred_prob_comp_ref_p2(cm, xd); + vpx_prob ref_comp_p3 = vp10_get_pred_prob_comp_ref_p3(cm, xd); +#endif // CONFIG_EXT_REFS unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1); if (cm->reference_mode == REFERENCE_MODE_SELECT) base_cost += vp10_cost_bit(comp_inter_p, 1); - ref_costs_comp[LAST_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 0); - ref_costs_comp[GOLDEN_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 1); + ref_costs_comp[LAST_FRAME] = +#if CONFIG_EXT_REFS + ref_costs_comp[LAST2_FRAME] = + ref_costs_comp[LAST3_FRAME] = + ref_costs_comp[LAST4_FRAME] = +#endif // CONFIG_EXT_REFS + ref_costs_comp[GOLDEN_FRAME] = base_cost; + +#if CONFIG_EXT_REFS + ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p, 0); + ref_costs_comp[LAST2_FRAME] += vp10_cost_bit(ref_comp_p, 0); + ref_costs_comp[LAST3_FRAME] += vp10_cost_bit(ref_comp_p, 1); + ref_costs_comp[LAST4_FRAME] += vp10_cost_bit(ref_comp_p, 1); + ref_costs_comp[GOLDEN_FRAME] += vp10_cost_bit(ref_comp_p, 1); + + ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p1, 1); + ref_costs_comp[LAST2_FRAME] += vp10_cost_bit(ref_comp_p1, 0); + ref_costs_comp[LAST3_FRAME] += vp10_cost_bit(ref_comp_p2, 0); + ref_costs_comp[LAST4_FRAME] += vp10_cost_bit(ref_comp_p2, 0); + ref_costs_comp[GOLDEN_FRAME] += vp10_cost_bit(ref_comp_p2, 1); + + ref_costs_comp[LAST3_FRAME] += vp10_cost_bit(ref_comp_p3, 1); + ref_costs_comp[LAST4_FRAME] += vp10_cost_bit(ref_comp_p3, 0); +#else + ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p, 0); + ref_costs_comp[GOLDEN_FRAME] += vp10_cost_bit(ref_comp_p, 1); +#endif // CONFIG_EXT_REFS } else { ref_costs_comp[LAST_FRAME] = 512; +#if CONFIG_EXT_REFS + ref_costs_comp[LAST2_FRAME] = 512; + ref_costs_comp[LAST3_FRAME] = 512; + ref_costs_comp[LAST4_FRAME] = 512; +#endif // CONFIG_EXT_REFS ref_costs_comp[GOLDEN_FRAME] = 512; } } @@ -2206,13 +4142,14 @@ sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); } -static void setup_buffer_inter(VP10_COMP *cpi, MACROBLOCK *x, - 
MV_REFERENCE_FRAME ref_frame, - BLOCK_SIZE block_size, - int mi_row, int mi_col, - int_mv frame_nearest_mv[MAX_REF_FRAMES], - int_mv frame_near_mv[MAX_REF_FRAMES], - struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { +static void setup_buffer_inter( + VP10_COMP *cpi, MACROBLOCK *x, + MV_REFERENCE_FRAME ref_frame, + BLOCK_SIZE block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE]) { const VP10_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; @@ -2228,8 +4165,13 @@ vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them - vp10_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col, - NULL, NULL, mbmi_ext->mode_context); + vp10_find_mv_refs(cm, xd, mi, ref_frame, +#if CONFIG_REF_MV + &mbmi_ext->ref_mv_count[ref_frame], + mbmi_ext->ref_mv_stack[ref_frame], +#endif + candidates, mi_row, mi_col, + NULL, NULL, mbmi_ext->mode_context); // Candidate refinement carried out at encoder and decoder vp10_find_best_ref_mvs(cm->allow_high_precision_mv, candidates, @@ -2372,8 +4314,6 @@ } } - - static INLINE void restore_dst_buf(MACROBLOCKD *xd, uint8_t *orig_dst[MAX_MB_PLANE], int orig_dst_stride[MAX_MB_PLANE]) { @@ -2468,6 +4408,12 @@ int skip_txfm_sb = 0; int64_t skip_sse_sb = INT64_MAX; int64_t distortion_y = 0, distortion_uv = 0; + int16_t mode_ctx = mbmi_ext->mode_context[refs[0]]; + +#if CONFIG_REF_MV + mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, bsize, -1); +#endif #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -2572,12 +4518,10 @@ // initiation of a motion field. 
if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, refs[0])) { - *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, - mbmi_ext->mode_context[refs[0]]), - cost_mv_ref(cpi, NEARESTMV, - mbmi_ext->mode_context[refs[0]])); + *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, mode_ctx), + cost_mv_ref(cpi, NEARESTMV, mode_ctx)); } else { - *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]); + *rate2 += cost_mv_ref(cpi, this_mode, mode_ctx); } if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd && @@ -2598,6 +4542,10 @@ if (cm->interp_filter != BILINEAR) { if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { best_filter = EIGHTTAP; +#if CONFIG_EXT_INTERP + } else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) { + best_filter = EIGHTTAP; +#endif } else if (best_filter == SWITCHABLE) { int newbest; int tmp_rate_sum = 0; @@ -2613,7 +4561,7 @@ rs = vp10_get_switchable_rate(cpi, xd); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); - if (i > 0 && intpel_mv) { + if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) { rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); filter_cache[i] = rd; filter_cache[SWITCHABLE_FILTERS] = @@ -2635,7 +4583,7 @@ (!i || best_needs_copy)) || (cm->interp_filter != SWITCHABLE && (cm->interp_filter == mbmi->interp_filter || - (i == 0 && intpel_mv)))) { + (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) { restore_dst_buf(xd, orig_dst, orig_dst_stride); } else { for (j = 0; j < MAX_MB_PLANE; j++) { @@ -2655,7 +4603,7 @@ rd += rs_rd; *mask_filter = VPXMAX(*mask_filter, rd); - if (i == 0 && intpel_mv) { + if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) { tmp_rate_sum = rate_sum; tmp_dist_sum = dist_sum; } @@ -2672,7 +4620,8 @@ if (newbest) { best_rd = rd; best_filter = mbmi->interp_filter; - if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) + if (cm->interp_filter == SWITCHABLE && i && + !(intpel_mv && IsInterpolatingFilter(i))) best_needs_copy = !best_needs_copy; } @@ 
-2691,6 +4640,7 @@ restore_dst_buf(xd, orig_dst, orig_dst_stride); } } + // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter; @@ -2750,8 +4700,27 @@ // Y cost and distortion vp10_subtract_plane(x, bsize, 0); +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) { +#if CONFIG_EXT_TX + select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, + bsize, ref_best_rd); +#else + inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, + bsize, ref_best_rd); +#endif + } else { + int idx, idy; + super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, + bsize, ref_best_rd); + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + mbmi->inter_tx_size[idy * 8 + idx] = mbmi->tx_size; + } +#else super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, ref_best_rd); +#endif if (*rate_y == INT_MAX) { *rate2 = INT_MAX; @@ -2766,8 +4735,13 @@ rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); +#if CONFIG_VAR_TX + if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, + &sseuv, bsize, ref_best_rd - rdcosty)) { +#else if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty)) { +#endif *rate2 = INT_MAX; *distortion = INT64_MAX; restore_dst_buf(xd, orig_dst, orig_dst_stride); @@ -2980,7 +4954,11 @@ TileDataEnc *tile_data, MACROBLOCK *x, int mi_row, int mi_col, - RD_COST *rd_cost, BLOCK_SIZE bsize, + RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif // CONFIG_SUPERTX + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP10_COMMON *const cm = &cpi->common; @@ -2995,12 +4973,21 @@ unsigned char segment_id = mbmi->segment_id; int comp_pred, i, k; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - struct buf_2d yv12_mb[4][MAX_MB_PLANE]; + 
struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE]; int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES]; int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES]; - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; + static const int flag_list[REFS_PER_FRAME + 1] = { + 0, + VP9_LAST_FLAG, +#if CONFIG_EXT_REFS + VP9_LAST2_FLAG, + VP9_LAST3_FLAG, + VP9_LAST4_FLAG, +#endif // CONFIG_EXT_REFS + VP9_GOLD_FLAG, + VP9_ALT_FLAG + }; int64_t best_rd = best_rd_so_far; int64_t best_pred_diff[REFERENCE_MODES]; int64_t best_pred_rd[REFERENCE_MODES]; @@ -3018,8 +5005,17 @@ int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; PREDICTION_MODE mode_uv[TX_SIZES]; +#if CONFIG_EXT_INTRA + EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES]; + int8_t uv_angle_delta[TX_SIZES]; + int is_directional_mode, angle_stats_ready = 0; + int rate_overhead, rate_dummy; + uint8_t directional_mode_skip_mask[INTRA_MODES]; +#endif // CONFIG_EXT_INTRA const int intra_cost_penalty = vp10_get_intra_cost_penalty( cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); + const int * const intra_mode_cost = + cpi->mbmode_cost[size_group_lookup[bsize]]; int best_skip2 = 0; uint8_t ref_frame_skip_mask[2] = { 0 }; uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; @@ -3034,6 +5030,11 @@ vp10_zero(best_mbmode); +#if CONFIG_EXT_INTRA + memset(directional_mode_skip_mask, 0, + sizeof(directional_mode_skip_mask[0]) * INTRA_MODES); +#endif // CONFIG_EXT_INTRA + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; @@ -3056,9 +5057,13 @@ } rd_cost->rate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; + x->mbmi_ext->mode_context[ref_frame] = 0; if (cpi->ref_frame_flags & flag_list[ref_frame]) { assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); 
setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, @@ -3102,7 +5107,14 @@ // an unfiltered alternative. We allow near/nearest as well // because they may result in zero-zero MVs but be cheaper. if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { - ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME); + ref_frame_skip_mask[0] = + (1 << LAST_FRAME) | +#if CONFIG_EXT_REFS + (1 << LAST2_FRAME) | + (1 << LAST3_FRAME) | + (1 << LAST4_FRAME) | +#endif // CONFIG_EXT_REFS + (1 << GOLDEN_FRAME); ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) @@ -3159,6 +5171,8 @@ midx = end_pos; } + mbmi->palette_mode_info.palette_size[0] = 0; + mbmi->palette_mode_info.palette_size[1] = 0; for (midx = 0; midx < MAX_MODES; ++midx) { int mode_index = mode_map[midx]; int mode_excluded = 0; @@ -3186,6 +5200,20 @@ ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; break; +#if CONFIG_EXT_REFS + case LAST2_FRAME: + ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK; + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; + break; + case LAST3_FRAME: + ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK; + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; + break; + case LAST4_FRAME: + ref_frame_skip_mask[0] |= LAST4_FRAME_MODE_MASK; + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; + break; +#endif // CONFIG_EXT_REFS case GOLDEN_FRAME: ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK; ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; @@ -3267,7 +5295,7 @@ } else { const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, - this_mode, ref_frames)) + this_mode, ref_frames, bsize, -1)) continue; } @@ -3275,6 +5303,10 @@ mbmi->uv_mode = DC_PRED; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; +#if CONFIG_EXT_INTRA + 
mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; +#endif // CONFIG_EXT_INTRA // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP @@ -3295,25 +5327,127 @@ TX_SIZE uv_tx; struct macroblockd_plane *const pd = &xd->plane[1]; memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + +#if CONFIG_EXT_INTRA + is_directional_mode = (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED); + if (is_directional_mode) { + if (!angle_stats_ready) { + const int src_stride = x->plane[0].src.stride; + const uint8_t *src = x->plane[0].src.buf; + const int rows = 4 * num_4x4_blocks_high_lookup[bsize]; + const int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; + double hist[DIRECTIONAL_MODES]; + PREDICTION_MODE mode; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + highbd_angle_estimation(src, src_stride, rows, cols, hist); + else +#endif + angle_estimation(src, src_stride, rows, cols, hist); + for (mode = 0; mode < INTRA_MODES; ++mode) { + if (mode != DC_PRED && mode != TM_PRED) { + int index = get_angle_index((double)mode_to_angle_map[mode]); + double score, weight = 1.0; + score = hist[index]; + if (index > 0) { + score += hist[index - 1] * 0.5; + weight += 0.5; + } + if (index < DIRECTIONAL_MODES - 1) { + score += hist[index + 1] * 0.5; + weight += 0.5; + } + score /= weight; + if (score < ANGLE_SKIP_THRESH) + directional_mode_skip_mask[mode] = 1; + } + } + angle_stats_ready = 1; + } + if (directional_mode_skip_mask[mbmi->mode]) + continue; + rate_overhead = write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0) + + intra_mode_cost[mbmi->mode]; + rate_y = INT_MAX; + this_rd = + rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y, + &skippable, bsize, rate_overhead, best_rd); + } else { + mbmi->angle_delta[0] = 0; + super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, + NULL, bsize, 
best_rd); + } + + // TODO(huisu): ext-intra is turned off in lossless mode for now to + // avoid a unit test failure + if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] && + ALLOW_FILTER_INTRA_MODES) { + MB_MODE_INFO mbmi_copy = *mbmi; + + if (rate_y != INT_MAX) { + int this_rate = rate_y + intra_mode_cost[mbmi->mode] + + vp10_cost_bit(cm->fc->ext_intra_probs[0], 0); + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y); + } else { + this_rd = best_rd; + } + + if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y, + &skippable, bsize, + intra_mode_cost[mbmi->mode], &this_rd)) + *mbmi = mbmi_copy; + } +#else super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, best_rd); +#endif // CONFIG_EXT_INTRA + if (rate_y == INT_MAX) continue; - uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x, pd->subsampling_y); if (rate_uv_intra[uv_tx] == INT_MAX) { choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]); +#if CONFIG_EXT_INTRA + ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info; + uv_angle_delta[uv_tx] = mbmi->angle_delta[1]; +#endif // CONFIG_EXT_INTRA } rate_uv = rate_uv_tokenonly[uv_tx]; distortion_uv = dist_uv[uv_tx]; skippable = skippable && skip_uv[uv_tx]; mbmi->uv_mode = mode_uv[uv_tx]; +#if CONFIG_EXT_INTRA + mbmi->angle_delta[1] = uv_angle_delta[uv_tx]; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = + ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]; + if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) { + mbmi->ext_intra_mode_info.ext_intra_mode[1] = + ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1]; + } +#endif // CONFIG_EXT_INTRA - rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; + rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; +#if CONFIG_EXT_INTRA + if (is_directional_mode) + rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 
+ MAX_ANGLE_DELTAS + + mbmi->angle_delta[0]); + + if (mbmi->mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) { + rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0], + mbmi->ext_intra_mode_info.use_ext_intra_mode[0]); + if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) { + EXT_INTRA_MODE ext_intra_mode = + mbmi->ext_intra_mode_info.ext_intra_mode[0]; + rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode); + } + } +#endif // CONFIG_EXT_INTRA if (this_mode != DC_PRED && this_mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; @@ -3326,6 +5460,7 @@ single_newmv, single_inter_filter, single_skippable, &total_sse, best_rd, &mask_filter, filter_cache); + if (this_rd == INT64_MAX) continue; @@ -3347,9 +5482,11 @@ if (skippable) { // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); - + rate_y = 0; + rate_uv = 0; // Cost the skip mb case rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { @@ -3362,6 +5499,8 @@ assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); this_skip2 = 1; + rate_y = 0; + rate_uv = 0; } } else { // Add in the cost of the no skip flag. 
@@ -3408,6 +5547,15 @@ } rd_cost->rate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) { + *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd), + skippable || this_skip2); + } + *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); +#endif // CONFIG_SUPERTX rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; best_rd = this_rd; @@ -3417,8 +5565,15 @@ if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); + +#if CONFIG_VAR_TX + for (i = 0; i < MAX_MB_PLANE; ++i) + memcpy(ctx->blk_skip[i], x->blk_skip[i], + sizeof(uint8_t) * ctx->num_4x4_blk); +#else memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); +#endif // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -3522,6 +5677,19 @@ best_mbmode.mode = ZEROMV; } +#if CONFIG_REF_MV + if (best_mbmode.ref_frame[0] > INTRA_FRAME && + best_mbmode.mv[0].as_int == 0 && + (best_mbmode.ref_frame[1] == NONE || best_mbmode.mv[1].as_int == 0)) { + int16_t mode_ctx = mbmi_ext->mode_context[best_mbmode.ref_frame[0]]; + if (best_mbmode.ref_frame[1] > NONE) + mode_ctx &= (mbmi_ext->mode_context[best_mbmode.ref_frame[1]] | 0x00ff); + + if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) + best_mbmode.mode = ZEROMV; + } +#endif + if (best_mode_index < 0 || best_rd >= best_rd_so_far) { rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; @@ -3638,6 +5806,12 @@ assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); + mbmi->palette_mode_info.palette_size[0] = 0; + mbmi->palette_mode_info.palette_size[1] = 0; +#if CONFIG_EXT_INTRA + mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; +#endif // CONFIG_EXT_INTRA mbmi->mode = ZEROMV; mbmi->uv_mode = DC_PRED; mbmi->ref_frame[0] = LAST_FRAME; @@ -3648,6 +5822,9 @@ if (cm->interp_filter != 
BILINEAR) { best_filter = EIGHTTAP; if (cm->interp_filter == SWITCHABLE && +#if CONFIG_EXT_INTERP + vp10_is_interp_needed(xd) && +#endif // CONFIG_EXT_INTERP x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { int rs; int best_rs = INT_MAX; @@ -3702,14 +5879,17 @@ best_pred_diff, best_filter_diff, 0); } -void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, - TileDataEnc *tile_data, - MACROBLOCK *x, - int mi_row, int mi_col, - RD_COST *rd_cost, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, - int64_t best_rd_so_far) { +void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, + TileDataEnc *tile_data, + struct macroblock *x, + int mi_row, int mi_col, + struct RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif // CONFIG_SUPERTX + BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far) { VP10_COMMON *const cm = &cpi->common; RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; @@ -3720,9 +5900,18 @@ unsigned char segment_id = mbmi->segment_id; int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - struct buf_2d yv12_mb[4][MAX_MB_PLANE]; - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; + struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE]; + static const int flag_list[REFS_PER_FRAME + 1] = { + 0, + VP9_LAST_FLAG, +#if CONFIG_EXT_REFS + VP9_LAST2_FLAG, + VP9_LAST3_FLAG, + VP9_LAST4_FLAG, +#endif // CONFIG_EXT_REFS + VP9_GOLD_FLAG, + VP9_ALT_FLAG + }; int64_t best_rd = best_rd_so_far; int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise int64_t best_pred_diff[REFERENCE_MODES]; @@ -3749,9 +5938,19 @@ int internal_active_edge = vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi); +#if CONFIG_SUPERTX + best_rd_so_far = INT64_MAX; + best_rd = best_rd_so_far; + best_yrd = best_rd_so_far; +#endif // CONFIG_SUPERTX memset(x->zcoeff_blk[TX_4X4], 0, 4); vp10_zero(best_mbmode); +#if CONFIG_EXT_INTRA + 
mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; +#endif // CONFIG_EXT_INTRA + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; @@ -3771,8 +5970,12 @@ rate_uv_intra = INT_MAX; rd_cost->rate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { + x->mbmi_ext->mode_context[ref_frame] = 0; if (cpi->ref_frame_flags & flag_list[ref_frame]) { setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], @@ -3785,6 +5988,9 @@ frame_mv[ZEROMV][ref_frame].as_int = 0; } + mbmi->palette_mode_info.palette_size[0] = 0; + mbmi->palette_mode_info.palette_size[1] = 0; + for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3809,15 +6015,59 @@ case INTRA_FRAME: break; case LAST_FRAME: - ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME); + ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | +#if CONFIG_EXT_REFS + (1 << LAST2_FRAME) | + (1 << LAST3_FRAME) | + (1 << LAST4_FRAME) | +#endif // CONFIG_EXT_REFS + (1 << ALTREF_FRAME); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; break; +#if CONFIG_EXT_REFS + case LAST2_FRAME: + ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | + (1 << LAST3_FRAME) | + (1 << LAST4_FRAME) | + (1 << GOLDEN_FRAME) | + (1 << ALTREF_FRAME); + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; + break; + case LAST3_FRAME: + ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | + (1 << LAST2_FRAME) | + (1 << LAST4_FRAME) | + (1 << GOLDEN_FRAME) | + (1 << ALTREF_FRAME); + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; + break; + case LAST4_FRAME: + ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | + (1 << LAST2_FRAME) | + (1 << LAST3_FRAME) | + (1 << GOLDEN_FRAME) | + (1 << ALTREF_FRAME); + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; + break; +#endif // CONFIG_EXT_REFS case GOLDEN_FRAME: - 
ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME); + ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | +#if CONFIG_EXT_REFS + (1 << LAST2_FRAME) | + (1 << LAST3_FRAME) | + (1 << LAST4_FRAME) | +#endif // CONFIG_EXT_REFS + (1 << ALTREF_FRAME); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; break; case ALTREF_FRAME: - ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME); + ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | +#if CONFIG_EXT_REFS + (1 << LAST2_FRAME) | + (1 << LAST3_FRAME) | + (1 << LAST4_FRAME) | +#endif // CONFIG_EXT_REFS + (1 << LAST_FRAME); break; case NONE: case MAX_REF_FRAMES: @@ -3904,6 +6154,10 @@ xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } +#if CONFIG_VAR_TX + mbmi->inter_tx_size[0] = mbmi->tx_size; +#endif + if (ref_frame == INTRA_FRAME) { int rate; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, @@ -3945,8 +6199,16 @@ this_rd_thresh = (ref_frame == LAST_FRAME) ? rd_opt->threshes[segment_id][bsize][THR_LAST] : rd_opt->threshes[segment_id][bsize][THR_ALTR]; +#if CONFIG_EXT_REFS + this_rd_thresh = (ref_frame == LAST2_FRAME) ? + rd_opt->threshes[segment_id][bsize][THR_LAST2] : this_rd_thresh; + this_rd_thresh = (ref_frame == LAST3_FRAME) ? + rd_opt->threshes[segment_id][bsize][THR_LAST3] : this_rd_thresh; + this_rd_thresh = (ref_frame == LAST4_FRAME) ? + rd_opt->threshes[segment_id][bsize][THR_LAST4] : this_rd_thresh; +#endif // CONFIG_EXT_REFS this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 
- rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; + rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; @@ -3976,7 +6238,11 @@ (int) this_rd_thresh, seg_mvs, bsi, switchable_filter_index, mi_row, mi_col); - +#if CONFIG_EXT_INTERP + if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE && + mbmi->interp_filter != EIGHTTAP) // invalid configuration + continue; +#endif // CONFIG_EXT_INTERP if (tmp_rd == INT64_MAX) continue; rs = vp10_get_switchable_rate(cpi, xd); @@ -4030,15 +6296,30 @@ mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter : cm->interp_filter); + + if (!pred_exists) { // Handles the special case when a filter that is not in the - // switchable list (bilinear, 6-tap) is indicated at the frame level + // switchable list (bilinear) is indicated at the frame level tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, &rate, &rate_y, &distortion, &skippable, &total_sse, (int) this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col); +#if CONFIG_EXT_INTERP + if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE && + mbmi->interp_filter != EIGHTTAP) { + mbmi->interp_filter = EIGHTTAP; + tmp_rd = rd_pick_best_sub8x8_mode( + cpi, x, + &x->mbmi_ext->ref_mvs[ref_frame][0], + second_ref, best_yrd, &rate, &rate_y, + &distortion, &skippable, &total_sse, + (int) this_rd_thresh, seg_mvs, bsi, 0, + mi_row, mi_col); + } +#endif // CONFIG_EXT_INTERP if (tmp_rd == INT64_MAX) continue; } else { @@ -4074,10 +6355,15 @@ vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); +#if CONFIG_VAR_TX + if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, + &uv_sse, BLOCK_8X8, tmp_best_rdu)) + continue; +#else if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, &uv_sse, BLOCK_8X8, 
tmp_best_rdu)) continue; - +#endif rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable; @@ -4145,6 +6431,15 @@ } rd_cost->rate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) + *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd), + this_skip2); + *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); + assert(*returnrate_nocoef > 0); +#endif // CONFIG_SUPERTX rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; best_rd = this_rd; @@ -4154,8 +6449,14 @@ best_skip2 = this_skip2; if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); + +#if CONFIG_VAR_TX + for (i = 0; i < MAX_MB_PLANE; ++i) + memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk); +#else memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); +#endif for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i]; @@ -4241,6 +6542,9 @@ if (best_rd >= best_rd_so_far) { rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX return; } @@ -4261,6 +6565,9 @@ rd_cost->rate = INT_MAX; rd_cost->dist = INT64_MAX; rd_cost->rdcost = INT64_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX return; }
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h index b1a8036..62b0aea 100644 --- a/vp10/encoder/rdopt.h +++ b/vp10/encoder/rdopt.h
@@ -43,6 +43,9 @@ struct macroblock *x, int mi_row, int mi_col, struct RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif // CONFIG_SUPERTX BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); @@ -60,12 +63,27 @@ int vp10_active_edge_sb(struct VP10_COMP *cpi, int mi_row, int mi_col); void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, - struct TileDataEnc *tile_data, - struct macroblock *x, - int mi_row, int mi_col, - struct RD_COST *rd_cost, - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int64_t best_rd_so_far); + struct TileDataEnc *tile_data, + struct macroblock *x, + int mi_row, int mi_col, + struct RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif // CONFIG_SUPERTX + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far); + +#if CONFIG_SUPERTX +void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x, +#if CONFIG_VAR_TX + const VP10_COMP *cpi, +#endif // CONFIG_VAR_TX + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting); +#endif // CONFIG_SUPERTX #ifdef __cplusplus } // extern "C"
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c index 6a20ee4..e936775 100644 --- a/vp10/encoder/segmentation.c +++ b/vp10/encoder/segmentation.c
@@ -58,9 +58,7 @@ segcounts[4] + segcounts[5], segcounts[6] + segcounts[7] }; const unsigned ccc[2] = { cc[0] + cc[1], cc[2] + cc[3] }; -#if CONFIG_MISC_FIXES int i; -#endif segment_tree_probs[0] = get_binary_prob(ccc[0], ccc[1]); segment_tree_probs[1] = get_binary_prob(cc[0], cc[1]); @@ -70,16 +68,12 @@ segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]); segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]); -#if CONFIG_MISC_FIXES for (i = 0; i < 7; i++) { const unsigned *ct = i == 0 ? ccc : i < 3 ? cc + (i & 2) : segcounts + (i - 3) * 2; vp10_prob_diff_update_savings_search(ct, cur_tree_probs[i], &segment_tree_probs[i], DIFF_UPDATE_PROB); } -#else - (void) cur_tree_probs; -#endif } // Based on set of segment counts and probabilities calculate a cost estimate @@ -214,39 +208,22 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) { struct segmentation *seg = &cm->seg; -#if CONFIG_MISC_FIXES struct segmentation_probs *segp = &cm->fc->seg; -#else - struct segmentation_probs *segp = &cm->segp; -#endif int no_pred_cost; int t_pred_cost = INT_MAX; int i, tile_col, mi_row, mi_col; -#if CONFIG_MISC_FIXES unsigned (*temporal_predictor_count)[2] = cm->counts.seg.pred; unsigned *no_pred_segcounts = cm->counts.seg.tree_total; unsigned *t_unpred_seg_counts = cm->counts.seg.tree_mispred; -#else - unsigned temporal_predictor_count[PREDICTION_PROBS][2] = { { 0 } }; - unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 }; - unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 }; -#endif vpx_prob no_pred_tree[SEG_TREE_PROBS]; vpx_prob t_pred_tree[SEG_TREE_PROBS]; vpx_prob t_nopred_prob[PREDICTION_PROBS]; -#if CONFIG_MISC_FIXES (void) xd; -#else - // Set default state for the segment tree probabilities and the - // temporal coding probabilities - memset(segp->tree_probs, 255, sizeof(segp->tree_probs)); - memset(segp->pred_probs, 255, sizeof(segp->pred_probs)); -#endif // First of all generate stats regarding how well the last segment map 
// predicts this one @@ -284,13 +261,9 @@ const int count0 = temporal_predictor_count[i][0]; const int count1 = temporal_predictor_count[i][1]; -#if CONFIG_MISC_FIXES vp10_prob_diff_update_savings_search(temporal_predictor_count[i], segp->pred_probs[i], &t_nopred_prob[i], DIFF_UPDATE_PROB); -#else - t_nopred_prob[i] = get_binary_prob(count0, count1); -#endif // Add in the predictor signaling cost t_pred_cost += count0 * vp10_cost_zero(t_nopred_prob[i]) + @@ -301,30 +274,17 @@ // Now choose which coding method to use. if (t_pred_cost < no_pred_cost) { seg->temporal_update = 1; -#if !CONFIG_MISC_FIXES - memcpy(segp->tree_probs, t_pred_tree, sizeof(t_pred_tree)); - memcpy(segp->pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); -#endif } else { seg->temporal_update = 0; -#if !CONFIG_MISC_FIXES - memcpy(segp->tree_probs, no_pred_tree, sizeof(no_pred_tree)); -#endif } } void vp10_reset_segment_features(VP10_COMMON *cm) { struct segmentation *seg = &cm->seg; -#if !CONFIG_MISC_FIXES - struct segmentation_probs *segp = &cm->segp; -#endif // Set up default state for MB feature flags seg->enabled = 0; seg->update_map = 0; seg->update_data = 0; -#if !CONFIG_MISC_FIXES - memset(segp->tree_probs, 255, sizeof(segp->tree_probs)); -#endif vp10_clearall_segfeatures(seg); }
diff --git a/vp10/encoder/subexp.c b/vp10/encoder/subexp.c index d407477..8d279b1 100644 --- a/vp10/encoder/subexp.c +++ b/vp10/encoder/subexp.c
@@ -25,8 +25,7 @@ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 11 - CONFIG_MISC_FIXES, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, @@ -86,7 +85,7 @@ static void encode_uniform(vpx_writer *w, int v) { const int l = 8; - const int m = (1 << l) - 191 + CONFIG_MISC_FIXES; + const int m = (1 << l) - 190; if (v < m) { vpx_write_literal(w, v, l - 1); } else {
diff --git a/vp10/encoder/subexp.h b/vp10/encoder/subexp.h index 091334f..64eb275 100644 --- a/vp10/encoder/subexp.h +++ b/vp10/encoder/subexp.h
@@ -36,7 +36,6 @@ vpx_prob *bestp, vpx_prob upd, int stepsize); - int vp10_cond_prob_diff_update_savings(vpx_prob *oldp, const unsigned int ct[2]); #ifdef __cplusplus
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c index 5278d3b..4dc2122 100644 --- a/vp10/encoder/temporal_filter.c +++ b/vp10/encoder/temporal_filter.c
@@ -135,15 +135,38 @@ for (i = 0, k = 0; i < block_height; i++) { for (j = 0; j < block_width; j++, k++) { - int src_byte = frame1[byte]; - int pixel_value = *frame2++; + int pixel_value = *frame2; - modifier = src_byte - pixel_value; - // This is an integer approximation of: - // float coeff = (3.0 * modifer * modifier) / pow(2, strength); - // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); - modifier *= modifier; - modifier *= 3; + // non-local mean approach + int diff_sse[9] = { 0 }; + int idx, idy, index = 0; + + for (idy = -1; idy <= 1; ++idy) { + for (idx = -1; idx <= 1; ++idx) { + int row = i + idy; + int col = j + idx; + + if (row >= 0 && row < (int)block_height && + col >= 0 && col < (int)block_width) { + int diff = frame1[byte + idy * (int)stride + idx] - + frame2[idy * (int)block_width + idx]; + diff_sse[index] = diff * diff; + ++index; + } + } + } + + assert(index > 0); + + modifier = 0; + for (idx = 0; idx < 9; ++idx) + modifier += diff_sse[idx]; + + modifier *= 3; + modifier /= index; + + ++frame2; + modifier += rounding; modifier >>= strength; @@ -182,15 +205,38 @@ for (i = 0, k = 0; i < block_height; i++) { for (j = 0; j < block_width; j++, k++) { - int src_byte = frame1[byte]; - int pixel_value = *frame2++; + int pixel_value = *frame2; - modifier = src_byte - pixel_value; - // This is an integer approximation of: - // float coeff = (3.0 * modifer * modifier) / pow(2, strength); - // modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); - modifier *= modifier; + // non-local mean approach + int diff_sse[9] = { 0 }; + int idx, idy, index = 0; + + for (idy = -1; idy <= 1; ++idy) { + for (idx = -1; idx <= 1; ++idx) { + int row = i + idy; + int col = j + idx; + + if (row >= 0 && row < (int)block_height && + col >= 0 && col < (int)block_width) { + int diff = frame1[byte + idy * (int)stride + idx] - + frame2[idy * (int)block_width + idx]; + diff_sse[index] = diff * diff; + ++index; + } + } + } + + assert(index > 0); + + modifier = 0; + for (idx = 0; idx < 9; ++idx) + modifier += diff_sse[idx]; + modifier *= 3; + modifier /= index; + + ++frame2; + modifier += rounding; modifier >>= strength; @@ -382,50 +428,50 @@ int adj_strength = strength + 2 * (mbd->bd - 8); // Apply the filter (YUV) vp10_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset, - f->y_stride, - predictor, 16, 16, adj_strength, - filter_weight, - accumulator, count); + f->y_stride, + predictor, 16, 16, adj_strength, + filter_weight, + accumulator, count); vp10_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset, - f->uv_stride, predictor + 256, - mb_uv_width, mb_uv_height, - adj_strength, - filter_weight, accumulator + 256, - count + 256); + f->uv_stride, predictor + 256, + mb_uv_width, mb_uv_height, + adj_strength, + filter_weight, accumulator + 256, + count + 256); vp10_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset, - f->uv_stride, predictor + 512, - mb_uv_width, mb_uv_height, - adj_strength, filter_weight, - accumulator + 512, count + 512); + f->uv_stride, predictor + 512, + mb_uv_width, mb_uv_height, + adj_strength, filter_weight, + accumulator + 512, count + 512); } else { // Apply the filter (YUV) - vp10_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, - predictor, 16, 16, - strength, filter_weight, - accumulator, count); - vp10_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, - predictor + 256, - mb_uv_width, mb_uv_height, strength, - filter_weight, accumulator + 256, - 
count + 256); - vp10_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, - predictor + 512, - mb_uv_width, mb_uv_height, strength, - filter_weight, accumulator + 512, - count + 512); + vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, + predictor, 16, 16, + strength, filter_weight, + accumulator, count); + vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, + f->uv_stride, predictor + 256, + mb_uv_width, mb_uv_height, strength, + filter_weight, accumulator + 256, + count + 256); + vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, + f->uv_stride, predictor + 512, + mb_uv_width, mb_uv_height, strength, + filter_weight, accumulator + 512, + count + 512); } #else // Apply the filter (YUV) - vp10_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, + vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, strength, filter_weight, accumulator, count); - vp10_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, + vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, mb_uv_width, mb_uv_height, strength, filter_weight, accumulator + 256, count + 256); - vp10_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, + vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, mb_uv_width, mb_uv_height, strength, filter_weight, accumulator + 512,
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index 1b94190..64211a9 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c
@@ -487,6 +487,50 @@
   return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
 }
 
+// Emits one token for every pixel of the block's color-index map except the
+// top-left one, visiting pixels in raster order. The token value is the
+// position of the pixel's color index in color_order[], which is presumably
+// filled in by vp10_get_palette_color_context() for that pixel.
+// NOTE(review): the map is always read from xd->plane[0] and the context tree
+// always comes from vp10_default_palette_y_color_prob; |plane| only selects
+// the palette size. Confirm this is intended for chroma.
+void vp10_tokenize_palette_sb(struct ThreadData *const td,
+                              BLOCK_SIZE bsize, int plane,
+                              TOKENEXTRA **t) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  uint8_t *color_map = xd->plane[0].color_index_map;
+  PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
+  int n = pmi->palette_size[plane != 0];
+  int i, j, k;
+  int color_new_idx = -1, color_ctx, color_order[PALETTE_MAX_SIZE];
+  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+
+  for (i = 0; i < rows; ++i) {
+    for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+      color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, n,
+                                                 color_order);
+      // Start from "not found" for every pixel so that the assert below
+      // catches an index missing from color_order[] instead of silently
+      // reusing the previous pixel's match.
+      color_new_idx = -1;
+      for (k = 0; k < n; ++k)
+        if (color_map[i * cols + j] == color_order[k]) {
+          color_new_idx = k;
+          break;
+        }
+      assert(color_new_idx >= 0 && color_new_idx < n);
+
+      (*t)->token = color_new_idx;
+      (*t)->context_tree = vp10_default_palette_y_color_prob[n - 2][color_ctx];
+      (*t)->skip_eob_node = 0;
+      ++(*t);
+    }
+  }
+}
+
 static void tokenize_b(int plane, int block, int blk_row, int blk_col,
                        BLOCK_SIZE plane_bsize,
                        TX_SIZE tx_size, void *arg) {
@@ -508,8 +552,8 @@
   const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   const int segment_id = mbmi->segment_id;
   const int16_t *scan, *nb;
-  const TX_TYPE tx_type = get_tx_type(type, xd, block);
-  const scan_order *const so = get_scan(tx_size, tx_type);
+  const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+  const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
   const int ref = is_inter_block(mbmi);
   unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
       td->rd_counts.coef_counts[tx_size][type][ref];
@@ -612,6 +656,131 @@
   return result;
 }
 
+#if CONFIG_VAR_TX
+// Recursively walks the transform partition of one plane. When |tx_size|
+// matches the transform size recorded for this position, the block is
+// tokenized (or, in a dry run, only its entropy context is updated);
+// otherwise it is split into four quadrants that are visited with
+// tx_size - 1. Positions at or beyond max_blocks_high / max_blocks_wide,
+// which are clipped by mb_to_bottom_edge / mb_to_right_edge, are skipped.
+void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
+                 int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+                 int blk_row, int blk_col, int block, int plane,
+                 void *arg) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+  int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+                (blk_col >> (1 - pd->subsampling_x));
+  TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, 0, 0) :
+      mbmi->inter_tx_size[blk_idx];
+
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+    return;
+
+  if (tx_size == plane_tx_size) {
+    // NOTE(review): both declarations below shadow outer names; the inner
+    // plane_bsize is recomputed from mbmi->sb_type and hides the parameter.
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+    if (!dry_run)
+      tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+    else
+      set_entropy_context_b(plane, block, blk_row, blk_col,
+                            plane_bsize, tx_size, arg);
+  } else {
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      int step = 1 << (2 * (tx_size - 1));
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+        continue;
+
+      tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize,
+                  offsetr, offsetc, block + i * step, plane, arg);
+    }
+  }
+}
+
+// Tokenizes all planes of the current block through tokenize_tx(). Each
+// plane is walked in steps of its largest transform size; an EOSB_TOKEN
+// follows each plane unless this is a dry run. Skipped blocks only update
+// the skip count (when not a dry run) and call reset_skip_context(). In a
+// dry run *t is rewound to its value on entry.
+void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+                            int dry_run, int mi_row, int mi_col,
+                            BLOCK_SIZE bsize) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  TOKENEXTRA *t_backup = *t;
+  const int ctx = vp10_get_skip_context(xd);
+  const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+                                          SEG_LVL_SKIP);
+  struct tokenize_b_args arg = {cpi, td, t};
+  int plane;
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  if (mbmi->skip) {
+    if (!dry_run)
+      td->counts->skip[ctx][1] += skip_inc;
+    reset_skip_context(xd, bsize);
+    if (dry_run)
+      *t = t_backup;
+    return;
+  }
+
+  if (!dry_run)
+    td->counts->skip[ctx][0] += skip_inc;
+  else
+    *t = t_backup;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+    const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+    int bh = num_4x4_blocks_wide_lookup[txb_size];
+    int idx, idy;
+    int block = 0;
+    int step = 1 << (max_tx_size * 2);
+    for (idy = 0; idy < mi_height; idy += bh) {
+      for (idx = 0; idx < mi_width; idx += bh) {
+        tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
+                    block, plane, &arg);
+        block += step;
+      }
+    }
+
+    if (!dry_run) {
+      (*t)->token = EOSB_TOKEN;
+      (*t)++;
+    }
+  }
+}
+#endif  // CONFIG_VAR_TX
+
 void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                       int dry_run, BLOCK_SIZE bsize) {
   VP10_COMMON *const cm = &cpi->common;
@@ -644,3 +813,44 @@
     vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
   }
 }
+
+#if CONFIG_SUPERTX
+// When not a dry run, tokenizes each plane with tokenize_b and appends an
+// EOSB_TOKEN after it; in a dry run only set_entropy_context_b is applied
+// and *t is restored to its value on entry. Skipped blocks only update the
+// skip count (when not a dry run) and call reset_skip_context().
+void vp10_tokenize_sb_supertx(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+                              int dry_run, BLOCK_SIZE bsize) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &td->mb.e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  TOKENEXTRA *t_backup = *t;
+  const int ctx = vp10_get_skip_context(xd);
+  const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+                                          SEG_LVL_SKIP);
+  struct tokenize_b_args arg = {cpi, td, t};
+  if (mbmi->skip) {
+    if (!dry_run)
+      td->counts->skip[ctx][1] += skip_inc;
+    reset_skip_context(xd, bsize);
+    if (dry_run)
+      *t = t_backup;
+    return;
+  }
+
+  if (!dry_run) {
+    int plane;
+    td->counts->skip[ctx][0] += skip_inc;
+
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+      vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
+                                              &arg);
+      (*t)->token = EOSB_TOKEN;
+      (*t)++;
+    }
+  } else {
+    vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
+    *t = t_backup;
+  }
+}
+#endif  // CONFIG_SUPERTX
diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h index 5bad415..c68e6f2 100644 --- a/vp10/encoder/tokenize.h +++ b/vp10/encoder/tokenize.h
@@ -51,8 +51,21 @@
 struct VP10_COMP;
 struct ThreadData;
 
+#if CONFIG_VAR_TX
+void vp10_tokenize_sb_inter(struct VP10_COMP *cpi, struct ThreadData *td,
+                            TOKENEXTRA **t, int dry_run, int mi_row, int mi_col,
+                            BLOCK_SIZE bsize);
+#endif  // CONFIG_VAR_TX
+
+void vp10_tokenize_palette_sb(struct ThreadData *const td,
+                              BLOCK_SIZE bsize, int plane,
+                              TOKENEXTRA **t);
 void vp10_tokenize_sb(struct VP10_COMP *cpi, struct ThreadData *td,
                       TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+#if CONFIG_SUPERTX
+void vp10_tokenize_sb_supertx(struct VP10_COMP *cpi, struct ThreadData *td,
+                              TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+#endif  // CONFIG_SUPERTX
 
 extern const int16_t *vp10_dct_value_cost_ptr;
 /* TODO: The Token field should be broken out into a separate char array to
diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c index e111157..976fe45 100644 --- a/vp10/encoder/x86/dct_sse2.c +++ b/vp10/encoder/x86/dct_sse2.c
@@ -18,16 +18,37 @@ #include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" +// Reverse the 8 16 bit words in __m128i +static INLINE __m128i mm_reverse_epi16(const __m128i x) { + const __m128i a = _mm_shufflelo_epi16(x, 0x1b); + const __m128i b = _mm_shufflehi_epi16(a, 0x1b); + return _mm_shuffle_epi32(b, 0x4e); +} + static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in, - int stride) { + int stride, int flipud, int fliplr) { const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); __m128i mask; - in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); - in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); - in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); - in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); + if (!flipud) { + in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); + in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); + } else { + in[0] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); + in[1] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); + in[2] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in[3] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + } + + if (fliplr) { + in[0] = _mm_shufflelo_epi16(in[0], 0x1b); + in[1] = _mm_shufflelo_epi16(in[1], 0x1b); + in[2] = _mm_shufflelo_epi16(in[2], 0x1b); + in[3] = _mm_shufflelo_epi16(in[3], 0x1b); + } in[0] = _mm_slli_epi16(in[0], 4); in[1] = _mm_slli_epi16(in[1], 4); @@ -160,23 +181,55 @@ vpx_fdct4x4_sse2(input, output, stride); break; case ADST_DCT: - load_buffer_4x4(input, in, stride); + load_buffer_4x4(input, in, stride, 0, 0); fadst4_sse2(in); fdct4_sse2(in); write_buffer_4x4(output, in); break; case DCT_ADST: - load_buffer_4x4(input, in, stride); 
+ load_buffer_4x4(input, in, stride, 0, 0); fdct4_sse2(in); fadst4_sse2(in); write_buffer_4x4(output, in); break; case ADST_ADST: - load_buffer_4x4(input, in, stride); + load_buffer_4x4(input, in, stride, 0, 0); fadst4_sse2(in); fadst4_sse2(in); write_buffer_4x4(output, in); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + load_buffer_4x4(input, in, stride, 1, 0); + fadst4_sse2(in); + fdct4_sse2(in); + write_buffer_4x4(output, in); + break; + case DCT_FLIPADST: + load_buffer_4x4(input, in, stride, 0, 1); + fdct4_sse2(in); + fadst4_sse2(in); + write_buffer_4x4(output, in); + break; + case FLIPADST_FLIPADST: + load_buffer_4x4(input, in, stride, 1, 1); + fadst4_sse2(in); + fadst4_sse2(in); + write_buffer_4x4(output, in); + break; + case ADST_FLIPADST: + load_buffer_4x4(input, in, stride, 0, 1); + fadst4_sse2(in); + fadst4_sse2(in); + write_buffer_4x4(output, in); + break; + case FLIPADST_ADST: + load_buffer_4x4(input, in, stride, 1, 0); + fadst4_sse2(in); + fadst4_sse2(in); + write_buffer_4x4(output, in); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -627,15 +680,37 @@ // load 8x8 array static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in, - int stride) { - in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride)); - in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride)); - in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride)); - in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride)); - in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride)); - in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride)); - in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride)); - in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride)); + int stride, int flipud, int fliplr) { + if (!flipud) { + in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride)); + in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride)); + in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride)); + in[3] = 
_mm_load_si128((const __m128i *)(input + 3 * stride)); + in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride)); + in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride)); + in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride)); + in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride)); + } else { + in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride)); + in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride)); + in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride)); + in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride)); + in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride)); + in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride)); + in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride)); + in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride)); + } + + if (fliplr) { + in[0] = mm_reverse_epi16(in[0]); + in[1] = mm_reverse_epi16(in[1]); + in[2] = mm_reverse_epi16(in[2]); + in[3] = mm_reverse_epi16(in[3]); + in[4] = mm_reverse_epi16(in[4]); + in[5] = mm_reverse_epi16(in[5]); + in[6] = mm_reverse_epi16(in[6]); + in[7] = mm_reverse_epi16(in[7]); + } in[0] = _mm_slli_epi16(in[0], 2); in[1] = _mm_slli_epi16(in[1], 2); @@ -1144,26 +1219,63 @@ vpx_fdct8x8_sse2(input, output, stride); break; case ADST_DCT: - load_buffer_8x8(input, in, stride); + load_buffer_8x8(input, in, stride, 0, 0); fadst8_sse2(in); fdct8_sse2(in); right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; case DCT_ADST: - load_buffer_8x8(input, in, stride); + load_buffer_8x8(input, in, stride, 0, 0); fdct8_sse2(in); fadst8_sse2(in); right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; case ADST_ADST: - load_buffer_8x8(input, in, stride); + load_buffer_8x8(input, in, stride, 0, 0); fadst8_sse2(in); fadst8_sse2(in); right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + load_buffer_8x8(input, in, stride, 1, 0); + fadst8_sse2(in); + fdct8_sse2(in); + 
right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); + break; + case DCT_FLIPADST: + load_buffer_8x8(input, in, stride, 0, 1); + fdct8_sse2(in); + fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); + break; + case FLIPADST_FLIPADST: + load_buffer_8x8(input, in, stride, 1, 1); + fadst8_sse2(in); + fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); + break; + case ADST_FLIPADST: + load_buffer_8x8(input, in, stride, 0, 1); + fadst8_sse2(in); + fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); + break; + case FLIPADST_ADST: + load_buffer_8x8(input, in, stride, 1, 0); + fadst8_sse2(in); + fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -1171,15 +1283,37 @@ } static INLINE void load_buffer_16x16(const int16_t* input, __m128i *in0, - __m128i *in1, int stride) { - // load first 8 columns - load_buffer_8x8(input, in0, stride); - load_buffer_8x8(input + 8 * stride, in0 + 8, stride); + __m128i *in1, int stride, + int flipud, int fliplr) { + // Load 4 8x8 blocks + const int16_t *topL = input; + const int16_t *topR = input + 8; + const int16_t *botL = input + 8 * stride; + const int16_t *botR = input + 8 * stride + 8; - input += 8; + const int16_t *tmp; + + if (flipud) { + // Swap left columns + tmp = topL; topL = botL; botL = tmp; + // Swap right columns + tmp = topR; topR = botR; botR = tmp; + } + + if (fliplr) { + // Swap top rows + tmp = topL; topL = topR; topR = tmp; + // Swap bottom rows + tmp = botL; botL = botR; botR = tmp; + } + + // load first 8 columns + load_buffer_8x8(topL, in0, stride, flipud, fliplr); + load_buffer_8x8(botL, in0 + 8, stride, flipud, fliplr); + // load second 8 columns - load_buffer_8x8(input, in1, stride); - load_buffer_8x8(input + 8 * stride, in1 + 8, stride); + load_buffer_8x8(topR, in1, stride, flipud, fliplr); + load_buffer_8x8(botR, in1 + 8, stride, flipud, 
fliplr); } static INLINE void write_buffer_16x16(tran_low_t *output, __m128i *in0, @@ -2031,26 +2165,63 @@ vpx_fdct16x16_sse2(input, output, stride); break; case ADST_DCT: - load_buffer_16x16(input, in0, in1, stride); + load_buffer_16x16(input, in0, in1, stride, 0, 0); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fdct16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; case DCT_ADST: - load_buffer_16x16(input, in0, in1, stride); + load_buffer_16x16(input, in0, in1, stride, 0, 0); fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; case ADST_ADST: - load_buffer_16x16(input, in0, in1, stride); + load_buffer_16x16(input, in0, in1, stride, 0, 0); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + load_buffer_16x16(input, in0, in1, stride, 1, 0); + fadst16_sse2(in0, in1); + right_shift_16x16(in0, in1); + fdct16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); + break; + case DCT_FLIPADST: + load_buffer_16x16(input, in0, in1, stride, 0, 1); + fdct16_sse2(in0, in1); + right_shift_16x16(in0, in1); + fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); + break; + case FLIPADST_FLIPADST: + load_buffer_16x16(input, in0, in1, stride, 1, 1); + fadst16_sse2(in0, in1); + right_shift_16x16(in0, in1); + fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); + break; + case ADST_FLIPADST: + load_buffer_16x16(input, in0, in1, stride, 0, 1); + fadst16_sse2(in0, in1); + right_shift_16x16(in0, in1); + fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); + break; + case FLIPADST_ADST: + load_buffer_16x16(input, in0, in1, stride, 1, 0); + fadst16_sse2(in0, in1); + right_shift_16x16(in0, in1); + fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); + break; +#endif // CONFIG_EXT_TX default: assert(0); break;
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk index 2eb3488..f8c2112 100644 --- a/vp10/vp10_common.mk +++ b/vp10/vp10_common.mk
@@ -63,6 +63,17 @@ VP10_COMMON_SRCS-yes += common/scan.h VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c +VP10_COMMON_SRCS-yes += common/vp10_txfm.h +VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.h +VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.c +VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.h +VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.c +VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.h +VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.c +VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h +VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.h +VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c +VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c index 21c9c03..c90b936 100644 --- a/vp10/vp10_cx_iface.c +++ b/vp10/vp10_cx_iface.c
@@ -91,9 +91,6 @@ size_t pending_cx_data_sz; int pending_frame_count; size_t pending_frame_sizes[8]; -#if !CONFIG_MISC_FIXES - size_t pending_frame_magnitude; -#endif vpx_image_t preview_img; vpx_enc_frame_flags_t next_frame_flags; vp8_postproc_cfg_t preview_ppcfg; @@ -783,39 +780,30 @@ uint8_t marker = 0xc0; unsigned int mask; int mag, index_sz; -#if CONFIG_MISC_FIXES int i; size_t max_frame_sz = 0; -#endif assert(ctx->pending_frame_count); assert(ctx->pending_frame_count <= 8); // Add the number of frames to the marker byte marker |= ctx->pending_frame_count - 1; -#if CONFIG_MISC_FIXES for (i = 0; i < ctx->pending_frame_count - 1; i++) { const size_t frame_sz = (unsigned int) ctx->pending_frame_sizes[i] - 1; max_frame_sz = frame_sz > max_frame_sz ? frame_sz : max_frame_sz; } -#endif // Choose the magnitude for (mag = 0, mask = 0xff; mag < 4; mag++) { -#if CONFIG_MISC_FIXES if (max_frame_sz <= mask) break; -#else - if (ctx->pending_frame_magnitude < mask) - break; -#endif mask <<= 8; mask |= 0xff; } marker |= mag << 3; // Write the index - index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - CONFIG_MISC_FIXES); + index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - 1); if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) { uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz; int i, j; @@ -835,11 +823,11 @@ #endif *x++ = marker; - for (i = 0; i < ctx->pending_frame_count - CONFIG_MISC_FIXES; i++) { + for (i = 0; i < ctx->pending_frame_count - 1; i++) { unsigned int this_sz; assert(ctx->pending_frame_sizes[i] > 0); - this_sz = (unsigned int)ctx->pending_frame_sizes[i] - CONFIG_MISC_FIXES; + this_sz = (unsigned int)ctx->pending_frame_sizes[i] - 1; for (j = 0; j <= mag; j++) { *x++ = this_sz & 0xff; this_sz >>= 8; @@ -993,9 +981,6 @@ ctx->pending_cx_data = cx_data; ctx->pending_cx_data_sz += size; ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; -#if !CONFIG_MISC_FIXES - ctx->pending_frame_magnitude |= size; -#endif cx_data += size; 
cx_data_sz -= size; @@ -1012,9 +997,6 @@ ctx->pending_cx_data = NULL; ctx->pending_cx_data_sz = 0; ctx->pending_frame_count = 0; -#if !CONFIG_MISC_FIXES - ctx->pending_frame_magnitude = 0; -#endif ctx->output_cx_pkt_cb.output_cx_pkt( &pkt, ctx->output_cx_pkt_cb.user_priv); } @@ -1031,9 +1013,6 @@ if (ctx->pending_cx_data) { ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; -#if !CONFIG_MISC_FIXES - ctx->pending_frame_magnitude |= size; -#endif ctx->pending_cx_data_sz += size; // write the superframe only for the case when if (!ctx->output_cx_pkt_cb.output_cx_pkt) @@ -1043,9 +1022,6 @@ ctx->pending_cx_data = NULL; ctx->pending_cx_data_sz = 0; ctx->pending_frame_count = 0; -#if !CONFIG_MISC_FIXES - ctx->pending_frame_magnitude = 0; -#endif } else { pkt.data.frame.buf = cx_data; pkt.data.frame.sz = size;
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk index dc3b271..7ae2fb2 100644 --- a/vp10/vp10cx.mk +++ b/vp10/vp10cx.mk
@@ -23,6 +23,8 @@ VP10_CX_SRCS-yes += encoder/cost.h VP10_CX_SRCS-yes += encoder/cost.c VP10_CX_SRCS-yes += encoder/dct.c +VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c +VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.c VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.h VP10_CX_SRCS-yes += encoder/encodeframe.c @@ -51,6 +53,8 @@ VP10_CX_SRCS-yes += encoder/treewriter.h VP10_CX_SRCS-yes += encoder/mcomp.c VP10_CX_SRCS-yes += encoder/encoder.c +VP10_CX_SRCS-yes += encoder/palette.h +VP10_CX_SRCS-yes += encoder/palette.c VP10_CX_SRCS-yes += encoder/picklpf.c VP10_CX_SRCS-yes += encoder/picklpf.h VP10_CX_SRCS-yes += encoder/quantize.c
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 61eb591..41994dc 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h
@@ -98,8 +98,13 @@ return mbmi->ref_frame[0] > INTRA_FRAME; } +static INLINE int is_compound_ref(const MV_REFERENCE_FRAME *ref_frame) { + assert(ref_frame != NULL); + return ref_frame[1] > INTRA_FRAME; +} + static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { - return mbmi->ref_frame[1] > INTRA_FRAME; + return is_compound_ref(mbmi->ref_frame); } PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index 1f16325..e4c349c 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c
@@ -192,6 +192,7 @@ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; const int has_above = xd->up_available; const int has_left = xd->left_available; + // Note: // The mode info data structure has a one element border above and to the // left of the entries correpsonding to real macroblocks.
diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c index db78d6b..033326d 100644 --- a/vp9/common/vp9_thread_common.c +++ b/vp9/common/vp9_thread_common.c
@@ -379,11 +379,11 @@ for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) - accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; + accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) - accum->comp_ref[i][j] += counts->comp_ref[i][j]; + accum->comp_ref[i][j] += counts->comp_ref[i][j]; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { for (j = 0; j < TX_SIZES; j++)
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index f5da07e..ed49a69 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c
@@ -237,15 +237,16 @@ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; lock_buffer_pool(pool); + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { const int old_idx = cm->ref_frame_map[ref_index]; // Current thread releases the holding of reference frame. decrease_ref_count(old_idx, frame_bufs, pool); // Release the reference frame in reference map. - if (mask & 1) { + if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool); - } + cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; ++ref_index; } @@ -267,7 +268,7 @@ } // Invalidate these references until the next frame starts. - for (ref_index = 0; ref_index < 3; ref_index++) + for (ref_index = 0; ref_index < REFS_PER_FRAME; ref_index++) cm->frame_refs[ref_index].idx = -1; } @@ -325,7 +326,6 @@ pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; } - if (setjmp(cm->error.jmp)) { const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); int i; @@ -350,9 +350,8 @@ decrease_ref_count(old_idx, frame_bufs, pool); // Release the reference frame in reference map. - if (mask & 1) { + if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool); - } ++ref_index; }
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 63db214..c85e4b0 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -20,7 +20,6 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" - CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { size_t last_coded_q_map_size; size_t consec_zero_mv_size;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index c07eee9..7f94e19 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c
@@ -4072,7 +4072,7 @@ // either compound, single or hybrid prediction as per whatever has // worked best for that type of frame in the past. // It also predicts whether another coding mode would have worked - // better that this coding mode. If that is the case, it remembers + // better than this coding mode. If that is the case, it remembers // that for subsequent frames. // It does the same analysis for transform size selection also. const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index eda7743..21e57c4 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c
@@ -590,6 +590,12 @@ rd->thresh_mult[THR_NEARMV] += 1000; rd->thresh_mult[THR_NEARA] += 1000; + rd->thresh_mult[THR_NEARG] += 1000; + + rd->thresh_mult[THR_ZEROMV] += 2000; + rd->thresh_mult[THR_ZEROG] += 2000; + rd->thresh_mult[THR_ZEROA] += 2000; + rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; @@ -597,13 +603,9 @@ rd->thresh_mult[THR_COMP_NEARLA] += 1500; rd->thresh_mult[THR_COMP_NEWLA] += 2000; - rd->thresh_mult[THR_NEARG] += 1000; rd->thresh_mult[THR_COMP_NEARGA] += 1500; rd->thresh_mult[THR_COMP_NEWGA] += 2000; - rd->thresh_mult[THR_ZEROMV] += 2000; - rd->thresh_mult[THR_ZEROG] += 2000; - rd->thresh_mult[THR_ZEROA] += 2000; rd->thresh_mult[THR_COMP_ZEROLA] += 2500; rd->thresh_mult[THR_COMP_ZEROGA] += 2500; @@ -618,9 +620,10 @@ } void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - static const int thresh_mult[2][MAX_REFS] = - {{2500, 2500, 2500, 4500, 4500, 2500}, - {2000, 2000, 2000, 4000, 4000, 2000}}; + static const int thresh_mult[2][MAX_REFS] = { + {2500, 2500, 2500, 4500, 4500, 2500}, + {2000, 2000, 2000, 4000, 4000, 2000} + }; RD_OPT *const rd = &cpi->rd; const int idx = cpi->oxcf.mode == BEST; memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index b8d1720..973d8f5 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c
@@ -1559,8 +1559,8 @@ const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - const int refs[2] = {mbmi->ref_frame[0], - mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]}; + const int refs[2] = { mbmi->ref_frame[0], + mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; int_mv ref_mv[2]; int ite, ref; const InterpKernel *kernel = vp9_filter_kernels[mbmi->interp_filter]; @@ -2401,8 +2401,8 @@ const int this_mode = mbmi->mode; int_mv *frame_mv = mode_mv[this_mode]; int i; - int refs[2] = { mbmi->ref_frame[0], - (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; + const int refs[2] = { mbmi->ref_frame[0], + mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; int_mv cur_mv[2]; #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]); @@ -3135,10 +3135,14 @@ int this_skip2 = 0; int64_t total_sse = INT64_MAX; int early_term = 0; + const MV_REFERENCE_FRAME refs[2] = { + vp9_mode_order[mode_index].ref_frame[0], + vp9_mode_order[mode_index].ref_frame[1] + }; this_mode = vp9_mode_order[mode_index].mode; - ref_frame = vp9_mode_order[mode_index].ref_frame[0]; - second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; + ref_frame = refs[0]; + second_ref_frame = refs[1]; // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. 
@@ -3227,7 +3231,7 @@ continue; } - comp_pred = second_ref_frame > INTRA_FRAME; + comp_pred = is_compound_ref(refs); if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; @@ -3520,7 +3524,7 @@ if (best_mbmode.mode == NEWMV) { const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0], best_mbmode.ref_frame[1]}; - int comp_pred_mode = refs[1] > INTRA_FRAME; + int comp_pred_mode = is_compound_ref(refs); if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int == @@ -3814,9 +3818,13 @@ int64_t total_sse = INT_MAX; int early_term = 0; struct buf_2d backup_yv12[2][MAX_MB_PLANE]; + const MV_REFERENCE_FRAME refs[2] = { + vp9_ref_order[ref_index].ref_frame[0], + vp9_ref_order[ref_index].ref_frame[1] + }; - ref_frame = vp9_ref_order[ref_index].ref_frame[0]; - second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; + ref_frame = refs[0]; + second_ref_frame = refs[1]; #if CONFIG_BETTER_HW_COMPATIBILITY // forbid 8X4 and 4X8 partitions if any reference frame is scaled. @@ -3865,7 +3873,7 @@ tile_data->thresh_freq_fact[bsize][ref_index])) continue; - comp_pred = second_ref_frame > INTRA_FRAME; + comp_pred = is_compound_ref(refs); if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; @@ -3982,7 +3990,7 @@ rd_opt->threshes[segment_id][bsize][THR_LAST] : rd_opt->threshes[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? - rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; + rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; @@ -4004,6 +4012,7 @@ int64_t rs_rd; MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; mbmi->interp_filter = switchable_filter_index; + tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, &rate, @@ -4143,7 +4152,7 @@ // Estimate the reference frame signaling cost and add it // to the rolling cost variable. 
- if (second_ref_frame > INTRA_FRAME) { + if (is_compound_ref(mbmi->ref_frame)) { rate2 += ref_costs_comp[ref_frame]; } else { rate2 += ref_costs_single[ref_frame];
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index bd99c6d..7504c0e 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h
@@ -806,9 +806,12 @@ VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) #define VPX_CTRL_VP9E_SET_SVC_REF_FRAME_CONFIG -VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *) +/*!\brief + * + * TODO(rbultje) : add support of the control in ffmpeg + */ #define VPX_CTRL_VP9E_SET_RENDER_SIZE - +VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *) /*!\endcond */ /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus
diff --git a/vpx_dsp/bitreader_buffer.c b/vpx_dsp/bitreader_buffer.c index d7b55cf..595b9bb 100644 --- a/vpx_dsp/bitreader_buffer.c +++ b/vpx_dsp/bitreader_buffer.c
@@ -43,11 +43,7 @@ int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits) { -#if CONFIG_MISC_FIXES const int nbits = sizeof(unsigned) * 8 - bits - 1; const unsigned value = (unsigned)vpx_rb_read_literal(rb, bits + 1) << nbits; return ((int) value) >> nbits; -#else - return vpx_rb_read_signed_literal(rb, bits); -#endif }
diff --git a/vpx_dsp/bitwriter_buffer.c b/vpx_dsp/bitwriter_buffer.c index 6182a72..8633372 100644 --- a/vpx_dsp/bitwriter_buffer.c +++ b/vpx_dsp/bitwriter_buffer.c
@@ -39,10 +39,5 @@ void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb, int data, int bits) { -#if CONFIG_MISC_FIXES vpx_wb_write_literal(wb, data, bits + 1); -#else - vpx_wb_write_literal(wb, abs(data), bits); - vpx_wb_write_bit(wb, data < 0); -#endif }
diff --git a/vpx_dsp/intrapred.c b/vpx_dsp/intrapred.c index a9669e5..18bcd87 100644 --- a/vpx_dsp/intrapred.c +++ b/vpx_dsp/intrapred.c
@@ -832,11 +832,9 @@ intra_pred_no_4x4(d207) intra_pred_no_4x4(d63) intra_pred_no_4x4(d45) -#if CONFIG_MISC_FIXES intra_pred_allsizes(d207e) intra_pred_allsizes(d63e) intra_pred_no_4x4(d45e) -#endif intra_pred_no_4x4(d117) intra_pred_no_4x4(d135) intra_pred_no_4x4(d153)