Merge changes I92819356,I50b5a313,I807e60c6,I8a8df9fd into nextgenv2 * changes: Branch dct to new implementation for bd12 Change dct32x32's range Fit dct's stage range into 32-bit when bitdepth is 12 Pass tx_type into get_tx_scale
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc index cd0dca2..777ac49 100644 --- a/test/error_resilience_test.cc +++ b/test/error_resilience_test.cc
@@ -164,6 +164,7 @@ mismatch_psnr_ += mismatch_psnr; ++mismatch_nframes_; // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n"; + ASSERT_TRUE(0) << "Encode/Decode mismatch found"; } void SetErrorFrames(int num, unsigned int *list) {
diff --git a/test/subtract_test.cc b/test/subtract_test.cc index a3f0152..48edf1e 100644 --- a/test/subtract_test.cc +++ b/test/subtract_test.cc
@@ -15,12 +15,16 @@ #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" +#include "test/util.h" #if CONFIG_VP10 #include "vp10/common/blockd.h" #elif CONFIG_VP9 #include "vp9/common/vp9_blockd.h" #endif #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +#define USE_SPEED_TEST (0) typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, @@ -108,4 +112,151 @@ INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_msa)); #endif + +typedef void (*HBDSubtractFunc)(int rows, int cols, + int16_t *diff_ptr, ptrdiff_t diff_stride, + const uint8_t *src_ptr, ptrdiff_t src_stride, + const uint8_t *pred_ptr, ptrdiff_t pred_stride, + int bd); + +using ::std::tr1::get; +using ::std::tr1::make_tuple; +using ::std::tr1::tuple; + +// <width, height, bit_depth, subtract> +typedef tuple<int, int, int, HBDSubtractFunc> Params; + +#if CONFIG_VP9_HIGHBITDEPTH +class VP10HBDSubtractBlockTest : public ::testing::TestWithParam<Params> { + public: + virtual void SetUp() { + block_width_ = GET_PARAM(0); + block_height_ = GET_PARAM(1); + bit_depth_ = static_cast<vpx_bit_depth_t>(GET_PARAM(2)); + func_ = GET_PARAM(3); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + + const size_t max_width = 128; + const size_t max_block_size = max_width * max_width; + src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + vpx_memalign(16, max_block_size * sizeof(uint16_t)))); + pred_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + vpx_memalign(16, max_block_size * sizeof(uint16_t)))); + diff_ = reinterpret_cast<int16_t *>( + vpx_memalign(16, max_block_size * sizeof(int16_t))); + } + + virtual void TearDown() { + vpx_free(CONVERT_TO_SHORTPTR(src_)); + vpx_free(CONVERT_TO_SHORTPTR(pred_)); + vpx_free(diff_); + } + + protected: + void RunForSpeed(); + void CheckResult(); + + private: + ACMRandom rnd_; + int block_height_; + int block_width_; + vpx_bit_depth_t bit_depth_; + 
HBDSubtractFunc func_; + uint8_t *src_; + uint8_t *pred_; + int16_t *diff_; +}; + +void VP10HBDSubtractBlockTest::RunForSpeed() { + const int test_num = 200000; + const int max_width = 128; + const int max_block_size = max_width * max_width; + const int mask = (1 << bit_depth_) - 1; + int i, j; + + for (j = 0; j < max_block_size; ++j) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask; + CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask; + } + + for (i = 0; i < test_num; ++i) { + func_(block_height_, block_width_, diff_, block_width_, + src_, block_width_, pred_, block_width_, bit_depth_); + } +} + +void VP10HBDSubtractBlockTest::CheckResult() { + const int test_num = 100; + const int max_width = 128; + const int max_block_size = max_width * max_width; + const int mask = (1 << bit_depth_) - 1; + int i, j; + + for (i = 0; i < test_num; ++i) { + for (j = 0; j < max_block_size; ++j) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask; + CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask; + } + + func_(block_height_, block_width_, diff_, block_width_, + src_, block_width_, pred_, block_width_, bit_depth_); + + for (int r = 0; r < block_height_; ++r) { + for (int c = 0; c < block_width_; ++c) { + EXPECT_EQ(diff_[r * block_width_ + c], + (CONVERT_TO_SHORTPTR(src_)[r * block_width_ + c] - + CONVERT_TO_SHORTPTR(pred_)[r * block_width_ + c])) + << "r = " << r << ", c = " << c << ", test: " << i; + } + } + } +} + +TEST_P(VP10HBDSubtractBlockTest, CheckResult) { + CheckResult(); +} + +#if USE_SPEED_TEST +TEST_P(VP10HBDSubtractBlockTest, CheckSpeed) { + RunForSpeed(); +} +#endif // USE_SPEED_TEST + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, VP10HBDSubtractBlockTest, ::testing::Values( + make_tuple(4, 4, 12, vpx_highbd_subtract_block_sse2), + make_tuple(4, 4, 12, vpx_highbd_subtract_block_c), + make_tuple(4, 8, 12, vpx_highbd_subtract_block_sse2), + make_tuple(4, 8, 12, vpx_highbd_subtract_block_c), + make_tuple(8, 4, 12, vpx_highbd_subtract_block_sse2), + 
make_tuple(8, 4, 12, vpx_highbd_subtract_block_c), + make_tuple(8, 8, 12, vpx_highbd_subtract_block_sse2), + make_tuple(8, 8, 12, vpx_highbd_subtract_block_c), + make_tuple(8, 16, 12, vpx_highbd_subtract_block_sse2), + make_tuple(8, 16, 12, vpx_highbd_subtract_block_c), + make_tuple(16, 8, 12, vpx_highbd_subtract_block_sse2), + make_tuple(16, 8, 12, vpx_highbd_subtract_block_c), + make_tuple(16, 16, 12, vpx_highbd_subtract_block_sse2), + make_tuple(16, 16, 12, vpx_highbd_subtract_block_c), + make_tuple(16, 32, 12, vpx_highbd_subtract_block_sse2), + make_tuple(16, 32, 12, vpx_highbd_subtract_block_c), + make_tuple(32, 16, 12, vpx_highbd_subtract_block_sse2), + make_tuple(32, 16, 12, vpx_highbd_subtract_block_c), + make_tuple(32, 32, 12, vpx_highbd_subtract_block_sse2), + make_tuple(32, 32, 12, vpx_highbd_subtract_block_c), + make_tuple(32, 64, 12, vpx_highbd_subtract_block_sse2), + make_tuple(32, 64, 12, vpx_highbd_subtract_block_c), + make_tuple(64, 32, 12, vpx_highbd_subtract_block_sse2), + make_tuple(64, 32, 12, vpx_highbd_subtract_block_c), + make_tuple(64, 64, 12, vpx_highbd_subtract_block_sse2), + make_tuple(64, 64, 12, vpx_highbd_subtract_block_c), + make_tuple(64, 128, 12, vpx_highbd_subtract_block_sse2), + make_tuple(64, 128, 12, vpx_highbd_subtract_block_c), + make_tuple(128, 64, 12, vpx_highbd_subtract_block_sse2), + make_tuple(128, 64, 12, vpx_highbd_subtract_block_c), + make_tuple(128, 128, 12, vpx_highbd_subtract_block_sse2), + make_tuple(128, 128, 12, vpx_highbd_subtract_block_c))); +#endif // HAVE_SSE2 +#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace
diff --git a/test/variance_test.cc b/test/variance_test.cc index 97c5516..79f4e10 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc
@@ -74,6 +74,10 @@ return res; } +/* Note: + * Our codebase calculates the "diff" value in the variance algorithm by + * (src - ref). + */ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w, int l2h, int src_stride_coeff, int ref_stride_coeff, uint32_t *sse_ptr, @@ -87,14 +91,14 @@ for (int x = 0; x < w; x++) { int diff; if (!use_high_bit_depth_) { - diff = ref[w * y * ref_stride_coeff + x] - - src[w * y * src_stride_coeff + x]; + diff = src[w * y * src_stride_coeff + x] - + ref[w * y * ref_stride_coeff + x]; se += diff; sse += diff * diff; #if CONFIG_VP9_HIGHBITDEPTH } else { - diff = CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x] - - CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x]; + diff = CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x] - + CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x]; se += diff; sse += diff * diff; #endif // CONFIG_VP9_HIGHBITDEPTH @@ -309,15 +313,15 @@ void VarianceTest<VarianceFunctionType>::RefTest() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size_; j++) { - if (!use_high_bit_depth_) { - src_[j] = rnd_.Rand8(); - ref_[j] = rnd_.Rand8(); + if (!use_high_bit_depth_) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); #if CONFIG_VP9_HIGHBITDEPTH - } else { - CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() && mask_; - CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() && mask_; + } else { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_; + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_; #endif // CONFIG_VP9_HIGHBITDEPTH - } + } } unsigned int sse1, sse2; unsigned int var1; @@ -328,8 +332,10 @@ log2height_, stride_coeff, stride_coeff, &sse2, use_high_bit_depth_, bit_depth_); - EXPECT_EQ(sse1, sse2); - EXPECT_EQ(var1, var2); + EXPECT_EQ(sse1, sse2) + << "Error at test index: " << i; + EXPECT_EQ(var1, var2) + << "Error at test index: " << i; } } @@ -346,8 +352,8 @@ ref_[ref_ind] = rnd_.Rand8(); #if CONFIG_VP9_HIGHBITDEPTH } else { - CONVERT_TO_SHORTPTR(src_)[src_ind] = 
rnd_.Rand16() && mask_; - CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() && mask_; + CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask_; + CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask_; #endif // CONFIG_VP9_HIGHBITDEPTH } } @@ -361,8 +367,10 @@ log2height_, src_stride_coeff, ref_stride_coeff, &sse2, use_high_bit_depth_, bit_depth_); - EXPECT_EQ(sse1, sse2); - EXPECT_EQ(var1, var2); + EXPECT_EQ(sse1, sse2) + << "Error at test index: " << i; + EXPECT_EQ(var1, var2) + << "Error at test index: " << i; } } @@ -915,6 +923,15 @@ make_tuple(2, 3, &vpx_highbd_8_variance4x8_c, 8), make_tuple(2, 2, &vpx_highbd_8_variance4x4_c, 8))); +#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE4_1, VpxHBDVarianceTest, + ::testing::Values( + make_tuple(2, 2, &vpx_highbd_8_variance4x4_sse4_1, 8), + make_tuple(2, 2, &vpx_highbd_10_variance4x4_sse4_1, 10), + make_tuple(2, 2, &vpx_highbd_12_variance4x4_sse4_1, 12))); +#endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH + INSTANTIATE_TEST_CASE_P( C, VpxHBDSubpelVarianceTest, ::testing::Values( @@ -1117,6 +1134,22 @@ make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse, 0))); #endif // CONFIG_USE_X86INC +#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE4_1, VpxSubpelVarianceTest, + ::testing::Values( + make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_sse4_1, 8), + make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_sse4_1, 10), + make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_sse4_1, 12))); + +INSTANTIATE_TEST_CASE_P( + SSE4_1, VpxSubpelAvgVarianceTest, + ::testing::Values( + make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_sse4_1, 8), + make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_sse4_1, 10), + make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_sse4_1, 12))); +#endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH + #if CONFIG_VP9_HIGHBITDEPTH /* TODO(debargha): This test does not support the highbd version INSTANTIATE_TEST_CASE_P(
diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc index 0e75157..9c93dd8 100644 --- a/test/vp10_ans_test.cc +++ b/test/vp10_ans_test.cc
@@ -218,14 +218,16 @@ tree[2 * i - 1] = sym; } -// treep are the probabilites of tree nodes like: -// * -// / \ -// -sym0 * -// / \ -// -sym1 * -// / \ -// -sym2 -sym3 +/* The treep array contains the probabilities of nodes of a tree structured + * like: + * * + * / \ + * -sym0 * + * / \ + * -sym1 * + * / \ + * -sym2 -sym3 + */ void tab2tree(const rans_sym *tab, int tab_size, vpx_prob *treep) { const unsigned basep = 256; unsigned pleft = basep;
diff --git a/test/vp10_fwd_txfm1d_test.cc b/test/vp10_fwd_txfm1d_test.cc index 2d09e0d..d6643e5 100644 --- a/test/vp10_fwd_txfm1d_test.cc +++ b/test/vp10_fwd_txfm1d_test.cc
@@ -12,23 +12,28 @@ #include "test/vp10_txfm_test.h" using libvpx_test::ACMRandom; +using libvpx_test::base; +using libvpx_test::reference_hybrid_1d; +using libvpx_test::TYPE_TXFM; +using libvpx_test::TYPE_DCT; +using libvpx_test::TYPE_ADST; namespace { -static int txfm_type_num = 2; -static TYPE_TXFM txfm_type_ls[2] = {TYPE_DCT, TYPE_ADST}; +const int txfm_type_num = 2; +const TYPE_TXFM txfm_type_ls[2] = {TYPE_DCT, TYPE_ADST}; -static int txfm_size_num = 5; -static int txfm_size_ls[5] = {4, 8, 16, 32, 64}; +const int txfm_size_num = 5; +const int txfm_size_ls[5] = {4, 8, 16, 32, 64}; -static TxfmFunc fwd_txfm_func_ls[2][5] = { +const TxfmFunc fwd_txfm_func_ls[2][5] = { {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new, vp10_fdct64_new}, {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new, NULL}}; // the maximum stage number of fwd/inv 1d dct/adst txfm is 12 -static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}; -static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}; +const int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}; +const int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}; TEST(vp10_fwd_txfm1d, round_shift) { EXPECT_EQ(round_shift(7, 1), 4);
diff --git a/test/vp10_fwd_txfm2d_sse4_test.cc b/test/vp10_fwd_txfm2d_sse4_test.cc index d3882cd..ab9450b 100644 --- a/test/vp10_fwd_txfm2d_sse4_test.cc +++ b/test/vp10_fwd_txfm2d_sse4_test.cc
@@ -8,6 +8,9 @@ #include "vp10/common/vp10_fwd_txfm2d_cfg.h" using libvpx_test::ACMRandom; +using libvpx_test::Fwd_Txfm2d_Func; +using libvpx_test::base; +using libvpx_test::bd; namespace { @@ -58,8 +61,8 @@ } } - txfm2d_func_c(input, output_c, cfg.txfm_size, &cfg, 10); - txfm2d_func_sse4_1(input, output_sse4_1, cfg.txfm_size, &cfg, 10); + txfm2d_func_c(input, output_c, cfg.txfm_size, &cfg, bd); + txfm2d_func_sse4_1(input, output_sse4_1, cfg.txfm_size, &cfg, bd); for (int r = 0; r < txfm_size; r++) { for (int c = 0; c < txfm_size; c++) { EXPECT_EQ(output_c[r * txfm_size + c],
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc index 137f653..668103b 100644 --- a/test/vp10_fwd_txfm2d_test.cc +++ b/test/vp10_fwd_txfm2d_test.cc
@@ -18,6 +18,13 @@ #include "./vp10_rtcd.h" using libvpx_test::ACMRandom; +using libvpx_test::base; +using libvpx_test::bd; +using libvpx_test::compute_avg_abs_error; +using libvpx_test::Fwd_Txfm2d_Func; +using libvpx_test::TYPE_TXFM; +using libvpx_test::TYPE_DCT; +using libvpx_test::TYPE_ADST; namespace { @@ -36,8 +43,8 @@ {&fwd_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}}; const Fwd_Txfm2d_Func fwd_txfm_func_ls[5] = { - vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16, - vp10_fwd_txfm2d_32x32, vp10_fwd_txfm2d_64x64}; + vp10_fwd_txfm2d_4x4_c, vp10_fwd_txfm2d_8x8_c, vp10_fwd_txfm2d_16x16_c, + vp10_fwd_txfm2d_32x32_c, vp10_fwd_txfm2d_64x64_c}; const int txfm_type_num = 4; const TYPE_TXFM type_ls_0[4] = {TYPE_DCT, TYPE_DCT, TYPE_ADST, TYPE_ADST}; @@ -106,4 +113,4 @@ } #endif // CONFIG_VP9_HIGHBITDEPTH -} // anonymous namespace +} // namespace
diff --git a/test/vp10_inv_txfm1d_test.cc b/test/vp10_inv_txfm1d_test.cc index 2e9e58d..98b2777 100644 --- a/test/vp10_inv_txfm1d_test.cc +++ b/test/vp10_inv_txfm1d_test.cc
@@ -13,27 +13,28 @@ #include "vp10/common/vp10_inv_txfm1d.h" using libvpx_test::ACMRandom; +using libvpx_test::base; namespace { -static int txfm_type_num = 2; -static int txfm_size_num = 5; -static int txfm_size_ls[5] = {4, 8, 16, 32, 64}; +const int txfm_type_num = 2; +const int txfm_size_num = 5; +const int txfm_size_ls[5] = {4, 8, 16, 32, 64}; -static TxfmFunc fwd_txfm_func_ls[2][5] = { +const TxfmFunc fwd_txfm_func_ls[2][5] = { {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new, vp10_fdct64_new}, {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new, NULL}}; -static TxfmFunc inv_txfm_func_ls[2][5] = { +const TxfmFunc inv_txfm_func_ls[2][5] = { {vp10_idct4_new, vp10_idct8_new, vp10_idct16_new, vp10_idct32_new, vp10_idct64_new}, {vp10_iadst4_new, vp10_iadst8_new, vp10_iadst16_new, vp10_iadst32_new, NULL}}; // the maximum stage number of fwd/inv 1d dct/adst txfm is 12 -static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}; -static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}; +const int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}; +const int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}; TEST(vp10_inv_txfm1d, round_trip) { ACMRandom rnd(ACMRandom::DeterministicSeed());
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc index 9257244..7acb329 100644 --- a/test/vp10_inv_txfm2d_test.cc +++ b/test/vp10_inv_txfm2d_test.cc
@@ -19,6 +19,11 @@ #include "vp10/common/vp10_inv_txfm2d_cfg.h" using libvpx_test::ACMRandom; +using libvpx_test::base; +using libvpx_test::bd; +using libvpx_test::compute_avg_abs_error; +using libvpx_test::Fwd_Txfm2d_Func; +using libvpx_test::Inv_Txfm2d_Func; namespace { @@ -116,4 +121,4 @@ } #endif // CONFIG_VP9_HIGHBITDEPTH -} // anonymous namespace +} // namespace
diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h index a3a4258..c5bbb48 100644 --- a/test/vp10_txfm_test.h +++ b/test/vp10_txfm_test.h
@@ -23,6 +23,7 @@ #include "test/acm_random.h" #include "vp10/common/vp10_txfm.h" +namespace libvpx_test { typedef enum { TYPE_DCT = 0, TYPE_ADST, @@ -109,5 +110,5 @@ static const int bd = 10; static const int base = (1 << bd); - +} // namespace libvpx_test #endif // VP10_TXFM_TEST_H_
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c index dd58e6d..abdc72b 100644 --- a/vp10/common/alloccommon.c +++ b/vp10/common/alloccommon.c
@@ -134,7 +134,8 @@ // TODO(geza.lore): These are bigger than they need to be. // cm->tile_width would be enough but it complicates indexing a // little elsewhere. - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int aligned_mi_cols = + ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2); int i; for (i = 0 ; i < MAX_MB_PLANE ; i++) {
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 821d67c..4ed7f81 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h
@@ -70,6 +70,16 @@ return WEDGE_BITS_BIG; } +static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) { + (void) sb_type; + return get_wedge_bits(sb_type) > 0; +} + +static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) { + (void) sb_type; + return 0; // get_wedge_bits(sb_type) > 0; +} + static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWFROMNEARMV; } @@ -166,7 +176,7 @@ #if CONFIG_VAR_TX // TODO(jingning): This effectively assigned a separate entry for each // 8x8 block. Apparently it takes much more space than needed. - TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; + TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE]; #endif int8_t skip; int8_t has_no_coeffs; @@ -190,8 +200,8 @@ #endif // CONFIG_EXT_INTRA #if CONFIG_EXT_INTER - PREDICTION_MODE interintra_mode; - PREDICTION_MODE interintra_uv_mode; + INTERINTRA_MODE interintra_mode; + INTERINTRA_MODE interintra_uv_mode; // TODO(debargha): Consolidate these flags int use_wedge_interintra; int interintra_wedge_index; @@ -315,15 +325,15 @@ const YV12_BUFFER_CONFIG *cur_buf; ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MAX_MIB_SIZE]; PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[MI_BLOCK_SIZE]; + PARTITION_CONTEXT left_seg_context[MAX_MIB_SIZE]; #if CONFIG_VAR_TX TXFM_CONTEXT *above_txfm_context; TXFM_CONTEXT *left_txfm_context; - TXFM_CONTEXT left_txfm_context_buffer[MI_BLOCK_SIZE]; + TXFM_CONTEXT left_txfm_context_buffer[MAX_MIB_SIZE]; TX_SIZE max_tx_size; #if CONFIG_SUPERTX @@ -353,40 +363,12 @@ static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) { - return subsize_lookup[partition][bsize]; + if (partition == PARTITION_INVALID) + return PARTITION_INVALID; + else + return subsize_lookup[partition][bsize]; } -#if CONFIG_EXT_PARTITION_TYPES -static INLINE PARTITION_TYPE 
get_partition(const MODE_INFO *const mi, - int mi_stride, int mi_rows, - int mi_cols, int mi_row, - int mi_col, BLOCK_SIZE bsize) { - const int bsl = b_width_log2_lookup[bsize]; - const int bs = (1 << bsl) / 4; - MODE_INFO m = mi[mi_row * mi_stride + mi_col]; - PARTITION_TYPE partition = partition_lookup[bsl][m.mbmi.sb_type]; - if (partition != PARTITION_NONE && bsize > BLOCK_8X8 && - mi_row + bs < mi_rows && mi_col + bs < mi_cols) { - BLOCK_SIZE h = get_subsize(bsize, PARTITION_HORZ_A); - BLOCK_SIZE v = get_subsize(bsize, PARTITION_VERT_A); - MODE_INFO m_right = mi[mi_row * mi_stride + mi_col + bs]; - MODE_INFO m_below = mi[(mi_row + bs) * mi_stride + mi_col]; - if (m.mbmi.sb_type == h) { - return m_below.mbmi.sb_type == h ? PARTITION_HORZ : PARTITION_HORZ_B; - } else if (m.mbmi.sb_type == v) { - return m_right.mbmi.sb_type == v ? PARTITION_VERT : PARTITION_VERT_B; - } else if (m_below.mbmi.sb_type == h) { - return PARTITION_HORZ_A; - } else if (m_right.mbmi.sb_type == v) { - return PARTITION_VERT_A; - } else { - return PARTITION_SPLIT; - } - } - return partition; -} -#endif // CONFIG_EXT_PARTITION_TYPES - static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = { DCT_DCT, // DC ADST_DCT, // V @@ -409,15 +391,16 @@ #endif // CONFIG_SUPERTX #if CONFIG_EXT_TX -#define ALLOW_INTRA_EXT_TX 1 +#define ALLOW_INTRA_EXT_TX 1 // whether masked transforms are used for 32X32 -#define USE_MSKTX_FOR_32X32 0 +#define USE_MSKTX_FOR_32X32 0 +#define USE_REDUCED_TXSET_FOR_16X16 1 static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = { 1, 16, 12, 2 }; static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = { - 1, 12, 10 + 1, 12, 5 }; #if EXT_TX_SIZES == 4 @@ -426,7 +409,11 @@ if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0; if (tx_size == TX_32X32) return is_inter ? 3 - 2 * USE_MSKTX_FOR_32X32 : 0; - return ((is_inter || tx_size < TX_16X16) ? 1 : 2); +#if USE_REDUCED_TXSET_FOR_16X16 + return (tx_size == TX_16X16 ? 
2 : 1); +#else + return (tx_size == TX_16X16 && !is_inter ? 2 : 1); +#endif // USE_REDUCED_TXSET_FOR_16X16 } static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = { @@ -462,7 +449,7 @@ { 0, 0, 0, 0, }, // unused { 1, 1, 0, 0, }, { 0, 0, 1, 0, }, - { 0, 0, 0, 0, }, + { 0, 0, 0, 1, }, }; #endif // EXT_TX_SIZES == 4 @@ -470,14 +457,14 @@ static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = { {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, }; // Transform types used in each inter set static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = { {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, }; @@ -701,6 +688,16 @@ && is_interintra_allowed_ref(mbmi->ref_frame); } +static INLINE int is_interintra_allowed_bsize_group(const int group) { + int i; + for (i = 0; i < BLOCK_SIZES; i++) { + if (size_group_lookup[i] == group && + is_interintra_allowed_bsize(i)) + return 1; + } + return 0; +} + static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) { return (mbmi->ref_frame[1] == INTRA_FRAME) && is_interintra_allowed(mbmi); }
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index 29d5419..f1c8e30 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c
@@ -268,24 +268,29 @@ {25, 29, 50, 192, 192, 128, 180, 180}, // 6 = two intra neighbours }; -static const vpx_prob default_interintra_prob[BLOCK_SIZES] = { - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, -#if CONFIG_EXT_PARTITION - 192, 192, 192 -#endif // CONFIG_EXT_PARTITION +static const vpx_prob default_interintra_prob[BLOCK_SIZE_GROUPS] = { + 208, 208, 208, 208, +}; + +static const vpx_prob + default_interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1] = { + { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 + { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 + { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 }; static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = { - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, #if CONFIG_EXT_PARTITION - 192, 192, 192 + 208, 208, 208 #endif // CONFIG_EXT_PARTITION }; static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = { - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, #if CONFIG_EXT_PARTITION - 192, 192, 192 + 208, 208, 208 #endif // CONFIG_EXT_PARTITION }; #endif // CONFIG_EXT_INTER @@ -310,7 +315,7 @@ -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ -D45_PRED, 14, /* 6 = D45_NODE */ -D63_PRED, 16, /* 7 = D63_NODE */ - -D153_PRED, -D207_PRED /* 8 = D153_NODE */ + -D153_PRED, -D207_PRED /* 8 = D153_NODE */ }; const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { @@ -325,6 +330,18 @@ }; #if CONFIG_EXT_INTER +const vpx_tree_index vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)] = { + -II_DC_PRED, 2, /* 0 = II_DC_NODE */ + -II_TM_PRED, 4, /* 1 = II_TM_NODE */ + -II_V_PRED, 6, /* 2 = II_V_NODE */ + 8, 12, /* 3 = II_COM_NODE */ + -II_H_PRED, 10, /* 4 = II_H_NODE */ + 
-II_D135_PRED, -II_D117_PRED, /* 5 = II_D135_NODE */ + -II_D45_PRED, 14, /* 6 = II_D45_NODE */ + -II_D63_PRED, 16, /* 7 = II_D63_NODE */ + -II_D153_PRED, -II_D207_PRED /* 8 = II_D153_NODE */ +}; + const vpx_tree_index vp10_inter_compound_mode_tree [TREE_SIZE(INTER_COMPOUND_MODES)] = { -INTER_COMPOUND_OFFSET(ZERO_ZEROMV), 2, @@ -965,17 +982,12 @@ -FLIPADST_DCT, -DCT_FLIPADST, 18, 20, -ADST_ADST, -FLIPADST_FLIPADST, - -ADST_FLIPADST, -FLIPADST_ADST + -ADST_FLIPADST, -FLIPADST_ADST, }, { -IDTX, 2, -DCT_DCT, 4, - 6, 12, - 8, 10, + -ADST_ADST, 6, -ADST_DCT, -DCT_ADST, - -FLIPADST_DCT, -DCT_FLIPADST, - 14, 16, - -ADST_ADST, -FLIPADST_FLIPADST, - -ADST_FLIPADST, -FLIPADST_ADST } }; @@ -1077,50 +1089,50 @@ }, }, { { - { 8, 176, 128, 128, 128, 128, 128, 128, 128, }, - { 10, 28, 176, 192, 208, 128, 128, 128, 128, }, - { 10, 28, 176, 192, 48, 128, 128, 128, 128, }, - { 9, 160, 128, 128, 128, 128, 128, 128, 128, }, - { 8, 28, 96, 128, 128, 128, 160, 192, 128, }, - { 7, 28, 160, 176, 192, 128, 128, 128, 128, }, - { 7, 20, 160, 176, 64, 128, 128, 128, 128, }, - { 10, 23, 160, 176, 64, 128, 128, 128, 128, }, - { 8, 29, 160, 176, 192, 128, 128, 128, 128, }, - { 3, 20, 96, 128, 128, 128, 160, 192, 128, }, + { 8, 224, 64, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 64, }, + { 9, 200, 64, 128, }, + { 8, 8, 224, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 64, }, + { 10, 23, 80, 176, }, + { 10, 23, 80, 176, }, + { 10, 32, 16, 64, }, }, { - { 2, 176, 128, 128, 128, 128, 128, 128, 128, }, - { 4, 28, 176, 192, 208, 128, 128, 128, 128, }, - { 4, 28, 176, 192, 48, 128, 128, 128, 128, }, - { 8, 160, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 28, 96, 128, 128, 128, 160, 192, 128, }, - { 3, 28, 160, 176, 192, 128, 128, 128, 128, }, - { 3, 26, 160, 176, 64, 128, 128, 128, 128, }, - { 9, 24, 160, 176, 64, 128, 128, 128, 128, }, - { 5, 24, 160, 176, 192, 128, 128, 128, 128, }, - { 2, 25, 96, 128, 128, 128, 160, 192, 128, }, + { 8, 224, 64, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 
64, }, + { 9, 200, 64, 128, }, + { 8, 8, 224, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 64, }, + { 10, 23, 80, 176, }, + { 10, 23, 80, 176, }, + { 10, 32, 16, 64, }, }, { - { 2, 176, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 28, 176, 192, 208, 128, 128, 128, 128, }, - { 1, 28, 176, 192, 48, 128, 128, 128, 128, }, - { 4, 160, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 28, 96, 128, 128, 128, 160, 192, 128, }, - { 2, 28, 160, 176, 192, 128, 128, 128, 128, }, - { 3, 29, 160, 176, 64, 128, 128, 128, 128, }, - { 4, 27, 160, 176, 64, 128, 128, 128, 128, }, - { 2, 34, 160, 176, 192, 128, 128, 128, 128, }, - { 1, 25, 96, 128, 128, 128, 160, 192, 128, }, + { 8, 224, 64, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 64, }, + { 9, 200, 64, 128, }, + { 8, 8, 224, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 64, }, + { 10, 23, 80, 176, }, + { 10, 23, 80, 176, }, + { 10, 32, 16, 64, }, #if EXT_TX_SIZES == 4 }, { - { 2, 176, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 12, 160, 176, 192, 128, 128, 128, 128, }, - { 1, 17, 160, 176, 64, 128, 128, 128, 128, }, - { 4, 41, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 17, 96, 128, 128, 128, 160, 192, 128, }, - { 2, 14, 160, 176, 192, 128, 128, 128, 128, }, - { 3, 19, 160, 176, 64, 128, 128, 128, 128, }, - { 4, 27, 160, 176, 64, 128, 128, 128, 128, }, - { 2, 34, 160, 176, 192, 128, 128, 128, 128, }, - { 1, 15, 96, 128, 128, 128, 160, 192, 128, }, + { 8, 224, 64, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 64, }, + { 9, 200, 64, 128, }, + { 8, 8, 224, 128, }, + { 10, 32, 16, 192, }, + { 10, 32, 16, 64, }, + { 10, 23, 80, 176, }, + { 10, 23, 80, 176, }, + { 10, 32, 16, 64, }, #endif }, }, @@ -1152,11 +1164,11 @@ #if CONFIG_EXT_INTRA static const vpx_prob default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1] = { - { 98, 63, 60, }, - { 98, 82, 80, }, - { 94, 65, 103, }, - { 49, 25, 24, }, - { 72, 38, 50, }, + { 98, 63, 60, }, + { 98, 82, 80, }, + { 94, 65, 103, }, + { 49, 25, 24, }, + { 72, 38, 50, }, }; static 
const vpx_prob default_ext_intra_probs[2] = {230, 230}; @@ -1211,6 +1223,7 @@ #if CONFIG_EXT_INTER vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs); vp10_copy(fc->interintra_prob, default_interintra_prob); + vp10_copy(fc->interintra_mode_prob, default_interintra_mode_prob); vp10_copy(fc->wedge_interintra_prob, default_wedge_interintra_prob); vp10_copy(fc->wedge_interinter_prob, default_wedge_interinter_prob); #endif // CONFIG_EXT_INTER @@ -1317,18 +1330,23 @@ pre_fc->inter_compound_mode_probs[i], counts->inter_compound_mode[i], fc->inter_compound_mode_probs[i]); - for (i = 0; i < BLOCK_SIZES; ++i) { - if (is_interintra_allowed_bsize(i)) + for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) { + if (is_interintra_allowed_bsize_group(i)) fc->interintra_prob[i] = mode_mv_merge_probs(pre_fc->interintra_prob[i], counts->interintra[i]); } + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { + vpx_tree_merge_probs( + vp10_interintra_mode_tree, pre_fc->interintra_mode_prob[i], + counts->interintra_mode[i], fc->interintra_mode_prob[i]); + } for (i = 0; i < BLOCK_SIZES; ++i) { - if (is_interintra_allowed_bsize(i) && get_wedge_bits(i)) + if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i)) fc->wedge_interintra_prob[i] = mode_mv_merge_probs( pre_fc->wedge_interintra_prob[i], counts->wedge_interintra[i]); } for (i = 0; i < BLOCK_SIZES; ++i) { - if (get_wedge_bits(i)) + if (is_interinter_wedge_used(i)) fc->wedge_interinter_prob[i] = mode_mv_merge_probs( pre_fc->wedge_interinter_prob[i], counts->wedge_interinter[i]); }
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index 8219dc5..f8e507e 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h
@@ -34,6 +34,7 @@ #define PALETTE_MAX_SIZE 8 #define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1) #define PALETTE_Y_MODE_CONTEXTS 3 +#define PALETTE_MAX_BLOCK_SIZE (64 * 64) struct VP10Common; @@ -70,7 +71,8 @@ #if CONFIG_EXT_INTER vpx_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS] [INTER_COMPOUND_MODES - 1]; - vpx_prob interintra_prob[BLOCK_SIZES]; + vpx_prob interintra_prob[BLOCK_SIZE_GROUPS]; + vpx_prob interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1]; vpx_prob wedge_interintra_prob[BLOCK_SIZES]; vpx_prob wedge_interinter_prob[BLOCK_SIZES]; #endif // CONFIG_EXT_INTER @@ -137,7 +139,8 @@ unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; #if CONFIG_EXT_INTER unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES]; - unsigned int interintra[BLOCK_SIZES][2]; + unsigned int interintra[BLOCK_SIZE_GROUPS][2]; + unsigned int interintra_mode[BLOCK_SIZE_GROUPS][INTERINTRA_MODES]; unsigned int wedge_interintra[BLOCK_SIZES][2]; unsigned int wedge_interinter[BLOCK_SIZES][2]; #endif // CONFIG_EXT_INTER @@ -195,6 +198,8 @@ extern const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; extern const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)]; #if CONFIG_EXT_INTER +extern const vpx_tree_index vp10_interintra_mode_tree + [TREE_SIZE(INTERINTRA_MODES)]; extern const vpx_tree_index vp10_inter_compound_mode_tree [TREE_SIZE(INTER_COMPOUND_MODES)]; #endif // CONFIG_EXT_INTER
diff --git a/vp10/common/enums.h b/vp10/common/enums.h index 5615cee..01f1e78 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h
@@ -20,24 +20,34 @@ #undef MAX_SB_SIZE +// Max superblock size #if CONFIG_EXT_PARTITION # define MAX_SB_SIZE_LOG2 7 #else # define MAX_SB_SIZE_LOG2 6 #endif // CONFIG_EXT_PARTITION - -#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2) +#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2) #define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) -#define MI_SIZE_LOG2 3 -#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit +// Min superblock size +#define MIN_SB_SIZE_LOG2 6 -#define MI_BLOCK_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2) -#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block +// Pixels per Mode Info (MI) unit +#define MI_SIZE_LOG2 3 +#define MI_SIZE (1 << MI_SIZE_LOG2) -#define MI_MASK (MI_BLOCK_SIZE - 1) -#define MI_MASK_2 (MI_BLOCK_SIZE * 2 - 1) +// MI-units per max superblock (MI Block - MIB) +#define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2) +#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2) +// MI-units per min superblock +#define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2) + +// Mask to extract MI offset within max MIB +#define MAX_MIB_MASK (MAX_MIB_SIZE - 1) +#define MAX_MIB_MASK_2 (MAX_MIB_SIZE * 2 - 1) + +// Maximum number of tile rows and tile columns #if CONFIG_EXT_TILE # define MAX_TILE_ROWS 1024 # define MAX_TILE_COLS 1024 @@ -184,9 +194,11 @@ VP9_LAST4_FLAG = 1 << 3, VP9_GOLD_FLAG = 1 << 4, VP9_ALT_FLAG = 1 << 5, + VP9_REFFRAME_ALL = (1 << 6) - 1 #else VP9_GOLD_FLAG = 1 << 1, VP9_ALT_FLAG = 1 << 2, + VP9_REFFRAME_ALL = (1 << 3) - 1 #endif // CONFIG_EXT_REFS } VP9_REFFRAME; @@ -252,6 +264,23 @@ #define INTRA_MODES (TM_PRED + 1) +#if CONFIG_EXT_INTER +typedef enum { + II_DC_PRED = 0, + II_V_PRED, + II_H_PRED, + II_D45_PRED, + II_D135_PRED, + II_D117_PRED, + II_D153_PRED, + II_D207_PRED, + II_D63_PRED, + II_TM_PRED, + INTERINTRA_MODES +} INTERINTRA_MODE; + +#endif // CONFIG_EXT_INTER + #if CONFIG_EXT_INTRA typedef enum { FILTER_DC_PRED,
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index fe9b13c..23c131d 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c
@@ -731,10 +731,8 @@ } else { const int w = num_8x8_blocks_wide_lookup[block_size]; const int h = num_8x8_blocks_high_lookup[block_size]; - int index = shift_y; for (i = 0; i < h; i++) { - memset(&lfm->lfl_y[index], filter_level, w); - index += 8; + memset(&lfm->lfl_y[i][shift_y], filter_level, w); } } @@ -813,10 +811,8 @@ } else { const int w = num_8x8_blocks_wide_lookup[block_size]; const int h = num_8x8_blocks_high_lookup[block_size]; - int index = shift_y; for (i = 0; i < h; i++) { - memset(&lfm->lfl_y[index], filter_level, w); - index += 8; + memset(&lfm->lfl_y[i][shift_y], filter_level, w); } } @@ -867,10 +863,8 @@ const int shift_32_uv[] = {0, 2, 8, 10}; const int shift_16_uv[] = {0, 1, 4, 5}; int i; - const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? - cm->mi_rows - mi_row : MI_BLOCK_SIZE); - const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? - cm->mi_cols - mi_col : MI_BLOCK_SIZE); + const int max_rows = VPXMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE); + const int max_cols = VPXMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE); #if CONFIG_EXT_PARTITION assert(0 && "Not yet updated"); #endif // CONFIG_EXT_PARTITION @@ -1044,14 +1038,14 @@ lfm->above_uv[TX_4X4] &= ~above_border_uv; // We do some special edge handling. - if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { + if (mi_row + MAX_MIB_SIZE > cm->mi_rows) { const uint64_t rows = cm->mi_rows - mi_row; // Each pixel inside the border gets a 1, const uint64_t mask_y = - (((uint64_t) 1 << (rows << MI_BLOCK_SIZE_LOG2)) - 1); + (((uint64_t) 1 << (rows << MAX_MIB_SIZE_LOG2)) - 1); const uint16_t mask_uv = - (((uint16_t) 1 << (((rows + 1) >> 1) << (MI_BLOCK_SIZE_LOG2 - 1))) - 1); + (((uint16_t) 1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1); // Remove values completely outside our border. 
for (i = 0; i < TX_32X32; i++) { @@ -1075,7 +1069,7 @@ } } - if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { + if (mi_col + MAX_MIB_SIZE > cm->mi_cols) { const uint64_t columns = cm->mi_cols - mi_col; // Each pixel inside the border gets a 1, the multiply copies the border @@ -1210,31 +1204,30 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, struct macroblockd_plane *plane, - MODE_INFO **mi_8x8, + MODE_INFO **mib, int mi_row, int mi_col) { const int ss_x = plane->subsampling_x; const int ss_y = plane->subsampling_y; const int row_step = 1 << ss_y; const int col_step = 1 << ss_x; - const int row_step_stride = cm->mi_stride * row_step; struct buf_2d *const dst = &plane->dst; uint8_t* const dst0 = dst->buf; - unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; - unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; - unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; - unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; - uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; + unsigned int mask_16x16[MAX_MIB_SIZE] = {0}; + unsigned int mask_8x8[MAX_MIB_SIZE] = {0}; + unsigned int mask_4x4[MAX_MIB_SIZE] = {0}; + unsigned int mask_4x4_int[MAX_MIB_SIZE] = {0}; + uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE]; int r, c; - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) { unsigned int mask_16x16_c = 0; unsigned int mask_8x8_c = 0; unsigned int mask_4x4_c = 0; unsigned int border_mask; // Determine the vertical edges that need filtering - for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { - const MODE_INFO *mi = mi_8x8[c]; + for (c = 0; c < cm->mib_size && mi_col + c < cm->mi_cols; c += col_step) { + const MODE_INFO *mi = mib[c]; const MB_MODE_INFO *mbmi = &mi[0].mbmi; const BLOCK_SIZE sb_type = mbmi->sb_type; const int skip_this = mbmi->skip && is_inter_block(mbmi); @@ -1267,8 +1260,7 @@ int tx_size_mask = 0; // Filter level can vary per MI - if (!(lfl[(r << MI_BLOCK_SIZE_LOG2) + (c >> 
ss_x)] = - get_filter_level(&cm->lf_info, mbmi))) + if (!(lfl[r][c >> ss_x] = get_filter_level(&cm->lf_info, mbmi))) continue; if (tx_size == TX_32X32) @@ -1288,10 +1280,10 @@ tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]); tx_size_c = VPXMIN(tx_size, - cm->left_txfm_context[(mi_row + r) & MI_MASK]); + cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]); cm->above_txfm_context[mi_col + c] = tx_size; - cm->left_txfm_context[(mi_row + r) & MI_MASK] = tx_size; + cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] = tx_size; #endif // Build masks based on the transform size of each block @@ -1365,7 +1357,7 @@ mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2], + &cm->lf_info, &lfl[r][0], (int)cm->bit_depth); } else { filter_selectively_vert(dst->buf, dst->stride, @@ -1373,7 +1365,7 @@ mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); + &cm->lf_info, &lfl[r][0]); } #else filter_selectively_vert(dst->buf, dst->stride, @@ -1381,15 +1373,15 @@ mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); + &cm->lf_info, &lfl[r][0]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - mi_8x8 += row_step_stride; + dst->buf += MI_SIZE * dst->stride; + mib += row_step * cm->mi_stride; } // Now do horizontal pass dst->buf = dst0; - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) { const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
0 : mask_4x4_int[r]; @@ -1415,7 +1407,7 @@ mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2], + &cm->lf_info, &lfl[r][0], (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, @@ -1423,7 +1415,7 @@ mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); + &cm->lf_info, &lfl[r][0]); } #else filter_selectively_horiz(dst->buf, dst->stride, @@ -1431,9 +1423,9 @@ mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); + &cm->lf_info, &lfl[r][0]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; + dst->buf += MI_SIZE * dst->stride; } } @@ -1452,7 +1444,7 @@ assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) { unsigned int mask_16x16_l = mask_16x16 & 0xffff; unsigned int mask_8x8_l = mask_8x8 & 0xffff; unsigned int mask_4x4_l = mask_4x4 & 0xffff; @@ -1464,24 +1456,24 @@ highbd_filter_selectively_vert_row2( plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], (int)cm->bit_depth); + &lfm->lfl_y[r][0], (int)cm->bit_depth); } else { filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); + &lfm->lfl_y[r][0]); } #else filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); + &lfm->lfl_y[r][0]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 16 * dst->stride; - mask_16x16 >>= 16; - mask_8x8 >>= 16; - mask_4x4 >>= 16; - mask_4x4_int >>= 16; + dst->buf += 2 * MI_SIZE * dst->stride; + 
mask_16x16 >>= 2 * MI_SIZE; + mask_8x8 >>= 2 * MI_SIZE; + mask_4x4 >>= 2 * MI_SIZE; + mask_4x4_int >>= 2 * MI_SIZE; } // Horizontal pass @@ -1491,7 +1483,7 @@ mask_4x4 = lfm->above_y[TX_4X4]; mask_4x4_int = lfm->int_4x4_y; - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { + for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) { unsigned int mask_16x16_r; unsigned int mask_8x8_r; unsigned int mask_4x4_r; @@ -1511,24 +1503,24 @@ highbd_filter_selectively_horiz( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], + &lfm->lfl_y[r][0], (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); + &lfm->lfl_y[r][0]); } #else filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); + &lfm->lfl_y[r][0]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; + dst->buf += MI_SIZE * dst->stride; + mask_16x16 >>= MI_SIZE; + mask_8x8 >>= MI_SIZE; + mask_4x4 >>= MI_SIZE; + mask_4x4_int >>= MI_SIZE; } } @@ -1546,16 +1538,13 @@ uint16_t mask_4x4_int = lfm->left_int_4x4_uv; assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); + assert(plane->plane_type == PLANE_TYPE_UV); // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { - if (plane->plane_type == 1) { - for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { - lfm->lfl_uv[(r << 1) + c] = - lfm->lfl_y[(r << MI_BLOCK_SIZE_LOG2) + (c << 1)]; - lfm->lfl_uv[((r + 2) << 1) + c] = - lfm->lfl_y[((r + 2) << MI_BLOCK_SIZE_LOG2) + (c << 1)]; - } + for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) { + for (c = 0; c < 
(cm->mib_size >> 1); c++) { + lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1]; + lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1]; } { @@ -1570,25 +1559,25 @@ highbd_filter_selectively_vert_row2( plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r << 1], (int)cm->bit_depth); + &lfm->lfl_uv[r >> 1][0], (int)cm->bit_depth); } else { filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfm->lfl_uv[r >> 1][0]); } #else filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfm->lfl_uv[r >> 1][0]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; + dst->buf += 2 * MI_SIZE * dst->stride; + mask_16x16 >>= MI_SIZE; + mask_8x8 >>= MI_SIZE; + mask_4x4 >>= MI_SIZE; + mask_4x4_int >>= MI_SIZE; } } @@ -1599,7 +1588,7 @@ mask_4x4 = lfm->above_uv[TX_4X4]; mask_4x4_int = lfm->above_int_4x4_uv; - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) { const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
0 : (mask_4x4_int & 0xf); @@ -1622,23 +1611,24 @@ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfm->lfl_uv[r << 1], (int)cm->bit_depth); + &lfm->lfl_uv[r >> 1][0], + (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfm->lfl_uv[r >> 1][0]); } #else filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfm->lfl_uv[r >> 1][0]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - mask_16x16 >>= 4; - mask_8x8 >>= 4; - mask_4x4 >>= 4; - mask_4x4_int >>= 4; + dst->buf += MI_SIZE * dst->stride; + mask_16x16 >>= MI_SIZE / 2; + mask_8x8 >>= MI_SIZE / 2; + mask_4x4 >>= MI_SIZE / 2; + mask_4x4_int >>= MI_SIZE / 2; } } @@ -1653,12 +1643,12 @@ # if CONFIG_VAR_TX memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols); # endif // CONFIG_VAR_TX - for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { + for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; # if CONFIG_VAR_TX - memset(cm->left_txfm_context, TX_SIZES, MI_BLOCK_SIZE); + memset(cm->left_txfm_context, TX_SIZES, MAX_MIB_SIZE); # endif // CONFIG_VAR_TX - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) { int plane; vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); @@ -1683,9 +1673,9 @@ else path = LF_PATH_SLOW; - for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { + for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) 
{ int plane; vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h index 8fa0b80..2a88003 100644 --- a/vp10/common/loopfilter.h +++ b/vp10/common/loopfilter.h
@@ -84,8 +84,8 @@ uint16_t above_uv[TX_SIZES]; uint16_t left_int_4x4_uv; uint16_t above_int_4x4_uv; - uint8_t lfl_y[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; - uint8_t lfl_uv[MI_BLOCK_SIZE / 2 * MI_BLOCK_SIZE / 2]; + uint8_t lfl_y[MAX_MIB_SIZE][MAX_MIB_SIZE]; + uint8_t lfl_uv[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2]; } LOOP_FILTER_MASK; /* assorted loopfilter functions which get used elsewhere */
diff --git a/vp10/common/mfqe.c b/vp10/common/mfqe.c index c715ef7..52756bd 100644 --- a/vp10/common/mfqe.c +++ b/vp10/common/mfqe.c
@@ -355,9 +355,15 @@ const YV12_BUFFER_CONFIG *show = cm->frame_to_show; // Last decoded frame and will store the MFQE result. YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; + +#if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES + // TODO(any): Fix for ext parition types and 128 superblocks + assert(0); +#endif // CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES + // Loop through each super block. - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MAX_MIB_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) { MODE_INFO *mi; MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col); // Motion Info in last frame.
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c index aa651a2..7c6633f 100644 --- a/vp10/common/mvref_common.c +++ b/vp10/common/mvref_common.c
@@ -260,7 +260,7 @@ // For each 4x4 group of blocks, when the bottom right is decoded the blocks // to the right have not been decoded therefore the bottom right does // not have a top right - while (bs < MI_BLOCK_SIZE) { + while (bs < MAX_MIB_SIZE) { if (mi_col & bs) { if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) { has_tr = 0;
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index bdd9ffe..3ac17e2 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h
@@ -312,7 +312,7 @@ int log2_tile_cols, log2_tile_rows; #endif // !CONFIG_EXT_TILE int tile_cols, tile_rows; - int tile_width, tile_height; + int tile_width, tile_height; // In MI units int byte_alignment; int skip_loop_filter; @@ -332,7 +332,7 @@ ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; #if CONFIG_VAR_TX TXFM_CONTEXT *above_txfm_context; - TXFM_CONTEXT left_txfm_context[MI_BLOCK_SIZE]; + TXFM_CONTEXT left_txfm_context[MAX_MIB_SIZE]; #endif int above_context_alloc_cols; @@ -343,6 +343,10 @@ #if CONFIG_ANS rans_dec_lut token_tab[COEFF_PROB_MODELS]; #endif // CONFIG_ANS + + BLOCK_SIZE sb_size; // Size of the superblock used for this frame + int mib_size; // Size of the superblock in units of MI blocks + int mib_size_log2; // Log 2 of above. } VP10_COMMON; // TODO(hkuang): Don't need to lock the whole pool after implementing atomic @@ -372,7 +376,8 @@ return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; } -static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP10_COMMON *cm) { +static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer( + const VP10_COMMON *const cm) { return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; } @@ -407,8 +412,12 @@ bufs[new_idx].ref_count++; } -static INLINE int mi_cols_aligned_to_sb(int n_mis) { - return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); +static INLINE int mi_cols_aligned_to_sb(const VP10_COMMON *cm) { + return ALIGN_POWER_OF_TWO(cm->mi_cols, cm->mib_size_log2); +} + +static INLINE int mi_rows_aligned_to_sb(const VP10_COMMON *cm) { + return ALIGN_POWER_OF_TWO(cm->mi_rows, cm->mib_size_log2); } static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) { @@ -440,7 +449,7 @@ static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { const int above_idx = mi_col * 2; - const int left_idx = (mi_row * 2) & MI_MASK_2; + const int left_idx = (mi_row * 2) & MAX_MIB_MASK_2; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; @@ -451,7 +460,7 
@@ static INLINE int calc_mi_size(int len) { // len is in mi units. - return len + MI_BLOCK_SIZE; + return len + MAX_MIB_SIZE; } static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, @@ -517,7 +526,8 @@ BLOCK_SIZE subsize, BLOCK_SIZE bsize) { PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + PARTITION_CONTEXT *const left_ctx = + xd->left_seg_context + (mi_row & MAX_MIB_MASK); #if CONFIG_EXT_PARTITION_TYPES const int bw = num_8x8_blocks_wide_lookup[bsize]; @@ -581,7 +591,8 @@ int mi_row, int mi_col, BLOCK_SIZE bsize) { const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + const PARTITION_CONTEXT *left_ctx = + xd->left_seg_context + (mi_row & MAX_MIB_MASK); const int bsl = mi_width_log2_lookup[bsize]; int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; @@ -649,6 +660,58 @@ } #endif +static INLINE PARTITION_TYPE get_partition(const VP10_COMMON *const cm, + const int mi_row, + const int mi_col, + const BLOCK_SIZE bsize) { + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { + return PARTITION_INVALID; + } else { + const int offset = mi_row * cm->mi_stride + mi_col; + MODE_INFO **mi = cm->mi_grid_visible + offset; + const MB_MODE_INFO *const mbmi = &mi[0]->mbmi; + const int bsl = b_width_log2_lookup[bsize]; + const PARTITION_TYPE partition = partition_lookup[bsl][mbmi->sb_type]; +#if !CONFIG_EXT_PARTITION_TYPES + return partition; +#else + const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; + + assert(cm->mi_grid_visible[offset] == &cm->mi[offset]); + + if (partition != PARTITION_NONE && + bsize > BLOCK_8X8 && + mi_row + hbs < cm->mi_rows && + mi_col + hbs < cm->mi_cols) { + const BLOCK_SIZE h = get_subsize(bsize, PARTITION_HORZ_A); + const BLOCK_SIZE v = get_subsize(bsize, PARTITION_VERT_A); + const MB_MODE_INFO *const 
mbmi_right = &mi[hbs]->mbmi; + const MB_MODE_INFO *const mbmi_below = &mi[hbs * cm->mi_stride]->mbmi; + if (mbmi->sb_type == h) { + return mbmi_below->sb_type == h ? PARTITION_HORZ : PARTITION_HORZ_B; + } else if (mbmi->sb_type == v) { + return mbmi_right->sb_type == v ? PARTITION_VERT : PARTITION_VERT_B; + } else if (mbmi_below->sb_type == h) { + return PARTITION_HORZ_A; + } else if (mbmi_right->sb_type == v) { + return PARTITION_VERT_A; + } else { + return PARTITION_SPLIT; + } + } + + return partition; +#endif // !CONFIG_EXT_PARTITION_TYPES + } +} + +static INLINE void set_sb_size(VP10_COMMON *const cm, + const BLOCK_SIZE sb_size) { + cm->sb_size = sb_size; + cm->mib_size = num_8x8_blocks_wide_lookup[cm->sb_size]; + cm->mib_size_log2 = mi_width_log2_lookup[cm->sb_size]; +} + #ifdef __cplusplus } // extern "C" #endif
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index de91a21..2be4cf6 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c
@@ -762,7 +762,7 @@ } void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, MAX_MB_PLANE - 1); #if CONFIG_EXT_INTER @@ -1176,24 +1176,18 @@ }; #if CONFIG_EXT_PARTITION -// TODO(debargha): What are the correct values here? static const uint8_t obmc_mask_64[2][64] = { - { 33, 33, 35, 35, 36, 36, 38, 38, - 40, 40, 41, 41, 43, 43, 44, 44, - 45, 45, 47, 47, 48, 48, 50, 50, - 51, 51, 52, 52, 53, 53, 55, 55, - 56, 56, 57, 57, 58, 58, 59, 59, - 60, 60, 60, 60, 61, 61, 62, 62, - 62, 62, 63, 63, 63, 63, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64 }, - { 31, 31, 29, 29, 28, 28, 26, 26, - 24, 24, 23, 23, 21, 21, 20, 20, - 19, 19, 17, 17, 16, 16, 14, 14, - 13, 13, 12, 12, 11, 11, 9, 9, - 8, 8, 7, 7, 6, 6, 5, 5, - 4, 4, 4, 4, 3, 3, 2, 2, - 2, 2, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 } + { + 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44, + 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56, + 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62, + 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + }, { + 31, 30, 29, 29, 28, 27, 26, 25, 24, 24, 23, 22, 21, 20, 20, 20, + 19, 18, 17, 17, 16, 15, 14, 13, 13, 13, 12, 12, 11, 10, 9, 8, + 8, 8, 7, 7, 6, 6, 5, 4, 4, 4, 4, 4, 3, 2, 2, 2, + 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } }; #endif // CONFIG_EXT_PARTITION @@ -1599,7 +1593,45 @@ #endif // CONFIG_OBMC #if CONFIG_EXT_INTER -static void combine_interintra(PREDICTION_MODE mode, +#if CONFIG_EXT_PARTITION +static const int ii_weights1d[MAX_SB_SIZE] = { + 128, 127, 125, 124, 123, 122, 120, 119, + 118, 117, 116, 115, 113, 112, 111, 110, + 109, 108, 107, 106, 105, 104, 103, 103, + 102, 101, 100, 99, 98, 97, 97, 96, + 95, 94, 94, 93, 92, 91, 91, 90, + 89, 89, 88, 87, 87, 86, 86, 85, + 84, 84, 83, 83, 82, 82, 81, 81, + 80, 80, 79, 79, 78, 78, 77, 77, + 76, 76, 75, 75, 75, 74, 74, 73, + 73, 73, 
72, 72, 72, 71, 71, 70, + 70, 70, 69, 69, 69, 69, 68, 68, + 68, 67, 67, 67, 67, 66, 66, 66, + 66, 65, 65, 65, 65, 64, 64, 64, + 64, 63, 63, 63, 63, 63, 62, 62, + 62, 62, 62, 61, 61, 61, 61, 61, + 61, 60, 60, 60, 60, 60, 60, 60, +}; +static int ii_size_scales[BLOCK_SIZES] = { + 32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 +}; +#else +static const int ii_weights1d[MAX_SB_SIZE] = { + 102, 100, 97, 95, 92, 90, 88, 86, + 84, 82, 80, 78, 76, 74, 73, 71, + 69, 68, 67, 65, 64, 62, 61, 60, + 59, 58, 57, 55, 54, 53, 52, 52, + 51, 50, 49, 48, 47, 47, 46, 45, + 45, 44, 43, 43, 42, 41, 41, 40, + 40, 39, 39, 38, 38, 38, 37, 37, + 36, 36, 36, 35, 35, 35, 34, 34, +}; +static int ii_size_scales[BLOCK_SIZES] = { + 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 +}; +#endif // CONFIG_EXT_PARTITION + +static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index, BLOCK_SIZE bsize, @@ -1613,149 +1645,112 @@ static const int scale_bits = 8; static const int scale_max = 256; static const int scale_round = 127; -#if CONFIG_EXT_PARTITION - // TODO(debargha): Fill in the correct weights for 128 wide blocks. 
- static const int weights1d[MAX_SB_SIZE] = { - 128, 128, 125, 125, 122, 122, 119, 119, - 116, 116, 114, 114, 111, 111, 109, 109, - 107, 107, 105, 105, 103, 103, 101, 101, - 99, 99, 97, 97, 96, 96, 94, 94, - 93, 93, 91, 91, 90, 90, 89, 89, - 88, 88, 86, 86, 85, 85, 84, 84, - 83, 83, 82, 82, 81, 81, 81, 81, - 80, 80, 79, 79, 78, 78, 78, 78, - 77, 77, 76, 76, 76, 76, 75, 75, - 75, 75, 74, 74, 74, 74, 73, 73, - 73, 73, 72, 72, 72, 72, 71, 71, - 71, 71, 71, 71, 70, 70, 70, 70, - 70, 70, 70, 70, 69, 69, 69, 69, - 69, 69, 69, 69, 68, 68, 68, 68, - 68, 68, 68, 68, 68, 68, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, - }; - static int size_scales[BLOCK_SIZES] = { - 32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 - }; -#else - static const int weights1d[MAX_SB_SIZE] = { - 128, 125, 122, 119, 116, 114, 111, 109, - 107, 105, 103, 101, 99, 97, 96, 94, - 93, 91, 90, 89, 88, 86, 85, 84, - 83, 82, 81, 81, 80, 79, 78, 78, - 77, 76, 76, 75, 75, 74, 74, 73, - 73, 72, 72, 71, 71, 71, 70, 70, - 70, 70, 69, 69, 69, 69, 68, 68, - 68, 68, 68, 67, 67, 67, 67, 67, - }; - static int size_scales[BLOCK_SIZES] = { - 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 - }; -#endif // CONFIG_EXT_PARTITION - const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; - const int size_scale = size_scales[plane_bsize]; + const int size_scale = ii_size_scales[plane_bsize]; int i, j; - if (use_wedge_interintra && get_wedge_bits(bsize)) { - const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw); - for (i = 0; i < bh; ++i) { - for (j = 0; j < bw; ++j) { - int m = mask[i * MASK_MASTER_STRIDE + j]; - comppred[i * compstride + j] = - (intrapred[i * intrastride + j] * m + - interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + - (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS; + if (use_wedge_interintra) { + if (get_wedge_bits(bsize)) { + const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw); + for (i = 0; i < bh; 
++i) { + for (j = 0; j < bw; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + comppred[i * compstride + j] = + (intrapred[i * intrastride + j] * m + + interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS; + } } } return; } switch (mode) { - case V_PRED: + case II_V_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = weights1d[i * size_scale]; + int scale = ii_weights1d[i * size_scale]; comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + >> scale_bits; } } - break; + break; - case H_PRED: + case II_H_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = weights1d[j * size_scale]; - comppred[i * compstride + j] = + int scale = ii_weights1d[j * size_scale]; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + >> scale_bits; } } - break; + break; - case D63_PRED: - case D117_PRED: + case II_D63_PRED: + case II_D117_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (weights1d[i * size_scale] * 3 + - weights1d[j * size_scale]) >> 2; - comppred[i * compstride + j] = + int scale = (ii_weights1d[i * size_scale] * 3 + + ii_weights1d[j * size_scale]) >> 2; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case D207_PRED: - case D153_PRED: + case II_D207_PRED: + case II_D153_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (weights1d[j * size_scale] * 3 + - weights1d[i * size_scale]) >> 2; - comppred[i * compstride + j] = + int scale = (ii_weights1d[j * size_scale] * 3 + + ii_weights1d[i * 
size_scale]) >> 2; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case D135_PRED: + case II_D135_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = weights1d[(i < j ? i : j) * size_scale]; - comppred[i * compstride + j] = + int scale = ii_weights1d[(i < j ? i : j) * size_scale]; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case D45_PRED: + case II_D45_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (weights1d[i * size_scale] + - weights1d[j * size_scale]) >> 1; - comppred[i * compstride + j] = + int scale = (ii_weights1d[i * size_scale] + + ii_weights1d[j * size_scale]) >> 1; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case TM_PRED: - case DC_PRED: + case II_TM_PRED: + case II_DC_PRED: default: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - comppred[i * compstride + j] = (interpred[i * interstride + j] + - intrapred[i * intrastride + j]) >> 1; + comppred[i * compstride + j] = (interpred[i * interstride + j] + + intrapred[i * intrastride + j]) >> 1; } } break; @@ -1763,7 +1758,7 @@ } #if CONFIG_VP9_HIGHBITDEPTH -static void combine_interintra_highbd(PREDICTION_MODE mode, +static void combine_interintra_highbd(INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index, BLOCK_SIZE bsize, @@ -1777,48 +1772,9 @@ static const int scale_bits = 8; static const int scale_max = 256; 
static const int scale_round = 127; -#if CONFIG_EXT_PARTITION - // TODO(debargha): Fill in the correct weights for 128 wide blocks. - static const int weights1d[MAX_SB_SIZE] = { - 128, 128, 125, 125, 122, 122, 119, 119, - 116, 116, 114, 114, 111, 111, 109, 109, - 107, 107, 105, 105, 103, 103, 101, 101, - 99, 99, 97, 97, 96, 96, 94, 94, - 93, 93, 91, 91, 90, 90, 89, 89, - 88, 88, 86, 86, 85, 85, 84, 84, - 83, 83, 82, 82, 81, 81, 81, 81, - 80, 80, 79, 79, 78, 78, 78, 78, - 77, 77, 76, 76, 76, 76, 75, 75, - 75, 75, 74, 74, 74, 74, 73, 73, - 73, 73, 72, 72, 72, 72, 71, 71, - 71, 71, 71, 71, 70, 70, 70, 70, - 70, 70, 70, 70, 69, 69, 69, 69, - 69, 69, 69, 69, 68, 68, 68, 68, - 68, 68, 68, 68, 68, 68, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, - }; - static int size_scales[BLOCK_SIZES] = { - 32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 - }; -#else - static const int weights1d[MAX_SB_SIZE] = { - 128, 125, 122, 119, 116, 114, 111, 109, - 107, 105, 103, 101, 99, 97, 96, 94, - 93, 91, 90, 89, 88, 86, 85, 84, - 83, 82, 81, 81, 80, 79, 78, 78, - 77, 76, 76, 75, 75, 74, 74, 73, - 73, 72, 72, 71, 71, 71, 70, 70, - 70, 70, 69, 69, 69, 69, 68, 68, - 68, 68, 68, 67, 67, 67, 67, 67, - }; - static int size_scales[BLOCK_SIZES] = { - 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 - }; -#endif // CONFIG_EXT_PARTITION - const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; - const int size_scale = size_scales[plane_bsize]; + const int size_scale = ii_size_scales[plane_bsize]; int i, j; uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8); @@ -1826,105 +1782,107 @@ uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8); (void) bd; - if (use_wedge_interintra && get_wedge_bits(bsize)) { - const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw); - for (i = 0; i < bh; ++i) { - for (j = 0; j < bw; ++j) { - int m = mask[i * MASK_MASTER_STRIDE + j]; - comppred[i * compstride + j] = - (intrapred[i * intrastride + j] * m + - 
interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + - (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS; + if (use_wedge_interintra) { + if (get_wedge_bits(bsize)) { + const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw); + for (i = 0; i < bh; ++i) { + for (j = 0; j < bw; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + comppred[i * compstride + j] = + (intrapred[i * intrastride + j] * m + + interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS; + } } } return; } switch (mode) { - case V_PRED: + case II_V_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = weights1d[i * size_scale]; + int scale = ii_weights1d[i * size_scale]; comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + scale * intrapred[i * intrastride + j] + scale_round) >> scale_bits; } } - break; + break; - case H_PRED: + case II_H_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = weights1d[j * size_scale]; - comppred[i * compstride + j] = + int scale = ii_weights1d[j * size_scale]; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + >> scale_bits; } } - break; + break; - case D63_PRED: - case D117_PRED: + case II_D63_PRED: + case II_D117_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (weights1d[i * size_scale] * 3 + - weights1d[j * size_scale]) >> 2; - comppred[i * compstride + j] = + int scale = (ii_weights1d[i * size_scale] * 3 + + ii_weights1d[j * size_scale]) >> 2; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case D207_PRED: - case D153_PRED: + case II_D207_PRED: + case 
II_D153_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (weights1d[j * size_scale] * 3 + - weights1d[i * size_scale]) >> 2; - comppred[i * compstride + j] = + int scale = (ii_weights1d[j * size_scale] * 3 + + ii_weights1d[i * size_scale]) >> 2; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case D135_PRED: + case II_D135_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = weights1d[(i < j ? i : j) * size_scale]; - comppred[i * compstride + j] = + int scale = ii_weights1d[(i < j ? i : j) * size_scale]; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case D45_PRED: + case II_D45_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (weights1d[i * size_scale] + - weights1d[j * size_scale]) >> 1; - comppred[i * compstride + j] = + int scale = (ii_weights1d[i * size_scale] + + ii_weights1d[j * size_scale]) >> 1; + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; + scale * intrapred[i * intrastride + j] + scale_round) + >> scale_bits; } } - break; + break; - case TM_PRED: - case DC_PRED: + case II_TM_PRED: + case II_DC_PRED: default: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - comppred[i * compstride + j] = (interpred[i * interstride + j] + - intrapred[i * intrastride + j]) >> 1; + comppred[i * compstride + j] = (interpred[i * interstride + j] + + intrapred[i * intrastride + j]) >> 1; } } break; @@ -1957,57 +1915,122 @@ vp10_predict_intra_block(xd, bwl, bhl, 
max_tx_size, mode, ref, ref_stride, dst, dst_stride, 0, 0, plane); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2); + uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2); + memcpy(src_216 - ref_stride, dst_216 - dst_stride, + sizeof(*src_216) * (4 << bhl)); + } else +#endif // CONFIG_VP9_HIGHBITDEPTH + { + memcpy(src_2 - ref_stride, dst_2 - dst_stride, + sizeof(*src_2) * (4 << bhl)); + } vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride, dst_2, dst_stride, 0, 1 << bwl, plane); } else { + int i; uint8_t *src_2 = ref + (4 << bhl); uint8_t *dst_2 = dst + (4 << bhl); vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride, dst, dst_stride, 0, 0, plane); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2); + uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2); + for (i = 0; i < (4 << bwl); ++i) + src_216[i * ref_stride - 1] = dst_216[i * dst_stride - 1]; + } else +#endif // CONFIG_VP9_HIGHBITDEPTH + { + for (i = 0; i < (4 << bwl); ++i) + src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1]; + } vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride, dst_2, dst_stride, 1 << bhl, 0, plane); } } -void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride, - BLOCK_SIZE bsize) { - const int bw = 4 << b_width_log2_lookup[bsize]; +// Mapping of interintra to intra mode for use in the intra component +static const int interintra_to_intra_mode[INTERINTRA_MODES] = { + DC_PRED, + V_PRED, + H_PRED, + D45_PRED, + D135_PRED, + D117_PRED, + D153_PRED, + D207_PRED, + D63_PRED, + TM_PRED +}; + +void vp10_build_intra_predictors_for_interintra( + MACROBLOCKD *xd, + BLOCK_SIZE bsize, int plane, + uint8_t *dst, int dst_stride) { + build_intra_predictors_for_interintra( + xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, + dst, 
dst_stride, + interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode], + bsize, plane); +} + +void vp10_combine_interintra(MACROBLOCKD *xd, + BLOCK_SIZE bsize, int plane, + uint8_t *inter_pred, int inter_stride, + uint8_t *intra_pred, int intra_stride) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); - build_intra_predictors_for_interintra( - xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, - CONVERT_TO_BYTEPTR(intrapredictor), bw, - xd->mi[0]->mbmi.interintra_mode, bsize, 0); combine_interintra_highbd(xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra, xd->mi[0]->mbmi.interintra_wedge_index, bsize, - bsize, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, - ypred, ystride, - CONVERT_TO_BYTEPTR(intrapredictor), bw, xd->bd); + plane_bsize, + xd->plane[plane].dst.buf, + xd->plane[plane].dst.stride, + inter_pred, inter_stride, + intra_pred, intra_stride, + xd->bd); + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + combine_interintra(xd->mi[0]->mbmi.interintra_mode, + xd->mi[0]->mbmi.use_wedge_interintra, + xd->mi[0]->mbmi.interintra_wedge_index, + bsize, + plane_bsize, + xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, + inter_pred, inter_stride, + intra_pred, intra_stride); +} + +void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, + uint8_t *ypred, + int ystride, + BLOCK_SIZE bsize) { +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + DECLARE_ALIGNED(16, uint16_t, + intrapredictor[MAX_SB_SQUARE]); + vp10_build_intra_predictors_for_interintra( + xd, bsize, 0, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 0, ypred, ystride, + CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); return; } #endif // CONFIG_VP9_HIGHBITDEPTH { uint8_t intrapredictor[MAX_SB_SQUARE]; - 
build_intra_predictors_for_interintra( - xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, - intrapredictor, bw, - xd->mi[0]->mbmi.interintra_mode, bsize, 0); - combine_interintra(xd->mi[0]->mbmi.interintra_mode, - xd->mi[0]->mbmi.use_wedge_interintra, - xd->mi[0]->mbmi.interintra_wedge_index, - bsize, - bsize, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, - ypred, ystride, intrapredictor, bw); + vp10_build_intra_predictors_for_interintra( + xd, bsize, 0, intrapredictor, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 0, ypred, ystride, + intrapredictor, MAX_SB_SIZE); } } @@ -2016,41 +2039,23 @@ int ustride, int plane, BLOCK_SIZE bsize) { - const BLOCK_SIZE uvbsize = get_plane_block_size(bsize, &xd->plane[plane]); - const int bw = 4 << b_width_log2_lookup[uvbsize]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]); - build_intra_predictors_for_interintra( - xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, - CONVERT_TO_BYTEPTR(uintrapredictor), bw, - xd->mi[0]->mbmi.interintra_uv_mode, bsize, plane); - combine_interintra_highbd(xd->mi[0]->mbmi.interintra_uv_mode, - xd->mi[0]->mbmi.use_wedge_interintra, - xd->mi[0]->mbmi.interintra_uv_wedge_index, - bsize, - uvbsize, - xd->plane[plane].dst.buf, - xd->plane[plane].dst.stride, - upred, ustride, - CONVERT_TO_BYTEPTR(uintrapredictor), bw, xd->bd); + DECLARE_ALIGNED(16, uint16_t, + uintrapredictor[MAX_SB_SQUARE]); + vp10_build_intra_predictors_for_interintra( + xd, bsize, plane, CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, plane, upred, ustride, + CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE); return; } #endif // CONFIG_VP9_HIGHBITDEPTH { uint8_t uintrapredictor[MAX_SB_SQUARE]; - build_intra_predictors_for_interintra( - xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, - uintrapredictor, bw, - xd->mi[0]->mbmi.interintra_uv_mode, bsize, plane); - 
combine_interintra(xd->mi[0]->mbmi.interintra_uv_mode, - xd->mi[0]->mbmi.use_wedge_interintra, - xd->mi[0]->mbmi.interintra_uv_wedge_index, - bsize, - uvbsize, - xd->plane[plane].dst.buf, - xd->plane[plane].dst.stride, - upred, ustride, uintrapredictor, bw); + vp10_build_intra_predictors_for_interintra( + xd, bsize, plane, uintrapredictor, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, plane, upred, ustride, + uintrapredictor, MAX_SB_SIZE); } }
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index eda1658..9067c4b 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h
@@ -436,6 +436,25 @@ int ustride, int vstride, BLOCK_SIZE bsize); +void vp10_build_intra_predictors_for_interintra( + MACROBLOCKD *xd, + BLOCK_SIZE bsize, int plane, + uint8_t *intra_pred, int intra_stride); +void vp10_combine_interintra( + MACROBLOCKD *xd, + BLOCK_SIZE bsize, int plane, + uint8_t *inter_pred, int inter_stride, + uint8_t *intra_pred, int intra_stride); +void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, + uint8_t *upred, + uint8_t *vpred, + int ustride, int vstride, + BLOCK_SIZE bsize); +void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, + uint8_t *ypred, + int ystride, + BLOCK_SIZE bsize); + // Encoder only void vp10_build_inter_predictors_for_planes_single_buf( MACROBLOCKD *xd, BLOCK_SIZE bsize,
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c index 300005f..bafd0d6 100644 --- a/vp10/common/reconintra.c +++ b/vp10/common/reconintra.c
@@ -272,21 +272,19 @@ if (x + step < w) return 1; - mi_row = (mi_row & MI_MASK) >> hl; - mi_col = (mi_col & MI_MASK) >> wl; + mi_row = (mi_row & MAX_MIB_MASK) >> hl; + mi_col = (mi_col & MAX_MIB_MASK) >> wl; // If top row of coding unit if (mi_row == 0) return 1; // If rightmost column of coding unit - if (((mi_col + 1) << wl) >= MI_BLOCK_SIZE) + if (((mi_col + 1) << wl) >= MAX_MIB_SIZE) return 0; - my_order = - order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0]; - tr_order = - order[((mi_row - 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 1]; + my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0]; + tr_order = order[((mi_row - 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 1]; return my_order > tr_order; } else { @@ -315,17 +313,17 @@ if (y + step < h) return 1; - mi_row = (mi_row & MI_MASK) >> hl; - mi_col = (mi_col & MI_MASK) >> wl; + mi_row = (mi_row & MAX_MIB_MASK) >> hl; + mi_col = (mi_col & MAX_MIB_MASK) >> wl; if (mi_col == 0) - return (mi_row << (hl + !ss_y)) + y + step < (MI_BLOCK_SIZE << !ss_y); + return (mi_row << (hl + !ss_y)) + y + step < (MAX_MIB_SIZE << !ss_y); - if (((mi_row + 1) << hl) >= MI_BLOCK_SIZE) + if (((mi_row + 1) << hl) >= MAX_MIB_SIZE) return 0; - my_order = order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0]; - bl_order = order[((mi_row + 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col - 1]; + my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0]; + bl_order = order[((mi_row + 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col - 1]; return bl_order < my_order; }
diff --git a/vp10/common/scan.c b/vp10/common/scan.c index 2644ecf..9fef038 100644 --- a/vp10/common/scan.c +++ b/vp10/common/scan.c
@@ -793,8 +793,9 @@ // -1 indicates the neighbor does not exist. DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, - 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, + 0, 0, 0, 0, 4, 0, 1, 4, 4, 5, 5, 1, + 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, + 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, }; #if CONFIG_EXT_TX @@ -813,25 +814,31 @@ DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, 9, 2, 2, 6, 6, 2, 2, 3, - 3, 10, 10, 7, 7, 11, 11, 0, 0, + 0, 0, 0, 0, 4, 4, 4, 0, 8, 8, 1, 4, + 5, 8, 5, 1, 9, 12, 2, 5, 6, 9, 6, 2, + 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, 8, 6, 6, 8, 8, 9, 9, 12, - 12, 10, 10, 13, 13, 14, 14, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 4, 2, 2, + 2, 5, 4, 5, 5, 8, 3, 6, 8, 9, 6, 9, + 9, 12, 7, 10, 10, 13, 11, 14, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, 32, 17, 17, 2, - 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, 48, 48, 11, 11, 26, - 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, 12, 49, 49, 42, 42, 20, - 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, 13, 13, 36, 36, 5, 5, 21, 21, - 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, - 30, 45, 45, 15, 15, 38, 38, 23, 23, 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, - 47, 47, 55, 55, 0, 0, + 0, 0, 0, 0, 8, 8, 8, 0, 16, 16, 1, 8, + 24, 24, 9, 16, 9, 1, 32, 32, 17, 24, 2, 9, + 25, 32, 10, 17, 40, 40, 10, 2, 18, 25, 33, 40, + 3, 10, 48, 48, 11, 18, 26, 33, 11, 3, 41, 48, + 19, 26, 34, 41, 4, 11, 27, 34, 12, 19, 49, 56, + 42, 49, 20, 27, 12, 4, 35, 42, 5, 12, 28, 35, + 50, 57, 43, 50, 13, 20, 36, 43, 13, 5, 21, 28, + 51, 58, 29, 36, 
6, 13, 44, 51, 14, 21, 14, 6, + 37, 44, 52, 59, 22, 29, 7, 14, 30, 37, 45, 52, + 15, 22, 38, 45, 23, 30, 53, 60, 31, 38, 46, 53, + 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, }; #if CONFIG_EXT_TX @@ -860,24 +867,32 @@ DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, 16, 10, 10, 16, 16, - 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, 5, 5, 12, 12, 19, 19, - 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, 27, 40, 40, 13, 13, 34, 34, - 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, 42, 42, 14, 14, 48, 48, 36, - 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, 50, 57, 57, 44, 44, 37, 37, - 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, 38, 38, 60, 60, 46, 46, 53, - 53, 54, 54, 61, 61, 62, 62, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 1, 1, 8, 2, 2, + 8, 9, 2, 9, 3, 3, 9, 16, 3, 10, 16, 17, + 4, 4, 10, 17, 17, 24, 4, 11, 11, 18, 18, 25, + 24, 25, 5, 5, 5, 12, 12, 19, 25, 32, 19, 26, + 6, 6, 26, 33, 32, 33, 13, 20, 20, 27, 33, 40, + 6, 13, 27, 34, 40, 41, 34, 41, 21, 28, 28, 35, + 41, 48, 14, 21, 35, 42, 7, 14, 48, 49, 29, 36, + 42, 49, 36, 43, 22, 29, 49, 56, 15, 22, 43, 50, + 50, 57, 37, 44, 30, 37, 44, 51, 23, 30, 51, 58, + 45, 52, 38, 45, 52, 59, 31, 38, 53, 60, 39, 46, + 46, 53, 47, 54, 54, 61, 55, 62, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, 2, 10, 17, 17, - 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, 4, 11, 26, 33, 19, - 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34, 41, 20, 27, 13, 20, 5, - 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, - 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, 37, 44, 15, 22, 44, 51, - 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, 39, - 46, 54, 61, 47, 54, 55, 62, 0, 0, + 0, 0, 0, 0, 8, 0, 8, 8, 1, 8, 9, 1, + 9, 16, 16, 17, 2, 9, 10, 2, 10, 17, 17, 
24, + 24, 25, 3, 10, 11, 3, 18, 25, 25, 32, 11, 18, + 32, 33, 4, 11, 26, 33, 19, 26, 12, 4, 33, 40, + 12, 19, 40, 41, 5, 12, 27, 34, 34, 41, 20, 27, + 13, 20, 13, 5, 41, 48, 48, 49, 28, 35, 35, 42, + 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, 43, + 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, + 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, + 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, + 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, }; #if CONFIG_EXT_TX @@ -992,113 +1007,143 @@ DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, 64, - 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, 65, 65, - 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, 128, 3, 3, - 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, 113, 113, 3, 3, - 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, 67, 20, 20, 83, 83, - 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, 52, 52, 99, 99, 5, 5, - 130, 130, 68, 68, 192, 192, 161, 161, 21, 21, 115, 115, 84, 84, 37, 37, - 146, 146, 208, 208, 53, 53, 5, 5, 100, 100, 177, 177, 131, 131, 69, 69, - 6, 6, 224, 224, 116, 116, 22, 22, 162, 162, 85, 85, 147, 147, 38, 38, - 193, 193, 101, 101, 54, 54, 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, - 209, 209, 7, 7, 117, 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, - 225, 225, 39, 39, 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, - 71, 71, 210, 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, - 56, 56, 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, - 72, 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, - 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, 151, - 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, 10, 10, - 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, 121, 213, 213, - 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, 10, 10, 90, 90, - 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, 27, 199, 199, 43, 
43, - 184, 184, 122, 122, 169, 169, 230, 230, 59, 59, 11, 11, 75, 75, 138, 138, - 200, 200, 215, 215, 91, 91, 12, 12, 28, 28, 185, 185, 107, 107, 154, 154, - 44, 44, 231, 231, 216, 216, 60, 60, 123, 123, 12, 12, 76, 76, 201, 201, - 170, 170, 232, 232, 139, 139, 92, 92, 13, 13, 108, 108, 29, 29, 186, 186, - 217, 217, 155, 155, 45, 45, 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, - 77, 77, 14, 14, 171, 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, - 46, 46, 156, 156, 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, - 31, 31, 172, 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, - 110, 110, 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, - 142, 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, - 220, 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, - 175, 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, - 223, 239, 239, 0, 0, + 0, 0, 0, 0, 16, 16, 32, 32, 16, 0, 48, 48, + 1, 16, 64, 64, 17, 32, 80, 80, 33, 48, 17, 1, + 49, 64, 96, 96, 2, 17, 65, 80, 18, 33, 112, 112, + 34, 49, 81, 96, 18, 2, 50, 65, 128, 128, 3, 18, + 97, 112, 19, 34, 66, 81, 144, 144, 82, 97, 35, 50, + 113, 128, 19, 3, 51, 66, 160, 160, 4, 19, 98, 113, + 129, 144, 67, 82, 20, 35, 83, 98, 114, 129, 36, 51, + 176, 176, 20, 4, 145, 160, 52, 67, 99, 114, 5, 20, + 130, 145, 68, 83, 192, 192, 161, 176, 21, 36, 115, 130, + 84, 99, 37, 52, 146, 161, 208, 208, 53, 68, 21, 5, + 100, 115, 177, 192, 131, 146, 69, 84, 6, 21, 224, 224, + 116, 131, 22, 37, 162, 177, 85, 100, 147, 162, 38, 53, + 193, 208, 101, 116, 54, 69, 22, 6, 132, 147, 178, 193, + 70, 85, 163, 178, 209, 224, 7, 22, 117, 132, 23, 38, + 148, 163, 23, 7, 86, 101, 194, 209, 225, 240, 39, 54, + 179, 194, 102, 117, 133, 148, 55, 70, 164, 179, 8, 23, + 71, 86, 210, 225, 118, 133, 149, 164, 195, 210, 24, 39, + 87, 102, 40, 55, 56, 71, 134, 149, 180, 195, 226, 241, + 103, 118, 24, 8, 165, 180, 211, 226, 72, 87, 150, 165, + 9, 24, 119, 134, 25, 40, 88, 103, 
196, 211, 41, 56, + 135, 150, 181, 196, 104, 119, 57, 72, 227, 242, 166, 181, + 120, 135, 151, 166, 197, 212, 73, 88, 25, 9, 212, 227, + 89, 104, 136, 151, 182, 197, 10, 25, 26, 41, 105, 120, + 167, 182, 228, 243, 152, 167, 42, 57, 121, 136, 213, 228, + 58, 73, 198, 213, 74, 89, 137, 152, 183, 198, 168, 183, + 26, 10, 90, 105, 229, 244, 11, 26, 106, 121, 214, 229, + 153, 168, 27, 42, 199, 214, 43, 58, 184, 199, 122, 137, + 169, 184, 230, 245, 59, 74, 27, 11, 75, 90, 138, 153, + 200, 215, 215, 230, 91, 106, 12, 27, 28, 43, 185, 200, + 107, 122, 154, 169, 44, 59, 231, 246, 216, 231, 60, 75, + 123, 138, 28, 12, 76, 91, 201, 216, 170, 185, 232, 247, + 139, 154, 92, 107, 13, 28, 108, 123, 29, 44, 186, 201, + 217, 232, 155, 170, 45, 60, 29, 13, 61, 76, 124, 139, + 14, 14, 233, 248, 77, 92, 14, 29, 171, 186, 140, 155, + 202, 217, 30, 45, 93, 108, 109, 124, 46, 61, 156, 171, + 62, 77, 187, 202, 15, 30, 125, 140, 218, 233, 78, 93, + 31, 46, 172, 187, 47, 62, 141, 156, 94, 109, 234, 249, + 203, 218, 63, 78, 110, 125, 188, 203, 157, 172, 126, 141, + 79, 94, 173, 188, 95, 110, 219, 234, 142, 157, 204, 219, + 235, 250, 111, 126, 158, 173, 127, 142, 189, 204, 220, 235, + 143, 158, 174, 189, 205, 220, 236, 251, 159, 174, 190, 205, + 221, 236, 175, 190, 237, 252, 206, 221, 222, 237, 191, 206, + 238, 253, 207, 222, 223, 238, 239, 254, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, 17, - 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, 19, 19, - 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, 7, 35, 35, - 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, 65, 65, 51, 51, - 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, 52, 23, 23, 81, 81, - 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, 82, 82, 96, 96, 68, 68, - 24, 24, 97, 97, 83, 83, 39, 39, 96, 96, 54, 54, 11, 11, 69, 69, - 98, 98, 112, 112, 84, 84, 25, 25, 40, 40, 55, 55, 113, 113, 99, 99, - 12, 12, 70, 70, 112, 112, 85, 85, 26, 26, 114, 
114, 100, 100, 128, 128, - 41, 41, 56, 56, 71, 71, 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, - 128, 128, 72, 72, 130, 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, - 42, 42, 144, 144, 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, - 88, 88, 132, 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, - 160, 160, 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, - 74, 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44, - 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, 135, - 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121, 60, 60, - 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, 45, 165, 165, - 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, 122, 122, 152, 152, - 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, 181, 224, 224, 107, 107, - 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, 168, 168, 210, 210, 46, 46, - 138, 138, 92, 92, 183, 183, 225, 225, 211, 211, 240, 240, 197, 197, 169, 169, - 123, 123, 154, 154, 198, 198, 77, 77, 212, 212, 184, 184, 108, 108, 226, 226, - 199, 199, 62, 62, 227, 227, 241, 241, 139, 139, 213, 213, 170, 170, 185, 185, - 155, 155, 228, 228, 242, 242, 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, - 215, 215, 229, 229, 140, 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, - 156, 156, 244, 244, 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, - 202, 202, 246, 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, - 157, 187, 187, 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, - 203, 203, 142, 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, - 219, 174, 174, 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, - 206, 206, 236, 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, - 238, 253, 253, 254, 254, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 16, + 3, 3, 2, 17, 16, 17, 4, 4, 17, 32, 3, 18, + 5, 5, 18, 33, 32, 33, 4, 19, 33, 48, 6, 6, + 19, 34, 5, 20, 34, 49, 
48, 49, 7, 7, 20, 35, + 49, 64, 6, 21, 35, 50, 21, 36, 64, 65, 8, 8, + 50, 65, 36, 51, 7, 22, 22, 37, 65, 80, 51, 66, + 9, 9, 37, 52, 8, 23, 66, 81, 52, 67, 80, 81, + 23, 38, 10, 10, 38, 53, 67, 82, 81, 96, 53, 68, + 9, 24, 82, 97, 68, 83, 24, 39, 96, 97, 39, 54, + 11, 11, 54, 69, 83, 98, 97, 112, 69, 84, 10, 25, + 25, 40, 40, 55, 98, 113, 84, 99, 12, 12, 55, 70, + 112, 113, 70, 85, 11, 26, 99, 114, 85, 100, 113, 128, + 26, 41, 41, 56, 56, 71, 100, 115, 13, 13, 71, 86, + 114, 129, 86, 101, 128, 129, 57, 72, 115, 130, 101, 116, + 12, 27, 42, 57, 14, 14, 72, 87, 27, 42, 129, 144, + 87, 102, 116, 131, 130, 145, 102, 117, 58, 73, 144, 145, + 73, 88, 117, 132, 88, 103, 13, 28, 43, 58, 131, 146, + 103, 118, 28, 43, 145, 160, 132, 147, 74, 89, 89, 104, + 118, 133, 146, 161, 104, 119, 160, 161, 59, 74, 119, 134, + 133, 148, 14, 29, 44, 59, 147, 162, 161, 176, 29, 44, + 105, 120, 75, 90, 90, 105, 148, 163, 162, 177, 134, 149, + 176, 177, 120, 135, 149, 164, 163, 178, 15, 30, 135, 150, + 177, 192, 60, 75, 106, 121, 45, 60, 121, 136, 178, 193, + 91, 106, 136, 151, 164, 179, 192, 193, 30, 45, 150, 165, + 151, 166, 179, 194, 76, 91, 165, 180, 122, 137, 193, 208, + 107, 122, 137, 152, 208, 209, 180, 195, 61, 76, 152, 167, + 194, 209, 166, 181, 224, 224, 92, 107, 181, 196, 46, 61, + 138, 153, 209, 224, 167, 182, 153, 168, 195, 210, 31, 46, + 123, 138, 77, 92, 168, 183, 210, 225, 196, 211, 225, 240, + 182, 197, 154, 169, 108, 123, 139, 154, 183, 198, 62, 77, + 197, 212, 169, 184, 93, 108, 211, 226, 184, 199, 47, 62, + 212, 227, 226, 241, 124, 139, 198, 213, 155, 170, 170, 185, + 140, 155, 213, 228, 227, 242, 109, 124, 78, 93, 185, 200, + 228, 243, 199, 214, 200, 215, 214, 229, 125, 140, 171, 186, + 186, 201, 63, 78, 156, 171, 94, 109, 141, 156, 229, 244, + 201, 216, 215, 230, 79, 94, 230, 245, 216, 231, 110, 125, + 187, 202, 231, 246, 217, 232, 157, 172, 202, 217, 126, 141, + 95, 110, 142, 157, 172, 187, 232, 247, 111, 126, 218, 233, + 203, 218, 233, 248, 173, 188, 188, 203, 127, 
142, 158, 173, + 143, 158, 234, 249, 219, 234, 189, 204, 204, 219, 159, 174, + 174, 189, 235, 250, 205, 220, 175, 190, 190, 205, 220, 235, + 191, 206, 221, 236, 236, 251, 206, 221, 237, 252, 207, 222, + 222, 237, 223, 238, 238, 253, 239, 254, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, 32, - 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, 64, 64, - 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, 80, 35, 50, - 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, 5, 20, 36, 51, - 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, 67, 112, 112, 37, 52, - 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, 113, 128, 22, 37, 53, 68, - 84, 99, 99, 114, 128, 128, 114, 129, 69, 84, 38, 53, 7, 22, 7, 7, - 129, 144, 23, 38, 54, 69, 100, 115, 85, 100, 115, 130, 144, 144, 130, 145, - 39, 54, 70, 85, 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, - 8, 8, 86, 101, 131, 146, 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, - 117, 132, 102, 117, 161, 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, - 9, 9, 176, 176, 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, - 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, - 10, 10, 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, - 164, 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, - 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, 58, - 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, 209, 224, - 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, 196, 12, 12, - 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, 122, 137, 91, 106, - 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, 183, 211, 226, 153, 168, - 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, 76, 91, 13, 13, 183, 198, - 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, 227, 242, 92, 107, - 61, 76, 139, 154, 14, 29, 14, 14, 184, 199, 213, 228, 108, 123, 
199, 214, - 228, 243, 77, 92, 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, - 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, - 215, 230, 31, 46, 171, 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, - 47, 62, 216, 231, 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, - 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, - 203, 218, 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, - 234, 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, - 250, 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, - 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, - 238, 239, 254, 0, 0, + 0, 0, 0, 0, 16, 0, 16, 16, 1, 16, 17, 1, + 32, 32, 17, 32, 2, 17, 18, 2, 48, 48, 18, 33, + 33, 48, 3, 18, 49, 64, 64, 65, 34, 49, 19, 3, + 19, 34, 50, 65, 4, 19, 65, 80, 80, 81, 35, 50, + 20, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 97, + 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, + 21, 5, 52, 67, 112, 113, 37, 52, 6, 21, 83, 98, + 98, 113, 68, 83, 22, 6, 113, 128, 22, 37, 53, 68, + 84, 99, 99, 114, 128, 129, 114, 129, 69, 84, 38, 53, + 7, 22, 23, 7, 129, 144, 23, 38, 54, 69, 100, 115, + 85, 100, 115, 130, 144, 145, 130, 145, 39, 54, 70, 85, + 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, + 24, 8, 86, 101, 131, 146, 160, 161, 146, 161, 71, 86, + 40, 55, 9, 24, 117, 132, 102, 117, 161, 176, 132, 147, + 56, 71, 87, 102, 25, 40, 147, 162, 25, 9, 176, 177, + 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, + 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, + 163, 178, 192, 193, 26, 10, 119, 134, 73, 88, 149, 164, + 104, 119, 134, 149, 42, 57, 178, 193, 164, 179, 11, 26, + 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, + 74, 89, 208, 209, 150, 165, 179, 194, 165, 180, 105, 120, + 194, 209, 43, 58, 27, 11, 136, 151, 90, 105, 151, 166, + 180, 195, 59, 74, 121, 136, 209, 224, 195, 210, 224, 225, + 166, 181, 106, 121, 75, 
90, 12, 27, 181, 196, 28, 12, + 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, + 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, + 182, 197, 168, 183, 211, 226, 153, 168, 226, 241, 60, 75, + 197, 212, 138, 153, 29, 44, 76, 91, 29, 13, 183, 198, + 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, + 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 30, 14, + 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, + 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, + 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, + 62, 77, 140, 155, 215, 230, 31, 46, 171, 186, 186, 201, + 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231, + 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, + 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, + 157, 172, 126, 141, 203, 218, 95, 110, 233, 248, 218, 233, + 142, 157, 111, 126, 173, 188, 188, 203, 234, 249, 219, 234, + 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250, + 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, + 190, 205, 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, + 207, 222, 238, 253, 223, 238, 239, 254, 0, 0, }; #if CONFIG_EXT_TX @@ -1493,139 +1538,177 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, 33, 64, - 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, 97, 128, 3, 34, - 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, 160, 160, 4, 35, 67, 98, - 192, 192, 4, 4, 130, 161, 161, 192, 36, 67, 99, 130, 5, 36, 68, 99, - 193, 224, 162, 193, 224, 224, 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, - 225, 256, 256, 256, 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, - 195, 226, 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, - 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, 165, 196, - 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, 352, 352, 197, 228, - 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, 103, 
134, 353, 384, - 166, 197, 229, 260, 40, 71, 8, 8, 384, 384, 135, 166, 354, 385, 323, 354, - 198, 229, 292, 323, 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, - 230, 261, 355, 386, 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, - 199, 230, 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, - 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, 294, 325, - 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, 74, 105, 419, 450, - 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, 11, 42, 106, 137, 480, 480, - 450, 481, 358, 389, 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, - 327, 358, 11, 11, 481, 512, 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, - 482, 513, 512, 512, 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, - 452, 483, 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, - 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, 453, 484, - 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, 140, 171, 515, - 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, - 361, 576, 576, 13, 13, 267, 298, 546, 577, 77, 108, 204, 235, 455, 486, 577, - 608, 299, 330, 109, 140, 547, 578, 14, 45, 14, 14, 141, 172, 578, 609, 331, - 362, 46, 77, 173, 204, 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, - 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, - 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238, 49, 80, - 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, 608, 608, - 484, 515, 360, 391, 236, 267, 112, 143, 19, 19, 640, 640, 609, 640, 516, 547, - 485, 516, 392, 423, 361, 392, 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, - 20, 20, 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, - 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, - 145, 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, 580, - 611, 549, 580, 518, 549, 
487, 518, 456, 487, 425, 456, 394, 425, 363, 394, - 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, 146, 177, 115, - 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, 674, 705, 643, 674, 581, 612, - 550, 581, 519, 550, 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, - 302, 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, 706, 737, 675, - 706, 582, 613, 551, 582, 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, - 179, 210, 86, 117, 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, - 87, 118, 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23, - 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, 365, - 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, 800, 800, 769, - 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, - 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, 180, 211, 149, - 180, 118, 149, 56, 87, 25, 56, 25, 25, 832, 832, 801, 832, 770, 801, 739, - 770, 708, 739, 677, 708, 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, - 491, 522, 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, 274, - 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, - 57, 26, 26, 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, - 616, 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, - 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, 834, 865, - 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, 431, 462, 338, - 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, - 618, 463, 494, 339, 370, 215, 246, 91, 122, 864, 864, 740, 771, 616, 647, - 492, 523, 368, 399, 244, 275, 120, 151, 27, 27, 896, 896, 865, 896, 772, 803, - 741, 772, 648, 679, 617, 648, 524, 555, 493, 524, 400, 431, 369, 400, 276, - 307, 245, 276, 152, 183, 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, - 897, 804, 835, 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, - 525, 
556, 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, - 277, 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, 929, - 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, 712, 743, - 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, 495, 526, 464, - 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, 278, 309, 247, 278, - 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 30, 30, - 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, 713, 744, 682, - 713, 651, 682, 589, 620, 558, 589, 527, 558, 465, 496, 434, 465, 403, 434, - 341, 372, 310, 341, 279, 310, 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, - 31, 62, 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, - 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, - 125, 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, 219, - 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, - 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, 621, 652, 528, - 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, 156, 187, 125, 156, - 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, 746, 777, 684, 715, 653, - 684, 622, 653, 560, 591, 529, 560, 498, 529, 436, 467, 405, 436, 374, 405, - 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, 964, 995, 933, - 964, 902, 933, 871, 902, 840, 871, 809, 840, 778, 809, 747, 778, 716, 747, - 685, 716, 654, 685, 623, 654, 592, 623, 561, 592, 530, 561, 499, 530, 468, - 499, 437, 468, 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, - 220, 251, 189, 220, 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, - 872, 810, 841, 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, - 531, 562, 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, - 252, 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, - 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, 222, - 
253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, 347, 378, - 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, 252, 283, 904, - 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, - 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, 874, 905, 812, - 843, 781, 812, 750, 781, 688, 719, 657, 688, 626, 657, 564, 595, 533, 564, - 502, 533, 440, 471, 409, 440, 378, 409, 316, 347, 285, 316, 254, 285, 968, - 999, 937, 968, 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, - 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, 534, 565, 503, - 534, 472, 503, 441, 472, 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, - 255, 286, 969, 1000, 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, - 752, 690, 721, 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, - 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, - 846, 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, - 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, 876, - 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, - 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, 381, 412, 940, - 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, 692, 723, 661, 692, - 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, 413, 444, 382, 413, 972, - 1003, 941, 972, 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, - 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, 538, 569, 507, - 538, 476, 507, 445, 476, 414, 445, 383, 414, 973, 1004, 942, 973, 911, 942, - 849, 880, 818, 849, 787, 818, 725, 756, 694, 725, 663, 694, 601, 632, 570, - 601, 539, 570, 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, - 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, - 1006, 851, 882, 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, - 508, 539, 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 
664, 540, - 571, 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, - 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, 945, - 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, 728, 759, - 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, 511, 542, 977, - 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, - 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, 854, 885, 823, - 854, 730, 761, 699, 730, 606, 637, 575, 606, 979, 1010, 855, 886, 731, 762, - 607, 638, 884, 915, 760, 791, 636, 667, 916, 947, 885, 916, 792, 823, 761, - 792, 668, 699, 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, - 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, 918, 949, 887, - 918, 856, 887, 825, 856, 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, - 639, 670, 981, 1012, 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, - 764, 702, 733, 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, - 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, - 920, 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, - 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, 798, - 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, - 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, 892, 923, 924, - 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, 957, 988, 926, 957, - 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, + 0, 0, 0, 0, 32, 0, 32, 32, 1, 32, 33, 1, + 64, 64, 33, 64, 2, 33, 96, 96, 34, 2, 65, 96, + 34, 65, 128, 128, 97, 128, 3, 34, 66, 97, 35, 3, + 35, 66, 98, 129, 129, 160, 160, 161, 4, 35, 67, 98, + 192, 192, 36, 4, 130, 161, 161, 192, 36, 67, 99, 130, + 5, 36, 68, 99, 193, 224, 162, 193, 224, 225, 131, 162, + 37, 68, 100, 131, 37, 5, 194, 225, 225, 256, 256, 257, + 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 38, 6, + 195, 226, 
257, 288, 101, 132, 288, 289, 38, 69, 164, 195, + 133, 164, 258, 289, 227, 258, 196, 227, 7, 38, 289, 320, + 70, 101, 320, 321, 39, 7, 165, 196, 39, 70, 102, 133, + 290, 321, 259, 290, 228, 259, 321, 352, 352, 353, 197, 228, + 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, + 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 40, 8, + 384, 385, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, + 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, + 230, 261, 355, 386, 416, 417, 293, 324, 324, 355, 41, 9, + 41, 72, 386, 417, 199, 230, 136, 167, 417, 448, 262, 293, + 356, 387, 73, 104, 387, 418, 231, 262, 10, 41, 168, 199, + 325, 356, 418, 449, 105, 136, 448, 449, 42, 73, 294, 325, + 200, 231, 42, 10, 357, 388, 137, 168, 263, 294, 388, 419, + 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, + 169, 200, 11, 42, 106, 137, 480, 481, 450, 481, 358, 389, + 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, + 327, 358, 43, 11, 481, 512, 233, 264, 451, 482, 296, 327, + 75, 106, 170, 201, 482, 513, 512, 513, 390, 421, 359, 390, + 421, 452, 107, 138, 12, 43, 202, 233, 452, 483, 265, 296, + 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, + 297, 328, 422, 453, 44, 12, 391, 422, 171, 202, 76, 107, + 514, 545, 453, 484, 544, 545, 266, 297, 203, 234, 108, 139, + 329, 360, 298, 329, 140, 171, 515, 546, 13, 44, 423, 454, + 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, 361, + 576, 577, 45, 13, 267, 298, 546, 577, 77, 108, 204, 235, + 455, 486, 577, 608, 299, 330, 109, 140, 547, 578, 14, 45, + 46, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204, + 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, + 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, + 16, 47, 111, 142, 48, 79, 143, 174, 80, 111, 175, 206, + 17, 48, 49, 17, 207, 238, 49, 80, 81, 112, 18, 18, + 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, + 608, 609, 484, 515, 360, 391, 236, 267, 112, 143, 51, 19, + 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392, + 
268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 52, 20, + 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, + 424, 455, 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, + 176, 207, 145, 176, 114, 145, 52, 83, 21, 52, 53, 21, + 704, 704, 673, 704, 642, 673, 611, 642, 580, 611, 549, 580, + 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, 363, 394, + 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, + 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 54, 22, + 705, 736, 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, + 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, 302, + 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, + 706, 737, 675, 706, 582, 613, 551, 582, 458, 489, 427, 458, + 334, 365, 303, 334, 210, 241, 179, 210, 86, 117, 55, 86, + 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118, + 736, 737, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, + 55, 23, 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, + 489, 520, 396, 427, 365, 396, 272, 303, 241, 272, 148, 179, + 117, 148, 24, 55, 56, 24, 800, 800, 769, 800, 738, 769, + 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, + 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, + 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 57, 25, + 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, + 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, + 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, + 274, 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, + 88, 119, 57, 88, 26, 57, 58, 26, 833, 864, 802, 833, + 771, 802, 709, 740, 678, 709, 647, 678, 585, 616, 554, 585, + 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, + 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, + 27, 58, 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, + 555, 586, 462, 493, 431, 462, 338, 369, 307, 338, 214, 245, + 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, 618, + 463, 494, 339, 370, 215, 246, 91, 122, 864, 865, 740, 771, + 616, 
647, 492, 523, 368, 399, 244, 275, 120, 151, 59, 27, + 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648, + 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, + 152, 183, 121, 152, 28, 59, 60, 28, 928, 928, 897, 928, + 866, 897, 804, 835, 773, 804, 742, 773, 680, 711, 649, 680, + 618, 649, 556, 587, 525, 556, 494, 525, 432, 463, 401, 432, + 370, 401, 308, 339, 277, 308, 246, 277, 184, 215, 153, 184, + 122, 153, 60, 91, 29, 60, 61, 29, 960, 960, 929, 960, + 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, + 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, + 526, 557, 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, + 340, 371, 309, 340, 278, 309, 247, 278, 216, 247, 185, 216, + 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 62, 30, + 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, + 713, 744, 682, 713, 651, 682, 589, 620, 558, 589, 527, 558, + 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310, + 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, + 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, + 590, 621, 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, + 218, 249, 187, 218, 94, 125, 63, 94, 963, 994, 839, 870, + 715, 746, 591, 622, 467, 498, 343, 374, 219, 250, 95, 126, + 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, + 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, + 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, + 249, 280, 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, + 808, 839, 777, 808, 746, 777, 684, 715, 653, 684, 622, 653, + 560, 591, 529, 560, 498, 529, 436, 467, 405, 436, 374, 405, + 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, + 964, 995, 933, 964, 902, 933, 871, 902, 840, 871, 809, 840, + 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654, + 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, + 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, + 220, 251, 189, 220, 158, 189, 127, 158, 965, 
996, 934, 965, + 903, 934, 841, 872, 810, 841, 779, 810, 717, 748, 686, 717, + 655, 686, 593, 624, 562, 593, 531, 562, 469, 500, 438, 469, + 407, 438, 345, 376, 314, 345, 283, 314, 221, 252, 190, 221, + 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, + 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, + 315, 346, 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, + 595, 626, 471, 502, 347, 378, 223, 254, 872, 903, 748, 779, + 624, 655, 500, 531, 376, 407, 252, 283, 904, 935, 873, 904, + 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, + 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, + 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688, + 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, + 378, 409, 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, + 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, + 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, + 534, 565, 503, 534, 472, 503, 441, 472, 410, 441, 379, 410, + 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000, 938, 969, + 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721, + 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, + 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, + 846, 877, 815, 846, 722, 753, 691, 722, 598, 629, 567, 598, + 474, 505, 443, 474, 350, 381, 319, 350, 971, 1002, 847, 878, + 723, 754, 599, 630, 475, 506, 351, 382, 876, 907, 752, 783, + 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, + 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, + 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, + 754, 785, 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, + 506, 537, 444, 475, 413, 444, 382, 413, 972, 1003, 941, 972, + 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, + 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, + 538, 569, 507, 538, 476, 507, 445, 476, 414, 445, 383, 414, + 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 
818, + 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, + 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, + 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, + 447, 478, 975, 1006, 851, 882, 727, 758, 603, 634, 479, 510, + 880, 911, 756, 787, 632, 663, 508, 539, 912, 943, 881, 912, + 788, 819, 757, 788, 664, 695, 633, 664, 540, 571, 509, 540, + 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, + 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, + 976, 1007, 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, + 790, 821, 759, 790, 728, 759, 697, 728, 666, 697, 635, 666, + 604, 635, 573, 604, 542, 573, 511, 542, 977, 1008, 946, 977, + 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, + 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, + 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606, + 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, + 636, 667, 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, + 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, + 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, + 918, 949, 887, 918, 856, 887, 825, 856, 794, 825, 763, 794, + 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012, 950, 981, + 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733, + 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, + 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, + 920, 951, 889, 920, 796, 827, 765, 796, 952, 983, 921, 952, + 890, 921, 828, 859, 797, 828, 766, 797, 984, 1015, 953, 984, + 922, 953, 891, 922, 860, 891, 829, 860, 798, 829, 767, 798, + 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, + 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, + 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, + 988, 1019, 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, + 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, }; #if CONFIG_EXT_TX
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c index b2339c6..a94aafd 100644 --- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c
@@ -94,7 +94,7 @@ int start, int stop, int y_only, VP9LfSync *const lf_sync) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; - const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; + const int sb_cols = mi_cols_aligned_to_sb(cm) >> cm->mib_size_log2; int mi_row, mi_col; #if !CONFIG_EXT_PARTITION_TYPES enum lf_path path; @@ -116,12 +116,12 @@ #endif // CONFIG_EXT_PARTITION for (mi_row = start; mi_row < stop; - mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { + mi_row += lf_sync->num_workers * cm->mib_size) { MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { - const int r = mi_row >> MI_BLOCK_SIZE_LOG2; - const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) { + const int r = mi_row >> cm->mib_size_log2; + const int c = mi_col >> cm->mib_size_log2; int plane; sync_read(lf_sync, r, c); @@ -175,7 +175,7 @@ VP9LfSync *lf_sync) { const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); // Number of superblock rows and cols - const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2; // Decoder may allocate more threads than number of tiles based on user's // input. 
const int tile_cols = cm->tile_cols; @@ -215,7 +215,7 @@ // Loopfilter data vp10_loop_filter_data_reset(lf_data, frame, cm, planes); - lf_data->start = start + i * MI_BLOCK_SIZE; + lf_data->start = start + i * cm->mib_size; lf_data->stop = stop; lf_data->y_only = y_only; @@ -428,10 +428,14 @@ cm->counts.inter_compound_mode[i][j] += counts->inter_compound_mode[i][j]; - for (i = 0; i < BLOCK_SIZES; i++) + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) for (j = 0; j < 2; j++) cm->counts.interintra[i][j] += counts->interintra[i][j]; + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) + for (j = 0; j < INTERINTRA_MODES; j++) + cm->counts.interintra_mode[i][j] += counts->interintra_mode[i][j]; + for (i = 0; i < BLOCK_SIZES; i++) for (j = 0; j < 2; j++) cm->counts.wedge_interintra[i][j] += counts->wedge_interintra[i][j];
diff --git a/vp10/common/tile_common.c b/vp10/common/tile_common.c index de5f921..04b19eb 100644 --- a/vp10/common/tile_common.c +++ b/vp10/common/tile_common.c
@@ -12,9 +12,6 @@ #include "vp10/common/onyxc_int.h" #include "vpx_dsp/vpx_dsp_common.h" -#define MIN_TILE_WIDTH_B64 4 -#define MAX_TILE_WIDTH_B64 64 - void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) { tile->mi_row_start = row * cm->tile_height; tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height, @@ -33,26 +30,35 @@ } #if !CONFIG_EXT_TILE -// TODO(geza.lore): CU_SIZE dependent. -static int get_min_log2_tile_cols(const int sb64_cols) { + +# if CONFIG_EXT_PARTITION +# define MIN_TILE_WIDTH_MAX_SB 2 +# define MAX_TILE_WIDTH_MAX_SB 32 +# else +# define MIN_TILE_WIDTH_MAX_SB 4 +# define MAX_TILE_WIDTH_MAX_SB 64 +# endif // CONFIG_EXT_PARTITION + +static int get_min_log2_tile_cols(const int max_sb_cols) { int min_log2 = 0; - while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) + while ((MAX_TILE_WIDTH_MAX_SB << min_log2) < max_sb_cols) ++min_log2; return min_log2; } -static int get_max_log2_tile_cols(const int sb64_cols) { +static int get_max_log2_tile_cols(const int max_sb_cols) { int max_log2 = 1; - while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64) + while ((max_sb_cols >> max_log2) >= MIN_TILE_WIDTH_MAX_SB) ++max_log2; return max_log2 - 1; } -void vp10_get_tile_n_bits(int mi_cols, +void vp10_get_tile_n_bits(const int mi_cols, int *min_log2_tile_cols, int *max_log2_tile_cols) { - const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; - *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); - *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); + const int max_sb_cols = + ALIGN_POWER_OF_TWO(mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2; + *min_log2_tile_cols = get_min_log2_tile_cols(max_sb_cols); + *max_log2_tile_cols = get_max_log2_tile_cols(max_sb_cols); assert(*min_log2_tile_cols <= *max_log2_tile_cols); } #endif // !CONFIG_EXT_TILE
diff --git a/vp10/common/tile_common.h b/vp10/common/tile_common.h index 09cf060..2babc89 100644 --- a/vp10/common/tile_common.h +++ b/vp10/common/tile_common.h
@@ -30,8 +30,8 @@ void vp10_tile_set_row(TileInfo *tile, const struct VP10Common *cm, int row); void vp10_tile_set_col(TileInfo *tile, const struct VP10Common *cm, int col); -void vp10_get_tile_n_bits(int mi_cols, - int *min_log2_tile_cols, int *max_log2_tile_cols); +void vp10_get_tile_n_bits(const int mi_cols, + int *min_log2_tile_cols, int *max_log2_tile_cols); #ifdef __cplusplus } // extern "C"
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index f592539..0e51b15 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c
@@ -16,7 +16,7 @@ #include "./vpx_scale_rtcd.h" #include "vpx_dsp/bitreader_buffer.h" -#include "vpx_dsp/bitreader.h" +#include "vp10/decoder/bitreader.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -100,14 +100,14 @@ return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2); } -static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) { +static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp10_reader *r) { int i, j; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) vp10_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); } -static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { +static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp10_reader *r) { int i; #if CONFIG_REF_MV for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) @@ -130,9 +130,9 @@ } #if CONFIG_EXT_INTER -static void read_inter_compound_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { +static void read_inter_compound_mode_probs(FRAME_CONTEXT *fc, vp10_reader *r) { int i, j; - if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) { for (j = 0; j < INTER_MODE_CONTEXTS; ++j) { for (i = 0; i < INTER_COMPOUND_MODES - 1; ++i) { vp10_diff_update_prob(r, &fc->inter_compound_mode_probs[j][i]); @@ -153,7 +153,7 @@ } } -static void read_frame_reference_mode_probs(VP10_COMMON *cm, vpx_reader *r) { +static void read_frame_reference_mode_probs(VP10_COMMON *cm, vp10_reader *r) { FRAME_CONTEXT *const fc = cm->fc; int i, j; @@ -178,13 +178,13 @@ } } -static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) { +static void update_mv_probs(vpx_prob *p, int n, vp10_reader *r) { int i; for (i = 0; i < n; ++i) vp10_diff_update_prob(r, &p[i]); } -static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) { +static void read_mv_probs(nmv_context *ctx, int allow_hp, vp10_reader *r) { int i, j; update_mv_probs(ctx->joints, MV_JOINTS - 1, 
r); @@ -262,7 +262,7 @@ const rans_dec_lut *const token_tab, struct AnsDecoder *const r, #else - vpx_reader *r, + vp10_reader *r, #endif // CONFIG_ANS MB_MODE_INFO *const mbmi, int plane, @@ -298,7 +298,7 @@ } #if CONFIG_VAR_TX -static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r, +static void decode_reconstruct_tx(MACROBLOCKD *const xd, vp10_reader *r, MB_MODE_INFO *const mbmi, int plane, BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col, @@ -360,7 +360,7 @@ const rans_dec_lut *const token_tab, struct AnsDecoder *const r, #else - vpx_reader *r, + vp10_reader *r, #endif MB_MODE_INFO *const mbmi, int plane, int row, int col, TX_SIZE tx_size) { @@ -503,7 +503,7 @@ buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; } #if CONFIG_EXT_INTER - if (ref && get_wedge_bits(xd->mi[0]->mbmi.sb_type) && + if (ref && is_interinter_wedge_used(xd->mi[0]->mbmi.sb_type) && xd->mi[0]->mbmi.use_wedge_interinter) vp10_make_masked_inter_predictor( buf_ptr, b_w, dst, dst_buf_stride, @@ -544,7 +544,7 @@ x0, y0, b_w, b_h, frame_width, frame_height); buf_ptr = mc_buf + border_offset; #if CONFIG_EXT_INTER - if (ref && get_wedge_bits(xd->mi[0]->mbmi.sb_type) && + if (ref && is_interinter_wedge_used(xd->mi[0]->mbmi.sb_type) && xd->mi[0]->mbmi.use_wedge_interinter) vp10_make_masked_inter_predictor( buf_ptr, b_w, dst, dst_buf_stride, @@ -753,7 +753,7 @@ } } #if CONFIG_EXT_INTER - if (ref && get_wedge_bits(mi->mbmi.sb_type) && + if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) { vp10_make_masked_inter_predictor( buf_ptr, buf_stride, dst, dst_buf->stride, @@ -1027,7 +1027,11 @@ set_mi_row_col(xd, tile, mi_row_pred, bh, mi_col_pred, bw, cm->mi_rows, cm->mi_cols); +#if CONFIG_EXT_TILE + xd->up_available = (mi_row_ori > tile->mi_row_start); +#else xd->up_available = (mi_row_ori != 0); +#endif // CONFIG_EXT_TILE xd->left_available = (mi_col_ori > tile->mi_col_start); set_plane_n4(xd, bw, bh, bwl, bhl); @@ -1095,7 +1099,8 @@ } #if 
CONFIG_VAR_TX xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); set_txfm_ctx(xd->left_txfm_context, xd->mi[0]->mbmi.tx_size, bh); set_txfm_ctx(xd->above_txfm_context, xd->mi[0]->mbmi.tx_size, bw); #endif @@ -1292,17 +1297,15 @@ int mi_row_top, int mi_col_top, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, uint8_t *dst_buf[3], int dst_stride[3]) { - VP10_COMMON *const cm = &pbi->common; - const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; - PARTITION_TYPE partition; - BLOCK_SIZE subsize; -#if !CONFIG_EXT_PARTITION_TYPES - MB_MODE_INFO *mbmi; -#endif - int i, offset = mi_row * cm->mi_stride + mi_col; + const VP10_COMMON *const cm = &pbi->common; + const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; + const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); + const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES - BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); + const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); #endif + int i; + const int mi_offset = mi_row * cm->mi_stride + mi_col; uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; DECLARE_ALIGNED(16, uint8_t, @@ -1345,16 +1348,8 @@ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - xd->mi = cm->mi_grid_visible + offset; - xd->mi[0] = cm->mi + offset; -#if CONFIG_EXT_PARTITION_TYPES - partition = get_partition(cm->mi, cm->mi_stride, cm->mi_rows, cm->mi_cols, - mi_row, mi_col, bsize); -#else - mbmi = &xd->mi[0]->mbmi; - partition = partition_lookup[bsl][mbmi->sb_type]; -#endif - subsize = get_subsize(bsize, partition); + xd->mi = cm->mi_grid_visible + mi_offset; + xd->mi[0] = cm->mi + mi_offset; for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = dst_buf[i]; @@ -1808,10 +1803,7 @@ int supertx_enabled, #endif // CONFIG_SUPERTX int mi_row, int mi_col, 
- vpx_reader *r, -#if CONFIG_ANS - struct AnsDecoder *const tok, -#endif // CONFIG_ANS + vp10_reader *r, #if CONFIG_EXT_PARTITION_TYPES PARTITION_TYPE partition, #endif // CONFIG_EXT_PARTITION_TYPES @@ -1887,10 +1879,9 @@ for (col = 0; col < max_blocks_wide; col += step) predict_and_reconstruct_intra_block(xd, #if CONFIG_ANS - cm->token_tab, tok, -#else - r, + cm->token_tab, #endif + r, mbmi, plane, row, col, tx_size); } @@ -1991,10 +1982,9 @@ for (col = 0; col < max_blocks_wide; col += step) eobtotal += reconstruct_inter_block(xd, #if CONFIG_ANS - cm->token_tab, tok, -#else - r, + cm->token_tab, #endif + r, mbmi, plane, row, col, tx_size); #endif @@ -2008,14 +1998,15 @@ } #endif // CONFIG_SUPERTX - xd->corrupted |= vpx_reader_has_error(r); + xd->corrupted |= vp10_reader_has_error(r); } static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd, int mi_row, int mi_col, int bsl) { const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + const PARTITION_CONTEXT *left_ctx = + xd->left_seg_context + (mi_row & MAX_MIB_MASK); int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; // assert(bsl >= 0); @@ -2029,7 +2020,8 @@ BLOCK_SIZE subsize, int bw) { PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + PARTITION_CONTEXT *const left_ctx = + xd->left_seg_context + (mi_row & MAX_MIB_MASK); // update the partition context at the end notes. 
set partition bits // of block sizes larger than the current one to be one, and partition @@ -2040,7 +2032,7 @@ #endif // !CONFIG_EXT_PARTITION_TYPES static PARTITION_TYPE read_partition(VP10_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, + int mi_row, int mi_col, vp10_reader *r, int has_rows, int has_cols, #if CONFIG_EXT_PARTITION_TYPES BLOCK_SIZE bsize, @@ -2054,16 +2046,16 @@ if (has_rows && has_cols) #if CONFIG_EXT_PARTITION_TYPES if (bsize <= BLOCK_8X8) - p = (PARTITION_TYPE)vpx_read_tree(r, vp10_partition_tree, probs); + p = (PARTITION_TYPE)vp10_read_tree(r, vp10_partition_tree, probs); else - p = (PARTITION_TYPE)vpx_read_tree(r, vp10_ext_partition_tree, probs); + p = (PARTITION_TYPE)vp10_read_tree(r, vp10_ext_partition_tree, probs); #else - p = (PARTITION_TYPE)vpx_read_tree(r, vp10_partition_tree, probs); + p = (PARTITION_TYPE)vp10_read_tree(r, vp10_partition_tree, probs); #endif // CONFIG_EXT_PARTITION_TYPES else if (!has_rows && has_cols) - p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; + p = vp10_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; else if (has_rows && !has_cols) - p = vpx_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT; + p = vp10_read(r, probs[2]) ? 
PARTITION_SPLIT : PARTITION_VERT; else p = PARTITION_SPLIT; @@ -2075,9 +2067,9 @@ #if CONFIG_SUPERTX static int read_skip_without_seg(VP10_COMMON *cm, const MACROBLOCKD *xd, - vpx_reader *r) { + vp10_reader *r) { const int ctx = vp10_get_skip_context(xd); - const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); + const int skip = vp10_read(r, cm->fc->skip_probs[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->skip[ctx][skip]; @@ -2091,10 +2083,7 @@ int supertx_enabled, #endif int mi_row, int mi_col, - vpx_reader* r, -#if CONFIG_ANS - struct AnsDecoder *const tok, -#endif // CONFIG_ANS + vp10_reader* r, BLOCK_SIZE bsize, int n4x4_l2) { VP10_COMMON *const cm = &pbi->common; const int n8x8_l2 = n4x4_l2 - 1; @@ -2132,7 +2121,7 @@ !xd->lossless[0]) { const int supertx_context = partition_supertx_context_lookup[partition]; - supertx_enabled = vpx_read( + supertx_enabled = vp10_read( r, cm->fc->supertx_prob[supertx_context][supertx_size]); if (xd->counts) xd->counts->supertx[supertx_context][supertx_size][supertx_enabled]++; @@ -2154,7 +2143,7 @@ if (get_ext_tx_types(supertx_size, bsize, 1) > 1) { int eset = get_ext_tx_set(supertx_size, bsize, 1); if (eset > 0) { - txfm = vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], + txfm = vp10_read_tree(r, vp10_ext_tx_inter_tree[eset], cm->fc->inter_ext_tx_prob[eset][supertx_size]); if (xd->counts) ++xd->counts->inter_ext_tx[eset][supertx_size][txfm]; @@ -2162,7 +2151,7 @@ } #else if (supertx_size < TX_32X32) { - txfm = vpx_read_tree(r, vp10_ext_tx_tree, + txfm = vp10_read_tree(r, vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[supertx_size]); if (xd->counts) ++xd->counts->inter_ext_tx[supertx_size][txfm]; @@ -2183,9 +2172,6 @@ supertx_enabled, #endif // CONFIG_SUPERTX mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS #if CONFIG_EXT_PARTITION_TYPES partition, #endif // CONFIG_EXT_PARTITION_TYPES @@ -2198,9 +2184,6 @@ supertx_enabled, #endif // CONFIG_SUPERTX mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif 
// CONFIG_ANS #if CONFIG_EXT_PARTITION_TYPES partition, #endif // CONFIG_EXT_PARTITION_TYPES @@ -2212,9 +2195,6 @@ supertx_enabled, #endif // CONFIG_SUPERTX mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS #if CONFIG_EXT_PARTITION_TYPES partition, #endif // CONFIG_EXT_PARTITION_TYPES @@ -2225,9 +2205,6 @@ supertx_enabled, #endif // CONFIG_SUPERTX mi_row + hbs, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS #if CONFIG_EXT_PARTITION_TYPES partition, #endif // CONFIG_EXT_PARTITION_TYPES @@ -2239,9 +2216,6 @@ supertx_enabled, #endif // CONFIG_SUPERTX mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS #if CONFIG_EXT_PARTITION_TYPES partition, #endif // CONFIG_EXT_PARTITION_TYPES @@ -2252,9 +2226,6 @@ supertx_enabled, #endif // CONFIG_SUPERTX mi_row, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS #if CONFIG_EXT_PARTITION_TYPES partition, #endif // CONFIG_EXT_PARTITION_TYPES @@ -2266,36 +2237,24 @@ supertx_enabled, #endif // CONFIG_SUPERTX mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS subsize, n8x8_l2); decode_partition(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif // CONFIG_SUPERTX mi_row, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS subsize, n8x8_l2); decode_partition(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif // CONFIG_SUPERTX mi_row + hbs, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS subsize, n8x8_l2); decode_partition(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif // CONFIG_SUPERTX mi_row + hbs, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS subsize, n8x8_l2); break; #if CONFIG_EXT_PARTITION_TYPES @@ -2305,27 +2264,18 @@ supertx_enabled, #endif mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); decode_block(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif mi_row, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); decode_block(pbi, xd, 
#if CONFIG_SUPERTX supertx_enabled, #endif mi_row + hbs, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, subsize, n4x4_l2, n8x8_l2); break; case PARTITION_HORZ_B: @@ -2334,27 +2284,18 @@ supertx_enabled, #endif mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, subsize, n4x4_l2, n8x8_l2); decode_block(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif mi_row + hbs, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); decode_block(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif mi_row + hbs, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); break; case PARTITION_VERT_A: @@ -2363,27 +2304,18 @@ supertx_enabled, #endif mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); decode_block(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif mi_row + hbs, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); decode_block(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif mi_row, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, subsize, n8x8_l2, n4x4_l2); break; case PARTITION_VERT_B: @@ -2392,27 +2324,18 @@ supertx_enabled, #endif mi_row, mi_col, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, subsize, n8x8_l2, n4x4_l2); decode_block(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif mi_row, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); decode_block(pbi, xd, #if CONFIG_SUPERTX supertx_enabled, #endif mi_row + hbs, mi_col + hbs, r, -#if CONFIG_ANS - tok, -#endif // CONFIG_ANS partition, bsize2, n8x8_l2, n8x8_l2); break; #endif @@ -2459,10 +2382,9 @@ for (col = 0; col < max_blocks_wide; col += step) eobtotal += reconstruct_inter_block(xd, #if CONFIG_ANS - cm->token_tab, tok, -#else - r, + cm->token_tab, #endif + r, mbmi, i, row, col, tx_size); } @@ -2512,11 
+2434,12 @@ #endif // CONFIG_EXT_PARTITION_TYPES } +#if !CONFIG_ANS static void setup_bool_decoder(const uint8_t *data, const uint8_t *data_end, const size_t read_size, struct vpx_internal_error_info *error_info, - vpx_reader *r, + vp10_reader *r, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { // Validate the calculated partition length. If the buffer @@ -2530,7 +2453,7 @@ vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", 1); } -#if CONFIG_ANS +#else static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end, const size_t read_size, @@ -2554,10 +2477,10 @@ #endif static void read_coef_probs_common(vp10_coeff_probs_model *coef_probs, - vpx_reader *r) { + vp10_reader *r) { int i, j, k, l, m; - if (vpx_read_bit(r)) + if (vp10_read_bit(r)) for (i = 0; i < PLANE_TYPES; ++i) for (j = 0; j < REF_TYPES; ++j) for (k = 0; k < COEF_BANDS; ++k) @@ -2567,7 +2490,7 @@ } static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, - vpx_reader *r) { + vp10_reader *r) { const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) @@ -2895,16 +2818,24 @@ pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } -static void setup_tile_info(VP10Decoder *const pbi, +static void read_tile_info(VP10Decoder *const pbi, struct vpx_read_bit_buffer *const rb) { VP10_COMMON *const cm = &pbi->common; #if CONFIG_EXT_TILE // Read the tile width/height - cm->tile_width = vpx_rb_read_literal(rb, 6) + 1; // in [1, 64] - cm->tile_height = vpx_rb_read_literal(rb, 6) + 1; // in [1, 64] +#if CONFIG_EXT_PARTITION + if (cm->sb_size == BLOCK_128X128) { + cm->tile_width = vpx_rb_read_literal(rb, 5) + 1; + cm->tile_height = vpx_rb_read_literal(rb, 5) + 1; + } else +#endif // CONFIG_EXT_PARTITION + { + cm->tile_width = vpx_rb_read_literal(rb, 6) + 1; + cm->tile_height = vpx_rb_read_literal(rb, 6) + 1; + } - cm->tile_width = cm->tile_width << 
MI_BLOCK_SIZE_LOG2; - cm->tile_height = cm->tile_height << MI_BLOCK_SIZE_LOG2; + cm->tile_width <<= cm->mib_size_log2; + cm->tile_height <<= cm->mib_size_log2; cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols); cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows); @@ -2945,12 +2876,14 @@ cm->tile_cols = 1 << cm->log2_tile_cols; cm->tile_rows = 1 << cm->log2_tile_rows; - cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols); - cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows); + cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2); + cm->tile_width >>= cm->log2_tile_cols; + cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2); + cm->tile_height >>= cm->log2_tile_rows; - // round to integer multiples of 8 - cm->tile_width = mi_cols_aligned_to_sb(cm->tile_width); - cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height); + // round to integer multiples of superblock size + cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2); + cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2); // tile size magnitude if (cm->tile_rows > 1 || cm->tile_cols > 1) { @@ -3185,8 +3118,7 @@ int tile_row, tile_col; #if CONFIG_ENTROPY - cm->do_subframe_update = - cm->log2_tile_cols == 0 && cm->log2_tile_rows == 0; + cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1; #endif // CONFIG_ENTROPY if (cm->lf.filter_level && !cm->skip_loop_filter && @@ -3237,14 +3169,15 @@ &cm->counts : NULL; vp10_zero(td->dqcoeff); vp10_tile_init(&td->xd.tile, td->cm, tile_row, tile_col); +#if !CONFIG_ANS setup_bool_decoder(buf->data, data_end, buf->size, &cm->error, - &td->bit_reader, - pbi->decrypt_cb, pbi->decrypt_state); -#if CONFIG_ANS + &td->bit_reader, pbi->decrypt_cb, + pbi->decrypt_state); +#else setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &td->token_ans, - pbi->decrypt_cb, pbi->decrypt_state); -#endif // CONFIG_ANS + &td->bit_reader, 
pbi->decrypt_cb, + pbi->decrypt_state); +#endif vp10_init_macroblockd(cm, &td->xd, td->dqcoeff); td->xd.plane[0].color_index_map = td->color_index_map[0]; td->xd.plane[1].color_index_map = td->color_index_map[1]; @@ -3266,22 +3199,19 @@ vp10_zero_above_context(cm, tile_info.mi_col_start, tile_info.mi_col_end); for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; - mi_row += MI_BLOCK_SIZE) { + mi_row += cm->mib_size) { int mi_col; vp10_zero_left_context(&td->xd); for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; - mi_col += MI_BLOCK_SIZE) { + mi_col += cm->mib_size) { decode_partition(pbi, &td->xd, #if CONFIG_SUPERTX 0, #endif // CONFIG_SUPERTX mi_row, mi_col, &td->bit_reader, -#if CONFIG_ANS - &td->token_ans, -#endif // CONFIG_ANS - BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2); + cm->sb_size, b_width_log2_lookup[cm->sb_size]); } pbi->mb.corrupted |= td->xd.corrupted; if (pbi->mb.corrupted) @@ -3308,8 +3238,8 @@ // Loopfilter one tile row. if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - const int lf_start = VPXMAX(0, tile_info.mi_row_start - MI_BLOCK_SIZE); - const int lf_end = tile_info.mi_row_end - MI_BLOCK_SIZE; + const int lf_start = VPXMAX(0, tile_info.mi_row_start - cm->mib_size); + const int lf_end = tile_info.mi_row_end - cm->mib_size; // Delay the loopfilter if the first tile row is only // a single superblock high. @@ -3333,7 +3263,7 @@ // After loopfiltering, the last 7 row pixels in each superblock row may // still be changed by the longest loopfilter of the next superblock row. 
if (cm->frame_parallel_decode) - vp10_frameworker_broadcast(pbi->cur_buf, mi_row << MI_BLOCK_SIZE_LOG2); + vp10_frameworker_broadcast(pbi->cur_buf, mi_row << cm->mib_size_log2); #endif // !CONFIG_VAR_TX } @@ -3369,6 +3299,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data, const TileInfo *const tile) { VP10Decoder *const pbi = tile_data->pbi; + const VP10_COMMON *const cm = &pbi->common; int mi_row, mi_col; if (setjmp(tile_data->error_info.jmp)) { @@ -3383,20 +3314,17 @@ vp10_zero_above_context(&pbi->common, tile->mi_col_start, tile->mi_col_end); for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; - mi_row += MI_BLOCK_SIZE) { + mi_row += cm->mib_size) { vp10_zero_left_context(&tile_data->xd); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) { + mi_col += cm->mib_size) { decode_partition(pbi, &tile_data->xd, #if CONFIG_SUPERTX 0, #endif mi_row, mi_col, &tile_data->bit_reader, -#if CONFIG_ANS - &tile_data->token_ans, -#endif // CONFIG_ANS - BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2); + cm->sb_size, b_width_log2_lookup[cm->sb_size]); } } return !tile_data->xd.corrupted; @@ -3535,12 +3463,13 @@ vp10_zero(twd->dqcoeff); vp10_tile_init(tile_info, cm, tile_row, buf->col); vp10_tile_init(&twd->xd.tile, cm, tile_row, buf->col); +#if !CONFIG_ANS setup_bool_decoder(buf->data, data_end, buf->size, &cm->error, &twd->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); -#if CONFIG_ANS +#else setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &twd->token_ans, pbi->decrypt_cb, + &twd->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); #endif // CONFIG_ANS vp10_init_macroblockd(cm, &twd->xd, twd->dqcoeff); @@ -3848,6 +3777,12 @@ if (frame_is_intra_only(cm) || cm->error_resilient_mode) vp10_setup_past_independence(cm); +#if CONFIG_EXT_PARTITION + set_sb_size(cm, vpx_rb_read_bit(rb) ? 
BLOCK_128X128 : BLOCK_64X64); +#else + set_sb_size(cm, BLOCK_64X64); +#endif // CONFIG_EXT_PARTITION + setup_loopfilter(cm, rb); #if CONFIG_LOOP_RESTORATION setup_restoration(cm, rb); @@ -3887,7 +3822,7 @@ : read_tx_mode(rb); cm->reference_mode = read_frame_reference_mode(cm, rb); - setup_tile_info(pbi, rb); + read_tile_info(pbi, rb); sz = vpx_rb_read_literal(rb, 16); if (sz == 0) @@ -3898,11 +3833,11 @@ } #if CONFIG_EXT_TX -static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { +static void read_ext_tx_probs(FRAME_CONTEXT *fc, vp10_reader *r) { int i, j, k; int s; for (s = 1; s < EXT_TX_SETS_INTER; ++s) { - if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { if (!use_inter_ext_tx_for_txsize[s][i]) continue; for (j = 0; j < num_ext_tx_set_inter[s] - 1; ++j) @@ -3912,7 +3847,7 @@ } for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { - if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { if (!use_intra_ext_tx_for_txsize[s][i]) continue; for (j = 0; j < INTRA_MODES; ++j) @@ -3925,16 +3860,16 @@ #else -static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { +static void read_ext_tx_probs(FRAME_CONTEXT *fc, vp10_reader *r) { int i, j, k; - if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { for (j = 0; j < TX_TYPES; ++j) for (k = 0; k < TX_TYPES - 1; ++k) vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k]); } } - if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { for (k = 0; k < TX_TYPES - 1; ++k) vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k]); @@ -3944,9 +3879,9 @@ #endif // CONFIG_EXT_TX #if CONFIG_SUPERTX -static void read_supertx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { +static void read_supertx_probs(FRAME_CONTEXT *fc, 
vp10_reader *r) { int i, j; - if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) { for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { for (j = 1; j < TX_SIZES; ++j) { vp10_diff_update_prob(r, &fc->supertx_prob[i][j]); @@ -3963,13 +3898,19 @@ MACROBLOCKD *const xd = &pbi->mb; #endif FRAME_CONTEXT *const fc = cm->fc; - vpx_reader r; + vp10_reader r; int k, i, j; +#if !CONFIG_ANS if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb, pbi->decrypt_state)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); +#else + if (ans_read_init(&r, data, partition_size)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate compressed header ANS decoder"); +#endif // !CONFIG_ANS if (cm->tx_mode == TX_MODE_SELECT) { for (i = 0; i < TX_SIZES - 1; ++i) @@ -4035,20 +3976,24 @@ #if CONFIG_EXT_INTER read_inter_compound_mode_probs(fc, &r); if (cm->reference_mode != COMPOUND_REFERENCE) { - for (i = 0; i < BLOCK_SIZES; i++) { - if (is_interintra_allowed_bsize(i)) { + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { + if (is_interintra_allowed_bsize_group(i)) { vp10_diff_update_prob(&r, &fc->interintra_prob[i]); } } + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { + for (j = 0; j < INTERINTRA_MODES - 1; j++) + vp10_diff_update_prob(&r, &fc->interintra_mode_prob[i][j]); + } for (i = 0; i < BLOCK_SIZES; i++) { - if (is_interintra_allowed_bsize(i) && get_wedge_bits(i)) { + if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i)) { vp10_diff_update_prob(&r, &fc->wedge_interintra_prob[i]); } } } if (cm->reference_mode != SINGLE_REFERENCE) { for (i = 0; i < BLOCK_SIZES; i++) { - if (get_wedge_bits(i)) { + if (is_interinter_wedge_used(i)) { vp10_diff_update_prob(&r, &fc->wedge_interinter_prob[i]); } } @@ -4087,7 +4032,7 @@ #endif } - return vpx_reader_has_error(&r); + return vp10_reader_has_error(&r); } #ifdef NDEBUG
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index 8035e06..2c9cd58 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c
@@ -24,25 +24,25 @@ #include "vpx_dsp/vpx_dsp_common.h" -static INLINE int read_uniform(vpx_reader *r, int n) { +static INLINE int read_uniform(vp10_reader *r, int n) { int l = get_unsigned_bits(n); int m = (1 << l) - n; - int v = vpx_read_literal(r, l-1); + int v = vp10_read_literal(r, l-1); assert(l != 0); if (v < m) return v; else - return (v << 1) - m + vpx_read_literal(r, 1); + return (v << 1) - m + vp10_read_literal(r, 1); } -static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) { - return (PREDICTION_MODE)vpx_read_tree(r, vp10_intra_mode_tree, p); +static PREDICTION_MODE read_intra_mode(vp10_reader *r, const vpx_prob *p) { + return (PREDICTION_MODE)vp10_read_tree(r, vp10_intra_mode_tree, p); } static PREDICTION_MODE read_intra_mode_y(VP10_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, int size_group) { + vp10_reader *r, int size_group) { const PREDICTION_MODE y_mode = read_intra_mode(r, cm->fc->y_mode_prob[size_group]); FRAME_COUNTS *counts = xd->counts; @@ -52,7 +52,7 @@ } static PREDICTION_MODE read_intra_mode_uv(VP10_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, + vp10_reader *r, PREDICTION_MODE y_mode) { const PREDICTION_MODE uv_mode = read_intra_mode(r, cm->fc->uv_mode_prob[y_mode]); @@ -62,17 +62,30 @@ return uv_mode; } +#if CONFIG_EXT_INTER +static INTERINTRA_MODE read_interintra_mode(VP10_COMMON *cm, MACROBLOCKD *xd, + vp10_reader *r, int size_group) { + const INTERINTRA_MODE ii_mode = + (INTERINTRA_MODE)vp10_read_tree(r, vp10_interintra_mode_tree, + cm->fc->interintra_mode_prob[size_group]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->interintra_mode[size_group][ii_mode]; + return ii_mode; +} +#endif // CONFIG_EXT_INTER + static PREDICTION_MODE read_inter_mode(VP10_COMMON *cm, MACROBLOCKD *xd, #if CONFIG_REF_MV && CONFIG_EXT_INTER MB_MODE_INFO *mbmi, #endif - vpx_reader *r, int16_t ctx) { + vp10_reader *r, int16_t ctx) { #if CONFIG_REF_MV FRAME_COUNTS *counts = xd->counts; int16_t mode_ctx = ctx & NEWMV_CTX_MASK; 
vpx_prob mode_prob = cm->fc->newmv_prob[mode_ctx]; - if (vpx_read(r, mode_prob) == 0) { + if (vp10_read(r, mode_prob) == 0) { if (counts) ++counts->newmv_mode[mode_ctx][0]; @@ -83,7 +96,7 @@ #if CONFIG_EXT_INTER } else { mode_prob = cm->fc->new2mv_prob; - if (vpx_read(r, mode_prob) == 0) { + if (vp10_read(r, mode_prob) == 0) { if (counts) ++counts->new2mv_mode[0]; return NEWMV; @@ -104,7 +117,7 @@ mode_ctx = (ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; mode_prob = cm->fc->zeromv_prob[mode_ctx]; - if (vpx_read(r, mode_prob) == 0) { + if (vp10_read(r, mode_prob) == 0) { if (counts) ++counts->zeromv_mode[mode_ctx][0]; return ZEROMV; @@ -123,7 +136,7 @@ mode_prob = cm->fc->refmv_prob[mode_ctx]; - if (vpx_read(r, mode_prob) == 0) { + if (vp10_read(r, mode_prob) == 0) { if (counts) ++counts->refmv_mode[mode_ctx][0]; @@ -137,7 +150,7 @@ // Invalid prediction mode. assert(0); #else - const int mode = vpx_read_tree(r, vp10_inter_mode_tree, + const int mode = vp10_read_tree(r, vp10_inter_mode_tree, cm->fc->inter_mode_probs[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) @@ -151,7 +164,7 @@ static void read_drl_idx(const VP10_COMMON *cm, MACROBLOCKD *xd, MB_MODE_INFO *mbmi, - vpx_reader *r) { + vp10_reader *r) { uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); mbmi->ref_mv_idx = 0; @@ -161,7 +174,7 @@ if (xd->ref_mv_count[ref_frame_type] > idx + 1) { uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx); vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx]; - if (!vpx_read(r, drl_prob)) { + if (!vp10_read(r, drl_prob)) { mbmi->ref_mv_idx = idx; if (xd->counts) ++xd->counts->drl_mode[drl_ctx][0]; @@ -183,7 +196,7 @@ if (xd->ref_mv_count[ref_frame_type] > idx + 1) { uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx); vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx]; - if (!vpx_read(r, drl_prob)) { + if (!vp10_read(r, drl_prob)) { mbmi->ref_mv_idx = idx - 1; if (xd->counts) ++xd->counts->drl_mode[drl_ctx][0]; @@ -201,8 +214,8 @@ #if 
CONFIG_EXT_INTER static PREDICTION_MODE read_inter_compound_mode(VP10_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, int16_t ctx) { - const int mode = vpx_read_tree(r, vp10_inter_compound_mode_tree, + vp10_reader *r, int16_t ctx) { + const int mode = vp10_read_tree(r, vp10_inter_compound_mode_tree, cm->fc->inter_compound_mode_probs[ctx]); FRAME_COUNTS *counts = xd->counts; @@ -214,16 +227,16 @@ } #endif // CONFIG_EXT_INTER -static int read_segment_id(vpx_reader *r, +static int read_segment_id(vp10_reader *r, const struct segmentation_probs *segp) { - return vpx_read_tree(r, vp10_segment_tree, segp->tree_probs); + return vp10_read_tree(r, vp10_segment_tree, segp->tree_probs); } #if CONFIG_VAR_TX static void read_tx_size_inter(VP10_COMMON *cm, MACROBLOCKD *xd, MB_MODE_INFO *mbmi, FRAME_COUNTS *counts, TX_SIZE tx_size, int blk_row, int blk_col, - vpx_reader *r) { + vp10_reader *r) { int is_split = 0; const int tx_row = blk_row >> 1; const int tx_col = blk_col >> 1; @@ -232,8 +245,8 @@ int ctx = txfm_partition_context(xd->above_txfm_context + tx_col, xd->left_txfm_context + tx_row, tx_size); - TX_SIZE (*const inter_tx_size)[MI_BLOCK_SIZE] = - (TX_SIZE (*)[MI_BLOCK_SIZE])&mbmi->inter_tx_size[tx_row][tx_col]; + TX_SIZE (*const inter_tx_size)[MAX_MIB_SIZE] = + (TX_SIZE (*)[MAX_MIB_SIZE])&mbmi->inter_tx_size[tx_row][tx_col]; if (xd->mb_to_bottom_edge < 0) max_blocks_high += xd->mb_to_bottom_edge >> 5; @@ -243,7 +256,7 @@ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - is_split = vpx_read(r, cm->fc->txfm_partition_prob[ctx]); + is_split = vp10_read(r, cm->fc->txfm_partition_prob[ctx]); if (is_split) { BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; @@ -285,11 +298,11 @@ #endif static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd, - TX_SIZE max_tx_size, vpx_reader *r) { + TX_SIZE max_tx_size, vp10_reader *r) { FRAME_COUNTS *counts = xd->counts; const int ctx = get_tx_size_context(xd); const int tx_size_cat = max_tx_size - TX_8X8; - int 
tx_size = vpx_read_tree(r, vp10_tx_size_tree[tx_size_cat], + int tx_size = vp10_read_tree(r, vp10_tx_size_tree[tx_size_cat], cm->fc->tx_size_probs[tx_size_cat][ctx]); if (counts) ++counts->tx_size[tx_size_cat][ctx][tx_size]; @@ -297,7 +310,7 @@ } static TX_SIZE read_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd, - int allow_select, vpx_reader *r) { + int allow_select, vp10_reader *r) { TX_MODE tx_mode = cm->tx_mode; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; @@ -335,7 +348,7 @@ static int read_intra_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd, int mi_offset, int x_mis, int y_mis, - vpx_reader *r) { + vp10_reader *r) { struct segmentation *const seg = &cm->seg; FRAME_COUNTS *counts = xd->counts; struct segmentation_probs *const segp = &cm->fc->seg; @@ -366,7 +379,7 @@ } static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r) { + int mi_row, int mi_col, vp10_reader *r) { struct segmentation *const seg = &cm->seg; FRAME_COUNTS *counts = xd->counts; struct segmentation_probs *const segp = &cm->fc->seg; @@ -396,7 +409,7 @@ if (seg->temporal_update) { const int ctx = vp10_get_pred_context_seg_id(xd); const vpx_prob pred_prob = segp->pred_probs[ctx]; - mbmi->seg_id_predicted = vpx_read(r, pred_prob); + mbmi->seg_id_predicted = vp10_read(r, pred_prob); if (counts) ++counts->seg.pred[ctx][mbmi->seg_id_predicted]; if (mbmi->seg_id_predicted) { @@ -416,12 +429,12 @@ } static int read_skip(VP10_COMMON *cm, const MACROBLOCKD *xd, - int segment_id, vpx_reader *r) { + int segment_id, vp10_reader *r) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { const int ctx = vp10_get_skip_context(xd); - const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); + const int skip = vp10_read(r, cm->fc->skip_probs[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->skip[ctx][skip]; @@ -431,7 +444,7 @@ static void 
read_palette_mode_info(VP10_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r) { + vp10_reader *r) { MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; const MODE_INFO *const above_mi = xd->above_mi; @@ -445,14 +458,14 @@ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); if (left_mi) palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); - if (vpx_read(r, vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8] + if (vp10_read(r, vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8] [palette_ctx])) { pmi->palette_size[0] = - vpx_read_tree(r, vp10_palette_size_tree, + vp10_read_tree(r, vp10_palette_size_tree, vp10_default_palette_y_size_prob[bsize - BLOCK_8X8]) + 2; n = pmi->palette_size[0]; for (i = 0; i < n; ++i) - pmi->palette_colors[i] = vpx_read_literal(r, cm->bit_depth); + pmi->palette_colors[i] = vp10_read_literal(r, cm->bit_depth); xd->plane[0].color_index_map[0] = read_uniform(r, n); assert(xd->plane[0].color_index_map[0] < n); @@ -460,18 +473,18 @@ } if (mbmi->uv_mode == DC_PRED) { - if (vpx_read(r, + if (vp10_read(r, vp10_default_palette_uv_mode_prob[pmi->palette_size[0] > 0])) { pmi->palette_size[1] = - vpx_read_tree(r, vp10_palette_size_tree, + vp10_read_tree(r, vp10_palette_size_tree, vp10_default_palette_uv_size_prob[bsize - BLOCK_8X8]) + 2; n = pmi->palette_size[1]; for (i = 0; i < n; ++i) { pmi->palette_colors[PALETTE_MAX_SIZE + i] = - vpx_read_literal(r, cm->bit_depth); + vp10_read_literal(r, cm->bit_depth); pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] = - vpx_read_literal(r, cm->bit_depth); + vp10_read_literal(r, cm->bit_depth); } xd->plane[1].color_index_map[0] = read_uniform(r, n); assert(xd->plane[1].color_index_map[0] < n); @@ -481,7 +494,7 @@ #if CONFIG_EXT_INTRA static void read_ext_intra_mode_info(VP10_COMMON *const cm, - MACROBLOCKD *const xd, vpx_reader *r) { + MACROBLOCKD *const xd, vp10_reader *r) { MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; 
FRAME_COUNTS *counts = xd->counts; @@ -492,7 +505,7 @@ if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) { mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = - vpx_read(r, cm->fc->ext_intra_probs[0]); + vp10_read(r, cm->fc->ext_intra_probs[0]); if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) { mbmi->ext_intra_mode_info.ext_intra_mode[0] = read_uniform(r, FILTER_INTRA_MODES); @@ -503,7 +516,7 @@ if (mbmi->uv_mode == DC_PRED && mbmi->palette_mode_info.palette_size[1] == 0) { mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = - vpx_read(r, cm->fc->ext_intra_probs[1]); + vp10_read(r, cm->fc->ext_intra_probs[1]); if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) { mbmi->ext_intra_mode_info.ext_intra_mode[1] = read_uniform(r, FILTER_INTRA_MODES); @@ -516,7 +529,7 @@ static void read_intra_frame_mode_info(VP10_COMMON *const cm, MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r) { + int mi_row, int mi_col, vp10_reader *r) { MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; const MODE_INFO *above_mi = xd->above_mi; @@ -569,7 +582,7 @@ mbmi->angle_delta[0] * ANGLE_STEP; if (pick_intra_filter(p_angle)) { FRAME_COUNTS *counts = xd->counts; - mbmi->intra_filter = vpx_read_tree(r, vp10_intra_filter_tree, + mbmi->intra_filter = vp10_read_tree(r, vp10_intra_filter_tree, cm->fc->intra_filter_probs[ctx]); if (counts) ++counts->intra_filter[ctx][mbmi->intra_filter]; @@ -602,7 +615,7 @@ FRAME_COUNTS *counts = xd->counts; int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, 0); if (eset > 0) { - mbmi->tx_type = vpx_read_tree( + mbmi->tx_type = vp10_read_tree( r, vp10_ext_tx_intra_tree[eset], cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]); if (counts) @@ -618,7 +631,7 @@ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { FRAME_COUNTS *counts = xd->counts; TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode]; - mbmi->tx_type = vpx_read_tree( + mbmi->tx_type = vp10_read_tree( r, 
vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]); if (counts) @@ -637,16 +650,16 @@ #endif // CONFIG_EXT_INTRA } -static int read_mv_component(vpx_reader *r, +static int read_mv_component(vp10_reader *r, const nmv_component *mvcomp, int usehp) { int mag, d, fr, hp; - const int sign = vpx_read(r, mvcomp->sign); - const int mv_class = vpx_read_tree(r, vp10_mv_class_tree, mvcomp->classes); + const int sign = vp10_read(r, mvcomp->sign); + const int mv_class = vp10_read_tree(r, vp10_mv_class_tree, mvcomp->classes); const int class0 = mv_class == MV_CLASS_0; // Integer part if (class0) { - d = vpx_read_tree(r, vp10_mv_class0_tree, mvcomp->class0); + d = vp10_read_tree(r, vp10_mv_class0_tree, mvcomp->class0); mag = 0; } else { int i; @@ -654,16 +667,16 @@ d = 0; for (i = 0; i < n; ++i) - d |= vpx_read(r, mvcomp->bits[i]) << i; + d |= vp10_read(r, mvcomp->bits[i]) << i; mag = CLASS0_SIZE << (mv_class + 2); } // Fractional part - fr = vpx_read_tree(r, vp10_mv_fp_tree, class0 ? mvcomp->class0_fp[d] + fr = vp10_read_tree(r, vp10_mv_fp_tree, class0 ? mvcomp->class0_fp[d] : mvcomp->fp); // High precision part (if hp is not used, the default value of the hp is 1) - hp = usehp ? vpx_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) + hp = usehp ? vp10_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) : 1; // Result @@ -671,11 +684,11 @@ return sign ? 
-mag : mag; } -static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref, +static INLINE void read_mv(vp10_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, nmv_context_counts *counts, int allow_hp) { const MV_JOINT_TYPE joint_type = - (MV_JOINT_TYPE)vpx_read_tree(r, vp10_mv_joint_tree, ctx->joints); + (MV_JOINT_TYPE)vp10_read_tree(r, vp10_mv_joint_tree, ctx->joints); const int use_hp = allow_hp && vp10_use_mv_hp(ref); MV diff = {0, 0}; @@ -693,11 +706,11 @@ static REFERENCE_MODE read_block_reference_mode(VP10_COMMON *cm, const MACROBLOCKD *xd, - vpx_reader *r) { + vp10_reader *r) { if (cm->reference_mode == REFERENCE_MODE_SELECT) { const int ctx = vp10_get_reference_mode_context(cm, xd); const REFERENCE_MODE mode = - (REFERENCE_MODE)vpx_read(r, cm->fc->comp_inter_prob[ctx]); + (REFERENCE_MODE)vp10_read(r, cm->fc->comp_inter_prob[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->comp_inter[ctx][mode]; @@ -709,7 +722,7 @@ // Read the referncence frame static void read_ref_frames(VP10_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r, + vp10_reader *r, int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { FRAME_CONTEXT *const fc = cm->fc; FRAME_COUNTS *counts = xd->counts; @@ -724,7 +737,7 @@ if (mode == COMPOUND_REFERENCE) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ctx = vp10_get_pred_context_comp_ref_p(cm, xd); - const int bit = vpx_read(r, fc->comp_ref_prob[ctx][0]); + const int bit = vp10_read(r, fc->comp_ref_prob[ctx][0]); if (counts) ++counts->comp_ref[ctx][0][bit]; ref_frame[idx] = cm->comp_fixed_ref; @@ -732,18 +745,18 @@ #if CONFIG_EXT_REFS if (!bit) { const int ctx1 = vp10_get_pred_context_comp_ref_p1(cm, xd); - const int bit1 = vpx_read(r, fc->comp_ref_prob[ctx1][1]); + const int bit1 = vp10_read(r, fc->comp_ref_prob[ctx1][1]); if (counts) ++counts->comp_ref[ctx1][1][bit1]; ref_frame[!idx] = cm->comp_var_ref[bit1 ? 
0 : 1]; } else { const int ctx2 = vp10_get_pred_context_comp_ref_p2(cm, xd); - const int bit2 = vpx_read(r, fc->comp_ref_prob[ctx2][2]); + const int bit2 = vp10_read(r, fc->comp_ref_prob[ctx2][2]); if (counts) ++counts->comp_ref[ctx2][2][bit2]; if (!bit2) { const int ctx3 = vp10_get_pred_context_comp_ref_p3(cm, xd); - const int bit3 = vpx_read(r, fc->comp_ref_prob[ctx3][3]); + const int bit3 = vp10_read(r, fc->comp_ref_prob[ctx3][3]); if (counts) ++counts->comp_ref[ctx3][3][bit3]; ref_frame[!idx] = cm->comp_var_ref[bit3 ? 2 : 3]; @@ -757,29 +770,29 @@ } else if (mode == SINGLE_REFERENCE) { #if CONFIG_EXT_REFS const int ctx0 = vp10_get_pred_context_single_ref_p1(xd); - const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); + const int bit0 = vp10_read(r, fc->single_ref_prob[ctx0][0]); if (counts) ++counts->single_ref[ctx0][0][bit0]; if (bit0) { const int ctx1 = vp10_get_pred_context_single_ref_p2(xd); - const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]); + const int bit1 = vp10_read(r, fc->single_ref_prob[ctx1][1]); if (counts) ++counts->single_ref[ctx1][1][bit1]; ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; } else { const int ctx2 = vp10_get_pred_context_single_ref_p3(xd); - const int bit2 = vpx_read(r, fc->single_ref_prob[ctx2][2]); + const int bit2 = vp10_read(r, fc->single_ref_prob[ctx2][2]); if (counts) ++counts->single_ref[ctx2][2][bit2]; if (bit2) { const int ctx4 = vp10_get_pred_context_single_ref_p5(xd); - const int bit4 = vpx_read(r, fc->single_ref_prob[ctx4][4]); + const int bit4 = vp10_read(r, fc->single_ref_prob[ctx4][4]); if (counts) ++counts->single_ref[ctx4][4][bit4]; ref_frame[0] = bit4 ? LAST4_FRAME : LAST3_FRAME; } else { const int ctx3 = vp10_get_pred_context_single_ref_p4(xd); - const int bit3 = vpx_read(r, fc->single_ref_prob[ctx3][3]); + const int bit3 = vp10_read(r, fc->single_ref_prob[ctx3][3]); if (counts) ++counts->single_ref[ctx3][3][bit3]; ref_frame[0] = bit3 ? 
LAST2_FRAME : LAST_FRAME; @@ -787,12 +800,12 @@ } #else const int ctx0 = vp10_get_pred_context_single_ref_p1(xd); - const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); + const int bit0 = vp10_read(r, fc->single_ref_prob[ctx0][0]); if (counts) ++counts->single_ref[ctx0][0][bit0]; if (bit0) { const int ctx1 = vp10_get_pred_context_single_ref_p2(xd); - const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]); + const int bit1 = vp10_read(r, fc->single_ref_prob[ctx1][1]); if (counts) ++counts->single_ref[ctx1][1][bit1]; ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; @@ -811,13 +824,13 @@ #if CONFIG_OBMC static int read_is_obmc_block(VP10_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r) { + vp10_reader *r) { BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; FRAME_COUNTS *counts = xd->counts; int is_obmc; if (is_obmc_allowed(&xd->mi[0]->mbmi)) { - is_obmc = vpx_read(r, cm->fc->obmc_prob[bsize]); + is_obmc = vp10_read(r, cm->fc->obmc_prob[bsize]); if (counts) ++counts->obmc[bsize][is_obmc]; return is_obmc; @@ -829,14 +842,14 @@ static INLINE INTERP_FILTER read_switchable_interp_filter( VP10_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r) { + vp10_reader *r) { const int ctx = vp10_get_pred_context_switchable_interp(xd); FRAME_COUNTS *counts = xd->counts; INTERP_FILTER type; #if CONFIG_EXT_INTERP if (!vp10_is_interp_needed(xd)) return EIGHTTAP_REGULAR; #endif - type = (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree, + type = (INTERP_FILTER)vp10_read_tree(r, vp10_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx]); if (counts) ++counts->switchable_interp[ctx][type]; @@ -845,7 +858,7 @@ static void read_intra_block_mode_info(VP10_COMMON *const cm, MACROBLOCKD *const xd, MODE_INFO *mi, - vpx_reader *r) { + vp10_reader *r) { MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mi->mbmi.sb_type; int i; @@ -884,7 +897,7 @@ if (pick_intra_filter(p_angle)) { FRAME_COUNTS *counts = xd->counts; const int ctx = 
vp10_get_pred_context_intra_interp(xd); - mbmi->intra_filter = vpx_read_tree(r, vp10_intra_filter_tree, + mbmi->intra_filter = vp10_read_tree(r, vp10_intra_filter_tree, cm->fc->intra_filter_probs[ctx]); if (counts) ++counts->intra_filter[ctx][mbmi->intra_filter]; @@ -926,7 +939,7 @@ #endif int_mv mv[2], int_mv ref_mv[2], int_mv nearest_mv[2], int_mv near_mv[2], - int is_compound, int allow_hp, vpx_reader *r) { + int is_compound, int allow_hp, vp10_reader *r) { int i; int ret = 1; #if CONFIG_REF_MV @@ -1141,12 +1154,12 @@ } static int read_is_inter_block(VP10_COMMON *const cm, MACROBLOCKD *const xd, - int segment_id, vpx_reader *r) { + int segment_id, vp10_reader *r) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; } else { const int ctx = vp10_get_intra_inter_context(xd); - const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]); + const int is_inter = vp10_read(r, cm->fc->intra_inter_prob[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->intra_inter[ctx][is_inter]; @@ -1157,17 +1170,17 @@ static void fpm_sync(void *const data, int mi_row) { VP10Decoder *const pbi = (VP10Decoder *)data; vp10_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame, - mi_row << MI_BLOCK_SIZE_LOG2); + mi_row << pbi->common.mib_size_log2); } static void read_inter_block_mode_info(VP10Decoder *const pbi, MACROBLOCKD *const xd, MODE_INFO *const mi, #if (CONFIG_OBMC || CONFIG_EXT_INTER) && CONFIG_SUPERTX - int mi_row, int mi_col, vpx_reader *r, + int mi_row, int mi_col, vp10_reader *r, int supertx_enabled) { #else - int mi_row, int mi_col, vpx_reader *r) { + int mi_row, int mi_col, vp10_reader *r) { #endif // CONFIG_OBMC && CONFIG_SUPERTX VP10_COMMON *const cm = &pbi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -1513,13 +1526,14 @@ !supertx_enabled && #endif is_interintra_allowed(mbmi)) { - const int interintra = vpx_read(r, cm->fc->interintra_prob[bsize]); + 
const int bsize_group = size_group_lookup[bsize]; + const int interintra = vp10_read(r, cm->fc->interintra_prob[bsize_group]); if (xd->counts) - xd->counts->interintra[bsize][interintra]++; + xd->counts->interintra[bsize_group][interintra]++; assert(mbmi->ref_frame[1] == NONE); if (interintra) { - const PREDICTION_MODE interintra_mode = - read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); + const INTERINTRA_MODE interintra_mode = + read_interintra_mode(cm, xd, r, bsize_group); mbmi->ref_frame[1] = INTRA_FRAME; mbmi->interintra_mode = interintra_mode; mbmi->interintra_uv_mode = interintra_mode; @@ -1530,15 +1544,15 @@ mbmi->angle_delta[1] = 0; mbmi->intra_filter = INTRA_FILTER_LINEAR; #endif // CONFIG_EXT_INTRA - if (get_wedge_bits(bsize)) { + if (is_interintra_wedge_used(bsize)) { mbmi->use_wedge_interintra = - vpx_read(r, cm->fc->wedge_interintra_prob[bsize]); + vp10_read(r, cm->fc->wedge_interintra_prob[bsize]); if (xd->counts) xd->counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++; if (mbmi->use_wedge_interintra) { mbmi->interintra_wedge_index = mbmi->interintra_uv_wedge_index = - vpx_read_literal(r, get_wedge_bits(bsize)); + vp10_read_literal(r, get_wedge_bits(bsize)); } } } @@ -1548,14 +1562,14 @@ #if CONFIG_OBMC !(is_obmc_allowed(mbmi) && mbmi->obmc) && #endif // CONFIG_OBMC - get_wedge_bits(bsize)) { + is_interinter_wedge_used(bsize)) { mbmi->use_wedge_interinter = - vpx_read(r, cm->fc->wedge_interinter_prob[bsize]); + vp10_read(r, cm->fc->wedge_interinter_prob[bsize]); if (xd->counts) xd->counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++; if (mbmi->use_wedge_interinter) { mbmi->interinter_wedge_index = - vpx_read_literal(r, get_wedge_bits(bsize)); + vp10_read_literal(r, get_wedge_bits(bsize)); } } #endif // CONFIG_EXT_INTER @@ -1572,7 +1586,7 @@ #if CONFIG_SUPERTX int supertx_enabled, #endif // CONFIG_SUPERTX - int mi_row, int mi_col, vpx_reader *r) { + int mi_row, int mi_col, vp10_reader *r) { VP10_COMMON *const cm = 
&pbi->common; MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -1592,7 +1606,8 @@ #if CONFIG_VAR_TX xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && !mbmi->skip && inter_block) { const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; @@ -1668,14 +1683,14 @@ if (inter_block) { if (eset > 0) { mbmi->tx_type = - vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], + vp10_read_tree(r, vp10_ext_tx_inter_tree[eset], cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size]); if (counts) ++counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type]; } } else if (ALLOW_INTRA_EXT_TX) { if (eset > 0) { - mbmi->tx_type = vpx_read_tree(r, vp10_ext_tx_intra_tree[eset], + mbmi->tx_type = vp10_read_tree(r, vp10_ext_tx_intra_tree[eset], cm->fc->intra_ext_tx_prob[eset] [mbmi->tx_size][mbmi->mode]); if (counts) @@ -1695,14 +1710,14 @@ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { FRAME_COUNTS *counts = xd->counts; if (inter_block) { - mbmi->tx_type = vpx_read_tree( + mbmi->tx_type = vp10_read_tree( r, vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[mbmi->tx_size]); if (counts) ++counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type]; } else { const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode]; - mbmi->tx_type = vpx_read_tree( + mbmi->tx_type = vp10_read_tree( r, vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]); if (counts) @@ -1719,7 +1734,7 @@ #if CONFIG_SUPERTX int supertx_enabled, #endif // CONFIG_SUPERTX - int mi_row, int mi_col, vpx_reader *r, + int mi_row, int mi_col, vp10_reader *r, int x_mis, int y_mis) { VP10_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0];
diff --git a/vp10/decoder/decodemv.h b/vp10/decoder/decodemv.h index 959a001..c10c6bf 100644 --- a/vp10/decoder/decodemv.h +++ b/vp10/decoder/decodemv.h
@@ -11,7 +11,7 @@ #ifndef VP10_DECODER_DECODEMV_H_ #define VP10_DECODER_DECODEMV_H_ -#include "vpx_dsp/bitreader.h" +#include "vp10/decoder/bitreader.h" #include "vp10/decoder/decoder.h" @@ -24,7 +24,7 @@ int supertx_enabled, #endif - int mi_row, int mi_col, vpx_reader *r, + int mi_row, int mi_col, vp10_reader *r, int x_mis, int y_mis); #ifdef __cplusplus
diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h index 23c7424..c349252 100644 --- a/vp10/decoder/decoder.h +++ b/vp10/decoder/decoder.h
@@ -14,13 +14,10 @@ #include "./vpx_config.h" #include "vpx/vpx_codec.h" -#include "vpx_dsp/bitreader.h" +#include "vp10/decoder/bitreader.h" #include "vpx_scale/yv12config.h" #include "vpx_util/vpx_thread.h" -#if CONFIG_ANS -#include "vp10/common/ans.h" -#endif #include "vp10/common/thread_common.h" #include "vp10/common/onyxc_int.h" #include "vp10/common/ppflags.h" @@ -33,10 +30,7 @@ // TODO(hkuang): combine this with TileWorkerData. typedef struct TileData { VP10_COMMON *cm; - vpx_reader bit_reader; -#if CONFIG_ANS - struct AnsDecoder token_ans; -#endif // CONFIG_ANS + vp10_reader bit_reader; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]); @@ -45,10 +39,7 @@ typedef struct TileWorkerData { struct VP10Decoder *pbi; - vpx_reader bit_reader; -#if CONFIG_ANS - struct AnsDecoder token_ans; -#endif // CONFIG_ANS + vp10_reader bit_reader; FRAME_COUNTS counts; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c index 343c7c8..def3d7a 100644 --- a/vp10/decoder/detokenize.c +++ b/vp10/decoder/detokenize.c
@@ -38,10 +38,10 @@ } while (0) #if !CONFIG_ANS -static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) { +static INLINE int read_coeff(const vpx_prob *probs, int n, vp10_reader *r) { int i, val = 0; for (i = 0; i < n; ++i) - val = (val << 1) | vpx_read(r, probs[i]); + val = (val << 1) | vp10_read(r, probs[i]); return val; } @@ -50,7 +50,7 @@ tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type, const int16_t *dq, int ctx, const int16_t *scan, const int16_t *nb, - vpx_reader *r) { + vp10_reader *r) { FRAME_COUNTS *counts = xd->counts; const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = xd->fc; @@ -120,12 +120,12 @@ prob = coef_probs[band][ctx]; if (counts) ++eob_branch_count[band][ctx]; - if (!vpx_read(r, prob[EOB_CONTEXT_NODE])) { + if (!vp10_read(r, prob[EOB_CONTEXT_NODE])) { INCREMENT_COUNT(EOB_MODEL_TOKEN); break; } - while (!vpx_read(r, prob[ZERO_CONTEXT_NODE])) { + while (!vp10_read(r, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); dqv = dq[1]; token_cache[scan[c]] = 0; @@ -137,13 +137,13 @@ prob = coef_probs[band][ctx]; } - if (!vpx_read(r, prob[ONE_CONTEXT_NODE])) { + if (!vp10_read(r, prob[ONE_CONTEXT_NODE])) { INCREMENT_COUNT(ONE_TOKEN); token = ONE_TOKEN; val = 1; } else { INCREMENT_COUNT(TWO_TOKEN); - token = vpx_read_tree(r, vp10_coef_con_tree, + token = vp10_read_tree(r, vp10_coef_con_tree, vp10_pareto8_full[prob[PIVOT_NODE] - 1]); switch (token) { case TWO_TOKEN: @@ -194,13 +194,13 @@ v = (val * dqv) >> dq_shift; #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH - dqcoeff[scan[c]] = highbd_check_range((vpx_read_bit(r) ? -v : v), + dqcoeff[scan[c]] = highbd_check_range((vp10_read_bit(r) ? -v : v), xd->bd); #else - dqcoeff[scan[c]] = check_range(vpx_read_bit(r) ? -v : v); + dqcoeff[scan[c]] = check_range(vp10_read_bit(r) ? -v : v); #endif // CONFIG_VP9_HIGHBITDEPTH #else - dqcoeff[scan[c]] = vpx_read_bit(r) ? -v : v; + dqcoeff[scan[c]] = vp10_read_bit(r) ? 
-v : v; #endif // CONFIG_COEFFICIENT_RANGE_CHECKING token_cache[scan[c]] = vp10_pt_energy_class[token]; ++c; @@ -429,7 +429,7 @@ } void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane, - vpx_reader *r) { + vp10_reader *r) { MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; @@ -449,7 +449,7 @@ for (j = (i == 0 ? 1 : 0); j < cols; ++j) { color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, n, color_order); - color_idx = vpx_read_tree(r, vp10_palette_color_tree[n - 2], + color_idx = vp10_read_tree(r, vp10_palette_color_tree[n - 2], prob[n - 2][color_ctx]); assert(color_idx >= 0 && color_idx < n); color_map[i * cols + j] = color_order[color_idx]; @@ -468,7 +468,7 @@ #if CONFIG_ANS struct AnsDecoder *const r, #else - vpx_reader *r, + vp10_reader *r, #endif // CONFIG_ANS int seg_id) { struct macroblockd_plane *const pd = &xd->plane[plane];
diff --git a/vp10/decoder/detokenize.h b/vp10/decoder/detokenize.h index 1c9e22e..a19d90f 100644 --- a/vp10/decoder/detokenize.h +++ b/vp10/decoder/detokenize.h
@@ -21,7 +21,7 @@ #endif void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane, - vpx_reader *r); + vp10_reader *r); int vp10_decode_block_tokens(MACROBLOCKD *const xd, #if CONFIG_ANS const rans_dec_lut *const token_tab, @@ -33,7 +33,7 @@ #if CONFIG_ANS struct AnsDecoder *const r, #else - vpx_reader *r, + vp10_reader *r, #endif // CONFIG_ANS int seg_id);
diff --git a/vp10/decoder/dsubexp.c b/vp10/decoder/dsubexp.c index 7d2872e..4d53e12 100644 --- a/vp10/decoder/dsubexp.c +++ b/vp10/decoder/dsubexp.c
@@ -21,11 +21,11 @@ return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1); } -static int decode_uniform(vpx_reader *r) { +static int decode_uniform(vp10_reader *r) { const int l = 8; const int m = (1 << l) - 190; - const int v = vpx_read_literal(r, l - 1); - return v < m ? v : (v << 1) - m + vpx_read_bit(r); + const int v = vp10_read_literal(r, l - 1); + return v < m ? v : (v << 1) - m + vp10_read_bit(r); } static int inv_remap_prob(int v, int m) { @@ -58,18 +58,18 @@ } } -static int decode_term_subexp(vpx_reader *r) { - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 4); - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 4) + 16; - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 5) + 32; +static int decode_term_subexp(vp10_reader *r) { + if (!vp10_read_bit(r)) + return vp10_read_literal(r, 4); + if (!vp10_read_bit(r)) + return vp10_read_literal(r, 4) + 16; + if (!vp10_read_bit(r)) + return vp10_read_literal(r, 5) + 32; return decode_uniform(r) + 64; } -void vp10_diff_update_prob(vpx_reader *r, vpx_prob* p) { - if (vpx_read(r, DIFF_UPDATE_PROB)) { +void vp10_diff_update_prob(vp10_reader *r, vpx_prob* p) { + if (vp10_read(r, DIFF_UPDATE_PROB)) { const int delp = decode_term_subexp(r); *p = (vpx_prob)inv_remap_prob(delp, *p); }
diff --git a/vp10/decoder/dsubexp.h b/vp10/decoder/dsubexp.h index 1a7ed99..c05ec6e 100644 --- a/vp10/decoder/dsubexp.h +++ b/vp10/decoder/dsubexp.h
@@ -12,13 +12,13 @@ #ifndef VP10_DECODER_DSUBEXP_H_ #define VP10_DECODER_DSUBEXP_H_ -#include "vpx_dsp/bitreader.h" +#include "vp10/decoder/bitreader.h" #ifdef __cplusplus extern "C" { #endif -void vp10_diff_update_prob(vpx_reader *r, vpx_prob* p); +void vp10_diff_update_prob(vp10_reader *r, vpx_prob* p); #ifdef __cplusplus } // extern "C"
diff --git a/vp10/encoder/aq_complexity.c b/vp10/encoder/aq_complexity.c index 9f73ecc..a4c38d1 100644 --- a/vp10/encoder/aq_complexity.c +++ b/vp10/encoder/aq_complexity.c
@@ -116,8 +116,6 @@ VP10_COMMON *const cm = &cpi->common; const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST]; const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]); const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]); int x, y; @@ -130,7 +128,7 @@ // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). // It is converted to bits * 256 units. const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / - (bw * bh); + (cm->mib_size * cm->mib_size); double logvar; double low_var_thresh; const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
diff --git a/vp10/encoder/aq_cyclicrefresh.c b/vp10/encoder/aq_cyclicrefresh.c index defb974..057c057 100644 --- a/vp10/encoder/aq_cyclicrefresh.c +++ b/vp10/encoder/aq_cyclicrefresh.c
@@ -388,8 +388,8 @@ int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; int xmis, ymis, x, y; memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols); - sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; - sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sb_cols = (cm->mi_cols + cm->mib_size - 1) / cm->mib_size; + sb_rows = (cm->mi_rows + cm->mib_size - 1) / cm->mib_size; sbs_in_frame = sb_cols * sb_rows; // Number of target blocks to get the q delta (segment 1). block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100; @@ -404,8 +404,8 @@ // Get the mi_row/mi_col corresponding to superblock index i. int sb_row_index = (i / sb_cols); int sb_col_index = i - sb_row_index * sb_cols; - int mi_row = sb_row_index * MI_BLOCK_SIZE; - int mi_col = sb_col_index * MI_BLOCK_SIZE; + int mi_row = sb_row_index * cm->mib_size; + int mi_col = sb_col_index * cm->mib_size; int qindex_thresh = cpi->oxcf.content == VP9E_CONTENT_SCREEN ? vp10_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) @@ -413,11 +413,9 @@ assert(mi_row >= 0 && mi_row < cm->mi_rows); assert(mi_col >= 0 && mi_col < cm->mi_cols); bl_index = mi_row * cm->mi_cols + mi_col; - // Loop through all 8x8 blocks in superblock and update map. - xmis = - VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_LARGEST]); - ymis = - VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_LARGEST]); + // Loop through all MI blocks in superblock and update map. + xmis = VPXMIN(cm->mi_cols - mi_col, cm->mib_size); + ymis = VPXMIN(cm->mi_rows - mi_row, cm->mib_size); for (y = 0; y < ymis; y++) { for (x = 0; x < xmis; x++) { const int bl_index2 = bl_index + y * cm->mi_cols + x;
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 721a7a6..12e7e71 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c
@@ -93,16 +93,16 @@ {{0, 1}, {2, 2}, {6, 3}, {7, 3}}, // Max tx_size is 32X32 }; -static INLINE void write_uniform(vpx_writer *w, int n, int v) { +static INLINE void write_uniform(vp10_writer *w, int n, int v) { int l = get_unsigned_bits(n); int m = (1 << l) - n; if (l == 0) return; if (v < m) { - vpx_write_literal(w, v, l - 1); + vp10_write_literal(w, v, l - 1); } else { - vpx_write_literal(w, m + ((v - m) >> 1), l - 1); - vpx_write_literal(w, (v - m) & 1, 1); + vp10_write_literal(w, m + ((v - m) >> 1), l - 1); + vp10_write_literal(w, (v - m) & 1, 1); } } @@ -115,6 +115,9 @@ #if CONFIG_EXT_INTRA static struct vp10_token intra_filter_encodings[INTRA_FILTERS]; #endif // CONFIG_EXT_INTRA +#if CONFIG_EXT_INTER +static struct vp10_token interintra_mode_encodings[INTERINTRA_MODES]; +#endif // CONFIG_EXT_INTER void vp10_encode_token_init() { #if CONFIG_EXT_TX @@ -131,15 +134,26 @@ #if CONFIG_EXT_INTRA vp10_tokens_from_tree(intra_filter_encodings, vp10_intra_filter_tree); #endif // CONFIG_EXT_INTRA +#if CONFIG_EXT_INTER + vp10_tokens_from_tree(interintra_mode_encodings, vp10_interintra_mode_tree); +#endif // CONFIG_EXT_INTER } -static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode, +static void write_intra_mode(vp10_writer *w, PREDICTION_MODE mode, const vpx_prob *probs) { vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]); } +#if CONFIG_EXT_INTER +static void write_interintra_mode(vp10_writer *w, INTERINTRA_MODE mode, + const vpx_prob *probs) { + vp10_write_token(w, vp10_interintra_mode_tree, probs, + &interintra_mode_encodings[mode]); +} +#endif // CONFIG_EXT_INTER + static void write_inter_mode(VP10_COMMON *cm, - vpx_writer *w, PREDICTION_MODE mode, + vp10_writer *w, PREDICTION_MODE mode, #if CONFIG_REF_MV && CONFIG_EXT_INTER int is_compound, #endif // CONFIG_REF_MV && CONFIG_EXT_INTER @@ -148,14 +162,14 @@ const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK; const vpx_prob newmv_prob = cm->fc->newmv_prob[newmv_ctx]; #if 
CONFIG_EXT_INTER - vpx_write(w, mode != NEWMV && mode != NEWFROMNEARMV, newmv_prob); + vp10_write(w, mode != NEWMV && mode != NEWFROMNEARMV, newmv_prob); if (!is_compound && (mode == NEWMV || mode == NEWFROMNEARMV)) - vpx_write(w, mode == NEWFROMNEARMV, cm->fc->new2mv_prob); + vp10_write(w, mode == NEWFROMNEARMV, cm->fc->new2mv_prob); if (mode != NEWMV && mode != NEWFROMNEARMV) { #else - vpx_write(w, mode != NEWMV, newmv_prob); + vp10_write(w, mode != NEWMV, newmv_prob); if (mode != NEWMV) { #endif // CONFIG_EXT_INTER @@ -167,7 +181,7 @@ return; } - vpx_write(w, mode != ZEROMV, zeromv_prob); + vp10_write(w, mode != ZEROMV, zeromv_prob); if (mode != ZEROMV) { int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; @@ -181,7 +195,7 @@ refmv_ctx = 8; refmv_prob = cm->fc->refmv_prob[refmv_ctx]; - vpx_write(w, mode != NEARESTMV, refmv_prob); + vp10_write(w, mode != NEARESTMV, refmv_prob); } } #else @@ -196,7 +210,7 @@ static void write_drl_idx(const VP10_COMMON *cm, const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, - vpx_writer *w) { + vp10_writer *w) { uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); assert(mbmi->ref_mv_idx < 3); @@ -209,7 +223,7 @@ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx]; - vpx_write(w, mbmi->ref_mv_idx != idx, drl_prob); + vp10_write(w, mbmi->ref_mv_idx != idx, drl_prob); if (mbmi->ref_mv_idx == idx) return; } @@ -226,7 +240,7 @@ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx]; - vpx_write(w, mbmi->ref_mv_idx != (idx - 1), drl_prob); + vp10_write(w, mbmi->ref_mv_idx != (idx - 1), drl_prob); if (mbmi->ref_mv_idx == (idx - 1)) return; } @@ -237,7 +251,7 @@ #endif #if CONFIG_EXT_INTER -static void write_inter_compound_mode(VP10_COMMON *cm, vpx_writer *w, +static void write_inter_compound_mode(VP10_COMMON *cm, vp10_writer *w, PREDICTION_MODE mode, const int16_t mode_ctx) { const vpx_prob *const 
inter_compound_probs = @@ -257,7 +271,7 @@ static void prob_diff_update(const vpx_tree_index *tree, vpx_prob probs[/*n - 1*/], const unsigned int counts[/*n - 1*/], - int n, vpx_writer *w) { + int n, vp10_writer *w) { int i; unsigned int branch_ct[32][2]; @@ -292,7 +306,7 @@ const MACROBLOCKD *xd, const MB_MODE_INFO *mbmi, TX_SIZE tx_size, int blk_row, int blk_col, - vpx_writer *w) { + vp10_writer *w) { const int tx_row = blk_row >> 1; const int tx_col = blk_col >> 1; int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; @@ -310,14 +324,14 @@ return; if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) { - vpx_write(w, 0, cm->fc->txfm_partition_prob[ctx]); + vp10_write(w, 0, cm->fc->txfm_partition_prob[ctx]); txfm_partition_update(xd->above_txfm_context + tx_col, xd->left_txfm_context + tx_row, tx_size); } else { const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int bsl = b_width_log2_lookup[bsize]; int i; - vpx_write(w, 1, cm->fc->txfm_partition_prob[ctx]); + vp10_write(w, 1, cm->fc->txfm_partition_prob[ctx]); if (tx_size == TX_8X8) { txfm_partition_update(xd->above_txfm_context + tx_col, @@ -335,7 +349,7 @@ } } -static void update_txfm_partition_probs(VP10_COMMON *cm, vpx_writer *w, +static void update_txfm_partition_probs(VP10_COMMON *cm, vp10_writer *w, FRAME_COUNTS *counts) { int k; for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k) @@ -345,7 +359,7 @@ #endif static void write_selected_tx_size(const VP10_COMMON *cm, - const MACROBLOCKD *xd, vpx_writer *w) { + const MACROBLOCKD *xd, vp10_writer *w) { TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; @@ -358,7 +372,7 @@ } #if CONFIG_REF_MV -static void update_inter_mode_probs(VP10_COMMON *cm, vpx_writer *w, +static void update_inter_mode_probs(VP10_COMMON *cm, vp10_writer *w, FRAME_COUNTS *counts) { int i; for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) @@ -380,7 +394,7 @@ #endif #if CONFIG_EXT_INTER -static void 
update_inter_compound_mode_probs(VP10_COMMON *cm, vpx_writer *w) { +static void update_inter_compound_mode_probs(VP10_COMMON *cm, vp10_writer *w) { const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); int i; @@ -393,7 +407,7 @@ INTER_COMPOUND_MODES); } do_update = savings > savings_thresh; - vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB); if (do_update) { for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { prob_diff_update(vp10_inter_compound_mode_tree, @@ -406,17 +420,17 @@ #endif // CONFIG_EXT_INTER static int write_skip(const VP10_COMMON *cm, const MACROBLOCKD *xd, - int segment_id, const MODE_INFO *mi, vpx_writer *w) { + int segment_id, const MODE_INFO *mi, vp10_writer *w) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { const int skip = mi->mbmi.skip; - vpx_write(w, skip, vp10_get_skip_prob(cm, xd)); + vp10_write(w, skip, vp10_get_skip_prob(cm, xd)); return skip; } } -static void update_skip_probs(VP10_COMMON *cm, vpx_writer *w, +static void update_skip_probs(VP10_COMMON *cm, vp10_writer *w, FRAME_COUNTS *counts) { int k; @@ -424,7 +438,7 @@ vp10_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k]); } -static void update_switchable_interp_probs(VP10_COMMON *cm, vpx_writer *w, +static void update_switchable_interp_probs(VP10_COMMON *cm, vp10_writer *w, FRAME_COUNTS *counts) { int j; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) @@ -435,7 +449,7 @@ #if CONFIG_EXT_TX -static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) { +static void update_ext_tx_probs(VP10_COMMON *cm, vp10_writer *w) { const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); int i, j; @@ -450,7 +464,7 @@ cm->counts.inter_ext_tx[s][i], num_ext_tx_set_inter[s]); } do_update = savings > savings_thresh; - vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + vp10_write(w, do_update, 
GROUP_DIFF_UPDATE_PROB); if (do_update) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { if (!use_inter_ext_tx_for_txsize[s][i]) continue; @@ -473,7 +487,7 @@ cm->counts.intra_ext_tx[s][i][j], num_ext_tx_set_intra[s]); } do_update = savings > savings_thresh; - vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB); if (do_update) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { if (!use_intra_ext_tx_for_txsize[s][i]) continue; @@ -489,7 +503,7 @@ #else -static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) { +static void update_ext_tx_probs(VP10_COMMON *cm, vp10_writer *w) { const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); int i, j; @@ -503,7 +517,7 @@ cm->counts.intra_ext_tx[i][j], TX_TYPES); } do_update = savings > savings_thresh; - vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB); if (do_update) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { for (j = 0; j < TX_TYPES; ++j) @@ -521,7 +535,7 @@ cm->counts.inter_ext_tx[i], TX_TYPES); } do_update = savings > savings_thresh; - vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB); if (do_update) { for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { prob_diff_update(vp10_ext_tx_tree, @@ -533,7 +547,7 @@ } #endif // CONFIG_EXT_TX -static void pack_palette_tokens(vpx_writer *w, const TOKENEXTRA **tp, +static void pack_palette_tokens(vp10_writer *w, const TOKENEXTRA **tp, int n, int num) { int i; const TOKENEXTRA *p = *tp; @@ -548,7 +562,7 @@ } #if CONFIG_SUPERTX -static void update_supertx_probs(VP10_COMMON *cm, vpx_writer *w) { +static void update_supertx_probs(VP10_COMMON *cm, vp10_writer *w) { const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); int i, j; @@ -561,7 +575,7 @@ } } do_update = savings > savings_thresh; - vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + vp10_write(w, 
do_update, GROUP_DIFF_UPDATE_PROB); if (do_update) { for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { for (j = 1; j < TX_SIZES; ++j) { @@ -574,7 +588,7 @@ #endif // CONFIG_SUPERTX #if !CONFIG_ANS -static void pack_mb_tokens(vpx_writer *w, +static void pack_mb_tokens(vp10_writer *w, const TOKENEXTRA **tp, const TOKENEXTRA *const stop, vpx_bit_depth_t bit_depth, const TX_SIZE tx) { const TOKENEXTRA *p = *tp; @@ -605,13 +619,13 @@ if (p->skip_eob_node) n -= p->skip_eob_node; else - vpx_write(w, t != EOB_TOKEN, p->context_tree[0]); + vp10_write(w, t != EOB_TOKEN, p->context_tree[0]); if (t != EOB_TOKEN) { - vpx_write(w, t != ZERO_TOKEN, p->context_tree[1]); + vp10_write(w, t != ZERO_TOKEN, p->context_tree[1]); if (t != ZERO_TOKEN) { - vpx_write(w, t != ONE_TOKEN, p->context_tree[2]); + vp10_write(w, t != ONE_TOKEN, p->context_tree[2]); if (t != ONE_TOKEN) { int len = UNCONSTRAINED_NODES - p->skip_eob_node; @@ -639,13 +653,13 @@ skip_bits--; assert(!bb); } else { - vpx_write(w, bb, pb[i >> 1]); + vp10_write(w, bb, pb[i >> 1]); } i = b->tree[i + bb]; } while (n); } - vpx_write_bit(w, e & 1); + vp10_write_bit(w, e & 1); } ++p; @@ -742,7 +756,7 @@ #endif // !CONFIG_ANS #if CONFIG_VAR_TX -static void pack_txb_tokens(vpx_writer *w, +static void pack_txb_tokens(vp10_writer *w, const TOKENEXTRA **tp, const TOKENEXTRA *const tok_end, MACROBLOCKD *xd, MB_MODE_INFO *mbmi, int plane, @@ -793,7 +807,7 @@ } #endif -static void write_segment_id(vpx_writer *w, const struct segmentation *seg, +static void write_segment_id(vp10_writer *w, const struct segmentation *seg, const struct segmentation_probs *segp, int segment_id) { if (seg->enabled && seg->update_map) @@ -802,7 +816,7 @@ // This function encodes the reference frame static void write_ref_frames(const VP10_COMMON *cm, const MACROBLOCKD *xd, - vpx_writer *w) { + vp10_writer *w) { const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); const int segment_id = mbmi->segment_id; @@ -817,9 
+831,9 @@ // does the feature use compound prediction or not // (if not specified at the frame/segment level) if (cm->reference_mode == REFERENCE_MODE_SELECT) { - vpx_write(w, is_compound, vp10_get_reference_mode_prob(cm, xd)); + vp10_write(w, is_compound, vp10_get_reference_mode_prob(cm, xd)); } else { - assert(!is_compound == (cm->reference_mode == SINGLE_REFERENCE)); + assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE)); } if (is_compound) { @@ -830,18 +844,18 @@ #else const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME; #endif // CONFIG_EXT_REFS - vpx_write(w, bit, vp10_get_pred_prob_comp_ref_p(cm, xd)); + vp10_write(w, bit, vp10_get_pred_prob_comp_ref_p(cm, xd)); #if CONFIG_EXT_REFS if (!bit) { const int bit1 = mbmi->ref_frame[0] == LAST_FRAME; - vpx_write(w, bit1, vp10_get_pred_prob_comp_ref_p1(cm, xd)); + vp10_write(w, bit1, vp10_get_pred_prob_comp_ref_p1(cm, xd)); } else { const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME; - vpx_write(w, bit2, vp10_get_pred_prob_comp_ref_p2(cm, xd)); + vp10_write(w, bit2, vp10_get_pred_prob_comp_ref_p2(cm, xd)); if (!bit2) { const int bit3 = mbmi->ref_frame[0] == LAST3_FRAME; - vpx_write(w, bit3, vp10_get_pred_prob_comp_ref_p3(cm, xd)); + vp10_write(w, bit3, vp10_get_pred_prob_comp_ref_p3(cm, xd)); } } #endif // CONFIG_EXT_REFS @@ -849,30 +863,30 @@ #if CONFIG_EXT_REFS const int bit0 = (mbmi->ref_frame[0] == GOLDEN_FRAME || mbmi->ref_frame[0] == ALTREF_FRAME); - vpx_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd)); + vp10_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd)); if (bit0) { const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME; - vpx_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd)); + vp10_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd)); } else { const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME || mbmi->ref_frame[0] == LAST4_FRAME); - vpx_write(w, bit2, vp10_get_pred_prob_single_ref_p3(cm, xd)); + vp10_write(w, bit2, vp10_get_pred_prob_single_ref_p3(cm, xd)); if 
(!bit2) { const int bit3 = mbmi->ref_frame[0] != LAST_FRAME; - vpx_write(w, bit3, vp10_get_pred_prob_single_ref_p4(cm, xd)); + vp10_write(w, bit3, vp10_get_pred_prob_single_ref_p4(cm, xd)); } else { const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME; - vpx_write(w, bit4, vp10_get_pred_prob_single_ref_p5(cm, xd)); + vp10_write(w, bit4, vp10_get_pred_prob_single_ref_p5(cm, xd)); } } #else const int bit0 = mbmi->ref_frame[0] != LAST_FRAME; - vpx_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd)); + vp10_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd)); if (bit0) { const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME; - vpx_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd)); + vp10_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd)); } #endif // CONFIG_EXT_REFS } @@ -882,13 +896,13 @@ #if CONFIG_EXT_INTRA static void write_ext_intra_mode_info(const VP10_COMMON *const cm, const MB_MODE_INFO *const mbmi, - vpx_writer *w) { + vp10_writer *w) { #if !ALLOW_FILTER_INTRA_MODES return; #endif if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) { - vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[0], + vp10_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[0], cm->fc->ext_intra_probs[0]); if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) { EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[0]; @@ -897,7 +911,7 @@ } if (mbmi->uv_mode == DC_PRED && mbmi->palette_mode_info.palette_size[1] == 0) { - vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[1], + vp10_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[1], cm->fc->ext_intra_probs[1]); if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) { EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[1]; @@ -909,7 +923,7 @@ static void write_switchable_interp_filter(VP10_COMP *cpi, const MACROBLOCKD *xd, - vpx_writer *w) { + vp10_writer *w) { VP10_COMMON *const cm = &cpi->common; const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; if 
(cm->interp_filter == SWITCHABLE) { @@ -930,7 +944,7 @@ static void write_palette_mode_info(const VP10_COMMON *cm, const MACROBLOCKD *xd, const MODE_INFO *const mi, - vpx_writer *w) { + vp10_writer *w) { const MB_MODE_INFO *const mbmi = &mi->mbmi; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; @@ -945,30 +959,30 @@ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); if (left_mi) palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); - vpx_write(w, n > 0, + vp10_write(w, n > 0, vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx]); if (n > 0) { vp10_write_token(w, vp10_palette_size_tree, vp10_default_palette_y_size_prob[bsize - BLOCK_8X8], &palette_size_encodings[n - 2]); for (i = 0; i < n; ++i) - vpx_write_literal(w, pmi->palette_colors[i], cm->bit_depth); + vp10_write_literal(w, pmi->palette_colors[i], cm->bit_depth); write_uniform(w, n, pmi->palette_first_color_idx[0]); } } if (mbmi->uv_mode == DC_PRED) { n = pmi->palette_size[1]; - vpx_write(w, n > 0, + vp10_write(w, n > 0, vp10_default_palette_uv_mode_prob[pmi->palette_size[0] > 0]); if (n > 0) { vp10_write_token(w, vp10_palette_size_tree, vp10_default_palette_uv_size_prob[bsize - BLOCK_8X8], &palette_size_encodings[n - 2]); for (i = 0; i < n; ++i) { - vpx_write_literal(w, pmi->palette_colors[PALETTE_MAX_SIZE + i], + vp10_write_literal(w, pmi->palette_colors[PALETTE_MAX_SIZE + i], cm->bit_depth); - vpx_write_literal(w, pmi->palette_colors[2 * PALETTE_MAX_SIZE + i], + vp10_write_literal(w, pmi->palette_colors[2 * PALETTE_MAX_SIZE + i], cm->bit_depth); } write_uniform(w, n, pmi->palette_first_color_idx[1]); @@ -980,7 +994,7 @@ #if CONFIG_SUPERTX int supertx_enabled, #endif - vpx_writer *w) { + vp10_writer *w) { VP10_COMMON *const cm = &cpi->common; #if !CONFIG_REF_MV const nmv_context *nmvc = &cm->fc->nmvc; @@ -1003,7 +1017,7 @@ if (seg->temporal_update) { const int pred_flag = mbmi->seg_id_predicted; vpx_prob pred_prob = 
vp10_get_pred_prob_seg_id(segp, xd); - vpx_write(w, pred_flag, pred_prob); + vp10_write(w, pred_flag, pred_prob); if (!pred_flag) write_segment_id(w, seg, segp, segment_id); } else { @@ -1024,7 +1038,7 @@ if (!supertx_enabled) #endif // CONFIG_SUPERTX if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - vpx_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd)); + vp10_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd)); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && #if CONFIG_SUPERTX @@ -1106,7 +1120,7 @@ if (!supertx_enabled) #endif // CONFIG_SUPERTX if (is_obmc_allowed(mbmi)) - vpx_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]); + vp10_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]); #endif // CONFIG_OBMC #if CONFIG_REF_MV @@ -1280,16 +1294,18 @@ #endif // CONFIG_SUPERTX is_interintra_allowed(mbmi)) { const int interintra = mbmi->ref_frame[1] == INTRA_FRAME; - vpx_write(w, interintra, cm->fc->interintra_prob[bsize]); + const int bsize_group = size_group_lookup[bsize]; + vp10_write(w, interintra, cm->fc->interintra_prob[bsize_group]); if (interintra) { - write_intra_mode(w, mbmi->interintra_mode, - cm->fc->y_mode_prob[size_group_lookup[bsize]]); + write_interintra_mode( + w, mbmi->interintra_mode, + cm->fc->interintra_mode_prob[bsize_group]); assert(mbmi->interintra_mode == mbmi->interintra_uv_mode); - if (get_wedge_bits(bsize)) { - vpx_write(w, mbmi->use_wedge_interintra, - cm->fc->wedge_interintra_prob[bsize]); + if (is_interintra_wedge_used(bsize)) { + vp10_write(w, mbmi->use_wedge_interintra, + cm->fc->wedge_interintra_prob[bsize]); if (mbmi->use_wedge_interintra) { - vpx_write_literal(w, mbmi->interintra_wedge_index, + vp10_write_literal(w, mbmi->interintra_wedge_index, get_wedge_bits(bsize)); } } @@ -1300,11 +1316,11 @@ #if CONFIG_OBMC !(is_obmc_allowed(mbmi) && mbmi->obmc) && #endif // CONFIG_OBMC - get_wedge_bits(bsize)) { - vpx_write(w, mbmi->use_wedge_interinter, - cm->fc->wedge_interinter_prob[bsize]); + 
is_interinter_wedge_used(bsize)) { + vp10_write(w, mbmi->use_wedge_interinter, + cm->fc->wedge_interinter_prob[bsize]); if (mbmi->use_wedge_interinter) - vpx_write_literal(w, mbmi->interinter_wedge_index, + vp10_write_literal(w, mbmi->interinter_wedge_index, get_wedge_bits(bsize)); } #endif // CONFIG_EXT_INTER @@ -1368,7 +1384,7 @@ } static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO **mi_8x8, vpx_writer *w) { + MODE_INFO **mi_8x8, vp10_writer *w) { const struct segmentation *const seg = &cm->seg; const struct segmentation_probs *const segp = &cm->fc->seg; const MODE_INFO *const mi = mi_8x8[0]; @@ -1460,30 +1476,18 @@ #endif // CONFIG_EXT_INTRA } -#if CONFIG_ANS && CONFIG_SUPERTX -#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \ - supertx_enabled, mi_row, mi_col) \ - write_modes_b(cpi, tile, w, ans, tok, tok_end, supertx_enabled, mi_row, \ - mi_col) -#elif CONFIG_SUPERTX -#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \ +#if CONFIG_SUPERTX +#define write_modes_b_wrapper(cpi, tile, w, tok, tok_end, \ supertx_enabled, mi_row, mi_col) \ write_modes_b(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col) -#elif CONFIG_ANS -#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \ - supertx_enabled, mi_row, mi_col) \ - write_modes_b(cpi, tile, w, ans, tok, tok_end, mi_row, mi_col) #else -#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \ +#define write_modes_b_wrapper(cpi, tile, w, tok, tok_end, \ supertx_enabled, mi_row, mi_col) \ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col) #endif // CONFIG_ANS && CONFIG_SUPERTX static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, - vpx_writer *w, -#if CONFIG_ANS - struct BufAnsCoder *ans, -#endif // CONFIG_ANS + vp10_writer *w, const TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, #if CONFIG_SUPERTX @@ -1494,6 +1498,7 @@ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; MODE_INFO *m; int plane; + int bh, bw; #if 
CONFIG_ANS (void) tok; (void) tok_end; @@ -1503,18 +1508,21 @@ xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); m = xd->mi[0]; + assert(m->mbmi.sb_type <= cm->sb_size); + + bh = num_8x8_blocks_high_lookup[m->mbmi.sb_type]; + bw = num_8x8_blocks_wide_lookup[m->mbmi.sb_type]; + cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); - set_mi_row_col(xd, tile, - mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], - mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type], - cm->mi_rows, cm->mi_cols); + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { write_mb_modes_kf(cm, xd, xd->mi, w); } else { #if CONFIG_VAR_TX xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); #endif pack_inter_mode_mvs(cpi, m, #if CONFIG_SUPERTX @@ -1576,7 +1584,7 @@ for (row = 0; row < num_4x4_h; row += bw) for (col = 0; col < num_4x4_w; col += bw) #if CONFIG_ANS - pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth, + pack_mb_tokens_ans(w, cm->token_tab, tok, tok_end, cm->bit_depth, tx); #else pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); @@ -1586,7 +1594,7 @@ TX_SIZE tx = plane ? 
get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size; #if CONFIG_ANS - pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth, tx); + pack_mb_tokens_ans(w, cm->token_tab, tok, tok_end, cm->bit_depth, tx); #else pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); #endif // CONFIG_ANS @@ -1600,7 +1608,8 @@ static void write_partition(const VP10_COMMON *const cm, const MACROBLOCKD *const xd, int hbs, int mi_row, int mi_col, - PARTITION_TYPE p, BLOCK_SIZE bsize, vpx_writer *w) { + PARTITION_TYPE p, BLOCK_SIZE bsize, + vp10_writer *w) { const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vpx_prob *const probs = cm->fc->partition_prob[ctx]; const int has_rows = (mi_row + hbs) < cm->mi_rows; @@ -1618,41 +1627,29 @@ #endif // CONFIG_EXT_PARTITION_TYPES } else if (!has_rows && has_cols) { assert(p == PARTITION_SPLIT || p == PARTITION_HORZ); - vpx_write(w, p == PARTITION_SPLIT, probs[1]); + vp10_write(w, p == PARTITION_SPLIT, probs[1]); } else if (has_rows && !has_cols) { assert(p == PARTITION_SPLIT || p == PARTITION_VERT); - vpx_write(w, p == PARTITION_SPLIT, probs[2]); + vp10_write(w, p == PARTITION_SPLIT, probs[2]); } else { assert(p == PARTITION_SPLIT); } } -#if CONFIG_ANS && CONFIG_SUPERTX -#define write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, \ - supertx_enabled, mi_row, mi_col, bsize) \ - write_modes_sb(cpi, tile, w, ans, tok, tok_end, supertx_enabled, mi_row, \ - mi_col, bsize) -#elif CONFIG_SUPERTX -#define write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, \ +#if CONFIG_SUPERTX +#define write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, \ supertx_enabled, mi_row, mi_col, bsize) \ write_modes_sb(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col, \ bsize) -#elif CONFIG_ANS -#define write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, \ - supertx_enabled, mi_row, mi_col, bsize) \ - write_modes_sb(cpi, tile, w, ans, tok, tok_end, mi_row, mi_col, bsize) #else -#define write_modes_sb_wrapper(cpi, 
tile, w, ans, tok, tok_end, \ +#define write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, \ supertx_enabled, mi_row, mi_col, bsize) \ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, bsize) #endif // CONFIG_ANS && CONFIG_SUPERTX static void write_modes_sb(VP10_COMP *const cpi, const TileInfo *const tile, - vpx_writer *const w, -#if CONFIG_ANS - struct BufAnsCoder *ans, -#endif // CONFIG_ANS + vp10_writer *const w, const TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, #if CONFIG_SUPERTX @@ -1661,13 +1658,12 @@ int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP10_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - - const int bsl = b_width_log2_lookup[bsize]; - const int bs = (1 << bsl) / 4; - PARTITION_TYPE partition; - BLOCK_SIZE subsize; - MODE_INFO *m = NULL; + const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; + const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); + const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_SUPERTX + const int mi_offset = mi_row * cm->mi_stride + mi_col; + MB_MODE_INFO *mbmi; const int pack_token = !supertx_enabled; TX_SIZE supertx_size; int plane; @@ -1676,17 +1672,10 @@ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; - - partition = partition_lookup[bsl][m->mbmi.sb_type]; -#if CONFIG_EXT_PARTITION_TYPES - partition = get_partition(cm->mi, cm->mi_stride, cm->mi_rows, cm->mi_cols, - mi_row, mi_col, bsize); -#endif - write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); - subsize = get_subsize(bsize, partition); + write_partition(cm, xd, hbs, mi_row, mi_col, partition, bsize, w); #if CONFIG_SUPERTX - xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + mbmi = &cm->mi_grid_visible[mi_offset]->mbmi; + xd->mi = cm->mi_grid_visible + mi_offset; set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], mi_col, num_8x8_blocks_wide_lookup[bsize], @@ 
-1700,9 +1689,9 @@ prob = cm->fc->supertx_prob[partition_supertx_context_lookup[partition]] [supertx_size]; supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size); - vpx_write(w, supertx_enabled, prob); + vp10_write(w, supertx_enabled, prob); if (supertx_enabled) { - vpx_write(w, xd->mi[0]->mbmi.skip, vp10_get_skip_prob(cm, xd)); + vp10_write(w, xd->mi[0]->mbmi.skip, vp10_get_skip_prob(cm, xd)); #if CONFIG_EXT_TX if (get_ext_tx_types(supertx_size, bsize, 1) > 1 && !xd->mi[0]->mbmi.skip) { @@ -1726,106 +1715,70 @@ } #endif // CONFIG_SUPERTX if (subsize < BLOCK_8X8) { - write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); } else { switch (partition) { case PARTITION_NONE: - write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); break; case PARTITION_HORZ: - write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); - if (mi_row + bs < cm->mi_rows) - write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, - supertx_enabled, mi_row + bs, mi_col); + if (mi_row + hbs < cm->mi_rows) + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, + supertx_enabled, mi_row + hbs, mi_col); break; case PARTITION_VERT: - write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); - if (mi_col + bs < cm->mi_cols) - write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, - supertx_enabled, mi_row, mi_col + bs); + if (mi_col + hbs < cm->mi_cols) + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, + supertx_enabled, mi_row, mi_col + hbs); break; case PARTITION_SPLIT: - write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, + write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 
supertx_enabled, mi_row, mi_col, subsize); - write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, - mi_row, mi_col + bs, subsize); - write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, - mi_row + bs, mi_col, subsize); - write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled, - mi_row + bs, mi_col + bs, subsize); + write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row, mi_col + hbs, subsize); + write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row + hbs, mi_col, subsize); + write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row + hbs, mi_col + hbs, subsize); break; #if CONFIG_EXT_PARTITION_TYPES case PARTITION_HORZ_A: - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row, mi_col + bs); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row + bs, mi_col); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row, mi_col + hbs); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row + hbs, mi_col); break; case PARTITION_HORZ_B: - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row + bs, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row + bs, mi_col + bs); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row + hbs, mi_col); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row + hbs, mi_col + hbs); break; case 
PARTITION_VERT_A: - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row + bs, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row, mi_col + bs); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row + hbs, mi_col); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row, mi_col + hbs); break; case PARTITION_VERT_B: - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row, mi_col + bs); - write_modes_b(cpi, tile, w, tok, tok_end, -#if CONFIG_SUPERTX - supertx_enabled, -#endif - mi_row + bs, mi_col + bs); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row, mi_col + hbs); + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row + hbs, mi_col + hbs); break; #endif // CONFIG_EXT_PARTITION_TYPES default: @@ -1834,22 +1787,22 @@ } #if CONFIG_SUPERTX if (partition != PARTITION_NONE && supertx_enabled && pack_token && - !m->mbmi.skip) { + !mbmi->skip) { assert(*tok < tok_end); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const int mbmi_txb_size = txsize_to_bsize[m->mbmi.tx_size]; + const int mbmi_txb_size = txsize_to_bsize[mbmi->tx_size]; const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi_txb_size]; const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi_txb_size]; int row, col; - TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) - : m->mbmi.tx_size; + TX_SIZE tx = plane ? 
get_uv_tx_size(mbmi, &xd->plane[plane]) + : mbmi->tx_size; BLOCK_SIZE txb_size = txsize_to_bsize[tx]; int bw = num_4x4_blocks_wide_lookup[txb_size]; for (row = 0; row < num_4x4_h; row += bw) for (col = 0; col < num_4x4_w; col += bw) #if CONFIG_ANS - pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth, + pack_mb_tokens_ans(w, cm->token_tab, tok, tok_end, cm->bit_depth, tx); #else pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); @@ -1872,10 +1825,7 @@ static void write_modes(VP10_COMP *const cpi, const TileInfo *const tile, - vpx_writer *const w, -#if CONFIG_ANS - struct BufAnsCoder *ans, -#endif // CONFIG_ANS + vp10_writer *const w, const TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) { VP10_COMMON *const cm = &cpi->common; @@ -1888,12 +1838,12 @@ vp10_zero_above_context(cm, mi_col_start, mi_col_end); - for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) { + for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += cm->mib_size) { vp10_zero_left_context(xd); - for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { - write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0, - mi_row, mi_col, BLOCK_LARGEST); + for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += cm->mib_size) { + write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 0, + mi_row, mi_col, cm->sb_size); } } } @@ -1925,7 +1875,7 @@ } } -static void update_coef_probs_common(vpx_writer* const bc, VP10_COMP *cpi, +static void update_coef_probs_common(vp10_writer* const bc, VP10_COMP *cpi, TX_SIZE tx_size, vp10_coeff_stats *frame_branch_ct, vp10_coeff_probs_model *new_coef_probs) { @@ -1971,10 +1921,10 @@ /* Is coef updated at all */ if (update[1] == 0 || savings < 0) { - vpx_write_bit(bc, 0); + vp10_write_bit(bc, 0); return; } - vpx_write_bit(bc, 1); + vp10_write_bit(bc, 1); for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { @@ -1996,7 +1946,7 @@ *oldp, &newp, upd); if (s > 0 && newp != 
*oldp) u = 1; - vpx_write(bc, u, upd); + vp10_write(bc, u, upd); if (u) { /* send/use new probability */ vp10_write_prob_diff_update(bc, newp, *oldp); @@ -2044,11 +1994,11 @@ if (u == 1 && updates == 1) { int v; // first update - vpx_write_bit(bc, 1); + vp10_write_bit(bc, 1); for (v = 0; v < noupdates_before_first; ++v) - vpx_write(bc, 0, upd); + vp10_write(bc, 0, upd); } - vpx_write(bc, u, upd); + vp10_write(bc, u, upd); if (u) { /* send/use new probability */ vp10_write_prob_diff_update(bc, newp, *oldp); @@ -2060,7 +2010,7 @@ } } if (updates == 0) { - vpx_write_bit(bc, 0); // no updates + vp10_write_bit(bc, 0); // no updates } return; } @@ -2118,7 +2068,7 @@ } } -static void update_coef_probs_subframe(vpx_writer* const bc, VP10_COMP *cpi, +static void update_coef_probs_subframe(vp10_writer* const bc, VP10_COMP *cpi, TX_SIZE tx_size, vp10_coeff_stats branch_ct[COEF_PROBS_BUFS][TX_SIZES] @@ -2177,10 +2127,10 @@ /* Is coef updated at all */ if (update[1] == 0 || savings < 0) { - vpx_write_bit(bc, 0); + vp10_write_bit(bc, 0); return; } - vpx_write_bit(bc, 1); + vp10_write_bit(bc, 1); for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { @@ -2209,7 +2159,7 @@ max_idx); if (s > 0 && newp != *oldp) u = 1; - vpx_write(bc, u, upd); + vp10_write(bc, u, upd); if (u) { /* send/use new probability */ vp10_write_prob_diff_update(bc, newp, *oldp); @@ -2261,11 +2211,11 @@ if (u == 1 && updates == 1) { int v; // first update - vpx_write_bit(bc, 1); + vp10_write_bit(bc, 1); for (v = 0; v < noupdates_before_first; ++v) - vpx_write(bc, 0, upd); + vp10_write(bc, 0, upd); } - vpx_write(bc, u, upd); + vp10_write(bc, u, upd); if (u) { /* send/use new probability */ vp10_write_prob_diff_update(bc, newp, *oldp); @@ -2277,7 +2227,7 @@ } } if (updates == 0) { - vpx_write_bit(bc, 0); // no updates + vp10_write_bit(bc, 0); // no updates } return; } @@ -2287,7 +2237,7 @@ } #endif // CONFIG_ENTROPY -static void update_coef_probs(VP10_COMP *cpi, 
vpx_writer* w) { +static void update_coef_probs(VP10_COMP *cpi, vp10_writer* w) { const TX_MODE tx_mode = cpi->common.tx_mode; const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; @@ -2318,7 +2268,7 @@ vp10_coeff_probs_model frame_coef_probs[PLANE_TYPES]; if (cpi->td.counts->tx_size_totals[tx_size] <= 20 || (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) { - vpx_write_bit(w, 0); + vp10_write_bit(w, 0); } else { #if CONFIG_ENTROPY if (cm->do_subframe_update && @@ -2363,8 +2313,8 @@ vp10_copy(eob_counts_copy, cm->counts.eob_branch); for (i = 1; i <= cpi->common.coef_probs_update_idx; ++i) { for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - full_to_model_counts(cm->counts.coef[tx_size], - subframe_stats->coef_counts_buf[i][tx_size]); + vp10_full_to_model_counts(cm->counts.coef[tx_size], + subframe_stats->coef_counts_buf[i][tx_size]); vp10_copy(cm->counts.eob_branch, subframe_stats->eob_counts_buf[i]); vp10_partial_adapt_probs(cm, 0, 0); vp10_copy(subframe_stats->coef_probs_buf[i], cm->fc->coef_probs); @@ -2512,7 +2462,7 @@ } } -static void update_seg_probs(VP10_COMP *cpi, vpx_writer *w) { +static void update_seg_probs(VP10_COMP *cpi, vp10_writer *w) { VP10_COMMON *cm = &cpi->common; if (!cpi->common.seg.enabled) @@ -2540,7 +2490,7 @@ } -static void update_txfm_probs(VP10_COMMON *cm, vpx_writer *w, +static void update_txfm_probs(VP10_COMMON *cm, vp10_writer *w, FRAME_COUNTS *counts) { if (cm->tx_mode == TX_MODE_SELECT) { int i, j; @@ -2582,21 +2532,32 @@ } } -static void write_tile_info(VP10_COMMON *const cm, +static void write_tile_info(const VP10_COMMON *const cm, struct vpx_write_bit_buffer *wb) { #if CONFIG_EXT_TILE - // TODO(geza.lore): Dependent on CU_SIZE const int tile_width = - mi_cols_aligned_to_sb(cm->tile_width) >> MI_BLOCK_SIZE_LOG2; + ALIGN_POWER_OF_TWO(cm->tile_width, cm->mib_size_log2) >> cm->mib_size_log2; const int tile_height = - mi_cols_aligned_to_sb(cm->tile_height) >> MI_BLOCK_SIZE_LOG2; 
+ ALIGN_POWER_OF_TWO(cm->tile_height, cm->mib_size_log2) >> cm->mib_size_log2; - assert(tile_width > 0 && tile_width <= 64); - assert(tile_height > 0 && tile_height <= 64); + assert(tile_width > 0); + assert(tile_height > 0); // Write the tile sizes - vpx_wb_write_literal(wb, tile_width - 1, 6); - vpx_wb_write_literal(wb, tile_height - 1, 6); +#if CONFIG_EXT_PARTITION + if (cm->sb_size == BLOCK_128X128) { + assert(tile_width <= 32); + assert(tile_height <= 32); + vpx_wb_write_literal(wb, tile_width - 1, 5); + vpx_wb_write_literal(wb, tile_height - 1, 5); + } else +#endif // CONFIG_EXT_PARTITION + { + assert(tile_width <= 64); + assert(tile_height <= 64); + vpx_wb_write_literal(wb, tile_width - 1, 6); + vpx_wb_write_literal(wb, tile_height - 1, 6); + } #else int min_log2_tile_cols, max_log2_tile_cols, ones; vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); @@ -2713,11 +2674,10 @@ uint8_t *const dst, unsigned int *max_tile_size, unsigned int *max_tile_col_size) { - VP10_COMMON *const cm = &cpi->common; - vpx_writer mode_bc; + const VP10_COMMON *const cm = &cpi->common; + vp10_writer mode_bc; #if CONFIG_ANS struct AnsCoder token_ans; - struct BufAnsCoder buffered_ans; #endif // CONFIG_ANS int tile_row, tile_col; TOKENEXTRA *(*const tok_buffers)[MAX_TILE_COLS] = cpi->tile_tok; @@ -2758,6 +2718,7 @@ unsigned int tile_size; const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col]; const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col]; + const int data_offset = have_tiles ? 4 : 0; vp10_tile_set_row(&tile_info, cm, tile_row); @@ -2765,28 +2726,20 @@ // Is CONFIG_EXT_TILE = 1, every tile in the row has a header, // even for the last one, unless no tiling is used at all. 
- if (have_tiles) { - total_size += 4; - vpx_start_encode(&mode_bc, buf->data + 4); - } else { - vpx_start_encode(&mode_bc, buf->data); - } - + total_size += data_offset; #if !CONFIG_ANS + vpx_start_encode(&mode_bc, buf->data + data_offset); write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end); assert(tok == tok_end); vpx_stop_encode(&mode_bc); tile_size = mode_bc.pos; #else - buf_ans_write_init(&buffered_ans, uco_ans_buf, ans_window_size); - write_modes(cpi, &tile_info, &mode_bc, &buffered_ans, &tok, tok_end); + buf_ans_write_init(&mode_bc, uco_ans_buf, ans_window_size); + write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end); assert(tok == tok_end); - vpx_stop_encode(&mode_bc); - tile_size = mode_bc.pos; - - ans_write_init(&token_ans, dst + total_size + tile_size); - buf_ans_flush(&buffered_ans, &token_ans); - tile_size += ans_write_end(&token_ans); + ans_write_init(&token_ans, buf->data + data_offset); + buf_ans_flush(&mode_bc, &token_ans); + tile_size = ans_write_end(&token_ans); #endif // !CONFIG_ANS buf->size = tile_size; @@ -2850,23 +2803,19 @@ if (!is_last_tile) total_size += 4; - vpx_start_encode(&mode_bc, dst + total_size); - #if !CONFIG_ANS + vpx_start_encode(&mode_bc, dst + total_size); write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end); assert(tok == tok_end); vpx_stop_encode(&mode_bc); tile_size = mode_bc.pos; #else - buf_ans_write_init(&buffered_ans, uco_ans_buf, ans_window_size); - write_modes(cpi, &tile_info, &mode_bc, &buffered_ans, &tok, tok_end); + buf_ans_write_init(&mode_bc, uco_ans_buf, ans_window_size); + write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end); assert(tok == tok_end); - vpx_stop_encode(&mode_bc); - tile_size = mode_bc.pos; - - ans_write_init(&token_ans, dst + total_size + tile_size); - buf_ans_flush(&buffered_ans, &token_ans); - tile_size += ans_write_end(&token_ans); + ans_write_init(&token_ans, dst + total_size); + buf_ans_flush(&mode_bc, &token_ans); + tile_size = ans_write_end(&token_ans); #endif // !CONFIG_ANS 
assert(tile_size > 0); @@ -3059,6 +3008,15 @@ vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2); + assert(cm->mib_size == num_8x8_blocks_wide_lookup[cm->sb_size]); + assert(cm->mib_size == 1 << cm->mib_size_log2); +#if CONFIG_EXT_PARTITION + assert(cm->sb_size == BLOCK_128X128 || cm->sb_size == BLOCK_64X64); + vpx_wb_write_bit(wb, cm->sb_size == BLOCK_128X128 ? 1 : 0); +#else + assert(cm->sb_size == BLOCK_64X64); +#endif // CONFIG_EXT_PARTITION + encode_loopfilter(cm, wb); #if CONFIG_LOOP_RESTORATION encode_restoration(cm, wb); @@ -3088,10 +3046,20 @@ #endif // CONFIG_SUPERTX FRAME_CONTEXT *const fc = cm->fc; FRAME_COUNTS *counts = cpi->td.counts; - vpx_writer header_bc; + vp10_writer header_bc; int i, j; +#if CONFIG_ANS + struct AnsCoder header_ans; + struct buffered_ans_symbol *uco_ans_buf; + const int ans_window_size = 50000; // TODO(aconverse): revisit window size + int header_size; + CHECK_MEM_ERROR(cm, uco_ans_buf, + vpx_malloc(ans_window_size * sizeof(*uco_ans_buf))); + buf_ans_write_init(&header_bc, uco_ans_buf, ans_window_size); +#else vpx_start_encode(&header_bc, data); +#endif update_txfm_probs(cm, &header_bc, counts); update_coef_probs(cpi, &header_bc); @@ -3144,15 +3112,21 @@ update_inter_compound_mode_probs(cm, &header_bc); if (cm->reference_mode != COMPOUND_REFERENCE) { - for (i = 0; i < BLOCK_SIZES; i++) { - if (is_interintra_allowed_bsize(i)) { + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { + if (is_interintra_allowed_bsize_group(i)) { vp10_cond_prob_diff_update(&header_bc, &fc->interintra_prob[i], cm->counts.interintra[i]); } } + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { + prob_diff_update(vp10_interintra_mode_tree, + cm->fc->interintra_mode_prob[i], + counts->interintra_mode[i], + INTERINTRA_MODES, &header_bc); + } for (i = 0; i < BLOCK_SIZES; i++) { - if (is_interintra_allowed_bsize(i) && get_wedge_bits(i)) + if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i)) vp10_cond_prob_diff_update(&header_bc, 
&fc->wedge_interintra_prob[i], cm->counts.wedge_interintra[i]); @@ -3160,7 +3134,7 @@ } if (cm->reference_mode != SINGLE_REFERENCE) { for (i = 0; i < BLOCK_SIZES; i++) - if (get_wedge_bits(i)) + if (is_interinter_wedge_used(i)) vp10_cond_prob_diff_update(&header_bc, &fc->wedge_interinter_prob[i], cm->counts.wedge_interinter[i]); @@ -3223,10 +3197,18 @@ #endif // CONFIG_SUPERTX } +#if CONFIG_ANS + ans_write_init(&header_ans, data); + buf_ans_flush(&header_bc, &header_ans); + vpx_free(uco_ans_buf); + header_size = ans_write_end(&header_ans); + assert(header_size <= 0xffff); + return header_size; +#else vpx_stop_encode(&header_bc); assert(header_bc.pos <= 0xffff); - return header_bc.pos; +#endif // CONFIG_ANS } static int choose_size_bytes(uint32_t size, int spare_msbs) {
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index b5e61d9..2e8af98 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h
@@ -64,7 +64,7 @@ typedef struct { uint8_t best_palette_color_map[MAX_SB_SQUARE]; - double kmeans_data_buf[2 * MAX_SB_SQUARE]; + float kmeans_data_buf[2 * MAX_SB_SQUARE]; uint8_t kmeans_indices_buf[MAX_SB_SQUARE]; uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE]; } PALETTE_BUFFER; @@ -140,11 +140,11 @@ // Notes transform blocks where no coefficents are coded. // Set during mode selection. Read during block encoding. - uint8_t zcoeff_blk[TX_SIZES][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; + uint8_t zcoeff_blk[TX_SIZES][MAX_MIB_SIZE * MAX_MIB_SIZE * 4]; #if CONFIG_VAR_TX - uint8_t blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; + uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4]; #if CONFIG_REF_MV - uint8_t blk_skip_drl[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; + uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4]; #endif #endif
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c index b7c8260..41155c9 100644 --- a/vp10/encoder/context_tree.c +++ b/vp10/encoder/context_tree.c
@@ -244,8 +244,16 @@ } ++square_index; } - td->pc_root = &td->pc_tree[tree_nodes - 1]; - td->pc_root[0].none.best_mode_index = 2; + + // Set up the root node for the largest superblock size + i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2; + td->pc_root[i] = &td->pc_tree[tree_nodes - 1]; + td->pc_root[i]->none.best_mode_index = 2; + // Set up the root nodes for the rest of the possible superblock sizes + while (--i >= 0) { + td->pc_root[i] = td->pc_root[i+1]->split[0]; + td->pc_root[i]->none.best_mode_index = 2; + } } void vp10_free_pc_tree(ThreadData *td) {
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index b73f66c..06463c1 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c
@@ -49,6 +49,12 @@ #include "vp10/encoder/segmentation.h" #include "vp10/encoder/tokenize.h" +#if CONFIG_VP9_HIGHBITDEPTH +# define IF_HBD(...) __VA_ARGS__ +#else +# define IF_HBD(...) +#endif // CONFIG_VP9_HIGHBITDEPTH + static void encode_superblock(VP10_COMP *cpi, ThreadData * td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -276,7 +282,8 @@ #if CONFIG_VAR_TX xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); xd->max_tx_size = max_txsize_lookup[bsize]; #endif @@ -372,7 +379,11 @@ assert(!(mi_col_pred & (mi_width - 1)) && !(mi_row_pred & (mi_height - 1))); set_mi_row_col(xd, tile, mi_row_pred, mi_height, mi_col_pred, mi_width, cm->mi_rows, cm->mi_cols); +#if CONFIG_EXT_TILE + xd->up_available = (mi_row_ori > tile->mi_row_start); +#else xd->up_available = (mi_row_ori != 0); +#endif // CONFIG_EXT_TILE xd->left_available = (mi_col_ori > tile->mi_col_start); // R/D setup. 
@@ -408,234 +419,102 @@ } } -typedef struct { - int64_t sum_square_error; - int64_t sum_error; - int log2_count; - int variance; -} var; - -typedef struct { - var none; - var horz[2]; - var vert[2]; -} partition_variance; - -typedef struct { - partition_variance part_variances; - var split[4]; -} v4x4; - -typedef struct { - partition_variance part_variances; - v4x4 split[4]; -} v8x8; - -typedef struct { - partition_variance part_variances; - v8x8 split[4]; -} v16x16; - -typedef struct { - partition_variance part_variances; - v16x16 split[4]; -} v32x32; - -typedef struct { - partition_variance part_variances; - v32x32 split[4]; -} v64x64; - -#if CONFIG_EXT_PARTITION -typedef struct { - partition_variance part_variances; - v64x64 split[4]; -} v128x128; -#endif // CONFIG_EXT_PARTITION - -typedef struct { - partition_variance *part_variances; - var *split[4]; -} variance_node; - -typedef enum { - V16X16, - V32X32, - V64X64, -#if CONFIG_EXT_PARTITION - V128X128, -#endif // CONFIG_EXT_PARTITION -} TREE_LEVEL; - -static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { - int i; - node->part_variances = NULL; - switch (bsize) { -#if CONFIG_EXT_PARTITION - case BLOCK_128X128: { - v128x128 *vt = (v128x128 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } -#endif // CONFIG_EXT_PARTITION - case BLOCK_64X64: { - v64x64 *vt = (v64x64 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - case BLOCK_32X32: { - v32x32 *vt = (v32x32 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - case BLOCK_16X16: { - v16x16 *vt = (v16x16 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - 
case BLOCK_8X8: { - v8x8 *vt = (v8x8 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - case BLOCK_4X4: { - v4x4 *vt = (v4x4 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i]; - break; - } - default: { - assert(0); - break; - } - } -} - -// Set variance values given sum square error, sum error, count. -static void fill_variance(int64_t s2, int64_t s, int c, var *v) { - v->sum_square_error = s2; - v->sum_error = s; - v->log2_count = c; -} - -static void get_variance(var *v) { - v->variance = (int)(256 * (v->sum_square_error - - ((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count); -} - -static void sum_2_variances(const var *a, const var *b, var *r) { - assert(a->log2_count == b->log2_count); - fill_variance(a->sum_square_error + b->sum_square_error, - a->sum_error + b->sum_error, a->log2_count + 1, r); -} - -static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { - variance_node node; - memset(&node, 0, sizeof(node)); - tree_to_node(data, bsize, &node); - sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); - sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); - sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); - sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); - sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], - &node.part_variances->none); -} - -static int set_vt_partitioning(VP10_COMP *cpi, +static void set_vt_partitioning(VP10_COMP *cpi, MACROBLOCK *const x, MACROBLOCKD *const xd, - void *data, - BLOCK_SIZE bsize, + VAR_TREE *vt, int mi_row, int mi_col, - int64_t threshold, - BLOCK_SIZE bsize_min, - int force_split) { + const int64_t *const threshold, + const BLOCK_SIZE *const bsize_min) { VP10_COMMON * const cm = &cpi->common; - variance_node 
vt; - const int block_width = num_8x8_blocks_wide_lookup[bsize]; - const int block_height = num_8x8_blocks_high_lookup[bsize]; - const int low_res = (cm->width <= 352 && cm->height <= 288); + const int hbw = num_8x8_blocks_wide_lookup[vt->bsize] / 2; + const int hbh = num_8x8_blocks_high_lookup[vt->bsize] / 2; + const int has_cols = mi_col + hbw < cm->mi_cols; + const int has_rows = mi_row + hbh < cm->mi_rows; - assert(block_height == block_width); - tree_to_node(data, bsize, &vt); + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; - if (force_split == 1) - return 0; + assert(vt->bsize >= BLOCK_8X8); + + assert(hbh == hbw); + + if (vt->force_split || (!has_cols && !has_rows)) + goto split; // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if // variance is below threshold, otherwise split will be selected. // No check for vert/horiz split as too few samples for variance. - if (bsize == bsize_min) { - // Variance already computed to set the force_split. - if (low_res || cm->frame_type == KEY_FRAME) - get_variance(&vt.part_variances->none); - if (mi_col + block_width / 2 < cm->mi_cols && - mi_row + block_height / 2 < cm->mi_rows && - vt.part_variances->none.variance < threshold) { - set_block_size(cpi, x, xd, mi_row, mi_col, bsize); - return 1; + if (vt->bsize == bsize_min[0]) { + if (has_cols && has_rows && + vt->variances.none.variance < threshold[0]) { + set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize); + return; + } else { + BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_SPLIT); + set_block_size(cpi, x, xd, mi_row, mi_col, subsize); + if (vt->bsize > BLOCK_8X8) { + set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize); + set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize); + set_block_size(cpi, x, xd, mi_row + hbh, mi_col + hbw, subsize); + } + return; } - return 0; - } else if (bsize > bsize_min) { - // Variance already computed to set the force_split. 
- if (low_res || cm->frame_type == KEY_FRAME) - get_variance(&vt.part_variances->none); + } else if (vt->bsize > bsize_min[0]) { // For key frame: take split for bsize above 32X32 or very high variance. if (cm->frame_type == KEY_FRAME && - (bsize > BLOCK_32X32 || - vt.part_variances->none.variance > (threshold << 4))) { - return 0; + (vt->bsize > BLOCK_32X32 || + vt->variances.none.variance > (threshold[0] << 4))) { + goto split; } // If variance is low, take the bsize (no split). - if (mi_col + block_width / 2 < cm->mi_cols && - mi_row + block_height / 2 < cm->mi_rows && - vt.part_variances->none.variance < threshold) { - set_block_size(cpi, x, xd, mi_row, mi_col, bsize); - return 1; + if (has_cols && has_rows && + vt->variances.none.variance < threshold[0]) { + set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize); + return; } // Check vertical split. - if (mi_row + block_height / 2 < cm->mi_rows) { - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); - get_variance(&vt.part_variances->vert[0]); - get_variance(&vt.part_variances->vert[1]); - if (vt.part_variances->vert[0].variance < threshold && - vt.part_variances->vert[1].variance < threshold && + if (has_rows) { + BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_VERT); + if (vt->variances.vert[0].variance < threshold[0] && + vt->variances.vert[1].variance < threshold[0] && get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, x, xd, mi_row, mi_col, subsize); - set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); - return 1; + set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize); + return; } } // Check horizontal split. 
- if (mi_col + block_width / 2 < cm->mi_cols) { - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); - get_variance(&vt.part_variances->horz[0]); - get_variance(&vt.part_variances->horz[1]); - if (vt.part_variances->horz[0].variance < threshold && - vt.part_variances->horz[1].variance < threshold && + if (has_cols) { + BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_HORZ); + if (vt->variances.horz[0].variance < threshold[0] && + vt->variances.horz[1].variance < threshold[0] && get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, x, xd, mi_row, mi_col, subsize); - set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); - return 1; + set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize); + return; } } - - return 0; } - return 0; + +split: + { + set_vt_partitioning(cpi, x, xd, vt->split[0], + mi_row, mi_col, + threshold + 1, bsize_min + 1); + set_vt_partitioning(cpi, x, xd, vt->split[1], + mi_row, mi_col + hbw, + threshold + 1, bsize_min + 1); + set_vt_partitioning(cpi, x, xd, vt->split[2], + mi_row + hbh, mi_col, + threshold + 1, bsize_min + 1); + set_vt_partitioning(cpi, x, xd, vt->split[3], + mi_row + hbh, mi_col + hbw, + threshold + 1, bsize_min + 1); + return; + } } // Set the variance split thresholds for following the block sizes: @@ -649,23 +528,24 @@ const int64_t threshold_base = (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); if (is_key_frame) { - thresholds[0] = threshold_base; - thresholds[1] = threshold_base >> 2; - thresholds[2] = threshold_base >> 2; - thresholds[3] = threshold_base << 2; - } else { thresholds[1] = threshold_base; + thresholds[2] = threshold_base >> 2; + thresholds[3] = threshold_base >> 2; + thresholds[4] = threshold_base << 2; + } else { + thresholds[2] = threshold_base; if (cm->width <= 352 && cm->height <= 288) { - thresholds[0] = threshold_base >> 2; - thresholds[2] = threshold_base << 3; + thresholds[1] = threshold_base >> 2; + thresholds[3] = threshold_base 
<< 3; } else { - thresholds[0] = threshold_base; - thresholds[1] = (5 * threshold_base) >> 2; + thresholds[1] = threshold_base; + thresholds[2] = (5 * threshold_base) >> 2; if (cm->width >= 1920 && cm->height >= 1080) - thresholds[1] = (7 * threshold_base) >> 2; - thresholds[2] = threshold_base << cpi->oxcf.speed; + thresholds[2] = (7 * threshold_base) >> 2; + thresholds[3] = threshold_base << cpi->oxcf.speed; } } + thresholds[0] = INT64_MIN; } void vp10_set_variance_partition_thresholds(VP10_COMP *cpi, int q) { @@ -694,10 +574,10 @@ } // Compute the minmax over the 8x8 subblocks. -static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, - int dp, int x16_idx, int y16_idx, +static int compute_minmax_8x8(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, #if CONFIG_VP9_HIGHBITDEPTH - int highbd_flag, + int highbd, #endif int pixels_wide, int pixels_high) { @@ -706,24 +586,26 @@ int minmax_min = 255; // Loop over the 4 8x8 subblocks. for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); + const int x8_idx = ((k & 1) << 3); + const int y8_idx = ((k >> 1) << 3); int min = 0; int max = 0; if (x8_idx < pixels_wide && y8_idx < pixels_high) { + const int src_offset = y8_idx * src_stride + x8_idx; + const int ref_offset = y8_idx * ref_stride + x8_idx; #if CONFIG_VP9_HIGHBITDEPTH - if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, - d + y8_idx * dp + x8_idx, dp, + if (highbd) { + vpx_highbd_minmax_8x8(src + src_offset, src_stride, + ref + ref_offset, ref_stride, &min, &max); } else { - vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, - d + y8_idx * dp + x8_idx, dp, + vpx_minmax_8x8(src + src_offset, src_stride, + ref + ref_offset, ref_stride, &min, &max); } #else - vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, - d + y8_idx * dp + x8_idx, dp, + vpx_minmax_8x8(src + src_offset, src_stride, + ref + ref_offset, ref_stride, &min, &max); #endif 
if ((max - min) > minmax_max) @@ -735,117 +617,259 @@ return (minmax_max - minmax_min); } -static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, - int dp, int x8_idx, int y8_idx, v8x8 *vst, #if CONFIG_VP9_HIGHBITDEPTH - int highbd_flag, -#endif - int pixels_wide, - int pixels_high, - int is_key_frame) { - int k; - for (k = 0; k < 4; k++) { - int x4_idx = x8_idx + ((k & 1) << 2); - int y4_idx = y8_idx + ((k >> 1) << 2); - unsigned int sse = 0; - int sum = 0; - if (x4_idx < pixels_wide && y4_idx < pixels_high) { - int s_avg; - int d_avg = 128; -#if CONFIG_VP9_HIGHBITDEPTH - if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (!is_key_frame) - d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); - } else { - s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); - } +static INLINE int avg_4x4(const uint8_t *const src, const int stride, + const int highbd) { + if (highbd) { + return vpx_highbd_avg_4x4(src, stride); + } else { + return vpx_avg_4x4(src, stride); + } +} #else - s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); +static INLINE int avg_4x4(const uint8_t *const src, const int stride) { + return vpx_avg_4x4(src, stride); +} #endif - sum = s_avg - d_avg; - sse = sum * sum; - } - fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE int avg_8x8(const uint8_t *const src, const int stride, + const int highbd) { + if (highbd) { + return vpx_highbd_avg_8x8(src, stride); + } else { + return vpx_avg_8x8(src, stride); + } +} +#else +static INLINE int avg_8x8(const uint8_t *const src, const int stride) { + return vpx_avg_8x8(src, stride); +} +#endif + +static void init_variance_tree(VAR_TREE *const vt, +#if CONFIG_VP9_HIGHBITDEPTH + const int highbd, +#endif + BLOCK_SIZE bsize, + 
BLOCK_SIZE leaf_size, + const int width, const int height, + const uint8_t *const src, const int src_stride, + const uint8_t *const ref, const int ref_stride) { + assert(bsize >= leaf_size); + + vt->bsize = bsize; + + vt->force_split = 0; + + vt->src = src; + vt->src_stride = src_stride; + vt->ref = ref; + vt->ref_stride = ref_stride; + + vt->width = width; + vt->height = height; + +#if CONFIG_VP9_HIGHBITDEPTH + vt->highbd = highbd; +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (bsize > leaf_size) { + const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); + const int px = num_4x4_blocks_wide_lookup[subsize] * 4; + + init_variance_tree(vt->split[0], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + VPXMIN(px, width), VPXMIN(px, height), + src, src_stride, + ref, ref_stride); + init_variance_tree(vt->split[1], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + width - px, VPXMIN(px, height), + src + px, src_stride, + ref + px, ref_stride); + init_variance_tree(vt->split[2], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + VPXMIN(px, width), height - px, + src + px * src_stride, src_stride, + ref + px * ref_stride, ref_stride); + init_variance_tree(vt->split[3], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + width - px, height - px, + src + px * src_stride + px, src_stride, + ref + px * ref_stride + px, ref_stride); } } -static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, - int dp, int x16_idx, int y16_idx, v16x16 *vst, -#if CONFIG_VP9_HIGHBITDEPTH - int highbd_flag, -#endif - int pixels_wide, - int pixels_high, - int is_key_frame) { - int k; - for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); + +// Fill the variance tree based on averaging pixel values (sub-sampling), at +// the leaf node 
size. +static void fill_variance_tree(VAR_TREE *const vt, + const BLOCK_SIZE leaf_size) { + if (vt->bsize > leaf_size) { + fill_variance_tree(vt->split[0], leaf_size); + fill_variance_tree(vt->split[1], leaf_size); + fill_variance_tree(vt->split[2], leaf_size); + fill_variance_tree(vt->split[3], leaf_size); + fill_variance_node(vt); + } else if (vt->width <= 0 || vt->height <= 0) { + fill_variance(0, 0, 0, &vt->variances.none); + } else { unsigned int sse = 0; int sum = 0; - if (x8_idx < pixels_wide && y8_idx < pixels_high) { - int s_avg; - int d_avg = 128; -#if CONFIG_VP9_HIGHBITDEPTH - if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); - if (!is_key_frame) - d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } else { - s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } -#else - s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); -#endif - sum = s_avg - d_avg; - sse = sum * sum; + int src_avg; + int ref_avg; + assert(leaf_size == BLOCK_4X4 || leaf_size == BLOCK_8X8); + if (leaf_size == BLOCK_4X4) { + src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd)); + ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); + } else { + src_avg = avg_8x8(vt->src, vt->src_stride IF_HBD(, vt->highbd)); + ref_avg = avg_8x8(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); } - fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); + sum = src_avg - ref_avg; + sse = sum * sum; + fill_variance(sse, sum, 0, &vt->variances.none); } } +static void refine_variance_tree(VAR_TREE *const vt, const int64_t threshold) { + if (vt->bsize >= BLOCK_8X8) { + if (vt->bsize == BLOCK_16X16) { + if (vt->variances.none.variance <= threshold) + return; + else + vt->force_split = 0; + } + + refine_variance_tree(vt->split[0], threshold); + 
refine_variance_tree(vt->split[1], threshold); + refine_variance_tree(vt->split[2], threshold); + refine_variance_tree(vt->split[3], threshold); + + if (vt->bsize <= BLOCK_16X16) + fill_variance_node(vt); + } else if (vt->width <= 0 || vt->height <= 0) { + fill_variance(0, 0, 0, &vt->variances.none); + } else { + const int src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd)); + const int ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); + const int sum = src_avg - ref_avg; + const unsigned int sse = sum * sum; + assert(vt->bsize == BLOCK_4X4); + fill_variance(sse, sum, 0, &vt->variances.none); + } +} + +static int check_split_key_frame(VAR_TREE *const vt, + const int64_t threshold) { + if (vt->bsize == BLOCK_32X32) { + vt->force_split = vt->variances.none.variance > threshold; + } else { + vt->force_split |= check_split_key_frame(vt->split[0], threshold); + vt->force_split |= check_split_key_frame(vt->split[1], threshold); + vt->force_split |= check_split_key_frame(vt->split[2], threshold); + vt->force_split |= check_split_key_frame(vt->split[3], threshold); + } + return vt->force_split; +} + +static int check_split(VP10_COMP *const cpi, + VAR_TREE *const vt, + const int segment_id, + const int64_t *const thresholds + ) { + if (vt->bsize == BLOCK_16X16) { + vt->force_split = vt->variances.none.variance > thresholds[0]; + if (!vt->force_split && + vt->variances.none.variance > thresholds[-1] && + !cyclic_refresh_segment_id_boosted(segment_id)) { + // We have some nominal amount of 16x16 variance (based on average), + // compute the minmax over the 8x8 sub-blocks, and if above threshold, + // force split to 8x8 block for this 16x16 block. 
+ int minmax = compute_minmax_8x8(vt->src, vt->src_stride, + vt->ref, vt->ref_stride, +#if CONFIG_VP9_HIGHBITDEPTH + vt->highbd, +#endif + vt->width, vt->height); + vt->force_split = minmax > cpi->vbp_threshold_minmax; + } + } else { + vt->force_split |= check_split(cpi, vt->split[0], + segment_id, thresholds + 1); + vt->force_split |= check_split(cpi, vt->split[1], + segment_id, thresholds + 1); + vt->force_split |= check_split(cpi, vt->split[2], + segment_id, thresholds + 1); + vt->force_split |= check_split(cpi, vt->split[3], + segment_id, thresholds + 1); + + if (vt->bsize == BLOCK_32X32 && !vt->force_split) { + vt->force_split = vt->variances.none.variance > thresholds[0]; + } + } + + return vt->force_split; +} + // This function chooses partitioning based on the variance between source and -// reconstructed last, where variance is computed for down-sampled inputs. -static int choose_partitioning(VP10_COMP *cpi, +// reconstructed last (or golden), where variance is computed for down-sampled +// inputs. 
+static void choose_partitioning(VP10_COMP *const cpi, + ThreadData *const td, const TileInfo *const tile, - MACROBLOCK *x, - int mi_row, int mi_col) { - VP10_COMMON * const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - int i, j, k, m; - v64x64 vt; - v16x16 vt2[16]; - int force_split[21]; - uint8_t *s; - const uint8_t *d; - int sp; - int dp; - int pixels_wide = 8 * num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; - int pixels_high = 8 * num_8x8_blocks_high_lookup[BLOCK_LARGEST]; - int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], - cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]}; + MACROBLOCK *const x, + const int mi_row, const int mi_col) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + VAR_TREE *const vt = td->var_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2]; + int i; + const uint8_t *src; + const uint8_t *ref; + int src_stride; + int ref_stride; + int pixels_wide = 8 * num_8x8_blocks_wide_lookup[cm->sb_size]; + int pixels_high = 8 * num_8x8_blocks_high_lookup[cm->sb_size]; + int64_t thresholds[5] = { + cpi->vbp_thresholds[0], + cpi->vbp_thresholds[1], + cpi->vbp_thresholds[2], + cpi->vbp_thresholds[3], + cpi->vbp_thresholds[4], + }; + BLOCK_SIZE bsize_min[5] = { + BLOCK_16X16, + BLOCK_16X16, + BLOCK_16X16, + cpi->vbp_bsize_min, + BLOCK_8X8 + }; + const int start_level = cm->sb_size == BLOCK_64X64 ? 1 : 0; + const int64_t *const thre = thresholds + start_level; + const BLOCK_SIZE *const bmin = bsize_min + start_level; - // Always use 4x4 partition for key frame. const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int use_4x4_partition = is_key_frame; const int low_res = (cm->width <= 352 && cm->height <= 288); - int variance4x4downsample[16]; int segment_id = CR_SEGMENT_ID_BASE; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { const uint8_t *const map = cm->seg.update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; - segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col); + segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col); if (cyclic_refresh_segment_id_boosted(segment_id)) { int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex); @@ -853,45 +877,38 @@ } } -#if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES - printf("Not yet implemented: choose_partitioning\n"); - exit(-1); -#endif // CONFIG_EXT_PARTITION - - set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_LARGEST); + set_offsets(cpi, tile, x, mi_row, mi_col, cm->sb_size); if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); - s = x->plane[0].src.buf; - sp = x->plane[0].src.stride; + src = x->plane[0].src.buf; + src_stride = x->plane[0].src.stride; if (!is_key_frame) { MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; unsigned int uv_sad; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - - const YV12_BUFFER_CONFIG *yv12_g = NULL; + const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); unsigned int y_sad, y_sad_g; - const int max_mi_block_size = num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; - const int is_right_edge = mi_col + max_mi_block_size / 2 > cm->mi_cols; - const int is_left_edge = mi_row + max_mi_block_size / 2 > cm->mi_rows; + const int hbs = cm->mib_size / 2; + const int split_vert = mi_col + hbs >= cm->mi_cols; + const int split_horz = mi_row + hbs >= cm->mi_rows; BLOCK_SIZE bsize; - if (is_right_edge && is_left_edge) - bsize = get_subsize(BLOCK_LARGEST, PARTITION_SPLIT); - else if (is_right_edge) - bsize = get_subsize(BLOCK_LARGEST, PARTITION_VERT); - else if (is_left_edge) - bsize = get_subsize(BLOCK_LARGEST, PARTITION_HORZ); + if (split_vert && split_horz) + bsize = get_subsize(cm->sb_size, PARTITION_SPLIT); + else if (split_vert) + bsize = get_subsize(cm->sb_size, PARTITION_VERT); + else if 
(split_horz) + bsize = get_subsize(cm->sb_size, PARTITION_HORZ); else - bsize = BLOCK_LARGEST; + bsize = cm->sb_size; assert(yv12 != NULL); - yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); if (yv12_g && yv12_g != yv12) { vp10_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, @@ -908,11 +925,12 @@ &cm->frame_refs[LAST_FRAME - 1].sf); mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[1] = NONE; - mbmi->sb_type = BLOCK_LARGEST; + mbmi->sb_type = cm->sb_size; mbmi->mv[0].as_int = 0; mbmi->interp_filter = BILINEAR; y_sad = vp10_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); + if (y_sad_g < y_sad) { vp10_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); @@ -923,9 +941,9 @@ x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv; } - vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_LARGEST); + vp10_build_inter_predictors_sb(xd, mi_row, mi_col, cm->sb_size); - for (i = 1; i <= 2; ++i) { + for (i = 1; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *p = &x->plane[i]; struct macroblockd_plane *pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); @@ -939,196 +957,65 @@ x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2); } - d = xd->plane[0].dst.buf; - dp = xd->plane[0].dst.stride; + ref = xd->plane[0].dst.buf; + ref_stride = xd->plane[0].dst.stride; // If the y_sad is very small, take the largest partition and exit. // Don't check on boosted segment for now, as largest is suppressed there. 
if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { - if (!is_right_edge && !is_left_edge) { - set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_LARGEST); - return 0; + if (!split_vert && !split_horz) { + set_block_size(cpi, x, xd, mi_row, mi_col, cm->sb_size); + return; } } } else { - d = VP10_VAR_OFFS; - dp = 0; + ref = VP10_VAR_OFFS; + ref_stride = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (xd->bd) { case 10: - d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10); + ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10); break; case 12: - d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12); + ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12); break; case 8: default: - d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8); + ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8); break; } } #endif // CONFIG_VP9_HIGHBITDEPTH } - // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, - // 5-20 for the 16x16 blocks. - force_split[0] = 0; - // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances - // for splits. 
- for (i = 0; i < 4; i++) { - const int x32_idx = ((i & 1) << 5); - const int y32_idx = ((i >> 1) << 5); - const int i2 = i << 2; - force_split[i + 1] = 0; - for (j = 0; j < 4; j++) { - const int x16_idx = x32_idx + ((j & 1) << 4); - const int y16_idx = y32_idx + ((j >> 1) << 4); - const int split_index = 5 + i2 + j; - v16x16 *vst = &vt.split[i].split[j]; - force_split[split_index] = 0; - variance4x4downsample[i2 + j] = 0; - if (!is_key_frame) { - fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, + init_variance_tree(vt, #if CONFIG_VP9_HIGHBITDEPTH - xd->cur_buf->flags, -#endif - pixels_wide, - pixels_high, - is_key_frame); - fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); - get_variance(&vt.split[i].split[j].part_variances.none); - if (vt.split[i].split[j].part_variances.none.variance > - thresholds[2]) { - // 16X16 variance is above threshold for split, so force split to 8x8 - // for this 16x16 block (this also forces splits for upper levels). - force_split[split_index] = 1; - force_split[i + 1] = 1; - force_split[0] = 1; - } else if (vt.split[i].split[j].part_variances.none.variance > - thresholds[1] && - !cyclic_refresh_segment_id_boosted(segment_id)) { - // We have some nominal amount of 16x16 variance (based on average), - // compute the minmax over the 8x8 sub-blocks, and if above threshold, - // force split to 8x8 block for this 16x16 block. - int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, -#if CONFIG_VP9_HIGHBITDEPTH - xd->cur_buf->flags, -#endif - pixels_wide, pixels_high); - if (minmax > cpi->vbp_threshold_minmax) { - force_split[split_index] = 1; - force_split[i + 1] = 1; - force_split[0] = 1; - } - } - } - if (is_key_frame || (low_res && - vt.split[i].split[j].part_variances.none.variance > - (thresholds[1] << 1))) { - force_split[split_index] = 0; - // Go down to 4x4 down-sampling for variance. 
- variance4x4downsample[i2 + j] = 1; - for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); - v8x8 *vst2 = is_key_frame ? &vst->split[k] : - &vt2[i2 + j].split[k]; - fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, -#if CONFIG_VP9_HIGHBITDEPTH - xd->cur_buf->flags, -#endif - pixels_wide, - pixels_high, - is_key_frame); - } - } + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, +#endif // CONFIG_VP9_HIGHBITDEPTH + cm->sb_size, + (is_key_frame || low_res) ? BLOCK_4X4 : BLOCK_8X8, + pixels_wide, pixels_high, + src, src_stride, ref, ref_stride); + + // Fill in the entire tree of variances and compute splits. + if (is_key_frame) { + fill_variance_tree(vt, BLOCK_4X4); + check_split_key_frame(vt, thre[1]); + } else { + fill_variance_tree(vt, BLOCK_8X8); + check_split(cpi, vt, segment_id, thre); + if (low_res) { + refine_variance_tree(vt, thre[1] << 1); } } - // Fill the rest of the variance tree by summing split partition values. - for (i = 0; i < 4; i++) { - const int i2 = i << 2; - for (j = 0; j < 4; j++) { - if (variance4x4downsample[i2 + j] == 1) { - v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : - &vt.split[i].split[j]; - for (m = 0; m < 4; m++) - fill_variance_tree(&vtemp->split[m], BLOCK_8X8); - fill_variance_tree(vtemp, BLOCK_16X16); - } - } - fill_variance_tree(&vt.split[i], BLOCK_32X32); - // If variance of this 32x32 block is above the threshold, force the block - // to split. This also forces a split on the upper (64x64) level. 
- if (!force_split[i + 1]) { - get_variance(&vt.split[i].part_variances.none); - if (vt.split[i].part_variances.none.variance > thresholds[1]) { - force_split[i + 1] = 1; - force_split[0] = 1; - } - } - } - if (!force_split[0]) { - fill_variance_tree(&vt, BLOCK_64X64); - get_variance(&vt.part_variances.none); - } + vt->force_split |= mi_col + cm->mib_size > cm->mi_cols || + mi_row + cm->mib_size > cm->mi_rows; // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold. - if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || - !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col, - thresholds[0], BLOCK_16X16, force_split[0])) { - for (i = 0; i < 4; ++i) { - const int x32_idx = ((i & 1) << 2); - const int y32_idx = ((i >> 1) << 2); - const int i2 = i << 2; - if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32, - (mi_row + y32_idx), (mi_col + x32_idx), - thresholds[1], BLOCK_16X16, - force_split[i + 1])) { - for (j = 0; j < 4; ++j) { - const int x16_idx = ((j & 1) << 1); - const int y16_idx = ((j >> 1) << 1); - // For inter frames: if variance4x4downsample[] == 1 for this 16x16 - // block, then the variance is based on 4x4 down-sampling, so use vt2 - // in set_vt_partioning(), otherwise use vt. - v16x16 *vtemp = (!is_key_frame && - variance4x4downsample[i2 + j] == 1) ? 
- &vt2[i2 + j] : &vt.split[i].split[j]; - if (!set_vt_partitioning(cpi, x, xd, vtemp, BLOCK_16X16, - mi_row + y32_idx + y16_idx, - mi_col + x32_idx + x16_idx, - thresholds[2], - cpi->vbp_bsize_min, - force_split[5 + i2 + j])) { - for (k = 0; k < 4; ++k) { - const int x8_idx = (k & 1); - const int y8_idx = (k >> 1); - if (use_4x4_partition) { - if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k], - BLOCK_8X8, - mi_row + y32_idx + y16_idx + y8_idx, - mi_col + x32_idx + x16_idx + x8_idx, - thresholds[3], BLOCK_8X8, 0)) { - set_block_size(cpi, x, xd, - (mi_row + y32_idx + y16_idx + y8_idx), - (mi_col + x32_idx + x16_idx + x8_idx), - BLOCK_4X4); - } - } else { - set_block_size(cpi, x, xd, - (mi_row + y32_idx + y16_idx + y8_idx), - (mi_col + x32_idx + x16_idx + x8_idx), - BLOCK_8X8); - } - } - } - } - } - } - } - return 0; + set_vt_partitioning(cpi, x, xd, vt, mi_row, mi_col, thre, bmin); } static void update_state(VP10_COMP *cpi, ThreadData *td, @@ -1309,7 +1196,7 @@ int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled) { int y, x_idx; -#if CONFIG_VAR_TX +#if CONFIG_VAR_TX || CONFIG_REF_MV int i; #endif VP10_COMMON *const cm = &cpi->common; @@ -2022,13 +1909,14 @@ !supertx_enabled && #endif is_interintra_allowed(mbmi)) { + const int bsize_group = size_group_lookup[bsize]; if (mbmi->ref_frame[1] == INTRA_FRAME) { - counts->y_mode[size_group_lookup[bsize]][mbmi->interintra_mode]++; - counts->interintra[bsize][1]++; - if (get_wedge_bits(bsize)) + counts->interintra[bsize_group][1]++; + counts->interintra_mode[bsize_group][mbmi->interintra_mode]++; + if (is_interintra_wedge_used(bsize)) counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++; } else { - counts->interintra[bsize][0]++; + counts->interintra[bsize_group][0]++; } } if (cm->reference_mode != SINGLE_REFERENCE && @@ -2036,7 +1924,7 @@ #if CONFIG_OBMC !(is_obmc_allowed(mbmi) && mbmi->obmc) && #endif // CONFIG_OBMC - get_wedge_bits(bsize)) { + is_interinter_wedge_used(bsize)) { 
counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++; } #endif // CONFIG_EXT_INTER @@ -2146,15 +2034,15 @@ } typedef struct { - ENTROPY_CONTEXT a[2 * MI_BLOCK_SIZE * MAX_MB_PLANE]; - ENTROPY_CONTEXT l[2 * MI_BLOCK_SIZE * MAX_MB_PLANE]; - PARTITION_CONTEXT sa[MI_BLOCK_SIZE]; - PARTITION_CONTEXT sl[MI_BLOCK_SIZE]; + ENTROPY_CONTEXT a[2 * MAX_MIB_SIZE * MAX_MB_PLANE]; + ENTROPY_CONTEXT l[2 * MAX_MIB_SIZE * MAX_MB_PLANE]; + PARTITION_CONTEXT sa[MAX_MIB_SIZE]; + PARTITION_CONTEXT sl[MAX_MIB_SIZE]; #if CONFIG_VAR_TX TXFM_CONTEXT *p_ta; TXFM_CONTEXT *p_tl; - TXFM_CONTEXT ta[MI_BLOCK_SIZE]; - TXFM_CONTEXT tl[MI_BLOCK_SIZE]; + TXFM_CONTEXT ta[MAX_MIB_SIZE]; + TXFM_CONTEXT tl[MAX_MIB_SIZE]; #endif } RD_SEARCH_MACROBLOCK_CONTEXT; @@ -2175,14 +2063,14 @@ xd->plane[p].subsampling_x); memcpy( xd->left_context[p] - + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), + + ((mi_row & MAX_MIB_MASK) * 2 >> xd->plane[p].subsampling_y), ctx->l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } memcpy(xd->above_seg_context + mi_col, ctx->sa, sizeof(*xd->above_seg_context) * mi_width); - memcpy(xd->left_seg_context + (mi_row & MI_MASK), ctx->sl, + memcpy(xd->left_seg_context + (mi_row & MAX_MIB_MASK), ctx->sl, sizeof(xd->left_seg_context[0]) * mi_height); #if CONFIG_VAR_TX xd->above_txfm_context = ctx->p_ta; @@ -2214,13 +2102,13 @@ memcpy( ctx->l + num_4x4_blocks_high * p, xd->left_context[p] - + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), + + ((mi_row & MAX_MIB_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } memcpy(ctx->sa, xd->above_seg_context + mi_col, sizeof(*xd->above_seg_context) * mi_width); - memcpy(ctx->sl, xd->left_seg_context + (mi_row & MI_MASK), + memcpy(ctx->sl, xd->left_seg_context + (mi_row & MAX_MIB_MASK), sizeof(xd->left_seg_context[0]) * mi_height); #if CONFIG_VAR_TX memcpy(ctx->ta, xd->above_txfm_context, @@ 
-2262,35 +2150,24 @@ TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { - VP10_COMMON *const cm = &cpi->common; + const VP10_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; - int ctx; - PARTITION_TYPE partition; - BLOCK_SIZE subsize = bsize; + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; + const PARTITION_TYPE partition = pc_tree->partitioning; + const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES - BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); + const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); #endif + assert(bsize >= BLOCK_8X8); + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - if (bsize >= BLOCK_8X8) { - ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - subsize = get_subsize(bsize, pc_tree->partitioning); - } else { - ctx = 0; - subsize = BLOCK_4X4; - } - - partition = partition_lookup[bsl][subsize]; -#if CONFIG_EXT_PARTITION_TYPES - if (bsize > BLOCK_8X8) - partition = pc_tree->partitioning; -#endif - if (output_enabled && bsize != BLOCK_4X4) + if (output_enabled) td->counts->partition[ctx][partition]++; #if CONFIG_SUPERTX @@ -2494,7 +2371,7 @@ } // Check to see if the given partition size is allowed for a specified number -// of 8x8 block rows and columns remaining in the image. +// of mi block rows and columns remaining in the image. 
// If not then return the largest allowed partition size static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, int cols_left, @@ -2513,62 +2390,64 @@ return bsize; } -static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, - int bh_in, int bw_in, int row8x8_remaining, int col8x8_remaining, - BLOCK_SIZE bsize, MODE_INFO **mi_8x8) { +static void set_partial_sb_partition(const VP10_COMMON *const cm, + MODE_INFO *mi, + int bh_in, int bw_in, + int mi_rows_remaining, + int mi_cols_remaining, + BLOCK_SIZE bsize, MODE_INFO **mib) { int bh = bh_in; int r, c; - for (r = 0; r < MI_BLOCK_SIZE; r += bh) { + for (r = 0; r < cm->mib_size; r += bh) { int bw = bw_in; - for (c = 0; c < MI_BLOCK_SIZE; c += bw) { - const int index = r * mis + c; - mi_8x8[index] = mi + index; - mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize, - row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); + for (c = 0; c < cm->mib_size; c += bw) { + const int index = r * cm->mi_stride + c; + mib[index] = mi + index; + mib[index]->mbmi.sb_type = find_partition_size(bsize, + mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw); } } } -// This function attempts to set all mode info entries in a given SB64 +// This function attempts to set all mode info entries in a given superblock // to the same block partition size. // However, at the bottom and right borders of the image the requested size // may not be allowed in which case this code attempts to choose the largest // allowable partition. 
static void set_fixed_partitioning(VP10_COMP *cpi, const TileInfo *const tile, - MODE_INFO **mi_8x8, int mi_row, int mi_col, + MODE_INFO **mib, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP10_COMMON *const cm = &cpi->common; - const int mis = cm->mi_stride; - const int row8x8_remaining = tile->mi_row_end - mi_row; - const int col8x8_remaining = tile->mi_col_end - mi_col; + const int mi_rows_remaining = tile->mi_row_end - mi_row; + const int mi_cols_remaining = tile->mi_col_end - mi_col; int block_row, block_col; - MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + MODE_INFO *const mi_upper_left = cm->mi + mi_row * cm->mi_stride + mi_col; int bh = num_8x8_blocks_high_lookup[bsize]; int bw = num_8x8_blocks_wide_lookup[bsize]; - assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0)); - // Apply the requested partition size to the SB64 if it is all "in image" - if ((col8x8_remaining >= MI_BLOCK_SIZE) && - (row8x8_remaining >= MI_BLOCK_SIZE)) { - for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { - for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { - int index = block_row * mis + block_col; - mi_8x8[index] = mi_upper_left + index; - mi_8x8[index]->mbmi.sb_type = bsize; + // Apply the requested partition size to the SB if it is all "in image" + if ((mi_cols_remaining >= cm->mib_size) && + (mi_rows_remaining >= cm->mib_size)) { + for (block_row = 0; block_row < cm->mib_size; block_row += bh) { + for (block_col = 0; block_col < cm->mib_size; block_col += bw) { + int index = block_row * cm->mi_stride + block_col; + mib[index] = mi_upper_left + index; + mib[index]->mbmi.sb_type = bsize; } } } else { - // Else this is a partial SB64. - set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, - col8x8_remaining, bsize, mi_8x8); + // Else this is a partial SB. 
+ set_partial_sb_partition(cm, mi_upper_left, bh, bw, + mi_rows_remaining, mi_cols_remaining, bsize, mib); } } static void rd_use_partition(VP10_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, - MODE_INFO **mi_8x8, TOKENEXTRA **tp, + MODE_INFO **mib, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, int64_t *dist, @@ -2580,18 +2459,17 @@ TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - const int mis = cm->mi_stride; - const int bsl = b_width_log2_lookup[bsize]; - const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; - const int bss = (1 << bsl) / 4; - int i, pl; - PARTITION_TYPE partition = PARTITION_NONE; - BLOCK_SIZE subsize; + const int bs = num_8x8_blocks_wide_lookup[bsize]; + const int hbs = bs / 2; + int i; + const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); + const BLOCK_SIZE subsize = get_subsize(bsize, partition); RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; - BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; + BLOCK_SIZE bs_type = mib[0]->mbmi.sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx = &pc_tree->none; #if CONFIG_SUPERTX @@ -2600,10 +2478,6 @@ int chosen_rate_nocoef = INT_MAX; #endif -#if CONFIG_EXT_PARTITION_TYPES - assert(0); -#endif - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -2614,14 +2488,12 @@ vp10_rd_cost_reset(&none_rdc); vp10_rd_cost_reset(&chosen_rdc); - partition = partition_lookup[bsl][bs_type]; - subsize = get_subsize(bsize, partition); - pc_tree->partitioning = partition; #if CONFIG_VAR_TX xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); #endif 
save_context(x, &x_ctx, mi_row, mi_col, bsize); @@ -2640,7 +2512,7 @@ splits_below = 1; for (i = 0; i < 4; i++) { int jj = i >> 1, ii = i & 0x01; - MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss]; + MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs]; if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) { splits_below = 0; } @@ -2650,8 +2522,8 @@ // If partition is not none try none unless each of the 4 splits are split // even further.. if (partition != PARTITION_NONE && !splits_below && - mi_row + (mi_step >> 1) < cm->mi_rows && - mi_col + (mi_step >> 1) < cm->mi_cols) { + mi_row + hbs < cm->mi_rows && + mi_col + hbs < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, #if CONFIG_SUPERTX @@ -2662,8 +2534,6 @@ #endif bsize, ctx, INT64_MAX); - pl = partition_plane_context(xd, mi_row, mi_col, bsize); - if (none_rdc.rate < INT_MAX) { none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate, @@ -2675,7 +2545,7 @@ restore_context(x, &x_ctx, mi_row, mi_col, bsize); - mi_8x8[0]->mbmi.sb_type = bs_type; + mib[0]->mbmi.sb_type = bs_type; pc_tree->partitioning = partition; } } @@ -2702,7 +2572,7 @@ subsize, &pc_tree->horizontal[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { + bsize >= BLOCK_8X8 && mi_row + hbs < cm->mi_rows) { RD_COST tmp_rdc; #if CONFIG_SUPERTX int rt_nocoef = 0; @@ -2712,7 +2582,7 @@ update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile_data, x, - mi_row + (mi_step >> 1), mi_col, &tmp_rdc, + mi_row + hbs, mi_col, &tmp_rdc, #if CONFIG_SUPERTX &rt_nocoef, #endif @@ -2745,7 +2615,7 @@ #endif subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { + bsize >= 
BLOCK_8X8 && mi_col + hbs < cm->mi_cols) { RD_COST tmp_rdc; #if CONFIG_SUPERTX int rt_nocoef = 0; @@ -2755,7 +2625,7 @@ update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile_data, x, - mi_row, mi_col + (mi_step >> 1), &tmp_rdc, + mi_row, mi_col + hbs, &tmp_rdc, #if CONFIG_SUPERTX &rt_nocoef, #endif @@ -2798,8 +2668,8 @@ last_part_rate_nocoef = 0; #endif for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (mi_step >> 1); - int y_idx = (i >> 1) * (mi_step >> 1); + int x_idx = (i & 1) * hbs; + int y_idx = (i >> 1) * hbs; int jj = i >> 1, ii = i & 0x01; RD_COST tmp_rdc; #if CONFIG_SUPERTX @@ -2810,7 +2680,7 @@ vp10_rd_cost_init(&tmp_rdc); rd_use_partition(cpi, td, tile_data, - mi_8x8 + jj * bss * mis + ii * bss, tp, + mib + jj * hbs * cm->mi_stride + ii * hbs, tp, mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, &tmp_rdc.dist, #if CONFIG_SUPERTX @@ -2831,12 +2701,18 @@ #endif } break; +#if CONFIG_EXT_PARTITION_TYPES + case PARTITION_VERT_A: + case PARTITION_VERT_B: + case PARTITION_HORZ_A: + case PARTITION_HORZ_B: + assert(0 && "Cannot handle extended partiton types"); +#endif // CONFIG_EXT_PARTITION_TYPES default: assert(0); break; } - pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rdc.rate < INT_MAX) { last_part_rdc.rate += cpi->partition_cost[pl][partition]; last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, @@ -2850,10 +2726,10 @@ && cpi->sf.adjust_partitioning_from_last_frame && cpi->sf.partition_search_type == SEARCH_PARTITION && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 - && (mi_row + mi_step < cm->mi_rows || - mi_row + (mi_step >> 1) == cm->mi_rows) - && (mi_col + mi_step < cm->mi_cols || - mi_col + (mi_step >> 1) == cm->mi_cols)) { + && (mi_row + bs < cm->mi_rows || + mi_row + hbs == cm->mi_rows) + && (mi_col + bs < cm->mi_cols || + mi_col + hbs == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); 
chosen_rdc.rate = 0; chosen_rdc.dist = 0; @@ -2867,8 +2743,8 @@ // Split partition. for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (mi_step >> 1); - int y_idx = (i >> 1) * (mi_step >> 1); + int x_idx = (i & 1) * hbs; + int y_idx = (i >> 1) * hbs; RD_COST tmp_rdc; #if CONFIG_SUPERTX int rt_nocoef = 0; @@ -2910,14 +2786,11 @@ encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize, pc_tree->split[i]); - pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, - split_subsize); chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; #if CONFIG_SUPERTX chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; #endif } - pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rdc.rate < INT_MAX) { chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, @@ -2930,7 +2803,7 @@ // If last_part is better set the partitioning to that. if (last_part_rdc.rdcost < chosen_rdc.rdcost) { - mi_8x8[0]->mbmi.sb_type = bsize; + mib[0]->mbmi.sb_type = bsize; if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rdc = last_part_rdc; @@ -2952,11 +2825,11 @@ // We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. 
- if (bsize == BLOCK_LARGEST) + if (bsize == cm->sb_size) assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); if (do_recon) { - int output_enabled = (bsize == BLOCK_LARGEST); + int output_enabled = (bsize == cm->sb_size); encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } @@ -2980,13 +2853,13 @@ }; static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { - BLOCK_8X8, // 4x4 - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8 - BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16 - BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32 - BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 32x64, 64x32, 64x64 + BLOCK_8X8, // 4x4 + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8 + BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16 + BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32 + BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64 #if CONFIG_EXT_PARTITION - BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128 + BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST // 64x128, 128x64, 128x128 #endif // CONFIG_EXT_PARTITION }; @@ -3004,26 +2877,24 @@ // Look at all the mode_info entries for blocks that are part of this // partition and find the min and max values for sb_type. -// At the moment this is designed to work on a 64x64 SB but could be +// At the moment this is designed to work on a superblock but could be // adjusted to use a size parameter. // // The min and max are assumed to have been initialized prior to calling this -// function so repeat calls can accumulate a min and max of more than one sb64. -static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, +// function so repeat calls can accumulate a min and max of more than one +// superblock. 
+static void get_sb_partition_size_range(const VP10_COMMON *const cm, + MACROBLOCKD *xd, MODE_INFO **mib, BLOCK_SIZE *min_block_size, - BLOCK_SIZE *max_block_size, - int bs_hist[BLOCK_SIZES]) { - int sb_width_in_blocks = MI_BLOCK_SIZE; - int sb_height_in_blocks = MI_BLOCK_SIZE; + BLOCK_SIZE *max_block_size) { int i, j; int index = 0; // Check the sb_type for each block that belongs to this region. - for (i = 0; i < sb_height_in_blocks; ++i) { - for (j = 0; j < sb_width_in_blocks; ++j) { - MODE_INFO *mi = mi_8x8[index+j]; - BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0; - bs_hist[sb_type]++; + for (i = 0; i < cm->mib_size; ++i) { + for (j = 0; j < cm->mib_size; ++j) { + MODE_INFO *mi = mib[index+j]; + BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : BLOCK_4X4; *min_block_size = VPXMIN(*min_block_size, sb_type); *max_block_size = VPXMAX(*max_block_size, sb_type); } @@ -3042,12 +2913,11 @@ MODE_INFO **mi = xd->mi; const int left_in_image = xd->left_available && mi[-1]; const int above_in_image = xd->up_available && mi[-xd->mi_stride]; - const int row8x8_remaining = tile->mi_row_end - mi_row; - const int col8x8_remaining = tile->mi_col_end - mi_col; + const int mi_rows_remaining = tile->mi_row_end - mi_row; + const int mi_cols_remaining = tile->mi_col_end - mi_col; int bh, bw; BLOCK_SIZE min_size = BLOCK_4X4; BLOCK_SIZE max_size = BLOCK_LARGEST; - int bs_hist[BLOCK_SIZES] = {0}; // Trap case where we do not have a prediction. 
 if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { @@ -3061,19 +2931,17 @@ if (cm->frame_type != KEY_FRAME) { MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; - get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); + get_sb_partition_size_range(cm, xd, prev_mi, &min_size, &max_size); } - // Find the min and max partition sizes used in the left SB64 + // Find the min and max partition sizes used in the left superblock if (left_in_image) { - MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; - get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, - bs_hist); + MODE_INFO **left_sb_mi = &mi[-cm->mib_size]; + get_sb_partition_size_range(cm, xd, left_sb_mi, &min_size, &max_size); } - // Find the min and max partition sizes used in the above SB64. + // Find the min and max partition sizes used in the above superblock. if (above_in_image) { - MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; - get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, - bs_hist); + MODE_INFO **above_sb_mi = &mi[-xd->mi_stride * cm->mib_size]; + get_sb_partition_size_range(cm, xd, above_sb_mi, &min_size, &max_size); } // Adjust observed min and max for "relaxed" auto partition case. @@ -3084,29 +2952,28 @@ } // Check border cases where max and min from neighbors may not be legal. - max_size = find_partition_size(max_size, - row8x8_remaining, col8x8_remaining, + max_size = find_partition_size(max_size, mi_rows_remaining, mi_cols_remaining, &bh, &bw); + min_size = VPXMIN(min_size, max_size); + // Test for blocks at the edge of the active image. // This may be the actual edge of the image or where there are formatting // bars. 
if (vp10_active_edge_sb(cpi, mi_row, mi_col)) { min_size = BLOCK_4X4; } else { - min_size = - VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size)); + min_size = VPXMIN(cpi->sf.rd_auto_partition_min_limit, min_size); } // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as // *min_block_size. - if (cpi->sf.use_square_partition_only && - next_square_size[max_size] < min_size) { - min_size = next_square_size[max_size]; + if (cpi->sf.use_square_partition_only) { + min_size = VPXMIN(min_size, next_square_size[max_size]); } - *min_block_size = min_size; - *max_block_size = max_size; + *min_block_size = VPXMIN(min_size, cm->sb_size); + *max_block_size = VPXMIN(max_size, cm->sb_size); } // TODO(jingning) refactor functions setting partition search range @@ -3159,8 +3026,8 @@ max_size = max_partition_size[max_size]; } - *min_bs = min_size; - *max_bs = max_size; + *min_bs = VPXMIN(min_size, cm->sb_size); + *max_bs = VPXMIN(max_size, cm->sb_size); } static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { @@ -3508,7 +3375,8 @@ #if CONFIG_VAR_TX xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); #endif save_context(x, &x_ctx, mi_row, mi_col, bsize); @@ -4203,12 +4071,12 @@ if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && pc_tree->index != 3) { - int output_enabled = (bsize == BLOCK_LARGEST); + int output_enabled = (bsize == cm->sb_size); encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } - if (bsize == BLOCK_LARGEST) { + if (bsize == cm->sb_size) { assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip)); assert(best_rdc.rate < INT_MAX); assert(best_rdc.dist < INT64_MAX); @@ -4239,7 +4107,7 @@ // Code each SB in the row for (mi_col = 
tile_info->mi_col_start; mi_col < tile_info->mi_col_end; - mi_col += MI_BLOCK_SIZE) { + mi_col += cm->mib_size) { const struct segmentation *const seg = &cm->seg; int dummy_rate; int64_t dummy_dist; @@ -4252,6 +4120,7 @@ const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi = cm->mi_grid_visible + idx_str; + PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2]; if (sf->adaptive_pred_interp_filter) { for (i = 0; i < leaf_nodes; ++i) @@ -4266,61 +4135,60 @@ } vp10_zero(x->pred_mv); - td->pc_root->index = 0; + pc_root->index = 0; if (seg->enabled) { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - int segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col); + int segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col); seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); } x->source_variance = UINT_MAX; if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { - const BLOCK_SIZE bsize = - seg_skip ? BLOCK_LARGEST : sf->always_this_block_size; - set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); + BLOCK_SIZE bsize; + set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size); + bsize = seg_skip ? 
cm->sb_size : sf->always_this_block_size; set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_LARGEST, &dummy_rate, &dummy_dist, + cm->sb_size, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX - 1, td->pc_root); + 1, pc_root); } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; - set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); + set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size); bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_LARGEST, &dummy_rate, &dummy_dist, + cm->sb_size, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX - 1, td->pc_root); - } else if (sf->partition_search_type == VAR_BASED_PARTITION && - cm->frame_type != KEY_FRAME) { - choose_partitioning(cpi, tile_info, x, mi_row, mi_col); + 1, pc_root); + } else if (sf->partition_search_type == VAR_BASED_PARTITION) { + choose_partitioning(cpi, td, tile_info, x, mi_row, mi_col); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_LARGEST, &dummy_rate, &dummy_dist, + cm->sb_size, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX - 1, td->pc_root); + 1, pc_root); } else { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); + set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size); rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); } - rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_LARGEST, + rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size, &dummy_rdc, #if CONFIG_SUPERTX &dummy_rate_nocoef, 
#endif // CONFIG_SUPERTX - INT64_MAX, td->pc_root); + INT64_MAX, pc_root); } } #if CONFIG_ENTROPY @@ -4334,8 +4202,8 @@ SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats; for (t = TX_4X4; t <= TX_32X32; ++t) - full_to_model_counts(cpi->td.counts->coef[t], - cpi->td.rd_counts.coef_counts[t]); + vp10_full_to_model_counts(cpi->td.counts->coef[t], + cpi->td.rd_counts.coef_counts[t]); vp10_partial_adapt_probs(cm, mi_row, mi_col); ++cm->coef_probs_update_idx; vp10_copy(subframe_stats->coef_probs_buf[cm->coef_probs_update_idx], @@ -4344,7 +4212,7 @@ cpi->td.rd_counts.coef_counts); vp10_copy(subframe_stats->eob_counts_buf[cm->coef_probs_update_idx], cm->counts.eob_branch); - fill_token_costs(x->token_costs, cm->fc->coef_probs); + vp10_fill_token_costs(x->token_costs, cm->fc->coef_probs); } } #endif // CONFIG_ENTROPY @@ -4476,7 +4344,7 @@ td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count; for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; - mi_row += MI_BLOCK_SIZE) { + mi_row += cm->mib_size) { encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); } @@ -4519,6 +4387,9 @@ RD_COUNTS *const rdc = &cpi->td.rd_counts; int i; + x->min_partition_size = VPXMIN(x->min_partition_size, cm->sb_size); + x->max_partition_size = VPXMIN(x->max_partition_size, cm->sb_size); + xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; @@ -4566,6 +4437,10 @@ #endif #endif + if (cpi->sf.partition_search_type == VAR_BASED_PARTITION && + cpi->td.var_root[0] == NULL) + vp10_setup_var_tree(&cpi->common, &cpi->td); + { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); @@ -4744,7 +4619,6 @@ } #endif } else { - cm->reference_mode = SINGLE_REFERENCE; encode_frame_internal(cpi); } } @@ -4848,7 +4722,8 @@ int idx, idy; xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); for (idy = 0; idy < mi_height; 
idy += bh) for (idx = 0; idx < mi_width; idx += bh) @@ -4913,7 +4788,8 @@ int idx, idy; xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); for (idy = 0; idy < mi_height; idy += bh) for (idx = 0; idx < mi_width; idx += bh) @@ -5158,29 +5034,20 @@ static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) { - VP10_COMMON *const cm = &cpi->common; + const VP10_COMMON *const cm = &cpi->common; - const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; - PARTITION_TYPE partition; - BLOCK_SIZE subsize = bsize; + const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; + const PARTITION_TYPE partition = pc_tree->partitioning; + const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES int i; #endif + assert(bsize >= BLOCK_8X8); + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return 1; - if (bsize >= BLOCK_8X8) - subsize = get_subsize(bsize, pc_tree->partitioning); - else - subsize = BLOCK_4X4; - - partition = partition_lookup[bsl][subsize]; -#if CONFIG_EXT_PARTITION_TYPES - if (bsize > BLOCK_8X8) - partition = pc_tree->partitioning; -#endif - switch (partition) { case PARTITION_NONE: return check_intra_b(&pc_tree->none); @@ -5516,14 +5383,15 @@ MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; - PARTITION_TYPE partition; - BLOCK_SIZE subsize; + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; + const PARTITION_TYPE partition = pc_tree->partitioning; + const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES - BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); + const BLOCK_SIZE bsize2 = 
get_subsize(bsize, PARTITION_SPLIT); #endif - int i, ctx; + int i; uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); @@ -5531,6 +5399,12 @@ int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; + + assert(bsize >= BLOCK_8X8); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); @@ -5558,23 +5432,8 @@ } #endif // CONFIG_VP9_HIGHBITDEPTH - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - if (bsize >= BLOCK_8X8) { - ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - subsize = get_subsize(bsize, pc_tree->partitioning); - } else { - ctx = 0; - subsize = BLOCK_4X4; - } - partition = partition_lookup[bsl][subsize]; -#if CONFIG_EXT_PARTITION_TYPES - if (bsize > BLOCK_8X8) - partition = pc_tree->partitioning; -#endif - if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize) - cm->counts.partition[ctx][partition]++; + if (output_enabled && bsize < top_bsize) + cm->counts.partition[ctx][partition]++; for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = dst_buf[i]; @@ -6112,8 +5971,8 @@ sse_uv = 0; for (plane = 1; plane < MAX_MB_PLANE; ++plane) { #if CONFIG_VAR_TX - ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; - ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE]; + ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE]; const struct macroblockd_plane *const pd = &xd->plane[plane]; int coeff_ctx = 1; @@ -6157,8 +6016,8 @@ #endif // CONFIG_EXT_TX for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { #if CONFIG_VAR_TX - ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; - ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ctxa[2 * 
MAX_MIB_SIZE]; + ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE]; const struct macroblockd_plane *const pd = &xd->plane[0]; int coeff_ctx = 1; #endif // CONFIG_VAR_TX
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 060fe0b..9acf00c 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c
@@ -29,8 +29,8 @@ #include "vp10/encoder/tokenize.h" struct optimize_ctx { - ENTROPY_CONTEXT ta[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; - ENTROPY_CONTEXT tl[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ta[MAX_MB_PLANE][2 * MAX_MIB_SIZE]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][2 * MAX_MIB_SIZE]; }; void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c index a5bfd1a..a2d0659 100644 --- a/vp10/encoder/encodemv.c +++ b/vp10/encoder/encodemv.c
@@ -31,7 +31,7 @@ vp10_tokens_from_tree(mv_fp_encodings, vp10_mv_fp_tree); } -static void encode_mv_component(vpx_writer* w, int comp, +static void encode_mv_component(vp10_writer* w, int comp, const nmv_component* mvcomp, int usehp) { int offset; const int sign = comp < 0; @@ -44,7 +44,7 @@ assert(comp != 0); // Sign - vpx_write(w, sign, mvcomp->sign); + vp10_write(w, sign, mvcomp->sign); // Class vp10_write_token(w, vp10_mv_class_tree, mvcomp->classes, @@ -58,7 +58,7 @@ int i; const int n = mv_class + CLASS0_BITS - 1; // number of bits for (i = 0; i < n; ++i) - vpx_write(w, (d >> i) & 1, mvcomp->bits[i]); + vp10_write(w, (d >> i) & 1, mvcomp->bits[i]); } // Fractional bits @@ -68,7 +68,7 @@ // High precision bit if (usehp) - vpx_write(w, hp, + vp10_write(w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp); } @@ -135,7 +135,7 @@ } } -static void update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p, +static void update_mv(vp10_writer *w, const unsigned int ct[2], vpx_prob *cur_p, vpx_prob upd_p) { (void) upd_p; vp10_cond_prob_diff_update(w, cur_p, ct); @@ -144,7 +144,7 @@ static void write_mv_update(const vpx_tree_index *tree, vpx_prob probs[/*n - 1*/], const unsigned int counts[/*n - 1*/], - int n, vpx_writer *w) { + int n, vp10_writer *w) { int i; unsigned int branch_ct[32][2]; @@ -156,7 +156,7 @@ update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB); } -void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vpx_writer *w, +void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w, nmv_context_counts *const nmv_counts) { int i, j; #if CONFIG_REF_MV @@ -235,7 +235,7 @@ #endif } -void vp10_encode_mv(VP10_COMP* cpi, vpx_writer* w, +void vp10_encode_mv(VP10_COMP* cpi, vp10_writer* w, const MV* mv, const MV* ref, const nmv_context* mvctx, int usehp) { const MV diff = {mv->row - ref->row,
diff --git a/vp10/encoder/encodemv.h b/vp10/encoder/encodemv.h index c753d34..a026b04 100644 --- a/vp10/encoder/encodemv.h +++ b/vp10/encoder/encodemv.h
@@ -20,10 +20,10 @@ void vp10_entropy_mv_init(void); -void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vpx_writer *w, +void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w, nmv_context_counts *const counts); -void vp10_encode_mv(VP10_COMP *cpi, vpx_writer* w, const MV* mv, const MV* ref, +void vp10_encode_mv(VP10_COMP *cpi, vp10_writer* w, const MV* mv, const MV* ref, const nmv_context* mvctx, int usehp); void vp10_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 77af3dd..b34b15e 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c
@@ -248,6 +248,29 @@ #endif } +static BLOCK_SIZE select_sb_size(const VP10_COMP *const cpi) { +#if CONFIG_EXT_PARTITION + if (cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_64X64) + return BLOCK_64X64; + + if (cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_128X128) + return BLOCK_128X128; + + assert(cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_DYNAMIC); + + assert(IMPLIES(cpi->common.tile_cols > 1, + cpi->common.tile_width % MAX_MIB_SIZE == 0)); + assert(IMPLIES(cpi->common.tile_rows > 1, + cpi->common.tile_height % MAX_MIB_SIZE == 0)); + + // TODO(any): Possibly could improve this with a heuristic. + return BLOCK_128X128; +#else + (void)cpi; + return BLOCK_64X64; +#endif // CONFIG_EXT_PARTITION +} + static void setup_frame(VP10_COMP *cpi) { VP10_COMMON *const cm = &cpi->common; // Set up entropy context depending on frame type. The decoder mandates @@ -269,6 +292,8 @@ *cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp10_zero(cpi->interp_filter_selected[0]); } + + set_sb_size(cm, select_sb_size(cpi)); } static void vp10_enc_setup_mi(VP10_COMMON *cm) { @@ -438,6 +463,9 @@ vp10_free_pc_tree(&cpi->td); + if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) + vp10_free_var_tree(&cpi->td); + if (cpi->common.allow_screen_content_tools) vpx_free(cpi->td.mb.palette_buffer); @@ -786,15 +814,31 @@ vp10_rc_update_framerate(cpi); } -static void set_tile_limits(VP10_COMP *cpi) { +static void set_tile_info(VP10_COMP *cpi) { VP10_COMMON *const cm = &cpi->common; + #if CONFIG_EXT_TILE - cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64) << MI_BLOCK_SIZE_LOG2; - cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64) << MI_BLOCK_SIZE_LOG2; +#if CONFIG_EXT_PARTITION + if (cpi->oxcf.superblock_size != VPX_SUPERBLOCK_SIZE_64X64) { + cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 32); + cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 32); + cm->tile_width <<= MAX_MIB_SIZE_LOG2; + cm->tile_height <<= MAX_MIB_SIZE_LOG2; + } else +#endif // CONFIG_EXT_PARTITION + { + 
cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64); + cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64); + cm->tile_width <<= MAX_MIB_SIZE_LOG2 - 1; + cm->tile_height <<= MAX_MIB_SIZE_LOG2 - 1; + } cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols); cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows); + assert(cm->tile_width >> MAX_MIB_SIZE <= 32); + assert(cm->tile_height >> MAX_MIB_SIZE <= 32); + // Get the number of tiles cm->tile_cols = 1; while (cm->tile_cols * cm->tile_width < cm->mi_cols) @@ -814,11 +858,14 @@ cm->tile_cols = 1 << cm->log2_tile_cols; cm->tile_rows = 1 << cm->log2_tile_rows; - cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols); - cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows); - // round to integer multiples of 8 - cm->tile_width = mi_cols_aligned_to_sb(cm->tile_width); - cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height); + cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2); + cm->tile_width >>= cm->log2_tile_cols; + cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2); + cm->tile_height >>= cm->log2_tile_rows; + + // round to integer multiples of max superblock size + cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2); + cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2); #endif // CONFIG_EXT_TILE } @@ -832,7 +879,7 @@ memset(cpi->mbmi_ext_base, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base)); - set_tile_limits(cpi); + set_tile_info(cpi); } static void init_buffer_indices(VP10_COMP *cpi) { @@ -1955,6 +2002,8 @@ CHECK_MEM_ERROR(cm, x->palette_buffer, vpx_memalign(16, sizeof(*x->palette_buffer))); } + // Reallocate the pc_tree, as it's contents depends on + // the state of cm->allow_screen_content_tools vp10_free_pc_tree(&cpi->td); vp10_setup_pc_tree(&cpi->common, &cpi->td); } @@ -2015,7 +2064,7 @@ cpi->last_frame_distortion = 0; #endif - set_tile_limits(cpi); + set_tile_info(cpi); 
cpi->ext_refresh_frame_flags_pending = 0; cpi->ext_refresh_frame_context_pending = 0; @@ -2542,6 +2591,8 @@ vpx_free(thread_data->td->mb.palette_buffer); vpx_free(thread_data->td->counts); vp10_free_pc_tree(thread_data->td); + if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) + vp10_free_var_tree(thread_data->td); vpx_free(thread_data->td); } } @@ -3362,13 +3413,9 @@ model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN]; } -#if CONFIG_ENTROPY -void full_to_model_counts(vp10_coeff_count_model *model_count, - vp10_coeff_count *full_count) { -#else -static void full_to_model_counts(vp10_coeff_count_model *model_count, - vp10_coeff_count *full_count) { -#endif // CONFIG_ENTROPY + +void vp10_full_to_model_counts(vp10_coeff_count_model *model_count, + vp10_coeff_count *full_count) { int i, j, k, l; for (i = 0; i < PLANE_TYPES; ++i) @@ -3699,8 +3746,7 @@ setup_frame(cpi); #if CONFIG_ENTROPY - cm->do_subframe_update = - cm->log2_tile_cols == 0 && cm->log2_tile_rows == 0; + cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1; vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs); vp10_copy(cpi->subframe_stats.enc_starting_coef_probs, cm->fc->coef_probs); @@ -3827,8 +3873,7 @@ #endif // CONFIG_ENTROPY #if CONFIG_ENTROPY - cm->do_subframe_update = - cm->log2_tile_cols == 0 && cm->log2_tile_rows == 0; + cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1; if (loop_count == 0 || frame_is_intra_only(cm) || cm->error_resilient_mode) { vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs); @@ -4361,8 +4406,8 @@ vp10_update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) - full_to_model_counts(cpi->td.counts->coef[t], - cpi->td.rd_counts.coef_counts[t]); + vp10_full_to_model_counts(cpi->td.counts->coef[t], + cpi->td.rd_counts.coef_counts[t]); if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { #if CONFIG_ENTROPY @@ -5092,10 +5137,16 @@ void vp10_apply_encoding_flags(VP10_COMP *cpi, vpx_enc_frame_flags_t flags) { if 
(flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) { - int ref = 7; + int ref = VP9_REFFRAME_ALL; - if (flags & VP8_EFLAG_NO_REF_LAST) + if (flags & VP8_EFLAG_NO_REF_LAST) { ref ^= VP9_LAST_FLAG; +#if CONFIG_EXT_REFS + ref ^= VP9_LAST2_FLAG; + ref ^= VP9_LAST3_FLAG; + ref ^= VP9_LAST4_FLAG; +#endif // CONFIG_EXT_REFS + } if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG; @@ -5109,10 +5160,16 @@ if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) { - int upd = 7; + int upd = VP9_REFFRAME_ALL; - if (flags & VP8_EFLAG_NO_UPD_LAST) + if (flags & VP8_EFLAG_NO_UPD_LAST) { upd ^= VP9_LAST_FLAG; +#if CONFIG_EXT_REFS + upd ^= VP9_LAST2_FLAG; + upd ^= VP9_LAST3_FLAG; + upd ^= VP9_LAST4_FLAG; +#endif // CONFIG_EXT_REFS + } if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index 9e1b6fb..0f0d1f3 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h
@@ -34,6 +34,7 @@ #include "vp10/encoder/rd.h" #include "vp10/encoder/speed_features.h" #include "vp10/encoder/tokenize.h" +#include "vp10/encoder/variance_tree.h" #if CONFIG_VP9_TEMPORAL_DENOISING #include "vp10/encoder/denoiser.h" @@ -235,6 +236,10 @@ int color_range; int render_width; int render_height; + +#if CONFIG_EXT_PARTITION + vpx_superblock_size_t superblock_size; +#endif // CONFIG_EXT_PARTITION } VP10EncoderConfig; static INLINE int is_lossless_requested(const VP10EncoderConfig *cfg) { @@ -262,7 +267,10 @@ PICK_MODE_CONTEXT *leaf_tree; PC_TREE *pc_tree; - PC_TREE *pc_root; + PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1]; + + VAR_TREE *var_tree; + VAR_TREE *var_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1]; } ThreadData; struct EncWorkerData; @@ -418,7 +426,7 @@ // clips, and 300 for < HD clips. int encode_breakout; - unsigned char *segmentation_map; + uint8_t *segmentation_map; // segment threashold for encode breakout int segment_encode_breakout[MAX_SEGMENTS]; @@ -507,6 +515,7 @@ #if CONFIG_EXT_INTER unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS] [INTER_COMPOUND_MODES]; + unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES]; #endif // CONFIG_EXT_INTER #if CONFIG_OBMC int obmc_cost[BLOCK_SIZES][2]; @@ -563,9 +572,12 @@ int resize_count; // VAR_BASED_PARTITION thresholds - // 0 - threshold_64x64; 1 - threshold_32x32; - // 2 - threshold_16x16; 3 - vbp_threshold_8x8; - int64_t vbp_thresholds[4]; + // 0 - threshold_128x128; + // 1 - threshold_64x64; + // 2 - threshold_32x32; + // 3 - threshold_16x16; + // 4 - threshold_8x8; + int64_t vbp_thresholds[5]; int64_t vbp_threshold_minmax; int64_t vbp_threshold_sad; BLOCK_SIZE vbp_bsize_min; @@ -625,10 +637,8 @@ int vp10_get_quantizer(struct VP10_COMP *cpi); -#if CONFIG_ENTROPY -void full_to_model_counts(vp10_coeff_count_model *model_count, - vp10_coeff_count *full_count); -#endif // CONFIG_ENTROPY +void vp10_full_to_model_counts(vp10_coeff_count_model *model_count, + 
vp10_coeff_count *full_count); static INLINE int frame_is_kf_gf_arf(const VP10_COMP *cpi) { return frame_is_intra_only(&cpi->common) ||
diff --git a/vp10/encoder/ethread.c b/vp10/encoder/ethread.c index 2742ed2..e552ec5 100644 --- a/vp10/encoder/ethread.c +++ b/vp10/encoder/ethread.c
@@ -93,6 +93,10 @@ thread_data->td->pc_tree = NULL; vp10_setup_pc_tree(cm, thread_data->td); + // Set up variance tree if needed. + if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) + vp10_setup_var_tree(cm, &cpi->td); + // Allocate frame counters in thread data. CHECK_MEM_ERROR(cm, thread_data->td->counts, vpx_calloc(1, sizeof(*thread_data->td->counts)));
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c index dd3e437..5936a24 100644 --- a/vp10/encoder/firstpass.c +++ b/vp10/encoder/firstpass.c
@@ -491,7 +491,8 @@ TileInfo tile; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; - const PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none; + const PICK_MODE_CONTEXT *ctx = + &cpi->td.pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2]->none; int i; int recon_yoffset, recon_uvoffset;
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c index 4327d97..9423ed2 100644 --- a/vp10/encoder/mcomp.c +++ b/vp10/encoder/mcomp.c
@@ -24,6 +24,7 @@ #include "vp10/encoder/encoder.h" #include "vp10/encoder/mcomp.h" +#include "vp10/encoder/rdopt.h" // #define NEW_DIAMOND_SEARCH @@ -2655,6 +2656,29 @@ v = INT_MAX; \ } +#undef CHECK_BETTER0 +#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) + +#undef CHECK_BETTER1 +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + thismse = upsampled_masked_pref_error(xd, \ + mask, mask_stride, \ + vfp, z, src_stride, \ + upre(y, y_stride, r, c), \ + y_stride, \ + w, h, &sse); \ + if ((v = MVC(r, c) + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } + int vp10_find_best_masked_sub_pixel_tree(const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv, const MV *ref_mv, @@ -2671,8 +2695,8 @@ const MACROBLOCKD *xd = &x->e_mbd; unsigned int besterr = INT_MAX; unsigned int sse; - unsigned int whichdir; int thismse; + unsigned int whichdir; unsigned int halfiters = iters_per_step; unsigned int quarteriters = iters_per_step; unsigned int eighthiters = iters_per_step; @@ -2747,6 +2771,276 @@ return besterr; } +static unsigned int setup_masked_center_error(const uint8_t *mask, + int mask_stride, + const MV *bestmv, + const MV *ref_mv, + int error_per_bit, + const vp10_variance_fn_ptr_t *vfp, + const uint8_t *const src, + const int src_stride, + const uint8_t *const y, + int y_stride, + int offset, + int *mvjcost, int *mvcost[2], + unsigned int *sse1, + int *distortion) { + unsigned int besterr; + besterr = vfp->mvf(y + offset, y_stride, src, src_stride, + mask, mask_stride, sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + return besterr; +} + +static int upsampled_masked_pref_error(const MACROBLOCKD *xd, + const uint8_t *mask, + int mask_stride, + const vp10_variance_fn_ptr_t *vfp, + const uint8_t *const src, + const int src_stride, + const uint8_t *const y, 
int y_stride, + int w, int h, unsigned int *sse) { + unsigned int besterr; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); + vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride); + + besterr = vfp->mvf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, + mask, mask_stride, sse); + } else { + DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); +#else + DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); + (void) xd; +#endif // CONFIG_VP9_HIGHBITDEPTH + vpx_upsampled_pred(pred, w, h, y, y_stride); + + besterr = vfp->mvf(pred, w, src, src_stride, + mask, mask_stride, sse); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif + return besterr; +} + +static unsigned int upsampled_setup_masked_center_error( + const MACROBLOCKD *xd, + const uint8_t *mask, int mask_stride, + const MV *bestmv, const MV *ref_mv, + int error_per_bit, const vp10_variance_fn_ptr_t *vfp, + const uint8_t *const src, const int src_stride, + const uint8_t *const y, int y_stride, + int w, int h, int offset, int *mvjcost, int *mvcost[2], + unsigned int *sse1, int *distortion) { + unsigned int besterr = upsampled_masked_pref_error( + xd, mask, mask_stride, vfp, src, src_stride, + y + offset, y_stride, w, h, sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + return besterr; +} + +int vp10_find_best_masked_sub_pixel_tree_up(VP10_COMP *cpi, + MACROBLOCK *x, + const uint8_t *mask, + int mask_stride, + int mi_row, int mi_col, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp10_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + int is_second, + int use_upsampled_ref) { + const uint8_t *const z = x->plane[0].src.buf; + const uint8_t *const src_address = z; + const int src_stride = x->plane[0].src.stride; + MACROBLOCKD *xd = &x->e_mbd; + struct macroblockd_plane 
*const pd = &xd->plane[0]; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + unsigned int besterr = INT_MAX; + unsigned int sse; + unsigned int thismse; + + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; + int hstep = 4; + int iter; + int round = 3 - forced_stop; + const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); + int tr = br; + int tc = bc; + const MV *search_step = search_step_table; + int idx, best_idx = -1; + unsigned int cost_array[5]; + int kr, kc; + const int w = 4 * num_4x4_blocks_wide_lookup[mbmi->sb_type]; + const int h = 4 * num_4x4_blocks_high_lookup[mbmi->sb_type]; + int offset; + int y_stride; + const uint8_t *y; + + const struct buf_2d backup_pred = pd->pre[is_second]; + if (use_upsampled_ref) { + int ref = xd->mi[0]->mbmi.ref_frame[is_second]; + const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); + setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer, + upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), + NULL, pd->subsampling_x, pd->subsampling_y); + } + y = pd->pre[is_second].buf; + y_stride = pd->pre[is_second].stride; + offset = bestmv->row * y_stride + bestmv->col; + + if (!(allow_hp && vp10_use_mv_hp(ref_mv))) + if (round == 3) + round = 2; + + bestmv->row *= 8; + bestmv->col *= 8; + + // use_upsampled_ref can be 0 or 1 + if (use_upsampled_ref) + besterr = upsampled_setup_masked_center_error( + xd, mask, mask_stride, bestmv, ref_mv, error_per_bit, + vfp, z, src_stride, y, y_stride, + w, h, (offset << 3), + mvjcost, mvcost, sse1, distortion); + else + besterr = setup_masked_center_error( + mask, mask_stride, bestmv, ref_mv, error_per_bit, + vfp, z, src_stride, y, y_stride, + offset, mvjcost, mvcost, sse1, distortion); + + for (iter = 0; iter < 
round; ++iter) { + // Check vertical and horizontal sub-pixel positions. + for (idx = 0; idx < 4; ++idx) { + tr = br + search_step[idx].row; + tc = bc + search_step[idx].col; + if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { + MV this_mv = {tr, tc}; + + if (use_upsampled_ref) { + const uint8_t *const pre_address = y + tr * y_stride + tc; + + thismse = upsampled_masked_pref_error(xd, + mask, mask_stride, + vfp, src_address, src_stride, + pre_address, y_stride, + w, h, &sse); + } else { + const uint8_t *const pre_address = y + (tr >> 3) * y_stride + + (tc >> 3); + thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, + mask, mask_stride, &sse); + } + + cost_array[idx] = thismse + + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); + + if (cost_array[idx] < besterr) { + best_idx = idx; + besterr = cost_array[idx]; + *distortion = thismse; + *sse1 = sse; + } + } else { + cost_array[idx] = INT_MAX; + } + } + + // Check diagonal sub-pixel position + kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); + kr = (cost_array[2] <= cost_array[3] ? 
-hstep : hstep); + + tc = bc + kc; + tr = br + kr; + if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { + MV this_mv = {tr, tc}; + + if (use_upsampled_ref) { + const uint8_t *const pre_address = y + tr * y_stride + tc; + + thismse = upsampled_masked_pref_error(xd, + mask, mask_stride, + vfp, src_address, src_stride, + pre_address, y_stride, + w, h, &sse); + } else { + const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); + + thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, mask, mask_stride, &sse); + } + + cost_array[4] = thismse + + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); + + if (cost_array[4] < besterr) { + best_idx = 4; + besterr = cost_array[4]; + *distortion = thismse; + *sse1 = sse; + } + } else { + cost_array[idx] = INT_MAX; + } + + if (best_idx < 4 && best_idx >= 0) { + br += search_step[best_idx].row; + bc += search_step[best_idx].col; + } else if (best_idx == 4) { + br = tr; + bc = tc; + } + + if (iters_per_step > 1 && best_idx != -1) { + if (use_upsampled_ref) { + SECOND_LEVEL_CHECKS_BEST(1); + } else { + SECOND_LEVEL_CHECKS_BEST(0); + } + } + + tr = br; + tc = bc; + + search_step += 4; + hstep >>= 1; + best_idx = -1; + } + + // These lines insure static analysis doesn't warn that + // tr and tc aren't used after the above point. + (void) tr; + (void) tc; + + bestmv->row = br; + bestmv->col = bc; + + if (use_upsampled_ref) { + pd->pre[is_second] = backup_pred; + } + + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) + return INT_MAX; + + return besterr; +} + #undef DIST #undef MVC #undef CHECK_BETTER
diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h index f99cd8b..c12e7af 100644 --- a/vp10/encoder/mcomp.h +++ b/vp10/encoder/mcomp.h
@@ -169,7 +169,24 @@ int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, int is_second); + unsigned int *sse1, + int is_second); +int vp10_find_best_masked_sub_pixel_tree_up(struct VP10_COMP *cpi, + MACROBLOCK *x, + const uint8_t *mask, + int mask_stride, + int mi_row, int mi_col, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp10_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + int is_second, + int use_upsampled_ref); int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *mvp_full, int step_param,
diff --git a/vp10/encoder/palette.c b/vp10/encoder/palette.c index d413935..cbc3582 100644 --- a/vp10/encoder/palette.c +++ b/vp10/encoder/palette.c
@@ -11,20 +11,21 @@ #include <math.h> #include "vp10/encoder/palette.h" -static double calc_dist(const double *p1, const double *p2, int dim) { - double dist = 0; +static float calc_dist(const float *p1, const float *p2, int dim) { + float dist = 0; int i = 0; for (i = 0; i < dim; ++i) { - dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i])); + float diff = p1[i] - roundf(p2[i]); + dist += diff * diff; } return dist; } -void vp10_calc_indices(const double *data, const double *centroids, +void vp10_calc_indices(const float *data, const float *centroids, uint8_t *indices, int n, int k, int dim) { int i, j; - double min_dist, this_dist; + float min_dist, this_dist; for (i = 0; i < n; ++i) { min_dist = calc_dist(data + i * dim, centroids, dim); @@ -45,7 +46,7 @@ return *state / 65536 % 32768; } -static void calc_centroids(const double *data, double *centroids, +static void calc_centroids(const float *data, float *centroids, const uint8_t *indices, int n, int k, int dim) { int i, j, index; int count[PALETTE_MAX_SIZE]; @@ -70,16 +71,16 @@ memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim, sizeof(centroids[0]) * dim); } else { - const double norm = 1.0 / count[i]; + const float norm = 1.0f / count[i]; for (j = 0; j < dim; ++j) centroids[i * dim + j] *= norm; } } } -static double calc_total_dist(const double *data, const double *centroids, +static float calc_total_dist(const float *data, const float *centroids, const uint8_t *indices, int n, int k, int dim) { - double dist = 0; + float dist = 0; int i; (void) k; @@ -89,11 +90,11 @@ return dist; } -int vp10_k_means(const double *data, double *centroids, uint8_t *indices, +int vp10_k_means(const float *data, float *centroids, uint8_t *indices, uint8_t *pre_indices, int n, int k, int dim, int max_itr) { int i = 0; - double pre_dist, this_dist; - double pre_centroids[2 * PALETTE_MAX_SIZE]; + float pre_dist, this_dist; + float pre_centroids[2 * PALETTE_MAX_SIZE]; vp10_calc_indices(data, centroids, 
indices, n, k, dim); pre_dist = calc_total_dist(data, centroids, indices, n, k, dim); @@ -121,9 +122,9 @@ return i; } -void vp10_insertion_sort(double *data, int n) { +void vp10_insertion_sort(float *data, int n) { int i, j, k; - double val; + float val; if (n <= 1) return;
diff --git a/vp10/encoder/palette.h b/vp10/encoder/palette.h index 124cf74..40d9ef9 100644 --- a/vp10/encoder/palette.h +++ b/vp10/encoder/palette.h
@@ -17,10 +17,10 @@ extern "C" { #endif -void vp10_insertion_sort(double *data, int n); -void vp10_calc_indices(const double *data, const double *centroids, +void vp10_insertion_sort(float *data, int n); +void vp10_calc_indices(const float *data, const float *centroids, uint8_t *indices, int n, int k, int dim); -int vp10_k_means(const double *data, double *centroids, uint8_t *indices, +int vp10_k_means(const float *data, float *centroids, uint8_t *indices, uint8_t *pre_indices, int n, int k, int dim, int max_itr); int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols); #if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c index 203ac42..dc34f1f 100644 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c
@@ -152,13 +152,8 @@ #endif // CONFIG_EXT_INTRA } -#if CONFIG_ENTROPY -void fill_token_costs(vp10_coeff_cost *c, - vp10_coeff_probs_model (*p)[PLANE_TYPES]) { -#else -static void fill_token_costs(vp10_coeff_cost *c, - vp10_coeff_probs_model (*p)[PLANE_TYPES]) { -#endif // CONFIG_ENTROPY +void vp10_fill_token_costs(vp10_coeff_cost *c, + vp10_coeff_probs_model (*p)[PLANE_TYPES]) { int i, j, k, l; TX_SIZE t; for (t = TX_4X4; t <= TX_32X32; ++t) @@ -397,7 +392,7 @@ #endif } if (cpi->oxcf.pass != 1) { - fill_token_costs(x->token_costs, cm->fc->coef_probs); + vp10_fill_token_costs(x->token_costs, cm->fc->coef_probs); if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || cm->frame_type == KEY_FRAME) { @@ -445,12 +440,16 @@ for (i = 0; i < INTER_MODE_CONTEXTS; ++i) vp10_cost_tokens((int *)cpi->inter_mode_cost[i], cm->fc->inter_mode_probs[i], vp10_inter_mode_tree); -#endif +#endif // CONFIG_REF_MV #if CONFIG_EXT_INTER for (i = 0; i < INTER_MODE_CONTEXTS; ++i) vp10_cost_tokens((int *)cpi->inter_compound_mode_cost[i], cm->fc->inter_compound_mode_probs[i], vp10_inter_compound_mode_tree); + for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) + vp10_cost_tokens((int *)cpi->interintra_mode_cost[i], + cm->fc->interintra_mode_prob[i], + vp10_interintra_mode_tree); #endif // CONFIG_EXT_INTER #if CONFIG_OBMC for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) { @@ -563,8 +562,8 @@ void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE], - ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]) { + ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE], + ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; @@ -931,14 +930,15 @@ memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx])); } -void vp10_update_rd_thresh_fact(int 
(*factor_buf)[MAX_MODES], int rd_thresh, - int bsize, int best_mode_index) { +void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm, + int (*factor_buf)[MAX_MODES], int rd_thresh, + int bsize, int best_mode_index) { if (rd_thresh > 0) { const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; int mode; for (mode = 0; mode < top_mode; ++mode) { const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4); - const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_LARGEST); + const BLOCK_SIZE max_size = VPXMIN(bsize + 2, cm->sb_size); BLOCK_SIZE bs; for (bs = min_size; bs <= max_size; ++bs) { int *const fact = &factor_buf[bs][mode];
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h index 533e775..7aad9eb 100644 --- a/vp10/encoder/rd.h +++ b/vp10/encoder/rd.h
@@ -330,20 +330,19 @@ void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE], - ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]); + ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE], + ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]); void vp10_set_rd_speed_thresholds(struct VP10_COMP *cpi); void vp10_set_rd_speed_thresholds_sub8x8(struct VP10_COMP *cpi); -void vp10_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, - int bsize, int best_mode_index); +void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm, + int (*fact)[MAX_MODES], int rd_thresh, + int bsize, int best_mode_index); -#if CONFIG_ENTROPY -void fill_token_costs(vp10_coeff_cost *c, - vp10_coeff_probs_model (*p)[PLANE_TYPES]); -#endif // CONFIG_ENTROPY +void vp10_fill_token_costs(vp10_coeff_cost *c, + vp10_coeff_probs_model (*p)[PLANE_TYPES]); static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, int thresh_fact) {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 656e9f7..b3f8336 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c
@@ -87,8 +87,8 @@ const double ext_tx_th = 0.99; #endif -const double ADST_FLIP_SVM[8] = {-7.3283, -3.0450, -3.2450, 3.6403, // vert - -9.4204, -3.1821, -4.6851, 4.1469}; // horz +const double ADST_FLIP_SVM[8] = {-6.6623, -2.8062, -3.2531, 3.1671, // vert + -7.7051, -3.2234, -3.6193, 3.4533}; // horz typedef struct { PREDICTION_MODE mode; @@ -102,8 +102,8 @@ struct rdcost_block_args { const VP10_COMP *cpi; MACROBLOCK *x; - ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE]; - ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE]; + ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]; int this_rate; int64_t this_dist; int64_t this_sse; @@ -355,14 +355,14 @@ // constants for prune 1 and prune 2 decision boundaries #define FAST_EXT_TX_CORR_MID 0.0 #define FAST_EXT_TX_EDST_MID 0.1 -#define FAST_EXT_TX_CORR_MARGIN 0.5 -#define FAST_EXT_TX_EDST_MARGIN 0.05 +#define FAST_EXT_TX_CORR_MARGIN 0.3 +#define FAST_EXT_TX_EDST_MARGIN 0.5 typedef enum { DCT_1D = 0, ADST_1D = 1, FLIPADST_1D = 2, - DST_1D = 3, + IDTX_1D = 3, TX_TYPES_1D = 4, } TX_TYPE_1D; @@ -568,18 +568,18 @@ } } -int dct_vs_dst(int16_t *diff, int stride, int w, int h, - double *hcorr, double *vcorr) { +int dct_vs_idtx(int16_t *diff, int stride, int w, int h, + double *hcorr, double *vcorr) { int prune_bitmask = 0; get_horver_correlation(diff, stride, w, h, hcorr, vcorr); if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN) - prune_bitmask |= 1 << DST_1D; + prune_bitmask |= 1 << IDTX_1D; else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN) prune_bitmask |= 1 << DCT_1D; if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN) - prune_bitmask |= 1 << (DST_1D + 8); + prune_bitmask |= 1 << (IDTX_1D + 8); else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN) prune_bitmask |= 1 << (DCT_1D + 8); return prune_bitmask; @@ -600,7 +600,7 @@ vp10_subtract_plane(x, bsize, 0); return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, hdist, 
vdist) | - dct_vs_dst(p->src_diff, bw, bw, bh, &hcorr, &vcorr); + dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr); } #endif // CONFIG_EXT_TX @@ -653,13 +653,13 @@ FLIPADST_1D, ADST_1D, FLIPADST_1D, - DST_1D, + IDTX_1D, DCT_1D, - DST_1D, + IDTX_1D, ADST_1D, - DST_1D, + IDTX_1D, FLIPADST_1D, - DST_1D, + IDTX_1D, }; static TX_TYPE_1D htx_tab[TX_TYPES] = { DCT_1D, @@ -671,16 +671,14 @@ FLIPADST_1D, FLIPADST_1D, ADST_1D, + IDTX_1D, + IDTX_1D, DCT_1D, - DST_1D, + IDTX_1D, ADST_1D, - DST_1D, + IDTX_1D, FLIPADST_1D, - DST_1D, - DST_1D, }; - if (tx_type >= IDTX) - return 1; return !(((prune >> vtx_tab[tx_type]) & 1) | ((prune >> (htx_tab[tx_type] + 8)) & 1)); #else @@ -794,8 +792,8 @@ dist_sum += dist; } else { vp10_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs], - pd->dequant[1] >> dequant_shift, - &rate, &dist); + pd->dequant[1] >> dequant_shift, + &rate, &dist); rate_sum += rate; dist_sum += dist; } @@ -1440,7 +1438,9 @@ last_rd = INT64_MAX; for (n = start_tx; n >= end_tx; --n) { if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n)) - continue; + continue; + if (max_tx_size == TX_32X32 && n == TX_4X4) + continue; #if CONFIG_EXT_TX ext_tx_set = get_ext_tx_set(n, bs, is_inter); if (is_inter) { @@ -1769,12 +1769,12 @@ const int max_itr = 50; int color_ctx, color_idx = 0; int color_order[PALETTE_MAX_SIZE]; - double *const data = x->palette_buffer->kmeans_data_buf; + float *const data = x->palette_buffer->kmeans_data_buf; uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf; - double centroids[PALETTE_MAX_SIZE]; + float centroids[PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[0].color_index_map; - double lb, ub, val; + float lb, ub, val; MB_MODE_INFO *const mbmi = &mic->mbmi; PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; #if CONFIG_VP9_HIGHBITDEPTH @@ -1818,6 +1818,9 @@ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; #endif // 
CONFIG_EXT_INTRA + if (rows * cols > PALETTE_MAX_BLOCK_SIZE) + return 0; + for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2; --n) { for (i = 0; i < n; ++i) @@ -1826,7 +1829,7 @@ n, 1, max_itr); vp10_insertion_sort(centroids, n); for (i = 0; i < n; ++i) - centroids[i] = round(centroids[i]); + centroids[i] = roundf(centroids[i]); // remove duplicates i = 1; k = n; @@ -1846,12 +1849,12 @@ #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) for (i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel_highbd((int)round(centroids[i]), + pmi->palette_colors[i] = clip_pixel_highbd((int)lroundf(centroids[i]), cpi->common.bit_depth); else #endif // CONFIG_VP9_HIGHBITDEPTH for (i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel((int)round(centroids[i])); + pmi->palette_colors[i] = clip_pixel((int)lroundf(centroids[i])); pmi->palette_size[0] = k; vp10_calc_indices(data, centroids, indices, rows * cols, k, 1); @@ -2949,8 +2952,8 @@ struct macroblockd_plane *const pd = &xd->plane[plane]; const int tx_row = blk_row >> (1 - pd->subsampling_y); const int tx_col = blk_col >> (1 - pd->subsampling_x); - TX_SIZE (*const inter_tx_size)[MI_BLOCK_SIZE] = - (TX_SIZE (*)[MI_BLOCK_SIZE])&mbmi->inter_tx_size[tx_row][tx_col]; + TX_SIZE (*const inter_tx_size)[MAX_MIB_SIZE] = + (TX_SIZE (*)[MAX_MIB_SIZE])&mbmi->inter_tx_size[tx_row][tx_col]; int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; int64_t this_rd = INT64_MAX; @@ -3120,10 +3123,10 @@ int idx, idy; int block = 0; int step = 1 << (max_txsize_lookup[plane_bsize] * 2); - ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; - ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; - TXFM_CONTEXT tx_above[MI_BLOCK_SIZE]; - TXFM_CONTEXT tx_left[MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE]; + ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE]; + TXFM_CONTEXT tx_above[MAX_MIB_SIZE]; + TXFM_CONTEXT tx_left[MAX_MIB_SIZE]; int pnrate = 0, pnskip = 1; int64_t 
pndist = 0, pnsse = 0; @@ -3235,9 +3238,9 @@ int64_t best_rd = INT64_MAX; TX_TYPE tx_type, best_tx_type = DCT_DCT; const int is_inter = is_inter_block(mbmi); - TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; + TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE]; TX_SIZE best_tx = TX_SIZES; - uint8_t best_blk_skip[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; + uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 4]; const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4); int idx, idy; int prune = 0; @@ -3420,8 +3423,8 @@ int step = 1 << (max_txsize_lookup[plane_bsize] * 2); int pnrate = 0, pnskip = 1; int64_t pndist = 0, pnsse = 0; - ENTROPY_CONTEXT ta[2 * MI_BLOCK_SIZE]; - ENTROPY_CONTEXT tl[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE]; + ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE]; vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); @@ -3547,6 +3550,9 @@ const uint8_t *const src_u = x->plane[1].src.buf; const uint8_t *const src_v = x->plane[2].src.buf; + if (rows * cols > PALETTE_MAX_BLOCK_SIZE) + return; + #if CONFIG_EXT_INTRA mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; #endif // CONFIG_EXT_INTRA @@ -3572,12 +3578,12 @@ int color_ctx, color_idx = 0; int color_order[PALETTE_MAX_SIZE]; int64_t this_sse; - double lb_u, ub_u, val_u; - double lb_v, ub_v, val_v; - double *const data = x->palette_buffer->kmeans_data_buf; + float lb_u, ub_u, val_u; + float lb_v, ub_v, val_v; + float *const data = x->palette_buffer->kmeans_data_buf; uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf; - double centroids[2 * PALETTE_MAX_SIZE]; + float centroids[2 * PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[1].color_index_map; PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; @@ -3646,12 +3652,12 @@ #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = - clip_pixel_highbd(round(centroids[j * 2 + i - 1]), + 
clip_pixel_highbd(roundf(centroids[j * 2 + i - 1]), cpi->common.bit_depth); else #endif // CONFIG_VP9_HIGHBITDEPTH pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = - clip_pixel(round(centroids[j * 2 + i - 1])); + clip_pixel(roundf(centroids[j * 2 + i - 1])); } } for (r = 0; r < rows; ++r) @@ -4952,44 +4958,54 @@ #if !CONFIG_EXT_INTER if (filter_idx > 0) { BEST_SEG_INFO* ref_bsi = bsi_buf; - if (seg_mvs[i][mbmi->ref_frame[0]].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int && - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int != INVALID_MV) - if (bsi->ref_mv[0]->as_int == - ref_bsi->rdstat[i][mode_idx].pred_mv[0].as_int) - --run_mv_search; + SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[i][mode_idx]; - if (!has_second_rf) { - --run_mv_search; - } else { - if (seg_mvs[i][mbmi->ref_frame[1]].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int && - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int != INVALID_MV) - if (bsi->ref_mv[1]->as_int == - ref_bsi->rdstat[i][mode_idx].pred_mv[1].as_int) + if (has_second_rf) { + if (seg_mvs[i][mbmi->ref_frame[0]].as_int == + ref_rdstat->mvs[0].as_int && + ref_rdstat->mvs[0].as_int != INVALID_MV) + if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int) --run_mv_search; + + if (seg_mvs[i][mbmi->ref_frame[1]].as_int == + ref_rdstat->mvs[1].as_int && + ref_rdstat->mvs[1].as_int != INVALID_MV) + if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int) + --run_mv_search; + } else { + if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int && + ref_rdstat->mvs[0].as_int != INVALID_MV) { + run_mv_search = 0; + seg_mvs[i][mbmi->ref_frame[0]].as_int = + ref_rdstat->mvs[0].as_int; + } } if (run_mv_search != 0 && filter_idx > 1) { ref_bsi = bsi_buf + 1; + ref_rdstat = &ref_bsi->rdstat[i][mode_idx]; run_mv_search = 2; - if (seg_mvs[i][mbmi->ref_frame[0]].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int && - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int != INVALID_MV) - if (bsi->ref_mv[0]->as_int == - 
ref_bsi->rdstat[i][mode_idx].pred_mv[0].as_int) - --run_mv_search; - - if (!has_second_rf) { - --run_mv_search; - } else { - if (seg_mvs[i][mbmi->ref_frame[1]].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int && - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int != INVALID_MV) - if (bsi->ref_mv[1]->as_int == - ref_bsi->rdstat[i][mode_idx].pred_mv[1].as_int) + if (has_second_rf) { + if (seg_mvs[i][mbmi->ref_frame[0]].as_int == + ref_rdstat->mvs[0].as_int && + ref_rdstat->mvs[0].as_int != INVALID_MV) + if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int) --run_mv_search; + + if (seg_mvs[i][mbmi->ref_frame[1]].as_int == + ref_rdstat->mvs[1].as_int && + ref_rdstat->mvs[1].as_int != INVALID_MV) + if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int) + --run_mv_search; + } else { + if (bsi->ref_mv[0]->as_int == + ref_rdstat->pred_mv[0].as_int && + ref_rdstat->mvs[0].as_int != INVALID_MV) { + run_mv_search = 0; + seg_mvs[i][mbmi->ref_frame[0]].as_int = + ref_rdstat->mvs[0].as_int; + } } } } @@ -5069,8 +5085,8 @@ } #if CONFIG_REF_MV - mvp_full.row = best_ref_mv->as_mv.row >> 3; - mvp_full.col = best_ref_mv->as_mv.col >> 3; + mvp_full.row = bsi->ref_mv[0]->as_mv.row >> 3; + mvp_full.col = bsi->ref_mv[0]->as_mv.col >> 3; #else mvp_full.row = bsi->mvp.as_mv.row >> 3; mvp_full.col = bsi->mvp.as_mv.col >> 3; @@ -5731,10 +5747,9 @@ step_param = cpi->mv_step_param; } - if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) { - int boffset = - 2 * (b_width_log2_lookup[BLOCK_LARGEST] - - VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); + if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) { + int boffset = 2 * (b_width_log2_lookup[cm->sb_size] - + VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); step_param = VPXMAX(step_param, boffset); } @@ -5905,9 +5920,9 @@ } // TODO(debargha): is show_frame needed here? 
- if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST && + if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size && cm->show_frame) { - int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] - + int boffset = 2 * (b_width_log2_lookup[cm->sb_size] - VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); step_param = VPXMAX(step_param, boffset); } @@ -5958,15 +5973,18 @@ if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ - vp10_find_best_masked_sub_pixel_tree(x, mask, mask_stride, - &tmp_mv->as_mv, &ref_mv, - cm->allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - x->nmvjointcost, x->mvcost, - &dis, &x->pred_sse[ref], ref_idx); + vp10_find_best_masked_sub_pixel_tree_up(cpi, x, mask, mask_stride, + mi_row, mi_col, + &tmp_mv->as_mv, &ref_mv, + cm->allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + x->nmvjointcost, x->mvcost, + &dis, &x->pred_sse[ref], + ref_idx, + cpi->sf.use_upsampled_references); } *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); @@ -6197,8 +6215,8 @@ #if CONFIG_EXT_INTER int mv_idx = (this_mode == NEWFROMNEARMV) ? 
1 : 0; int_mv single_newmv[MAX_REF_FRAMES]; - const int * const intra_mode_cost = - cpi->mbmode_cost[size_group_lookup[bsize]]; + const unsigned int *const interintra_mode_cost = + cpi->interintra_mode_cost[size_group_lookup[bsize]]; const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); #if CONFIG_REF_MV uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); @@ -6223,7 +6241,7 @@ int best_rate_y, best_rate_uv; #endif // CONFIG_SUPERTX #if CONFIG_VAR_TX - uint8_t best_blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; + uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4]; #endif // CONFIG_VAR_TX int64_t best_distortion = INT64_MAX; unsigned int best_pred_var = UINT_MAX; @@ -6623,7 +6641,7 @@ rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0; #if CONFIG_EXT_INTER - if (is_comp_pred && get_wedge_bits(bsize)) { + if (is_comp_pred && is_interinter_wedge_used(bsize)) { int wedge_index, best_wedge_index = WEDGE_NONE, rs; int rate_sum; int64_t dist_sum; @@ -6773,7 +6791,7 @@ } if (is_comp_interintra_pred) { - PREDICTION_MODE interintra_mode, best_interintra_mode = DC_PRED; + INTERINTRA_MODE best_interintra_mode = II_DC_PRED; int64_t best_interintra_rd = INT64_MAX; int rmode, rate_sum; int64_t dist_sum; @@ -6786,6 +6804,16 @@ bh = 4 << b_height_log2_lookup[mbmi->sb_type]; int_mv tmp_mv; int tmp_rate_mv = 0; + DECLARE_ALIGNED(16, uint8_t, + intrapred_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); + uint8_t *intrapred; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + intrapred = CONVERT_TO_BYTEPTR(intrapred_); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + intrapred = intrapred_; + mbmi->ref_frame[1] = NONE; for (j = 0; j < MAX_MB_PLANE; j++) { xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE; @@ -6795,44 +6823,56 @@ restore_dst_buf(xd, orig_dst, orig_dst_stride); mbmi->ref_frame[1] = INTRA_FRAME; - for (interintra_mode = DC_PRED; interintra_mode <= TM_PRED; - 
++interintra_mode) { - mbmi->interintra_mode = interintra_mode; - mbmi->interintra_uv_mode = interintra_mode; - rmode = intra_mode_cost[mbmi->interintra_mode]; - vp10_build_interintra_predictors(xd, - tmp_buf, - tmp_buf + MAX_SB_SQUARE, - tmp_buf + 2 * MAX_SB_SQUARE, - MAX_SB_SIZE, - MAX_SB_SIZE, - MAX_SB_SIZE, - bsize); + for (j = 0; j < INTERINTRA_MODES; ++j) { + mbmi->interintra_mode = (INTERINTRA_MODE)j; + mbmi->interintra_uv_mode = (INTERINTRA_MODE)j; + rmode = interintra_mode_cost[mbmi->interintra_mode]; + vp10_build_intra_predictors_for_interintra( + xd, bsize, 0, intrapred, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 0, tmp_buf, MAX_SB_SIZE, + intrapred, MAX_SB_SIZE); + vp10_build_intra_predictors_for_interintra( + xd, bsize, 1, intrapred + MAX_SB_SQUARE, MAX_SB_SIZE); + vp10_build_intra_predictors_for_interintra( + xd, bsize, 2, intrapred + 2 * MAX_SB_SQUARE, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 1, + tmp_buf + MAX_SB_SQUARE, MAX_SB_SIZE, + intrapred + MAX_SB_SQUARE, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 2, + tmp_buf + 2 * MAX_SB_SQUARE, MAX_SB_SIZE, + intrapred + 2 * MAX_SB_SQUARE, MAX_SB_SIZE); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum); if (rd < best_interintra_rd) { best_interintra_rd = rd; - best_interintra_mode = interintra_mode; + best_interintra_mode = mbmi->interintra_mode; } } mbmi->interintra_mode = best_interintra_mode; mbmi->interintra_uv_mode = best_interintra_mode; if (ref_best_rd < INT64_MAX && - best_interintra_rd / 2 > ref_best_rd) { + best_interintra_rd > 2 * ref_best_rd) { return INT64_MAX; } - wedge_bits = get_wedge_bits(bsize); - rmode = intra_mode_cost[mbmi->interintra_mode]; - if (wedge_bits) { - vp10_build_interintra_predictors(xd, - tmp_buf, - tmp_buf + MAX_SB_SQUARE, - tmp_buf + 2 * MAX_SB_SQUARE, - MAX_SB_SIZE, - MAX_SB_SIZE, - MAX_SB_SIZE, - bsize); + 
vp10_build_intra_predictors_for_interintra( + xd, bsize, 0, intrapred, MAX_SB_SIZE); + vp10_build_intra_predictors_for_interintra( + xd, bsize, 1, intrapred + MAX_SB_SQUARE, MAX_SB_SIZE); + vp10_build_intra_predictors_for_interintra( + xd, bsize, 2, intrapred + 2 * MAX_SB_SQUARE, MAX_SB_SIZE); + + rmode = interintra_mode_cost[mbmi->interintra_mode]; + if (is_interintra_wedge_used(bsize)) { + wedge_bits = get_wedge_bits(bsize); + vp10_combine_interintra(xd, bsize, 0, tmp_buf, MAX_SB_SIZE, + intrapred, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 1, + tmp_buf + MAX_SB_SQUARE, MAX_SB_SIZE, + intrapred + MAX_SB_SQUARE, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 2, + tmp_buf + 2 * MAX_SB_SQUARE, MAX_SB_SIZE, + intrapred + 2 * MAX_SB_SQUARE, MAX_SB_SIZE); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &skip_txfm_sb, &skip_sse_sb); rwedge = vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0); @@ -6847,14 +6887,15 @@ for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { mbmi->interintra_wedge_index = wedge_index; mbmi->interintra_uv_wedge_index = wedge_index; - vp10_build_interintra_predictors(xd, - tmp_buf, - tmp_buf + MAX_SB_SQUARE, - tmp_buf + 2 * MAX_SB_SQUARE, - MAX_SB_SIZE, - MAX_SB_SIZE, - MAX_SB_SIZE, - bsize); + vp10_combine_interintra(xd, bsize, 0, + tmp_buf, MAX_SB_SIZE, + intrapred, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 1, + tmp_buf + MAX_SB_SQUARE, MAX_SB_SIZE, + intrapred + MAX_SB_SQUARE, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 2, + tmp_buf + 2 * MAX_SB_SQUARE, MAX_SB_SIZE, + intrapred + 2 * MAX_SB_SQUARE, MAX_SB_SIZE); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, @@ -6908,9 +6949,9 @@ pred_exists = 0; tmp_rd = best_interintra_rd; *compmode_interintra_cost = - vp10_cost_bit(cm->fc->interintra_prob[bsize], 1); - *compmode_interintra_cost += intra_mode_cost[mbmi->interintra_mode]; - if (get_wedge_bits(bsize)) { + 
vp10_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1); + *compmode_interintra_cost += interintra_mode_cost[mbmi->interintra_mode]; + if (is_interintra_wedge_used(bsize)) { *compmode_interintra_cost += vp10_cost_bit( cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra); if (mbmi->use_wedge_interintra) { @@ -6919,7 +6960,7 @@ } } else if (is_interintra_allowed(mbmi)) { *compmode_interintra_cost = - vp10_cost_bit(cm->fc->interintra_prob[bsize], 0); + vp10_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0); } #if CONFIG_EXT_INTERP @@ -7416,8 +7457,8 @@ // bars embedded in the stream. int vp10_active_edge_sb(VP10_COMP *cpi, int mi_row, int mi_col) { - return vp10_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) || - vp10_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE); + return vp10_active_h_edge(cpi, mi_row, cpi->common.mib_size) || + vp10_active_v_edge(cpi, mi_col, cpi->common.mib_size); } static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) { @@ -7432,9 +7473,9 @@ int src_stride = x->plane[1].src.stride; const uint8_t *const src_u = x->plane[1].src.buf; const uint8_t *const src_v = x->plane[2].src.buf; - double *const data = x->palette_buffer->kmeans_data_buf; + float *const data = x->palette_buffer->kmeans_data_buf; uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; - double centroids[2 * PALETTE_MAX_SIZE]; + float centroids[2 * PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[1].color_index_map; int r, c; #if CONFIG_VP9_HIGHBITDEPTH @@ -7552,6 +7593,8 @@ uint8_t ref_frame_skip_mask[2] = { 0 }; #if CONFIG_EXT_INTER uint32_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; + MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME; + int64_t best_single_inter_rd = INT64_MAX; #else uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; #endif // CONFIG_EXT_INTER @@ -8178,6 +8221,8 @@ #endif #if CONFIG_EXT_INTER if (second_ref_frame == INTRA_FRAME) { + if (best_single_inter_ref != ref_frame) + continue; mbmi->interintra_mode = 
best_intra_mode; mbmi->interintra_uv_mode = best_intra_mode; #if CONFIG_EXT_INTRA @@ -8477,7 +8522,6 @@ #endif // CONFIG_OBMC } - // Apply an adjustment to the rd value based on the similarity of the // source variance and reconstructed variance. rd_variance_adjustment(cpi, x, bsize, &this_rd, ref_frame, @@ -8487,11 +8531,18 @@ x->source_variance); if (ref_frame == INTRA_FRAME) { - // Keep record of best intra rd + // Keep record of best intra rd if (this_rd < best_intra_rd) { best_intra_rd = this_rd; best_intra_mode = mbmi->mode; } +#if CONFIG_EXT_INTER + } else if (second_ref_frame == NONE) { + if (this_rd < best_single_inter_rd) { + best_single_inter_rd = this_rd; + best_single_inter_ref = mbmi->ref_frame[0]; + } +#endif // CONFIG_EXT_INTER } if (!disable_skip && ref_frame == INTRA_FRAME) { @@ -8525,7 +8576,7 @@ *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd), mbmi->ref_frame[0] != INTRA_FRAME); #if CONFIG_OBMC - if (is_inter_block(mbmi) && is_obmc_allowed(mbmi)) + if (is_neighbor_overlappable(mbmi) && is_obmc_allowed(mbmi)) *returnrate_nocoef -= cpi->obmc_cost[bsize][mbmi->obmc]; #endif // CONFIG_OBMC #endif // CONFIG_SUPERTX @@ -8898,8 +8949,8 @@ !is_inter_block(&best_mbmode)); if (!cpi->rc.is_src_frame_alt_ref) - vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact, - sf->adaptive_rd_thresh, bsize, best_mode_index); + vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact, + sf->adaptive_rd_thresh, bsize, best_mode_index); // macroblock modes *mbmi = best_mbmode; @@ -9045,8 +9096,8 @@ assert((cm->interp_filter == SWITCHABLE) || (cm->interp_filter == mbmi->interp_filter)); - vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact, - cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); + vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact, + cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); vp10_zero(best_pred_diff); @@ -9767,8 +9818,8 @@ (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); - 
vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact, - sf->adaptive_rd_thresh, bsize, best_ref_index); + vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact, + sf->adaptive_rd_thresh, bsize, best_ref_index); // macroblock modes *mbmi = best_mbmode;
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c index f719467..f3fa210 100644 --- a/vp10/encoder/segmentation.c +++ b/vp10/encoder/segmentation.c
@@ -180,8 +180,7 @@ if (bsize == BLOCK_8X8) partition = PARTITION_NONE; else - partition = get_partition(cm->mi, cm->mi_stride, cm->mi_rows, cm->mi_cols, - mi_row, mi_col, bsize); + partition = get_partition(cm, mi_row, mi_col, bsize); switch (partition) { case PARTITION_NONE: count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count, @@ -328,13 +327,13 @@ mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride + tile_info.mi_col_start; for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; - mi_row += MI_BLOCK_SIZE, mi_ptr += MI_BLOCK_SIZE * cm->mi_stride) { + mi_row += cm->mib_size, mi_ptr += cm->mib_size * cm->mi_stride) { MODE_INFO **mi = mi_ptr; for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; - mi_col += MI_BLOCK_SIZE, mi += MI_BLOCK_SIZE) { + mi_col += cm->mib_size, mi += cm->mib_size) { count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, - mi_row, mi_col, BLOCK_LARGEST); + mi_row, mi_col, cm->sb_size); } } }
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h index ea4df6e..6ba074d 100644 --- a/vp10/encoder/speed_features.h +++ b/vp10/encoder/speed_features.h
@@ -319,8 +319,8 @@ // Disable testing non square partitions. (eg 16x32) int use_square_partition_only; - // Sets min and max partition sizes for this 64x64 region based on the - // same 64x64 in last encoded frame, and the left and above neighbor. + // Sets min and max partition sizes for this superblock based on the + // same superblock in last encoded frame, and the left and above neighbor. AUTO_MIN_MAX_MODE auto_min_max_partition_size; // Ensures the rd based auto partition search will always // go down at least to the specified level.
diff --git a/vp10/encoder/subexp.c b/vp10/encoder/subexp.c index d944d01..6d9c45f 100644 --- a/vp10/encoder/subexp.c +++ b/vp10/encoder/subexp.c
@@ -7,7 +7,7 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_dsp/bitwriter.h" +#include "vp10/encoder/bitwriter.h" #include "vp10/common/common.h" #include "vp10/common/entropy.h" @@ -83,35 +83,35 @@ return update_bits[delp] << VP9_PROB_COST_SHIFT; } -static void encode_uniform(vpx_writer *w, int v) { +static void encode_uniform(vp10_writer *w, int v) { const int l = 8; const int m = (1 << l) - 190; if (v < m) { - vpx_write_literal(w, v, l - 1); + vp10_write_literal(w, v, l - 1); } else { - vpx_write_literal(w, m + ((v - m) >> 1), l - 1); - vpx_write_literal(w, (v - m) & 1, 1); + vp10_write_literal(w, m + ((v - m) >> 1), l - 1); + vp10_write_literal(w, (v - m) & 1, 1); } } -static INLINE int write_bit_gte(vpx_writer *w, int word, int test) { - vpx_write_literal(w, word >= test, 1); +static INLINE int write_bit_gte(vp10_writer *w, int word, int test) { + vp10_write_literal(w, word >= test, 1); return word >= test; } -static void encode_term_subexp(vpx_writer *w, int word) { +static void encode_term_subexp(vp10_writer *w, int word) { if (!write_bit_gte(w, word, 16)) { - vpx_write_literal(w, word, 4); + vp10_write_literal(w, word, 4); } else if (!write_bit_gte(w, word, 32)) { - vpx_write_literal(w, word - 16, 4); + vp10_write_literal(w, word - 16, 4); } else if (!write_bit_gte(w, word, 64)) { - vpx_write_literal(w, word - 32, 5); + vp10_write_literal(w, word - 32, 5); } else { encode_uniform(w, word - 64); } } -void vp10_write_prob_diff_update(vpx_writer *w, vpx_prob newp, vpx_prob oldp) { +void vp10_write_prob_diff_update(vp10_writer *w, vpx_prob newp, vpx_prob oldp) { const int delp = remap_prob(newp, oldp); encode_term_subexp(w, delp); } @@ -262,7 +262,7 @@ } #endif // CONFIG_ENTROPY -void vp10_cond_prob_diff_update(vpx_writer *w, vpx_prob *oldp, +void vp10_cond_prob_diff_update(vp10_writer *w, vpx_prob *oldp, const unsigned int ct[2]) { const vpx_prob upd = 
DIFF_UPDATE_PROB; vpx_prob newp = get_binary_prob(ct[0], ct[1]); @@ -270,11 +270,11 @@ upd); assert(newp >= 1); if (savings > 0) { - vpx_write(w, 1, upd); + vp10_write(w, 1, upd); vp10_write_prob_diff_update(w, newp, *oldp); *oldp = newp; } else { - vpx_write(w, 0, upd); + vp10_write(w, 0, upd); } }
diff --git a/vp10/encoder/subexp.h b/vp10/encoder/subexp.h index 0f9227c..756b499 100644 --- a/vp10/encoder/subexp.h +++ b/vp10/encoder/subexp.h
@@ -18,12 +18,12 @@ #include "vpx_dsp/prob.h" -struct vpx_writer; +struct vp10_writer; -void vp10_write_prob_diff_update(struct vpx_writer *w, +void vp10_write_prob_diff_update(struct vp10_writer *w, vpx_prob newp, vpx_prob oldp); -void vp10_cond_prob_diff_update(struct vpx_writer *w, vpx_prob *oldp, +void vp10_cond_prob_diff_update(struct vp10_writer *w, vpx_prob *oldp, const unsigned int ct[2]); int vp10_prob_diff_update_savings_search(const unsigned int *ct,
diff --git a/vp10/encoder/variance_tree.c b/vp10/encoder/variance_tree.c new file mode 100644 index 0000000..d11ef2d --- /dev/null +++ b/vp10/encoder/variance_tree.c
@@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp10/encoder/variance_tree.h" +#include "vp10/encoder/encoder.h" + + + +void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) { + int i, j; +#if CONFIG_EXT_PARTITION + const int leaf_nodes = 1024; + const int tree_nodes = 1024 + 256 + 64 + 16 + 4 + 1; +#else + const int leaf_nodes = 256; + const int tree_nodes = 256 + 64 + 16 + 4 + 1; +#endif // CONFIG_EXT_PARTITION + int index = 0; + VAR_TREE *this_var; + int nodes; + + vpx_free(td->var_tree); + CHECK_MEM_ERROR(cm, td->var_tree, vpx_calloc(tree_nodes, + sizeof(*td->var_tree))); + + this_var = &td->var_tree[0]; + + // Sets up all the leaf nodes in the tree. + for (index = 0; index < leaf_nodes; ++index) { + VAR_TREE *const leaf = &td->var_tree[index]; + leaf->split[0] = NULL; + } + + // Each node has 4 leaf nodes, fill in the child pointers + // from leafs to the root. + for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { + for (i = 0; i < nodes; ++i, ++index) { + VAR_TREE *const node = &td->var_tree[index]; + for (j = 0; j < 4; j++) + node->split[j] = this_var++; + } + } + + // Set up the root node for the largest superblock size + i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2; + td->var_root[i] = &td->var_tree[tree_nodes - 1]; + // Set up the root nodes for the rest of the possible superblock sizes + while (--i >= 0) { + td->var_root[i] = td->var_root[i+1]->split[0]; + } +} + +void vp10_free_var_tree(ThreadData *td) { + vpx_free(td->var_tree); + td->var_tree = NULL; +}
diff --git a/vp10/encoder/variance_tree.h b/vp10/encoder/variance_tree.h new file mode 100644 index 0000000..a10f7e7 --- /dev/null +++ b/vp10/encoder/variance_tree.h
@@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_ENCODER_VARIANCE_TREE_H_ +#define VP10_ENCODER_VARIANCE_TREE_H_ + +#include <assert.h> + +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" + +#include "vp10/common/enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP10Common; +struct ThreadData; + +typedef struct { + int64_t sum_square_error; + int64_t sum_error; + int log2_count; + int variance; +} var; + +typedef struct { + var none; + var horz[2]; + var vert[2]; +} partition_variance; + +typedef struct VAR_TREE { + int force_split; + partition_variance variances; + struct VAR_TREE *split[4]; + BLOCK_SIZE bsize; + const uint8_t *src; + const uint8_t *ref; + int src_stride; + int ref_stride; + int width; + int height; +#if CONFIG_VP9_HIGHBITDEPTH + int highbd; +#endif // CONFIG_VP9_HIGHBITDEPTH +} VAR_TREE; + +void vp10_setup_var_tree(struct VP10Common *cm, struct ThreadData *td); +void vp10_free_var_tree(struct ThreadData *td); + +// Set variance values given sum square error, sum error, count. 
+static INLINE void fill_variance(int64_t s2, int64_t s, int c, var *v) { + v->sum_square_error = s2; + v->sum_error = s; + v->log2_count = c; + v->variance = (int)(256 * (v->sum_square_error - + ((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count); +} + +static INLINE void sum_2_variances(const var *a, const var *b, var *r) { + assert(a->log2_count == b->log2_count); + fill_variance(a->sum_square_error + b->sum_square_error, + a->sum_error + b->sum_error, a->log2_count + 1, r); +} + +static INLINE void fill_variance_node(VAR_TREE *vt) { + sum_2_variances(&vt->split[0]->variances.none, + &vt->split[1]->variances.none, + &vt->variances.horz[0]); + sum_2_variances(&vt->split[2]->variances.none, + &vt->split[3]->variances.none, + &vt->variances.horz[1]); + sum_2_variances(&vt->split[0]->variances.none, + &vt->split[2]->variances.none, + &vt->variances.vert[0]); + sum_2_variances(&vt->split[1]->variances.none, + &vt->split[3]->variances.none, + &vt->variances.vert[1]); + sum_2_variances(&vt->variances.vert[0], + &vt->variances.vert[1], + &vt->variances.none); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* VP10_ENCODER_VARIANCE_TREE_H_ */
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c index 047fcfb..0cad961 100644 --- a/vp10/vp10_cx_iface.c +++ b/vp10/vp10_cx_iface.c
@@ -49,40 +49,42 @@ int color_range; int render_width; int render_height; + vpx_superblock_size_t superblock_size; }; static struct vp10_extracfg default_extra_cfg = { - 0, // cpu_used - 1, // enable_auto_alt_ref - 0, // noise_sensitivity - 0, // sharpness - 0, // static_thresh + 0, // cpu_used + 1, // enable_auto_alt_ref + 0, // noise_sensitivity + 0, // sharpness + 0, // static_thresh #if CONFIG_EXT_TILE - 64, // tile_columns - 64, // tile_rows + UINT_MAX, // tile_columns + UINT_MAX, // tile_rows #else - 0, // tile_columns - 0, // tile_rows + 0, // tile_columns + 0, // tile_rows #endif // CONFIG_EXT_TILE - 7, // arnr_max_frames - 5, // arnr_strength - 0, // min_gf_interval; 0 -> default decision - 0, // max_gf_interval; 0 -> default decision - VP8_TUNE_PSNR, // tuning - 10, // cq_level - 0, // rc_max_intra_bitrate_pct - 0, // rc_max_inter_bitrate_pct - 0, // gf_cbr_boost_pct - 0, // lossless - 1, // frame_parallel_decoding_mode - NO_AQ, // aq_mode - 0, // frame_periodic_delta_q - VPX_BITS_8, // Bit depth - VP9E_CONTENT_DEFAULT, // content - VPX_CS_UNKNOWN, // color space - 0, // color range - 0, // render width - 0, // render height + 7, // arnr_max_frames + 5, // arnr_strength + 0, // min_gf_interval; 0 -> default decision + 0, // max_gf_interval; 0 -> default decision + VP8_TUNE_PSNR, // tuning + 10, // cq_level + 0, // rc_max_intra_bitrate_pct + 0, // rc_max_inter_bitrate_pct + 0, // gf_cbr_boost_pct + 0, // lossless + 1, // frame_parallel_decoding_mode + NO_AQ, // aq_mode + 0, // frame_periodic_delta_q + VPX_BITS_8, // Bit depth + VP9E_CONTENT_DEFAULT, // content + VPX_CS_UNKNOWN, // color space + 0, // color range + 0, // render width + 0, // render height + VPX_SUPERBLOCK_SIZE_DYNAMIC // superblock_size }; struct vpx_codec_alg_priv { @@ -199,12 +201,26 @@ RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2); RANGE_CHECK(extra_cfg, cpu_used, -8, 8); RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); + RANGE_CHECK(extra_cfg, superblock_size, + 
VPX_SUPERBLOCK_SIZE_64X64, VPX_SUPERBLOCK_SIZE_DYNAMIC); #if CONFIG_EXT_TILE // TODO(any): Waring. If CONFIG_EXT_TILE is true, tile_columns really // means tile_width, and tile_rows really means tile_hight. The interface // should be sanitized. - RANGE_CHECK(extra_cfg, tile_columns, 1, 64); - RANGE_CHECK(extra_cfg, tile_rows, 1, 64); +#if CONFIG_EXT_PARTITION + if (extra_cfg->superblock_size != VPX_SUPERBLOCK_SIZE_64X64) { + if (extra_cfg->tile_columns != UINT_MAX) + RANGE_CHECK(extra_cfg, tile_columns, 1, 32); + if (extra_cfg->tile_rows != UINT_MAX) + RANGE_CHECK(extra_cfg, tile_rows, 1, 32); + } else +#endif // CONFIG_EXT_PARTITION + { + if (extra_cfg->tile_columns != UINT_MAX) + RANGE_CHECK(extra_cfg, tile_columns, 1, 64); + if (extra_cfg->tile_rows != UINT_MAX) + RANGE_CHECK(extra_cfg, tile_rows, 1, 64); + } #else RANGE_CHECK(extra_cfg, tile_columns, 0, 6); RANGE_CHECK(extra_cfg, tile_rows, 0, 2); @@ -416,8 +432,25 @@ oxcf->tuning = extra_cfg->tuning; oxcf->content = extra_cfg->content; +#if CONFIG_EXT_PARTITION + oxcf->superblock_size = extra_cfg->superblock_size; +#endif // CONFIG_EXT_PARTITION + +#if CONFIG_EXT_TILE + { +#if CONFIG_EXT_PARTITION + const unsigned int max = + extra_cfg->superblock_size == VPX_SUPERBLOCK_SIZE_64X64 ? 
64 : 32; +#else + const unsigned int max = 64; +#endif // CONFIG_EXT_PARTITION + oxcf->tile_columns = VPXMIN(extra_cfg->tile_columns, max); + oxcf->tile_rows = VPXMIN(extra_cfg->tile_rows, max); + } +#else oxcf->tile_columns = extra_cfg->tile_columns; oxcf->tile_rows = extra_cfg->tile_rows; +#endif // CONFIG_EXT_TILE oxcf->error_resilient_mode = cfg->g_error_resilient; oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode; @@ -1247,6 +1280,13 @@ return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_superblock_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp10_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.superblock_size = CAST(VP10E_SET_SUPERBLOCK_SIZE, args); + return update_extra_cfg(ctx, &extra_cfg); +} + static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8_COPY_REFERENCE, ctrl_copy_reference}, {VP8E_USE_REFERENCE, ctrl_use_reference}, @@ -1283,6 +1323,7 @@ {VP9E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval}, {VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval}, {VP9E_SET_RENDER_SIZE, ctrl_set_render_size}, + {VP10E_SET_SUPERBLOCK_SIZE, ctrl_set_superblock_size}, // Getters {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer},
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk index 34b766f..d174c8b 100644 --- a/vp10/vp10cx.mk +++ b/vp10/vp10cx.mk
@@ -21,6 +21,8 @@ VP10_CX_SRCS-yes += encoder/bitwriter.h VP10_CX_SRCS-yes += encoder/context_tree.c VP10_CX_SRCS-yes += encoder/context_tree.h +VP10_CX_SRCS-yes += encoder/variance_tree.c +VP10_CX_SRCS-yes += encoder/variance_tree.h VP10_CX_SRCS-yes += encoder/cost.h VP10_CX_SRCS-yes += encoder/cost.c VP10_CX_SRCS-yes += encoder/dct.c
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 5600ed4..9f8004b 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c
@@ -210,7 +210,7 @@ if (cm->reference_mode == REFERENCE_MODE_SELECT) { vpx_write(w, is_compound, vp9_get_reference_mode_prob(cm, xd)); } else { - assert(!is_compound == (cm->reference_mode == SINGLE_REFERENCE)); + assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE)); } if (is_compound) {
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index d9764a4..1306481 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h
@@ -560,6 +560,15 @@ * Supported in codecs: VP9 */ VP9E_SET_RENDER_SIZE, + + /*!\brief Codec control function to set intended superblock size. + * + * By default, the superblock size is determined separately for each + * frame by the encoder. + * + * Supported in codecs: VP10 + */ + VP10E_SET_SUPERBLOCK_SIZE, }; /*!\brief vpx 1-D scaling mode @@ -820,6 +829,9 @@ */ #define VPX_CTRL_VP9E_SET_RENDER_SIZE VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *) + +VPX_CTRL_USE_TYPE(VP10E_SET_SUPERBLOCK_SIZE, unsigned int) +#define VPX_CTRL_VP10E_SET_SUPERBLOCK_SIZE /*!\endcond */ /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h index b6037bb..e65e3f4 100644 --- a/vpx/vpx_codec.h +++ b/vpx/vpx_codec.h
@@ -222,6 +222,18 @@ VPX_BITS_12 = 12, /**< 12 bits */ } vpx_bit_depth_t; + /*!\brief Superblock size selection. + * + * Defines the superblock size used for encoding. The superblock size can + * either be fixed at 64x64 or 128x128 pixels, or it can be dynamically + * selected by the encoder for each frame. + */ + typedef enum vpx_superblock_size { + VPX_SUPERBLOCK_SIZE_64X64, /**< Always use 64x64 superblocks. */ + VPX_SUPERBLOCK_SIZE_128X128, /**< Always use 128x128 superblocks. */ + VPX_SUPERBLOCK_SIZE_DYNAMIC /**< Select superblock size dynamically. */ + } vpx_superblock_size_t; + /* * Library Version Number Interface *
diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c index 26fe785..d3695a9 100644 --- a/vpx_dsp/avg.c +++ b/vpx_dsp/avg.c
@@ -12,22 +12,22 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" -unsigned int vpx_avg_8x8_c(const uint8_t *s, int p) { +unsigned int vpx_avg_8x8_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - for (i = 0; i < 8; ++i, s+=p) - for (j = 0; j < 8; sum += s[j], ++j) {} + for (i = 0; i < 8; ++i, src += stride) + for (j = 0; j < 8; sum += src[j], ++j) {} - return (sum + 32) >> 6; + return ROUND_POWER_OF_TWO(sum, 6); } -unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) { +unsigned int vpx_avg_4x4_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - for (i = 0; i < 4; ++i, s+=p) - for (j = 0; j < 4; sum += s[j], ++j) {} + for (i = 0; i < 4; ++i, src += stride) + for (j = 0; j < 4; sum += src[j], ++j) {} - return (sum + 8) >> 4; + return ROUND_POWER_OF_TWO(sum, 4); } // src_diff: first pass, 9 bit, dynamic range [-255, 255] @@ -176,14 +176,15 @@ return var; } -void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, +void vpx_minmax_8x8_c(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, int *min, int *max) { int i, j; *min = 255; *max = 0; - for (i = 0; i < 8; ++i, s += p, d += dp) { + for (i = 0; i < 8; ++i, src += src_stride, ref += ref_stride) { for (j = 0; j < 8; ++j) { - int diff = abs(s[j]-d[j]); + int diff = abs(src[j]-ref[j]); *min = diff < *min ? diff : *min; *max = diff > *max ? 
diff : *max; } @@ -191,24 +192,24 @@ } #if CONFIG_VP9_HIGHBITDEPTH -unsigned int vpx_highbd_avg_8x8_c(const uint8_t *s8, int p) { +unsigned int vpx_highbd_avg_8x8_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - const uint16_t* s = CONVERT_TO_SHORTPTR(s8); - for (i = 0; i < 8; ++i, s+=p) + const uint16_t* s = CONVERT_TO_SHORTPTR(src); + for (i = 0; i < 8; ++i, s += stride) for (j = 0; j < 8; sum += s[j], ++j) {} - return (sum + 32) >> 6; + return ROUND_POWER_OF_TWO(sum, 6); } -unsigned int vpx_highbd_avg_4x4_c(const uint8_t *s8, int p) { +unsigned int vpx_highbd_avg_4x4_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - const uint16_t* s = CONVERT_TO_SHORTPTR(s8); - for (i = 0; i < 4; ++i, s+=p) + const uint16_t* s = CONVERT_TO_SHORTPTR(src); + for (i = 0; i < 4; ++i, s+=stride) for (j = 0; j < 4; sum += s[j], ++j) {} - return (sum + 8) >> 4; + return ROUND_POWER_OF_TWO(sum, 4); } void vpx_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c index 24f42df..e6be1dd 100644 --- a/vpx_dsp/variance.c +++ b/vpx_dsp/variance.c
@@ -433,7 +433,7 @@ return *sse; \ } -static void highbd_var_filter_block2d_bil_first_pass( +void vpx_highbd_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -459,7 +459,7 @@ } } -static void highbd_var_filter_block2d_bil_second_pass( +void vpx_highbd_var_filter_block2d_bil_second_pass( const uint16_t *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -492,13 +492,14 @@ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ - dst_stride, sse); \ + dst_stride, sse); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \ @@ -509,10 +510,11 @@ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ @@ -526,10 +528,11 @@ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, 
bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ @@ -546,16 +549,17 @@ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ - vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ + vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W); \ \ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ + dst_stride, sse); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ @@ -568,13 +572,14 @@ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ - 
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ + vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W); \ \ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ @@ -590,13 +595,14 @@ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ - vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ + vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W); \ \ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ @@ -635,9 +641,9 @@ HIGHBD_MSE(8, 16) HIGHBD_MSE(8, 8) -void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, - int width, int height, const uint8_t *ref8, - int ref_stride) { +void vpx_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, + int width, int height, const uint8_t *ref8, + int ref_stride) { int i, j; uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); @@ -914,11 +920,11 @@ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ - H + 1, W, \ - bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ + H + 1, W, \ + bilinear_filters_2t[xoffset]); 
\ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, \ @@ -934,11 +940,11 @@ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ - H + 1, W, \ - bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ + H + 1, W, \ + bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, \ @@ -954,11 +960,11 @@ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ - H + 1, W, \ - bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ + H + 1, W, \ + bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, \
diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h index 161d647..dea2af9 100644 --- a/vpx_dsp/variance.h +++ b/vpx_dsp/variance.h
@@ -23,10 +23,10 @@ #define FILTER_WEIGHT 128 typedef unsigned int(*vpx_sad_fn_t)(const uint8_t *a, int a_stride, - const uint8_t *b_ptr, int b_stride); + const uint8_t *b, int b_stride); -typedef unsigned int(*vpx_sad_avg_fn_t)(const uint8_t *a_ptr, int a_stride, - const uint8_t *b_ptr, int b_stride, +typedef unsigned int(*vpx_sad_avg_fn_t)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, const uint8_t *second_pred); typedef void (*vp8_copy32xn_fn_t)(const uint8_t *a, int a_stride, @@ -50,10 +50,10 @@ const uint8_t *b, int b_stride, unsigned int *sse); -typedef unsigned int (*vpx_subp_avg_variance_fn_t)(const uint8_t *a_ptr, +typedef unsigned int (*vpx_subp_avg_variance_fn_t)(const uint8_t *a, int a_stride, int xoffset, int yoffset, - const uint8_t *b_ptr, + const uint8_t *b, int b_stride, unsigned int *sse, const uint8_t *second_pred); @@ -75,26 +75,25 @@ #endif // CONFIG_VP8 #if CONFIG_VP10 && CONFIG_EXT_INTER -typedef unsigned int(*vpx_masked_sad_fn_t)(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, +typedef unsigned int(*vpx_masked_sad_fn_t)(const uint8_t *src, + int src_stride, + const uint8_t *ref, int ref_stride, const uint8_t *msk_ptr, int msk_stride); -typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, +typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src, + int src_stride, + const uint8_t *ref, int ref_stride, - const uint8_t *msk_ptr, + const uint8_t *msk, int msk_stride, unsigned int *sse); -typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src_ptr, - int source_stride, - int xoffset, - int yoffset, - const uint8_t *ref_ptr, - int Refstride, - const uint8_t *msk_ptr, +typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src, + int src_stride, + int xoffset, int yoffset, + const uint8_t *ref, + int ref_stride, + const uint8_t *msk, int msk_stride, unsigned int *sse); #endif // CONFIG_VP10 && 
CONFIG_EXT_INTER @@ -130,6 +129,24 @@ } vp10_variance_fn_ptr_t; #endif // CONFIG_VP10 +void vpx_highbd_var_filter_block2d_bil_first_pass( + const uint8_t *src_ptr8, + uint16_t *output_ptr, + unsigned int src_pixels_per_line, + int pixel_step, + unsigned int output_height, + unsigned int output_width, + const uint8_t *filter); + +void vpx_highbd_var_filter_block2d_bil_second_pass( + const uint16_t *src_ptr, + uint16_t *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const uint8_t *filter); + #ifdef __cplusplus } // extern "C" #endif
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index e371849..46ef5fc 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk
@@ -266,6 +266,11 @@ endif endif +# high bit depth subtract +ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) +DSP_SRCS-$(HAVE_SSE2) += x86/highbd_subtract_sse2.c +endif + endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER ifeq ($(CONFIG_VP10_ENCODER),yes) @@ -350,6 +355,7 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c +DSP_SRCS-$(HAVE_SSE4_1) += x86/highbd_variance_sse4.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_subpel_variance_impl_sse2.asm
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h index e127031..3571eea 100644 --- a/vpx_dsp/vpx_dsp_common.h +++ b/vpx_dsp/vpx_dsp_common.h
@@ -30,6 +30,8 @@ #define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) #define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) +#define IMPLIES(a, b) (!(a) || (b)) // Logical 'a implies b' (or 'a -> b') + // These can be used to give a hint about branch outcomes. // This can have an effect, even if your target processor has a // good branch predictor, as these hints can affect basic block
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index d01e81d..a648e45 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -965,10 +965,6 @@ # add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc"; -if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd"; - specialize qw/vpx_highbd_subtract_block/; -} if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") { # @@ -991,6 +987,8 @@ specialize qw/vpx_highbd_avg_8x8/; add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p"; specialize qw/vpx_highbd_avg_4x4/; + add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd"; + specialize qw/vpx_highbd_subtract_block sse2/; } # @@ -1316,10 +1314,17 @@ if ($w != 128 && $h != 128 && $w != 4 && $h != 4) { specialize "vpx_highbd_${bd}_variance${w}x${h}", "sse2"; } + if ($w == 4 && $h == 4) { + specialize "vpx_highbd_${bd}_variance${w}x${h}", "sse4_1"; + } if ($w != 128 && $h != 128 && $w != 4) { specialize "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", $sse2_x86inc; specialize "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", $sse2_x86inc; } + if ($w == 4 && $h == 4) { + specialize "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", "sse4_1"; + specialize "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "sse4_1"; + } } } } # CONFIG_VP9_HIGHBITDEPTH
diff --git a/vpx_dsp/x86/highbd_subtract_sse2.c b/vpx_dsp/x86/highbd_subtract_sse2.c new file mode 100644 index 0000000..33e464b --- /dev/null +++ b/vpx_dsp/x86/highbd_subtract_sse2.c
@@ -0,0 +1,366 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <emmintrin.h> +#include <stddef.h> + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +typedef void (*SubtractWxHFuncType)( + int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride); + +static void subtract_4x4(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + __m128i u0, u1, u2, u3; + __m128i v0, v1, v2, v3; + __m128i x0, x1, x2, x3; + int64_t *store_diff = (int64_t *) (diff + 0 * diff_stride); + + u0 = _mm_loadu_si128((__m128i const *) (src + 0 * src_stride)); + u1 = _mm_loadu_si128((__m128i const *) (src + 1 * src_stride)); + u2 = _mm_loadu_si128((__m128i const *) (src + 2 * src_stride)); + u3 = _mm_loadu_si128((__m128i const *) (src + 3 * src_stride)); + + v0 = _mm_loadu_si128((__m128i const *) (pred + 0 * pred_stride)); + v1 = _mm_loadu_si128((__m128i const *) (pred + 1 * pred_stride)); + v2 = _mm_loadu_si128((__m128i const *) (pred + 2 * pred_stride)); + v3 = _mm_loadu_si128((__m128i const *) (pred + 3 * pred_stride)); + + x0 = _mm_sub_epi16(u0, v0); + x1 = _mm_sub_epi16(u1, v1); + x2 = _mm_sub_epi16(u2, v2); + x3 = _mm_sub_epi16(u3, v3); + + _mm_storel_epi64((__m128i *)store_diff, x0); + store_diff = (int64_t *) (diff + 1 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x1); + store_diff = (int64_t *) (diff + 2 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x2); + store_diff = (int64_t *) (diff + 3 * diff_stride); + 
_mm_storel_epi64((__m128i *)store_diff, x3); +} + +static void subtract_4x8(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + __m128i u0, u1, u2, u3, u4, u5, u6, u7; + __m128i v0, v1, v2, v3, v4, v5, v6, v7; + __m128i x0, x1, x2, x3, x4, x5, x6, x7; + int64_t *store_diff = (int64_t *) (diff + 0 * diff_stride); + + u0 = _mm_loadu_si128((__m128i const *) (src + 0 * src_stride)); + u1 = _mm_loadu_si128((__m128i const *) (src + 1 * src_stride)); + u2 = _mm_loadu_si128((__m128i const *) (src + 2 * src_stride)); + u3 = _mm_loadu_si128((__m128i const *) (src + 3 * src_stride)); + u4 = _mm_loadu_si128((__m128i const *) (src + 4 * src_stride)); + u5 = _mm_loadu_si128((__m128i const *) (src + 5 * src_stride)); + u6 = _mm_loadu_si128((__m128i const *) (src + 6 * src_stride)); + u7 = _mm_loadu_si128((__m128i const *) (src + 7 * src_stride)); + + v0 = _mm_loadu_si128((__m128i const *) (pred + 0 * pred_stride)); + v1 = _mm_loadu_si128((__m128i const *) (pred + 1 * pred_stride)); + v2 = _mm_loadu_si128((__m128i const *) (pred + 2 * pred_stride)); + v3 = _mm_loadu_si128((__m128i const *) (pred + 3 * pred_stride)); + v4 = _mm_loadu_si128((__m128i const *) (pred + 4 * pred_stride)); + v5 = _mm_loadu_si128((__m128i const *) (pred + 5 * pred_stride)); + v6 = _mm_loadu_si128((__m128i const *) (pred + 6 * pred_stride)); + v7 = _mm_loadu_si128((__m128i const *) (pred + 7 * pred_stride)); + + x0 = _mm_sub_epi16(u0, v0); + x1 = _mm_sub_epi16(u1, v1); + x2 = _mm_sub_epi16(u2, v2); + x3 = _mm_sub_epi16(u3, v3); + x4 = _mm_sub_epi16(u4, v4); + x5 = _mm_sub_epi16(u5, v5); + x6 = _mm_sub_epi16(u6, v6); + x7 = _mm_sub_epi16(u7, v7); + + _mm_storel_epi64((__m128i *)store_diff, x0); + store_diff = (int64_t *) (diff + 1 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x1); + store_diff = (int64_t *) (diff + 2 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x2); + store_diff = (int64_t *) (diff + 3 
* diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x3); + store_diff = (int64_t *) (diff + 4 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x4); + store_diff = (int64_t *) (diff + 5 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x5); + store_diff = (int64_t *) (diff + 6 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x6); + store_diff = (int64_t *) (diff + 7 * diff_stride); + _mm_storel_epi64((__m128i *)store_diff, x7); +} + +static void subtract_8x4(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + __m128i u0, u1, u2, u3; + __m128i v0, v1, v2, v3; + __m128i x0, x1, x2, x3; + + u0 = _mm_loadu_si128((__m128i const *) (src + 0 * src_stride)); + u1 = _mm_loadu_si128((__m128i const *) (src + 1 * src_stride)); + u2 = _mm_loadu_si128((__m128i const *) (src + 2 * src_stride)); + u3 = _mm_loadu_si128((__m128i const *) (src + 3 * src_stride)); + + v0 = _mm_loadu_si128((__m128i const *) (pred + 0 * pred_stride)); + v1 = _mm_loadu_si128((__m128i const *) (pred + 1 * pred_stride)); + v2 = _mm_loadu_si128((__m128i const *) (pred + 2 * pred_stride)); + v3 = _mm_loadu_si128((__m128i const *) (pred + 3 * pred_stride)); + + x0 = _mm_sub_epi16(u0, v0); + x1 = _mm_sub_epi16(u1, v1); + x2 = _mm_sub_epi16(u2, v2); + x3 = _mm_sub_epi16(u3, v3); + + _mm_storeu_si128((__m128i *) (diff + 0 * diff_stride), x0); + _mm_storeu_si128((__m128i *) (diff + 1 * diff_stride), x1); + _mm_storeu_si128((__m128i *) (diff + 2 * diff_stride), x2); + _mm_storeu_si128((__m128i *) (diff + 3 * diff_stride), x3); +} + +static void subtract_8x8(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + __m128i u0, u1, u2, u3, u4, u5, u6, u7; + __m128i v0, v1, v2, v3, v4, v5, v6, v7; + __m128i x0, x1, x2, x3, x4, x5, x6, x7; + + u0 = _mm_loadu_si128((__m128i const *) (src + 0 * src_stride)); + u1 = 
_mm_loadu_si128((__m128i const *) (src + 1 * src_stride)); + u2 = _mm_loadu_si128((__m128i const *) (src + 2 * src_stride)); + u3 = _mm_loadu_si128((__m128i const *) (src + 3 * src_stride)); + u4 = _mm_loadu_si128((__m128i const *) (src + 4 * src_stride)); + u5 = _mm_loadu_si128((__m128i const *) (src + 5 * src_stride)); + u6 = _mm_loadu_si128((__m128i const *) (src + 6 * src_stride)); + u7 = _mm_loadu_si128((__m128i const *) (src + 7 * src_stride)); + + v0 = _mm_loadu_si128((__m128i const *) (pred + 0 * pred_stride)); + v1 = _mm_loadu_si128((__m128i const *) (pred + 1 * pred_stride)); + v2 = _mm_loadu_si128((__m128i const *) (pred + 2 * pred_stride)); + v3 = _mm_loadu_si128((__m128i const *) (pred + 3 * pred_stride)); + v4 = _mm_loadu_si128((__m128i const *) (pred + 4 * pred_stride)); + v5 = _mm_loadu_si128((__m128i const *) (pred + 5 * pred_stride)); + v6 = _mm_loadu_si128((__m128i const *) (pred + 6 * pred_stride)); + v7 = _mm_loadu_si128((__m128i const *) (pred + 7 * pred_stride)); + + x0 = _mm_sub_epi16(u0, v0); + x1 = _mm_sub_epi16(u1, v1); + x2 = _mm_sub_epi16(u2, v2); + x3 = _mm_sub_epi16(u3, v3); + x4 = _mm_sub_epi16(u4, v4); + x5 = _mm_sub_epi16(u5, v5); + x6 = _mm_sub_epi16(u6, v6); + x7 = _mm_sub_epi16(u7, v7); + + _mm_storeu_si128((__m128i *) (diff + 0 * diff_stride), x0); + _mm_storeu_si128((__m128i *) (diff + 1 * diff_stride), x1); + _mm_storeu_si128((__m128i *) (diff + 2 * diff_stride), x2); + _mm_storeu_si128((__m128i *) (diff + 3 * diff_stride), x3); + _mm_storeu_si128((__m128i *) (diff + 4 * diff_stride), x4); + _mm_storeu_si128((__m128i *) (diff + 5 * diff_stride), x5); + _mm_storeu_si128((__m128i *) (diff + 6 * diff_stride), x6); + _mm_storeu_si128((__m128i *) (diff + 7 * diff_stride), x7); +} + +static void subtract_8x16(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride 
<< 3; + src += src_stride << 3; + pred += pred_stride << 3; + subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_16x8(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += 8; + src += 8; + pred += 8; + subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_16x16(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_16x8(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride << 3; + src += src_stride << 3; + pred += pred_stride << 3; + subtract_16x8(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_16x32(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride << 4; + src += src_stride << 4; + pred += pred_stride << 4; + subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_32x16(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += 16; + src += 16; + pred += 16; + subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_32x32(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_32x16(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride << 4; + src += src_stride << 4; + pred += pred_stride << 4; + subtract_32x16(diff, diff_stride, src, src_stride, pred, 
pred_stride); +} + +static void subtract_32x64(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride << 5; + src += src_stride << 5; + pred += pred_stride << 5; + subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_64x32(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += 32; + src += 32; + pred += 32; + subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_64x64(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_64x32(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride << 5; + src += src_stride << 5; + pred += pred_stride << 5; + subtract_64x32(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_64x128(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride << 6; + src += src_stride << 6; + pred += pred_stride << 6; + subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_128x64(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += 64; + src += 64; + pred += 64; + subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static void subtract_128x128(int16_t *diff, ptrdiff_t diff_stride, + const uint16_t *src, 
ptrdiff_t src_stride, + const uint16_t *pred, ptrdiff_t pred_stride) { + subtract_128x64(diff, diff_stride, src, src_stride, pred, pred_stride); + diff += diff_stride << 6; + src += src_stride << 6; + pred += pred_stride << 6; + subtract_128x64(diff, diff_stride, src, src_stride, pred, pred_stride); +} + +static SubtractWxHFuncType getSubtractFunc(int rows, int cols) { + SubtractWxHFuncType ret_func_ptr = NULL; + if (rows == 4) { + if (cols == 4) { + ret_func_ptr = subtract_4x4; + } else if (cols == 8) { + ret_func_ptr = subtract_8x4; + } + } else if (rows == 8) { + if (cols == 4) { + ret_func_ptr = subtract_4x8; + } else if (cols == 8) { + ret_func_ptr = subtract_8x8; + } else if (cols == 16) { + ret_func_ptr = subtract_16x8; + } + } else if (rows == 16) { + if (cols == 8) { + ret_func_ptr = subtract_8x16; + } else if (cols == 16) { + ret_func_ptr = subtract_16x16; + } else if (cols == 32) { + ret_func_ptr = subtract_32x16; + } + } else if (rows == 32) { + if (cols == 16) { + ret_func_ptr = subtract_16x32; + } else if (cols == 32) { + ret_func_ptr = subtract_32x32; + } else if (cols == 64) { + ret_func_ptr = subtract_64x32; + } + } else if (rows == 64) { + if (cols == 32) { + ret_func_ptr = subtract_32x64; + } else if (cols == 64) { + ret_func_ptr = subtract_64x64; + } else if (cols == 128) { + ret_func_ptr = subtract_128x64; + } + } else if (rows == 128) { + if (cols == 64) { + ret_func_ptr = subtract_64x128; + } else if (cols == 128) { + ret_func_ptr = subtract_128x128; + } + } + if (!ret_func_ptr) { + assert(0); + } + return ret_func_ptr; +} + +void vpx_highbd_subtract_block_sse2( + int rows, int cols, + int16_t *diff, ptrdiff_t diff_stride, + const uint8_t *src8, ptrdiff_t src_stride, + const uint8_t *pred8, + ptrdiff_t pred_stride, + int bd) { + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); + SubtractWxHFuncType func; + (void) bd; + + func = getSubtractFunc(rows, cols); + func(diff, diff_stride, src, src_stride, 
pred, pred_stride); +}
diff --git a/vpx_dsp/x86/highbd_variance_sse4.c b/vpx_dsp/x86/highbd_variance_sse4.c new file mode 100644 index 0000000..5c1dfe4 --- /dev/null +++ b/vpx_dsp/x86/highbd_variance_sse4.c
@@ -0,0 +1,248 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <smmintrin.h> /* SSE4.1 */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_dsp/variance.h" +#include "vpx_dsp/vpx_filter.h" + +static INLINE void variance4x4_64_sse4_1(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + uint64_t *sse, int64_t *sum) { + __m128i u0, u1, u2, u3; + __m128i s0, s1, s2, s3; + __m128i t0, t1, x0, y0; + __m128i a0, a1, a2, a3; + __m128i b0, b1, b2, b3; + __m128i k_one_epi16 = _mm_set1_epi16((int16_t)1); + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + + a0 = _mm_loadu_si128((__m128i const *) (a + 0 * a_stride)); + a1 = _mm_loadu_si128((__m128i const *) (a + 1 * a_stride)); + a2 = _mm_loadu_si128((__m128i const *) (a + 2 * a_stride)); + a3 = _mm_loadu_si128((__m128i const *) (a + 3 * a_stride)); + + b0 = _mm_loadu_si128((__m128i const *) (b + 0 * b_stride)); + b1 = _mm_loadu_si128((__m128i const *) (b + 1 * b_stride)); + b2 = _mm_loadu_si128((__m128i const *) (b + 2 * b_stride)); + b3 = _mm_loadu_si128((__m128i const *) (b + 3 * b_stride)); + + u0 = _mm_unpacklo_epi16(a0, a1); + u1 = _mm_unpacklo_epi16(a2, a3); + u2 = _mm_unpacklo_epi16(b0, b1); + u3 = _mm_unpacklo_epi16(b2, b3); + + s0 = _mm_sub_epi16(u0, u2); + s1 = _mm_sub_epi16(u1, u3); + + t0 = _mm_madd_epi16(s0, k_one_epi16); + t1 = _mm_madd_epi16(s1, k_one_epi16); + + s2 = _mm_hadd_epi32(t0, t1); + s3 = _mm_hadd_epi32(s2, s2); + y0 = _mm_hadd_epi32(s3, s3); + + t0 = _mm_madd_epi16(s0, s0); + t1 = _mm_madd_epi16(s1, s1); + + s2 = _mm_hadd_epi32(t0, t1); + s3 = 
_mm_hadd_epi32(s2, s2); + x0 = _mm_hadd_epi32(s3, s3); + + *sse = (uint64_t)_mm_extract_epi32(x0, 0); + *sum = (int64_t)_mm_extract_epi32(y0, 0); +} + +uint32_t vpx_highbd_8_variance4x4_sse4_1(const uint8_t *a, + int a_stride, + const uint8_t *b, + int b_stride, + uint32_t *sse) { + int64_t sum; + uint64_t local_sse; + + variance4x4_64_sse4_1(a, a_stride, b, b_stride, &local_sse, &sum); + *sse = (uint32_t)local_sse; + + return *sse - ((sum * sum) >> 4); +} + +uint32_t vpx_highbd_10_variance4x4_sse4_1(const uint8_t *a, + int a_stride, + const uint8_t *b, + int b_stride, + uint32_t *sse) { + int64_t sum; + uint64_t local_sse; + + variance4x4_64_sse4_1(a, a_stride, b, b_stride, &local_sse, &sum); + *sse = (uint32_t)ROUND_POWER_OF_TWO(local_sse, 4); + sum = ROUND_POWER_OF_TWO(sum, 2); + + return *sse - ((sum * sum) >> 4); +} + +uint32_t vpx_highbd_12_variance4x4_sse4_1(const uint8_t *a, + int a_stride, + const uint8_t *b, + int b_stride, + uint32_t *sse) { + int64_t sum; + uint64_t local_sse; + + variance4x4_64_sse4_1(a, a_stride, b, b_stride, &local_sse, &sum); + *sse = (uint32_t)ROUND_POWER_OF_TWO(local_sse, 8); + sum = ROUND_POWER_OF_TWO(sum, 4); + + return *sse - ((sum * sum) >> 4); +} + +// Sub-pixel +uint32_t vpx_highbd_8_sub_pixel_variance4x4_sse4_1( + const uint8_t *src, int src_stride, + int xoffset, int yoffset, + const uint8_t *dst, int dst_stride, + uint32_t *sse) { + + uint16_t fdata3[(4 + 1) * 4]; + uint16_t temp2[4 * 4]; + + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); + + return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp2), + 4, dst, dst_stride, sse); +} + +uint32_t vpx_highbd_10_sub_pixel_variance4x4_sse4_1( + const uint8_t *src, int src_stride, + int xoffset, int yoffset, + const uint8_t *dst, int dst_stride, + uint32_t *sse) { + + uint16_t fdata3[(4 + 1) * 4]; + 
uint16_t temp2[4 * 4]; + + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); + + return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp2), + 4, dst, dst_stride, sse); +} + +uint32_t vpx_highbd_12_sub_pixel_variance4x4_sse4_1( + const uint8_t *src, int src_stride, + int xoffset, int yoffset, + const uint8_t *dst, int dst_stride, + uint32_t *sse) { + + uint16_t fdata3[(4 + 1) * 4]; + uint16_t temp2[4 * 4]; + + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); + + return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp2), + 4, dst, dst_stride, sse); +} + +// Sub-pixel average + +uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4_sse4_1( + const uint8_t *src, int src_stride, + int xoffset, int yoffset, + const uint8_t *dst, int dst_stride, + uint32_t *sse, + const uint8_t *second_pred) { + + uint16_t fdata3[(4 + 1) * 4]; + uint16_t temp2[4 * 4]; + DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]); + + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); + + vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4, + CONVERT_TO_BYTEPTR(temp2), 4); + + return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp3), + 4, dst, dst_stride, sse); +} + +uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4_sse4_1( + const uint8_t *src, int src_stride, + int xoffset, int yoffset, + const uint8_t *dst, int dst_stride, + uint32_t *sse, + const uint8_t *second_pred) { + + uint16_t fdata3[(4 + 1) * 4]; + uint16_t temp2[4 * 4]; + DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]); + + 
vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); + + vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4, + CONVERT_TO_BYTEPTR(temp2), 4); + + return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp3), + 4, dst, dst_stride, sse); +} + +uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4_sse4_1( + const uint8_t *src, int src_stride, + int xoffset, int yoffset, + const uint8_t *dst, int dst_stride, + uint32_t *sse, + const uint8_t *second_pred) { + + uint16_t fdata3[(4 + 1) * 4]; + uint16_t temp2[4 * 4]; + DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]); + + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); + + vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4, + CONVERT_TO_BYTEPTR(temp2), 4); + + return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp3), + 4, dst, dst_stride, sse); +}
diff --git a/vpx_ports/msvc.h b/vpx_ports/msvc.h index cab7740..d6b8503 100644 --- a/vpx_ports/msvc.h +++ b/vpx_ports/msvc.h
@@ -26,6 +26,20 @@ else return floor(x + 0.5); } + +static INLINE float roundf(float x) { + if (x < 0) + return (float)ceil(x - 0.5f); + else + return (float)floor(x + 0.5f); +} + +static INLINE long lroundf(float x) { + if (x < 0) + return (long)(x - 0.5f); + else + return (long)(x + 0.5f); +} #endif // _MSC_VER < 1800 #endif // _MSC_VER
diff --git a/vpxenc.c b/vpxenc.c index 5e14934..ca29816 100644 --- a/vpxenc.c +++ b/vpxenc.c
@@ -476,6 +476,17 @@ #endif #if CONFIG_VP10_ENCODER +#if CONFIG_EXT_PARTITION +static const struct arg_enum_list superblock_size_enum[] = { + {"dynamic", VPX_SUPERBLOCK_SIZE_DYNAMIC}, + {"64", VPX_SUPERBLOCK_SIZE_64X64}, + {"128", VPX_SUPERBLOCK_SIZE_128X128}, + {NULL, 0} +}; +static const arg_def_t superblock_size = ARG_DEF_ENUM( + NULL, "sb-size", 1, "Superblock size to use", superblock_size_enum); +#endif // CONFIG_EXT_PARTITION + static const arg_def_t *vp10_args[] = { &cpu_used_vp9, &auto_altref, &sharpness, &static_thresh, &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type, @@ -484,6 +495,9 @@ &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, &noise_sens, &tune_content, &input_color_space, &min_gf_interval, &max_gf_interval, +#if CONFIG_EXT_PARTITION + &superblock_size, +#endif // CONFIG_EXT_PARTITION #if CONFIG_VP9_HIGHBITDEPTH &bitdeptharg, &inbitdeptharg, #endif // CONFIG_VP9_HIGHBITDEPTH @@ -500,6 +514,9 @@ VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY, VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE, VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL, +#if CONFIG_EXT_PARTITION + VP10E_SET_SUPERBLOCK_SIZE, +#endif // CONFIG_EXT_PARTITION 0 }; #endif