Merge "Small speed up for super_block_uvrd" into nextgenv2
diff --git a/configure b/configure
index ed1d048..eda83f1 100755
--- a/configure
+++ b/configure
@@ -282,6 +282,7 @@
ans
loop_restoration
ext_partition
+ ext_partition_types
ext_tile
obmc
entropy
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc
index 193bd45..e074461 100644
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -86,11 +86,11 @@
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 500;
- cfg_.g_lag_in_frames = 25;
+ cfg_.g_lag_in_frames = 12;
cfg_.rc_end_usage = VPX_VBR;
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 144,
- timebase.den, timebase.num, 0, 30);
+ timebase.den, timebase.num, 0, 15);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const char *md5_fw_str = md5_fw_order_.Get();
@@ -104,5 +104,5 @@
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
-VP10_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
+VP10_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 1, 1));
} // namespace
diff --git a/test/vp10_fht16x16_test.cc b/test/vp10_fht16x16_test.cc
index d501e10..3967149 100644
--- a/test/vp10_fht16x16_test.cc
+++ b/test/vp10_fht16x16_test.cc
@@ -103,20 +103,6 @@
make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 7,
VPX_BITS_8, 256),
make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 8,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 9,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 10,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 11,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 12,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 13,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 14,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 15,
VPX_BITS_8, 256)));
#endif // !CONFIG_EXT_TX
#endif // HAVE_SSE2
diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
index d2598f9..bee1a0c 100644
--- a/test/vp10_fht4x4_test.cc
+++ b/test/vp10_fht4x4_test.cc
@@ -102,20 +102,6 @@
make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 7,
VPX_BITS_8, 16),
make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 8,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 9,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 10,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 11,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 12,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 13,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 14,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 15,
VPX_BITS_8, 16)));
#endif // !CONFIG_EXT_TX
#endif // HAVE_SSE2
diff --git a/test/vp10_fht8x8_test.cc b/test/vp10_fht8x8_test.cc
index 47feb3d..96f5632 100644
--- a/test/vp10_fht8x8_test.cc
+++ b/test/vp10_fht8x8_test.cc
@@ -102,20 +102,6 @@
make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 7,
VPX_BITS_8, 64),
make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 8,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 9,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 10,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 11,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 12,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 13,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 14,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 15,
VPX_BITS_8, 64)));
#endif // !CONFIG_EXT_TX
#endif // HAVE_SSE2
diff --git a/test/vp10_fwd_txfm1d_test.cc b/test/vp10_fwd_txfm1d_test.cc
index a39e0ef..bcbc617 100644
--- a/test/vp10_fwd_txfm1d_test.cc
+++ b/test/vp10_fwd_txfm1d_test.cc
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "test/vp10_txfm_test.h"
#include "vp10/common/vp10_fwd_txfm1d.h"
+#include "test/vp10_txfm_test.h"
using libvpx_test::ACMRandom;
@@ -17,12 +17,14 @@
static int txfm_type_num = 2;
static TYPE_TXFM txfm_type_ls[2] = {TYPE_DCT, TYPE_ADST};
-static int txfm_size_num = 4;
-static int txfm_size_ls[4] = {4, 8, 16, 32};
+static int txfm_size_num = 5;
+static int txfm_size_ls[5] = {4, 8, 16, 32, 64};
-static TxfmFunc fwd_txfm_func_ls[2][4] = {
- {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new},
- {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}};
+static TxfmFunc fwd_txfm_func_ls[2][5] = {
+ {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new,
+ vp10_fdct64_new},
+ {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new,
+ NULL}};
// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14};
@@ -104,19 +106,21 @@
int max_error = 7;
const int count_test_block = 5000;
- for (int ti = 0; ti < count_test_block; ++ti) {
- for (int ni = 0; ni < txfm_size; ++ni) {
- input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
- ref_input[ni] = static_cast<double>(input[ni]);
- }
+ if (fwd_txfm_func != NULL) {
+ for (int ti = 0; ti < count_test_block; ++ti) {
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
+ ref_input[ni] = static_cast<double>(input[ni]);
+ }
- fwd_txfm_func(input, output, cos_bit, range_bit);
- reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type);
+ fwd_txfm_func(input, output, cos_bit, range_bit);
+ reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type);
- for (int ni = 0; ni < txfm_size; ++ni) {
- EXPECT_LE(
- abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
- max_error);
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ EXPECT_LE(
+ abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
+ max_error);
+ }
}
}
}
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc
index e6416cc..137f653 100644
--- a/test/vp10_fwd_txfm2d_test.cc
+++ b/test/vp10_fwd_txfm2d_test.cc
@@ -8,36 +8,36 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
#include <stdio.h>
#include <stdlib.h>
-#include <math.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/vp10_txfm_test.h"
-#include "vp10/common/vp10_fwd_txfm2d.h"
#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+#include "./vp10_rtcd.h"
using libvpx_test::ACMRandom;
namespace {
-const int txfm_size_num = 4;
-const int txfm_size_ls[4] = {4, 8, 16, 32};
-const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = {
- {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4,
- fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4},
- {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8,
- fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8},
- {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16,
- fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16},
- {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32,
- fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}};
+#if CONFIG_VP9_HIGHBITDEPTH
+const int txfm_size_num = 5;
+const int txfm_size_ls[5] = {4, 8, 16, 32, 64};
+const TXFM_2D_CFG* fwd_txfm_cfg_ls[5][4] = {
+ {&fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_adst_4,
+ &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_dct_4},
+ {&fwd_txfm_2d_cfg_dct_dct_8, &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_8, &fwd_txfm_2d_cfg_adst_dct_8},
+ {&fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_adst_16,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_dct_16},
+ {&fwd_txfm_2d_cfg_dct_dct_32, &fwd_txfm_2d_cfg_dct_adst_32,
+ &fwd_txfm_2d_cfg_adst_adst_32, &fwd_txfm_2d_cfg_adst_dct_32},
+ {&fwd_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}};
-const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = {
+const Fwd_Txfm2d_Func fwd_txfm_func_ls[5] = {
vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16,
- vp10_fwd_txfm2d_32x32};
+ vp10_fwd_txfm2d_32x32, vp10_fwd_txfm2d_64x64};
const int txfm_type_num = 4;
const TYPE_TXFM type_ls_0[4] = {TYPE_DCT, TYPE_DCT, TYPE_ADST, TYPE_ADST};
@@ -54,44 +54,48 @@
for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
++txfm_type_idx) {
- TXFM_2D_CFG fwd_txfm_cfg = fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
- Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
- TYPE_TXFM type0 = type_ls_0[txfm_type_idx];
- TYPE_TXFM type1 = type_ls_1[txfm_type_idx];
- int amplify_bit =
- fwd_txfm_cfg.shift[0] + fwd_txfm_cfg.shift[1] + fwd_txfm_cfg.shift[2];
- double amplify_factor =
- amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
+ const TXFM_2D_CFG* fwd_txfm_cfg =
+ fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+ if (fwd_txfm_cfg != NULL) {
+ Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
+ TYPE_TXFM type0 = type_ls_0[txfm_type_idx];
+ TYPE_TXFM type1 = type_ls_1[txfm_type_idx];
+ int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
+ fwd_txfm_cfg->shift[2];
+ double amplify_factor =
+ amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int count = 5000;
- double avg_abs_error = 0;
- for (int ci = 0; ci < count; ci++) {
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- input[ni] = rnd.Rand16() % base;
- ref_input[ni] = static_cast<double>(input[ni]);
- output[ni] = 0;
- ref_output[ni] = 0;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int count = 500;
+ double avg_abs_error = 0;
+ for (int ci = 0; ci < count; ci++) {
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base;
+ ref_input[ni] = static_cast<double>(input[ni]);
+ output[ni] = 0;
+ ref_output[ni] = 0;
+ }
+
+ fwd_txfm_func(input, output, txfm_size, fwd_txfm_cfg, bd);
+ reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
+
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ ref_output[ni] = round(ref_output[ni] * amplify_factor);
+ EXPECT_LE(fabs(output[ni] - ref_output[ni]) / amplify_factor, 60);
+ }
+ avg_abs_error += compute_avg_abs_error<int32_t, double>(
+ output, ref_output, sqr_txfm_size);
}
- fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd);
- reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
-
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- ref_output[ni] = round(ref_output[ni] * amplify_factor);
- EXPECT_LE(fabs(output[ni] - ref_output[ni]) / amplify_factor, 30);
- }
- avg_abs_error += compute_avg_abs_error<int32_t, double>(
- output, ref_output, sqr_txfm_size);
+ avg_abs_error /= amplify_factor;
+ avg_abs_error /= count;
+ // max_abs_avg_error comes from upper bound of avg_abs_error
+ // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
+ // %f\n",
+ // type0, type1, txfm_size, avg_abs_error);
+ double max_abs_avg_error = 5;
+ EXPECT_LE(avg_abs_error, max_abs_avg_error);
}
-
- avg_abs_error /= amplify_factor;
- avg_abs_error /= count;
- // max_abs_avg_error comes from upper bound of avg_abs_error
- // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
- // %f\n", type0, type1, txfm_size, avg_abs_error);
- double max_abs_avg_error = 1.5;
- EXPECT_LE(avg_abs_error, max_abs_avg_error);
}
delete[] input;
@@ -100,5 +104,6 @@
delete[] ref_output;
}
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
} // anonymous namespace
diff --git a/test/vp10_inv_txfm1d_test.cc b/test/vp10_inv_txfm1d_test.cc
index 3b716c8..2e9e58d 100644
--- a/test/vp10_inv_txfm1d_test.cc
+++ b/test/vp10_inv_txfm1d_test.cc
@@ -16,16 +16,20 @@
namespace {
static int txfm_type_num = 2;
-static int txfm_size_num = 4;
-static int txfm_size_ls[4] = {4, 8, 16, 32};
+static int txfm_size_num = 5;
+static int txfm_size_ls[5] = {4, 8, 16, 32, 64};
-static TxfmFunc fwd_txfm_func_ls[2][4] = {
- {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new},
- {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}};
+static TxfmFunc fwd_txfm_func_ls[2][5] = {
+ {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new,
+ vp10_fdct64_new},
+ {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new,
+ NULL}};
-static TxfmFunc inv_txfm_func_ls[2][4] = {
- {vp10_idct4_new, vp10_idct8_new, vp10_idct16_new, vp10_idct32_new},
- {vp10_iadst4_new, vp10_iadst8_new, vp10_iadst16_new, vp10_iadst32_new}};
+static TxfmFunc inv_txfm_func_ls[2][5] = {
+ {vp10_idct4_new, vp10_idct8_new, vp10_idct16_new, vp10_idct32_new,
+ vp10_idct64_new},
+ {vp10_iadst4_new, vp10_iadst8_new, vp10_iadst16_new, vp10_iadst32_new,
+ NULL}};
// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14};
@@ -44,19 +48,22 @@
TxfmFunc inv_txfm_func = inv_txfm_func_ls[ti][si];
int max_error = 2;
- const int count_test_block = 5000;
- for (int ci = 0; ci < count_test_block; ++ci) {
- for (int ni = 0; ni < txfm_size; ++ni) {
- input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
- }
+ if (fwd_txfm_func != NULL) {
+ const int count_test_block = 5000;
+ for (int ci = 0; ci < count_test_block; ++ci) {
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
+ }
- fwd_txfm_func(input, output, cos_bit, range_bit);
- inv_txfm_func(output, round_trip_output, cos_bit, range_bit);
+ fwd_txfm_func(input, output, cos_bit, range_bit);
+ inv_txfm_func(output, round_trip_output, cos_bit, range_bit);
- for (int ni = 0; ni < txfm_size; ++ni) {
- EXPECT_LE(abs(input[ni] - round_shift(round_trip_output[ni],
- get_max_bit(txfm_size) - 1)),
- max_error);
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ int node_err =
+ abs(input[ni] - round_shift(round_trip_output[ni],
+ get_max_bit(txfm_size) - 1));
+ EXPECT_LE(node_err, max_error);
+ }
}
}
}
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc
index 603821e..9257244 100644
--- a/test/vp10_inv_txfm2d_test.cc
+++ b/test/vp10_inv_txfm2d_test.cc
@@ -12,47 +12,48 @@
#include <stdio.h>
#include <stdlib.h>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
+#include "./vp10_rtcd.h"
#include "test/acm_random.h"
#include "test/vp10_txfm_test.h"
-#include "vp10/common/vp10_fwd_txfm2d.h"
#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
-#include "vp10/common/vp10_inv_txfm2d.h"
#include "vp10/common/vp10_inv_txfm2d_cfg.h"
using libvpx_test::ACMRandom;
namespace {
-const int txfm_size_num = 4;
-const int txfm_size_ls[4] = {4, 8, 16, 32};
-const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = {
- {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4,
- fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4},
- {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8,
- fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8},
- {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16,
- fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16},
- {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32,
- fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}};
+#if CONFIG_VP9_HIGHBITDEPTH
+const int txfm_size_num = 5;
+const int txfm_size_ls[5] = {4, 8, 16, 32, 64};
+const TXFM_2D_CFG* fwd_txfm_cfg_ls[5][4] = {
+ {&fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_adst_4,
+ &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_dct_4},
+ {&fwd_txfm_2d_cfg_dct_dct_8, &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_8, &fwd_txfm_2d_cfg_adst_dct_8},
+ {&fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_adst_16,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_dct_16},
+ {&fwd_txfm_2d_cfg_dct_dct_32, &fwd_txfm_2d_cfg_dct_adst_32,
+ &fwd_txfm_2d_cfg_adst_adst_32, &fwd_txfm_2d_cfg_adst_dct_32},
+ {&fwd_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}};
-const TXFM_2D_CFG inv_txfm_cfg_ls[4][4] = {
- {inv_txfm_2d_cfg_dct_dct_4, inv_txfm_2d_cfg_dct_adst_4,
- inv_txfm_2d_cfg_adst_adst_4, inv_txfm_2d_cfg_adst_dct_4},
- {inv_txfm_2d_cfg_dct_dct_8, inv_txfm_2d_cfg_dct_adst_8,
- inv_txfm_2d_cfg_adst_adst_8, inv_txfm_2d_cfg_adst_dct_8},
- {inv_txfm_2d_cfg_dct_dct_16, inv_txfm_2d_cfg_dct_adst_16,
- inv_txfm_2d_cfg_adst_adst_16, inv_txfm_2d_cfg_adst_dct_16},
- {inv_txfm_2d_cfg_dct_dct_32, inv_txfm_2d_cfg_dct_adst_32,
- inv_txfm_2d_cfg_adst_adst_32, inv_txfm_2d_cfg_adst_dct_32}};
+const TXFM_2D_CFG* inv_txfm_cfg_ls[5][4] = {
+ {&inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_adst_4,
+ &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_dct_4},
+ {&inv_txfm_2d_cfg_dct_dct_8, &inv_txfm_2d_cfg_dct_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_8, &inv_txfm_2d_cfg_adst_dct_8},
+ {&inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_adst_16,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_dct_16},
+ {&inv_txfm_2d_cfg_dct_dct_32, &inv_txfm_2d_cfg_dct_adst_32,
+ &inv_txfm_2d_cfg_adst_adst_32, &inv_txfm_2d_cfg_adst_dct_32},
+ {&inv_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}};
-const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = {
- vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16,
- vp10_fwd_txfm2d_32x32};
-const Inv_Txfm2d_Func inv_txfm_func_ls[4] = {
- vp10_inv_txfm2d_add_4x4, vp10_inv_txfm2d_add_8x8, vp10_inv_txfm2d_add_16x16,
- vp10_inv_txfm2d_add_32x32};
+const Fwd_Txfm2d_Func fwd_txfm_func_ls[5] = {
+ vp10_fwd_txfm2d_4x4_c, vp10_fwd_txfm2d_8x8_c, vp10_fwd_txfm2d_16x16_c,
+ vp10_fwd_txfm2d_32x32_c, vp10_fwd_txfm2d_64x64_c};
+const Inv_Txfm2d_Func inv_txfm_func_ls[5] = {
+ vp10_inv_txfm2d_add_4x4_c, vp10_inv_txfm2d_add_8x8_c,
+ vp10_inv_txfm2d_add_16x16_c, vp10_inv_txfm2d_add_32x32_c,
+ vp10_inv_txfm2d_add_64x64_c};
const int txfm_type_num = 4;
@@ -66,44 +67,46 @@
for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
++txfm_type_idx) {
- const TXFM_2D_CFG fwd_txfm_cfg =
+ const TXFM_2D_CFG* fwd_txfm_cfg =
fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
- const TXFM_2D_CFG inv_txfm_cfg =
+ const TXFM_2D_CFG* inv_txfm_cfg =
inv_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
- const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
- const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx];
- const int count = 5000;
- double avg_abs_error = 0;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int ci = 0; ci < count; ci++) {
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- if (ci == 0) {
- int extreme_input = base - 1;
- input[ni] = extreme_input; // extreme case
- ref_input[ni] = 0;
- } else {
- input[ni] = rnd.Rand16() % base;
- ref_input[ni] = 0;
+ if (fwd_txfm_cfg != NULL) {
+ const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
+ const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx];
+ const int count = 1000;
+ double avg_abs_error = 0;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int ci = 0; ci < count; ci++) {
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ if (ci == 0) {
+ int extreme_input = base - 1;
+ input[ni] = extreme_input; // extreme case
+ ref_input[ni] = 0;
+ } else {
+ input[ni] = rnd.Rand16() % base;
+ ref_input[ni] = 0;
+ }
}
+
+ fwd_txfm_func(input, output, txfm_size, fwd_txfm_cfg, bd);
+ inv_txfm_func(output, ref_input, txfm_size, inv_txfm_cfg, bd);
+
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ EXPECT_LE(abs(input[ni] - ref_input[ni]), 2);
+ }
+ avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
+ input, ref_input, sqr_txfm_size);
}
- fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd);
- inv_txfm_func(output, ref_input, txfm_size, &inv_txfm_cfg, bd);
-
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- EXPECT_LE(abs(input[ni] - ref_input[ni]), 2);
- }
- avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
- input, ref_input, sqr_txfm_size);
+ avg_abs_error /= count;
+ // max_abs_avg_error comes from upper bound of
+ // printf("txfm_size: %d accuracy_avg_abs_error: %f\n",
+ // txfm_size, avg_abs_error);
+ // TODO(angiebird): this upper bound is from adst_adst_8
+ const double max_abs_avg_error = 0.024;
+ EXPECT_LE(avg_abs_error, max_abs_avg_error);
}
-
- avg_abs_error /= count;
- // max_abs_avg_error comes from upper bound of
- // printf("txfm_size: %d accuracy_avg_abs_error: %f\n", txfm_size,
- // avg_abs_error);
- // TODO(angiebird): this upper bound is from adst_adst_8
- const double max_abs_avg_error = 0.024;
- EXPECT_LE(avg_abs_error, max_abs_avg_error);
}
delete[] input;
@@ -111,5 +114,6 @@
delete[] output;
}
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
} // anonymous namespace
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c
index e14aee7..b3c216e 100644
--- a/vp10/common/alloccommon.c
+++ b/vp10/common/alloccommon.c
@@ -97,10 +97,13 @@
}
void vp10_free_context_buffers(VP10_COMMON *cm) {
+ int i;
cm->free_mi(cm);
free_seg_map(cm);
- vpx_free(cm->above_context);
- cm->above_context = NULL;
+ for (i = 0 ; i < MAX_MB_PLANE ; i++) {
+ vpx_free(cm->above_context[i]);
+ cm->above_context[i] = NULL;
+ }
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
#if CONFIG_VAR_TX
@@ -128,11 +131,14 @@
}
if (cm->above_context_alloc_cols < cm->mi_cols) {
- vpx_free(cm->above_context);
- cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc(
- 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE,
- sizeof(*cm->above_context));
- if (!cm->above_context) goto fail;
+ int i;
+ for (i = 0 ; i < MAX_MB_PLANE ; i++) {
+ vpx_free(cm->above_context[i]);
+ cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
+ 2 * mi_cols_aligned_to_sb(cm->mi_cols),
+ sizeof(*cm->above_context[0]));
+ if (!cm->above_context[i]) goto fail;
+ }
vpx_free(cm->above_seg_context);
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 016fc75..de91431 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -167,9 +167,9 @@
PREDICTION_MODE mode;
TX_SIZE tx_size;
#if CONFIG_VAR_TX
- // TODO(jingning): This effectively assigned 64 entries for each 8x8 block.
+ // TODO(jingning): This effectively assigned an entry for each 8x8 block.
// Apparently it takes much more space than needed.
- TX_SIZE inter_tx_size[64];
+ TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
#endif
int8_t skip;
int8_t has_no_coeffs;
@@ -212,6 +212,9 @@
#if CONFIG_REF_MV
uint8_t ref_mv_idx;
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition;
+#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
@@ -356,6 +359,37 @@
return subsize_lookup[partition][bsize];
}
+#if CONFIG_EXT_PARTITION_TYPES
+static INLINE PARTITION_TYPE get_partition(const MODE_INFO *const mi,
+ int mi_stride, int mi_rows,
+ int mi_cols, int mi_row,
+ int mi_col, BLOCK_SIZE bsize) {
+ const int bsl = b_width_log2_lookup[bsize];
+ const int bs = (1 << bsl) / 4;
+ MODE_INFO m = mi[mi_row * mi_stride + mi_col];
+ PARTITION_TYPE partition = partition_lookup[bsl][m.mbmi.sb_type];
+ if (partition != PARTITION_NONE && bsize > BLOCK_8X8 &&
+ mi_row + bs < mi_rows && mi_col + bs < mi_cols) {
+ BLOCK_SIZE h = get_subsize(bsize, PARTITION_HORZ_A);
+ BLOCK_SIZE v = get_subsize(bsize, PARTITION_VERT_A);
+ MODE_INFO m_right = mi[mi_row * mi_stride + mi_col + bs];
+ MODE_INFO m_below = mi[(mi_row + bs) * mi_stride + mi_col];
+ if (m.mbmi.sb_type == h) {
+ return m_below.mbmi.sb_type == h ? PARTITION_HORZ : PARTITION_HORZ_B;
+ } else if (m.mbmi.sb_type == v) {
+ return m_right.mbmi.sb_type == v ? PARTITION_VERT : PARTITION_VERT_B;
+ } else if (m_below.mbmi.sb_type == h) {
+ return PARTITION_HORZ_A;
+ } else if (m_right.mbmi.sb_type == v) {
+ return PARTITION_VERT_A;
+ } else {
+ return PARTITION_SPLIT;
+ }
+ }
+ return partition;
+}
+#endif // CONFIG_EXT_PARTITION_TYPES
+
static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = {
DCT_DCT, // DC
ADST_DCT, // V
@@ -383,10 +417,10 @@
#define USE_MSKTX_FOR_32X32 0
static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
- 1, 19, 12, 2
+ 1, 16, 12, 2
};
static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
- 1, 17, 10
+ 1, 12, 10
};
#if EXT_TX_SIZES == 4
@@ -437,17 +471,17 @@
// Transform types used in each intra set
static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, },
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
};
// Transform types used in each inter set
static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1},
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1},
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
};
static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h
index 84476fa..67d6e3a 100644
--- a/vp10/common/common_data.h
+++ b/vp10/common/common_data.h
@@ -80,6 +80,59 @@
}
};
+#if CONFIG_EXT_PARTITION_TYPES
+static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] = {
+ { // PARTITION_NONE
+ BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+ BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
+ BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
+ BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
+ BLOCK_64X64,
+ }, { // PARTITION_HORZ
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_64X32,
+ }, { // PARTITION_VERT
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_32X64,
+ }, { // PARTITION_SPLIT
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_32X32,
+ }, { // PARTITION_HORZ_A
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_64X32,
+ }, { // PARTITION_HORZ_B
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_64X32,
+ }, { // PARTITION_VERT_A
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_32X64,
+ }, { // PARTITION_VERT_B
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_32X64,
+ }
+};
+#else
static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = {
{ // PARTITION_NONE
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
@@ -107,6 +160,7 @@
BLOCK_32X32,
}
};
+#endif // CONFIG_EXT_PARTITION_TYPES
static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
TX_4X4, TX_4X4, TX_4X4,
@@ -180,9 +234,16 @@
{{TX_32X32, TX_16X16}, {TX_16X16, TX_16X16}},
};
+#if CONFIG_EXT_PARTITION_TYPES
+static const int partition_supertx_context_lookup[EXT_PARTITION_TYPES] = {
+ -1, 0, 0, 1, 0, 0, 0, 0
+};
+
+#else
static const int partition_supertx_context_lookup[PARTITION_TYPES] = {
-1, 0, 0, 1
};
+#endif // CONFIG_EXT_PARTITION_TYPES
#endif // CONFIG_SUPERTX
#ifdef __cplusplus
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index d48679e..b57ed7a 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -148,6 +148,31 @@
{ 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm
};
+#if CONFIG_EXT_PARTITION_TYPES
+static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
+ [EXT_PARTITION_TYPES - 1] = {
+ // 8x8 -> 4x4
+ { 199, 122, 141, 128, 128, 128, 128 }, // a/l both not split
+ { 147, 63, 159, 128, 128, 128, 128 }, // a split, l not split
+ { 148, 133, 118, 128, 128, 128, 128 }, // l split, a not split
+ { 121, 104, 114, 128, 128, 128, 128 }, // a/l both split
+ // 16x16 -> 8x8
+ { 174, 73, 87, 128, 128, 128, 128 }, // a/l both not split
+ { 92, 41, 83, 128, 128, 128, 128 }, // a split, l not split
+ { 82, 99, 50, 128, 128, 128, 128 }, // l split, a not split
+ { 53, 39, 39, 128, 128, 128, 128 }, // a/l both split
+ // 32x32 -> 16x16
+ { 177, 58, 59, 128, 128, 128, 128 }, // a/l both not split
+ { 68, 26, 63, 128, 128, 128, 128 }, // a split, l not split
+ { 52, 79, 25, 128, 128, 128, 128 }, // l split, a not split
+ { 17, 14, 12, 128, 128, 128, 128 }, // a/l both split
+ // 64x64 -> 32x32
+ { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split
+ { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split
+ { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split
+ { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split
+};
+#else
static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
// 8x8 -> 4x4
@@ -171,6 +196,7 @@
{ 58, 32, 12 }, // l split, a not split
{ 10, 7, 6 }, // a/l both split
};
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_REF_MV
static const vpx_prob default_newmv_prob[NEWMV_MODE_CONTEXTS] = {
@@ -186,7 +212,7 @@
};
static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = {
- 128, 128, 128,
+ 128, 160, 180, 128, 160
};
#if CONFIG_EXT_INTER
@@ -292,6 +318,18 @@
-PARTITION_VERT, -PARTITION_SPLIT
};
+#if CONFIG_EXT_PARTITION_TYPES
+const vpx_tree_index vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)] = {
+ -PARTITION_NONE, 2,
+ 6, 4,
+ 8, -PARTITION_SPLIT,
+ -PARTITION_HORZ, 10,
+ -PARTITION_VERT, 12,
+ -PARTITION_HORZ_A, -PARTITION_HORZ_B,
+ -PARTITION_VERT_A, -PARTITION_VERT_B
+};
+#endif // CONFIG_EXT_PARTITION_TYPES
+
static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
9, 102, 187, 225
};
@@ -836,47 +874,27 @@
const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
[TREE_SIZE(TX_TYPES)] = {
{ // ToDo(yaowu): remove used entry 0.
- -IDTX, 2,
- -V_DCT, 4,
- -H_DCT, 6,
- -DCT_DCT, 8,
- -DST_DST, 10,
- 12, 22,
- 14, 16,
- -DST_DCT, -DCT_DST,
- 18, 20,
- -ADST_DCT, -DCT_ADST,
- -FLIPADST_DCT, -DCT_FLIPADST,
- 24, 30,
- 26, 28,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 32, 34,
- -ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ 0
}, {
-IDTX, 2,
- -V_DCT, 4,
- -H_DCT, 6,
- -DCT_DCT, 8,
- -DST_DST, 10,
- 12, 22,
- 14, 16,
- -DST_DCT, -DCT_DST,
- 18, 20,
+ 4, 14,
+ 6, 8,
+ -V_DCT, -H_DCT,
+ 10, 12,
+ -V_ADST, -H_ADST,
+ -V_FLIPADST, -H_FLIPADST,
+ -DCT_DCT, 16,
+ 18, 24,
+ 20, 22,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 24, 30,
26, 28,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 32, 34,
-ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
}, {
-IDTX, 2,
- -V_DCT, 4,
- -H_DCT, 6,
+ 4, 6,
+ -V_DCT, -H_DCT,
-DCT_DCT, 8,
10, 16,
12, 14,
@@ -893,39 +911,19 @@
const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
[TREE_SIZE(TX_TYPES)] = {
{ // ToDo(yaowu): remove unused entry 0.
- -IDTX, 2,
- -DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
- 14, 16,
- -ADST_DCT, -DCT_ADST,
- -FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
- -ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ 0
}, {
-IDTX, 2,
-DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
- 14, 16,
+ 6, 8,
+ -V_DCT, -H_DCT,
+ 10, 16,
+ 12, 14,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
+ 18, 20,
-ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
}, {
-IDTX, 2,
-DCT_DCT, 4,
@@ -942,33 +940,25 @@
static const vpx_prob
default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
{ // ToDo(yaowu): remove unused entry 0.
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 0 },
+ { 0 },
+ { 0 },
#if EXT_TX_SIZES == 4
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 0 },
#endif
}, {
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
#if EXT_TX_SIZES == 4
- { 12, 15, 15, 160, 16, 144, 160, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
#endif
}, {
- { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 15, 15, 160, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
#endif
}, {
{ 12, },
@@ -985,266 +975,110 @@
[INTRA_MODES][TX_TYPES - 1] = {
{ // ToDo(yaowu): remove unused entry 0.
{
- { 8, 11, 24, 112, 87, 137, 127, 134,
- 128, 86, 128, 124, 125, 133, 176, 123, },
- { 10, 9, 39, 106, 73, 155, 163, 228,
- 35, 62, 129, 127, 133, 114, 213, 234, },
- { 10, 9, 14, 88, 91, 127, 151, 51,
- 210, 89, 126, 58, 52, 116, 217, 24, },
- { 9, 6, 29, 113, 98, 131, 149, 210,
- 119, 60, 124, 93, 90, 143, 170, 197, },
- { 8, 8, 38, 101, 111, 166, 167, 141,
- 130, 105, 128, 75, 75, 118, 197, 117, },
- { 7, 8, 39, 91, 101, 153, 166, 200,
- 99, 77, 123, 90, 83, 144, 224, 192, },
- { 7, 10, 26, 86, 119, 154, 130, 101,
- 152, 91, 129, 75, 79, 137, 219, 77, },
- { 10, 13, 20, 86, 102, 162, 112, 76,
- 171, 86, 134, 122, 106, 124, 196, 44, },
- { 8, 9, 33, 108, 100, 144, 148, 215,
- 77, 60, 125, 125, 128, 126, 198, 220, },
- { 3, 10, 29, 111, 69, 141, 204, 141,
- 139, 93, 120, 75, 77, 163, 242, 124, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
}, {
- { 2, 53, 18, 147, 96, 98, 136, 133,
- 131, 120, 153, 163, 169, 137, 173, 124, },
- { 4, 18, 34, 133, 54, 130, 179, 228,
- 28, 72, 153, 164, 168, 118, 227, 239, },
- { 4, 18, 13, 125, 72, 110, 176, 36,
- 221, 104, 148, 75, 72, 117, 225, 19, },
- { 8, 33, 24, 162, 113, 99, 147, 226,
- 103, 85, 153, 143, 153, 124, 155, 210, },
- { 2, 15, 35, 107, 127, 158, 192, 128,
- 126, 116, 151, 95, 88, 182, 241, 119, },
- { 3, 15, 36, 112, 100, 146, 194, 189,
- 90, 98, 152, 99, 100, 165, 235, 175, },
- { 3, 16, 29, 109, 103, 140, 182, 76,
- 173, 104, 147, 82, 85, 159, 235, 70, },
- { 9, 24, 14, 120, 86, 156, 161, 34,
- 177, 121, 142, 128, 128, 126, 185, 37, },
- { 5, 24, 29, 152, 98, 99, 174, 228,
- 82, 76, 147, 149, 128, 132, 191, 225, },
- { 2, 15, 29, 111, 77, 126, 200, 135,
- 117, 93, 152, 96, 84, 191, 245, 135, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
#if EXT_TX_SIZES == 4
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
#endif
},
}, {
{
- { 8, 11, 24, 112, 87, 137, 127, 134,
- 128, 86, 128, 124, 125, 133, 176, 123, },
- { 10, 9, 39, 106, 73, 155, 163, 228,
- 35, 62, 129, 127, 133, 114, 213, 234, },
- { 10, 9, 14, 88, 91, 127, 151, 51,
- 210, 89, 126, 58, 52, 116, 217, 24, },
- { 9, 6, 29, 113, 98, 131, 149, 210,
- 119, 60, 124, 93, 90, 143, 170, 197, },
- { 8, 8, 38, 101, 111, 166, 167, 141,
- 130, 105, 128, 75, 75, 118, 197, 117, },
- { 7, 8, 39, 91, 101, 153, 166, 200,
- 99, 77, 123, 90, 83, 144, 224, 192, },
- { 7, 10, 26, 86, 119, 154, 130, 101,
- 152, 91, 129, 75, 79, 137, 219, 77, },
- { 10, 13, 20, 86, 102, 162, 112, 76,
- 171, 86, 134, 122, 106, 124, 196, 44, },
- { 8, 9, 33, 108, 100, 144, 148, 215,
- 77, 60, 125, 125, 128, 126, 198, 220, },
- { 3, 10, 29, 111, 69, 141, 204, 141,
- 139, 93, 120, 75, 77, 163, 242, 124, },
+ { 8, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 10, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, },
+ { 10, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, },
+ { 9, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 7, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 7, 20, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 10, 23, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 8, 29, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 20, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 53, 18, 147, 96, 98, 136, 133,
- 131, 120, 153, 163, 169, 137, 173, 124, },
- { 4, 18, 34, 133, 54, 130, 179, 228,
- 28, 72, 153, 164, 168, 118, 227, 239, },
- { 4, 18, 13, 125, 72, 110, 176, 36,
- 221, 104, 148, 75, 72, 117, 225, 19, },
- { 8, 33, 24, 162, 113, 99, 147, 226,
- 103, 85, 153, 143, 153, 124, 155, 210, },
- { 2, 15, 35, 107, 127, 158, 192, 128,
- 126, 116, 151, 95, 88, 182, 241, 119, },
- { 3, 15, 36, 112, 100, 146, 194, 189,
- 90, 98, 152, 99, 100, 165, 235, 175, },
- { 3, 16, 29, 109, 103, 140, 182, 76,
- 173, 104, 147, 82, 85, 159, 235, 70, },
- { 9, 24, 14, 120, 86, 156, 161, 34,
- 177, 121, 142, 128, 128, 126, 185, 37, },
- { 5, 24, 29, 152, 98, 99, 174, 228,
- 82, 76, 147, 149, 128, 132, 191, 225, },
- { 2, 15, 29, 111, 77, 126, 200, 135,
- 117, 93, 152, 96, 84, 191, 245, 135, },
+ { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, },
+ { 4, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, },
+ { 8, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 3, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 26, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 9, 24, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 5, 24, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 2, 25, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, },
+ { 1, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, },
+ { 4, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 29, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 25, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
#if EXT_TX_SIZES == 4
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 12, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 17, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 41, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 17, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 14, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 19, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 15, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
#endif
},
}, {
{
- { 8, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 10, 28, 176, 192, 208, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 10, 28, 176, 192, 48, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 9, 160, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 8, 28, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 7, 28, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 7, 20, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 10, 23, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 8, 29, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 20, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 10, 28, 176, 192, 208, 128, 128, 128, 128, },
+ { 10, 28, 176, 192, 48, 128, 128, 128, 128, },
+ { 9, 160, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 28, 96, 128, 128, 128, 160, 192, 128, },
+ { 7, 28, 160, 176, 192, 128, 128, 128, 128, },
+ { 7, 20, 160, 176, 64, 128, 128, 128, 128, },
+ { 10, 23, 160, 176, 64, 128, 128, 128, 128, },
+ { 8, 29, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 20, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 28, 176, 192, 208, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 28, 176, 192, 48, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 8, 160, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 28, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 28, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 26, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 9, 24, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 5, 24, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 25, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 28, 176, 192, 208, 128, 128, 128, 128, },
+ { 4, 28, 176, 192, 48, 128, 128, 128, 128, },
+ { 8, 160, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 96, 128, 128, 128, 160, 192, 128, },
+ { 3, 28, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 26, 160, 176, 64, 128, 128, 128, 128, },
+ { 9, 24, 160, 176, 64, 128, 128, 128, 128, },
+ { 5, 24, 160, 176, 192, 128, 128, 128, 128, },
+ { 2, 25, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 28, 176, 192, 208, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 28, 176, 192, 48, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 160, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 28, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 28, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 29, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 27, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 34, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 25, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 28, 176, 192, 208, 128, 128, 128, 128, },
+ { 1, 28, 176, 192, 48, 128, 128, 128, 128, },
+ { 4, 160, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 28, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 29, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 25, 96, 128, 128, 128, 160, 192, 128, },
#if EXT_TX_SIZES == 4
}, {
- { 2, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 12, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 17, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 41, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 17, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 14, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 19, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 27, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 34, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 15, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 12, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 17, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 41, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 17, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 14, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 19, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 15, 96, 128, 128, 128, 160, 192, 128, },
#endif
},
},
@@ -1323,8 +1157,7 @@
vp10_copy(fc->newmv_prob, default_newmv_prob);
vp10_copy(fc->zeromv_prob, default_zeromv_prob);
vp10_copy(fc->refmv_prob, default_refmv_prob);
- vp10_copy(fc->drl_prob0, default_drl_prob);
- vp10_copy(fc->drl_prob1, default_drl_prob);
+ vp10_copy(fc->drl_prob, default_drl_prob);
#if CONFIG_EXT_INTER
fc->new2mv_prob = default_new2mv_prob;
#endif // CONFIG_EXT_INTER
@@ -1408,12 +1241,8 @@
counts->refmv_mode[i]);
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- fc->drl_prob0[i] = mode_mv_merge_probs(pre_fc->drl_prob0[i],
- counts->drl_mode0[i]);
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- fc->drl_prob1[i] = mode_mv_merge_probs(pre_fc->drl_prob1[i],
- counts->drl_mode1[i]);
-
+ fc->drl_prob[i] = mode_mv_merge_probs(pre_fc->drl_prob[i],
+ counts->drl_mode[i]);
#if CONFIG_EXT_INTER
fc->new2mv_prob = mode_mv_merge_probs(pre_fc->new2mv_prob,
counts->new2mv_mode);
@@ -1558,9 +1387,17 @@
vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i],
counts->uv_mode[i], fc->uv_mode_prob[i]);
+#if CONFIG_EXT_PARTITION_TYPES
+ vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[0],
+ counts->partition[0], fc->partition_prob[0]);
+ for (i = 1; i < PARTITION_CONTEXTS; i++)
+ vpx_tree_merge_probs(vp10_ext_partition_tree, pre_fc->partition_prob[i],
+ counts->partition[i], fc->partition_prob[i]);
+#else
for (i = 0; i < PARTITION_CONTEXTS; i++)
vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
counts->partition[i], fc->partition_prob[i]);
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_EXT_INTRA
for (i = 0; i < PLANE_TYPES; ++i) {
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 2443d60..3d5fe9e 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -46,7 +46,11 @@
typedef struct frame_contexts {
vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
+#if CONFIG_EXT_PARTITION_TYPES
+ vpx_prob partition_prob[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1];
+#else
vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
+#endif
vp10_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1];
@@ -55,8 +59,7 @@
vpx_prob newmv_prob[NEWMV_MODE_CONTEXTS];
vpx_prob zeromv_prob[ZEROMV_MODE_CONTEXTS];
vpx_prob refmv_prob[REFMV_MODE_CONTEXTS];
- vpx_prob drl_prob0[DRL_MODE_CONTEXTS];
- vpx_prob drl_prob1[DRL_MODE_CONTEXTS];
+ vpx_prob drl_prob[DRL_MODE_CONTEXTS];
#if CONFIG_EXT_INTER
vpx_prob new2mv_prob;
@@ -111,7 +114,11 @@
unsigned int kf_y_mode[INTRA_MODES][INTRA_MODES][INTRA_MODES];
unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
+#if CONFIG_EXT_PARTITION_TYPES
+ unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
+#else
unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
+#endif
vp10_coeff_count_model coef[TX_SIZES][PLANE_TYPES];
unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES]
[COEF_BANDS][COEFF_CONTEXTS];
@@ -121,8 +128,7 @@
unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2];
unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
- unsigned int drl_mode0[DRL_MODE_CONTEXTS][2];
- unsigned int drl_mode1[DRL_MODE_CONTEXTS][2];
+ unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
#if CONFIG_EXT_INTER
unsigned int new2mv_mode[2];
#endif // CONFIG_EXT_INTER
@@ -193,6 +199,10 @@
[TREE_SIZE(INTER_COMPOUND_MODES)];
#endif // CONFIG_EXT_INTER
extern const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)];
+#if CONFIG_EXT_PARTITION_TYPES
+extern const vpx_tree_index vp10_ext_partition_tree
+ [TREE_SIZE(EXT_PARTITION_TYPES)];
+#endif
extern const vpx_tree_index vp10_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)];
extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)];
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 87bcc8a..16e4520 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -69,6 +69,21 @@
typedef uint8_t BLOCK_SIZE;
+#if CONFIG_EXT_PARTITION_TYPES
+typedef enum PARTITION_TYPE {
+ PARTITION_NONE,
+ PARTITION_HORZ,
+ PARTITION_VERT,
+ PARTITION_SPLIT,
+ PARTITION_HORZ_A, // HORZ split and the left partition is split again
+ PARTITION_HORZ_B, // HORZ split and the right partition is split again
+ PARTITION_VERT_A, // VERT split and the top partition is split again
+ PARTITION_VERT_B, // VERT split and the bottom partition is split again
+ EXT_PARTITION_TYPES,
+ PARTITION_TYPES = PARTITION_SPLIT + 1,
+ PARTITION_INVALID = EXT_PARTITION_TYPES
+} PARTITION_TYPE;
+#else
typedef enum PARTITION_TYPE {
PARTITION_NONE,
PARTITION_HORZ,
@@ -77,10 +92,11 @@
PARTITION_TYPES,
PARTITION_INVALID = PARTITION_TYPES
} PARTITION_TYPE;
+#endif // CONFIG_EXT_PARTITION_TYPES
typedef char PARTITION_CONTEXT;
#define PARTITION_PLOFFSET 4 // number of probability models per block size
-#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
+#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
// block transform size
typedef uint8_t TX_SIZE;
@@ -111,21 +127,17 @@
FLIPADST_FLIPADST = 6,
ADST_FLIPADST = 7,
FLIPADST_ADST = 8,
- DST_DCT = 9,
- DCT_DST = 10,
- DST_ADST = 11,
- ADST_DST = 12,
- DST_FLIPADST = 13,
- FLIPADST_DST = 14,
- DST_DST = 15,
- IDTX = 16,
- V_DCT = 17,
- H_DCT = 18,
+ IDTX = 9,
+ V_DCT = 10,
+ H_DCT = 11,
+ V_ADST = 12,
+ H_ADST = 13,
+ V_FLIPADST = 14,
+ H_FLIPADST = 15,
#endif // CONFIG_EXT_TX
TX_TYPES,
} TX_TYPE;
-
#if CONFIG_EXT_TX
#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
@@ -247,7 +259,7 @@
#define NEWMV_MODE_CONTEXTS 7
#define ZEROMV_MODE_CONTEXTS 2
#define REFMV_MODE_CONTEXTS 9
-#define DRL_MODE_CONTEXTS 3
+#define DRL_MODE_CONTEXTS 5
#define ZEROMV_OFFSET 3
#define REFMV_OFFSET 4
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 863f0db..0e211ad 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -19,247 +19,6 @@
#include "vpx_ports/mem.h"
#if CONFIG_EXT_TX
-void idst4_c(const tran_low_t *input, tran_low_t *output) {
- tran_low_t step[4];
- tran_high_t temp1, temp2;
- // stage 1
- temp1 = (input[3] + input[1]) * cospi_16_64;
- temp2 = (input[3] - input[1]) * cospi_16_64;
- step[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64;
- temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64;
- step[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- // stage 2
- output[0] = WRAPLOW(step[0] + step[3], 8);
- output[1] = WRAPLOW(-step[1] - step[2], 8);
- output[2] = WRAPLOW(step[1] - step[2], 8);
- output[3] = WRAPLOW(step[3] - step[0], 8);
-}
-
-void idst8_c(const tran_low_t *input, tran_low_t *output) {
- // vp9_igentx8(input, output, Tx8);
- tran_low_t step1[8], step2[8];
- tran_high_t temp1, temp2;
- // stage 1
- step1[0] = input[7];
- step1[2] = input[3];
- step1[1] = input[5];
- step1[3] = input[1];
- temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
- temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
- step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
- temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- // stage 2
- temp1 = (step1[0] + step1[2]) * cospi_16_64;
- temp2 = (step1[0] - step1[2]) * cospi_16_64;
- step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
- temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[4] = WRAPLOW(step1[4] + step1[5], 8);
- step2[5] = WRAPLOW(step1[4] - step1[5], 8);
- step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
- step2[7] = WRAPLOW(step1[6] + step1[7], 8);
-
- // stage 3
- step1[0] = WRAPLOW(step2[0] + step2[3], 8);
- step1[1] = WRAPLOW(step2[1] + step2[2], 8);
- step1[2] = WRAPLOW(step2[1] - step2[2], 8);
- step1[3] = WRAPLOW(step2[0] - step2[3], 8);
- step1[4] = step2[4];
- temp1 = (step2[6] - step2[5]) * cospi_16_64;
- temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step1[7] = step2[7];
-
- // stage 4
- output[0] = WRAPLOW(step1[0] + step1[7], 8);
- output[1] = WRAPLOW(-step1[1] - step1[6], 8);
- output[2] = WRAPLOW(step1[2] + step1[5], 8);
- output[3] = WRAPLOW(-step1[3] - step1[4], 8);
- output[4] = WRAPLOW(step1[3] - step1[4], 8);
- output[5] = WRAPLOW(-step1[2] + step1[5], 8);
- output[6] = WRAPLOW(step1[1] - step1[6], 8);
- output[7] = WRAPLOW(-step1[0] + step1[7], 8);
-}
-
-void idst16_c(const tran_low_t *input, tran_low_t *output) {
- tran_low_t step1[16], step2[16];
- tran_high_t temp1, temp2;
-
- // stage 1
- step1[0] = input[15];
- step1[1] = input[7];
- step1[2] = input[11];
- step1[3] = input[3];
- step1[4] = input[13];
- step1[5] = input[5];
- step1[6] = input[9];
- step1[7] = input[1];
- step1[8] = input[14];
- step1[9] = input[6];
- step1[10] = input[10];
- step1[11] = input[2];
- step1[12] = input[12];
- step1[13] = input[4];
- step1[14] = input[8];
- step1[15] = input[0];
-
- // stage 2
- step2[0] = step1[0];
- step2[1] = step1[1];
- step2[2] = step1[2];
- step2[3] = step1[3];
- step2[4] = step1[4];
- step2[5] = step1[5];
- step2[6] = step1[6];
- step2[7] = step1[7];
-
- temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
- temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
- temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
- temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
- temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- // stage 3
- step1[0] = step2[0];
- step1[1] = step2[1];
- step1[2] = step2[2];
- step1[3] = step2[3];
-
- temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
- temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
- temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- step1[8] = WRAPLOW(step2[8] + step2[9], 8);
- step1[9] = WRAPLOW(step2[8] - step2[9], 8);
- step1[10] = WRAPLOW(-step2[10] + step2[11], 8);
- step1[11] = WRAPLOW(step2[10] + step2[11], 8);
- step1[12] = WRAPLOW(step2[12] + step2[13], 8);
- step1[13] = WRAPLOW(step2[12] - step2[13], 8);
- step1[14] = WRAPLOW(-step2[14] + step2[15], 8);
- step1[15] = WRAPLOW(step2[14] + step2[15], 8);
-
- // stage 4
- temp1 = (step1[0] + step1[1]) * cospi_16_64;
- temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
- temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[4] = WRAPLOW(step1[4] + step1[5], 8);
- step2[5] = WRAPLOW(step1[4] - step1[5], 8);
- step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
- step2[7] = WRAPLOW(step1[6] + step1[7], 8);
-
- step2[8] = step1[8];
- step2[15] = step1[15];
- temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
- temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
- temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[11] = step1[11];
- step2[12] = step1[12];
-
- // stage 5
- step1[0] = WRAPLOW(step2[0] + step2[3], 8);
- step1[1] = WRAPLOW(step2[1] + step2[2], 8);
- step1[2] = WRAPLOW(step2[1] - step2[2], 8);
- step1[3] = WRAPLOW(step2[0] - step2[3], 8);
- step1[4] = step2[4];
- temp1 = (step2[6] - step2[5]) * cospi_16_64;
- temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step1[7] = step2[7];
-
- step1[8] = WRAPLOW(step2[8] + step2[11], 8);
- step1[9] = WRAPLOW(step2[9] + step2[10], 8);
- step1[10] = WRAPLOW(step2[9] - step2[10], 8);
- step1[11] = WRAPLOW(step2[8] - step2[11], 8);
- step1[12] = WRAPLOW(-step2[12] + step2[15], 8);
- step1[13] = WRAPLOW(-step2[13] + step2[14], 8);
- step1[14] = WRAPLOW(step2[13] + step2[14], 8);
- step1[15] = WRAPLOW(step2[12] + step2[15], 8);
-
- // stage 6
- step2[0] = WRAPLOW(step1[0] + step1[7], 8);
- step2[1] = WRAPLOW(step1[1] + step1[6], 8);
- step2[2] = WRAPLOW(step1[2] + step1[5], 8);
- step2[3] = WRAPLOW(step1[3] + step1[4], 8);
- step2[4] = WRAPLOW(step1[3] - step1[4], 8);
- step2[5] = WRAPLOW(step1[2] - step1[5], 8);
- step2[6] = WRAPLOW(step1[1] - step1[6], 8);
- step2[7] = WRAPLOW(step1[0] - step1[7], 8);
- step2[8] = step1[8];
- step2[9] = step1[9];
- temp1 = (-step1[10] + step1[13]) * cospi_16_64;
- temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = (-step1[11] + step1[12]) * cospi_16_64;
- temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[14] = step1[14];
- step2[15] = step1[15];
-
- // stage 7
- output[0] = WRAPLOW(step2[0] + step2[15], 8);
- output[1] = WRAPLOW(-step2[1] - step2[14], 8);
- output[2] = WRAPLOW(step2[2] + step2[13], 8);
- output[3] = WRAPLOW(-step2[3] - step2[12], 8);
- output[4] = WRAPLOW(step2[4] + step2[11], 8);
- output[5] = WRAPLOW(-step2[5] - step2[10], 8);
- output[6] = WRAPLOW(step2[6] + step2[9], 8);
- output[7] = WRAPLOW(-step2[7] - step2[8], 8);
- output[8] = WRAPLOW(step2[7] - step2[8], 8);
- output[9] = WRAPLOW(-step2[6] + step2[9], 8);
- output[10] = WRAPLOW(step2[5] - step2[10], 8);
- output[11] = WRAPLOW(-step2[4] + step2[11], 8);
- output[12] = WRAPLOW(step2[3] - step2[12], 8);
- output[13] = WRAPLOW(-step2[2] + step2[13], 8);
- output[14] = WRAPLOW(step2[1] - step2[14], 8);
- output[15] = WRAPLOW(-step2[0] + step2[15], 8);
-}
-
-#if CONFIG_EXT_TX
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i)
@@ -285,21 +44,6 @@
}
// For use in lieu of DST
-static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[16];
- for (i = 0; i < 8; ++i) {
- output[i] = input[16 + i] * 4;
- output[24 + i] = input[24 + i] * 4;
- }
- // Multiply input by sqrt(2)
- for (i = 0; i < 16; ++i) {
- inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
- }
- idct16_c(inputhalf, output + 8);
- // Note overall scaling factor is 4 times orthogonal
-}
-
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
int i;
tran_low_t inputhalf[16];
@@ -379,7 +123,6 @@
// Note overall scaling factor is 4 times orthogonal
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // CONFIG_EXT_TX
// Inverse identity transform and add.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
@@ -412,24 +155,21 @@
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
case IDTX:
case V_DCT:
case H_DCT:
+ case V_ADST:
+ case H_ADST:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
- case FLIPADST_DST:
+ case V_FLIPADST:
// flip UD
FLIPUD_PTR(*dst, *dstride, size);
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
- case DST_FLIPADST:
+ case H_FLIPADST:
// flip LR
FLIPUD_PTR(*src, *sstride, size);
break;
@@ -716,24 +456,21 @@
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
case IDTX:
case V_DCT:
case H_DCT:
+ case V_ADST:
+ case H_ADST:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
- case FLIPADST_DST:
+ case V_FLIPADST:
// flip UD
FLIPUD_PTR(*dst, *dstride, size);
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
- case DST_FLIPADST:
+ case H_FLIPADST:
// flip LR
FLIPUD_PTR(*src, *sstride, size);
break;
@@ -754,26 +491,23 @@
void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_4[] = {
- { idct4_c, idct4_c }, // DCT_DCT = 0,
- { iadst4_c, idct4_c }, // ADST_DCT = 1,
- { idct4_c, iadst4_c }, // DCT_ADST = 2,
- { iadst4_c, iadst4_c }, // ADST_ADST = 3,
+ { idct4_c, idct4_c }, // DCT_DCT
+ { iadst4_c, idct4_c }, // ADST_DCT
+ { idct4_c, iadst4_c }, // DCT_ADST
+ { iadst4_c, iadst4_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { iadst4_c, idct4_c }, // FLIPADST_DCT = 4,
- { idct4_c, iadst4_c }, // DCT_FLIPADST = 5,
- { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6,
- { iadst4_c, iadst4_c }, // ADST_FLIPADST = 7,
- { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8,
- { idst4_c, idct4_c }, // DST_DCT = 9,
- { idct4_c, idst4_c }, // DCT_DST = 10,
- { idst4_c, iadst4_c }, // DST_ADST = 11,
- { iadst4_c, idst4_c }, // ADST_DST = 12,
- { idst4_c, iadst4_c }, // DST_FLIPADST = 13,
- { iadst4_c, idst4_c }, // FLIPADST_DST = 14,
- { idst4_c, idst4_c }, // DST_DST = 15
- { iidtx4_c, iidtx4_c }, // IDTX = 16
- { idct4_c, iidtx4_c }, // V_DCT = 17
- { iidtx4_c, idct4_c }, // H_DCT = 18
+ { iadst4_c, idct4_c }, // FLIPADST_DCT
+ { idct4_c, iadst4_c }, // DCT_FLIPADST
+ { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST
+ { iadst4_c, iadst4_c }, // ADST_FLIPADST
+ { iadst4_c, iadst4_c }, // FLIPADST_ADST
+ { iidtx4_c, iidtx4_c }, // IDTX
+ { idct4_c, iidtx4_c }, // V_DCT
+ { iidtx4_c, idct4_c }, // H_DCT
+ { iadst4_c, iidtx4_c }, // V_ADST
+ { iidtx4_c, iadst4_c }, // H_ADST
+ { iadst4_c, iidtx4_c }, // V_FLIPADST
+ { iidtx4_c, iadst4_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -820,26 +554,23 @@
void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_8[] = {
- { idct8_c, idct8_c }, // DCT_DCT = 0,
- { iadst8_c, idct8_c }, // ADST_DCT = 1,
- { idct8_c, iadst8_c }, // DCT_ADST = 2,
- { iadst8_c, iadst8_c }, // ADST_ADST = 3,
+ { idct8_c, idct8_c }, // DCT_DCT
+ { iadst8_c, idct8_c }, // ADST_DCT
+ { idct8_c, iadst8_c }, // DCT_ADST
+ { iadst8_c, iadst8_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { iadst8_c, idct8_c }, // FLIPADST_DCT = 4,
- { idct8_c, iadst8_c }, // DCT_FLIPADST = 5,
- { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6,
- { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7,
- { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8,
- { idst8_c, idct8_c }, // DST_DCT = 9,
- { idct8_c, idst8_c }, // DCT_DST = 10,
- { idst8_c, iadst8_c }, // DST_ADST = 11,
- { iadst8_c, idst8_c }, // ADST_DST = 12,
- { idst8_c, iadst8_c }, // DST_FLIPADST = 13,
- { iadst8_c, idst8_c }, // FLIPADST_DST = 14,
- { idst8_c, idst8_c }, // DST_DST = 15
- { iidtx8_c, iidtx8_c }, // IDTX = 16
- { idct8_c, iidtx8_c }, // V_DCT = 17
- { iidtx8_c, idct8_c }, // H_DCT = 18
+ { iadst8_c, idct8_c }, // FLIPADST_DCT
+ { idct8_c, iadst8_c }, // DCT_FLIPADST
+ { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST
+ { iadst8_c, iadst8_c }, // ADST_FLIPADST
+ { iadst8_c, iadst8_c }, // FLIPADST_ADST
+ { iidtx8_c, iidtx8_c }, // IDTX
+ { idct8_c, iidtx8_c }, // V_DCT
+ { iidtx8_c, idct8_c }, // H_DCT
+ { iadst8_c, iidtx8_c }, // V_ADST
+ { iidtx8_c, iadst8_c }, // H_ADST
+ { iadst8_c, iidtx8_c }, // V_FLIPADST
+ { iidtx8_c, iadst8_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -886,26 +617,23 @@
void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_16[] = {
- { idct16_c, idct16_c }, // DCT_DCT = 0,
- { iadst16_c, idct16_c }, // ADST_DCT = 1,
- { idct16_c, iadst16_c }, // DCT_ADST = 2,
- { iadst16_c, iadst16_c }, // ADST_ADST = 3,
+ { idct16_c, idct16_c }, // DCT_DCT
+ { iadst16_c, idct16_c }, // ADST_DCT
+ { idct16_c, iadst16_c }, // DCT_ADST
+ { iadst16_c, iadst16_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { iadst16_c, idct16_c }, // FLIPADST_DCT = 4,
- { idct16_c, iadst16_c }, // DCT_FLIPADST = 5,
- { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6,
- { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7,
- { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8,
- { idst16_c, idct16_c }, // DST_DCT = 9,
- { idct16_c, idst16_c }, // DCT_DST = 10,
- { idst16_c, iadst16_c }, // DST_ADST = 11,
- { iadst16_c, idst16_c }, // ADST_DST = 12,
- { idst16_c, iadst16_c }, // DST_FLIPADST = 13,
- { iadst16_c, idst16_c }, // FLIPADST_DST = 14,
- { idst16_c, idst16_c }, // DST_DST = 15
- { iidtx16_c, iidtx16_c }, // IDTX = 16
- { idct16_c, iidtx16_c }, // V_DCT = 17
- { iidtx16_c, idct16_c }, // H_DCT = 18
+ { iadst16_c, idct16_c }, // FLIPADST_DCT
+ { idct16_c, iadst16_c }, // DCT_FLIPADST
+ { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST
+ { iadst16_c, iadst16_c }, // ADST_FLIPADST
+ { iadst16_c, iadst16_c }, // FLIPADST_ADST
+ { iidtx16_c, iidtx16_c }, // IDTX
+ { idct16_c, iidtx16_c }, // V_DCT
+ { iidtx16_c, idct16_c }, // H_DCT
+ { iadst16_c, iidtx16_c }, // V_ADST
+ { iidtx16_c, iadst16_c }, // H_ADST
+ { iadst16_c, iidtx16_c }, // V_FLIPADST
+ { iidtx16_c, iadst16_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -953,25 +681,22 @@
void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
int stride, int tx_type) {
static const transform_2d IHT_32[] = {
- { idct32_c, idct32_c }, // DCT_DCT = 0,
- { ihalfright32_c, idct32_c }, // ADST_DCT = 1,
- { idct32_c, ihalfright32_c }, // DCT_ADST = 2,
- { ihalfright32_c, ihalfright32_c }, // ADST_ADST = 3,
- { ihalfright32_c, idct32_c }, // FLIPADST_DCT = 4,
- { idct32_c, ihalfright32_c }, // DCT_FLIPADST = 5,
- { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST = 6,
- { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST = 7,
- { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST = 8,
- { ihalfcenter32_c, idct32_c }, // DST_DCT = 9,
- { idct32_c, ihalfcenter32_c }, // DCT_DST = 10,
- { ihalfcenter32_c, ihalfright32_c }, // DST_ADST = 11,
- { ihalfright32_c, ihalfcenter32_c }, // ADST_DST = 12,
- { ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13,
- { ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14,
- { ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15
- { iidtx32_c, iidtx32_c }, // IDTX = 16
- { idct32_c, iidtx32_c }, // V_DCT = 17
- { iidtx32_c, idct32_c }, // H_DCT = 18
+ { idct32_c, idct32_c }, // DCT_DCT
+ { ihalfright32_c, idct32_c }, // ADST_DCT
+ { idct32_c, ihalfright32_c }, // DCT_ADST
+ { ihalfright32_c, ihalfright32_c }, // ADST_ADST
+ { ihalfright32_c, idct32_c }, // FLIPADST_DCT
+ { idct32_c, ihalfright32_c }, // DCT_FLIPADST
+ { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST
+ { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST
+ { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST
+ { iidtx32_c, iidtx32_c }, // IDTX
+ { idct32_c, iidtx32_c }, // V_DCT
+ { iidtx32_c, idct32_c }, // H_DCT
+ { ihalfright32_c, iidtx16_c }, // V_ADST
+ { iidtx16_c, ihalfright32_c }, // H_ADST
+ { ihalfright32_c, iidtx16_c }, // V_FLIPADST
+ { iidtx16_c, ihalfright32_c }, // H_FLIPADST
};
int i, j;
@@ -1098,15 +823,12 @@
case FLIPADST_ADST:
vp10_iht4x4_16_add(input, dest, stride, tx_type);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST only exists in C code
vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
break;
@@ -1139,15 +861,12 @@
case FLIPADST_ADST:
vp10_iht8x8_64_add(input, dest, stride, tx_type);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST only exists in C code
vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
break;
@@ -1180,15 +899,12 @@
case FLIPADST_ADST:
vp10_iht16x16_256_add(input, dest, stride, tx_type);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST only exists in C code
vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
break;
@@ -1217,15 +933,12 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
break;
case IDTX:
@@ -1242,26 +955,23 @@
void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_4[] = {
- { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0,
- { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1,
- { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3,
+ { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4,
- { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8,
- { highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 9,
- { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 10,
- { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 11,
- { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 12,
- { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13,
- { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14,
- { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15
- { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX = 16
- { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT = 17
- { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT = 18
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST
+ { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX
+ { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT
+ { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT
+ { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_ADST
+ { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_ADST
+ { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_FLIPADST
+ { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -1311,26 +1021,23 @@
void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_8[] = {
- { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0,
- { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1,
- { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3,
+ { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4,
- { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8,
- { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 9,
- { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 10,
- { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 11,
- { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 12,
- { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13,
- { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14,
- { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15
- { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX = 16
- { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT = 17
- { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT = 18
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST
+ { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX
+ { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT
+ { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT
+ { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_ADST
+ { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_ADST
+ { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_FLIPADST
+ { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -1380,26 +1087,23 @@
void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_16[] = {
- { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0,
- { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1,
- { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3,
+ { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4,
- { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST = 7,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8,
- { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 9,
- { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 10,
- { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 11,
- { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 12,
- { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13,
- { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14,
- { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15
- { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX = 16
- { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT = 17
- { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT = 18
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST
+ { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX
+ { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT
+ { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT
+ { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_ADST
+ { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_ADST
+ { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_FLIPADST
+ { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -1459,16 +1163,13 @@
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
- { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
- { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
+ { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
{ vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT
- { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
+ { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
+ { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_ADST
+ { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_ADST
+ { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_FLIPADST
+ { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_FLIPADST
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1602,15 +1303,12 @@
case FLIPADST_ADST:
vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST only exists in C code
vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
break;
@@ -1644,15 +1342,12 @@
case FLIPADST_ADST:
vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST only exists in C code
vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
break;
@@ -1686,15 +1381,12 @@
case FLIPADST_ADST:
vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST only exists in C code
vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
break;
@@ -1724,15 +1416,12 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
break;
case IDTX:
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index d9891bb..25941d0 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -1276,9 +1276,9 @@
#if CONFIG_VAR_TX
if (is_inter_block(mbmi) && !mbmi->skip)
tx_size = (plane->plane_type == PLANE_TYPE_UV) ?
- get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row * 8 + blk_col],
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row][ blk_col],
sb_type, ss_x, ss_y) :
- mbmi->inter_tx_size[blk_row * 8 + blk_col];
+ mbmi->inter_tx_size[blk_row][blk_col];
tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]);
tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]);
@@ -1634,7 +1634,7 @@
int start, int stop, int y_only) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
-#if !CONFIG_VAR_TX
+#if !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES
enum lf_path path;
LOOP_FILTER_MASK lfm;
@@ -1646,7 +1646,7 @@
path = LF_PATH_444;
else
path = LF_PATH_SLOW;
-#endif
+#endif // !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES
#if CONFIG_VAR_TX
memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
@@ -1661,7 +1661,7 @@
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
-#if CONFIG_VAR_TX
+#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES
for (plane = 0; plane < num_planes; ++plane)
vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
@@ -1684,7 +1684,7 @@
break;
}
}
-#endif
+#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES
}
}
}
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index c67beed..2a8bc78 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -246,31 +246,51 @@
return newmv_count;
}
+// This function assumes MI blocks are 8x8 and coding units are 64x64
static int has_top_right(const MACROBLOCKD *xd,
int mi_row, int mi_col, int bs) {
+ // In a split partition, all blocks apart from the bottom right have a top right
int has_tr = !((mi_row & bs) & (bs * 2 - 1)) ||
!((mi_col & bs) & (bs * 2 - 1));
// Filter out partial right-most boundaries
+ // For each 4x4 group of blocks, when the bottom right is decoded the blocks
+ // to the right have not been decoded yet, therefore the second from the
+ // bottom block in the right-most column does not have a top right
if ((mi_col & bs) & (bs * 2 - 1)) {
if (((mi_col & (2 * bs)) & (bs * 4 - 1)) &&
((mi_row & (2 * bs)) & (bs * 4 - 1)))
has_tr = 0;
}
+ // If the right hand side of the block lines up with the right hand edge of a
+ // group of 8x8 MI blocks (i.e. the edge of a coding unit) and it is not on
+ // the top row of that coding unit, it does not have a top right
if (has_tr)
if (((mi_col + xd->n8_w) & 0x07) == 0)
if ((mi_row & 0x07) > 0)
has_tr = 0;
+ // The left hand rectangle of a vertical pair always has a top right (as the
+ // block above will have been decoded)
if (xd->n8_w < xd->n8_h)
if (!xd->is_sec_rect)
has_tr = 1;
+ // The bottom rectangle of a horizontal pair never has a top right (as the
+ // block to the right won't have been decoded)
if (xd->n8_w > xd->n8_h)
if (xd->is_sec_rect)
has_tr = 0;
+#if CONFIG_EXT_PARTITION_TYPES
+ // The bottom left square of a Vertical A does not have a top right as it is
+ // decoded before the right hand rectangle of the partition
+ if (xd->mi[0]->mbmi.partition == PARTITION_VERT_A)
+ if ((mi_row & bs) && !(mi_col & bs))
+ has_tr = 0;
+#endif // CONFIG_EXT_PARTITION_TYPES
+
return has_tr;
}
@@ -749,6 +769,10 @@
void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *ref_mv_count,
+#endif
#if CONFIG_EXT_INTER
int_mv *mv_list,
#endif // CONFIG_EXT_INTER
@@ -760,11 +784,11 @@
b_mode_info *bmi = mi->bmi;
int n;
#if CONFIG_REF_MV
- CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE];
CANDIDATE_MV tmp_mv;
- uint8_t ref_mv_count = 0, idx;
+ uint8_t idx;
uint8_t above_count = 0, left_count = 0;
MV_REFERENCE_FRAME rf[2] = { mi->mbmi.ref_frame[ref], NONE };
+ *ref_mv_count = 0;
#endif
assert(MAX_MV_REF_CANDIDATES == 2);
@@ -774,12 +798,12 @@
#if CONFIG_REF_MV
scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf,
- -1, 0, ref_mv_stack, &ref_mv_count);
- above_count = ref_mv_count;
+ -1, 0, ref_mv_stack, ref_mv_count);
+ above_count = *ref_mv_count;
scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf,
- 0, -1, ref_mv_stack, &ref_mv_count);
- left_count = ref_mv_count - above_count;
+ 0, -1, ref_mv_stack, ref_mv_count);
+ left_count = *ref_mv_count - above_count;
if (above_count > 1 && left_count > 0) {
tmp_mv = ref_mv_stack[1];
@@ -787,7 +811,7 @@
ref_mv_stack[above_count] = tmp_mv;
}
- for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, ref_mv_count); ++idx) {
+ for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx) {
mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
clamp_mv_ref(&mv_list[idx].as_mv,
xd->n8_w << 3, xd->n8_h << 3, xd);
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index bc6d824..76530e9 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -289,16 +289,24 @@
static INLINE uint8_t vp10_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
int ref_idx) {
if (ref_mv_stack[ref_idx].weight > REF_CAT_LEVEL &&
- ref_mv_stack[ref_idx + 1].weight > REF_CAT_LEVEL)
- return 0;
+ ref_mv_stack[ref_idx + 1].weight > REF_CAT_LEVEL) {
+ if (ref_mv_stack[ref_idx].weight == ref_mv_stack[ref_idx + 1].weight)
+ return 0;
+ else
+ return 1;
+ }
if (ref_mv_stack[ref_idx].weight > REF_CAT_LEVEL &&
ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 1;
+ return 2;
if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
- ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 2;
+ ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL) {
+ if (ref_mv_stack[ref_idx].weight == ref_mv_stack[ref_idx + 1].weight)
+ return 3;
+ else
+ return 4;
+ }
assert(0);
return 0;
@@ -327,6 +335,10 @@
void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *ref_mv_count,
+#endif
#if CONFIG_EXT_INTER
int_mv *mv_list,
#endif // CONFIG_EXT_INTER
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index bcc69f3..2dd09b5 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -313,7 +313,7 @@
BufferPool *buffer_pool;
PARTITION_CONTEXT *above_seg_context;
- ENTROPY_CONTEXT *above_context;
+ ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
#if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT left_txfm_context[8];
@@ -405,9 +405,7 @@
for (i = 0; i < MAX_MB_PLANE; ++i) {
xd->plane[i].dqcoeff = dqcoeff;
- xd->above_context[i] = cm->above_context +
- i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
-
+ xd->above_context[i] = cm->above_context[i];
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
} else {
@@ -501,6 +499,12 @@
PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
+#if CONFIG_EXT_PARTITION_TYPES
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ memset(above_ctx, partition_context_lookup[subsize].above, bw);
+ memset(left_ctx, partition_context_lookup[subsize].left, bh);
+#else
// num_4x4_blocks_wide_lookup[bsize] / 2
const int bs = num_8x8_blocks_wide_lookup[bsize];
@@ -509,8 +513,50 @@
// bits of smaller block sizes to be zero.
memset(above_ctx, partition_context_lookup[subsize].above, bs);
memset(left_ctx, partition_context_lookup[subsize].left, bs);
+#endif // CONFIG_EXT_PARTITION_TYPES
}
+#if CONFIG_EXT_PARTITION_TYPES
+static INLINE void update_ext_partition_context(MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ BLOCK_SIZE subsize,
+ BLOCK_SIZE bsize,
+ PARTITION_TYPE partition) {
+ if (bsize >= BLOCK_8X8) {
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+ switch (partition) {
+ case PARTITION_SPLIT:
+ if (bsize != BLOCK_8X8)
+ break;
+ case PARTITION_NONE:
+ case PARTITION_HORZ:
+ case PARTITION_VERT:
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+ break;
+ case PARTITION_HORZ_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
+ break;
+ case PARTITION_HORZ_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, bsize2, subsize);
+ break;
+ case PARTITION_VERT_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, subsize, subsize);
+ break;
+ case PARTITION_VERT_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
+ break;
+ default:
+ assert(0 && "Invalid partition type");
+ }
+ }
+}
+#endif // CONFIG_EXT_PARTITION_TYPES
+
static INLINE int partition_plane_context(const MACROBLOCKD *xd,
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
@@ -525,6 +571,27 @@
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
+static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
+ int mi_col_start, int mi_col_end) {
+ const int width = mi_col_end - mi_col_start;
+ int i;
+
+ for (i = 0 ; i < MAX_MB_PLANE ; i++)
+ vp10_zero_array(cm->above_context[i] + 2 * mi_col_start, 2 * width);
+ vp10_zero_array(cm->above_seg_context + mi_col_start, width);
+#if CONFIG_VAR_TX
+ vp10_zero_array(cm->above_txfm_context + mi_col_start, width);
+#endif // CONFIG_VAR_TX
+}
+
+static INLINE void vp10_zero_left_context(MACROBLOCKD *const xd) {
+ vp10_zero(xd->left_context);
+ vp10_zero(xd->left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(xd->left_txfm_context_buffer);
+#endif
+}
+
#if CONFIG_VAR_TX
static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx,
TX_SIZE tx_size,
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index 83a3597..385a3e1 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -192,9 +192,9 @@
TX_SIZE max_tx_size, int ctx) {
const struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
- int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
- (blk_col >> (1 - pd->subsampling_x));
- TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index 174ff80..5175389 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -1372,6 +1372,189 @@
}
} // each mi in the left column
}
+
+void vp10_build_prediction_by_above_preds(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_row == 0)
+ return;
+
+ for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+ int mi_row_offset = -1;
+ int mi_col_offset = i;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *above_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+
+ mi_step = VPXMIN(xd->n8_w,
+ num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+ if (!is_neighbor_overlappable(above_mbmi))
+ continue;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst,
+ tmp_buf[j], tmp_stride[j],
+ 0, i, NULL,
+ pd->subsampling_x, pd->subsampling_y);
+ }
+ for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
+ MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+ RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if ((!vp10_is_valid_scale(&ref_buf->sf)))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
+ mi_x = (mi_col + i) << MI_SIZE_LOG2;
+ mi_y = mi_row << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = (mi_step * 8) >> pd->subsampling_x;
+ bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
+ 4);
+
+ if (above_mbmi->sb_type < BLOCK_8X8) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - above_mbmi->sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT)
+ && y == 0 && !pd->subsampling_y)
+ continue;
+
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+ y * 2 + x, bw, bh,
+ 4 * x, 0, pw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ } else {
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+ 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ }
+ }
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+}
+
+void vp10_build_prediction_by_left_preds(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ const TileInfo *const tile = &xd->tile;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
+ (mi_col - 1) >= tile->mi_col_end)
+ return;
+
+ for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+ int mi_row_offset = i;
+ int mi_col_offset = -1;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *left_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+ const int is_compound = has_second_ref(left_mbmi);
+
+ mi_step = VPXMIN(xd->n8_h,
+ num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+ if (!is_neighbor_overlappable(left_mbmi))
+ continue;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst,
+ tmp_buf[j], tmp_stride[j],
+ i, 0, NULL,
+ pd->subsampling_x, pd->subsampling_y);
+ }
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+ RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if ((!vp10_is_valid_scale(&ref_buf->sf)))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
+ mi_x = mi_col << MI_SIZE_LOG2;
+ mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
+ 4);
+ bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+ if (left_mbmi->sb_type < BLOCK_8X8) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - left_mbmi->sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT)
+ && x == 0 && !pd->subsampling_x)
+ continue;
+
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+ y * 2 + x, bw, bh,
+ 0, 4 * y, bw, ph,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ } else {
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
+ bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ }
+ }
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+}
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index c6e89df..4dcd203 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -430,6 +430,16 @@
int tmp_stride1[MAX_MB_PLANE],
uint8_t *tmp_buf2[MAX_MB_PLANE],
int tmp_stride2[MAX_MB_PLANE]);
+void vp10_build_prediction_by_above_preds(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]);
+void vp10_build_prediction_by_left_preds(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]);
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index e28f01c..10a66f8 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -99,9 +99,40 @@
orders_16x32, orders_32x16, orders_32x32,
orders_32x64, orders_64x32, orders_64x64,
};
+#if CONFIG_EXT_PARTITION_TYPES
+static const uint8_t orders_verta_32x32[4] = {
+ 0, 2,
+ 1, 2,
+};
+static const uint8_t orders_verta_16x16[16] = {
+ 0, 2, 4, 6,
+ 1, 2, 5, 6,
+ 8, 10, 12, 14,
+ 9, 10, 13, 14,
+};
+static const uint8_t orders_verta_8x8[64] = {
+ 0, 2, 4, 6, 16, 18, 20, 22,
+ 1, 2, 5, 6, 17, 18, 21, 22,
+ 8, 10, 12, 14, 24, 26, 28, 30,
+ 9, 10, 13, 14, 25, 26, 29, 30,
+ 32, 34, 36, 38, 48, 50, 52, 54,
+ 33, 34, 37, 38, 49, 50, 53, 54,
+ 40, 42, 44, 46, 56, 58, 60, 62,
+ 41, 42, 45, 46, 57, 58, 61, 62,
+};
+static const uint8_t *const orders_verta[BLOCK_SIZES] = {
+ orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
+ orders_8x16, orders_16x8, orders_verta_16x16,
+ orders_16x32, orders_32x16, orders_verta_32x32,
+ orders_32x64, orders_64x32, orders_64x64,
+};
+#endif // CONFIG_EXT_PARTITION_TYPES
static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
int right_available,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif
TX_SIZE txsz, int y, int x, int ss_x) {
const int wl = mi_width_log2_lookup[bsize];
const int w = VPXMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
@@ -113,8 +144,14 @@
if (y == 0) {
const int hl = mi_height_log2_lookup[bsize];
- const uint8_t *order = orders[bsize];
+ const uint8_t *order;
int my_order, tr_order;
+#if CONFIG_EXT_PARTITION_TYPES
+ if (partition == PARTITION_VERT_A)
+ order = orders_verta[bsize];
+ else
+#endif // CONFIG_EXT_PARTITION_TYPES
+ order = orders[bsize];
if (x + step < w)
return 1;
@@ -122,9 +159,11 @@
mi_row = (mi_row & 7) >> hl;
mi_col = (mi_col & 7) >> wl;
+ // If top row of coding unit
if (mi_row == 0)
return right_available;
+ // If rightmost column of coding unit
if (((mi_col + 1) << wl) >= 8)
return 0;
@@ -1346,8 +1385,14 @@
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int right_available =
mi_col + (1 << mi_width_log2_lookup[bsize]) < xd->tile.mi_col_end;
+#if CONFIG_EXT_PARTITION_TYPES
+ const PARTITION_TYPE partition = xd->mi[0]->mbmi.partition;
+#endif
const int have_right = vp10_has_right(bsize, mi_row, mi_col,
right_available,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
tx_size, row_off, col_off,
pd->subsampling_x);
const int have_bottom = vp10_has_bottom(bsize, mi_row, mi_col,
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 6dc5604..2644ecf 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -2882,13 +2882,10 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
{row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
{col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
}, { // TX_8X8
@@ -2902,13 +2899,10 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
{row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
{col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
}, { // TX_16X16
@@ -2930,22 +2924,12 @@
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
- {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
@@ -2965,26 +2949,14 @@
qtr_scan_32x32_neighbors},
{qtr_scan_32x32, vp10_qtr_iscan_32x32,
qtr_scan_32x32_neighbors},
- {h2_scan_32x32, vp10_h2_iscan_32x32,
- h2_scan_32x32_neighbors},
- {v2_scan_32x32, vp10_v2_iscan_32x32,
- v2_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
- {h2_scan_32x32, vp10_h2_iscan_32x32,
- h2_scan_32x32_neighbors},
- {v2_scan_32x32, vp10_v2_iscan_32x32,
- v2_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
}
};
@@ -3000,13 +2972,10 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors},
+ {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors},
+ {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors},
+ {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors},
{mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors},
{mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors},
}, { // TX_8X8
@@ -3020,13 +2989,10 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors},
+ {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors},
+ {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors},
+ {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors},
{mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors},
{mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors},
}, { // TX_16X16
@@ -3050,22 +3016,12 @@
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
- {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
+ {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
+ {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
+ {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
+ {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
+ {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
+ {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
@@ -3085,24 +3041,14 @@
qtr_scan_32x32_neighbors},
{qtr_scan_32x32, vp10_qtr_iscan_32x32,
qtr_scan_32x32_neighbors},
- {h2_scan_32x32, vp10_h2_iscan_32x32,
- h2_scan_32x32_neighbors},
- {v2_scan_32x32, vp10_v2_iscan_32x32,
- v2_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
{mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
{mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
}
};
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index f8bfc89..aebcb11 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -96,7 +96,9 @@
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
int mi_row, mi_col;
+#if !CONFIG_EXT_PARTITION_TYPES
enum lf_path path;
+ LOOP_FILTER_MASK lfm;
if (y_only)
path = LF_PATH_444;
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
@@ -105,6 +107,7 @@
path = LF_PATH_444;
else
path = LF_PATH_SLOW;
+#endif // !CONFIG_EXT_PARTITION_TYPES
for (mi_row = start; mi_row < stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
@@ -113,13 +116,17 @@
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
- LOOP_FILTER_MASK lfm;
int plane;
sync_read(lf_sync, r, c);
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+#if CONFIG_EXT_PARTITION_TYPES
+ for (plane = 0; plane < num_planes; ++plane)
+ vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+ mi_row, mi_col);
+#else
// TODO(JBB): Make setup_mask work for non 420.
vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
&lfm);
@@ -139,7 +146,7 @@
break;
}
}
-
+#endif // CONFIG_EXT_PARTITION_TYPES
sync_write(lf_sync, r, c, sb_cols);
}
}
@@ -331,7 +338,11 @@
cm->counts.uv_mode[i][j] += counts->uv_mode[i][j];
for (i = 0; i < PARTITION_CONTEXTS; i++)
+#if CONFIG_EXT_PARTITION_TYPES
+ for (j = 0; j < (i ? EXT_PARTITION_TYPES : PARTITION_TYPES); j++)
+#else
for (j = 0; j < PARTITION_TYPES; j++)
+#endif
cm->counts.partition[i][j] += counts->partition[i][j];
if (is_dec) {
@@ -387,11 +398,7 @@
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
for (j = 0; j < 2; ++j)
- cm->counts.drl_mode0[i][j] += counts->drl_mode0[i][j];
-
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- for (j = 0; j < 2; ++j)
- cm->counts.drl_mode1[i][j] += counts->drl_mode1[i][j];
+ cm->counts.drl_mode[i][j] += counts->drl_mode[i][j];
#if CONFIG_EXT_INTER
for (j = 0; j < 2; ++j)
diff --git a/vp10/common/vp10_fwd_txfm1d.c b/vp10/common/vp10_fwd_txfm1d.c
index f3da5c9..ef24362 100644
--- a/vp10/common/vp10_fwd_txfm1d.c
+++ b/vp10/common/vp10_fwd_txfm1d.c
@@ -15,8 +15,8 @@
{ \
int i, j; \
for (i = 0; i < size; ++i) { \
- int buf_bit = get_max_bit(abs(buf[i])) + 1; \
- if (buf_bit > bit) { \
+ int buf_bit = get_max_bit(abs(buf[i])) + 1; \
+ if (buf_bit > bit) { \
printf("======== %s overflow ========\n", __func__); \
printf("stage: %d node: %d\n", stage, i); \
printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
@@ -32,11 +32,11 @@
#else
#define range_check(stage, input, buf, size, bit) \
{ \
- (void) stage; \
- (void) input; \
- (void) buf; \
- (void) size; \
- (void) bit; \
+ (void)stage; \
+ (void)input; \
+ (void)buf; \
+ (void)size; \
+ (void)bit; \
}
#endif
@@ -1092,7 +1092,6 @@
bf1[14] = bf0[9];
bf1[15] = -bf0[1];
range_check(stage, input, bf1, size, stage_range[stage]);
-
}
void vp10_fadst32_new(const int32_t *input, int32_t *output,
@@ -1529,3 +1528,796 @@
bf1[31] = -bf0[1];
range_check(stage, input, bf1, size, stage_range[stage]);
}
+
+void vp10_fdct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 64;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[64];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf1 = output;
+ bf1[0] = input[0] + input[63];
+ bf1[1] = input[1] + input[62];
+ bf1[2] = input[2] + input[61];
+ bf1[3] = input[3] + input[60];
+ bf1[4] = input[4] + input[59];
+ bf1[5] = input[5] + input[58];
+ bf1[6] = input[6] + input[57];
+ bf1[7] = input[7] + input[56];
+ bf1[8] = input[8] + input[55];
+ bf1[9] = input[9] + input[54];
+ bf1[10] = input[10] + input[53];
+ bf1[11] = input[11] + input[52];
+ bf1[12] = input[12] + input[51];
+ bf1[13] = input[13] + input[50];
+ bf1[14] = input[14] + input[49];
+ bf1[15] = input[15] + input[48];
+ bf1[16] = input[16] + input[47];
+ bf1[17] = input[17] + input[46];
+ bf1[18] = input[18] + input[45];
+ bf1[19] = input[19] + input[44];
+ bf1[20] = input[20] + input[43];
+ bf1[21] = input[21] + input[42];
+ bf1[22] = input[22] + input[41];
+ bf1[23] = input[23] + input[40];
+ bf1[24] = input[24] + input[39];
+ bf1[25] = input[25] + input[38];
+ bf1[26] = input[26] + input[37];
+ bf1[27] = input[27] + input[36];
+ bf1[28] = input[28] + input[35];
+ bf1[29] = input[29] + input[34];
+ bf1[30] = input[30] + input[33];
+ bf1[31] = input[31] + input[32];
+ bf1[32] = -input[32] + input[31];
+ bf1[33] = -input[33] + input[30];
+ bf1[34] = -input[34] + input[29];
+ bf1[35] = -input[35] + input[28];
+ bf1[36] = -input[36] + input[27];
+ bf1[37] = -input[37] + input[26];
+ bf1[38] = -input[38] + input[25];
+ bf1[39] = -input[39] + input[24];
+ bf1[40] = -input[40] + input[23];
+ bf1[41] = -input[41] + input[22];
+ bf1[42] = -input[42] + input[21];
+ bf1[43] = -input[43] + input[20];
+ bf1[44] = -input[44] + input[19];
+ bf1[45] = -input[45] + input[18];
+ bf1[46] = -input[46] + input[17];
+ bf1[47] = -input[47] + input[16];
+ bf1[48] = -input[48] + input[15];
+ bf1[49] = -input[49] + input[14];
+ bf1[50] = -input[50] + input[13];
+ bf1[51] = -input[51] + input[12];
+ bf1[52] = -input[52] + input[11];
+ bf1[53] = -input[53] + input[10];
+ bf1[54] = -input[54] + input[9];
+ bf1[55] = -input[55] + input[8];
+ bf1[56] = -input[56] + input[7];
+ bf1[57] = -input[57] + input[6];
+ bf1[58] = -input[58] + input[5];
+ bf1[59] = -input[59] + input[4];
+ bf1[60] = -input[60] + input[3];
+ bf1[61] = -input[61] + input[2];
+ bf1[62] = -input[62] + input[1];
+ bf1[63] = -input[63] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = -bf0[16] + bf0[15];
+ bf1[17] = -bf0[17] + bf0[14];
+ bf1[18] = -bf0[18] + bf0[13];
+ bf1[19] = -bf0[19] + bf0[12];
+ bf1[20] = -bf0[20] + bf0[11];
+ bf1[21] = -bf0[21] + bf0[10];
+ bf1[22] = -bf0[22] + bf0[9];
+ bf1[23] = -bf0[23] + bf0[8];
+ bf1[24] = -bf0[24] + bf0[7];
+ bf1[25] = -bf0[25] + bf0[6];
+ bf1[26] = -bf0[26] + bf0[5];
+ bf1[27] = -bf0[27] + bf0[4];
+ bf1[28] = -bf0[28] + bf0[3];
+ bf1[29] = -bf0[29] + bf0[2];
+ bf1[30] = -bf0[30] + bf0[1];
+ bf1[31] = -bf0[31] + bf0[0];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = bf0[37];
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit[stage]);
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = bf0[58];
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = -bf0[8] + bf0[7];
+ bf1[9] = -bf0[9] + bf0[6];
+ bf1[10] = -bf0[10] + bf0[5];
+ bf1[11] = -bf0[11] + bf0[4];
+ bf1[12] = -bf0[12] + bf0[3];
+ bf1[13] = -bf0[13] + bf0[2];
+ bf1[14] = -bf0[14] + bf0[1];
+ bf1[15] = -bf0[15] + bf0[0];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[47];
+ bf1[33] = bf0[33] + bf0[46];
+ bf1[34] = bf0[34] + bf0[45];
+ bf1[35] = bf0[35] + bf0[44];
+ bf1[36] = bf0[36] + bf0[43];
+ bf1[37] = bf0[37] + bf0[42];
+ bf1[38] = bf0[38] + bf0[41];
+ bf1[39] = bf0[39] + bf0[40];
+ bf1[40] = -bf0[40] + bf0[39];
+ bf1[41] = -bf0[41] + bf0[38];
+ bf1[42] = -bf0[42] + bf0[37];
+ bf1[43] = -bf0[43] + bf0[36];
+ bf1[44] = -bf0[44] + bf0[35];
+ bf1[45] = -bf0[45] + bf0[34];
+ bf1[46] = -bf0[46] + bf0[33];
+ bf1[47] = -bf0[47] + bf0[32];
+ bf1[48] = -bf0[48] + bf0[63];
+ bf1[49] = -bf0[49] + bf0[62];
+ bf1[50] = -bf0[50] + bf0[61];
+ bf1[51] = -bf0[51] + bf0[60];
+ bf1[52] = -bf0[52] + bf0[59];
+ bf1[53] = -bf0[53] + bf0[58];
+ bf1[54] = -bf0[54] + bf0[57];
+ bf1[55] = -bf0[55] + bf0[56];
+ bf1[56] = bf0[56] + bf0[55];
+ bf1[57] = bf0[57] + bf0[54];
+ bf1[58] = bf0[58] + bf0[53];
+ bf1[59] = bf0[59] + bf0[52];
+ bf1[60] = bf0[60] + bf0[51];
+ bf1[61] = bf0[61] + bf0[50];
+ bf1[62] = bf0[62] + bf0[49];
+ bf1[63] = bf0[63] + bf0[48];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = -bf0[20] + bf0[19];
+ bf1[21] = -bf0[21] + bf0[18];
+ bf1[22] = -bf0[22] + bf0[17];
+ bf1[23] = -bf0[23] + bf0[16];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[28] + bf0[27];
+ bf1[29] = bf0[29] + bf0[26];
+ bf1[30] = bf0[30] + bf0[25];
+ bf1[31] = bf0[31] + bf0[24];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+ bf1[44] = bf0[44];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = bf0[50];
+ bf1[51] = bf0[51];
+ bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit[stage]);
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[39];
+ bf1[33] = bf0[33] + bf0[38];
+ bf1[34] = bf0[34] + bf0[37];
+ bf1[35] = bf0[35] + bf0[36];
+ bf1[36] = -bf0[36] + bf0[35];
+ bf1[37] = -bf0[37] + bf0[34];
+ bf1[38] = -bf0[38] + bf0[33];
+ bf1[39] = -bf0[39] + bf0[32];
+ bf1[40] = -bf0[40] + bf0[47];
+ bf1[41] = -bf0[41] + bf0[46];
+ bf1[42] = -bf0[42] + bf0[45];
+ bf1[43] = -bf0[43] + bf0[44];
+ bf1[44] = bf0[44] + bf0[43];
+ bf1[45] = bf0[45] + bf0[42];
+ bf1[46] = bf0[46] + bf0[41];
+ bf1[47] = bf0[47] + bf0[40];
+ bf1[48] = bf0[48] + bf0[55];
+ bf1[49] = bf0[49] + bf0[54];
+ bf1[50] = bf0[50] + bf0[53];
+ bf1[51] = bf0[51] + bf0[52];
+ bf1[52] = -bf0[52] + bf0[51];
+ bf1[53] = -bf0[53] + bf0[50];
+ bf1[54] = -bf0[54] + bf0[49];
+ bf1[55] = -bf0[55] + bf0[48];
+ bf1[56] = -bf0[56] + bf0[63];
+ bf1[57] = -bf0[57] + bf0[62];
+ bf1[58] = -bf0[58] + bf0[61];
+ bf1[59] = -bf0[59] + bf0[60];
+ bf1[60] = bf0[60] + bf0[59];
+ bf1[61] = bf0[61] + bf0[58];
+ bf1[62] = bf0[62] + bf0[57];
+ bf1[63] = bf0[63] + bf0[56];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = -bf0[18] + bf0[17];
+ bf1[19] = -bf0[19] + bf0[16];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[22] + bf0[21];
+ bf1[23] = bf0[23] + bf0[20];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = -bf0[26] + bf0[25];
+ bf1[27] = -bf0[27] + bf0[24];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[30] + bf0[29];
+ bf1[31] = bf0[31] + bf0[28];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = bf0[41];
+ bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit[stage]);
+ bf1[54] = bf0[54];
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit[stage]);
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[35];
+ bf1[33] = bf0[33] + bf0[34];
+ bf1[34] = -bf0[34] + bf0[33];
+ bf1[35] = -bf0[35] + bf0[32];
+ bf1[36] = -bf0[36] + bf0[39];
+ bf1[37] = -bf0[37] + bf0[38];
+ bf1[38] = bf0[38] + bf0[37];
+ bf1[39] = bf0[39] + bf0[36];
+ bf1[40] = bf0[40] + bf0[43];
+ bf1[41] = bf0[41] + bf0[42];
+ bf1[42] = -bf0[42] + bf0[41];
+ bf1[43] = -bf0[43] + bf0[40];
+ bf1[44] = -bf0[44] + bf0[47];
+ bf1[45] = -bf0[45] + bf0[46];
+ bf1[46] = bf0[46] + bf0[45];
+ bf1[47] = bf0[47] + bf0[44];
+ bf1[48] = bf0[48] + bf0[51];
+ bf1[49] = bf0[49] + bf0[50];
+ bf1[50] = -bf0[50] + bf0[49];
+ bf1[51] = -bf0[51] + bf0[48];
+ bf1[52] = -bf0[52] + bf0[55];
+ bf1[53] = -bf0[53] + bf0[54];
+ bf1[54] = bf0[54] + bf0[53];
+ bf1[55] = bf0[55] + bf0[52];
+ bf1[56] = bf0[56] + bf0[59];
+ bf1[57] = bf0[57] + bf0[58];
+ bf1[58] = -bf0[58] + bf0[57];
+ bf1[59] = -bf0[59] + bf0[56];
+ bf1[60] = -bf0[60] + bf0[63];
+ bf1[61] = -bf0[61] + bf0[62];
+ bf1[62] = bf0[62] + bf0[61];
+ bf1[63] = bf0[63] + bf0[60];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = -bf0[17] + bf0[16];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[19] + bf0[18];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = -bf0[21] + bf0[20];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[23] + bf0[22];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = -bf0[25] + bf0[24];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[27] + bf0[26];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = -bf0[29] + bf0[28];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[31] + bf0[30];
+ bf1[32] = bf0[32];
+ bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+ bf1[43] = bf0[43];
+ bf1[44] = bf0[44];
+ bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit[stage]);
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[52];
+ bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit[stage]);
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit[stage]);
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit[stage]);
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+ bf1[32] = bf0[32] + bf0[33];
+ bf1[33] = -bf0[33] + bf0[32];
+ bf1[34] = -bf0[34] + bf0[35];
+ bf1[35] = bf0[35] + bf0[34];
+ bf1[36] = bf0[36] + bf0[37];
+ bf1[37] = -bf0[37] + bf0[36];
+ bf1[38] = -bf0[38] + bf0[39];
+ bf1[39] = bf0[39] + bf0[38];
+ bf1[40] = bf0[40] + bf0[41];
+ bf1[41] = -bf0[41] + bf0[40];
+ bf1[42] = -bf0[42] + bf0[43];
+ bf1[43] = bf0[43] + bf0[42];
+ bf1[44] = bf0[44] + bf0[45];
+ bf1[45] = -bf0[45] + bf0[44];
+ bf1[46] = -bf0[46] + bf0[47];
+ bf1[47] = bf0[47] + bf0[46];
+ bf1[48] = bf0[48] + bf0[49];
+ bf1[49] = -bf0[49] + bf0[48];
+ bf1[50] = -bf0[50] + bf0[51];
+ bf1[51] = bf0[51] + bf0[50];
+ bf1[52] = bf0[52] + bf0[53];
+ bf1[53] = -bf0[53] + bf0[52];
+ bf1[54] = -bf0[54] + bf0[55];
+ bf1[55] = bf0[55] + bf0[54];
+ bf1[56] = bf0[56] + bf0[57];
+ bf1[57] = -bf0[57] + bf0[56];
+ bf1[58] = -bf0[58] + bf0[59];
+ bf1[59] = bf0[59] + bf0[58];
+ bf1[60] = bf0[60] + bf0[61];
+ bf1[61] = -bf0[61] + bf0[60];
+ bf1[62] = -bf0[62] + bf0[63];
+ bf1[63] = bf0[63] + bf0[62];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit[stage]);
+ bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit[stage]);
+ bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[32];
+ bf1[2] = bf0[16];
+ bf1[3] = bf0[48];
+ bf1[4] = bf0[8];
+ bf1[5] = bf0[40];
+ bf1[6] = bf0[24];
+ bf1[7] = bf0[56];
+ bf1[8] = bf0[4];
+ bf1[9] = bf0[36];
+ bf1[10] = bf0[20];
+ bf1[11] = bf0[52];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[44];
+ bf1[14] = bf0[28];
+ bf1[15] = bf0[60];
+ bf1[16] = bf0[2];
+ bf1[17] = bf0[34];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[50];
+ bf1[20] = bf0[10];
+ bf1[21] = bf0[42];
+ bf1[22] = bf0[26];
+ bf1[23] = bf0[58];
+ bf1[24] = bf0[6];
+ bf1[25] = bf0[38];
+ bf1[26] = bf0[22];
+ bf1[27] = bf0[54];
+ bf1[28] = bf0[14];
+ bf1[29] = bf0[46];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[62];
+ bf1[32] = bf0[1];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[17];
+ bf1[35] = bf0[49];
+ bf1[36] = bf0[9];
+ bf1[37] = bf0[41];
+ bf1[38] = bf0[25];
+ bf1[39] = bf0[57];
+ bf1[40] = bf0[5];
+ bf1[41] = bf0[37];
+ bf1[42] = bf0[21];
+ bf1[43] = bf0[53];
+ bf1[44] = bf0[13];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[29];
+ bf1[47] = bf0[61];
+ bf1[48] = bf0[3];
+ bf1[49] = bf0[35];
+ bf1[50] = bf0[19];
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[11];
+ bf1[53] = bf0[43];
+ bf1[54] = bf0[27];
+ bf1[55] = bf0[59];
+ bf1[56] = bf0[7];
+ bf1[57] = bf0[39];
+ bf1[58] = bf0[23];
+ bf1[59] = bf0[55];
+ bf1[60] = bf0[15];
+ bf1[61] = bf0[47];
+ bf1[62] = bf0[31];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/vp10/common/vp10_fwd_txfm1d.h b/vp10/common/vp10_fwd_txfm1d.h
index d5b9f40..d06e305 100644
--- a/vp10/common/vp10_fwd_txfm1d.h
+++ b/vp10/common/vp10_fwd_txfm1d.h
@@ -25,6 +25,8 @@
const int8_t *cos_bit, const int8_t *stage_range);
void vp10_fdct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
void vp10_fadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c
index 67449ec..32214ae 100644
--- a/vp10/common/vp10_fwd_txfm2d.c
+++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -9,8 +9,47 @@
*/
#include "vp10/common/vp10_txfm.h"
+#include "vp10/common/vp10_fwd_txfm1d.h"
-static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+// Map a 1-D transform-type enum to the forward transform implementation
+// that computes it. Asserts (and returns NULL in release builds) for
++// unknown types.
+static inline TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {
+    // Each case returns directly; a break after return would be
+    // unreachable dead code.
+    case TXFM_TYPE_DCT4:
+      return vp10_fdct4_new;
+    case TXFM_TYPE_DCT8:
+      return vp10_fdct8_new;
+    case TXFM_TYPE_DCT16:
+      return vp10_fdct16_new;
+    case TXFM_TYPE_DCT32:
+      return vp10_fdct32_new;
+    case TXFM_TYPE_DCT64:
+      return vp10_fdct64_new;
+    case TXFM_TYPE_ADST4:
+      return vp10_fadst4_new;
+    case TXFM_TYPE_ADST8:
+      return vp10_fadst8_new;
+    case TXFM_TYPE_ADST16:
+      return vp10_fadst16_new;
+    case TXFM_TYPE_ADST32:
+      return vp10_fadst32_new;
+    default:
+      assert(0);
+      return NULL;
+  }
+}
+
+static inline void fwd_txfm2d_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
int32_t *txfm_buf) {
int i, j;
@@ -20,8 +59,8 @@
const int8_t *stage_range_row = cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cos_bit_row;
- const TxfmFunc txfm_func_col = cfg->txfm_func_col;
- const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+ const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
+ const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
// txfm_buf's length is txfm_size * txfm_size + 2 * txfm_size
// it is used for intermediate data buffering
@@ -51,7 +90,7 @@
}
}
-void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output,
+void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[4 * 4 + 4 + 4];
@@ -59,7 +98,7 @@
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
-void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output,
+void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[8 * 8 + 8 + 8];
@@ -67,7 +106,7 @@
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
-void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output,
+void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[16 * 16 + 16 + 16];
@@ -75,10 +114,18 @@
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
-void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output,
+void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[32 * 32 + 32 + 32];
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
+
+// Forward 2-D 64x64 transform, C reference implementation.
+// Thin wrapper that supplies a stack buffer to the generic fwd_txfm2d_c.
+void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ // txfm_buf length is txfm_size * txfm_size + 2 * txfm_size, the
+ // intermediate-buffering layout required by fwd_txfm2d_c.
+ int txfm_buf[64 * 64 + 64 + 64];
+ (void)bd; // bit depth is not used by the C reference path
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
diff --git a/vp10/common/vp10_fwd_txfm2d.h b/vp10/common/vp10_fwd_txfm2d.h
deleted file mode 100644
index 64e6f56..0000000
--- a/vp10/common/vp10_fwd_txfm2d.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP10_FWD_TXFM2D_H_
-#define VP10_FWD_TXFM2D_H_
-
-#include "vp10/common/vp10_txfm.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-#ifdef __cplusplus
-}
-#endif
-#endif // VP10_FWD_TXFM2D_H_
diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h
index 5c2b4ca..3c0a906 100644
--- a/vp10/common/vp10_fwd_txfm2d_cfg.h
+++ b/vp10/common/vp10_fwd_txfm2d_cfg.h
@@ -27,8 +27,8 @@
fwd_stage_range_row_dct_dct_4, // .stage_range_row
fwd_cos_bit_col_dct_dct_4, // .cos_bit_col
fwd_cos_bit_row_dct_dct_4, // .cos_bit_row
- vp10_fdct4_new, // .txfm_func_col
- vp10_fdct4_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_DCT4}; // .txfm_type_row
// ---------------- config fwd_dct_dct_8 ----------------
static const int8_t fwd_shift_dct_dct_8[3] = {5, -3, -1};
@@ -46,8 +46,8 @@
fwd_stage_range_row_dct_dct_8, // .stage_range_row
fwd_cos_bit_col_dct_dct_8, // .cos_bit_col
fwd_cos_bit_row_dct_dct_8, // .cos_bit_row
- vp10_fdct8_new, // .txfm_func_col
- vp10_fdct8_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_DCT8}; // .txfm_type_row
// ---------------- config fwd_dct_dct_16 ----------------
static const int8_t fwd_shift_dct_dct_16[3] = {4, -3, -1};
@@ -69,8 +69,8 @@
fwd_stage_range_row_dct_dct_16, // .stage_range_row
fwd_cos_bit_col_dct_dct_16, // .cos_bit_col
fwd_cos_bit_row_dct_dct_16, // .cos_bit_row
- vp10_fdct16_new, // .txfm_func_col
- vp10_fdct16_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_DCT16}; // .txfm_type_row
// ---------------- config fwd_dct_dct_32 ----------------
static const int8_t fwd_shift_dct_dct_32[3] = {3, -3, -1};
@@ -92,8 +92,31 @@
fwd_stage_range_row_dct_dct_32, // .stage_range_row
fwd_cos_bit_col_dct_dct_32, // .cos_bit_col
fwd_cos_bit_row_dct_dct_32, // .cos_bit_row
- vp10_fdct32_new, // .txfm_func_col
- vp10_fdct32_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_DCT32}; // .txfm_type_row
+
+// ---------------- config fwd_dct_dct_64 ----------------
+// Three shift values consumed by the 2-D transform pipeline
+// (exact per-pass mapping is applied inside fwd_txfm2d_c -- confirm there).
+static const int8_t fwd_shift_dct_dct_64[3] = {2, -2, -2};
+// One entry per stage of the 12-stage 64-point 1-D transform
+// (matches .stage_num_col / .stage_num_row == 12 below).
+static const int8_t fwd_stage_range_col_dct_dct_64[12] = {
+ 13, 14, 15, 16, 17, 18, 19, 19, 19, 19, 19, 19};
+static const int8_t fwd_stage_range_row_dct_dct_64[12] = {
+ 17, 18, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22};
+// Per-stage cosine-table bit precision, also indexed by stage.
+static const int8_t fwd_cos_bit_col_dct_dct_64[12] = {15, 15, 15, 15, 15, 14,
+ 13, 13, 13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_dct_64[12] = {15, 14, 13, 12, 11, 10,
+ 10, 10, 10, 10, 10, 10};
+
+// 2-D configuration: 64-point DCT applied in both column and row passes.
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_64 = {
+ 64, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ fwd_shift_dct_dct_64, // .shift
+ fwd_stage_range_col_dct_dct_64, // .stage_range_col
+ fwd_stage_range_row_dct_dct_64, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_64, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_64, // .cos_bit_row
+ TXFM_TYPE_DCT64, // .txfm_type_col
+ TXFM_TYPE_DCT64}; // .txfm_type_row
// ---------------- config fwd_dct_adst_4 ----------------
static const int8_t fwd_shift_dct_adst_4[3] = {5, -2, -1};
@@ -112,8 +135,8 @@
fwd_stage_range_row_dct_adst_4, // .stage_range_row
fwd_cos_bit_col_dct_adst_4, // .cos_bit_col
fwd_cos_bit_row_dct_adst_4, // .cos_bit_row
- vp10_fdct4_new, // .txfm_func_col
- vp10_fadst4_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_ADST4}; // .txfm_type_row
// ---------------- config fwd_dct_adst_8 ----------------
static const int8_t fwd_shift_dct_adst_8[3] = {7, -3, -3};
@@ -134,8 +157,8 @@
fwd_stage_range_row_dct_adst_8, // .stage_range_row
fwd_cos_bit_col_dct_adst_8, // .cos_bit_col
fwd_cos_bit_row_dct_adst_8, // .cos_bit_row
- vp10_fdct8_new, // .txfm_func_col
- vp10_fadst8_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_ADST8}; // .txfm_type_row
// ---------------- config fwd_dct_adst_16 ----------------
static const int8_t fwd_shift_dct_adst_16[3] = {4, -1, -3};
@@ -157,8 +180,8 @@
fwd_stage_range_row_dct_adst_16, // .stage_range_row
fwd_cos_bit_col_dct_adst_16, // .cos_bit_col
fwd_cos_bit_row_dct_adst_16, // .cos_bit_row
- vp10_fdct16_new, // .txfm_func_col
- vp10_fadst16_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_ADST16}; // .txfm_type_row
// ---------------- config fwd_dct_adst_32 ----------------
static const int8_t fwd_shift_dct_adst_32[3] = {3, -1, -3};
@@ -180,8 +203,8 @@
fwd_stage_range_row_dct_adst_32, // .stage_range_row
fwd_cos_bit_col_dct_adst_32, // .cos_bit_col
fwd_cos_bit_row_dct_adst_32, // .cos_bit_row
- vp10_fdct32_new, // .txfm_func_col
- vp10_fadst32_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_ADST32}; // .txfm_type_row
// ---------------- config fwd_adst_adst_4 ----------------
static const int8_t fwd_shift_adst_adst_4[3] = {6, 1, -5};
@@ -201,8 +224,8 @@
fwd_stage_range_row_adst_adst_4, // .stage_range_row
fwd_cos_bit_col_adst_adst_4, // .cos_bit_col
fwd_cos_bit_row_adst_adst_4, // .cos_bit_row
- vp10_fadst4_new, // .txfm_func_col
- vp10_fadst4_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_ADST4}; // .txfm_type_row
// ---------------- config fwd_adst_adst_8 ----------------
static const int8_t fwd_shift_adst_adst_8[3] = {3, -1, -1};
@@ -224,8 +247,8 @@
fwd_stage_range_row_adst_adst_8, // .stage_range_row
fwd_cos_bit_col_adst_adst_8, // .cos_bit_col
fwd_cos_bit_row_adst_adst_8, // .cos_bit_row
- vp10_fadst8_new, // .txfm_func_col
- vp10_fadst8_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_ADST8}; // .txfm_type_row
// ---------------- config fwd_adst_adst_16 ----------------
static const int8_t fwd_shift_adst_adst_16[3] = {2, 0, -2};
@@ -247,8 +270,8 @@
fwd_stage_range_row_adst_adst_16, // .stage_range_row
fwd_cos_bit_col_adst_adst_16, // .cos_bit_col
fwd_cos_bit_row_adst_adst_16, // .cos_bit_row
- vp10_fadst16_new, // .txfm_func_col
- vp10_fadst16_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_ADST16}; // .txfm_type_row
// ---------------- config fwd_adst_adst_32 ----------------
static const int8_t fwd_shift_adst_adst_32[3] = {4, -2, -3};
@@ -270,8 +293,8 @@
fwd_stage_range_row_adst_adst_32, // .stage_range_row
fwd_cos_bit_col_adst_adst_32, // .cos_bit_col
fwd_cos_bit_row_adst_adst_32, // .cos_bit_row
- vp10_fadst32_new, // .txfm_func_col
- vp10_fadst32_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_ADST32}; // .txfm_type_row
// ---------------- config fwd_adst_dct_4 ----------------
static const int8_t fwd_shift_adst_dct_4[3] = {5, -4, 1};
@@ -290,8 +313,8 @@
fwd_stage_range_row_adst_dct_4, // .stage_range_row
fwd_cos_bit_col_adst_dct_4, // .cos_bit_col
fwd_cos_bit_row_adst_dct_4, // .cos_bit_row
- vp10_fadst4_new, // .txfm_func_col
- vp10_fdct4_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_DCT4}; // .txfm_type_row
// ---------------- config fwd_adst_dct_8 ----------------
static const int8_t fwd_shift_adst_dct_8[3] = {5, 1, -5};
@@ -312,8 +335,8 @@
fwd_stage_range_row_adst_dct_8, // .stage_range_row
fwd_cos_bit_col_adst_dct_8, // .cos_bit_col
fwd_cos_bit_row_adst_dct_8, // .cos_bit_row
- vp10_fadst8_new, // .txfm_func_col
- vp10_fdct8_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_DCT8}; // .txfm_type_row
// ---------------- config fwd_adst_dct_16 ----------------
static const int8_t fwd_shift_adst_dct_16[3] = {4, -3, -1};
@@ -335,8 +358,8 @@
fwd_stage_range_row_adst_dct_16, // .stage_range_row
fwd_cos_bit_col_adst_dct_16, // .cos_bit_col
fwd_cos_bit_row_adst_dct_16, // .cos_bit_row
- vp10_fadst16_new, // .txfm_func_col
- vp10_fdct16_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_DCT16}; // .txfm_type_row
// ---------------- config fwd_adst_dct_32 ----------------
static const int8_t fwd_shift_adst_dct_32[3] = {5, -4, -2};
@@ -358,7 +381,7 @@
fwd_stage_range_row_adst_dct_32, // .stage_range_row
fwd_cos_bit_col_adst_dct_32, // .cos_bit_col
fwd_cos_bit_row_adst_dct_32, // .cos_bit_row
- vp10_fadst32_new, // .txfm_func_col
- vp10_fdct32_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_DCT32}; // .txfm_type_row
#endif // VP10_FWD_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_inv_txfm1d.c b/vp10/common/vp10_inv_txfm1d.c
index 606ca55..494000f 100644
--- a/vp10/common/vp10_inv_txfm1d.c
+++ b/vp10/common/vp10_inv_txfm1d.c
@@ -32,11 +32,11 @@
#else
#define range_check(stage, input, buf, size, bit) \
{ \
- (void) stage; \
- (void) input; \
- (void) buf; \
- (void) size; \
- (void) bit; \
+ (void)stage; \
+ (void)input; \
+ (void)buf; \
+ (void)size; \
+ (void)bit; \
}
#endif
@@ -1535,3 +1535,796 @@
bf1[31] = bf0[0];
range_check(stage, input, bf1, size, stage_range[stage]);
}
+
+void vp10_idct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 64;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[64];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[32];
+ bf1[2] = input[16];
+ bf1[3] = input[48];
+ bf1[4] = input[8];
+ bf1[5] = input[40];
+ bf1[6] = input[24];
+ bf1[7] = input[56];
+ bf1[8] = input[4];
+ bf1[9] = input[36];
+ bf1[10] = input[20];
+ bf1[11] = input[52];
+ bf1[12] = input[12];
+ bf1[13] = input[44];
+ bf1[14] = input[28];
+ bf1[15] = input[60];
+ bf1[16] = input[2];
+ bf1[17] = input[34];
+ bf1[18] = input[18];
+ bf1[19] = input[50];
+ bf1[20] = input[10];
+ bf1[21] = input[42];
+ bf1[22] = input[26];
+ bf1[23] = input[58];
+ bf1[24] = input[6];
+ bf1[25] = input[38];
+ bf1[26] = input[22];
+ bf1[27] = input[54];
+ bf1[28] = input[14];
+ bf1[29] = input[46];
+ bf1[30] = input[30];
+ bf1[31] = input[62];
+ bf1[32] = input[1];
+ bf1[33] = input[33];
+ bf1[34] = input[17];
+ bf1[35] = input[49];
+ bf1[36] = input[9];
+ bf1[37] = input[41];
+ bf1[38] = input[25];
+ bf1[39] = input[57];
+ bf1[40] = input[5];
+ bf1[41] = input[37];
+ bf1[42] = input[21];
+ bf1[43] = input[53];
+ bf1[44] = input[13];
+ bf1[45] = input[45];
+ bf1[46] = input[29];
+ bf1[47] = input[61];
+ bf1[48] = input[3];
+ bf1[49] = input[35];
+ bf1[50] = input[19];
+ bf1[51] = input[51];
+ bf1[52] = input[11];
+ bf1[53] = input[43];
+ bf1[54] = input[27];
+ bf1[55] = input[59];
+ bf1[56] = input[7];
+ bf1[57] = input[39];
+ bf1[58] = input[23];
+ bf1[59] = input[55];
+ bf1[60] = input[15];
+ bf1[61] = input[47];
+ bf1[62] = input[31];
+ bf1[63] = input[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit[stage]);
+ bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit[stage]);
+ bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+ bf1[32] = bf0[32] + bf0[33];
+ bf1[33] = bf0[32] - bf0[33];
+ bf1[34] = -bf0[34] + bf0[35];
+ bf1[35] = bf0[34] + bf0[35];
+ bf1[36] = bf0[36] + bf0[37];
+ bf1[37] = bf0[36] - bf0[37];
+ bf1[38] = -bf0[38] + bf0[39];
+ bf1[39] = bf0[38] + bf0[39];
+ bf1[40] = bf0[40] + bf0[41];
+ bf1[41] = bf0[40] - bf0[41];
+ bf1[42] = -bf0[42] + bf0[43];
+ bf1[43] = bf0[42] + bf0[43];
+ bf1[44] = bf0[44] + bf0[45];
+ bf1[45] = bf0[44] - bf0[45];
+ bf1[46] = -bf0[46] + bf0[47];
+ bf1[47] = bf0[46] + bf0[47];
+ bf1[48] = bf0[48] + bf0[49];
+ bf1[49] = bf0[48] - bf0[49];
+ bf1[50] = -bf0[50] + bf0[51];
+ bf1[51] = bf0[50] + bf0[51];
+ bf1[52] = bf0[52] + bf0[53];
+ bf1[53] = bf0[52] - bf0[53];
+ bf1[54] = -bf0[54] + bf0[55];
+ bf1[55] = bf0[54] + bf0[55];
+ bf1[56] = bf0[56] + bf0[57];
+ bf1[57] = bf0[56] - bf0[57];
+ bf1[58] = -bf0[58] + bf0[59];
+ bf1[59] = bf0[58] + bf0[59];
+ bf1[60] = bf0[60] + bf0[61];
+ bf1[61] = bf0[60] - bf0[61];
+ bf1[62] = -bf0[62] + bf0[63];
+ bf1[63] = bf0[62] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = bf0[16] - bf0[17];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[18] + bf0[19];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = bf0[20] - bf0[21];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[22] + bf0[23];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = bf0[24] - bf0[25];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[26] + bf0[27];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = bf0[28] - bf0[29];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[30] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+ bf1[43] = bf0[43];
+ bf1[44] = bf0[44];
+ bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit[stage]);
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[52];
+ bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit[stage]);
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit[stage]);
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit[stage]);
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[35];
+ bf1[33] = bf0[33] + bf0[34];
+ bf1[34] = bf0[33] - bf0[34];
+ bf1[35] = bf0[32] - bf0[35];
+ bf1[36] = -bf0[36] + bf0[39];
+ bf1[37] = -bf0[37] + bf0[38];
+ bf1[38] = bf0[37] + bf0[38];
+ bf1[39] = bf0[36] + bf0[39];
+ bf1[40] = bf0[40] + bf0[43];
+ bf1[41] = bf0[41] + bf0[42];
+ bf1[42] = bf0[41] - bf0[42];
+ bf1[43] = bf0[40] - bf0[43];
+ bf1[44] = -bf0[44] + bf0[47];
+ bf1[45] = -bf0[45] + bf0[46];
+ bf1[46] = bf0[45] + bf0[46];
+ bf1[47] = bf0[44] + bf0[47];
+ bf1[48] = bf0[48] + bf0[51];
+ bf1[49] = bf0[49] + bf0[50];
+ bf1[50] = bf0[49] - bf0[50];
+ bf1[51] = bf0[48] - bf0[51];
+ bf1[52] = -bf0[52] + bf0[55];
+ bf1[53] = -bf0[53] + bf0[54];
+ bf1[54] = bf0[53] + bf0[54];
+ bf1[55] = bf0[52] + bf0[55];
+ bf1[56] = bf0[56] + bf0[59];
+ bf1[57] = bf0[57] + bf0[58];
+ bf1[58] = bf0[57] - bf0[58];
+ bf1[59] = bf0[56] - bf0[59];
+ bf1[60] = -bf0[60] + bf0[63];
+ bf1[61] = -bf0[61] + bf0[62];
+ bf1[62] = bf0[61] + bf0[62];
+ bf1[63] = bf0[60] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = bf0[17] - bf0[18];
+ bf1[19] = bf0[16] - bf0[19];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[21] + bf0[22];
+ bf1[23] = bf0[20] + bf0[23];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = bf0[25] - bf0[26];
+ bf1[27] = bf0[24] - bf0[27];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[29] + bf0[30];
+ bf1[31] = bf0[28] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = bf0[41];
+ bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit[stage]);
+ bf1[54] = bf0[54];
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit[stage]);
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[39];
+ bf1[33] = bf0[33] + bf0[38];
+ bf1[34] = bf0[34] + bf0[37];
+ bf1[35] = bf0[35] + bf0[36];
+ bf1[36] = bf0[35] - bf0[36];
+ bf1[37] = bf0[34] - bf0[37];
+ bf1[38] = bf0[33] - bf0[38];
+ bf1[39] = bf0[32] - bf0[39];
+ bf1[40] = -bf0[40] + bf0[47];
+ bf1[41] = -bf0[41] + bf0[46];
+ bf1[42] = -bf0[42] + bf0[45];
+ bf1[43] = -bf0[43] + bf0[44];
+ bf1[44] = bf0[43] + bf0[44];
+ bf1[45] = bf0[42] + bf0[45];
+ bf1[46] = bf0[41] + bf0[46];
+ bf1[47] = bf0[40] + bf0[47];
+ bf1[48] = bf0[48] + bf0[55];
+ bf1[49] = bf0[49] + bf0[54];
+ bf1[50] = bf0[50] + bf0[53];
+ bf1[51] = bf0[51] + bf0[52];
+ bf1[52] = bf0[51] - bf0[52];
+ bf1[53] = bf0[50] - bf0[53];
+ bf1[54] = bf0[49] - bf0[54];
+ bf1[55] = bf0[48] - bf0[55];
+ bf1[56] = -bf0[56] + bf0[63];
+ bf1[57] = -bf0[57] + bf0[62];
+ bf1[58] = -bf0[58] + bf0[61];
+ bf1[59] = -bf0[59] + bf0[60];
+ bf1[60] = bf0[59] + bf0[60];
+ bf1[61] = bf0[58] + bf0[61];
+ bf1[62] = bf0[57] + bf0[62];
+ bf1[63] = bf0[56] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = bf0[19] - bf0[20];
+ bf1[21] = bf0[18] - bf0[21];
+ bf1[22] = bf0[17] - bf0[22];
+ bf1[23] = bf0[16] - bf0[23];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[27] + bf0[28];
+ bf1[29] = bf0[26] + bf0[29];
+ bf1[30] = bf0[25] + bf0[30];
+ bf1[31] = bf0[24] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+ bf1[44] = bf0[44];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = bf0[50];
+ bf1[51] = bf0[51];
+ bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit[stage]);
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[47];
+ bf1[33] = bf0[33] + bf0[46];
+ bf1[34] = bf0[34] + bf0[45];
+ bf1[35] = bf0[35] + bf0[44];
+ bf1[36] = bf0[36] + bf0[43];
+ bf1[37] = bf0[37] + bf0[42];
+ bf1[38] = bf0[38] + bf0[41];
+ bf1[39] = bf0[39] + bf0[40];
+ bf1[40] = bf0[39] - bf0[40];
+ bf1[41] = bf0[38] - bf0[41];
+ bf1[42] = bf0[37] - bf0[42];
+ bf1[43] = bf0[36] - bf0[43];
+ bf1[44] = bf0[35] - bf0[44];
+ bf1[45] = bf0[34] - bf0[45];
+ bf1[46] = bf0[33] - bf0[46];
+ bf1[47] = bf0[32] - bf0[47];
+ bf1[48] = -bf0[48] + bf0[63];
+ bf1[49] = -bf0[49] + bf0[62];
+ bf1[50] = -bf0[50] + bf0[61];
+ bf1[51] = -bf0[51] + bf0[60];
+ bf1[52] = -bf0[52] + bf0[59];
+ bf1[53] = -bf0[53] + bf0[58];
+ bf1[54] = -bf0[54] + bf0[57];
+ bf1[55] = -bf0[55] + bf0[56];
+ bf1[56] = bf0[55] + bf0[56];
+ bf1[57] = bf0[54] + bf0[57];
+ bf1[58] = bf0[53] + bf0[58];
+ bf1[59] = bf0[52] + bf0[59];
+ bf1[60] = bf0[51] + bf0[60];
+ bf1[61] = bf0[50] + bf0[61];
+ bf1[62] = bf0[49] + bf0[62];
+ bf1[63] = bf0[48] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = bf0[15] - bf0[16];
+ bf1[17] = bf0[14] - bf0[17];
+ bf1[18] = bf0[13] - bf0[18];
+ bf1[19] = bf0[12] - bf0[19];
+ bf1[20] = bf0[11] - bf0[20];
+ bf1[21] = bf0[10] - bf0[21];
+ bf1[22] = bf0[9] - bf0[22];
+ bf1[23] = bf0[8] - bf0[23];
+ bf1[24] = bf0[7] - bf0[24];
+ bf1[25] = bf0[6] - bf0[25];
+ bf1[26] = bf0[5] - bf0[26];
+ bf1[27] = bf0[4] - bf0[27];
+ bf1[28] = bf0[3] - bf0[28];
+ bf1[29] = bf0[2] - bf0[29];
+ bf1[30] = bf0[1] - bf0[30];
+ bf1[31] = bf0[0] - bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = bf0[37];
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = bf0[58];
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[63];
+ bf1[1] = bf0[1] + bf0[62];
+ bf1[2] = bf0[2] + bf0[61];
+ bf1[3] = bf0[3] + bf0[60];
+ bf1[4] = bf0[4] + bf0[59];
+ bf1[5] = bf0[5] + bf0[58];
+ bf1[6] = bf0[6] + bf0[57];
+ bf1[7] = bf0[7] + bf0[56];
+ bf1[8] = bf0[8] + bf0[55];
+ bf1[9] = bf0[9] + bf0[54];
+ bf1[10] = bf0[10] + bf0[53];
+ bf1[11] = bf0[11] + bf0[52];
+ bf1[12] = bf0[12] + bf0[51];
+ bf1[13] = bf0[13] + bf0[50];
+ bf1[14] = bf0[14] + bf0[49];
+ bf1[15] = bf0[15] + bf0[48];
+ bf1[16] = bf0[16] + bf0[47];
+ bf1[17] = bf0[17] + bf0[46];
+ bf1[18] = bf0[18] + bf0[45];
+ bf1[19] = bf0[19] + bf0[44];
+ bf1[20] = bf0[20] + bf0[43];
+ bf1[21] = bf0[21] + bf0[42];
+ bf1[22] = bf0[22] + bf0[41];
+ bf1[23] = bf0[23] + bf0[40];
+ bf1[24] = bf0[24] + bf0[39];
+ bf1[25] = bf0[25] + bf0[38];
+ bf1[26] = bf0[26] + bf0[37];
+ bf1[27] = bf0[27] + bf0[36];
+ bf1[28] = bf0[28] + bf0[35];
+ bf1[29] = bf0[29] + bf0[34];
+ bf1[30] = bf0[30] + bf0[33];
+ bf1[31] = bf0[31] + bf0[32];
+ bf1[32] = bf0[31] - bf0[32];
+ bf1[33] = bf0[30] - bf0[33];
+ bf1[34] = bf0[29] - bf0[34];
+ bf1[35] = bf0[28] - bf0[35];
+ bf1[36] = bf0[27] - bf0[36];
+ bf1[37] = bf0[26] - bf0[37];
+ bf1[38] = bf0[25] - bf0[38];
+ bf1[39] = bf0[24] - bf0[39];
+ bf1[40] = bf0[23] - bf0[40];
+ bf1[41] = bf0[22] - bf0[41];
+ bf1[42] = bf0[21] - bf0[42];
+ bf1[43] = bf0[20] - bf0[43];
+ bf1[44] = bf0[19] - bf0[44];
+ bf1[45] = bf0[18] - bf0[45];
+ bf1[46] = bf0[17] - bf0[46];
+ bf1[47] = bf0[16] - bf0[47];
+ bf1[48] = bf0[15] - bf0[48];
+ bf1[49] = bf0[14] - bf0[49];
+ bf1[50] = bf0[13] - bf0[50];
+ bf1[51] = bf0[12] - bf0[51];
+ bf1[52] = bf0[11] - bf0[52];
+ bf1[53] = bf0[10] - bf0[53];
+ bf1[54] = bf0[9] - bf0[54];
+ bf1[55] = bf0[8] - bf0[55];
+ bf1[56] = bf0[7] - bf0[56];
+ bf1[57] = bf0[6] - bf0[57];
+ bf1[58] = bf0[5] - bf0[58];
+ bf1[59] = bf0[4] - bf0[59];
+ bf1[60] = bf0[3] - bf0[60];
+ bf1[61] = bf0[2] - bf0[61];
+ bf1[62] = bf0[1] - bf0[62];
+ bf1[63] = bf0[0] - bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/vp10/common/vp10_inv_txfm1d.h b/vp10/common/vp10_inv_txfm1d.h
index 0609b65..fd547a6 100644
--- a/vp10/common/vp10_inv_txfm1d.h
+++ b/vp10/common/vp10_inv_txfm1d.h
@@ -25,6 +25,8 @@
const int8_t *cos_bit, const int8_t *stage_range);
void vp10_idct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
void vp10_iadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c
index c894a42..d9f713c 100644
--- a/vp10/common/vp10_inv_txfm2d.c
+++ b/vp10/common/vp10_inv_txfm2d.c
@@ -9,8 +9,47 @@
*/
#include "vp10/common/vp10_txfm.h"
+#include "vp10/common/vp10_inv_txfm1d.h"
-static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+static inline TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
+ switch (txfm_type) {
+ case TXFM_TYPE_DCT4:
+ return vp10_idct4_new;
+ break;
+ case TXFM_TYPE_DCT8:
+ return vp10_idct8_new;
+ break;
+ case TXFM_TYPE_DCT16:
+ return vp10_idct16_new;
+ break;
+ case TXFM_TYPE_DCT32:
+ return vp10_idct32_new;
+ break;
+ case TXFM_TYPE_DCT64:
+ return vp10_idct64_new;
+ break;
+ case TXFM_TYPE_ADST4:
+ return vp10_iadst4_new;
+ break;
+ case TXFM_TYPE_ADST8:
+ return vp10_iadst8_new;
+ break;
+ case TXFM_TYPE_ADST16:
+ return vp10_iadst16_new;
+ break;
+ case TXFM_TYPE_ADST32:
+ return vp10_iadst32_new;
+ break;
+ default:
+ assert(0);
+ return NULL;
+ }
+}
+
+static inline void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
int stride, const TXFM_2D_CFG *cfg,
int32_t *txfm_buf) {
const int txfm_size = cfg->txfm_size;
@@ -19,8 +58,8 @@
const int8_t *stage_range_row = cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cos_bit_row;
- const TxfmFunc txfm_func_col = cfg->txfm_func_col;
- const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+ const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
+ const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);
// txfm_buf's length is txfm_size * txfm_size + 2 * txfm_size
// it is used for intermediate data buffering
@@ -49,7 +88,7 @@
}
}
-void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
+void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[4 * 4 + 4 + 4];
@@ -61,7 +100,7 @@
clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
}
-void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
+void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[8 * 8 + 8 + 8];
@@ -73,7 +112,7 @@
clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
}
-void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
+void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[16 * 16 + 16 + 16];
@@ -85,7 +124,7 @@
clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
}
-void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
+void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
int txfm_buf[32 * 32 + 32 + 32];
@@ -96,3 +135,15 @@
inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
}
+
+void vp10_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int txfm_buf[64 * 64 + 64 + 64];
+ // output contains the prediction signal which is always positive and smaller
+ // than (1 << bd) - 1
+ // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
+ // int16_t*
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
+}
diff --git a/vp10/common/vp10_inv_txfm2d.h b/vp10/common/vp10_inv_txfm2d.h
deleted file mode 100644
index 1b570ef..0000000
--- a/vp10/common/vp10_inv_txfm2d.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP10_INV_TXFM2D_C_H_
-#define VP10_INV_TXFM2D_C_H_
-
-#include "vp10/common/vp10_inv_txfm2d_cfg.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd);
-#ifdef __cplusplus
-}
-#endif
-#endif // VP10_INV_TXFM2D_C_H_
diff --git a/vp10/common/vp10_inv_txfm2d_cfg.h b/vp10/common/vp10_inv_txfm2d_cfg.h
index fc552fe..ee965ba 100644
--- a/vp10/common/vp10_inv_txfm2d_cfg.h
+++ b/vp10/common/vp10_inv_txfm2d_cfg.h
@@ -11,7 +11,6 @@
#ifndef VP10_INV_TXFM2D_CFG_H_
#define VP10_INV_TXFM2D_CFG_H_
#include "vp10/common/vp10_inv_txfm1d.h"
-
// ---------------- config inv_dct_dct_4 ----------------
static const int8_t inv_shift_dct_dct_4[2] = {1, -5};
static const int8_t inv_stage_range_col_dct_dct_4[4] = {17, 17, 16, 16};
@@ -28,8 +27,8 @@
inv_stage_range_row_dct_dct_4, // .stage_range_row
inv_cos_bit_col_dct_dct_4, // .cos_bit_col
inv_cos_bit_row_dct_dct_4, // .cos_bit_row
- vp10_idct4_new, // .txfm_func_col
- vp10_idct4_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_DCT4}; // .txfm_type_row
// ---------------- config inv_dct_dct_8 ----------------
static const int8_t inv_shift_dct_dct_8[2] = {0, -5};
@@ -47,8 +46,8 @@
inv_stage_range_row_dct_dct_8, // .stage_range_row
inv_cos_bit_col_dct_dct_8, // .cos_bit_col
inv_cos_bit_row_dct_dct_8, // .cos_bit_row
- vp10_idct8_new, // .txfm_func_col
- vp10_idct8_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_DCT8}; // .txfm_type_row
// ---------------- config inv_dct_dct_16 ----------------
static const int8_t inv_shift_dct_dct_16[2] = {0, -6};
@@ -70,8 +69,8 @@
inv_stage_range_row_dct_dct_16, // .stage_range_row
inv_cos_bit_col_dct_dct_16, // .cos_bit_col
inv_cos_bit_row_dct_dct_16, // .cos_bit_row
- vp10_idct16_new, // .txfm_func_col
- vp10_idct16_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_DCT16}; // .txfm_type_row
// ---------------- config inv_dct_dct_32 ----------------
static const int8_t inv_shift_dct_dct_32[2] = {-1, -6};
@@ -93,8 +92,31 @@
inv_stage_range_row_dct_dct_32, // .stage_range_row
inv_cos_bit_col_dct_dct_32, // .cos_bit_col
inv_cos_bit_row_dct_dct_32, // .cos_bit_row
- vp10_idct32_new, // .txfm_func_col
- vp10_idct32_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_DCT32}; // .txfm_type_row
+
+// ---------------- config inv_dct_dct_64 ----------------
+static const int8_t inv_shift_dct_dct_64[2] = {-1, -7};
+static const int8_t inv_stage_range_col_dct_dct_64[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18};
+static const int8_t inv_stage_range_row_dct_dct_64[12] = {
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
+static const int8_t inv_cos_bit_col_dct_dct_64[12] = {13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 14};
+static const int8_t inv_cos_bit_row_dct_dct_64[12] = {12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_64 = {
+ 64, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ inv_shift_dct_dct_64, // .shift
+ inv_stage_range_col_dct_dct_64, // .stage_range_col
+ inv_stage_range_row_dct_dct_64, // .stage_range_row
+ inv_cos_bit_col_dct_dct_64, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_64, // .cos_bit_row
+ TXFM_TYPE_DCT64, // .txfm_type_col
+ TXFM_TYPE_DCT64}; // .txfm_type_row
// ---------------- config inv_dct_adst_4 ----------------
static const int8_t inv_shift_dct_adst_4[2] = {1, -5};
@@ -113,8 +135,8 @@
inv_stage_range_row_dct_adst_4, // .stage_range_row
inv_cos_bit_col_dct_adst_4, // .cos_bit_col
inv_cos_bit_row_dct_adst_4, // .cos_bit_row
- vp10_idct4_new, // .txfm_func_col
- vp10_iadst4_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_ADST4}; // .txfm_type_row
// ---------------- config inv_dct_adst_8 ----------------
static const int8_t inv_shift_dct_adst_8[2] = {-1, -4};
@@ -135,8 +157,8 @@
inv_stage_range_row_dct_adst_8, // .stage_range_row
inv_cos_bit_col_dct_adst_8, // .cos_bit_col
inv_cos_bit_row_dct_adst_8, // .cos_bit_row
- vp10_idct8_new, // .txfm_func_col
- vp10_iadst8_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_ADST8}; // .txfm_type_row
// ---------------- config inv_dct_adst_16 ----------------
static const int8_t inv_shift_dct_adst_16[2] = {1, -7};
@@ -158,8 +180,8 @@
inv_stage_range_row_dct_adst_16, // .stage_range_row
inv_cos_bit_col_dct_adst_16, // .cos_bit_col
inv_cos_bit_row_dct_adst_16, // .cos_bit_row
- vp10_idct16_new, // .txfm_func_col
- vp10_iadst16_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_ADST16}; // .txfm_type_row
// ---------------- config inv_dct_adst_32 ----------------
static const int8_t inv_shift_dct_adst_32[2] = {-1, -6};
@@ -181,8 +203,8 @@
inv_stage_range_row_dct_adst_32, // .stage_range_row
inv_cos_bit_col_dct_adst_32, // .cos_bit_col
inv_cos_bit_row_dct_adst_32, // .cos_bit_row
- vp10_idct32_new, // .txfm_func_col
- vp10_iadst32_new}; // .txfm_func_row;
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_ADST32}; // .txfm_type_row
// ---------------- config inv_adst_adst_4 ----------------
static const int8_t inv_shift_adst_adst_4[2] = {0, -4};
@@ -202,8 +224,8 @@
inv_stage_range_row_adst_adst_4, // .stage_range_row
inv_cos_bit_col_adst_adst_4, // .cos_bit_col
inv_cos_bit_row_adst_adst_4, // .cos_bit_row
- vp10_iadst4_new, // .txfm_func_col
- vp10_iadst4_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_ADST4}; // .txfm_type_row
// ---------------- config inv_adst_adst_8 ----------------
static const int8_t inv_shift_adst_adst_8[2] = {-1, -4};
@@ -225,8 +247,8 @@
inv_stage_range_row_adst_adst_8, // .stage_range_row
inv_cos_bit_col_adst_adst_8, // .cos_bit_col
inv_cos_bit_row_adst_adst_8, // .cos_bit_row
- vp10_iadst8_new, // .txfm_func_col
- vp10_iadst8_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_ADST8}; // .txfm_type_row
// ---------------- config inv_adst_adst_16 ----------------
static const int8_t inv_shift_adst_adst_16[2] = {0, -6};
@@ -248,8 +270,8 @@
inv_stage_range_row_adst_adst_16, // .stage_range_row
inv_cos_bit_col_adst_adst_16, // .cos_bit_col
inv_cos_bit_row_adst_adst_16, // .cos_bit_row
- vp10_iadst16_new, // .txfm_func_col
- vp10_iadst16_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_ADST16}; // .txfm_type_row
// ---------------- config inv_adst_adst_32 ----------------
static const int8_t inv_shift_adst_adst_32[2] = {-1, -6};
@@ -271,8 +293,8 @@
inv_stage_range_row_adst_adst_32, // .stage_range_row
inv_cos_bit_col_adst_adst_32, // .cos_bit_col
inv_cos_bit_row_adst_adst_32, // .cos_bit_row
- vp10_iadst32_new, // .txfm_func_col
- vp10_iadst32_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_ADST32}; // .txfm_type_row
// ---------------- config inv_adst_dct_4 ----------------
static const int8_t inv_shift_adst_dct_4[2] = {1, -5};
@@ -291,8 +313,8 @@
inv_stage_range_row_adst_dct_4, // .stage_range_row
inv_cos_bit_col_adst_dct_4, // .cos_bit_col
inv_cos_bit_row_adst_dct_4, // .cos_bit_row
- vp10_iadst4_new, // .txfm_func_col
- vp10_idct4_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_DCT4}; // .txfm_type_row
// ---------------- config inv_adst_dct_8 ----------------
static const int8_t inv_shift_adst_dct_8[2] = {-1, -4};
@@ -313,8 +335,8 @@
inv_stage_range_row_adst_dct_8, // .stage_range_row
inv_cos_bit_col_adst_dct_8, // .cos_bit_col
inv_cos_bit_row_adst_dct_8, // .cos_bit_row
- vp10_iadst8_new, // .txfm_func_col
- vp10_idct8_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_DCT8}; // .txfm_type_row
// ---------------- config inv_adst_dct_16 ----------------
static const int8_t inv_shift_adst_dct_16[2] = {-1, -5};
@@ -336,8 +358,8 @@
inv_stage_range_row_adst_dct_16, // .stage_range_row
inv_cos_bit_col_adst_dct_16, // .cos_bit_col
inv_cos_bit_row_adst_dct_16, // .cos_bit_row
- vp10_iadst16_new, // .txfm_func_col
- vp10_idct16_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_DCT16}; // .txfm_type_row
// ---------------- config inv_adst_dct_32 ----------------
static const int8_t inv_shift_adst_dct_32[2] = {-1, -6};
@@ -359,7 +381,7 @@
inv_stage_range_row_adst_dct_32, // .stage_range_row
inv_cos_bit_col_adst_dct_32, // .cos_bit_col
inv_cos_bit_row_adst_dct_32, // .cos_bit_row
- vp10_iadst32_new, // .txfm_func_col
- vp10_idct32_new}; // .txfm_func_row;
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_DCT32}; // .txfm_type_row
#endif // VP10_INV_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index f617ff6..4612395 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -7,6 +7,7 @@
#include "vpx/vpx_integer.h"
#include "vp10/common/common.h"
#include "vp10/common/enums.h"
+#include "vp10/common/vp10_txfm.h"
struct macroblockd;
@@ -611,6 +612,32 @@
} # CONFIG_EMULATE_HARDWARE
} # CONFIG_VP9_HIGHBITDEPTH
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ #fwd txfm
+ add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_fwd_txfm2d_4x4/;
+ add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_fwd_txfm2d_8x8/;
+ add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_fwd_txfm2d_16x16/;
+ add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_fwd_txfm2d_32x32/;
+ add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_fwd_txfm2d_64x64/;
+
+ #inv txfm
+ add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_inv_txfm2d_add_4x4/;
+ add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_inv_txfm2d_add_8x8/;
+ add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_inv_txfm2d_add_16x16/;
+ add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_inv_txfm2d_add_32x32/;
+ add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ specialize qw/vp10_inv_txfm2d_add_64x64/;
+}
+
#
# Motion search
#
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h
index b4fd753..ad7b38f 100644
--- a/vp10/common/vp10_txfm.h
+++ b/vp10/common/vp10_txfm.h
@@ -118,9 +118,9 @@
int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
if (result_32 != result_64) {
printf(
- "%s overflow result_32: %d result_64: %ld w0: %d in0: %d w1: %d in1: "
+ "%s overflow result_32: %d result_64: %lld w0: %d in0: %d w1: %d in1: "
"%d\n",
- __func__, result_32, result_64, w0, in0, w1, in1);
+ __func__, result_32, (long long int)result_64, w0, in0, w1, in1);
assert(0 && "half_btf overflow");
}
#endif
@@ -150,6 +150,18 @@
typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+typedef enum TXFM_TYPE {
+ TXFM_TYPE_DCT4,
+ TXFM_TYPE_DCT8,
+ TXFM_TYPE_DCT16,
+ TXFM_TYPE_DCT32,
+ TXFM_TYPE_DCT64,
+ TXFM_TYPE_ADST4,
+ TXFM_TYPE_ADST8,
+ TXFM_TYPE_ADST16,
+ TXFM_TYPE_ADST32,
+} TXFM_TYPE;
+
typedef struct TXFM_2D_CFG {
const int txfm_size;
const int stage_num_col;
@@ -160,8 +172,8 @@
const int8_t *stage_range_row;
const int8_t *cos_bit_col;
const int8_t *cos_bit_row;
- const TxfmFunc txfm_func_col;
- const TxfmFunc txfm_func_row;
+ const TXFM_TYPE txfm_type_col;
+ const TXFM_TYPE txfm_type_row;
} TXFM_2D_CFG;
#endif // VP10_TXFM_H_
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index ce6317c..935e4f7 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -117,9 +117,7 @@
for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
vp10_diff_update_prob(r, &fc->refmv_prob[i]);
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_diff_update_prob(r, &fc->drl_prob0[i]);
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_diff_update_prob(r, &fc->drl_prob1[i]);
+ vp10_diff_update_prob(r, &fc->drl_prob[i]);
#if CONFIG_EXT_INTER
vp10_diff_update_prob(r, &fc->new2mv_prob);
#endif // CONFIG_EXT_INTER
@@ -215,58 +213,28 @@
}
}
-static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
- const TX_SIZE tx_size,
- uint8_t *dst, int stride,
- int eob, int block) {
+static void inverse_transform_block(MACROBLOCKD* xd, int plane,
+ const TX_TYPE tx_type,
+ const TX_SIZE tx_size,
+ uint8_t *dst, int stride,
+ int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
const int seg_id = xd->mi[0]->mbmi.segment_id;
if (eob > 0) {
tran_low_t *const dqcoeff = pd->dqcoeff;
+ INV_TXFM_PARAM inv_txfm_param;
+ inv_txfm_param.tx_type = tx_type;
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = eob;
+ inv_txfm_param.lossless = xd->lossless[seg_id];
+
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (tx_size) {
- case TX_4X4:
- vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd,
- tx_type, xd->lossless[seg_id]);
- break;
- case TX_8X8:
- vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd,
- tx_type);
- break;
- case TX_16X16:
- vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, stride, eob, xd->bd,
- tx_type);
- break;
- case TX_32X32:
- vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, stride, eob, xd->bd,
- tx_type);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
+ inv_txfm_param.bd = xd->bd;
+ highbd_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
- switch (tx_size) {
- case TX_4X4:
- vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type,
- xd->lossless[seg_id]);
- break;
- case TX_8X8:
- vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type);
- break;
- case TX_16X16:
- vp10_inv_txfm_add_16x16(dqcoeff, dst, stride, eob, tx_type);
- break;
- case TX_32X32:
- vp10_inv_txfm_add_32x32(dqcoeff, dst, stride, eob, tx_type);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
+ inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -289,75 +257,6 @@
}
}
-static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
- const TX_TYPE tx_type,
- const TX_SIZE tx_size,
- uint8_t *dst, int stride,
- int eob) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int seg_id = xd->mi[0]->mbmi.segment_id;
- if (eob > 0) {
- tran_low_t *const dqcoeff = pd->dqcoeff;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (tx_size) {
- case TX_4X4:
- vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd,
- tx_type, xd->lossless[seg_id]);
- break;
- case TX_8X8:
- vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd,
- tx_type);
- break;
- case TX_16X16:
- vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, stride, eob, xd->bd,
- tx_type);
- break;
- case TX_32X32:
- vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, stride, eob, xd->bd,
- tx_type);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
- } else {
-#endif // CONFIG_VP9_HIGHBITDEPTH
- switch (tx_size) {
- case TX_4X4:
- vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type,
- xd->lossless[seg_id]);
- break;
- case TX_8X8:
- vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type);
- break;
- case TX_16X16:
- vp10_inv_txfm_add_16x16(dqcoeff, dst, stride, eob, tx_type);
- break;
- case TX_32X32:
- vp10_inv_txfm_add_32x32(dqcoeff, dst, stride, eob, tx_type);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
-#if CONFIG_VP9_HIGHBITDEPTH
- }
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- if (eob == 1) {
- dqcoeff[0] = 0;
- } else {
- if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
- memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
- else if (tx_size == TX_32X32 && eob <= 34)
- memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
- else
- memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
- }
- }
-}
-
static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
#if CONFIG_ANS
const rans_dec_lut *const token_tab,
@@ -393,8 +292,8 @@
#endif // CONFIG_ANS
plane, sc, col, row, tx_size,
r, mbmi->segment_id);
- inverse_transform_block_intra(xd, plane, tx_type, tx_size,
- dst, pd->dst.stride, eob);
+ inverse_transform_block(xd, plane, tx_type, tx_size,
+ dst, pd->dst.stride, eob);
}
}
@@ -406,11 +305,11 @@
TX_SIZE tx_size, int *eob_total) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
- int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
- (blk_col >> (1 - pd->subsampling_x));
- TX_SIZE plane_tx_size = plane ?
- get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
- mbmi->inter_tx_size[tx_idx];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -429,9 +328,9 @@
const int eob = vp10_decode_block_tokens(xd, plane, sc,
blk_col, blk_row, tx_size,
r, mbmi->segment_id);
- inverse_transform_block_inter(xd, plane, tx_size,
+ inverse_transform_block(xd, plane, tx_type, tx_size,
&pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
- pd->dst.stride, eob, block);
+ pd->dst.stride, eob);
*eob_total += eob;
} else {
int bsl = b_width_log2_lookup[bsize];
@@ -477,14 +376,14 @@
plane, sc, col, row, tx_size, r,
mbmi->segment_id);
- inverse_transform_block_inter(xd, plane, tx_size,
- &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
- pd->dst.stride, eob, block_idx);
+ inverse_transform_block(xd, plane, tx_type, tx_size,
+ &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
+ pd->dst.stride, eob);
return eob;
}
#endif // !CONFIG_VAR_TX || CONFIG_SUPER_TX
-#if (CONFIG_SUPERTX || CONFIG_OBMC)
+#if CONFIG_SUPERTX
static void build_mc_border(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
int x, int y, int b_w, int b_h, int w, int h) {
@@ -873,9 +772,7 @@
interp_filter, xs, ys, xd);
#endif // CONFIG_EXT_INTER
}
-#endif // (CONFIG_SUPERTX || CONFIG_OBMC)
-#if CONFIG_SUPERTX
static void dec_build_inter_predictors_sb_extend(
VP10Decoder *const pbi, MACROBLOCKD *xd,
#if CONFIG_EXT_INTER
@@ -1046,237 +943,6 @@
}
#endif // CONFIG_SUPERTX
-#if CONFIG_OBMC
-static void dec_build_prediction_by_above_preds(VP10Decoder *const pbi,
- MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]) {
- VP10_COMMON *const cm = &pbi->common;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int i, j, mi_step, ref;
-
- if (mi_row == 0)
- return;
-
- for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
- int mi_row_offset = -1;
- int mi_col_offset = i;
- int mi_x, mi_y, bw, bh;
- const MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * cm->mi_stride];
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- const BLOCK_SIZE sb_type = mbmi->sb_type;
- const int is_compound = has_second_ref(mbmi);
- const INTERP_FILTER interp_filter = mbmi->interp_filter;
-
- mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[sb_type]);
-
- if (!is_neighbor_overlappable(mbmi))
- continue;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst,
- tmp_buf[j], tmp_stride[j],
- 0, i, NULL,
- pd->subsampling_x, pd->subsampling_y);
- }
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!vp10_is_valid_scale(&ref_buf->sf)))
- vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
- &ref_buf->sf);
- }
-
- xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
- mi_x = (mi_col + i) << MI_SIZE_LOG2;
- mi_y = mi_row << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *pd = &xd->plane[j];
- struct buf_2d *const dst_buf = &pd->dst;
- bw = (mi_step * 8) >> pd->subsampling_x;
- bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
- 4);
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- const int idx = xd->block_refs[ref]->idx;
- BufferPool *const pool = pbi->common.buffer_pool;
- RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
- const int is_scaled = vp10_is_scaled(sf);
-
- if (sb_type < BLOCK_8X8) {
- const PARTITION_TYPE bp = BLOCK_8X8 - sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
- const int pw = 8 >> (have_vsplit | pd->subsampling_x);
- int x, y;
-
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x) {
- const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
- if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT)
- && y == 0 && !pd->subsampling_y)
- continue;
-
- dec_build_inter_predictors(pbi, xd, j,
- mi_col_offset, mi_row_offset,
- bw, bh,
- 4 * x, 0, pw, bh,
-#if CONFIG_EXT_INTER && CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX
- mi_x, mi_y,
- interp_filter, sf, pre_buf, dst_buf,
- &mv, ref_frame_buf, is_scaled, ref);
- }
- } else {
- const MV mv = mi->mbmi.mv[ref].as_mv;
- dec_build_inter_predictors(pbi, xd, j,
- mi_col_offset, mi_row_offset,
- bw, bh,
- 0, 0, bw, bh,
-#if CONFIG_EXT_INTER && CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX
- mi_x, mi_y, interp_filter,
- sf, pre_buf, dst_buf, &mv, ref_frame_buf,
- is_scaled, ref);
- }
- }
- }
- }
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
-}
-
-static void dec_build_prediction_by_left_preds(VP10Decoder *const pbi,
- MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]) {
- VP10_COMMON *const cm = &pbi->common;
- const TileInfo *const tile = &xd->tile;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int i, j, mi_step, ref;
-
- if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
- (mi_col - 1) >= tile->mi_col_end)
- return;
-
- for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = -1;
- int mi_x, mi_y, bw, bh;
- const MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * cm->mi_stride];
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- const BLOCK_SIZE sb_type = mbmi->sb_type;
- const int is_compound = has_second_ref(mbmi);
- const INTERP_FILTER interp_filter = mbmi->interp_filter;
-
- mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[sb_type]);
-
- if (!is_neighbor_overlappable(mbmi))
- continue;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst,
- tmp_buf[j], tmp_stride[j],
- i, 0, NULL,
- pd->subsampling_x, pd->subsampling_y);
- }
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!vp10_is_valid_scale(&ref_buf->sf)))
- vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
- &ref_buf->sf);
- }
-
- xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
- mi_x = mi_col << MI_SIZE_LOG2;
- mi_y = (mi_row + i) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *pd = &xd->plane[j];
- struct buf_2d *const dst_buf = &pd->dst;
- bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
- 4);
- bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- const int idx = xd->block_refs[ref]->idx;
- BufferPool *const pool = pbi->common.buffer_pool;
- RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
- const int is_scaled = vp10_is_scaled(sf);
-
- if (sb_type < BLOCK_8X8) {
- const PARTITION_TYPE bp = BLOCK_8X8 - sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
- const int ph = 8 >> (have_hsplit | pd->subsampling_y);
- int x, y;
-
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x) {
- const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
- if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT)
- && x == 0 && !pd->subsampling_x)
- continue;
-
- dec_build_inter_predictors(pbi, xd, j,
-#if CONFIG_OBMC
- mi_col_offset, mi_row_offset,
-#endif // CONFIG_OBMC
- bw, bh,
- 0, 4 * y, bw, ph,
-#if CONFIG_EXT_INTER && CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX
- mi_x, mi_y,
- interp_filter, sf, pre_buf, dst_buf,
- &mv, ref_frame_buf, is_scaled, ref);
- }
- } else {
- const MV mv = mi->mbmi.mv[ref].as_mv;
- dec_build_inter_predictors(pbi, xd, j,
-#if CONFIG_OBMC
- mi_col_offset, mi_row_offset,
-#endif // CONFIG_OBMC
- bw, bh,
- 0, 0, bw, bh,
-#if CONFIG_EXT_INTER && CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX
- mi_x, mi_y, interp_filter,
- sf, pre_buf, dst_buf, &mv, ref_frame_buf,
- is_scaled, ref);
- }
- }
- }
- }
- xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
-}
-#endif // CONFIG_OBMC
-
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
@@ -1628,8 +1294,13 @@
const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
+#if !CONFIG_EXT_PARTITION_TYPES
MB_MODE_INFO *mbmi;
+#endif
int i, offset = mi_row * cm->mi_stride + mi_col;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
DECLARE_ALIGNED(16, uint8_t,
@@ -1674,8 +1345,13 @@
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = cm->mi + offset;
+#if CONFIG_EXT_PARTITION_TYPES
+ partition = get_partition(cm->mi, cm->mi_stride, cm->mi_rows, cm->mi_cols,
+ mi_row, mi_col, bsize);
+#else
mbmi = &xd->mi[0]->mbmi;
partition = partition_lookup[bsl][mbmi->sb_type];
+#endif
subsize = get_subsize(bsize, partition);
for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -1921,6 +1597,204 @@
}
}
break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, 1);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ break;
+ case PARTITION_VERT_A:
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, 2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ break;
+ case PARTITION_HORZ_B:
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 0);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize,
+ mi_row + hbs, mi_col + hbs,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf1[i], dst_stride1[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ break;
+ case PARTITION_VERT_B:
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 3);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize,
+ mi_row + hbs, mi_col + hbs,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf1[i], dst_stride1[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0);
}
@@ -1936,6 +1810,9 @@
#if CONFIG_ANS
struct AnsDecoder *const tok,
#endif // CONFIG_ANS
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
BLOCK_SIZE bsize,
int bwl, int bhl) {
VP10_COMMON *const cm = &pbi->common;
@@ -1954,11 +1831,17 @@
mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
bw, bh, x_mis, y_mis, bwl, bhl);
}
+#if CONFIG_EXT_PARTITION_TYPES
+ xd->mi[0]->mbmi.partition = partition;
+#endif
vp10_read_mode_info(pbi, xd, supertx_enabled,
mi_row, mi_col, r, x_mis, y_mis);
#else
MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
bw, bh, x_mis, y_mis, bwl, bhl);
+#if CONFIG_EXT_PARTITION_TYPES
+ xd->mi[0]->mbmi.partition = partition;
+#endif
vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
#endif // CONFIG_SUPERTX
@@ -1998,6 +1881,9 @@
(xd->mb_to_bottom_edge >= 0 ?
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ if (plane <= 1 && mbmi->palette_mode_info.palette_size[plane])
+ vp10_decode_palette_tokens(xd, plane, r);
+
for (row = 0; row < max_blocks_high; row += step)
for (col = 0; col < max_blocks_wide; col += step)
predict_and_reconstruct_intra_block(xd,
@@ -2053,10 +1939,10 @@
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- dec_build_prediction_by_above_preds(pbi, xd, mi_row, mi_col,
- dst_buf1, dst_stride1);
- dec_build_prediction_by_left_preds(pbi, xd, mi_row, mi_col,
- dst_buf2, dst_stride2);
+ vp10_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
+ dst_buf1, dst_stride1);
+ vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
+ dst_buf2, dst_stride2);
vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
mi_row, mi_col);
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0, NULL, NULL,
@@ -2140,6 +2026,7 @@
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
+#if !CONFIG_EXT_PARTITION_TYPES
static INLINE void dec_update_partition_context(MACROBLOCKD *xd,
int mi_row, int mi_col,
BLOCK_SIZE subsize,
@@ -2153,17 +2040,29 @@
memset(above_ctx, partition_context_lookup[subsize].above, bw);
memset(left_ctx, partition_context_lookup[subsize].left, bw);
}
+#endif // !CONFIG_EXT_PARTITION_TYPES
static PARTITION_TYPE read_partition(VP10_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, vpx_reader *r,
- int has_rows, int has_cols, int bsl) {
+ int has_rows, int has_cols,
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize,
+#endif
+ int bsl) {
const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl);
const vpx_prob *const probs = cm->fc->partition_prob[ctx];
FRAME_COUNTS *counts = xd->counts;
PARTITION_TYPE p;
if (has_rows && has_cols)
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize <= BLOCK_8X8)
+ p = (PARTITION_TYPE)vpx_read_tree(r, vp10_partition_tree, probs);
+ else
+ p = (PARTITION_TYPE)vpx_read_tree(r, vp10_ext_partition_tree, probs);
+#else
p = (PARTITION_TYPE)vpx_read_tree(r, vp10_partition_tree, probs);
+#endif // CONFIG_EXT_PARTITION_TYPES
else if (!has_rows && has_cols)
p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
else if (has_rows && !has_cols)
@@ -2206,6 +2105,9 @@
const int hbs = num_8x8_wh >> 1;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
const int has_rows = (mi_row + hbs) < cm->mi_rows;
const int has_cols = (mi_col + hbs) < cm->mi_cols;
#if CONFIG_SUPERTX
@@ -2220,6 +2122,9 @@
return;
partition = read_partition(cm, xd, mi_row, mi_col, r, has_rows, has_cols,
+#if CONFIG_EXT_PARTITION_TYPES
+ bsize,
+#endif
n8x8_l2);
subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition);
#if CONFIG_SUPERTX
@@ -2284,6 +2189,9 @@
#if CONFIG_ANS
tok,
#endif // CONFIG_ANS
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
subsize, 1, 1);
} else {
switch (partition) {
@@ -2296,6 +2204,9 @@
#if CONFIG_ANS
tok,
#endif // CONFIG_ANS
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
subsize, n4x4_l2, n4x4_l2);
break;
case PARTITION_HORZ:
@@ -2307,6 +2218,9 @@
#if CONFIG_ANS
tok,
#endif // CONFIG_ANS
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
subsize, n4x4_l2, n8x8_l2);
if (has_rows)
decode_block(pbi, xd,
@@ -2317,6 +2231,9 @@
#if CONFIG_ANS
tok,
#endif // CONFIG_ANS
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
subsize, n4x4_l2, n8x8_l2);
break;
case PARTITION_VERT:
@@ -2328,6 +2245,9 @@
#if CONFIG_ANS
tok,
#endif // CONFIG_ANS
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
subsize, n8x8_l2, n4x4_l2);
if (has_cols)
decode_block(pbi, xd,
@@ -2338,6 +2258,9 @@
#if CONFIG_ANS
tok,
#endif // CONFIG_ANS
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
subsize, n8x8_l2, n4x4_l2);
break;
case PARTITION_SPLIT:
@@ -2378,6 +2301,124 @@
#endif // CONFIG_ANS
subsize, n8x8_l2);
break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, subsize, n4x4_l2, n8x8_l2);
+ break;
+ case PARTITION_HORZ_B:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, subsize, n4x4_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ break;
+ case PARTITION_VERT_A:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, subsize, n8x8_l2, n4x4_l2);
+ break;
+ case PARTITION_VERT_B:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, subsize, n8x8_l2, n4x4_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ partition, bsize2, n8x8_l2, n8x8_l2);
+ break;
+#endif
default:
assert(0 && "Invalid partition type");
}
@@ -2421,7 +2462,7 @@
for (col = 0; col < max_blocks_wide; col += step)
eobtotal += reconstruct_inter_block(xd,
#if CONFIG_ANS
- pbi->token_tab, tok,
+ cm->token_tab, tok,
#else
r,
#endif
@@ -2435,10 +2476,43 @@
}
#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize >= BLOCK_8X8) {
+ switch (partition) {
+ case PARTITION_SPLIT:
+ if (bsize > BLOCK_8X8)
+ break;
+ case PARTITION_NONE:
+ case PARTITION_HORZ:
+ case PARTITION_VERT:
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+ break;
+ case PARTITION_HORZ_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
+ break;
+ case PARTITION_HORZ_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, bsize2, subsize);
+ break;
+ case PARTITION_VERT_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, subsize, subsize);
+ break;
+ case PARTITION_VERT_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
+ break;
+ default:
+ assert(0 && "Invalid partition type");
+ }
+ }
+#else
// update partition context
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
+#endif // CONFIG_EXT_PARTITION_TYPES
}
static void setup_bool_decoder(const uint8_t *data,
@@ -2962,18 +3036,7 @@
assert(tile_rows <= 4);
assert(tile_cols <= (1 << 6));
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(cm->above_context, 0,
- sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols);
-
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * aligned_cols);
-
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * aligned_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_cols);
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -3032,11 +3095,7 @@
tile_cols - tile_col - 1 : tile_col;
tile_data = pbi->tile_data + tile_cols * tile_row + col;
vp10_tile_set_col(&tile, tile_data->cm, col);
- vp10_zero(tile_data->xd.left_context);
- vp10_zero(tile_data->xd.left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(tile_data->xd.left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(&tile_data->xd);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(pbi, &tile_data->xd,
@@ -3126,11 +3185,7 @@
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- vp10_zero(tile_data->xd.left_context);
- vp10_zero(tile_data->xd.left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(tile_data->xd.left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(&tile_data->xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->pbi, &tile_data->xd,
@@ -3211,16 +3266,8 @@
worker->data2 = &pbi->tile_worker_info[n];
}
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(cm->above_context, 0,
- sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * aligned_mi_cols);
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * aligned_mi_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_mi_cols);
+
// Load tile data into tile_buffers
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -3720,9 +3767,17 @@
for (i = 0; i < INTRA_MODES - 1; ++i)
vp10_diff_update_prob(&r, &fc->uv_mode_prob[j][i]);
+#if CONFIG_EXT_PARTITION_TYPES
+ for (i = 0; i < PARTITION_TYPES - 1; ++i)
+ vp10_diff_update_prob(&r, &fc->partition_prob[0][i]);
+ for (j = 1; j < PARTITION_CONTEXTS; ++j)
+ for (i = 0; i < EXT_PARTITION_TYPES - 1; ++i)
+ vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
+#else
for (j = 0; j < PARTITION_CONTEXTS; ++j)
for (i = 0; i < PARTITION_TYPES - 1; ++i)
vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_EXT_INTRA
for (i = 0; i < INTRA_FILTERS + 1; ++i)
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index eb336be..5b2fa1f 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -155,33 +155,45 @@
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
mbmi->ref_mv_idx = 0;
- if (xd->ref_mv_count[ref_frame_type] > 2) {
- uint8_t drl0_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 1);
- vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
- if (vpx_read(r, drl0_prob)) {
- mbmi->ref_mv_idx = 1;
- if (xd->counts)
- ++xd->counts->drl_mode0[drl0_ctx][1];
- if (xd->ref_mv_count[ref_frame_type] > 3) {
- uint8_t drl1_ctx =
- vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 2);
- vpx_prob drl1_prob = cm->fc->drl_prob1[drl1_ctx];
- if (vpx_read(r, drl1_prob)) {
- mbmi->ref_mv_idx = 2;
+ if (mbmi->mode == NEWMV) {
+ int idx;
+ for (idx = 0; idx < 2; ++idx) {
+ if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+ if (!vpx_read(r, drl_prob)) {
+ mbmi->ref_mv_idx = idx;
if (xd->counts)
- ++xd->counts->drl_mode1[drl1_ctx][1];
-
+ ++xd->counts->drl_mode[drl_ctx][0];
return;
}
-
+ mbmi->ref_mv_idx = idx + 1;
if (xd->counts)
- ++xd->counts->drl_mode1[drl1_ctx][0];
+ ++xd->counts->drl_mode[drl_ctx][1];
}
- return;
}
+ }
- if (xd->counts)
- ++xd->counts->drl_mode0[drl0_ctx][0];
+ if (mbmi->mode == NEARMV) {
+ int idx;
+ // Offset the NEARESTMV mode.
+ // TODO(jingning): Unify the two syntax decoding loops after the NEARESTMV
+ // mode is factored in.
+ for (idx = 1; idx < 3; ++idx) {
+ if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+ if (!vpx_read(r, drl_prob)) {
+ mbmi->ref_mv_idx = idx - 1;
+ if (xd->counts)
+ ++xd->counts->drl_mode[drl_ctx][0];
+ return;
+ }
+ mbmi->ref_mv_idx = idx;
+ if (xd->counts)
+ ++xd->counts->drl_mode[drl_ctx][1];
+ }
+ }
}
}
#endif
@@ -213,12 +225,15 @@
TX_SIZE tx_size, int blk_row, int blk_col,
vpx_reader *r) {
int is_split = 0;
- const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ const int tx_row = blk_row >> 1;
+ const int tx_col = blk_col >> 1;
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1),
+ int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row,
tx_size);
+ TX_SIZE (*const inter_tx_size)[MI_BLOCK_SIZE] =
+ (TX_SIZE (*)[MI_BLOCK_SIZE])&mbmi->inter_tx_size[tx_row][tx_col];
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> 5;
@@ -239,10 +254,10 @@
++counts->txfm_partition[ctx][1];
if (tx_size == TX_8X8) {
- mbmi->inter_tx_size[tx_idx] = TX_4X4;
- mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ inter_tx_size[0][0] = TX_4X4;
+ mbmi->tx_size = TX_4X4;
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, TX_4X4);
return;
}
@@ -256,15 +271,15 @@
}
} else {
int idx, idy;
- mbmi->inter_tx_size[tx_idx] = tx_size;
+ inter_tx_size[0][0] = tx_size;
for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
- mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
- mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
+ inter_tx_size[idy][idx] = tx_size;
+ mbmi->tx_size = tx_size;
if (counts)
++counts->txfm_partition[ctx][0];
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), tx_size);
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
}
}
#endif
@@ -1243,7 +1258,7 @@
#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
r, mode_ctx);
#if CONFIG_REF_MV
- if (mbmi->mode == NEARMV)
+ if (mbmi->mode == NEARMV || mbmi->mode == NEWMV)
read_drl_idx(cm, xd, mbmi, r);
#endif
}
@@ -1376,6 +1391,10 @@
#else
if (b_mode == NEARESTMV || b_mode == NEARMV) {
#endif // CONFIG_EXT_INTER
+#if CONFIG_REF_MV
+ CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
+ uint8_t ref_mv_count[2];
+#endif
for (ref = 0; ref < 1 + is_compound; ++ref)
#if CONFIG_EXT_INTER
{
@@ -1384,6 +1403,10 @@
mv_ref_list, j, mi_row, mi_col, NULL);
#endif // CONFIG_EXT_INTER
vp10_append_sub8x8_mvs_for_idx(cm, xd, j, ref, mi_row, mi_col,
+#if CONFIG_REF_MV
+ ref_mv_stack[ref],
+ &ref_mv_count[ref],
+#endif
#if CONFIG_EXT_INTER
mv_ref_list,
#endif // CONFIG_EXT_INTER
@@ -1436,6 +1459,22 @@
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
} else {
+ int ref;
+ for (ref = 0; ref < 1 + is_compound && mbmi->mode == NEWMV; ++ref) {
+ int_mv ref_mv = nearestmv[ref];
+#if CONFIG_REF_MV
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (xd->ref_mv_count[ref_frame_type] > 1) {
+ ref_mv = (ref == 0) ?
+ xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].this_mv :
+ xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].comp_mv;
+ clamp_mv_ref(&ref_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ lower_mv_precision(&ref_mv.as_mv, allow_hp);
+ }
+#endif
+ nearestmv[ref] = ref_mv;
+ }
+
xd->corrupted |= !assign_mv(cm, xd, mbmi->mode,
#if CONFIG_REF_MV
0,
@@ -1565,7 +1604,7 @@
int idx, idy;
for (idy = 0; idy < height; ++idy)
for (idx = 0; idx < width; ++idx)
- mbmi->inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = mbmi->tx_size;
+ mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
}
set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
@@ -1584,7 +1623,7 @@
xd->mi[0]->mbmi.tx_size = xd->supertx_size;
for (idy = 0; idy < height; ++idy)
for (idx = 0; idx < width; ++idx)
- xd->mi[0]->mbmi.inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] =
+ xd->mi[0]->mbmi.inter_tx_size[idy >> 1][idx >> 1] =
xd->supertx_size;
}
#endif // CONFIG_VAR_TX
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 1ef2ea5..7f58b52 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -28,6 +28,7 @@
#include "vp10/common/seg_common.h"
#include "vp10/common/tile_common.h"
+#include "vp10/encoder/buf_ans.h"
#include "vp10/encoder/cost.h"
#include "vp10/encoder/bitstream.h"
#include "vp10/encoder/encodemv.h"
@@ -49,6 +50,10 @@
static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
{{0, 1}, {2, 2}, {3, 2}};
#endif // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+#if CONFIG_EXT_PARTITION_TYPES
+static const struct vp10_token ext_partition_encodings[EXT_PARTITION_TYPES] =
+ {{0, 1}, {4, 3}, {12, 4}, {7, 3}, {10, 4}, {11, 4}, {26, 5}, {27, 5}};
+#endif
static const struct vp10_token partition_encodings[PARTITION_TYPES] =
{{0, 1}, {2, 2}, {6, 3}, {7, 3}};
#if !CONFIG_REF_MV
@@ -193,18 +198,40 @@
const MB_MODE_INFO_EXT *mbmi_ext,
vpx_writer *w) {
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
- uint8_t drl0_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
- vpx_write(w, mbmi->ref_mv_idx != 0, drl0_prob);
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 3 &&
- mbmi->ref_mv_idx > 0) {
- uint8_t drl1_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
- vpx_prob drl1_prob = cm->fc->drl_prob1[drl1_ctx];
- vpx_write(w, mbmi->ref_mv_idx != 1, drl1_prob);
+
+ assert(mbmi->ref_mv_idx < 3);
+
+ if (mbmi->mode == NEWMV) {
+ int idx;
+ for (idx = 0; idx < 2; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+
+ vpx_write(w, mbmi->ref_mv_idx != idx, drl_prob);
+ if (mbmi->ref_mv_idx == idx)
+ return;
+ }
}
+ return;
+ }
+
+ if (mbmi->mode == NEARMV) {
+ int idx;
+ // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
+ for (idx = 1; idx < 3; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+
+ vpx_write(w, mbmi->ref_mv_idx != (idx - 1), drl_prob);
+ if (mbmi->ref_mv_idx == (idx - 1))
+ return;
+ }
+ }
+ return;
}
}
#endif
@@ -266,11 +293,12 @@
const MB_MODE_INFO *mbmi,
TX_SIZE tx_size, int blk_row, int blk_col,
vpx_writer *w) {
- const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ const int tx_row = blk_row >> 1;
+ const int tx_col = blk_col >> 1;
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1),
+ int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row,
tx_size);
if (xd->mb_to_bottom_edge < 0)
@@ -281,10 +309,10 @@
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
- if (tx_size == mbmi->inter_tx_size[tx_idx]) {
+ if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) {
vpx_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), tx_size);
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
} else {
const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bsl = b_width_log2_lookup[bsize];
@@ -292,8 +320,8 @@
vpx_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
if (tx_size == TX_8X8) {
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, TX_4X4);
return;
}
@@ -343,11 +371,8 @@
vp10_cond_prob_diff_update(w, &cm->fc->refmv_prob[i],
counts->refmv_mode[i]);
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_cond_prob_diff_update(w, &cm->fc->drl_prob0[i],
- counts->drl_mode0[i]);
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_cond_prob_diff_update(w, &cm->fc->drl_prob1[i],
- counts->drl_mode1[i]);
+ vp10_cond_prob_diff_update(w, &cm->fc->drl_prob[i],
+ counts->drl_mode[i]);
#if CONFIG_EXT_INTER
vp10_cond_prob_diff_update(w, &cm->fc->new2mv_prob, counts->new2mv_mode);
#endif // CONFIG_EXT_INTER
@@ -634,65 +659,85 @@
*tp = p;
}
#else
-// This function serializes the tokens backwards both in token order and
-// bit order in each token.
-static void pack_mb_tokens_ans(struct AnsCoder *const ans,
- rans_dec_lut token_tab[COEFF_PROB_MODELS],
- const TOKENEXTRA *const start,
+// This function serializes the tokens in forward order using a buffered ans
+// coder.
+static void pack_mb_tokens_ans(struct BufAnsCoder *ans,
+ const rans_dec_lut token_tab[COEFF_PROB_MODELS],
+ TOKENEXTRA **tp,
const TOKENEXTRA *const stop,
- vpx_bit_depth_t bit_depth) {
- const TOKENEXTRA *p;
- TX_SIZE tx_size = TX_SIZES;
+ vpx_bit_depth_t bit_depth,
+ const TX_SIZE tx) {
+ TOKENEXTRA *p = *tp;
+#if CONFIG_VAR_TX
+ int count = 0;
+ const int seg_eob = 16 << (tx << 1);
+#endif // CONFIG_VAR_TX
- for (p = stop - 1; p >= start; --p) {
+ while (p < stop && p->token != EOSB_TOKEN) {
const int t = p->token;
- if (t == EOSB_TOKEN) {
- tx_size = (TX_SIZE)p->extra;
- } else {
#if CONFIG_VP9_HIGHBITDEPTH
- const vp10_extra_bit *const b =
- (bit_depth == VPX_BITS_12) ? &vp10_extra_bits_high12[t] :
- (bit_depth == VPX_BITS_10) ? &vp10_extra_bits_high10[t] :
- &vp10_extra_bits[t];
+ const vp10_extra_bit *b;
+ if (bit_depth == VPX_BITS_12)
+ b = &vp10_extra_bits_high12[t];
+ else if (bit_depth == VPX_BITS_10)
+ b = &vp10_extra_bits_high10[t];
+ else
+ b = &vp10_extra_bits[t];
#else
const vp10_extra_bit *const b = &vp10_extra_bits[t];
- (void) bit_depth;
+ (void)bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (t != EOB_TOKEN && t != ZERO_TOKEN) {
- // Write extra bits first
- const int e = p->extra;
- const int l = b->len;
- const int skip_bits = (t == CATEGORY6_TOKEN) ? TX_SIZES - 1 - tx_size : 0;
- assert(tx_size < TX_SIZES);
- uabs_write(ans, e & 1, 128);
- if (l) {
- const int v = e >> 1;
- int n;
- for (n = 0; n < l - skip_bits; ++n) {
- const int bb = (v >> n) & 1;
- uabs_write(ans, bb, b->prob[l - 1 - n]);
- }
- for (; n < l; ++n) {
- assert(((v >> n) & 1) == 0);
- }
- }
+ /* skip one or two nodes */
+ if (!p->skip_eob_node)
+ buf_uabs_write(ans, t != EOB_TOKEN, p->context_tree[0]);
- {
+ if (t != EOB_TOKEN) {
+ buf_uabs_write(ans, t != ZERO_TOKEN, p->context_tree[1]);
+
+ if (t != ZERO_TOKEN) {
struct rans_sym s;
const rans_dec_lut *token_cdf =
&token_tab[p->context_tree[PIVOT_NODE] - 1];
s.cum_prob = (*token_cdf)[t - ONE_TOKEN];
s.prob = (*token_cdf)[t - ONE_TOKEN + 1] - s.cum_prob;
- rans_write(ans, &s);
+ buf_rans_write(ans, &s);
}
}
- if (t != EOB_TOKEN)
- uabs_write(ans, t != ZERO_TOKEN, p->context_tree[1]);
- if (!p->skip_eob_node)
- uabs_write(ans, t != EOB_TOKEN, p->context_tree[0]);
+
+ if (b->base_val) {
+ const int e = p->extra, l = b->len;
+ int skip_bits = (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0;
+
+ if (l) {
+ const unsigned char *pb = b->prob;
+ int v = e >> 1;
+ int n = l; /* number of bits in v, assumed nonzero */
+ int i = 0;
+
+ do {
+ const int bb = (v >> --n) & 1;
+ if (skip_bits) {
+ skip_bits--;
+ assert(!bb);
+ } else {
+ buf_uabs_write(ans, bb, pb[i >> 1]);
+ }
+ i = b->tree[i + bb];
+ } while (n);
+ }
+
+ buf_uabs_write(ans, e & 1, 128);
+ }
+ ++p;
+
+#if CONFIG_VAR_TX
+ ++count;
+ if (t == EOB_TOKEN || count == seg_eob) break;
+#endif // CONFIG_VAR_TX
}
- }
+
+ *tp = p;
}
#endif // !CONFIG_ANS
@@ -706,11 +751,11 @@
int blk_row, int blk_col, TX_SIZE tx_size) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
- int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
- (blk_col >> (1 - pd->subsampling_x));
- TX_SIZE plane_tx_size = plane ?
- get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
- mbmi->inter_tx_size[tx_idx];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -1088,7 +1133,7 @@
mode_ctx);
#if CONFIG_REF_MV
- if (mode == NEARMV)
+ if (mode == NEARMV || mode == NEWMV)
write_drl_idx(cm, mbmi, mbmi_ext, w);
#endif
}
@@ -1175,13 +1220,15 @@
#else
if (mode == NEWMV) {
#endif // CONFIG_EXT_INTER
+ int_mv ref_mv;
for (ref = 0; ref < 1 + is_compound; ++ref) {
#if CONFIG_REF_MV
- int nmv_ctx =
- vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
- mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
- const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+ int nmv_ctx =
+ vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+ const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
#endif
+ ref_mv = mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0];
#if CONFIG_EXT_INTER
if (mode == NEWFROMNEARMV)
vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
@@ -1190,8 +1237,8 @@
else
#endif // CONFIG_EXT_INTER
vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
- &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
- allow_hp);
+ &ref_mv.as_mv, nmvc,
+ allow_hp);
}
#if CONFIG_EXT_INTER
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
@@ -1408,9 +1455,31 @@
#endif // CONFIG_EXT_INTRA
}
+#if CONFIG_ANS && CONFIG_SUPERTX
+#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col) \
+ write_modes_b(cpi, tile, w, ans, tok, tok_end, supertx_enabled, mi_row, \
+ mi_col)
+#elif CONFIG_SUPERTX
+#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col) \
+ write_modes_b(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col)
+#elif CONFIG_ANS
+#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col) \
+ write_modes_b(cpi, tile, w, ans, tok, tok_end, mi_row, mi_col)
+#else
+#define write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col) \
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col)
+#endif // CONFIG_ANS && CONFIG_SUPERTX
+
static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
- vpx_writer *w, TOKENEXTRA **tok,
- const TOKENEXTRA *const tok_end,
+ vpx_writer *w,
+#if CONFIG_ANS
+ struct BufAnsCoder *ans,
+#endif // CONFIG_ANS
+ TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
@@ -1457,7 +1526,7 @@
assert(*tok < tok_end);
pack_palette_tokens(w, tok, m->mbmi.palette_mode_info.palette_size[plane],
rows * cols - 1);
- assert(*tok < tok_end);
+ assert(*tok < tok_end + m->mbmi.skip);
}
}
@@ -1465,7 +1534,6 @@
if (supertx_enabled) return;
#endif // CONFIG_SUPERTX
-#if !CONFIG_ANS
if (!m->mbmi.skip) {
assert(*tok < tok_end);
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
@@ -1501,18 +1569,26 @@
for (row = 0; row < num_4x4_h; row += bw)
for (col = 0; col < num_4x4_w; col += bw)
+#if CONFIG_ANS
+ pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth,
+ tx);
+#else
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif // CONFIG_ANS
}
#else
TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
: m->mbmi.tx_size;
+#if CONFIG_ANS
+ pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth, tx);
+#else
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif // CONFIG_ANS
#endif // CONFIG_VAR_TX
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
}
}
-#endif
}
static void write_partition(const VP10_COMMON *const cm,
@@ -1525,7 +1601,15 @@
const int has_cols = (mi_col + hbs) < cm->mi_cols;
if (has_rows && has_cols) {
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize <= BLOCK_8X8)
+ vp10_write_token(w, vp10_partition_tree, probs, &partition_encodings[p]);
+ else
+ vp10_write_token(w, vp10_ext_partition_tree, probs,
+ &ext_partition_encodings[p]);
+#else
vp10_write_token(w, vp10_partition_tree, probs, &partition_encodings[p]);
+#endif // CONFIG_EXT_PARTITION_TYPES
} else if (!has_rows && has_cols) {
assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
vpx_write(w, p == PARTITION_SPLIT, probs[1]);
@@ -1537,8 +1621,31 @@
}
}
-static void write_modes_sb(VP10_COMP *cpi,
- const TileInfo *const tile, vpx_writer *w,
+#if CONFIG_ANS && CONFIG_SUPERTX
+#define write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col, bsize) \
+ write_modes_sb(cpi, tile, w, ans, tok, tok_end, supertx_enabled, mi_row, \
+ mi_col, bsize)
+#elif CONFIG_SUPERTX
+#define write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col, bsize) \
+ write_modes_sb(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col, \
+ bsize)
+#elif CONFIG_ANS
+#define write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col, bsize) \
+ write_modes_sb(cpi, tile, w, ans, tok, tok_end, mi_row, mi_col, bsize)
+#else
+#define write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, \
+ supertx_enabled, mi_row, mi_col, bsize) \
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, bsize)
+#endif // CONFIG_ANS && CONFIG_SUPERTX
+
+static void write_modes_sb(VP10_COMP *cpi, const TileInfo *const tile,
+ vpx_writer *w,
+#if CONFIG_ANS
+ struct BufAnsCoder *ans,
+#endif // CONFIG_ANS
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
#if CONFIG_SUPERTX
int supertx_enabled,
@@ -1564,6 +1671,10 @@
m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
partition = partition_lookup[bsl][m->mbmi.sb_type];
+#if CONFIG_EXT_PARTITION_TYPES
+ partition = get_partition(cm->mi, cm->mi_stride, cm->mi_rows, cm->mi_cols,
+ mi_row, mi_col, bsize);
+#endif
write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
subsize = get_subsize(bsize, partition);
#if CONFIG_SUPERTX
@@ -1607,68 +1718,108 @@
}
#endif // CONFIG_SUPERTX
if (subsize < BLOCK_8X8) {
- write_modes_b(cpi, tile, w, tok, tok_end,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
} else {
switch (partition) {
case PARTITION_NONE:
- write_modes_b(cpi, tile, w, tok, tok_end,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
break;
case PARTITION_HORZ:
- write_modes_b(cpi, tile, w, tok, tok_end,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
if (mi_row + bs < cm->mi_rows)
- write_modes_b(cpi, tile, w, tok, tok_end,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row + bs, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end,
+ supertx_enabled, mi_row + bs, mi_col);
break;
case PARTITION_VERT:
+ write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ if (mi_col + bs < cm->mi_cols)
+ write_modes_b_wrapper(cpi, tile, w, ans, tok, tok_end,
+ supertx_enabled, mi_row, mi_col + bs);
+ break;
+ case PARTITION_SPLIT:
+ write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row, mi_col, subsize);
+ write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + bs, subsize);
+ write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row + bs, mi_col, subsize);
+ write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, supertx_enabled,
+ mi_row + bs, mi_col + bs, subsize);
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
-#endif // CONFIG_SUPERTX
+#endif
mi_row, mi_col);
- if (mi_col + bs < cm->mi_cols)
- write_modes_b(cpi, tile, w, tok, tok_end,
+ write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col + bs);
+ supertx_enabled,
+#endif
+ mi_row, mi_col + bs);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + bs, mi_col);
break;
- case PARTITION_SPLIT:
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ case PARTITION_HORZ_B:
+ write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ supertx_enabled,
+#endif
+ mi_row, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col + bs, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ supertx_enabled,
+#endif
+ mi_row + bs, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row + bs, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row + bs, mi_col + bs, subsize);
+ supertx_enabled,
+#endif
+ mi_row + bs, mi_col + bs);
break;
+ case PARTITION_VERT_A:
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + bs, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + bs);
+ break;
+ case PARTITION_VERT_B:
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + bs);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + bs, mi_col + bs);
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0);
}
@@ -1689,7 +1840,12 @@
for (row = 0; row < num_4x4_h; row += bw)
for (col = 0; col < num_4x4_w; col += bw)
+#if CONFIG_ANS
+ pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth,
+ tx);
+#else
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
}
@@ -1697,30 +1853,31 @@
#endif // CONFIG_SUPERTX
// update partition context
+#if CONFIG_EXT_PARTITION_TYPES
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
+#else
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#endif // CONFIG_EXT_PARTITION_TYPES
}
-static void write_modes(VP10_COMP *cpi,
- const TileInfo *const tile, vpx_writer *w,
+static void write_modes(VP10_COMP *cpi, const TileInfo *const tile,
+ vpx_writer *w,
+#if CONFIG_ANS
+ struct BufAnsCoder *ans,
+#endif // CONFIG_ANS
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int mi_row, mi_col;
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- vp10_zero(xd->left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(xd->left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
- write_modes_sb(cpi, tile, w, tok, tok_end,
-#if CONFIG_SUPERTX
- 0,
-#endif
- mi_row, mi_col, BLOCK_64X64);
+ write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0, mi_row, mi_col,
+ BLOCK_64X64);
}
}
@@ -2182,20 +2339,20 @@
vpx_writer mode_bc;
#if CONFIG_ANS
struct AnsCoder token_ans;
-#endif
+ struct BufAnsCoder buffered_ans;
+#endif // CONFIG_ANS
int tile_row, tile_col;
TOKENEXTRA *tok_end;
size_t total_size = 0;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
unsigned int max_tile = 0;
+ const int ans_window_size = get_token_alloc(cm->mb_rows, cm->mb_cols) * 3;
+ struct buffered_ans_symbol *uco_ans_buf =
+ malloc(ans_window_size * sizeof(*uco_ans_buf));
+ assert(uco_ans_buf);
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * mi_cols_aligned_to_sb(cm->mi_cols));
-#endif
+ vp10_zero_above_context(cm, 0, mi_cols_aligned_to_sb(cm->mi_cols));
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
@@ -2213,8 +2370,8 @@
#if !CONFIG_ANS
(void) token_section_size;
- write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
- &mode_bc, &tok, tok_end);
+ write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc, &tok,
+ tok_end);
assert(tok == tok_end);
vpx_stop_encode(&mode_bc);
if (put_tile_size) {
@@ -2229,12 +2386,13 @@
}
total_size += mode_bc.pos;
#else
+ buf_ans_write_init(&buffered_ans, uco_ans_buf, ans_window_size);
write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc,
- NULL, NULL);
+ &buffered_ans, &tok, tok_end);
+ assert(tok == tok_end);
vpx_stop_encode(&mode_bc);
ans_write_init(&token_ans, mode_data_start + mode_bc.pos);
- pack_mb_tokens_ans(&token_ans, cm->token_tab, tok, tok_end,
- cm->bit_depth);
+ buf_ans_flush(&buffered_ans, &token_ans);
token_section_size = ans_write_end(&token_ans);
if (put_tile_size) {
// size of this tile
@@ -2248,6 +2406,9 @@
}
*max_tile_sz = max_tile;
+#if CONFIG_ANS
+ free(uco_ans_buf);
+#endif // CONFIG_ANS
return total_size;
}
@@ -2467,9 +2628,18 @@
prob_diff_update(vp10_intra_mode_tree, fc->uv_mode_prob[i],
counts->uv_mode[i], INTRA_MODES, &header_bc);
+#if CONFIG_EXT_PARTITION_TYPES
+ prob_diff_update(vp10_partition_tree, fc->partition_prob[0],
+ counts->partition[0], PARTITION_TYPES, &header_bc);
+ for (i = 1; i < PARTITION_CONTEXTS; ++i)
+ prob_diff_update(vp10_ext_partition_tree, fc->partition_prob[i],
+ counts->partition[i], EXT_PARTITION_TYPES,
+ &header_bc);
+#else
for (i = 0; i < PARTITION_CONTEXTS; ++i)
prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
counts->partition[i], PARTITION_TYPES, &header_bc);
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_EXT_INTRA
for (i = 0; i < INTRA_FILTERS + 1; ++i)
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index ce650b1..295213f 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -164,12 +164,12 @@
int quant_fp;
// skip forward transform and quantization
- uint8_t skip_txfm[MAX_MB_PLANE << 2];
+ uint8_t skip_txfm[MAX_MB_PLANE][4];
#define SKIP_TXFM_NONE 0
#define SKIP_TXFM_AC_DC 1
#define SKIP_TXFM_AC_ONLY 2
- int64_t bsse[MAX_MB_PLANE << 2];
+ int64_t bsse[MAX_MB_PLANE][4];
// Used to store sub partition's choices.
MV pred_mv[MAX_REF_FRAMES];
diff --git a/vp10/encoder/buf_ans.h b/vp10/encoder/buf_ans.h
new file mode 100644
index 0000000..ae76873
--- /dev/null
+++ b/vp10/encoder/buf_ans.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_BUF_ANS_H_
+#define VP10_ENCODER_BUF_ANS_H_
+// Buffered forward ANS writer.
+// Symbols are written to the writer in forward (decode) order and serialized
+// backwards due to ANS's stack-like behavior.
+
+#include <assert.h>
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem_ops.h"
+#include "vp10/common/ans.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define ANS_METHOD_UABS 0
+#define ANS_METHOD_RANS 1
+
+struct buffered_ans_symbol {
+ uint8_t method; // one of ANS_METHOD_UABS or ANS_METHOD_RANS
+ // TODO(aconverse): Should be possible to write this in terms of start for ABS
+ AnsP8 val_start; // Boolean value for ABS, start in symbol cycle for Rans
+ AnsP8 prob; // Probability of this symbol
+};
+
+struct BufAnsCoder {
+ struct buffered_ans_symbol *buf;
+ int size;
+ int offset;
+};
+
+static INLINE void buf_ans_write_init(struct BufAnsCoder *const c,
+ struct buffered_ans_symbol *sym_arr,
+ int size) {
+ c->buf = sym_arr;
+ c->size = size;
+ c->offset = 0;
+}
+
+static INLINE void buf_uabs_write(struct BufAnsCoder *const c,
+ uint8_t val, AnsP8 prob) {
+ assert(c->offset < c->size);
+ c->buf[c->offset].method = ANS_METHOD_UABS;
+ c->buf[c->offset].val_start = val;
+ c->buf[c->offset].prob = prob;
+ ++c->offset;
+}
+
+static INLINE void buf_rans_write(struct BufAnsCoder *const c,
+ const struct rans_sym *const sym) {
+ assert(c->offset < c->size);
+ c->buf[c->offset].method = ANS_METHOD_RANS;
+ c->buf[c->offset].val_start = sym->cum_prob;
+ c->buf[c->offset].prob = sym->prob;
+ ++c->offset;
+}
+
+static INLINE void buf_ans_flush(const struct BufAnsCoder *const c,
+ struct AnsCoder *ans) {
+ int offset;
+ for (offset = c->offset - 1; offset >= 0; --offset) {
+ if (c->buf[offset].method == ANS_METHOD_RANS) {
+ struct rans_sym sym;
+ sym.prob = c->buf[offset].prob;
+ sym.cum_prob = c->buf[offset].val_start;
+ rans_write(ans, &sym);
+ } else {
+ uabs_write(ans, c->buf[offset].val_start, c->buf[offset].prob);
+ }
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // VP10_ENCODER_BUF_ANS_H_
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c
index 3cd23ec..0a76195 100644
--- a/vp10/encoder/context_tree.c
+++ b/vp10/encoder/context_tree.c
@@ -19,11 +19,17 @@
};
static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif
PICK_MODE_CONTEXT *ctx) {
const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
const int num_pix = num_blk << 4;
int i, k;
ctx->num_4x4_blk = num_blk;
+#if CONFIG_EXT_PARTITION_TYPES
+ ctx->partition = partition;
+#endif
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_blk, sizeof(uint8_t)));
@@ -78,6 +84,46 @@
static void alloc_tree_contexts(VP10_COMMON *cm, PC_TREE *tree,
int num_4x4_blk) {
+#if CONFIG_EXT_PARTITION_TYPES
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_NONE, &tree->none);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_HORZ, &tree->horizontal[0]);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT, &tree->vertical[0]);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT, &tree->horizontal[1]);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT, &tree->vertical[1]);
+
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_A,
+ &tree->horizontala[0]);
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_A,
+ &tree->horizontala[1]);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_HORZ_A,
+ &tree->horizontala[2]);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_HORZ_B,
+ &tree->horizontalb[0]);
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_B,
+ &tree->horizontalb[1]);
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_B,
+ &tree->horizontalb[2]);
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_A, &tree->verticala[0]);
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_A, &tree->verticala[1]);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT_A, &tree->verticala[2]);
+ alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT_B, &tree->verticalb[0]);
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_B, &tree->verticalb[1]);
+ alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_B, &tree->verticalb[2]);
+#ifdef CONFIG_SUPERTX
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ,
+ &tree->horizontal_supertx);
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT, &tree->vertical_supertx);
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_SPLIT, &tree->split_supertx);
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ_A,
+ &tree->horizontala_supertx);
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ_B,
+ &tree->horizontalb_supertx);
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT_A,
+ &tree->verticala_supertx);
+ alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT_B,
+ &tree->verticalb_supertx);
+#endif // CONFIG_SUPERTX
+#else
alloc_mode_context(cm, num_4x4_blk, &tree->none);
alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[0]);
@@ -94,9 +140,19 @@
memset(&tree->horizontal[1], 0, sizeof(tree->horizontal[1]));
memset(&tree->vertical[1], 0, sizeof(tree->vertical[1]));
}
+#endif // CONFIG_EXT_PARTITION_TYPES
}
static void free_tree_contexts(PC_TREE *tree) {
+#if CONFIG_EXT_PARTITION_TYPES
+ int i;
+ for (i = 0; i < 3; i++) {
+ free_mode_context(&tree->horizontala[i]);
+ free_mode_context(&tree->horizontalb[i]);
+ free_mode_context(&tree->verticala[i]);
+ free_mode_context(&tree->verticalb[i]);
+ }
+#endif // CONFIG_EXT_PARTITION_TYPES
free_mode_context(&tree->none);
free_mode_context(&tree->horizontal[0]);
free_mode_context(&tree->horizontal[1]);
@@ -106,7 +162,13 @@
free_mode_context(&tree->horizontal_supertx);
free_mode_context(&tree->vertical_supertx);
free_mode_context(&tree->split_supertx);
-#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ free_mode_context(&tree->horizontala_supertx);
+ free_mode_context(&tree->horizontalb_supertx);
+ free_mode_context(&tree->verticala_supertx);
+ free_mode_context(&tree->verticalb_supertx);
+#endif // CONFIG_EXT_PARTITION_TYPES
+#endif // CONFIG_SUPERTX
}
// This function sets up a tree of contexts such that at each square
@@ -135,8 +197,13 @@
// 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same
// context so we only need to allocate 1 for each 8x8 block.
- for (i = 0; i < leaf_nodes; ++i)
+ for (i = 0; i < leaf_nodes; ++i) {
+#if CONFIG_EXT_PARTITION_TYPES
+ alloc_mode_context(cm, 1, PARTITION_NONE, &td->leaf_tree[i]);
+#else
alloc_mode_context(cm, 1, &td->leaf_tree[i]);
+#endif
+ }
// Sets up all the leaf nodes in the tree.
for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h
index 4fa5806..de17e3e 100644
--- a/vp10/encoder/context_tree.h
+++ b/vp10/encoder/context_tree.h
@@ -54,7 +54,6 @@
int hybrid_pred_diff;
int comp_pred_diff;
int single_pred_diff;
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
// TODO(jingning) Use RD_COST struct here instead. This involves a boarder
// scope of refactoring.
@@ -74,6 +73,9 @@
// search loop
MV pred_mv[MAX_REF_FRAMES];
INTERP_FILTER pred_interp_filter;
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition;
+#endif
} PICK_MODE_CONTEXT;
typedef struct PC_TREE {
@@ -83,6 +85,12 @@
PICK_MODE_CONTEXT none;
PICK_MODE_CONTEXT horizontal[2];
PICK_MODE_CONTEXT vertical[2];
+#if CONFIG_EXT_PARTITION_TYPES
+ PICK_MODE_CONTEXT horizontala[3];
+ PICK_MODE_CONTEXT horizontalb[3];
+ PICK_MODE_CONTEXT verticala[3];
+ PICK_MODE_CONTEXT verticalb[3];
+#endif
union {
struct PC_TREE *split[4];
PICK_MODE_CONTEXT *leaf_split[4];
@@ -91,6 +99,12 @@
PICK_MODE_CONTEXT horizontal_supertx;
PICK_MODE_CONTEXT vertical_supertx;
PICK_MODE_CONTEXT split_supertx;
+#if CONFIG_EXT_PARTITION_TYPES
+ PICK_MODE_CONTEXT horizontala_supertx;
+ PICK_MODE_CONTEXT horizontalb_supertx;
+ PICK_MODE_CONTEXT verticala_supertx;
+ PICK_MODE_CONTEXT verticalb_supertx;
+#endif
#endif
} PC_TREE;
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 8a1ee20..11d4a8e 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -36,219 +36,6 @@
#endif
}
-#if CONFIG_EXT_TX
-void fdst4(const tran_low_t *input, tran_low_t *output) {
- tran_high_t step[4];
- tran_high_t temp1, temp2;
-
- step[0] = input[0] - input[3];
- step[1] = -input[1] + input[2];
- step[2] = -input[1] - input[2];
- step[3] = input[0] + input[3];
-
- temp1 = (step[0] + step[1]) * cospi_16_64;
- temp2 = (step[0] - step[1]) * cospi_16_64;
- output[3] = fdct_round_shift(temp1);
- output[1] = fdct_round_shift(temp2);
- temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
- temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
- output[2] = fdct_round_shift(temp1);
- output[0] = fdct_round_shift(temp2);
-}
-
-void fdst8(const tran_low_t *input, tran_low_t *output) {
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- // stage 1
- s0 = input[0] - input[7];
- s1 = -input[1] + input[6];
- s2 = input[2] - input[5];
- s3 = -input[3] + input[4];
- s4 = -input[3] - input[4];
- s5 = input[2] + input[5];
- s6 = -input[1] - input[6];
- s7 = input[0] + input[7];
-
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
- t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[7] = fdct_round_shift(t0);
- output[5] = fdct_round_shift(t2);
- output[3] = fdct_round_shift(t1);
- output[1] = fdct_round_shift(t3);
-
- // Stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[6] = fdct_round_shift(t0);
- output[4] = fdct_round_shift(t2);
- output[2] = fdct_round_shift(t1);
- output[0] = fdct_round_shift(t3);
-}
-
-void fdst16(const tran_low_t *input, tran_low_t *output) {
- tran_high_t step1[8]; // canbe16
- tran_high_t step2[8]; // canbe16
- tran_high_t step3[8]; // canbe16
- tran_high_t in[8]; // canbe16
- tran_high_t temp1, temp2; // needs32
-
- // step 1
- in[0] = input[0] - input[15];
- in[1] = -input[1] + input[14];
- in[2] = input[2] - input[13];
- in[3] = -input[3] + input[12];
- in[4] = input[4] - input[11];
- in[5] = -input[5] + input[10];
- in[6] = input[6] - input[ 9];
- in[7] = -input[7] + input[ 8];
-
- step1[0] = -input[7] - input[ 8];
- step1[1] = input[6] + input[ 9];
- step1[2] = -input[5] - input[10];
- step1[3] = input[4] + input[11];
- step1[4] = -input[3] - input[12];
- step1[5] = input[2] + input[13];
- step1[6] = -input[1] - input[14];
- step1[7] = input[0] + input[15];
-
- // fdct8(step, step);
- {
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- // stage 1
- s0 = in[0] + in[7];
- s1 = in[1] + in[6];
- s2 = in[2] + in[5];
- s3 = in[3] + in[4];
- s4 = in[3] - in[4];
- s5 = in[2] - in[5];
- s6 = in[1] - in[6];
- s7 = in[0] - in[7];
-
- // fdct4(step, step);
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
- t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
- output[15] = fdct_round_shift(t0);
- output[11] = fdct_round_shift(t2);
- output[7] = fdct_round_shift(t1);
- output[3] = fdct_round_shift(t3);
-
- // Stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[13] = fdct_round_shift(t0);
- output[9] = fdct_round_shift(t2);
- output[5] = fdct_round_shift(t1);
- output[1] = fdct_round_shift(t3);
- }
-
- // step 2
- temp1 = (step1[5] - step1[2]) * cospi_16_64;
- temp2 = (step1[4] - step1[3]) * cospi_16_64;
- step2[2] = fdct_round_shift(temp1);
- step2[3] = fdct_round_shift(temp2);
- temp1 = (step1[4] + step1[3]) * cospi_16_64;
- temp2 = (step1[5] + step1[2]) * cospi_16_64;
- step2[4] = fdct_round_shift(temp1);
- step2[5] = fdct_round_shift(temp2);
-
- // step 3
- step3[0] = step1[0] + step2[3];
- step3[1] = step1[1] + step2[2];
- step3[2] = step1[1] - step2[2];
- step3[3] = step1[0] - step2[3];
- step3[4] = step1[7] - step2[4];
- step3[5] = step1[6] - step2[5];
- step3[6] = step1[6] + step2[5];
- step3[7] = step1[7] + step2[4];
-
- // step 4
- temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
- temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
- step2[1] = fdct_round_shift(temp1);
- step2[2] = fdct_round_shift(temp2);
- temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
- temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
- step2[5] = fdct_round_shift(temp1);
- step2[6] = fdct_round_shift(temp2);
-
- // step 5
- step1[0] = step3[0] + step2[1];
- step1[1] = step3[0] - step2[1];
- step1[2] = step3[3] + step2[2];
- step1[3] = step3[3] - step2[2];
- step1[4] = step3[4] - step2[5];
- step1[5] = step3[4] + step2[5];
- step1[6] = step3[7] - step2[6];
- step1[7] = step3[7] + step2[6];
-
- // step 6
- temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
- temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
- output[14] = fdct_round_shift(temp1);
- output[6] = fdct_round_shift(temp2);
-
- temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
- temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
- output[10] = fdct_round_shift(temp1);
- output[2] = fdct_round_shift(temp2);
-
- temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
- temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
- output[12] = fdct_round_shift(temp1);
- output[4] = fdct_round_shift(temp2);
-
- temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
- temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
- output[8] = fdct_round_shift(temp1);
- output[0] = fdct_round_shift(temp2);
-}
-#endif // CONFIG_EXT_TX
-
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
@@ -1236,22 +1023,6 @@
output[i] = input[i] * 4;
}
-// For use in lieu of DST
-static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[16];
- for (i = 0; i < 8; ++i) {
- output[16 + i] = input[i] * 4;
- output[24 + i] = input[24 + i] * 4;
- }
- // Multiply input by sqrt(2)
- for (i = 0; i < 16; ++i) {
- inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 8] * Sqrt2);
- }
- fdct16(inputhalf, output);
- // Note overall scaling factor is 4 times orthogonal
-}
-
// For use in lieu of ADST
static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -1334,25 +1105,22 @@
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
case IDTX:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
- case FLIPADST_DST:
+ case V_FLIPADST:
copy_flipud(*src, *src_stride, l, buff, l);
*src = buff;
*src_stride = l;
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
- case DST_FLIPADST:
+ case H_FLIPADST:
copy_fliplr(*src, *src_stride, l, buff, l);
*src = buff;
*src_stride = l;
@@ -1370,98 +1138,86 @@
#endif // CONFIG_EXT_TX
static const transform_2d FHT_4[] = {
- { fdct4, fdct4 }, // DCT_DCT = 0,
- { fadst4, fdct4 }, // ADST_DCT = 1,
- { fdct4, fadst4 }, // DCT_ADST = 2,
- { fadst4, fadst4 }, // ADST_ADST = 3,
+ { fdct4, fdct4 }, // DCT_DCT
+ { fadst4, fdct4 }, // ADST_DCT
+ { fdct4, fadst4 }, // DCT_ADST
+ { fadst4, fadst4 }, // ADST_ADST
#if CONFIG_EXT_TX
- { fadst4, fdct4 }, // FLIPADST_DCT = 4,
- { fdct4, fadst4 }, // DCT_FLIPADST = 5,
- { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6,
- { fadst4, fadst4 }, // ADST_FLIPADST = 7,
- { fadst4, fadst4 }, // FLIPADST_ADST = 8,
- { fdst4, fdct4 }, // DST_DCT = 9,
- { fdct4, fdst4 }, // DCT_DST = 10,
- { fdst4, fadst4 }, // DST_ADST = 11,
- { fadst4, fdst4 }, // ADST_DST = 12,
- { fdst4, fadst4 }, // DST_FLIPADST = 13,
- { fadst4, fdst4 }, // FLIPADST_DST = 14,
- { fdst4, fdst4 }, // DST_DST = 15
- { fidtx4, fidtx4 }, // IDTX = 16
- { fdct4, fidtx4 }, // V_DCT = 17
- { fidtx4, fdct4 }, // H_DCT = 18
+ { fadst4, fdct4 }, // FLIPADST_DCT
+ { fdct4, fadst4 }, // DCT_FLIPADST
+ { fadst4, fadst4 }, // FLIPADST_FLIPADST
+ { fadst4, fadst4 }, // ADST_FLIPADST
+ { fadst4, fadst4 }, // FLIPADST_ADST
+ { fidtx4, fidtx4 }, // IDTX
+ { fdct4, fidtx4 }, // V_DCT
+ { fidtx4, fdct4 }, // H_DCT
+ { fadst4, fidtx4 }, // V_ADST
+ { fidtx4, fadst4 }, // H_ADST
+ { fadst4, fidtx4 }, // V_FLIPADST
+ { fidtx4, fadst4 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_8[] = {
- { fdct8, fdct8 }, // DCT_DCT = 0,
- { fadst8, fdct8 }, // ADST_DCT = 1,
- { fdct8, fadst8 }, // DCT_ADST = 2,
- { fadst8, fadst8 }, // ADST_ADST = 3,
+ { fdct8, fdct8 }, // DCT_DCT
+ { fadst8, fdct8 }, // ADST_DCT
+ { fdct8, fadst8 }, // DCT_ADST
+ { fadst8, fadst8 }, // ADST_ADST
#if CONFIG_EXT_TX
- { fadst8, fdct8 }, // FLIPADST_DCT = 4,
- { fdct8, fadst8 }, // DCT_FLIPADST = 5,
- { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6,
- { fadst8, fadst8 }, // ADST_FLIPADST = 7,
- { fadst8, fadst8 }, // FLIPADST_ADST = 8,
- { fdst8, fdct8 }, // DST_DCT = 9,
- { fdct8, fdst8 }, // DCT_DST = 10,
- { fdst8, fadst8 }, // DST_ADST = 11,
- { fadst8, fdst8 }, // ADST_DST = 12,
- { fdst8, fadst8 }, // DST_FLIPADST = 13,
- { fadst8, fdst8 }, // FLIPADST_DST = 14,
- { fdst8, fdst8 }, // DST_DST = 15
- { fidtx8, fidtx8 }, // IDTX = 16
- { fdct8, fidtx8 }, // V_DCT = 17
- { fidtx8, fdct8 }, // H_DCT = 18
+ { fadst8, fdct8 }, // FLIPADST_DCT
+ { fdct8, fadst8 }, // DCT_FLIPADST
+ { fadst8, fadst8 }, // FLIPADST_FLIPADST
+ { fadst8, fadst8 }, // ADST_FLIPADST
+ { fadst8, fadst8 }, // FLIPADST_ADST
+ { fidtx8, fidtx8 }, // IDTX
+ { fdct8, fidtx8 }, // V_DCT
+ { fidtx8, fdct8 }, // H_DCT
+ { fadst8, fidtx8 }, // V_ADST
+ { fidtx8, fadst8 }, // H_ADST
+ { fadst8, fidtx8 }, // V_FLIPADST
+ { fidtx8, fadst8 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_16[] = {
- { fdct16, fdct16 }, // DCT_DCT = 0,
- { fadst16, fdct16 }, // ADST_DCT = 1,
- { fdct16, fadst16 }, // DCT_ADST = 2,
- { fadst16, fadst16 }, // ADST_ADST = 3,
+ { fdct16, fdct16 }, // DCT_DCT
+ { fadst16, fdct16 }, // ADST_DCT
+ { fdct16, fadst16 }, // DCT_ADST
+ { fadst16, fadst16 }, // ADST_ADST
#if CONFIG_EXT_TX
- { fadst16, fdct16 }, // FLIPADST_DCT = 4,
- { fdct16, fadst16 }, // DCT_FLIPADST = 5,
- { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6,
- { fadst16, fadst16 }, // ADST_FLIPADST = 7,
- { fadst16, fadst16 }, // FLIPADST_ADST = 8,
- { fdst16, fdct16 }, // DST_DCT = 9,
- { fdct16, fdst16 }, // DCT_DST = 10,
- { fdst16, fadst16 }, // DST_ADST = 11,
- { fadst16, fdst16 }, // ADST_DST = 12,
- { fdst16, fadst16 }, // DST_FLIPADST = 13,
- { fadst16, fdst16 }, // FLIPADST_DST = 14,
- { fdst16, fdst16 }, // DST_DST = 15
- { fidtx16, fidtx16 }, // IDTX = 16
- { fdct16, fidtx16 }, // V_DCT = 17
- { fidtx16, fdct16 }, // H_DCT = 18
+ { fadst16, fdct16 }, // FLIPADST_DCT
+ { fdct16, fadst16 }, // DCT_FLIPADST
+ { fadst16, fadst16 }, // FLIPADST_FLIPADST
+ { fadst16, fadst16 }, // ADST_FLIPADST
+ { fadst16, fadst16 }, // FLIPADST_ADST
+ { fidtx16, fidtx16 }, // IDTX
+ { fdct16, fidtx16 }, // V_DCT
+ { fidtx16, fdct16 }, // H_DCT
+ { fadst16, fidtx16 }, // V_ADST
+ { fidtx16, fadst16 }, // H_ADST
+ { fadst16, fidtx16 }, // V_FLIPADST
+ { fidtx16, fadst16 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
#if CONFIG_EXT_TX
static const transform_2d FHT_32[] = {
- { fdct32, fdct32 }, // DCT_DCT = 0,
- { fhalfright32, fdct32 }, // ADST_DCT = 1,
- { fdct32, fhalfright32 }, // DCT_ADST = 2,
- { fhalfright32, fhalfright32 }, // ADST_ADST = 3,
- { fhalfright32, fdct32 }, // FLIPADST_DCT = 4,
- { fdct32, fhalfright32 }, // DCT_FLIPADST = 5,
- { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST = 6,
- { fhalfright32, fhalfright32 }, // ADST_FLIPADST = 7,
- { fhalfright32, fhalfright32 }, // FLIPADST_ADST = 8,
- { fhalfcenter32, fdct32 }, // DST_DCT = 9,
- { fdct32, fhalfcenter32 }, // DCT_DST = 10,
- { fhalfcenter32, fhalfright32 }, // DST_ADST = 11,
- { fhalfright32, fhalfcenter32 }, // ADST_DST = 12,
- { fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
- { fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
- { fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
- { fidtx32, fidtx32 }, // IDTX = 16
- { fdct32, fidtx32 }, // V_DCT = 17
- { fidtx32, fdct32 }, // H_DCT = 18
+ { fdct32, fdct32 }, // DCT_DCT
+ { fhalfright32, fdct32 }, // ADST_DCT
+ { fdct32, fhalfright32 }, // DCT_ADST
+ { fhalfright32, fhalfright32 }, // ADST_ADST
+ { fhalfright32, fdct32 }, // FLIPADST_DCT
+ { fdct32, fhalfright32 }, // DCT_FLIPADST
+ { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST
+ { fhalfright32, fhalfright32 }, // ADST_FLIPADST
+ { fhalfright32, fhalfright32 }, // FLIPADST_ADST
+ { fidtx32, fidtx32 }, // IDTX
+ { fdct32, fidtx32 }, // V_DCT
+ { fidtx32, fdct32 }, // H_DCT
+ { fhalfright32, fidtx32 }, // V_ADST
+ { fidtx32, fhalfright32 }, // H_ADST
+ { fhalfright32, fidtx32 }, // V_FLIPADST
+ { fidtx32, fhalfright32 }, // H_FLIPADST
};
#endif // CONFIG_EXT_TX
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index c5a68a9..61f6e9c 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -792,6 +792,10 @@
}
}
+#if CONFIG_EXT_PARTITION_TYPES
+ assert(0);
+#endif
+
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
if (xd->mb_to_right_edge < 0)
@@ -1085,6 +1089,10 @@
const int mi_height = num_8x8_blocks_high_lookup[bsize];
int max_plane;
+#if CONFIG_REF_MV
+ int8_t rf_type;
+#endif
+
#if !CONFIG_SUPERTX
assert(mi->mbmi.sb_type == bsize);
#endif
@@ -1092,6 +1100,23 @@
*mi_addr = *mi;
*x->mbmi_ext = ctx->mbmi_ext;
+#if CONFIG_REF_MV
+ rf_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (x->mbmi_ext->ref_mv_count[rf_type] > 1 &&
+ mbmi->sb_type >= BLOCK_8X8 &&
+ mbmi->mode == NEWMV) {
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+ int_mv this_mv = (i == 0) ?
+ x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].this_mv :
+ x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+ x->mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0] = this_mv;
+ mbmi->pred_mv[i] = this_mv;
+ }
+ }
+#endif
+
// If segmentation in use
if (seg->enabled) {
// For in frame complexity AQ copy the segment id from the segment map.
@@ -1194,9 +1219,6 @@
rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- rdc->filter_diff[i] += ctx->best_filter_diff[i];
}
for (h = 0; h < y_mis; ++h) {
@@ -1216,7 +1238,10 @@
PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int output_enabled) {
- int i, y, x_idx;
+ int y, x_idx;
+#if CONFIG_VAR_TX
+ int i;
+#endif
VP10_COMMON *const cm = &cpi->common;
RD_COUNTS *const rdc = &td->rd_counts;
MACROBLOCK *const x = &td->mb;
@@ -1234,11 +1259,32 @@
cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
int w, h;
+#if CONFIG_REF_MV
+ int8_t rf_type;
+#endif
+
*mi_addr = *mi;
*x->mbmi_ext = ctx->mbmi_ext;
assert(is_inter_block(mbmi));
assert(mbmi->tx_size == ctx->mic.mbmi.tx_size);
+#if CONFIG_REF_MV
+ rf_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (x->mbmi_ext->ref_mv_count[rf_type] > 1 &&
+ mbmi->sb_type >= BLOCK_8X8 &&
+ mbmi->mode == NEWMV) {
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+ int_mv this_mv = (i == 0) ?
+ x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].this_mv :
+ x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+ x->mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0] = this_mv;
+ mbmi->pred_mv[i] = this_mv;
+ }
+ }
+#endif
+
// If segmentation in use
if (seg->enabled && output_enabled) {
// For in frame complexity AQ copy the segment id from the segment map.
@@ -1290,7 +1336,7 @@
int idy, idx;
for (idy = 0; idy < (1 << mtx) / 2; ++idy)
for (idx = 0; idx < (1 << mtx) / 2; ++idx)
- mbmi->inter_tx_size[(idy << 3) + idx] = mbmi->tx_size;
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
}
#endif // CONFIG_VAR_TX
#if CONFIG_OBMC
@@ -1316,9 +1362,6 @@
rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- rdc->filter_diff[i] += ctx->best_filter_diff[i];
}
for (h = 0; h < y_mis; ++h) {
@@ -1347,6 +1390,9 @@
PARTITION_TYPE partition = pc_tree->partitioning;
BLOCK_SIZE subsize = get_subsize(bsize, partition);
int i;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
PICK_MODE_CONTEXT *pmc = NULL;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
@@ -1401,6 +1447,56 @@
}
pmc = &pc_tree->split_supertx;
break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontala[0], mi_row, mi_col,
+ bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontala[1], mi_row,
+ mi_col + hbs, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontala[2], mi_row + hbs,
+ mi_col, subsize, output_enabled);
+ pmc = &pc_tree->horizontala_supertx;
+ break;
+ case PARTITION_HORZ_B:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontalb[0], mi_row, mi_col,
+ subsize, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontalb[1], mi_row + hbs,
+ mi_col, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontalb[2], mi_row + hbs,
+ mi_col + hbs, bsize2, output_enabled);
+ pmc = &pc_tree->horizontalb_supertx;
+ break;
+ case PARTITION_VERT_A:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticala[0], mi_row, mi_col,
+ bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticala[1], mi_row + hbs,
+ mi_col, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize);
+ update_state_supertx(cpi, td, &pc_tree->verticala[2], mi_row,
+ mi_col + hbs, subsize, output_enabled);
+ pmc = &pc_tree->verticala_supertx;
+ break;
+ case PARTITION_VERT_B:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->verticalb[0], mi_row, mi_col,
+ subsize, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticalb[1], mi_row,
+ mi_col + hbs, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticalb[2], mi_row + hbs,
+ mi_col + hbs, bsize2, output_enabled);
+ pmc = &pc_tree->verticalb_supertx;
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0);
}
@@ -1449,6 +1545,9 @@
int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
PARTITION_TYPE partition = pc_tree->partitioning;
BLOCK_SIZE subsize = get_subsize(bsize, partition);
+#if CONFIG_EXT_PARTITION_TYPES
+ int i;
+#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -1497,6 +1596,28 @@
supertx_size, pc_tree->split[3]);
}
break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ for ( i = 0; i < 3; i++)
+ update_supertx_param(td, &pc_tree->horizontala[i], best_tx,
+ supertx_size);
+ break;
+ case PARTITION_HORZ_B:
+ for ( i = 0; i < 3; i++)
+ update_supertx_param(td, &pc_tree->horizontalb[i], best_tx,
+ supertx_size);
+ break;
+ case PARTITION_VERT_A:
+ for ( i = 0; i < 3; i++)
+ update_supertx_param(td, &pc_tree->verticala[i], best_tx,
+ supertx_size);
+ break;
+ case PARTITION_VERT_B:
+ for ( i = 0; i < 3; i++)
+ update_supertx_param(td, &pc_tree->verticalb[i], best_tx,
+ supertx_size);
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0);
}
@@ -1537,6 +1658,9 @@
#if CONFIG_SUPERTX
int *totalrate_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd) {
VP10_COMMON *const cm = &cpi->common;
@@ -1564,6 +1688,9 @@
// block as a supertx block, even if rdopt did not pick it as such.
mbmi->tx_size = max_txsize_lookup[bsize];
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ mbmi->partition = partition;
+#endif
for (i = 0; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][0];
@@ -1654,6 +1781,9 @@
totalrate_nocoef,
#endif // CONFIG_SUPERTX
bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
}
} else {
vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
@@ -1661,6 +1791,9 @@
totalrate_nocoef,
#endif // CONFIG_SUPERTX
bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
}
}
@@ -1867,24 +2000,34 @@
#endif // CONFIG_EXT_INTER
mode_ctx);
+ if (mode == NEWMV) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ int idx;
+
+ for (idx = 0; idx < 2; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
+
+ if (mbmi->ref_mv_idx == idx)
+ break;
+ }
+ }
+ }
+
if (mode == NEARMV) {
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
- uint8_t drl0_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- if (mbmi->ref_mv_idx == 0)
- ++counts->drl_mode0[drl0_ctx][0];
- else
- ++counts->drl_mode0[drl0_ctx][1];
+ int idx;
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 3 &&
- mbmi->ref_mv_idx > 0) {
- uint8_t drl1_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
- if (mbmi->ref_mv_idx == 1)
- ++counts->drl_mode1[drl1_ctx][0];
- else
- ++counts->drl_mode1[drl1_ctx][1];
+ for (idx = 1; idx < 3; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
+
+ if (mbmi->ref_mv_idx == idx - 1)
+ break;
}
}
}
@@ -1941,15 +2084,24 @@
}
}
-static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
- ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
- PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+
+typedef struct {
+ ENTROPY_CONTEXT a[16 * MAX_MB_PLANE];
+ ENTROPY_CONTEXT l[16 * MAX_MB_PLANE];
+ PARTITION_CONTEXT sa[8];
+ PARTITION_CONTEXT sl[8];
#if CONFIG_VAR_TX
- TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
+ TXFM_CONTEXT *p_ta;
+ TXFM_CONTEXT *p_tl;
+ TXFM_CONTEXT ta[8];
+ TXFM_CONTEXT tl[8];
#endif
- BLOCK_SIZE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
+} RD_SEARCH_MACROBLOCK_CONTEXT;
+
+static void restore_context(MACROBLOCK *x,
+ const RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ MACROBLOCKD *xd = &x->e_mbd;
int p;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
@@ -1958,37 +2110,34 @@
for (p = 0; p < MAX_MB_PLANE; p++) {
memcpy(
xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
- a + num_4x4_blocks_wide * p,
+ ctx->a + num_4x4_blocks_wide * p,
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
xd->plane[p].subsampling_x);
memcpy(
xd->left_context[p]
+ ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
- l + num_4x4_blocks_high * p,
+ ctx->l + num_4x4_blocks_high * p,
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
xd->plane[p].subsampling_y);
}
- memcpy(xd->above_seg_context + mi_col, sa,
+ memcpy(xd->above_seg_context + mi_col, ctx->sa,
sizeof(*xd->above_seg_context) * mi_width);
- memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
+ memcpy(xd->left_seg_context + (mi_row & MI_MASK), ctx->sl,
sizeof(xd->left_seg_context[0]) * mi_height);
#if CONFIG_VAR_TX
- memcpy(xd->above_txfm_context, ta,
+ xd->above_txfm_context = ctx->p_ta;
+ xd->left_txfm_context = ctx->p_tl;
+ memcpy(xd->above_txfm_context, ctx->ta,
sizeof(*xd->above_txfm_context) * mi_width);
- memcpy(xd->left_txfm_context, tl,
+ memcpy(xd->left_txfm_context, ctx->tl,
sizeof(*xd->left_txfm_context) * mi_height);
#endif
}
-static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
- ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
- PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
-#if CONFIG_VAR_TX
- TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
-#endif
- BLOCK_SIZE bsize) {
- const MACROBLOCKD *const xd = &x->e_mbd;
+static void save_context(const MACROBLOCK *x,
+ RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ const MACROBLOCKD *xd = &x->e_mbd;
int p;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
@@ -1998,26 +2147,28 @@
// buffer the above/left context information of the block in search.
for (p = 0; p < MAX_MB_PLANE; ++p) {
memcpy(
- a + num_4x4_blocks_wide * p,
+ ctx->a + num_4x4_blocks_wide * p,
xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
xd->plane[p].subsampling_x);
memcpy(
- l + num_4x4_blocks_high * p,
+ ctx->l + num_4x4_blocks_high * p,
xd->left_context[p]
+ ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
xd->plane[p].subsampling_y);
}
- memcpy(sa, xd->above_seg_context + mi_col,
+ memcpy(ctx->sa, xd->above_seg_context + mi_col,
sizeof(*xd->above_seg_context) * mi_width);
- memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
+ memcpy(ctx->sl, xd->left_seg_context + (mi_row & MI_MASK),
sizeof(xd->left_seg_context[0]) * mi_height);
#if CONFIG_VAR_TX
- memcpy(ta, xd->above_txfm_context,
+ memcpy(ctx->ta, xd->above_txfm_context,
sizeof(*xd->above_txfm_context) * mi_width);
- memcpy(tl, xd->left_txfm_context,
+ memcpy(ctx->tl, xd->left_txfm_context,
sizeof(*xd->left_txfm_context) * mi_height);
+ ctx->p_ta = xd->above_txfm_context;
+ ctx->p_tl = xd->left_txfm_context;
#endif
}
@@ -2025,9 +2176,15 @@
ThreadData *td,
TOKENEXTRA **tp, int mi_row, int mi_col,
int output_enabled, BLOCK_SIZE bsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif
PICK_MODE_CONTEXT *ctx) {
MACROBLOCK *const x = &td->mb;
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+#if CONFIG_EXT_PARTITION_TYPES
+ x->e_mbd.mi[0]->mbmi.partition = partition;
+#endif
update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
@@ -2053,6 +2210,9 @@
int ctx;
PARTITION_TYPE partition;
BLOCK_SIZE subsize = bsize;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -2066,6 +2226,10 @@
}
partition = partition_lookup[bsl][subsize];
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize > BLOCK_8X8)
+ partition = pc_tree->partitioning;
+#endif
if (output_enabled && bsize != BLOCK_4X4)
td->counts->partition[ctx][partition]++;
@@ -2144,8 +2308,13 @@
}
#endif // CONFIG_EXT_TX
}
+#if CONFIG_EXT_PARTITION_TYPES
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize,
+ partition);
+#else
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#endif
#if CONFIG_VAR_TX
set_txfm_ctx(xd->left_txfm_context, supertx_size, xd->n8_h);
set_txfm_ctx(xd->above_txfm_context, supertx_size, mi_height);
@@ -2163,27 +2332,47 @@
switch (partition) {
case PARTITION_NONE:
encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
&pc_tree->none);
break;
case PARTITION_VERT:
encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
&pc_tree->vertical[0]);
if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
- subsize, &pc_tree->vertical[1]);
+ subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ &pc_tree->vertical[1]);
}
break;
case PARTITION_HORZ:
encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
&pc_tree->horizontal[0]);
if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
- subsize, &pc_tree->horizontal[1]);
+ subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ &pc_tree->horizontal[1]);
}
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_8X8) {
encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
pc_tree->leaf_split[0]);
} else {
encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
@@ -2196,13 +2385,52 @@
subsize, pc_tree->split[3]);
}
break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->horizontala[0]);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, bsize2,
+ partition, &pc_tree->horizontala[1]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, subsize,
+ partition, &pc_tree->horizontala[2]);
+ break;
+ case PARTITION_HORZ_B:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+ partition, &pc_tree->horizontalb[0]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->horizontalb[1]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+ bsize2, partition, &pc_tree->horizontalb[2]);
+ break;
+ case PARTITION_VERT_A:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->verticala[0]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->verticala[1]);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, subsize,
+ partition, &pc_tree->verticala[2]);
+
+ break;
+ case PARTITION_VERT_B:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+ partition, &pc_tree->verticalb[0]);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, bsize2,
+ partition, &pc_tree->verticalb[1]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+ bsize2, partition, &pc_tree->verticalb[2]);
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0 && "Invalid partition type.");
break;
}
+#if CONFIG_EXT_PARTITION_TYPES
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
+#else
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#endif // CONFIG_EXT_PARTITION_TYPES
}
// Check to see if the given partition size is allowed for a specified number
@@ -2299,11 +2527,7 @@
int i, pl;
PARTITION_TYPE partition = PARTITION_NONE;
BLOCK_SIZE subsize;
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl[8], sa[8];
-#if CONFIG_VAR_TX
- TXFM_CONTEXT tl[8], ta[8];
-#endif
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
@@ -2316,6 +2540,10 @@
int chosen_rate_nocoef = INT_MAX;
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ assert(0);
+#endif
+
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -2329,16 +2557,14 @@
partition = partition_lookup[bsl][bs_type];
subsize = get_subsize(bsize, partition);
+ pc_tree->partitioning = partition;
+
#if CONFIG_VAR_TX
xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
#endif
- pc_tree->partitioning = partition;
- save_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
- ta, tl,
-#endif
- bsize);
+
+ save_context(x, &x_ctx, mi_row, mi_col, bsize);
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -2371,6 +2597,9 @@
#if CONFIG_SUPERTX
&none_rate_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_NONE,
+#endif
bsize, ctx, INT64_MAX);
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -2384,11 +2613,8 @@
#endif
}
- restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
- ta, tl,
-#endif
- bsize);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
mi_8x8[0]->mbmi.sb_type = bs_type;
pc_tree->partitioning = partition;
}
@@ -2400,6 +2626,9 @@
#if CONFIG_SUPERTX
&last_part_rate_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_NONE,
+#endif
bsize, ctx, INT64_MAX);
break;
case PARTITION_HORZ:
@@ -2407,6 +2636,9 @@
#if CONFIG_SUPERTX
&last_part_rate_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
subsize, &pc_tree->horizontal[0],
INT64_MAX);
if (last_part_rdc.rate != INT_MAX &&
@@ -2424,6 +2656,9 @@
#if CONFIG_SUPERTX
&rt_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
subsize, &pc_tree->horizontal[1], INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp10_rd_cost_reset(&last_part_rdc);
@@ -2445,6 +2680,9 @@
#if CONFIG_SUPERTX
&last_part_rate_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
subsize, &pc_tree->vertical[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
@@ -2461,6 +2699,9 @@
#if CONFIG_SUPERTX
&rt_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
@@ -2484,6 +2725,9 @@
#if CONFIG_SUPERTX
&last_part_rate_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
subsize, pc_tree->leaf_split[0], INT64_MAX);
break;
}
@@ -2556,11 +2800,9 @@
#if CONFIG_SUPERTX
chosen_rate_nocoef = 0;
#endif
- restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
- ta, tl,
-#endif
- bsize);
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
pc_tree->partitioning = PARTITION_SPLIT;
// Split partition.
@@ -2571,33 +2813,24 @@
#if CONFIG_SUPERTX
int rt_nocoef = 0;
#endif
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl[8], sa[8];
-#if CONFIG_VAR_TX
- TXFM_CONTEXT tl[8], ta[8];
-#endif
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
- save_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
- ta, tl,
-#endif
- bsize);
+ save_context(x, &x_ctx, mi_row, mi_col, bsize);
pc_tree->split[i]->partitioning = PARTITION_NONE;
rd_pick_sb_modes(cpi, tile_data, x,
mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
#if CONFIG_SUPERTX
&rt_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
split_subsize, &pc_tree->split[i]->none, INT64_MAX);
- restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
- ta, tl,
-#endif
- bsize);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp10_rd_cost_reset(&chosen_rdc);
@@ -2655,15 +2888,7 @@
#endif
}
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-#endif
- restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
- ta, tl,
-#endif
- bsize);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
@@ -2923,6 +3148,192 @@
}
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+static void rd_test_partition3(VP10_COMP *cpi, ThreadData *td,
+ TileDataEnc *tile_data,
+ TOKENEXTRA **tp, PC_TREE *pc_tree,
+ RD_COST *best_rdc, PICK_MODE_CONTEXT ctxs[3],
+ PICK_MODE_CONTEXT *ctx,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ PARTITION_TYPE partition,
+#if CONFIG_SUPERTX
+ int64_t best_rd, int *best_rate_nocoef,
+ RD_SEARCH_MACROBLOCK_CONTEXT* x_ctx,
+#endif
+ int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
+ int mi_row1, int mi_col1, BLOCK_SIZE subsize1,
+ int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ RD_COST this_rdc, sum_rdc;
+#if CONFIG_SUPERTX
+ VP10_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
+ int this_rate_nocoef, sum_rate_nocoef;
+ int abort_flag;
+ PARTITION_TYPE best_partition;
+ int tmp_rate;
+ int64_t tmp_dist, tmp_rd;
+#endif
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, ctx);
+
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row0, mi_col0, &sum_rdc,
+#if CONFIG_SUPERTX
+ &sum_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize0, &ctxs[0], best_rdc->rdcost);
+#if CONFIG_SUPERTX
+ abort_flag = sum_rdc.rdcost >= best_rd;
+#endif
+
+#if CONFIG_SUPERTX
+ if (sum_rdc.rdcost < INT64_MAX) {
+#else
+ if (sum_rdc.rdcost < best_rdc->rdcost) {
+#endif
+ PICK_MODE_CONTEXT *ctx = &ctxs[0];
+ update_state(cpi, td, ctx, mi_row0, mi_col0, subsize0, 0);
+ encode_superblock(cpi, td, tp, 0, mi_row0, mi_col0, subsize0, ctx);
+
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, ctx);
+
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc,
+ &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize1, &ctxs[1], INT64_MAX - sum_rdc.rdcost);
+#else
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize1, &ctxs[1], best_rdc->rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif
+ }
+
+#if CONFIG_SUPERTX
+ if (sum_rdc.rdcost < INT64_MAX) {
+#else
+ if (sum_rdc.rdcost < best_rdc->rdcost) {
+#endif
+ PICK_MODE_CONTEXT *ctx = &ctxs[1];
+ update_state(cpi, td, ctx, mi_row1, mi_col1, subsize1, 0);
+ encode_superblock(cpi, td, tp, 0, mi_row1, mi_col1, subsize1, ctx);
+
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, ctx);
+
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc,
+ &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize2, &ctxs[2], INT64_MAX - sum_rdc.rdcost);
+#else
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize2, &ctxs[2], best_rdc->rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif
+ }
+
+#if CONFIG_SUPERTX
+ if (cm->frame_type != KEY_FRAME && !abort_flag &&
+ sum_rdc.rdcost < INT64_MAX && bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0]) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ best_partition = pc_tree->partitioning;
+ pc_tree->partitioning = partition;
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[partition]][supertx_size],
+ 0);
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate,
+ sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+
+ tmp_rate = sum_rate_nocoef;
+ tmp_dist = 0;
+ restore_context(x, x_ctx, mi_row, mi_col, bsize);
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, &tmp_rate,
+ &tmp_dist, &best_tx, pc_tree);
+
+ tmp_rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[partition]][supertx_size],
+ 1);
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist);
+ if (tmp_rd < sum_rdc.rdcost) {
+ sum_rdc.rdcost = tmp_rd;
+ sum_rdc.rate = tmp_rate;
+ sum_rdc.dist = tmp_dist;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
+
+ if (sum_rdc.rdcost < best_rdc->rdcost) {
+ int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ sum_rdc.rate += cpi->partition_cost[pl][partition];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate,
+ sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += cpi->partition_cost[pl][partition];
+#endif
+ if (sum_rdc.rdcost < best_rdc->rdcost) {
+#if CONFIG_SUPERTX
+ *best_rate_nocoef = sum_rate_nocoef;
+ assert(*best_rate_nocoef >= 0);
+#endif
+ *best_rdc = sum_rdc;
+ pc_tree->partitioning = partition;
+ }
+ }
+ }
+ }
+}
+#endif // CONFIG_EXT_PARTITION_TYPES
+
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
@@ -2939,11 +3350,7 @@
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl[8], sa[8];
-#if CONFIG_VAR_TX
- TXFM_CONTEXT tl[8], ta[8];
-#endif
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
TOKENEXTRA *tp_orig = *tp;
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
int i;
@@ -2959,6 +3366,9 @@
#endif // CONFIG_SUPERTX
int do_split = bsize >= BLOCK_8X8;
int do_rect = 1;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
// Override skipping rectangular partition operations for edge blocks
const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
@@ -3020,11 +3430,10 @@
#if CONFIG_VAR_TX
xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
- save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
#endif
+ save_context(x, &x_ctx, mi_row, mi_col, bsize);
+
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -3090,6 +3499,9 @@
#if CONFIG_SUPERTX
&this_rate_nocoef,
#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_NONE,
+#endif
bsize, ctx, best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
@@ -3178,13 +3590,8 @@
#endif
}
}
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif // CONFIG_VAR_TX
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
// store estimated motion vector
@@ -3203,11 +3610,17 @@
ctx->mic.mbmi.interp_filter;
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
- &sum_rate_nocoef, subsize, pc_tree->leaf_split[0],
- INT64_MAX);
+ &sum_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
+ subsize, pc_tree->leaf_split[0], INT64_MAX);
#else
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
- pc_tree->leaf_split[0], best_rdc.rdcost);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
+ subsize, pc_tree->leaf_split[0], best_rdc.rdcost);
#endif // CONFIG_SUPERTX
if (sum_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -3232,14 +3645,9 @@
TX_TYPE best_tx = DCT_DCT;
tmp_rate = sum_rate_nocoef;
tmp_dist = 0;
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MI_MASK);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif // CONFIG_VAR_TX
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
&tmp_rate, &tmp_dist,
&best_tx,
@@ -3326,14 +3734,9 @@
tmp_rate = sum_rate_nocoef;
tmp_dist = 0;
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MI_MASK);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif // CONFIG_VAR_TX
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
&tmp_rate, &tmp_dist,
&best_tx,
@@ -3380,13 +3783,8 @@
if (cpi->sf.less_rectangular_check)
do_rect &= !partition_none_allowed;
}
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
} // if (do_split)
// PARTITION_HORZ
@@ -3403,6 +3801,9 @@
#if CONFIG_SUPERTX
&sum_rate_nocoef,
#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
subsize, &pc_tree->horizontal[0], best_rdc.rdcost);
#if CONFIG_SUPERTX
@@ -3427,11 +3828,18 @@
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col,
&this_rdc, &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
subsize, &pc_tree->horizontal[1],
INT64_MAX);
#else
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col,
- &this_rdc, subsize, &pc_tree->horizontal[1],
+ &this_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
+ subsize, &pc_tree->horizontal[1],
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
if (this_rdc.rate == INT_MAX) {
@@ -3466,13 +3874,9 @@
TX_TYPE best_tx = DCT_DCT;
tmp_rate = sum_rate_nocoef;
tmp_dist = 0;
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif // CONFIG_VAR_TX
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
&tmp_rate, &tmp_dist,
&best_tx,
@@ -3511,13 +3915,8 @@
pc_tree->partitioning = PARTITION_HORZ;
}
}
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
// PARTITION_VERT
if (partition_vert_allowed &&
@@ -3534,6 +3933,9 @@
#if CONFIG_SUPERTX
&sum_rate_nocoef,
#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
subsize, &pc_tree->vertical[0], best_rdc.rdcost);
#if CONFIG_SUPERTX
abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) ||
@@ -3556,11 +3958,19 @@
ctx->mic.mbmi.interp_filter;
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
- &this_rate_nocoef, subsize, &pc_tree->vertical[1],
+ &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
+ subsize, &pc_tree->vertical[1],
INT64_MAX - sum_rdc.rdcost);
#else
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step,
- &this_rdc, subsize,
+ &this_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
+ subsize,
&pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
if (this_rdc.rate == INT_MAX) {
@@ -3594,13 +4004,9 @@
tmp_rate = sum_rate_nocoef;
tmp_dist = 0;
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif // CONFIG_VAR_TX
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
&tmp_rate, &tmp_dist,
&best_tx,
@@ -3640,15 +4046,72 @@
pc_tree->partitioning = PARTITION_VERT;
}
}
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
+#if CONFIG_EXT_PARTITION_TYPES
+ // PARTITION_HORZ_A
+ if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_HORZ_A);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->horizontala,
+ ctx, mi_row, mi_col, bsize, PARTITION_HORZ_A,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, bsize2,
+ mi_row, mi_col + mi_step, bsize2,
+ mi_row + mi_step, mi_col, subsize);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+ // PARTITION_HORZ_B
+ if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_HORZ_B);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->horizontalb,
+ ctx, mi_row, mi_col, bsize, PARTITION_HORZ_B,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, subsize,
+ mi_row + mi_step, mi_col, bsize2,
+ mi_row + mi_step, mi_col + mi_step, bsize2);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+ // PARTITION_VERT_A
+ if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_VERT_A);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->verticala,
+ ctx, mi_row, mi_col, bsize, PARTITION_VERT_A,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, bsize2,
+ mi_row + mi_step, mi_col, bsize2,
+ mi_row, mi_col + mi_step, subsize);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+ // PARTITION_VERT_B
+ if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_VERT_B);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->verticalb,
+ ctx, mi_row, mi_col, bsize, PARTITION_VERT_B,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, subsize,
+ mi_row, mi_col + mi_step, bsize2,
+ mi_row + mi_step, mi_col + mi_step, bsize2);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+#endif // CONFIG_EXT_PARTITION_TYPES
+
// TODO(jbb): This code added so that we avoid static analysis
// warning related to the fact that best_rd isn't used after this
// point. This code should be refactored so that the duplicate
@@ -3687,13 +4150,8 @@
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
- // Initialize the left context for the new SB row
- memset(&xd->left_context, 0, sizeof(xd->left_context));
- memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
-#if CONFIG_VAR_TX
- memset(xd->left_txfm_context_buffer, 0,
- sizeof(xd->left_txfm_context_buffer));
-#endif
+ vp10_zero_left_context(xd);
+
// Code each SB in the row
for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
@@ -3791,19 +4249,9 @@
// Copy data over into macro block data structures.
vp10_setup_src_planes(x, cpi->Source, 0, 0);
- vp10_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
+ vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(xd->above_context[0], 0,
- sizeof(*xd->above_context[0]) *
- 2 * aligned_mi_cols * MAX_MB_PLANE);
- memset(xd->above_seg_context, 0,
- sizeof(*xd->above_seg_context) * aligned_mi_cols);
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*xd->above_txfm_context) * aligned_mi_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_mi_cols);
}
static int check_dual_ref_flags(VP10_COMP *cpi) {
@@ -3971,7 +4419,6 @@
vp10_zero(*td->counts);
vp10_zero(rdc->coef_counts);
vp10_zero(rdc->comp_pred_diff);
- vp10_zero(rdc->filter_diff);
rdc->m_search_count = 0; // Count of motion search hits.
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
@@ -4039,31 +4486,9 @@
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
-
-static INTERP_FILTER get_interp_filter(
- const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
-#if CONFIG_EXT_INTERP
- if (!is_alt_ref &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_SMOOTH] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_REGULAR] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[MULTITAP_SHARP] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP_SMOOTH2;
- }
-#endif // CONFIG_EXT_INTERP
- if (!is_alt_ref &&
- threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_REGULAR] &&
- threshes[EIGHTTAP_SMOOTH] > threshes[MULTITAP_SHARP] &&
- threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP_SMOOTH;
- } else if (threshes[MULTITAP_SHARP] > threshes[EIGHTTAP_REGULAR] &&
- threshes[MULTITAP_SHARP] > threshes[SWITCHABLE - 1]) {
- return MULTITAP_SHARP;
- } else if (threshes[EIGHTTAP_REGULAR] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP_REGULAR;
- } else {
- return SWITCHABLE;
- }
+static INTERP_FILTER get_cm_interp_filter(VP10_COMP *cpi) {
+ (void)cpi;
+ return SWITCHABLE;
}
void vp10_encode_frame(VP10_COMP *cpi) {
@@ -4116,7 +4541,6 @@
// INTRA/ALTREF/GOLDEN/LAST needs to be specified seperately.
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
- int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
const int is_alt_ref = frame_type == ALTREF_FRAME;
/* prediction (compound, single or hybrid) mode selection */
@@ -4134,7 +4558,7 @@
cm->reference_mode = REFERENCE_MODE_SELECT;
if (cm->interp_filter == SWITCHABLE) {
- cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);
+ cm->interp_filter = get_cm_interp_filter(cpi);
}
encode_frame_internal(cpi);
@@ -4142,9 +4566,6 @@
for (i = 0; i < REFERENCE_MODES; ++i)
mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;
-
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
int single_count_zero = 0;
int comp_count_zero = 0;
@@ -4260,13 +4681,14 @@
FRAME_COUNTS *counts,
TX_SIZE tx_size, int blk_row, int blk_col) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ const int tx_row = blk_row >> 1;
+ const int tx_col = blk_col >> 1;
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1),
+ int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row,
tx_size);
- TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+ const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> 5;
@@ -4279,8 +4701,8 @@
if (tx_size == plane_tx_size) {
++counts->txfm_partition[ctx][0];
mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), tx_size);
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
} else {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
@@ -4288,10 +4710,10 @@
++counts->txfm_partition[ctx][1];
if (tx_size == TX_8X8) {
- mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
mbmi->tx_size = TX_4X4;
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, TX_4X4);
return;
}
@@ -4327,10 +4749,11 @@
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size,
int blk_row, int blk_col) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ const int tx_row = blk_row >> 1;
+ const int tx_col = blk_col >> 1;
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+ const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> 5;
@@ -4342,8 +4765,8 @@
if (tx_size == plane_tx_size) {
mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), tx_size);
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
} else {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
@@ -4351,10 +4774,10 @@
int i;
if (tx_size == TX_8X8) {
- mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
mbmi->tx_size = TX_4X4;
- txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
- xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, TX_4X4);
return;
}
@@ -4520,9 +4943,9 @@
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1,
+ vp10_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
dst_stride1);
- vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2,
+ vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
dst_stride2);
vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
mi_row, mi_col);
@@ -4637,6 +5060,9 @@
const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
PARTITION_TYPE partition;
BLOCK_SIZE subsize = bsize;
+#if CONFIG_EXT_PARTITION_TYPES
+ int i;
+#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return 1;
@@ -4647,6 +5073,10 @@
subsize = BLOCK_4X4;
partition = partition_lookup[bsl][subsize];
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize > BLOCK_8X8)
+ partition = pc_tree->partitioning;
+#endif
switch (partition) {
case PARTITION_NONE:
@@ -4687,6 +5117,32 @@
return 1;
}
break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ for (i = 0; i < 3; i++) {
+ if (check_intra_b(&pc_tree->horizontala[i]))
+ return 1;
+ }
+ break;
+ case PARTITION_HORZ_B:
+ for (i = 0; i < 3; i++) {
+ if (check_intra_b(&pc_tree->horizontalb[i]))
+ return 1;
+ }
+ break;
+ case PARTITION_VERT_A:
+ for (i = 0; i < 3; i++) {
+ if (check_intra_b(&pc_tree->verticala[i]))
+ return 1;
+ }
+ break;
+ case PARTITION_VERT_B:
+ for (i = 0; i < 3; i++) {
+ if (check_intra_b(&pc_tree->verticalb[i]))
+ return 1;
+ }
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0);
}
@@ -4716,6 +5172,16 @@
return check_supertx_b(supertx_size, pc_tree->leaf_split[0]);
else
return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]);
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ return check_supertx_b(supertx_size, &pc_tree->horizontala[0]);
+ case PARTITION_HORZ_B:
+ return check_supertx_b(supertx_size, &pc_tree->horizontalb[0]);
+ case PARTITION_VERT_A:
+ return check_supertx_b(supertx_size, &pc_tree->verticala[0]);
+ case PARTITION_VERT_B:
+ return check_supertx_b(supertx_size, &pc_tree->verticalb[0]);
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0);
return 0;
@@ -4950,6 +5416,9 @@
const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
int i, ctx;
uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
@@ -5000,6 +5469,10 @@
subsize = BLOCK_4X4;
}
partition = partition_lookup[bsl][subsize];
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize > BLOCK_8X8)
+ partition = pc_tree->partitioning;
+#endif
if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize)
cm->counts.partition[ctx][partition]++;
@@ -5260,13 +5733,225 @@
}
}
break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ bsize2, top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, bsize2, top_bsize, bsize2,
+ output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf2, dst_stride2,
+ subsize, top_bsize, subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf2, dst_stride2);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf2, dst_stride2, 1);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+
+ break;
+ case PARTITION_VERT_A:
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ bsize2, top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ bsize2, top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2, subsize, top_bsize, subsize, output_enabled,
+ 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf2, dst_stride2);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf2, dst_stride2, 2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ break;
+ case PARTITION_HORZ_B:
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ subsize, top_bsize, subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride, 0);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ bsize2, top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, bsize2, top_bsize, bsize2,
+ output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf1[i], dst_stride1[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ break;
+ case PARTITION_VERT_B:
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ subsize, top_bsize, subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride, 3);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, bsize2, top_bsize, bsize2, output_enabled,
+ 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, bsize2, top_bsize, bsize2,
+ output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf1[i], dst_stride1[i],
+ dst_buf2[i], dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i], dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
default:
assert(0);
}
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize < top_bsize)
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
+#else
if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8))
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#endif // CONFIG_EXT_PARTITION_TYPES
}
static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
@@ -5313,7 +5998,7 @@
// to reuse distortion values from the RD estimation, so we reset these
// flags here before evaluating RD for supertx coding.
for (plane = 0 ; plane < MAX_MB_PLANE ; plane++)
- x->skip_txfm[plane << 2] = SKIP_TXFM_NONE;
+ x->skip_txfm[plane][0] = SKIP_TXFM_NONE;
mbmi = &xd->mi[0]->mbmi;
best_tx_nostx = mbmi->tx_type;
@@ -5378,11 +6063,6 @@
#if CONFIG_EXT_TX
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX && *best_tx == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
#else
if (tx_size >= TX_32X32 && tx_type != DCT_DCT)
continue;
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 700088c..c42b7f1 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -449,7 +449,7 @@
#endif
if (x->quant_fp) {
// Encoding process for rtc mode
- if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
+ if (x->skip_txfm[0][0] == SKIP_TXFM_AC_DC && plane == 0) {
// skip forward transform
p->eobs[block] = 0;
*a = *l = 0;
@@ -460,12 +460,12 @@
}
} else {
if (max_txsize_lookup[plane_bsize] == tx_size) {
- int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
- if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
+ int blk_index = (block >> (tx_size << 1));
+ if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_NONE) {
// full forward transform and quantization
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, VP10_XFORM_QUANT_B);
- } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
+ } else if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_AC_ONLY) {
// fast path forward transform and quantization
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, VP10_XFORM_QUANT_DC);
@@ -558,12 +558,11 @@
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
const struct macroblockd_plane *const pd = &xd->plane[plane];
- int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
- (blk_col >> (1 - pd->subsampling_x));
- TX_SIZE plane_tx_size = plane ?
- get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize,
- 0, 0) :
- mbmi->inter_tx_size[blk_idx];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c
index 61429aa..7941363 100644
--- a/vp10/encoder/encodemv.c
+++ b/vp10/encoder/encodemv.c
@@ -282,9 +282,9 @@
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
#if CONFIG_REF_MV
- int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
- mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
- nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
#endif
vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
}
@@ -332,9 +332,9 @@
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
#if CONFIG_REF_MV
- int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
- mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
- nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
#endif
vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
}
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index afe3292..b2fe978 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -251,7 +251,6 @@
typedef struct RD_COUNTS {
vp10_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
- int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
int m_search_count;
int ex_search_count;
} RD_COUNTS;
@@ -488,7 +487,6 @@
int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2];
int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
- int drl_mode_cost1[DRL_MODE_CONTEXTS][2];
#if CONFIG_EXT_INTER
int new2mv_mode_cost[2];
#endif // CONFIG_EXT_INTER
@@ -505,7 +503,11 @@
int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+#if CONFIG_EXT_PARTITION_TYPES
+ int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
+#else
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
+#endif
int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
int palette_y_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
diff --git a/vp10/encoder/ethread.c b/vp10/encoder/ethread.c
index 6cb9494..c586b9a 100644
--- a/vp10/encoder/ethread.c
+++ b/vp10/encoder/ethread.c
@@ -19,9 +19,6 @@
for (i = 0; i < REFERENCE_MODES; i++)
td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i];
-
for (i = 0; i < TX_SIZES; i++)
for (j = 0; j < PLANE_TYPES; j++)
for (k = 0; k < REF_TYPES; k++)
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index faedb43..785fef0 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -54,17 +54,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -96,17 +93,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -138,17 +132,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -180,17 +171,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -227,15 +215,12 @@
case FLIPADST_ADST:
vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -270,15 +255,12 @@
case FLIPADST_ADST:
vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST exists only in C
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
@@ -314,15 +296,12 @@
case FLIPADST_ADST:
vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
// Use C version since DST exists only in C
vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
@@ -355,15 +334,12 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 1f147d7..23184ed 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -685,47 +685,6 @@
{0, -1}, {0, 1}, {-1, 0}, {1, 0}
};
-#if CONFIG_VP9_HIGHBITDEPTH
-// TODO(yunqing): Optimize the following 2 functions.
-static void highbd_comp_avg_upsampled_pred(uint16_t *comp_pred,
- const uint8_t *pred8,
- int width, int height,
- const uint8_t *ref8,
- int ref_stride) {
- int i, j;
- int stride = ref_stride << 3;
-
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int tmp = pred[j] + ref[(j << 3)];
- comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
- }
- comp_pred += width;
- pred += width;
- ref += stride;
- }
-}
-
-static void highbd_upsampled_pred(uint16_t *comp_pred,
- int width, int height,
- const uint8_t *ref8,
- int ref_stride) {
- int i, j;
- int stride = ref_stride << 3;
-
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- comp_pred[j] = ref[(j << 3)];
- }
- comp_pred += width;
- ref += stride;
- }
-}
-#endif
-
static int upsampled_pref_error(const MACROBLOCKD *xd,
const vp10_variance_fn_ptr_t *vfp,
const uint8_t *const src, const int src_stride,
@@ -737,10 +696,10 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
if (second_pred != NULL)
- highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
- y_stride);
+ vpx_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
+ y_stride);
else
- highbd_upsampled_pred(pred16, w, h, y, y_stride);
+ vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride,
sse);
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 78e8e9a..7097dc1 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -393,9 +393,17 @@
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
cm->frame_type == KEY_FRAME) {
+#if CONFIG_EXT_PARTITION_TYPES
+ vp10_cost_tokens(cpi->partition_cost[0], cm->fc->partition_prob[0],
+ vp10_partition_tree);
+ for (i = 1; i < PARTITION_CONTEXTS; ++i)
+ vp10_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
+ vp10_ext_partition_tree);
+#else
for (i = 0; i < PARTITION_CONTEXTS; ++i)
vp10_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
vp10_partition_tree);
+#endif // CONFIG_EXT_PARTITION_TYPES
}
fill_mode_costs(cpi);
@@ -418,13 +426,8 @@
}
for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
- cpi->drl_mode_cost0[i][0] = vp10_cost_bit(cm->fc->drl_prob0[i], 0);
- cpi->drl_mode_cost0[i][1] = vp10_cost_bit(cm->fc->drl_prob0[i], 1);
- }
-
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
- cpi->drl_mode_cost1[i][0] = vp10_cost_bit(cm->fc->drl_prob1[i], 0);
- cpi->drl_mode_cost1[i][1] = vp10_cost_bit(cm->fc->drl_prob1[i], 1);
+ cpi->drl_mode_cost0[i][0] = vp10_cost_bit(cm->fc->drl_prob[i], 0);
+ cpi->drl_mode_cost0[i][1] = vp10_cost_bit(cm->fc->drl_prob[i], 1);
}
#if CONFIG_EXT_INTER
cpi->new2mv_mode_cost[0] = vp10_cost_bit(cm->fc->new2mv_prob, 0);
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h
index 5a6a44a..61feabe 100644
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@@ -279,8 +279,6 @@
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
- int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
-
int RDMULT;
int RDDIV;
} RD_OPT;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 63163d7..fb6e0c3 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -377,16 +377,35 @@
unsigned int var[16];
double total = 0;
const int f_index = bsize - 6;
+
if (f_index < 0) {
int i, j, index;
int w_shift = bw == 8 ? 1 : 2;
int h_shift = bh == 8 ? 1 : 2;
- for (i = 0; i < bh; ++i)
- for (j = 0; j < bw; ++j) {
- index = (j >> w_shift) + ((i >> h_shift) << 2);
- esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
- (src[j + i * src_stride] - dst[j + i * dst_stride]);
- }
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth) {
+ uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+ uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+ for (i = 0; i < bh; ++i)
+ for (j = 0; j < bw; ++j) {
+ index = (j >> w_shift) + ((i >> h_shift) << 2);
+ esq[index] += (src16[j + i * src_stride] -
+ dst16[j + i * dst_stride]) *
+ (src16[j + i * src_stride] -
+ dst16[j + i * dst_stride]);
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ for (i = 0; i < bh; ++i)
+ for (j = 0; j < bw; ++j) {
+ index = (j >> w_shift) + ((i >> h_shift) << 2);
+ esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
+ (src[j + i * src_stride] - dst[j + i * dst_stride]);
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
var[0] = cpi->fn_ptr[f_index].vf(src, src_stride,
dst, dst_stride, &esq[0]);
@@ -584,17 +603,6 @@
dct_vs_dst(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
}
-static int prune_three_for_sby(const VP10_COMP *cpi,
- BLOCK_SIZE bsize,
- MACROBLOCK *x,
- MACROBLOCKD *xd) {
- (void) cpi;
- (void) bsize;
- (void) x;
- (void) xd;
- return 0;
-}
-
#endif // CONFIG_EXT_TX
// Performance drop: 0.3%, Speed improvement: 5%
@@ -625,9 +633,6 @@
case PRUNE_TWO :
return prune_two_for_sby(cpi, bsize, x, xd);
break;
- case PRUNE_THREE :
- return prune_three_for_sby(cpi, bsize, x, xd);
- break;
#endif
}
assert(0);
@@ -745,18 +750,18 @@
var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
dst, pd->dst.stride, &sse);
- x->bsse[(i << 2) + block_idx] = sse;
+ x->bsse[i][block_idx] = sse;
sum_sse += sse;
- x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
+ x->skip_txfm[i][block_idx] = SKIP_TXFM_NONE;
if (!x->select_tx_size) {
// Check if all ac coefficients can be quantized to zero.
if (var < ac_thr || var == 0) {
- x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;
+ x->skip_txfm[i][block_idx] = SKIP_TXFM_AC_ONLY;
// Check if dc coefficient can be quantized to zero.
if (sse - var < dc_thr || sse == var) {
- x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;
+ x->skip_txfm[i][block_idx] = SKIP_TXFM_AC_DC;
if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
low_err_skip = 1;
@@ -1149,21 +1154,21 @@
dist = (int64_t)tmp * 16;
}
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
- if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
+ if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
SKIP_TXFM_NONE) {
// full forward transform and quantization
vp10_xform_quant(x, plane, block, blk_row, blk_col,
plane_bsize, tx_size, VP10_XFORM_QUANT_B);
dist_block(args->cpi, x, plane, block, blk_row, blk_col,
tx_size, &dist, &sse);
- } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
+ } else if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
SKIP_TXFM_AC_ONLY) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
vp10_xform_quant(x, plane, block, blk_row, blk_col,
plane_bsize, tx_size, VP10_XFORM_QUANT_DC);
- sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
+ sse = x->bsse[plane][block >> (tx_size << 1)] << 4;
dist = sse;
if (x->plane[plane].eobs[block]) {
const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
@@ -1181,7 +1186,7 @@
// SKIP_TXFM_AC_DC
// skip forward transform
x->plane[plane].eobs[block] = 0;
- sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
+ sse = x->bsse[plane][block >> (tx_size << 1)] << 4;
dist = sse;
}
} else {
@@ -1306,6 +1311,179 @@
}
#endif // CONFIG_SUPERTX
+static int64_t txfm_yrd(VP10_COMP *cpi, MACROBLOCK *x,
+ int *r, int64_t *d, int *s, int64_t *sse,
+ int64_t ref_best_rd,
+ BLOCK_SIZE bs, TX_TYPE tx_type, int tx_size) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int64_t rd = INT64_MAX;
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0, s1;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ const int is_inter = is_inter_block(mbmi);
+ const int r_tx_size =
+ cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)][tx_size];
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+
+ assert(skip_prob > 0);
+ s0 = vp10_cost_bit(skip_prob, 0);
+ s1 = vp10_cost_bit(skip_prob, 1);
+
+ mbmi->tx_type = tx_type;
+ mbmi->tx_size = tx_size;
+ txfm_rd_in_plane(x,
+ cpi,
+ r, d, s,
+ sse, ref_best_rd, 0, bs, tx_size,
+ cpi->sf.use_fast_coef_costing);
+ if (*r == INT_MAX)
+ return INT64_MAX;
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(tx_size, bs, is_inter);
+ if (get_ext_tx_types(tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ *r += cpi->inter_tx_type_costs[ext_tx_set]
+ [mbmi->tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ *r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+
+#else
+ if (tx_size < TX_32X32 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] && !FIXED_TX_TYPE) {
+ if (is_inter) {
+ *r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ } else {
+ *r += cpi->intra_tx_type_costs[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
+ }
+#endif // CONFIG_EXT_TX
+
+ if (*s) {
+ if (is_inter) {
+ rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, *sse);
+ }
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, *r + s0 + r_tx_size * tx_select, *d);
+ }
+
+ if (tx_select && !(*s && is_inter))
+ *r += r_tx_size;
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(*s))
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+
+ return rd;
+}
+
+static int64_t choose_tx_size_fix_type(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate,
+ int64_t *distortion,
+ int *skip,
+ int64_t *psse,
+ int64_t ref_best_rd,
+ BLOCK_SIZE bs, TX_TYPE tx_type,
+ int prune) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int r, s;
+ int64_t d, sse;
+ int64_t rd = INT64_MAX;
+ int n;
+ int start_tx, end_tx;
+ int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
+ TX_SIZE best_tx = max_tx_size;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+
+ if (tx_select) {
+ start_tx = max_tx_size;
+ end_tx = 0;
+ } else {
+ const TX_SIZE chosen_tx_size =
+ VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ start_tx = chosen_tx_size;
+ end_tx = chosen_tx_size;
+ }
+
+ *distortion = INT64_MAX;
+ *rate = INT_MAX;
+ *skip = 0;
+ *psse = INT64_MAX;
+
+ mbmi->tx_type = tx_type;
+ last_rd = INT64_MAX;
+ for (n = start_tx; n >= end_tx; --n) {
+ if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n))
+ continue;
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(n, bs, is_inter);
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ }
+ } else {
+ if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
+ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
+ continue;
+ }
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+#else // CONFIG_EXT_TX
+ if (n >= TX_32X32 && tx_type != DCT_DCT)
+ continue;
+ if (is_inter && cpi->sf.tx_type_search > 0 &&
+ !do_tx_type_search(tx_type, prune))
+ continue;
+#endif // CONFIG_EXT_TX
+
+ rd = txfm_yrd(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs, tx_type, n);
+
+ // Early termination in transform size search.
+ if (cpi->sf.tx_size_search_breakout &&
+ (rd == INT64_MAX ||
+ (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
+ (n < (int) max_tx_size && rd > last_rd)))
+ break;
+
+ last_rd = rd;
+ if (rd < best_rd) {
+ best_tx = n;
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+ }
+ }
+ mbmi->tx_size = best_tx;
+
+ return best_rd;
+}
+
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skip, int64_t *sse,
@@ -1344,11 +1522,6 @@
if (cpi->sf.tx_type_search > 0) {
if (!do_tx_type_search(tx_type, prune))
continue;
- } else if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
@@ -1357,12 +1530,6 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
}
mbmi->tx_type = tx_type;
@@ -1470,166 +1637,36 @@
int64_t *psse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
- const TX_SIZE max_tx_size = max_txsize_lookup[bs];
- VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
int r, s;
int64_t d, sse;
int64_t rd = INT64_MAX;
- int n;
- int s0, s1;
- int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
- TX_SIZE best_tx = max_tx_size;
- int start_tx, end_tx;
- const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ int64_t best_rd = INT64_MAX;
+ TX_SIZE best_tx = max_txsize_lookup[bs];
const int is_inter = is_inter_block(mbmi);
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int prune = 0;
-#if CONFIG_EXT_TX
- int ext_tx_set;
-#endif // CONFIG_EXT_TX
if (is_inter && cpi->sf.tx_type_search > 0)
prune = prune_tx_types(cpi, bs, x, xd);
- assert(skip_prob > 0);
- s0 = vp10_cost_bit(skip_prob, 0);
- s1 = vp10_cost_bit(skip_prob, 1);
-
- if (tx_select) {
- start_tx = max_tx_size;
- end_tx = 0;
- } else {
- const TX_SIZE chosen_tx_size =
- VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
- start_tx = chosen_tx_size;
- end_tx = chosen_tx_size;
- }
-
*distortion = INT64_MAX;
*rate = INT_MAX;
*skip = 0;
*psse = INT64_MAX;
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- last_rd = INT64_MAX;
- for (n = start_tx; n >= end_tx; --n) {
- const int r_tx_size =
- cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)][n];
- if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n))
- continue;
-#if CONFIG_EXT_TX
- ext_tx_set = get_ext_tx_set(n, bs, is_inter);
- if (is_inter) {
- if (!ext_tx_used_inter[ext_tx_set][tx_type])
- continue;
- if (cpi->sf.tx_type_search > 0) {
- if (!do_tx_type_search(tx_type, prune))
- continue;
- } else if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
- } else {
- if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
- if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
- continue;
- }
- if (!ext_tx_used_intra[ext_tx_set][tx_type])
- continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
- }
- mbmi->tx_type = tx_type;
- txfm_rd_in_plane(x,
- cpi,
- &r, &d, &s,
- &sse, ref_best_rd, 0, bs, n,
- cpi->sf.use_fast_coef_costing);
- if (get_ext_tx_types(n, bs, is_inter) > 1 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- r != INT_MAX) {
- if (is_inter) {
- if (ext_tx_set > 0)
- r += cpi->inter_tx_type_costs[ext_tx_set]
- [mbmi->tx_size][mbmi->tx_type];
- } else {
- if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
- [mbmi->mode][mbmi->tx_type];
- }
- }
-#else // CONFIG_EXT_TX
- if (n >= TX_32X32 && tx_type != DCT_DCT) {
- continue;
- }
- mbmi->tx_type = tx_type;
- txfm_rd_in_plane(x,
- cpi,
- &r, &d, &s,
- &sse, ref_best_rd, 0, bs, n,
- cpi->sf.use_fast_coef_costing);
- if (n < TX_32X32 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- r != INT_MAX && !FIXED_TX_TYPE) {
- if (is_inter) {
- r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
- continue;
- } else {
- r += cpi->intra_tx_type_costs[mbmi->tx_size]
- [intra_mode_to_tx_type_context[mbmi->mode]]
- [mbmi->tx_type];
- }
- }
-#endif // CONFIG_EXT_TX
-
- if (r == INT_MAX)
- continue;
-
- if (s) {
- if (is_inter) {
- rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
- } else {
- rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
- }
- } else {
- rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
- }
-
- if (tx_select && !(s && is_inter))
- r += r_tx_size;
-
- if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
- rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));
-
- // Early termination in transform size search.
- if (cpi->sf.tx_size_search_breakout &&
- (rd == INT64_MAX ||
- (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
- (n < (int) max_tx_size && rd > last_rd)))
- break;
-
- last_rd = rd;
- if (rd <
- (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
- best_rd) {
- best_tx = n;
- best_rd = rd;
- *distortion = d;
- *rate = r;
- *skip = s;
- *psse = sse;
- best_tx_type = mbmi->tx_type;
- }
+ rd = choose_tx_size_fix_type(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs,
+ tx_type, prune);
+ if (rd < (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * best_rd) {
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+ best_tx_type = tx_type;
+ best_tx = mbmi->tx_size;
}
}
@@ -2846,57 +2883,21 @@
*bsse += tmp * 16;
if (p->eobs[block] > 0) {
- const int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ INV_TXFM_PARAM inv_txfm_param;
+ inv_txfm_param.tx_type = tx_type;
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = p->eobs[block];
+ inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- const int bd = xd->bd;
- switch (tx_size) {
- case TX_32X32:
- vp10_highbd_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32,
- p->eobs[block], bd, tx_type);
- break;
- case TX_16X16:
- vp10_highbd_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32,
- p->eobs[block], bd, tx_type);
- break;
- case TX_8X8:
- vp10_highbd_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32,
- p->eobs[block], bd, tx_type);
- break;
- case TX_4X4:
- vp10_highbd_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32,
- p->eobs[block], bd, tx_type, lossless);
- break;
- default:
- assert(0 && "Invalid transform size");
- break;
- }
+ inv_txfm_param.bd = xd->bd;
+ highbd_inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param);
} else {
-#else
- {
-#endif // CONFIG_VP9_HIGHBITDEPTH
- switch (tx_size) {
- case TX_32X32:
- vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
- tx_type);
- break;
- case TX_16X16:
- vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
- tx_type);
- break;
- case TX_8X8:
- vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
- tx_type);
- break;
- case TX_4X4:
- vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
- tx_type, lossless);
- break;
- default:
- assert(0 && "Invalid transform size");
- break;
- }
+ inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param);
}
+#else // CONFIG_VP9_HIGHBITDEPTH
+ inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param);
+#endif // CONFIG_VP9_HIGHBITDEPTH
if ((bh >> 2) + blk_col > max_blocks_wide ||
(bh >> 2) + blk_row > max_blocks_high) {
@@ -2938,8 +2939,10 @@
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
- int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
- (blk_col >> (1 - pd->subsampling_x));
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ TX_SIZE (*const inter_tx_size)[MI_BLOCK_SIZE] =
+ (TX_SIZE (*)[MI_BLOCK_SIZE])&mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
int64_t this_rd = INT64_MAX;
@@ -3002,7 +3005,7 @@
x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];
if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
- mbmi->inter_tx_size[tx_idx] = tx_size;
+ inter_tx_size[0][0] = tx_size;
vp10_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, coeff_ctx, rate, dist, bsse, skip);
@@ -3065,11 +3068,10 @@
pta[i] = ptl[i] = !(tmp_eob == 0);
txfm_partition_update(tx_above + (blk_col >> 1),
tx_left + (blk_row >> 1), tx_size);
- mbmi->inter_tx_size[tx_idx] = tx_size;
-
+ inter_tx_size[0][0] = tx_size;
for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
- mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
+ inter_tx_size[idy][idx] = tx_size;
mbmi->tx_size = tx_size;
if (this_rd == INT64_MAX)
*is_cost_valid = 0;
@@ -3154,22 +3156,76 @@
}
}
+static int64_t select_tx_size_fix_type(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *dist,
+ int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd, TX_TYPE tx_type) {
+ const VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
+ int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
+#endif // CONFIG_EXT_TX
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0 = vp10_cost_bit(skip_prob, 0);
+ int s1 = vp10_cost_bit(skip_prob, 1);
+ int64_t rd;
+
+ mbmi->tx_type = tx_type;
+ inter_block_yrd(cpi, x, rate, dist, skippable, sse, bsize, ref_best_rd);
+
+ if (*rate == INT_MAX)
+ return INT64_MAX;
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ *rate += cpi->inter_tx_type_costs[ext_tx_set]
+ [max_tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ *rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+#else // CONFIG_EXT_TX
+ if (max_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (is_inter)
+ *rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
+ else
+ *rate += cpi->intra_tx_type_costs[max_tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]][mbmi->tx_type];
+ }
+#endif // CONFIG_EXT_TX
+
+ if (*skippable)
+ rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+ else
+ rd = RDCOST(x->rdmult, x->rddiv, *rate + s0, *dist);
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(*skippable))
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+
+ return rd;
+}
+
static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion, int *skippable,
int64_t *sse, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
- const VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int64_t rd = INT64_MAX;
int64_t best_rd = INT64_MAX;
TX_TYPE tx_type, best_tx_type = DCT_DCT;
const int is_inter = is_inter_block(mbmi);
- vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
- int s0 = vp10_cost_bit(skip_prob, 0);
- int s1 = vp10_cost_bit(skip_prob, 1);
- TX_SIZE best_tx_size[64];
+ TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
TX_SIZE best_tx = TX_SIZES;
uint8_t best_blk_skip[256];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
@@ -3199,11 +3255,6 @@
if (cpi->sf.tx_type_search > 0) {
if (!do_tx_type_search(tx_type, prune))
continue;
- } else if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
@@ -3212,66 +3263,16 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
- }
-
- mbmi->tx_type = tx_type;
-
- inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
- bsize, ref_best_rd);
-
- if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- this_rate != INT_MAX) {
- if (is_inter) {
- if (ext_tx_set > 0)
- this_rate += cpi->inter_tx_type_costs[ext_tx_set]
- [max_tx_size][mbmi->tx_type];
- } else {
- if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
- [mbmi->mode][mbmi->tx_type];
- }
}
#else // CONFIG_EXT_TX
- if (max_tx_size >= TX_32X32 && tx_type != DCT_DCT)
- continue;
-
- mbmi->tx_type = tx_type;
-
- inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
- bsize, ref_best_rd);
-
- if (max_tx_size < TX_32X32 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- this_rate != INT_MAX) {
- if (is_inter) {
- this_rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
- if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
- continue;
- } else {
- this_rate += cpi->intra_tx_type_costs[max_tx_size]
- [intra_mode_to_tx_type_context[mbmi->mode]]
- [mbmi->tx_type];
- }
- }
-#endif // CONFIG_EXT_TX
-
- if (this_rate == INT_MAX)
+ if (max_tx_size >= TX_32X32 && tx_type != DCT_DCT)
continue;
-
- if (this_skip)
- rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse);
- else
- rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist);
-
- if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip)
- rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse));
+ if (is_inter && cpi->sf.tx_type_search > 0 &&
+ !do_tx_type_search(tx_type, prune))
+ continue;
+#endif // CONFIG_EXT_TX
+ rd = select_tx_size_fix_type(cpi, x, &this_rate, &this_dist, &this_skip,
+ &this_sse, bsize, ref_best_rd, tx_type);
if (rd < (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * best_rd) {
best_rd = rd;
@@ -3284,14 +3285,14 @@
memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
- best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx];
+ best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
}
}
mbmi->tx_type = best_tx_type;
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
- mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx];
+ mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
mbmi->tx_size = best_tx;
memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
}
@@ -3306,12 +3307,11 @@
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
- int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
- (blk_col >> (1 - pd->subsampling_x));
- TX_SIZE plane_tx_size = plane ?
- get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
- 0, 0) :
- mbmi->inter_tx_size[tx_idx];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -3847,7 +3847,7 @@
mbmi->angle_delta[1] = best_angle_delta;
if (*rate_tokenonly != INT_MAX)
super_block_uvrd(cpi, x, &this_rate_tokenonly,
- &this_distortion, &s, &this_sse, bsize, INT_MAX);
+ &this_distortion, &s, &this_sse, bsize, INT64_MAX);
return *rate_tokenonly != INT_MAX;
}
#endif // CONFIG_EXT_INTRA
@@ -4826,6 +4826,10 @@
int64_t best_rd = INT64_MAX;
const int i = idy * 2 + idx;
int ref;
+#if CONFIG_REF_MV
+ CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
+ uint8_t ref_mv_count[2];
+#endif
#if CONFIG_EXT_INTER
int mv_idx;
int_mv ref_mvs_sub8x8[2][2];
@@ -4840,6 +4844,10 @@
#endif // CONFIG_EXT_INTER
frame_mv[ZEROMV][frame].as_int = 0;
vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
+#if CONFIG_REF_MV
+ ref_mv_stack[ref],
+ &ref_mv_count[ref],
+#endif
#if CONFIG_EXT_INTER
mv_ref_list,
#endif // CONFIG_EXT_INTER
@@ -5214,6 +5222,7 @@
bsi->rdstat[i][mode_idx].tl,
idy, idx,
mi_row, mi_col);
+
if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
bsi->rdstat[i][mode_idx].brate, 0);
@@ -5457,7 +5466,6 @@
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int mode_index,
int64_t comp_pred_diff[REFERENCE_MODES],
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
int skippable) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -5471,9 +5479,6 @@
ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
-
- memcpy(ctx->best_filter_diff, best_filter_diff,
- sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}
static void setup_buffer_inter(
@@ -6041,9 +6046,7 @@
INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
int (*single_skippable)[MAX_REF_FRAMES],
int64_t *psse,
- const int64_t ref_best_rd,
- int64_t *mask_filter,
- int64_t filter_cache[]) {
+ const int64_t ref_best_rd) {
VP10_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -6101,8 +6104,8 @@
int orig_dst_stride[MAX_MB_PLANE];
int rs = 0;
INTERP_FILTER best_filter = SWITCHABLE;
- uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
- int64_t bsse[MAX_MB_PLANE << 2] = {0};
+ uint8_t skip_txfm[MAX_MB_PLANE][4] = {{0}};
+ int64_t bsse[MAX_MB_PLANE][4] = {{0}};
int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX;
@@ -6386,11 +6389,6 @@
if (is_comp_pred)
intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
- // Search for best switchable filter by checking the variance of
- // pred error irrespective of whether the filter will be used
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
-
best_filter = predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
single_filter);
if (cm->interp_filter != BILINEAR && best_filter == SWITCHABLE) {
@@ -6410,12 +6408,8 @@
if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
- filter_cache[i] = rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
- *mask_filter = VPXMAX(*mask_filter, rd);
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
@@ -6447,12 +6441,8 @@
&tmp_skip_sb, &tmp_skip_sse);
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
- filter_cache[i] = rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
- *mask_filter = VPXMAX(*mask_filter, rd);
if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
tmp_rate_sum = rate_sum;
@@ -6964,7 +6954,7 @@
bsize, ref_best_rd);
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
- mbmi->inter_tx_size[idy * 8 + idx] = mbmi->tx_size;
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
}
#else
super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
@@ -7396,8 +7386,6 @@
int64_t best_rd = best_rd_so_far;
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
- int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode;
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
@@ -7435,8 +7423,6 @@
int64_t mode_threshold[MAX_MODES];
int *mode_map = tile_data->mode_map[bsize];
const int mode_search_skip_flags = sf->mode_search_skip_flags;
- int64_t mask_filter = 0;
- int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
int palette_ctx = 0;
const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
@@ -7492,16 +7478,11 @@
sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
#endif // CONFIG_EXT_INTRA
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
-
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = INT64_MAX;
for (i = 0; i < TX_SIZES; i++)
rate_uv_intra[i] = INT_MAX;
for (i = 0; i < MAX_REF_FRAMES; ++i)
@@ -7555,9 +7536,9 @@
#endif // CONFIG_REF_MV
#if CONFIG_OBMC
- vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1,
+ vp10_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
dst_stride1);
- vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2,
+ vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
dst_stride2);
vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
#endif // CONFIG_OBMC
@@ -8048,6 +8029,13 @@
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
} else {
+#if CONFIG_REF_MV
+ int_mv backup_ref_mv[2];
+
+ backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
+ if (comp_pred)
+ backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
+#endif
#if CONFIG_EXT_INTER
if (second_ref_frame == INTRA_FRAME) {
mbmi->interintra_mode = best_intra_mode;
@@ -8066,6 +8054,19 @@
#if CONFIG_REF_MV
mbmi->ref_mv_idx = 0;
ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+
+ if (this_mode == NEWMV &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ int ref;
+ for (ref = 0; ref < 1 + comp_pred; ++ref) {
+ int_mv this_mv = (ref == 0) ?
+ mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv :
+ mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
+ }
+ }
#endif
this_rd = handle_inter_mode(cpi, x, bsize,
&rate2, &distortion2, &skippable,
@@ -8086,25 +8087,35 @@
#endif // CONFIG_EXT_INTER
single_inter_filter,
single_skippable,
- &total_sse, best_rd,
- &mask_filter, filter_cache);
+ &total_sse, best_rd);
#if CONFIG_REF_MV
// TODO(jingning): This needs some refactoring to improve code quality
// and reduce redundant steps.
- if (mbmi->mode == NEARMV &&
- mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+ if ((mbmi->mode == NEARMV &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
+ (mbmi->mode == NEWMV &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
int_mv backup_mv = frame_mv[NEARMV][ref_frame];
- int_mv cur_mv = mbmi_ext->ref_mv_stack[ref_frame][2].this_mv;
MB_MODE_INFO backup_mbmi = *mbmi;
-
+ int backup_skip = x->skip;
int64_t tmp_ref_rd = this_rd;
int ref_idx;
- int ref_set = VPXMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 2);
- uint8_t drl0_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- rate2 += cpi->drl_mode_cost0[drl0_ctx][0];
+ // TODO(jingning): This should be deprecated shortly.
+ int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
+ int ref_set =
+ VPXMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
+
+ uint8_t drl_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+ idx_offset);
+ // Dummy
+ int_mv backup_fmv[2];
+ backup_fmv[0] = frame_mv[NEWMV][ref_frame];
+ if (comp_pred)
+ backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
+
+ rate2 += cpi->drl_mode_cost0[drl_ctx][0];
if (this_rd < INT64_MAX) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
@@ -8129,18 +8140,33 @@
int tmp_rate = 0, tmp_rate_y = 0, tmp_rate_uv = 0;
int tmp_skip = 1;
int64_t tmp_dist = 0, tmp_sse = 0;
+ int dummy_disable_skip = 0;
+ int ref;
+ int_mv cur_mv;
- cur_mv = mbmi_ext->ref_mv_stack[ref_frame][2 + ref_idx].this_mv;
+ mbmi->ref_mv_idx = 1 + ref_idx;
+
+ for (ref = 0; ref < 1 + comp_pred; ++ref) {
+ int_mv this_mv = (ref == 0) ?
+ mbmi_ext->ref_mv_stack[ref_frame_type]
+ [mbmi->ref_mv_idx].this_mv :
+ mbmi_ext->ref_mv_stack[ref_frame_type]
+ [mbmi->ref_mv_idx].comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
+ }
+
+ cur_mv = mbmi_ext->ref_mv_stack[ref_frame]
+ [mbmi->ref_mv_idx + idx_offset].this_mv;
lower_mv_precision(&cur_mv.as_mv, cm->allow_high_precision_mv);
clamp_mv2(&cur_mv.as_mv, xd);
if (!mv_check_bounds(x, &cur_mv.as_mv)) {
- int64_t dummy_filter_cache[SWITCHABLE_FILTER_CONTEXTS];
INTERP_FILTER dummy_single_inter_filter[MB_MODE_COUNT]
[MAX_REF_FRAMES];
int dummy_single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
int dummy_disable_skip = 0;
- int64_t dummy_mask_filter = 0;
#if CONFIG_EXT_INTER
int_mv dummy_single_newmvs[2][MAX_REF_FRAMES] =
{ { { 0 } }, { { 0 } } };
@@ -8151,7 +8177,6 @@
#else
int_mv dummy_single_newmv[MAX_REF_FRAMES] = { { 0 } };
#endif
- mbmi->ref_mv_idx = 1 + ref_idx;
frame_mv[NEARMV][ref_frame] = cur_mv;
tmp_alt_rd = handle_inter_mode(cpi, x, bsize,
@@ -8173,17 +8198,23 @@
#endif
dummy_single_inter_filter,
dummy_single_skippable,
- &tmp_sse, best_rd,
- &dummy_mask_filter,
- dummy_filter_cache);
+ &tmp_sse, best_rd);
}
- tmp_rate += cpi->drl_mode_cost0[drl0_ctx][1];
+ for (i = 0; i < mbmi->ref_mv_idx; ++i) {
+ uint8_t drl1_ctx = 0;
+ drl1_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+ i + idx_offset);
+ tmp_rate += cpi->drl_mode_cost0[drl1_ctx][1];
+ }
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 3) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] >
+ mbmi->ref_mv_idx + idx_offset + 1 &&
+ ref_idx < ref_set - 1) {
uint8_t drl1_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
- tmp_rate += cpi->drl_mode_cost1[drl1_ctx][ref_idx];
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+ mbmi->ref_mv_idx + idx_offset);
+ tmp_rate += cpi->drl_mode_cost0[drl1_ctx][0];
}
if (tmp_alt_rd < INT64_MAX) {
@@ -8206,6 +8237,7 @@
if (tmp_ref_rd > tmp_alt_rd) {
rate2 = tmp_rate;
+ disable_skip = dummy_disable_skip;
distortion2 = tmp_dist;
skippable = tmp_skip;
rate_y = tmp_rate_y;
@@ -8214,6 +8246,7 @@
this_rd = tmp_alt_rd;
tmp_ref_rd = tmp_alt_rd;
backup_mbmi = *mbmi;
+ backup_skip = x->skip;
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(x->blk_skip_drl[i], x->blk_skip[i],
@@ -8221,16 +8254,23 @@
#endif
} else {
*mbmi = backup_mbmi;
+ x->skip = backup_skip;
}
}
frame_mv[NEARMV][ref_frame] = backup_mv;
+ frame_mv[NEWMV][ref_frame] = backup_fmv[0];
+ if (comp_pred)
+ frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(x->blk_skip[i], x->blk_skip_drl[i],
sizeof(uint8_t) * ctx->num_4x4_blk);
#endif
}
+ mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
+ if (comp_pred)
+ mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
#endif // CONFIG_REF_MV
if (this_rd == INT64_MAX)
@@ -8317,8 +8357,6 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help.. i.e. is it the new best mode
@@ -8338,11 +8376,12 @@
rd_cost->rate = rate2;
#if CONFIG_SUPERTX
- *returnrate_nocoef = rate2 - rate_y - rate_uv;
- if (!disable_skip) {
- *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
- skippable || this_skip2);
- }
+ if (x->skip)
+ *returnrate_nocoef = rate2;
+ else
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
+ disable_skip || skippable || this_skip2);
*returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
mbmi->ref_frame[0] != INTRA_FRAME);
#if CONFIG_OBMC
@@ -8417,29 +8456,6 @@
}
if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
-
- /* keep record of best filter type */
- if (!mode_excluded && cm->interp_filter != BILINEAR) {
- int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
- SWITCHABLE_FILTERS : cm->interp_filter];
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- int64_t adj_rd;
- if (ref == INT64_MAX)
- adj_rd = 0;
- else if (filter_cache[i] == INT64_MAX)
- // when early termination is triggered, the encoder does not have
- // access to the rate-distortion cost. it only knows that the cost
- // should be above the maximum valid value. hence it takes the known
- // maximum plus an arbitrary constant as the rate-distortion cost.
- adj_rd = mask_filter - ref + 10;
- else
- adj_rd = filter_cache[i] - ref;
-
- adj_rd += this_rd;
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
- }
- }
}
if (early_term)
@@ -8453,6 +8469,9 @@
if (cm->allow_screen_content_tools && !is_inter_mode(best_mbmode.mode)) {
PREDICTION_MODE mode_selected;
int rate2 = 0, rate_y = 0;
+#if CONFIG_SUPERTX
+ int best_rate_nocoef;
+#endif
int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
int skippable = 0, rate_overhead = 0;
TX_SIZE best_tx_size, uv_tx;
@@ -8522,8 +8541,14 @@
if (skippable) {
rate2 -= (rate_y + rate_uv_tokenonly[uv_tx]);
+#if CONFIG_SUPERTX
+ best_rate_nocoef = rate2;
+#endif
rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
} else {
+#if CONFIG_SUPERTX
+ best_rate_nocoef = rate2 - (rate_y + rate_uv_tokenonly[uv_tx]);
+#endif
rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
}
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -8533,6 +8558,9 @@
mbmi->mv[0].as_int = 0;
max_plane = 1;
rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = best_rate_nocoef;
+#endif
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
best_rd = this_rd;
@@ -8563,17 +8591,21 @@
#if CONFIG_REF_MV
const uint8_t rf_type = vp10_ref_frame_type(best_mbmode.ref_frame);
if (!comp_pred_mode) {
- if (best_mbmode.ref_mv_idx > 0 && refs[1] == NONE) {
- int idx = best_mbmode.ref_mv_idx + 1;
- int_mv cur_mv = mbmi_ext->ref_mv_stack[refs[0]][idx].this_mv;
+ int i;
+ int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) ?
+ VPXMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2) : INT_MAX;
+
+ for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
+ int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
lower_mv_precision(&cur_mv.as_mv, cm->allow_high_precision_mv);
- frame_mv[NEARMV][refs[0]] = cur_mv;
+ if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
+ best_mbmode.mode = NEARMV;
+ best_mbmode.ref_mv_idx = i;
+ }
}
if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
best_mbmode.mode = NEARESTMV;
- else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
- best_mbmode.mode = NEARMV;
else if (best_mbmode.mv[0].as_int == 0)
best_mbmode.mode = ZEROMV;
} else {
@@ -8581,21 +8613,37 @@
const int allow_hp = cm->allow_high_precision_mv;
int_mv nearestmv[2] = { frame_mv[NEARESTMV][refs[0]],
frame_mv[NEARESTMV][refs[1]] };
-
int_mv nearmv[2] = { frame_mv[NEARMV][refs[0]],
frame_mv[NEARMV][refs[1]] };
+#if CONFIG_EXT_INTER
+ if (mbmi_ext->ref_mv_count[rf_type] > 1) {
+ nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
+ nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
+ }
+#else
+ int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) ?
+ VPXMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2) : INT_MAX;
+
+ for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
+ nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
+ nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
+ lower_mv_precision(&nearmv[0].as_mv, allow_hp);
+ lower_mv_precision(&nearmv[1].as_mv, allow_hp);
+
+ if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearmv[1].as_int == best_mbmode.mv[1].as_int) {
+ best_mbmode.mode = NEARMV;
+ best_mbmode.ref_mv_idx = i;
+ }
+ }
+#endif
+
if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
}
- if (mbmi_ext->ref_mv_count[rf_type] > 1) {
- int ref_mv_idx = best_mbmode.ref_mv_idx + 1;
- nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][ref_mv_idx].this_mv;
- nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][ref_mv_idx].comp_mv;
- }
-
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
lower_mv_precision(&nearestmv[i].as_mv, allow_hp);
lower_mv_precision(&nearmv[i].as_mv, allow_hp);
@@ -8615,9 +8663,6 @@
best_mbmode.mode = ZERO_ZEROMV;
#else
best_mbmode.mode = NEARESTMV;
- else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
- nearmv[1].as_int == best_mbmode.mv[1].as_int)
- best_mbmode.mode = NEARMV;
else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
best_mbmode.mode = ZEROMV;
#endif // CONFIG_EXT_INTER
@@ -8734,21 +8779,6 @@
best_pred_diff[i] = best_rd - best_pred_rd[i];
}
- if (!x->skip) {
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- if (best_filter_rd[i] == INT64_MAX)
- best_filter_diff[i] = 0;
- else
- best_filter_diff[i] = best_rd - best_filter_rd[i];
- }
- if (cm->interp_filter == SWITCHABLE)
- assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
- } else {
- vp10_zero(best_filter_diff);
- }
-
- // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
- // updating code causes PSNR loss. Need to figure out the confliction.
x->skip |= best_mode_skippable;
if (!x->skip && !x->select_tx_size) {
@@ -8772,7 +8802,7 @@
assert(best_mode_index >= 0);
store_coding_context(x, ctx, best_mode_index, best_pred_diff,
- best_filter_diff, best_mode_skippable);
+ best_mode_skippable);
if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
restore_uv_color_map(cpi, x);
@@ -8793,7 +8823,6 @@
const int comp_pred = 0;
int i;
int64_t best_pred_diff[REFERENCE_MODES];
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vpx_prob comp_mode_p;
INTERP_FILTER best_filter = SWITCHABLE;
@@ -8878,12 +8907,11 @@
cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
vp10_zero(best_pred_diff);
- vp10_zero(best_filter_diff);
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
store_coding_context(x, ctx, THR_ZEROMV,
- best_pred_diff, best_filter_diff, 0);
+ best_pred_diff, 0);
}
void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
@@ -8923,8 +8951,6 @@
int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
- int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode;
int ref_index, best_ref_index = 0;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
@@ -8944,8 +8970,6 @@
b_mode_info best_bmodes[4];
int best_skip2 = 0;
int ref_frame_skip_mask[2] = { 0 };
- int64_t mask_filter = 0;
- int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
int internal_active_edge =
vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
@@ -8969,9 +8993,6 @@
mbmi->use_wedge_interintra = 0;
#endif // CONFIG_EXT_INTER
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
-
for (i = 0; i < 4; i++) {
int j;
#if CONFIG_EXT_INTER
@@ -8991,8 +9012,6 @@
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = INT64_MAX;
rate_uv_intra = INT_MAX;
rd_cost->rate = INT_MAX;
@@ -9187,7 +9206,7 @@
}
#if CONFIG_VAR_TX
- mbmi->inter_tx_size[0] = mbmi->tx_size;
+ mbmi->inter_tx_size[0][0] = mbmi->tx_size;
#endif
if (ref_frame == INTRA_FRAME) {
@@ -9250,8 +9269,6 @@
#endif // CONFIG_EXT_REFS
this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
// TODO(any): Add search of the tx_type to improve rd performance at the
// expense of speed.
@@ -9295,14 +9312,9 @@
continue;
rs = vp10_get_switchable_rate(cpi, xd);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- filter_cache[switchable_filter_index] = tmp_rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
tmp_rd += rs_rd;
- mask_filter = VPXMAX(mask_filter, tmp_rd);
-
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
tmp_best_filter = mbmi->interp_filter;
@@ -9476,8 +9488,6 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help.. i.e. is it the new best mode
@@ -9572,29 +9582,6 @@
best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
- /* keep record of best filter type */
- if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
- cm->interp_filter != BILINEAR) {
- int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
- SWITCHABLE_FILTERS : cm->interp_filter];
- int64_t adj_rd;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- if (ref == INT64_MAX)
- adj_rd = 0;
- else if (filter_cache[i] == INT64_MAX)
- // when early termination is triggered, the encoder does not have
- // access to the rate-distortion cost. it only knows that the cost
- // should be above the maximum valid value. hence it takes the known
- // maximum plus an arbitrary constant as the rate-distortion cost.
- adj_rd = mask_filter - ref + 10;
- else
- adj_rd = filter_cache[i] - ref;
-
- adj_rd += this_rd;
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
- }
- }
-
if (early_term)
break;
@@ -9666,226 +9653,6 @@
best_pred_diff[i] = best_rd - best_pred_rd[i];
}
- if (!x->skip) {
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- if (best_filter_rd[i] == INT64_MAX)
- best_filter_diff[i] = 0;
- else
- best_filter_diff[i] = best_rd - best_filter_rd[i];
- }
- if (cm->interp_filter == SWITCHABLE)
- assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
- } else {
- vp10_zero(best_filter_diff);
- }
-
store_coding_context(x, ctx, best_ref_index,
- best_pred_diff, best_filter_diff, 0);
+ best_pred_diff, 0);
}
-
-#if CONFIG_OBMC
-void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
- MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]) {
- VP10_COMMON *const cm = &cpi->common;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int i, j, mi_step, ref;
-
- if (mi_row == 0)
- return;
-
- for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
- int mi_row_offset = -1;
- int mi_col_offset = i;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *above_mi = xd->mi[mi_col_offset +
- mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
-
- mi_step = VPXMIN(xd->n8_w,
- num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
-
- if (!is_neighbor_overlappable(above_mbmi))
- continue;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst,
- tmp_buf[j], tmp_stride[j],
- 0, i, NULL,
- pd->subsampling_x, pd->subsampling_y);
- }
- /*
- set_ref_ptrs(cm, xd, above_mbmi->ref_frame[0], above_mbmi->ref_frame[1]);
- for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
- YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(
- cpi, above_mbmi->ref_frame[ref]);
- assert(cfg != NULL);
- vp10_setup_pre_planes(xd, ref, cfg, mi_row, mi_col + i,
- &xd->block_refs[ref]->sf);
- }
- */
- for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
- MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!vp10_is_valid_scale(&ref_buf->sf)))
- vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
- &ref_buf->sf);
- }
-
- xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
- mi_x = (mi_col + i) << MI_SIZE_LOG2;
- mi_y = mi_row << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = (mi_step * 8) >> pd->subsampling_x;
- bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
- 4);
-
- if (above_mbmi->sb_type < BLOCK_8X8) {
- const PARTITION_TYPE bp = BLOCK_8X8 - above_mbmi->sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
- const int pw = 8 >> (have_vsplit | pd->subsampling_x);
- int x, y;
-
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x) {
- if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT)
- && y == 0 && !pd->subsampling_y)
- continue;
-
- build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
- y * 2 + x, bw, bh,
- 4 * x, 0, pw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
- }
- } else {
- build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
- 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
- }
- }
- }
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
-}
-
-void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
- MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]) {
- VP10_COMMON *const cm = &cpi->common;
- const TileInfo *const tile = &xd->tile;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int i, j, mi_step, ref;
-
- if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
- (mi_col - 1) >= tile->mi_col_end)
- return;
-
- for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = -1;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *left_mi = xd->mi[mi_col_offset +
- mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
- const int is_compound = has_second_ref(left_mbmi);
-
- mi_step = VPXMIN(xd->n8_h,
- num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
-
- if (!is_neighbor_overlappable(left_mbmi))
- continue;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst,
- tmp_buf[j], tmp_stride[j],
- i, 0, NULL,
- pd->subsampling_x, pd->subsampling_y);
- }
- /*
- set_ref_ptrs(cm, xd, left_mbmi->ref_frame[0], left_mbmi->ref_frame[1]);
- for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
- YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
- left_mbmi->ref_frame[ref]);
- assert(cfg != NULL);
- vp10_setup_pre_planes(xd, ref, cfg, mi_row + i, mi_col,
- &xd->block_refs[ref]->sf);
- }
- */
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!vp10_is_valid_scale(&ref_buf->sf)))
- vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
- &ref_buf->sf);
- }
-
- xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
- mi_x = mi_col << MI_SIZE_LOG2;
- mi_y = (mi_row + i) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
- 4);
- bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- if (left_mbmi->sb_type < BLOCK_8X8) {
- const PARTITION_TYPE bp = BLOCK_8X8 - left_mbmi->sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
- const int ph = 8 >> (have_hsplit | pd->subsampling_y);
- int x, y;
-
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x) {
- if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT)
- && x == 0 && !pd->subsampling_x)
- continue;
-
- build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
- y * 2 + x, bw, bh,
- 0, 4 * y, bw, ph,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
- }
- } else {
- build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
- bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
- }
- }
- }
- xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
-}
-#endif // CONFIG_OBMC
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index 174ad4d..ab57c1e 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -90,19 +90,6 @@
int use_fast_coef_casting);
#endif // CONFIG_SUPERTX
-#if CONFIG_OBMC
-void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
- MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]);
-void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
- MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]);
-#endif // CONFIG_OBMC
-
static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
const int ref) {
// Use up-sampled reference frames.
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c
index 969b87f..477e32d 100644
--- a/vp10/encoder/segmentation.c
+++ b/vp10/encoder/segmentation.c
@@ -164,15 +164,106 @@
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
const int mis = cm->mi_stride;
- int bw, bh;
const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition;
+#else
+ const int bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
+#endif // CONFIG_EXT_PARTITION_TYPES
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
- bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize == BLOCK_8X8)
+ partition = PARTITION_NONE;
+ else
+ partition = get_partition(cm->mi, cm->mi_stride, cm->mi_rows, cm->mi_cols,
+ mi_row, mi_col, bsize);
+ switch (partition) {
+ case PARTITION_NONE:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, bs, mi_row, mi_col);
+ break;
+ case PARTITION_HORZ:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
+ mi_row + hbs, mi_col);
+ break;
+ case PARTITION_VERT:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs,
+ no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, bs, mi_row, mi_col + hbs);
+ break;
+ case PARTITION_HORZ_A:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row, mi_col + hbs);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
+ mi_row + hbs, mi_col);
+ break;
+ case PARTITION_HORZ_B:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row + hbs, mi_col);
+ count_segs(cm, xd, tile, mi + hbs + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row + hbs, mi_col + hbs);
+ break;
+ case PARTITION_VERT_A:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row + hbs, mi_col);
+ count_segs(cm, xd, tile, mi + hbs,
+ no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, bs, mi_row, mi_col + hbs);
+ break;
+ case PARTITION_VERT_B:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs,
+ no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, hbs, mi_row, mi_col + hbs);
+ count_segs(cm, xd, tile, mi + hbs + hbs * mis,
+ no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, hbs, mi_row + hbs, mi_col + hbs);
+ break;
+ case PARTITION_SPLIT:
+ {
+ const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
+ int n;
+
+ assert(num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type] < bs &&
+ num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type] < bs);
+
+ for (n = 0; n < 4; n++) {
+ const int mi_dc = hbs * (n & 1);
+ const int mi_dr = hbs * (n >> 1);
+
+ count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc],
+ no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts,
+ mi_row + mi_dr, mi_col + mi_dc, subsize);
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+#else
if (bw == bs && bh == bs) {
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, bs, bs, mi_row, mi_col);
@@ -204,6 +295,7 @@
mi_row + mi_dr, mi_col + mi_dc, subsize);
}
}
+#endif // CONFIG_EXT_PARTITION_TYPES
}
void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index 169ae2c..c50b949 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -199,9 +199,6 @@
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
-#if CONFIG_EXT_TX
- sf->tx_type_search = PRUNE_THREE;
-#endif
}
if (speed >= 4) {
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index 02ee204..ea4df6e 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -188,8 +188,6 @@
#if CONFIG_EXT_TX
// eliminates two tx types in each direction
PRUNE_TWO = 2,
- // eliminates three tx types in each direction
- PRUNE_THREE = 3,
#endif
} TX_TYPE_SEARCH;
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index c71c985..822ccc9 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -565,11 +565,11 @@
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
- int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
- (blk_col >> (1 - pd->subsampling_x));
- TX_SIZE plane_tx_size = plane ?
- get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, 0, 0) :
- mbmi->inter_tx_size[blk_idx];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -693,14 +693,6 @@
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
&arg);
(*t)->token = EOSB_TOKEN;
-#if CONFIG_ANS
- // TODO(aconverse): clip the number of bits in tokenize_b
- // Smuggle TX_SIZE in the unused extrabits field so the ANS encoder
- // knows the maximum number of extrabits to write at the end of the block
- // (where it starts).
- (*t)->extra = (EXTRABIT)(plane ? get_uv_tx_size(mbmi, &xd->plane[plane])
- : mbmi->tx_size);
-#endif // CONFIG_ANS
(*t)++;
}
} else {
diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c
index 8ff7c9c..8a55425 100644
--- a/vp10/encoder/x86/dct_sse2.c
+++ b/vp10/encoder/x86/dct_sse2.c
@@ -172,42 +172,6 @@
transpose_4x4(in);
}
-#if CONFIG_EXT_TX
-static void fdst4_sse2(__m128i *in) {
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64);
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
-
- __m128i u[4], v[4];
-
- u[0] = _mm_unpacklo_epi16(in[0], in[1]);
- u[1] = _mm_unpacklo_epi16(in[3], in[2]);
-
- v[0] = _mm_add_epi16(u[0], u[1]);
- v[1] = _mm_sub_epi16(u[0], u[1]);
-
- u[0] = _mm_madd_epi16(v[0], k__cospi_p24_p08);
- u[1] = _mm_madd_epi16(v[1], k__cospi_p16_p16);
- u[2] = _mm_madd_epi16(v[0], k__cospi_p08_m24);
- u[3] = _mm_madd_epi16(v[1], k__cospi_p16_m16);
-
- v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
- v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
- v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
- v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
- u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
- u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
- u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
- u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
-
- in[0] = _mm_packs_epi32(u[0], u[2]);
- in[1] = _mm_packs_epi32(u[1], u[3]);
- transpose_4x4(in);
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
__m128i in[4];
@@ -265,48 +229,6 @@
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
- case DST_DST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdst4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DCT_DST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdct4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DST_DCT:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdst4_sse2(in);
- fdct4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DST_ADST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdst4_sse2(in);
- fadst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case ADST_DST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fadst4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DST_FLIPADST:
- load_buffer_4x4(input, in, stride, 0, 1);
- fdst4_sse2(in);
- fadst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case FLIPADST_DST:
- load_buffer_4x4(input, in, stride, 1, 0);
- fadst4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -1288,155 +1210,6 @@
array_transpose_8x8(in, in);
}
-#if CONFIG_EXT_TX
-static void fdst8_sse2(__m128i *in) {
- // Constants
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64);
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t) -cospi_16_64);
- const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
- const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
- const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
- const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
- const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
- const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
- const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64);
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
-
- __m128i s0, s1, s2, s3, s4, s5, s6, s7;
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i t0, t1, t2, t3, t4, t5, t6, t7;
-
- s0 = _mm_sub_epi16(in[0], in[7]);
- s1 = _mm_sub_epi16(in[1], in[6]); // -s1
- s2 = _mm_sub_epi16(in[2], in[5]);
- s3 = _mm_sub_epi16(in[3], in[4]); // -s3
- s4 = _mm_add_epi16(in[3], in[4]); // -s4
- s5 = _mm_add_epi16(in[2], in[5]);
- s6 = _mm_add_epi16(in[1], in[6]); // -s6
- s7 = _mm_add_epi16(in[0], in[7]);
-
- x0 = _mm_sub_epi16(s0, s3);
- x1 = _mm_sub_epi16(s1, s2); // -x1
- x2 = _mm_add_epi16(s1, s2); // -x2
- x3 = _mm_add_epi16(s0, s3);
-
- // Interleave
- t0 = _mm_unpacklo_epi16(x0, x1);
- t1 = _mm_unpackhi_epi16(x0, x1);
- t2 = _mm_unpacklo_epi16(x2, x3);
- t3 = _mm_unpackhi_epi16(x2, x3);
-
- // Perform butterfly multiplication/addition
- x0 = _mm_madd_epi16(t0, k__cospi_p16_m16);
- x1 = _mm_madd_epi16(t1, k__cospi_p16_m16);
- x2 = _mm_madd_epi16(t0, k__cospi_p16_p16);
- x3 = _mm_madd_epi16(t1, k__cospi_p16_p16);
- x4 = _mm_madd_epi16(t2, k__cospi_m24_p08);
- x5 = _mm_madd_epi16(t3, k__cospi_m24_p08);
- x6 = _mm_madd_epi16(t2, k__cospi_p08_p24);
- x7 = _mm_madd_epi16(t3, k__cospi_p08_p24);
-
- // Rounding
- t0 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING);
- t1 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING);
- t2 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING);
- t3 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING);
- t4 = _mm_add_epi32(x4, k__DCT_CONST_ROUNDING);
- t5 = _mm_add_epi32(x5, k__DCT_CONST_ROUNDING);
- t6 = _mm_add_epi32(x6, k__DCT_CONST_ROUNDING);
- t7 = _mm_add_epi32(x7, k__DCT_CONST_ROUNDING);
- // Shift
- x0 = _mm_srai_epi32(t0, DCT_CONST_BITS);
- x1 = _mm_srai_epi32(t1, DCT_CONST_BITS);
- x2 = _mm_srai_epi32(t2, DCT_CONST_BITS);
- x3 = _mm_srai_epi32(t3, DCT_CONST_BITS);
- x4 = _mm_srai_epi32(t4, DCT_CONST_BITS);
- x5 = _mm_srai_epi32(t5, DCT_CONST_BITS);
- x6 = _mm_srai_epi32(t6, DCT_CONST_BITS);
- x7 = _mm_srai_epi32(t7, DCT_CONST_BITS);
-
- // Pack 32b integer to 16b with signed saturation
- in[7] = _mm_packs_epi32(x0, x1);
- in[5] = _mm_packs_epi32(x4, x5);
- in[3] = _mm_packs_epi32(x2, x3);
- in[1] = _mm_packs_epi32(x6, x7);
-
- // Interleave
- s0 = _mm_unpacklo_epi16(s6, s5);
- s1 = _mm_unpackhi_epi16(s6, s5);
-
- // Perform butterfly multiplication/addition
- x0 = _mm_madd_epi16(s0, k__cospi_m16_m16);
- x1 = _mm_madd_epi16(s1, k__cospi_m16_m16);
- x2 = _mm_madd_epi16(s0, k__cospi_m16_p16);
- x3 = _mm_madd_epi16(s1, k__cospi_m16_p16);
-
- // Rounding
- t0 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING);
- t1 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING);
- t2 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING);
- t3 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING);
-
- // Shift
- x0 = _mm_srai_epi32(t0, DCT_CONST_BITS);
- x1 = _mm_srai_epi32(t1, DCT_CONST_BITS);
- x2 = _mm_srai_epi32(t2, DCT_CONST_BITS);
- x3 = _mm_srai_epi32(t3, DCT_CONST_BITS);
-
- // Pack 32b integer to 16b with signed saturation
- t2 = _mm_packs_epi32(x0, x1);
- t3 = _mm_packs_epi32(x2, x3);
-
- x0 = _mm_sub_epi16(t2, s4);
- x1 = _mm_add_epi16(t2, s4); // -x1
- x2 = _mm_sub_epi16(s7, t3);
- x3 = _mm_add_epi16(s7, t3);
-
- s0 = _mm_unpacklo_epi16(x0, x3);
- s1 = _mm_unpackhi_epi16(x0, x3);
- s2 = _mm_unpacklo_epi16(x1, x2);
- s3 = _mm_unpackhi_epi16(x1, x2);
-
- t0 = _mm_madd_epi16(s0, k__cospi_p28_p04);
- t1 = _mm_madd_epi16(s1, k__cospi_p28_p04);
- t2 = _mm_madd_epi16(s2, k__cospi_m12_p20);
- t3 = _mm_madd_epi16(s3, k__cospi_m12_p20);
- t4 = _mm_madd_epi16(s2, k__cospi_p20_p12);
- t5 = _mm_madd_epi16(s3, k__cospi_p20_p12);
- t6 = _mm_madd_epi16(s0, k__cospi_m04_p28);
- t7 = _mm_madd_epi16(s1, k__cospi_m04_p28);
-
- // Rounding
- x0 = _mm_add_epi32(t0, k__DCT_CONST_ROUNDING);
- x1 = _mm_add_epi32(t1, k__DCT_CONST_ROUNDING);
- x2 = _mm_add_epi32(t2, k__DCT_CONST_ROUNDING);
- x3 = _mm_add_epi32(t3, k__DCT_CONST_ROUNDING);
- x4 = _mm_add_epi32(t4, k__DCT_CONST_ROUNDING);
- x5 = _mm_add_epi32(t5, k__DCT_CONST_ROUNDING);
- x6 = _mm_add_epi32(t6, k__DCT_CONST_ROUNDING);
- x7 = _mm_add_epi32(t7, k__DCT_CONST_ROUNDING);
- // Shift
- s0 = _mm_srai_epi32(x0, DCT_CONST_BITS);
- s1 = _mm_srai_epi32(x1, DCT_CONST_BITS);
- s2 = _mm_srai_epi32(x2, DCT_CONST_BITS);
- s3 = _mm_srai_epi32(x3, DCT_CONST_BITS);
- s4 = _mm_srai_epi32(x4, DCT_CONST_BITS);
- s5 = _mm_srai_epi32(x5, DCT_CONST_BITS);
- s6 = _mm_srai_epi32(x6, DCT_CONST_BITS);
- s7 = _mm_srai_epi32(x7, DCT_CONST_BITS);
-
- in[6] = _mm_packs_epi32(s0, s1);
- in[4] = _mm_packs_epi32(s4, s5);
- in[2] = _mm_packs_epi32(s2, s3);
- in[0] = _mm_packs_epi32(s6, s7);
-
- // coeffs: [x3 x2 x1 x0, x7 x6 x5 x4]
- // Transpose
- array_transpose_8x8(in, in);
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
__m128i in[8];
@@ -1502,55 +1275,6 @@
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
- case DST_DST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdst8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DCT_DST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdct8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DST_DCT:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdst8_sse2(in);
- fdct8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DST_ADST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdst8_sse2(in);
- fadst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case ADST_DST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fadst8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DST_FLIPADST:
- load_buffer_8x8(input, in, stride, 0, 1);
- fdst8_sse2(in);
- fadst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case FLIPADST_DST:
- load_buffer_8x8(input, in, stride, 1, 0);
- fadst8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -2420,351 +2144,6 @@
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-#if CONFIG_EXT_TX
-static void fdst16_8col(__m128i *in) {
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64);
- const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
- const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
-
- const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t) -cospi_16_64);
- const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
- const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64);
- const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64);
- const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
- const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
-
- const __m128i k__cospi_m08_m24 = pair_set_epi16(-cospi_8_64, -cospi_24_64);
- const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
-
- const __m128i k__cospi_m30_p02 = pair_set_epi16(-cospi_30_64, cospi_2_64);
- const __m128i k__cospi_m14_p18 = pair_set_epi16(-cospi_14_64, cospi_18_64);
- const __m128i k__cospi_m22_p10 = pair_set_epi16(-cospi_22_64, cospi_10_64);
- const __m128i k__cospi_m06_p26 = pair_set_epi16(-cospi_6_64, cospi_26_64);
- const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
- const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
- const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
- const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
-
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
-
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i s0, s1, s2, s3, s4, s5, s6, s7;
- __m128i x0, x1, x2, x3, t0, t1, t2, t3;
- __m128i y0, y1, y2, y3, y4, y5, y6, y7;
- __m128i w0, w1, w2, w3, w4, w5, w6, w7;
-
- // (1)
- u0 = _mm_sub_epi16(in[0], in[15]);
- v7 = _mm_add_epi16(in[0], in[15]);
-
- u1 = _mm_sub_epi16(in[1], in[14]); // -u1
- v6 = _mm_add_epi16(in[1], in[14]); // -v6
-
- u2 = _mm_sub_epi16(in[2], in[13]);
- v5 = _mm_add_epi16(in[2], in[13]);
-
- u3 = _mm_sub_epi16(in[3], in[12]); // -u3
- v4 = _mm_add_epi16(in[3], in[12]); // -v4
-
- u4 = _mm_sub_epi16(in[4], in[11]);
- v3 = _mm_add_epi16(in[4], in[11]);
-
- u5 = _mm_sub_epi16(in[5], in[10]); // -u5
- v2 = _mm_add_epi16(in[5], in[10]); // -v2
-
- u6 = _mm_sub_epi16(in[6], in[9]);
- v1 = _mm_add_epi16(in[6], in[9]);
-
- u7 = _mm_sub_epi16(in[7], in[8]); // -u7
- v0 = _mm_add_epi16(in[7], in[8]); // -v0
-
- s0 = _mm_sub_epi16(u0, u7);
- s1 = _mm_sub_epi16(u1, u6); // -s1
- s2 = _mm_sub_epi16(u2, u5);
- s3 = _mm_sub_epi16(u3, u4); // -s3
- s4 = _mm_add_epi16(u3, u4); // -s4
- s5 = _mm_add_epi16(u2, u5);
- s6 = _mm_add_epi16(u1, u6); // -s6
- s7 = _mm_add_epi16(u0, u7);
-
- x0 = _mm_sub_epi16(s0, s3);
- x1 = _mm_sub_epi16(s1, s2); // -x1
- x2 = _mm_add_epi16(s1, s2); // -x2
- x3 = _mm_add_epi16(s0, s3);
-
- y0 = _mm_unpacklo_epi16(x0, x1);
- y1 = _mm_unpackhi_epi16(x0, x1);
- y2 = _mm_unpacklo_epi16(x2, x3);
- y3 = _mm_unpackhi_epi16(x2, x3);
-
- t0 = _mm_madd_epi16(y0, k__cospi_p16_m16);
- t1 = _mm_madd_epi16(y1, k__cospi_p16_m16);
- t2 = _mm_madd_epi16(y0, k__cospi_p16_p16);
- t3 = _mm_madd_epi16(y1, k__cospi_p16_p16);
- x0 = _mm_madd_epi16(y2, k__cospi_m24_p08);
- x1 = _mm_madd_epi16(y3, k__cospi_m24_p08);
- x2 = _mm_madd_epi16(y2, k__cospi_p08_p24);
- x3 = _mm_madd_epi16(y3, k__cospi_p08_p24);
-
- y0 = _mm_add_epi32(t0, k__DCT_CONST_ROUNDING);
- y1 = _mm_add_epi32(t1, k__DCT_CONST_ROUNDING);
- y2 = _mm_add_epi32(t2, k__DCT_CONST_ROUNDING);
- y3 = _mm_add_epi32(t3, k__DCT_CONST_ROUNDING);
- y4 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING);
- y5 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING);
- y6 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING);
- y7 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING);
-
- t0 = _mm_srai_epi32(y0, DCT_CONST_BITS);
- t1 = _mm_srai_epi32(y1, DCT_CONST_BITS);
- t2 = _mm_srai_epi32(y2, DCT_CONST_BITS);
- t3 = _mm_srai_epi32(y3, DCT_CONST_BITS);
- x0 = _mm_srai_epi32(y4, DCT_CONST_BITS);
- x1 = _mm_srai_epi32(y5, DCT_CONST_BITS);
- x2 = _mm_srai_epi32(y6, DCT_CONST_BITS);
- x3 = _mm_srai_epi32(y7, DCT_CONST_BITS);
-
- in[15] = _mm_packs_epi32(t0, t1);
- in[11] = _mm_packs_epi32(x0, x1);
- in[7] = _mm_packs_epi32(t2, t3);
- in[3] = _mm_packs_epi32(x2, x3);
-
- // (2)
- t0 = _mm_unpacklo_epi16(s6, s5);
- t1 = _mm_unpackhi_epi16(s6, s5);
-
- y0 = _mm_madd_epi16(t0, k__cospi_m16_m16);
- y1 = _mm_madd_epi16(t1, k__cospi_m16_m16);
- y2 = _mm_madd_epi16(t0, k__cospi_m16_p16);
- y3 = _mm_madd_epi16(t1, k__cospi_m16_p16);
-
- x0 = _mm_add_epi32(y0, k__DCT_CONST_ROUNDING);
- x1 = _mm_add_epi32(y1, k__DCT_CONST_ROUNDING);
- x2 = _mm_add_epi32(y2, k__DCT_CONST_ROUNDING);
- x3 = _mm_add_epi32(y3, k__DCT_CONST_ROUNDING);
-
- y4 = _mm_srai_epi32(x0, DCT_CONST_BITS);
- y5 = _mm_srai_epi32(x1, DCT_CONST_BITS);
- y6 = _mm_srai_epi32(x2, DCT_CONST_BITS);
- y7 = _mm_srai_epi32(x3, DCT_CONST_BITS);
-
- t2 = _mm_packs_epi32(y4, y5);
- t3 = _mm_packs_epi32(y6, y7);
-
- x0 = _mm_sub_epi16(s4, t2); // -x0
- x1 = _mm_add_epi16(s4, t2); // -x1
- x2 = _mm_sub_epi16(s7, t3);
- x3 = _mm_add_epi16(s7, t3);
-
- y0 = _mm_unpacklo_epi16(x0, x3);
- y1 = _mm_unpackhi_epi16(x0, x3);
- y2 = _mm_unpacklo_epi16(x1, x2);
- y3 = _mm_unpackhi_epi16(x1, x2);
-
- w0 = _mm_madd_epi16(y0, k__cospi_m28_p04);
- w1 = _mm_madd_epi16(y1, k__cospi_m28_p04);
- w2 = _mm_madd_epi16(y2, k__cospi_m12_p20);
- w3 = _mm_madd_epi16(y3, k__cospi_m12_p20);
- w4 = _mm_madd_epi16(y2, k__cospi_p20_p12);
- w5 = _mm_madd_epi16(y3, k__cospi_p20_p12);
- w6 = _mm_madd_epi16(y0, k__cospi_p04_p28);
- w7 = _mm_madd_epi16(y1, k__cospi_p04_p28);
-
- u0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
- u1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
- u2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
- u3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
- u4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
- u5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
- u6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
- u7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
-
- y0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
- y1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
- y2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
- y3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
- y4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
- y5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
- y6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
- y7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
-
- in[13] = _mm_packs_epi32(y0, y1);
- in[9] = _mm_packs_epi32(y4, y5);
- in[5] = _mm_packs_epi32(y2, y3);
- in[1] = _mm_packs_epi32(y6, y7);
-
- // (3)
- y0 = _mm_unpacklo_epi16(v5, v2);
- y1 = _mm_unpackhi_epi16(v5, v2);
- y2 = _mm_unpacklo_epi16(v4, v3);
- y3 = _mm_unpackhi_epi16(v4, v3);
-
- u0 = _mm_madd_epi16(y0, k__cospi_p16_p16);
- u1 = _mm_madd_epi16(y1, k__cospi_p16_p16);
- u2 = _mm_madd_epi16(y2, k__cospi_m16_m16);
- u3 = _mm_madd_epi16(y3, k__cospi_m16_m16);
- u4 = _mm_madd_epi16(y2, k__cospi_m16_p16);
- u5 = _mm_madd_epi16(y3, k__cospi_m16_p16);
- u6 = _mm_madd_epi16(y0, k__cospi_p16_m16);
- u7 = _mm_madd_epi16(y1, k__cospi_p16_m16);
-
- w0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- w1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- w2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- w3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- w4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- w5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- w6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- w7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
-
- s0 = _mm_srai_epi32(w0, DCT_CONST_BITS);
- s1 = _mm_srai_epi32(w1, DCT_CONST_BITS);
- s2 = _mm_srai_epi32(w2, DCT_CONST_BITS);
- s3 = _mm_srai_epi32(w3, DCT_CONST_BITS);
- s4 = _mm_srai_epi32(w4, DCT_CONST_BITS);
- s5 = _mm_srai_epi32(w5, DCT_CONST_BITS);
- s6 = _mm_srai_epi32(w6, DCT_CONST_BITS);
- s7 = _mm_srai_epi32(w7, DCT_CONST_BITS);
-
- y2 = _mm_packs_epi32(s0, s1);
- y3 = _mm_packs_epi32(s2, s3);
- y4 = _mm_packs_epi32(s4, s5);
- y5 = _mm_packs_epi32(s6, s7);
-
- // step 3
- w0 = _mm_sub_epi16(v0, y3); // -w0
- w1 = _mm_add_epi16(v1, y2);
- w2 = _mm_sub_epi16(v1, y2);
- w3 = _mm_add_epi16(v0, y3); // -w3
- w4 = _mm_sub_epi16(v7, y4);
- w5 = _mm_add_epi16(v6, y5); // -w5
- w6 = _mm_sub_epi16(v6, y5); // -w6
- w7 = _mm_add_epi16(v7, y4);
-
- // step 4
- x0 = _mm_unpacklo_epi16(w1, w6);
- x1 = _mm_unpackhi_epi16(w1, w6);
- x2 = _mm_unpacklo_epi16(w2, w5);
- x3 = _mm_unpackhi_epi16(w2, w5);
-
- u0 = _mm_madd_epi16(x0, k__cospi_m08_m24);
- u1 = _mm_madd_epi16(x1, k__cospi_m08_m24);
- u2 = _mm_madd_epi16(x2, k__cospi_p24_m08);
- u3 = _mm_madd_epi16(x3, k__cospi_p24_m08);
- u4 = _mm_madd_epi16(x2, k__cospi_p08_p24);
- u5 = _mm_madd_epi16(x3, k__cospi_p08_p24);
- u6 = _mm_madd_epi16(x0, k__cospi_p24_m08);
- u7 = _mm_madd_epi16(x1, k__cospi_p24_m08);
-
- s0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- s1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- s2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- s3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- s4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- s5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- s6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- s7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
-
- u0 = _mm_srai_epi32(s0, DCT_CONST_BITS);
- u1 = _mm_srai_epi32(s1, DCT_CONST_BITS);
- u2 = _mm_srai_epi32(s2, DCT_CONST_BITS);
- u3 = _mm_srai_epi32(s3, DCT_CONST_BITS);
- u4 = _mm_srai_epi32(s4, DCT_CONST_BITS);
- u5 = _mm_srai_epi32(s5, DCT_CONST_BITS);
- u6 = _mm_srai_epi32(s6, DCT_CONST_BITS);
- u7 = _mm_srai_epi32(s7, DCT_CONST_BITS);
-
- y1 = _mm_packs_epi32(u0, u1);
- y2 = _mm_packs_epi32(u2, u3);
- y5 = _mm_packs_epi32(u4, u5);
- y6 = _mm_packs_epi32(u6, u7);
-
- // step 5
- v0 = _mm_sub_epi16(w0, y1); // -v0
- v1 = _mm_add_epi16(w0, y1); // -v1
- v2 = _mm_sub_epi16(w3, y2); // -v2
- v3 = _mm_add_epi16(w3, y2); // -v3
- v4 = _mm_sub_epi16(w4, y5);
- v5 = _mm_add_epi16(w4, y5);
- v6 = _mm_sub_epi16(w7, y6);
- v7 = _mm_add_epi16(w7, y6);
-
- u0 = _mm_unpacklo_epi16(v0, v7);
- u1 = _mm_unpackhi_epi16(v0, v7);
- u2 = _mm_unpacklo_epi16(v1, v6);
- u3 = _mm_unpackhi_epi16(v1, v6);
- u4 = _mm_unpacklo_epi16(v2, v5);
- u5 = _mm_unpackhi_epi16(v2, v5);
- u6 = _mm_unpacklo_epi16(v3, v4);
- u7 = _mm_unpackhi_epi16(v3, v4);
-
- s0 = _mm_madd_epi16(u0, k__cospi_m30_p02); // x0
- s1 = _mm_madd_epi16(u1, k__cospi_m30_p02);
- s2 = _mm_madd_epi16(u2, k__cospi_m14_p18); // x1
- s3 = _mm_madd_epi16(u3, k__cospi_m14_p18);
- s4 = _mm_madd_epi16(u4, k__cospi_m22_p10); // x2
- s5 = _mm_madd_epi16(u5, k__cospi_m22_p10);
- s6 = _mm_madd_epi16(u6, k__cospi_m06_p26); // x3
- s7 = _mm_madd_epi16(u7, k__cospi_m06_p26);
-
- w0 = _mm_madd_epi16(u6, k__cospi_p26_p06); // x4
- w1 = _mm_madd_epi16(u7, k__cospi_p26_p06);
- w2 = _mm_madd_epi16(u4, k__cospi_p10_p22); // x5
- w3 = _mm_madd_epi16(u5, k__cospi_p10_p22);
- w4 = _mm_madd_epi16(u2, k__cospi_p18_p14); // x6
- w5 = _mm_madd_epi16(u3, k__cospi_p18_p14);
- w6 = _mm_madd_epi16(u0, k__cospi_p02_p30); // x7
- w7 = _mm_madd_epi16(u1, k__cospi_p02_p30);
-
- v0 = _mm_add_epi32(s0, k__DCT_CONST_ROUNDING);
- v1 = _mm_add_epi32(s1, k__DCT_CONST_ROUNDING);
- v2 = _mm_add_epi32(s2, k__DCT_CONST_ROUNDING);
- v3 = _mm_add_epi32(s3, k__DCT_CONST_ROUNDING);
- v4 = _mm_add_epi32(s4, k__DCT_CONST_ROUNDING);
- v5 = _mm_add_epi32(s5, k__DCT_CONST_ROUNDING);
- v6 = _mm_add_epi32(s6, k__DCT_CONST_ROUNDING);
- v7 = _mm_add_epi32(s7, k__DCT_CONST_ROUNDING);
-
- y0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
- y1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
- y2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
- y3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
- y4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
- y5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
- y6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
- y7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
-
- u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
- u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
-
- s0 = _mm_srai_epi32(y0, DCT_CONST_BITS);
- s1 = _mm_srai_epi32(y1, DCT_CONST_BITS);
- s2 = _mm_srai_epi32(y2, DCT_CONST_BITS);
- s3 = _mm_srai_epi32(y3, DCT_CONST_BITS);
- s4 = _mm_srai_epi32(y4, DCT_CONST_BITS);
- s5 = _mm_srai_epi32(y5, DCT_CONST_BITS);
- s6 = _mm_srai_epi32(y6, DCT_CONST_BITS);
- s7 = _mm_srai_epi32(y7, DCT_CONST_BITS);
-
- in[14] = _mm_packs_epi32(u0, u1);
- in[6] = _mm_packs_epi32(u2, u3);
- in[10] = _mm_packs_epi32(u4, u5);
- in[2] = _mm_packs_epi32(u6, u7);
- in[12] = _mm_packs_epi32(s0, s1);
- in[4] = _mm_packs_epi32(s2, s3);
- in[8] = _mm_packs_epi32(s4, s5);
- in[0] = _mm_packs_epi32(s6, s7);
-}
-#endif // CONFIG_EXT_TX
-
static void fdct16_sse2(__m128i *in0, __m128i *in1) {
fdct16_8col(in0);
fdct16_8col(in1);
@@ -2777,14 +2156,6 @@
array_transpose_16x16(in0, in1);
}
-#if CONFIG_EXT_TX
-static void fdst16_sse2(__m128i *in0, __m128i *in1) {
- fdst16_8col(in0);
- fdst16_8col(in1);
- array_transpose_16x16(in0, in1);
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
__m128i in0[16], in1[16];
@@ -2850,55 +2221,6 @@
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
- case DST_DST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DCT_DST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdct16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DST_DCT:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdct16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DST_ADST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fadst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case ADST_DST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fadst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DST_FLIPADST:
- load_buffer_16x16(input, in0, in1, stride, 0, 1);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fadst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case FLIPADST_DST:
- load_buffer_16x16(input, in0, in1, stride, 1, 0);
- fadst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
#endif // CONFIG_EXT_TX
default:
assert(0);
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index ee1e305..24f42df 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -651,6 +651,44 @@
ref += ref_stride;
}
}
+
+void vpx_highbd_upsampled_pred_c(uint16_t *comp_pred,
+ int width, int height,
+ const uint8_t *ref8,
+ int ref_stride) {
+ int i, j;
+ int stride = ref_stride << 3;
+
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ comp_pred[j] = ref[(j << 3)];
+ }
+ comp_pred += width;
+ ref += stride;
+ }
+}
+
+void vpx_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
+ const uint8_t *pred8,
+ int width, int height,
+ const uint8_t *ref8,
+ int ref_stride) {
+ int i, j;
+ int stride = ref_stride << 3;
+
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ const int tmp = pred[j] + ref[(j << 3)];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += stride;
+ }
+}
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP10 && CONFIG_EXT_INTER
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index e5c002a..ced7009 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1238,6 +1238,13 @@
add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize qw/vpx_comp_avg_upsampled_pred sse2/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, const uint8_t *ref8, int ref_stride";
+ specialize qw/vpx_highbd_upsampled_pred sse2/;
+ add_proto qw/void vpx_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
+ specialize qw/vpx_highbd_comp_avg_upsampled_pred sse2/;
+}
+
#
# ...
#
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
index 81ec5db..e2b79bf 100644
--- a/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
@@ -7,7 +7,11 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+
+#include <emmintrin.h> // SSE2
+
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
@@ -591,3 +595,136 @@
#undef FNS
#undef FN
#endif // CONFIG_USE_X86INC
+
+void vpx_highbd_upsampled_pred_sse2(uint16_t *comp_pred,
+ int width, int height,
+ const uint8_t *ref8,
+ int ref_stride) {
+ int i, j;
+ int stride = ref_stride << 3;
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+
+ if (width >= 8) {
+ // read 8 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 8) {
+ __m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
+ __m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
+ __m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
+ __m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
+ __m128i s4 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 32));
+ __m128i s5 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 40));
+ __m128i s6 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 48));
+ __m128i s7 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 56));
+ __m128i t0, t1, t2, t3;
+
+ t0 = _mm_unpacklo_epi16(s0, s1);
+ t1 = _mm_unpacklo_epi16(s2, s3);
+ t2 = _mm_unpacklo_epi16(s4, s5);
+ t3 = _mm_unpacklo_epi16(s6, s7);
+ t0 = _mm_unpacklo_epi32(t0, t1);
+ t2 = _mm_unpacklo_epi32(t2, t3);
+ t0 = _mm_unpacklo_epi64(t0, t2);
+
+ _mm_storeu_si128((__m128i *)(comp_pred), t0);
+ comp_pred += 8;
+ ref += 64; // 8 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ } else {
+ // read 4 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 4) {
+ __m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
+ __m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
+ __m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
+ __m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
+ __m128i t0, t1;
+
+ t0 = _mm_unpacklo_epi16(s0, s1);
+ t1 = _mm_unpacklo_epi16(s2, s3);
+ t0 = _mm_unpacklo_epi32(t0, t1);
+
+ _mm_storel_epi64((__m128i *)(comp_pred), t0);
+ comp_pred += 4;
+ ref += 4 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ }
+}
+
+void vpx_highbd_comp_avg_upsampled_pred_sse2(uint16_t *comp_pred,
+ const uint8_t *pred8,
+ int width, int height,
+ const uint8_t *ref8,
+ int ref_stride) {
+ const __m128i one = _mm_set1_epi16(1);
+ int i, j;
+ int stride = ref_stride << 3;
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+
+ if (width >= 8) {
+ // read 8 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 8) {
+ __m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
+ __m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
+ __m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
+ __m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
+ __m128i s4 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 32));
+ __m128i s5 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 40));
+ __m128i s6 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 48));
+ __m128i s7 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 56));
+ __m128i p0 = _mm_loadu_si128((const __m128i *)pred);
+ __m128i t0, t1, t2, t3;
+
+ t0 = _mm_unpacklo_epi16(s0, s1);
+ t1 = _mm_unpacklo_epi16(s2, s3);
+ t2 = _mm_unpacklo_epi16(s4, s5);
+ t3 = _mm_unpacklo_epi16(s6, s7);
+ t0 = _mm_unpacklo_epi32(t0, t1);
+ t2 = _mm_unpacklo_epi32(t2, t3);
+ t0 = _mm_unpacklo_epi64(t0, t2);
+
+ p0 = _mm_adds_epu16(t0, p0);
+ p0 = _mm_adds_epu16(p0, one);
+ p0 = _mm_srli_epi16(p0, 1);
+
+ _mm_storeu_si128((__m128i *)(comp_pred), p0);
+ comp_pred += 8;
+ pred += 8;
+ ref += 8 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ } else {
+ // read 4 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 4) {
+ __m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
+ __m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
+ __m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
+ __m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)pred);
+ __m128i t0, t1;
+
+ t0 = _mm_unpacklo_epi16(s0, s1);
+ t1 = _mm_unpacklo_epi16(s2, s3);
+ t0 = _mm_unpacklo_epi32(t0, t1);
+
+ p0 = _mm_adds_epu16(t0, p0);
+ p0 = _mm_adds_epu16(p0, one);
+ p0 = _mm_srli_epi16(p0, 1);
+
+ _mm_storel_epi64((__m128i *)(comp_pred), p0);
+ comp_pred += 4;
+ pred += 4;
+ ref += 4 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ }
+}
diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c
index 63fc1e6..dc51173 100644
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -509,12 +509,11 @@
s2 = _mm_unpacklo_epi8(t1, s3);
s4 = _mm_unpacklo_epi8(t2, s5);
s6 = _mm_unpacklo_epi8(t3, s7);
+ s0 = _mm_unpacklo_epi32(s0, s2);
+ s4 = _mm_unpacklo_epi32(s4, s6);
+ s0 = _mm_unpacklo_epi64(s0, s4);
- *(int *)comp_pred = _mm_cvtsi128_si32(s0);
- *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(s2);
- *(int *)(comp_pred + 8) = _mm_cvtsi128_si32(s4);
- *(int *)(comp_pred + 12) = _mm_cvtsi128_si32(s6);
-
+ _mm_storeu_si128((__m128i *)(comp_pred), s0);
comp_pred += 16;
ref += 16 * 8;
}
@@ -537,9 +536,9 @@
s0 = _mm_unpacklo_epi8(t0, s1);
s2 = _mm_unpacklo_epi8(t1, s3);
+ s0 = _mm_unpacklo_epi32(s0, s2);
- *(int *)comp_pred = _mm_cvtsi128_si32(s0);
- *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(s2);
+ _mm_storel_epi64((__m128i *)(comp_pred), s0);
comp_pred += 8;
ref += 8 * 8;
}
@@ -558,7 +557,6 @@
s0 = _mm_unpacklo_epi8(t0, s1);
*(int *)comp_pred = _mm_cvtsi128_si32(s0);
-
comp_pred += 4;
ref += 4 * 8;
}
@@ -621,14 +619,7 @@
p1 = _mm_srli_epi16(p1, 1);
p0 = _mm_packus_epi16(p0, p1);
- *(int *)comp_pred = _mm_cvtsi128_si32(p0);
- p0 = _mm_srli_si128(p0, 4);
- *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(p0);
- p0 = _mm_srli_si128(p0, 4);
- *(int *)(comp_pred + 8) = _mm_cvtsi128_si32(p0);
- p0 = _mm_srli_si128(p0, 4);
- *(int *)(comp_pred + 12) = _mm_cvtsi128_si32(p0);
-
+ _mm_storeu_si128((__m128i *)(comp_pred), p0);
comp_pred += 16;
pred += 16;
ref += 16 * 8;
@@ -662,10 +653,7 @@
p0 = _mm_srli_epi16(p0, 1);
p0 = _mm_packus_epi16(p0, zero);
- *(int *)comp_pred = _mm_cvtsi128_si32(p0);
- p0 = _mm_srli_si128(p0, 4);
- *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(p0);
-
+ _mm_storel_epi64((__m128i *)(comp_pred), p0);
comp_pred += 8;
pred += 8;
ref += 8 * 8;
@@ -693,7 +681,6 @@
p0 = _mm_packus_epi16(p0, zero);
*(int *)comp_pred = _mm_cvtsi128_si32(p0);
-
comp_pred += 4;
pred += 4;
ref += 4 * 8;