Merge branch 'master' into nextgenv2
diff --git a/configure b/configure
index 24992c4..315c427 100755
--- a/configure
+++ b/configure
@@ -266,7 +266,14 @@
spatial_svc
fp_mb_stats
emulate_hardware
+ var_tx
+ ext_tx
misc_fixes
+ ext_intra
+ ext_inter
+ ext_interp
+ ext_refs
+ supertx
"
CONFIG_LIST="
dependency_tracking
diff --git a/test/test.mk b/test/test.mk
index 8d66244..face2ad 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -170,6 +170,10 @@
## VP10
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_txfm_test.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm1d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc
endif # CONFIG_SHARED
diff --git a/test/vp10_fwd_txfm1d_test.cc b/test/vp10_fwd_txfm1d_test.cc
new file mode 100644
index 0000000..a39e0ef
--- /dev/null
+++ b/test/vp10_fwd_txfm1d_test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm1d.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+static int txfm_type_num = 2;
+static TYPE_TXFM txfm_type_ls[2] = {TYPE_DCT, TYPE_ADST};
+
+static int txfm_size_num = 4;
+static int txfm_size_ls[4] = {4, 8, 16, 32};
+
+static TxfmFunc fwd_txfm_func_ls[2][4] = {
+ {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new},
+ {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}};
+
+// The maximum number of stages in any fwd/inv 1D DCT/ADST transform is 12.
+static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14};
+static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
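+// cos_bit[i] selects the fixed-point precision of the cosine constants used
+// at stage i; range_bit[i] is the bit range allowed for intermediate values
+// at stage i. A range of 32 makes the range check a no-op in these tests.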
+
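+// round_shift(value, bit) divides by 2^bit and rounds to the nearest integer,
+// breaking ties toward zero (3.5 -> 3, -3.5 -> -3), as the cases below check.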
+TEST(vp10_fwd_txfm1d, round_shift) {
+ EXPECT_EQ(round_shift(7, 1), 3);
+ EXPECT_EQ(round_shift(-7, 1), -3);
+
+ EXPECT_EQ(round_shift(7, 2), 2);
+ EXPECT_EQ(round_shift(-7, 2), -2);
+
+ EXPECT_EQ(round_shift(8, 2), 2);
+ EXPECT_EQ(round_shift(-8, 2), -2);
+}
+
+TEST(vp10_fwd_txfm1d, get_max_bit) {
+ int max_bit = get_max_bit(8);
+ EXPECT_EQ(max_bit, 3);
+}
+
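+// half_btf() computes half of a butterfly, w0 * in0 + w1 * in1, followed by a
+// rounding shift. With a shift of 0, the 32-bit result must equal the 64-bit
+// reference, i.e. the widest 15-bit operands must not overflow int32.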
+TEST(vp10_fwd_txfm1d, half_btf) {
+ int32_t max = (1 << 15) - 1;
+ int32_t w0 = max;
+ int32_t in0 = max;
+ int32_t w1 = max;
+ int32_t in1 = max;
+ int32_t result_32 = half_btf(w0, in0, w1, in1, 0);
+ int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+ EXPECT_EQ(result_32, result_64);
+}
+
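+// cospi_arr[i][j] stores cos(PI * j / 128) scaled to (cos_bit_min + i)
+// fractional bits; the seven rows checked span cos_bit_min .. cos_bit_min + 6.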
+TEST(vp10_fwd_txfm1d, cospi_arr) {
+ for (int i = 0; i < 7; i++) {
+ for (int j = 0; j < 64; j++) {
+ EXPECT_EQ(cospi_arr[i][j],
+ (int32_t)round(cos(M_PI * j / 128) * (1 << (cos_bit_min + i))));
+ }
+ }
+}
+
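+// clamp_block() clamps a block_size x block_size sub-block to [low, high] in
+// place; here only the 3x3 region with its top-left corner at (2, 1) changes.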
+TEST(vp10_fwd_txfm1d, clamp_block) {
+ int16_t block[5][5] = {{7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9}};
+
+ int16_t ref_block[5][5] = {{7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -4, 2, -3, 9},
+ {7, -4, 2, -3, 9},
+ {7, -4, 2, -3, 9}};
+
+ int row = 2;
+ int col = 1;
+ int block_size = 3;
+ int stride = 5;
+ clamp_block(block[row] + col, block_size, stride, -4, 2);
+ for (int r = 0; r < stride; r++) {
+ for (int c = 0; c < stride; c++) {
+ EXPECT_EQ(block[r][c], ref_block[r][c]);
+ }
+ }
+}
+
+TEST(vp10_fwd_txfm1d, accuracy) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int si = 0; si < txfm_size_num; ++si) {
+ int txfm_size = txfm_size_ls[si];
+ int32_t *input = new int32_t[txfm_size];
+ int32_t *output = new int32_t[txfm_size];
+ double *ref_input = new double[txfm_size];
+ double *ref_output = new double[txfm_size];
+
+ for (int ti = 0; ti < txfm_type_num; ++ti) {
+ TYPE_TXFM txfm_type = txfm_type_ls[ti];
+ TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si];
+ int max_error = 7;
+
+ const int count_test_block = 5000;
+    for (int ci = 0; ci < count_test_block; ++ci) {
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
+ ref_input[ni] = static_cast<double>(input[ni]);
+ }
+
+ fwd_txfm_func(input, output, cos_bit, range_bit);
+ reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type);
+
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ EXPECT_LE(
+ abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
+ max_error);
+ }
+ }
+ }
+
+ delete[] input;
+ delete[] output;
+ delete[] ref_input;
+ delete[] ref_output;
+ }
+}
+} // namespace
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc
new file mode 100644
index 0000000..e6416cc
--- /dev/null
+++ b/test/vp10_fwd_txfm2d_test.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm2d.h"
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+const int txfm_size_num = 4;
+const int txfm_size_ls[4] = {4, 8, 16, 32};
+const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = {
+ {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4,
+ fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4},
+ {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8,
+ fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8},
+ {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16,
+ fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16},
+ {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32,
+ fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}};
+
+const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = {
+ vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16,
+ vp10_fwd_txfm2d_32x32};
+
+const int txfm_type_num = 4;
+const TYPE_TXFM type_ls_0[4] = {TYPE_DCT, TYPE_DCT, TYPE_ADST, TYPE_ADST};
+const TYPE_TXFM type_ls_1[4] = {TYPE_DCT, TYPE_ADST, TYPE_ADST, TYPE_DCT};
+
+TEST(vp10_fwd_txfm2d, accuracy) {
+ for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) {
+ int txfm_size = txfm_size_ls[txfm_size_idx];
+ int sqr_txfm_size = txfm_size * txfm_size;
+ int16_t* input = new int16_t[sqr_txfm_size];
+ int32_t* output = new int32_t[sqr_txfm_size];
+ double* ref_input = new double[sqr_txfm_size];
+ double* ref_output = new double[sqr_txfm_size];
+
+ for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
+ ++txfm_type_idx) {
+ TXFM_2D_CFG fwd_txfm_cfg = fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+ Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
+ TYPE_TXFM type0 = type_ls_0[txfm_type_idx];
+ TYPE_TXFM type1 = type_ls_1[txfm_type_idx];
+ int amplify_bit =
+ fwd_txfm_cfg.shift[0] + fwd_txfm_cfg.shift[1] + fwd_txfm_cfg.shift[2];
+ double amplify_factor =
+ amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
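+      // The three shift stages scale the integer transform by 2^amplify_bit
+      // in total, so the floating-point reference is scaled by the same
+      // factor before comparison.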
+
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int count = 5000;
+ double avg_abs_error = 0;
+ for (int ci = 0; ci < count; ci++) {
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base;
+ ref_input[ni] = static_cast<double>(input[ni]);
+ output[ni] = 0;
+ ref_output[ni] = 0;
+ }
+
+ fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd);
+ reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
+
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ ref_output[ni] = round(ref_output[ni] * amplify_factor);
+ EXPECT_LE(fabs(output[ni] - ref_output[ni]) / amplify_factor, 30);
+ }
+ avg_abs_error += compute_avg_abs_error<int32_t, double>(
+ output, ref_output, sqr_txfm_size);
+ }
+
+ avg_abs_error /= amplify_factor;
+ avg_abs_error /= count;
+      // max_abs_avg_error is an upper bound on the observed avg_abs_error;
+      // uncomment to print the per-configuration averages:
+      // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error: %f\n",
+      //        type0, type1, txfm_size, avg_abs_error);
+ double max_abs_avg_error = 1.5;
+ EXPECT_LE(avg_abs_error, max_abs_avg_error);
+ }
+
+ delete[] input;
+ delete[] output;
+ delete[] ref_input;
+ delete[] ref_output;
+ }
+}
+
+} // anonymous namespace
diff --git a/test/vp10_inv_txfm1d_test.cc b/test/vp10_inv_txfm1d_test.cc
new file mode 100644
index 0000000..3b716c8
--- /dev/null
+++ b/test/vp10_inv_txfm1d_test.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm1d.h"
+#include "vp10/common/vp10_inv_txfm1d.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+static int txfm_type_num = 2;
+static int txfm_size_num = 4;
+static int txfm_size_ls[4] = {4, 8, 16, 32};
+
+static TxfmFunc fwd_txfm_func_ls[2][4] = {
+ {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new},
+ {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}};
+
+static TxfmFunc inv_txfm_func_ls[2][4] = {
+ {vp10_idct4_new, vp10_idct8_new, vp10_idct16_new, vp10_idct32_new},
+ {vp10_iadst4_new, vp10_iadst8_new, vp10_iadst16_new, vp10_iadst32_new}};
+
+// The maximum number of stages in any fwd/inv 1D DCT/ADST transform is 12.
+static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14};
+static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
+
+TEST(vp10_inv_txfm1d, round_trip) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int si = 0; si < txfm_size_num; ++si) {
+ int txfm_size = txfm_size_ls[si];
+ int32_t *input = new int32_t[txfm_size];
+ int32_t *output = new int32_t[txfm_size];
+ int32_t *round_trip_output = new int32_t[txfm_size];
+
+ for (int ti = 0; ti < txfm_type_num; ++ti) {
+ TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si];
+ TxfmFunc inv_txfm_func = inv_txfm_func_ls[ti][si];
+ int max_error = 2;
+
+ const int count_test_block = 5000;
+ for (int ci = 0; ci < count_test_block; ++ci) {
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
+ }
+
+ fwd_txfm_func(input, output, cos_bit, range_bit);
+ inv_txfm_func(output, round_trip_output, cos_bit, range_bit);
+
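+      // A forward + inverse pass amplifies the input by 2^(log2(size) - 1),
+      // so normalize the round-trip output before comparing.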
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ EXPECT_LE(abs(input[ni] - round_shift(round_trip_output[ni],
+ get_max_bit(txfm_size) - 1)),
+ max_error);
+ }
+ }
+ }
+ delete[] input;
+ delete[] output;
+ delete[] round_trip_output;
+ }
+}
+
+} // namespace
diff --git a/test/vp10_inv_txfm_test.cc b/test/vp10_inv_txfm_test.cc
index c49081e..6c0a3d2 100644
--- a/test/vp10_inv_txfm_test.cc
+++ b/test/vp10_inv_txfm_test.cc
@@ -203,7 +203,7 @@
// quantization with maximum allowed step sizes
test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
for (int j = 1; j < last_nonzero_; ++j)
- test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]]
+ test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]]
= (output_ref_block[j] / 1828) * 1828;
}
@@ -265,7 +265,7 @@
max_energy_leftover = 0;
coef = 0;
}
- test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef;
+ test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]] = coef;
}
memcpy(test_coef_block2, test_coef_block1,
diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h
new file mode 100644
index 0000000..967d38b
--- /dev/null
+++ b/test/vp10_txfm_test.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_TXFM_TEST_H_
+#define VP10_TXFM_TEST_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _MSC_VER
+#define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "vp10/common/vp10_txfm.h"
+
+typedef enum {
+ TYPE_DCT = 0,
+ TYPE_ADST,
+ TYPE_IDCT,
+ TYPE_IADST,
+ TYPE_LAST
+} TYPE_TXFM;
+
+static const double invSqrt2 = 1 / pow(2, 0.5);
+
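+// Unnormalized type-II DCT reference; the k == 0 term is scaled by 1/sqrt(2)
+// so that every basis vector has the same norm.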
+static void reference_dct_1d(const double* in, double* out, int size) {
+ for (int k = 0; k < size; ++k) {
+ out[k] = 0;
+ for (int n = 0; n < size; ++n) {
+ out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (2 * size));
+ }
+ if (k == 0) out[k] = out[k] * invSqrt2;
+ }
+}
+
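+// Reference "ADST": an unnormalized type-IV DST basis,
+// sin(PI * (2n + 1) * (2k + 1) / (4 * size)).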
+static void reference_adst_1d(const double* in, double* out, int size) {
+ for (int k = 0; k < size; ++k) {
+ out[k] = 0;
+ for (int n = 0; n < size; ++n) {
+ out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
+ }
+ }
+}
+
+static void reference_hybrid_1d(double* in, double* out, int size, int type) {
+ if (type == TYPE_DCT)
+ reference_dct_1d(in, out, size);
+ else
+ reference_adst_1d(in, out, size);
+}
+
+static void reference_hybrid_2d(double* in, double* out, int size, int type0,
+ int type1) {
+ double* tempOut = new double[size * size];
+
+ for (int r = 0; r < size; r++) {
+    // transpose: in -> tempOut
+ for (int c = 0; c < size; c++) {
+ tempOut[r * size + c] = in[c * size + r];
+ }
+ }
+
+  // 1D transform each row (a column of the input) with type0: tempOut -> out
+ for (int r = 0; r < size; r++) {
+ reference_hybrid_1d(tempOut + r * size, out + r * size, size, type0);
+ }
+
+ for (int r = 0; r < size; r++) {
+    // transpose: out -> tempOut
+ for (int c = 0; c < size; c++) {
+ tempOut[r * size + c] = out[c * size + r];
+ }
+ }
+
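+  // 1D transform each row with type1: tempOut -> out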
+ for (int r = 0; r < size; r++) {
+ reference_hybrid_1d(tempOut + r * size, out + r * size, size, type1);
+ }
+ delete[] tempOut;
+}
+
+template <typename Type1, typename Type2>
+static double compute_avg_abs_error(const Type1* a, const Type2* b,
+ const int size) {
+ double error = 0;
+ for (int i = 0; i < size; i++) {
+ error += fabs(static_cast<double>(a[i]) - static_cast<double>(b[i]));
+ }
+ error = error / size;
+ return error;
+}
+
+typedef void (*TxfmFunc)(const int32_t* in, int32_t* out, const int8_t* cos_bit,
+ const int8_t* range_bit);
+
+typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, const int,
+ const TXFM_2D_CFG*, const int);
+typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, const int,
+ const TXFM_2D_CFG*, const int);
+
+static const int bd = 10;
+static const int base = (1 << bd);
+
+#endif // VP10_TXFM_TEST_H_
diff --git a/test/vp9_arf_freq_test.cc b/test/vp9_arf_freq_test.cc
index 89200d4..670529c 100644
--- a/test/vp9_arf_freq_test.cc
+++ b/test/vp9_arf_freq_test.cc
@@ -78,19 +78,19 @@
return !strcmp(dot, ".y4m");
}
-class ArfFreqTest
+class ArfFreqTestLarge
: public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith3Params<TestVideoParam, \
TestEncodeParam, int> {
protected:
- ArfFreqTest()
+ ArfFreqTestLarge()
: EncoderTest(GET_PARAM(0)),
test_video_param_(GET_PARAM(1)),
test_encode_param_(GET_PARAM(2)),
min_arf_requested_(GET_PARAM(3)) {
}
- virtual ~ArfFreqTest() {}
+ virtual ~ArfFreqTestLarge() {}
virtual void SetUp() {
InitializeConfig();
@@ -190,7 +190,7 @@
int run_of_visible_frames_;
};
-TEST_P(ArfFreqTest, MinArfFreqTest) {
+TEST_P(ArfFreqTestLarge, MinArfFreqTest) {
cfg_.rc_target_bitrate = kBitrate;
cfg_.g_error_resilient = 0;
cfg_.g_profile = test_video_param_.profile;
@@ -225,26 +225,26 @@
}
VP9_INSTANTIATE_TEST_CASE(
- ArfFreqTest,
+ ArfFreqTestLarge,
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors));
#if CONFIG_VP9_HIGHBITDEPTH
-# if CONFIG_VP10_ENCODER
+#if CONFIG_VP10_ENCODER
// TODO(angiebird): 25-29 fail in high bitdepth mode.
INSTANTIATE_TEST_CASE_P(
- DISABLED_VP10, ArfFreqTest,
+ DISABLED_VP10, ArfFreqTestLarge,
::testing::Combine(
::testing::Values(static_cast<const libvpx_test::CodecFactory *>(
&libvpx_test::kVP10)),
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors)));
-# endif // CONFIG_VP10_ENCODER
+#endif // CONFIG_VP10_ENCODER
#else
VP10_INSTANTIATE_TEST_CASE(
- ArfFreqTest,
+ ArfFreqTestLarge,
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors));
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc
index 63f6dfe..8ac5c33 100644
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -108,7 +108,7 @@
TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
std::vector<std::string> single_thr_md5, multi_thr_md5;
- ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
+ ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 18);
cfg_.rc_target_bitrate = 1000;
@@ -138,5 +138,5 @@
VP10_INSTANTIATE_TEST_CASE(
VPxEncoderThreadTest,
::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
- ::testing::Range(1, 3));
+ ::testing::Range(1, 2));
} // namespace
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c
index 9ca86e5..364afde 100644
--- a/vp10/common/alloccommon.c
+++ b/vp10/common/alloccommon.c
@@ -97,6 +97,10 @@
cm->above_context = NULL;
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
+#if CONFIG_VAR_TX
+ vpx_free(cm->above_txfm_context);
+ cm->above_txfm_context = NULL;
+#endif
}
int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
@@ -128,6 +132,14 @@
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context));
if (!cm->above_seg_context) goto fail;
+
+#if CONFIG_VAR_TX
+ vpx_free(cm->above_txfm_context);
+ cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
+ mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_txfm_context));
+ if (!cm->above_txfm_context) goto fail;
+#endif
+
cm->above_context_alloc_cols = cm->mi_cols;
}
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index b89d791..3b94cc5 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -38,6 +38,13 @@
FRAME_TYPES,
} FRAME_TYPE;
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
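+// A filter is interpolating if its phase-0 kernel passes full-pel samples
+// through unchanged, which for these kernels means a center tap of 128
+// (unity in Q7).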
+#define IsInterpolatingFilter(filter) \
+ (vp10_filter_kernels[filter][0][SUBPEL_TAPS / 2 - 1] == 128)
+#else
+#define IsInterpolatingFilter(filter) (1)
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+
static INLINE int is_inter_mode(PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
@@ -77,16 +84,27 @@
uint8_t palette_first_color_idx[2];
} PALETTE_MODE_INFO;
+#if CONFIG_EXT_INTRA
+typedef struct {
+ // 1: an ext intra mode is used; 0: otherwise.
+ uint8_t use_ext_intra_mode[PLANE_TYPES];
+ EXT_INTRA_MODE ext_intra_mode[PLANE_TYPES];
+} EXT_INTRA_MODE_INFO;
+#endif // CONFIG_EXT_INTRA
+
// This structure now relates to 8x8 block regions.
typedef struct {
// Common for both INTER and INTRA blocks
BLOCK_SIZE sb_type;
PREDICTION_MODE mode;
TX_SIZE tx_size;
- int8_t skip;
-#if CONFIG_MISC_FIXES
- int8_t has_no_coeffs;
+#if CONFIG_VAR_TX
+  // TODO(jingning): This effectively assigns 64 entries for each 8x8 block.
+ // Apparently it takes much more space than needed.
+ TX_SIZE inter_tx_size[64];
#endif
+ int8_t skip;
+ int8_t has_no_coeffs;
int8_t segment_id;
int8_t seg_id_predicted; // valid only when temporal_update is enabled
@@ -97,6 +115,14 @@
// Only for INTER blocks
INTERP_FILTER interp_filter;
MV_REFERENCE_FRAME ref_frame[2];
+#if CONFIG_EXT_TX
+ TX_TYPE tx_type;
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int8_t angle_delta[2];
+#endif // CONFIG_EXT_INTRA
// TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
int_mv mv[2];
@@ -186,6 +212,8 @@
int up_available;
int left_available;
+ const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
+
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
@@ -206,6 +234,16 @@
PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT *above_txfm_context;
+ TXFM_CONTEXT *left_txfm_context;
+ TXFM_CONTEXT left_txfm_context_buffer[8];
+
+ // dimension in the unit of 8x8 block of the current block
+ int16_t n8_w, n8_h;
+ TX_SIZE max_tx_size;
+#endif
+
#if CONFIG_VP9_HIGHBITDEPTH
/* Bit depth: 8, 10, 12 */
int bd;
@@ -235,16 +273,158 @@
ADST_ADST, // TM
};
-static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd,
- int block_idx) {
+#if CONFIG_EXT_TX
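+// Number of transform types that can be signaled in each ext-tx set.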
+static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
+ 1, 17, 10, 2
+};
+static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
+ 1, 17,
+};
+
+#define USE_IDTX_FOR_32X32 0
+static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs,
+ int is_inter) {
+ (void) is_inter;
+ if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
+#if USE_IDTX_FOR_32X32
+ if (tx_size == TX_32X32) return is_inter ? 3 : 0;
+#else
+ if (tx_size == TX_32X32) return 0;
+#endif
+ return 1;
+}
+
+static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
+ int is_inter) {
+ const int set = get_ext_tx_set(tx_size, bs, is_inter);
+ return is_inter ? num_ext_tx_set_inter[set] : num_ext_tx_set_intra[set];
+}
+
+static const int use_intra_ext_tx_for_tx[EXT_TX_SETS_INTRA][TX_SIZES] = {
+ { 0, 0, 0, 0, }, // unused
+ { 1, 1, 1, 0, },
+};
+
+static const int use_inter_ext_tx_for_tx[EXT_TX_SETS_INTER][TX_SIZES] = {
+ { 0, 0, 0, 0, }, // unused
+ { 1, 1, 1, 0, },
+ { 0, 0, 0, 0, },
+ { 0, 0, 0, USE_IDTX_FOR_32X32, },
+};
+
+static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+};
+
+static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, },
+};
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+#define ALLOW_FILTER_INTRA_MODES 1
+#define ANGLE_STEP 3
+#define MAX_ANGLE_DELTAS 3
+#define ANGLE_FAST_SEARCH 1
+
+static const uint8_t mode_to_angle_map[INTRA_MODES] = {
+ 0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
+};
+
+static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = {
+ DCT_DCT, // FILTER_DC
+ ADST_DCT, // FILTER_V
+ DCT_ADST, // FILTER_H
+ DCT_DCT, // FILTER_D45
+ ADST_ADST, // FILTER_D135
+ ADST_DCT, // FILTER_D117
+ DCT_ADST, // FILTER_D153
+ DCT_ADST, // FILTER_D207
+ ADST_DCT, // FILTER_D63
+ ADST_ADST, // FILTER_TM
+};
+#endif // CONFIG_EXT_INTRA
+
+static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
+ const MACROBLOCKD *xd,
+ int block_idx, TX_SIZE tx_size) {
const MODE_INFO *const mi = xd->mi[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
- if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
- is_inter_block(mbmi) || mbmi->tx_size >= TX_32X32)
- return DCT_DCT;
+#if CONFIG_EXT_INTRA
+ if (!is_inter_block(mbmi)) {
+ const int use_ext_intra_mode_info =
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type];
+ const EXT_INTRA_MODE ext_intra_mode =
+ mbmi->ext_intra_mode_info.ext_intra_mode[plane_type];
+ const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y) ?
+ get_y_mode(mi, block_idx) : mbmi->uv_mode;
+ if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+ return DCT_DCT;
+
+#if CONFIG_EXT_TX
+ if (mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y)
+ return mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+
+ if (use_ext_intra_mode_info)
+ return filter_intra_mode_to_tx_type_lookup[ext_intra_mode];
+
+ if (mode == DC_PRED) {
+ return DCT_DCT;
+ } else if (mode == TM_PRED) {
+ return ADST_ADST;
+ } else {
+ int angle = mode_to_angle_map[mode];
+ if (mbmi->sb_type >= BLOCK_8X8)
+ angle += mbmi->angle_delta[plane_type] * ANGLE_STEP;
+ assert(angle > 0 && angle < 270);
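+      // Pick the transform pair from the prediction angle: 45..135 degrees
+      // (near vertical) pairs ADST vertically with DCT horizontally,
+      // 135..225 (near horizontal) the reverse, exactly 135 uses ADST in
+      // both directions, and the remaining angles fall back to 2D DCT.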
+ if (angle == 135)
+ return ADST_ADST;
+ else if (angle < 45 || angle > 225)
+ return DCT_DCT;
+ else if (angle < 135)
+ return ADST_DCT;
+ else
+ return DCT_ADST;
+ }
+ }
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_TX
+#if USE_IDTX_FOR_32X32
+ if (xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 ||
+ (tx_size >= TX_32X32 && !is_inter_block(mbmi)))
+#else
+ if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+#endif
+ return DCT_DCT;
+ if (mbmi->sb_type >= BLOCK_8X8) {
+ if (plane_type == PLANE_TYPE_Y)
+ return mbmi->tx_type;
+ if (is_inter_block(mbmi))
+ // UV Inter only
+ return (mbmi->tx_type == IDTX && tx_size == TX_32X32 ?
+ DCT_DCT : mbmi->tx_type);
+ }
+
+ // Sub8x8-Inter/Intra OR UV-Intra
+ if (is_inter_block(mbmi)) // Sub8x8-Inter
+ return DCT_DCT;
+ else // Sub8x8 Intra OR UV-Intra
+ return intra_mode_to_tx_type_lookup[plane_type == PLANE_TYPE_Y ?
+ get_y_mode(mi, block_idx) : mbmi->uv_mode];
+#else
+ if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
+ is_inter_block(mbmi) || tx_size >= TX_32X32)
+ return DCT_DCT;
return intra_mode_to_tx_type_lookup[get_y_mode(mi, block_idx)];
+#endif // CONFIG_EXT_TX
}
void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
@@ -292,7 +472,6 @@
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
foreach_transformed_block_visitor visit, void *arg);
-
void vp10_foreach_transformed_block(
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit, void *arg);
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index 2f93cb3..c1de3b2 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -22,6 +22,7 @@
#endif
#define DIFF_UPDATE_PROB 252
+#define GROUP_DIFF_UPDATE_PROB 252
// Coefficient token alphabet
#define ZERO_TOKEN 0 // 0 Extra Bits 0+0
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 8098305..9142d6a 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -127,21 +127,6 @@
}
};
-#if !CONFIG_MISC_FIXES
-const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = {
- { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc
- { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v
- { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h
- { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45
- { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135
- { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117
- { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153
- { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207
- { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63
- { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm
-};
-#endif
-
static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
{ 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8
{ 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16
@@ -162,32 +147,6 @@
{ 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm
};
-#if !CONFIG_MISC_FIXES
-const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS]
- [PARTITION_TYPES - 1] = {
- // 8x8 -> 4x4
- { 158, 97, 94 }, // a/l both not split
- { 93, 24, 99 }, // a split, l not split
- { 85, 119, 44 }, // l split, a not split
- { 62, 59, 67 }, // a/l both split
- // 16x16 -> 8x8
- { 149, 53, 53 }, // a/l both not split
- { 94, 20, 48 }, // a split, l not split
- { 83, 53, 24 }, // l split, a not split
- { 52, 18, 18 }, // a/l both split
- // 32x32 -> 16x16
- { 150, 40, 39 }, // a/l both not split
- { 78, 12, 26 }, // a split, l not split
- { 67, 33, 11 }, // l split, a not split
- { 24, 7, 5 }, // a/l both split
- // 64x64 -> 32x32
- { 174, 35, 49 }, // a/l both not split
- { 68, 11, 27 }, // a split, l not split
- { 57, 15, 9 }, // l split, a not split
- { 12, 3, 3 }, // a/l both split
-};
-#endif
-
static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
// 8x8 -> 4x4
@@ -742,10 +701,26 @@
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
}
+#if CONFIG_VAR_TX
+static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
+ 192, 128, 64, 192, 128, 64, 192, 128, 64,
+};
+#endif
+
static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = {
192, 128, 64
};
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+ [SWITCHABLE_FILTERS - 1] = {
+ { 235, 192, 128},
+ { 36, 243, 208},
+ { 34, 16, 128},
+ { 36, 243, 48},
+ { 149, 160, 128},
+};
+#else
static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1] = {
{ 235, 162, },
@@ -753,14 +728,332 @@
{ 34, 3, },
{ 149, 144, },
};
+#endif  // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
-#if CONFIG_MISC_FIXES
+#if CONFIG_EXT_TX
+const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
+ [TREE_SIZE(TX_TYPES)] = {
+  { // TODO(yaowu): remove unused entry 0.
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ 6, 12,
+ 8, 10,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 14, 16,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
+ }, {
+ -IDTX, -DCT_DCT,
+ }
+};
+
+const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
+ [TREE_SIZE(TX_TYPES)] = {
+  { // TODO(yaowu): remove unused entry 0.
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }
+};
+
+static const vpx_prob
+default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
+  { // TODO(yaowu): remove unused entry 0.
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#if EXT_TX_SIZES == 4
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#endif
+ }, {
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#if EXT_TX_SIZES == 4
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#endif
+ }, {
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+#if EXT_TX_SIZES == 4
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+#endif
+ }, {
+ { 12, },
+ { 12, },
+ { 12, },
+#if EXT_TX_SIZES == 4
+ { 12, },
+#endif
+ }
+};
+
+static const vpx_prob
+default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES]
+ [INTRA_MODES][TX_TYPES - 1] = {
+  { // TODO(yaowu): remove unused entry 0.
+ {
+ { 8, 11, 24, 112, 87, 137, 127, 134,
+ 128, 86, 128, 124, 125, 133, 176, 123, },
+ { 10, 9, 39, 106, 73, 155, 163, 228,
+ 35, 62, 129, 127, 133, 114, 213, 234, },
+ { 10, 9, 14, 88, 91, 127, 151, 51,
+ 210, 89, 126, 58, 52, 116, 217, 24, },
+ { 9, 6, 29, 113, 98, 131, 149, 210,
+ 119, 60, 124, 93, 90, 143, 170, 197, },
+ { 8, 8, 38, 101, 111, 166, 167, 141,
+ 130, 105, 128, 75, 75, 118, 197, 117, },
+ { 7, 8, 39, 91, 101, 153, 166, 200,
+ 99, 77, 123, 90, 83, 144, 224, 192, },
+ { 7, 10, 26, 86, 119, 154, 130, 101,
+ 152, 91, 129, 75, 79, 137, 219, 77, },
+ { 10, 13, 20, 86, 102, 162, 112, 76,
+ 171, 86, 134, 122, 106, 124, 196, 44, },
+ { 8, 9, 33, 108, 100, 144, 148, 215,
+ 77, 60, 125, 125, 128, 126, 198, 220, },
+ { 3, 10, 29, 111, 69, 141, 204, 141,
+ 139, 93, 120, 75, 77, 163, 242, 124, },
+ }, {
+ { 2, 53, 18, 147, 96, 98, 136, 133,
+ 131, 120, 153, 163, 169, 137, 173, 124, },
+ { 4, 18, 34, 133, 54, 130, 179, 228,
+ 28, 72, 153, 164, 168, 118, 227, 239, },
+ { 4, 18, 13, 125, 72, 110, 176, 36,
+ 221, 104, 148, 75, 72, 117, 225, 19, },
+ { 8, 33, 24, 162, 113, 99, 147, 226,
+ 103, 85, 153, 143, 153, 124, 155, 210, },
+ { 2, 15, 35, 107, 127, 158, 192, 128,
+ 126, 116, 151, 95, 88, 182, 241, 119, },
+ { 3, 15, 36, 112, 100, 146, 194, 189,
+ 90, 98, 152, 99, 100, 165, 235, 175, },
+ { 3, 16, 29, 109, 103, 140, 182, 76,
+ 173, 104, 147, 82, 85, 159, 235, 70, },
+ { 9, 24, 14, 120, 86, 156, 161, 34,
+ 177, 121, 142, 128, 128, 126, 185, 37, },
+ { 5, 24, 29, 152, 98, 99, 174, 228,
+ 82, 76, 147, 149, 128, 132, 191, 225, },
+ { 2, 15, 29, 111, 77, 126, 200, 135,
+ 117, 93, 152, 96, 84, 191, 245, 135, },
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#if EXT_TX_SIZES == 4
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#endif
+ },
+ }, {
+ {
+ { 8, 11, 24, 112, 87, 137, 127, 134,
+ 128, 86, 128, 124, 125, 133, 176, 123, },
+ { 10, 9, 39, 106, 73, 155, 163, 228,
+ 35, 62, 129, 127, 133, 114, 213, 234, },
+ { 10, 9, 14, 88, 91, 127, 151, 51,
+ 210, 89, 126, 58, 52, 116, 217, 24, },
+ { 9, 6, 29, 113, 98, 131, 149, 210,
+ 119, 60, 124, 93, 90, 143, 170, 197, },
+ { 8, 8, 38, 101, 111, 166, 167, 141,
+ 130, 105, 128, 75, 75, 118, 197, 117, },
+ { 7, 8, 39, 91, 101, 153, 166, 200,
+ 99, 77, 123, 90, 83, 144, 224, 192, },
+ { 7, 10, 26, 86, 119, 154, 130, 101,
+ 152, 91, 129, 75, 79, 137, 219, 77, },
+ { 10, 13, 20, 86, 102, 162, 112, 76,
+ 171, 86, 134, 122, 106, 124, 196, 44, },
+ { 8, 9, 33, 108, 100, 144, 148, 215,
+ 77, 60, 125, 125, 128, 126, 198, 220, },
+ { 3, 10, 29, 111, 69, 141, 204, 141,
+ 139, 93, 120, 75, 77, 163, 242, 124, },
+ }, {
+ { 2, 53, 18, 147, 96, 98, 136, 133,
+ 131, 120, 153, 163, 169, 137, 173, 124, },
+ { 4, 18, 34, 133, 54, 130, 179, 228,
+ 28, 72, 153, 164, 168, 118, 227, 239, },
+ { 4, 18, 13, 125, 72, 110, 176, 36,
+ 221, 104, 148, 75, 72, 117, 225, 19, },
+ { 8, 33, 24, 162, 113, 99, 147, 226,
+ 103, 85, 153, 143, 153, 124, 155, 210, },
+ { 2, 15, 35, 107, 127, 158, 192, 128,
+ 126, 116, 151, 95, 88, 182, 241, 119, },
+ { 3, 15, 36, 112, 100, 146, 194, 189,
+ 90, 98, 152, 99, 100, 165, 235, 175, },
+ { 3, 16, 29, 109, 103, 140, 182, 76,
+ 173, 104, 147, 82, 85, 159, 235, 70, },
+ { 9, 24, 14, 120, 86, 156, 161, 34,
+ 177, 121, 142, 128, 128, 126, 185, 37, },
+ { 5, 24, 29, 152, 98, 99, 174, 228,
+ 82, 76, 147, 149, 128, 132, 191, 225, },
+ { 2, 15, 29, 111, 77, 126, 200, 135,
+ 117, 93, 152, 96, 84, 191, 245, 135, },
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#if EXT_TX_SIZES == 4
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#endif
+ },
+ }
+};
+#endif // CONFIG_EXT_TX
+
// FIXME(someone) need real defaults here
static const struct segmentation_probs default_seg_probs = {
{ 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128 },
};
-#endif
+
+#if CONFIG_EXT_INTRA
+static const vpx_prob default_ext_intra_probs[2] = {230, 230};
+#endif // CONFIG_EXT_INTRA
static void init_mode_probs(FRAME_CONTEXT *fc) {
vp10_copy(fc->uv_mode_prob, default_uv_probs);
@@ -772,19 +1065,36 @@
vp10_copy(fc->comp_ref_prob, default_comp_ref_p);
vp10_copy(fc->single_ref_prob, default_single_ref_p);
fc->tx_probs = default_tx_probs;
+#if CONFIG_VAR_TX
+ vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
+#endif
vp10_copy(fc->skip_probs, default_skip_probs);
vp10_copy(fc->inter_mode_probs, default_inter_mode_probs);
-#if CONFIG_MISC_FIXES
+#if CONFIG_EXT_TX
+ vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
+ vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
+#endif // CONFIG_EXT_TX
vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
-#endif
+#if CONFIG_EXT_INTRA
+ vp10_copy(fc->ext_intra_probs, default_ext_intra_probs);
+#endif // CONFIG_EXT_INTRA
}
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
const vpx_tree_index vp10_switchable_interp_tree
- [TREE_SIZE(SWITCHABLE_FILTERS)] = {
+[TREE_SIZE(SWITCHABLE_FILTERS)] = {
+ -EIGHTTAP, 2,
+ 4, -EIGHTTAP_SHARP,
+ -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2,
+};
+#else
+const vpx_tree_index vp10_switchable_interp_tree
+[TREE_SIZE(SWITCHABLE_FILTERS)] = {
-EIGHTTAP, 2,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
};
+#endif  // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) {
int i, j;
@@ -814,16 +1124,6 @@
vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->y_mode_prob[i],
counts->y_mode[i], fc->y_mode_prob[i]);
-#if !CONFIG_MISC_FIXES
- for (i = 0; i < INTRA_MODES; ++i)
- vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i],
- counts->uv_mode[i], fc->uv_mode_prob[i]);
-
- for (i = 0; i < PARTITION_CONTEXTS; i++)
- vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
- counts->partition[i], fc->partition_prob[i]);
-#endif
-
if (cm->interp_filter == SWITCHABLE) {
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
vpx_tree_merge_probs(vp10_switchable_interp_tree,
@@ -863,11 +1163,42 @@
}
}
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT)
+ for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
+ fc->txfm_partition_prob[i] =
+ mode_mv_merge_probs(pre_fc->txfm_partition_prob[i],
+ counts->txfm_partition[i]);
+#endif
+
for (i = 0; i < SKIP_CONTEXTS; ++i)
fc->skip_probs[i] = mode_mv_merge_probs(
pre_fc->skip_probs[i], counts->skip[i]);
-#if CONFIG_MISC_FIXES
+#if CONFIG_EXT_TX
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (use_inter_ext_tx_for_tx[s][i]) {
+ vpx_tree_merge_probs(vp10_ext_tx_inter_tree[s],
+ pre_fc->inter_ext_tx_prob[s][i],
+ counts->inter_ext_tx[s][i],
+ fc->inter_ext_tx_prob[s][i]);
+ }
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (use_intra_ext_tx_for_tx[s][i]) {
+ int j;
+ for (j = 0; j < INTRA_MODES; ++j)
+ vpx_tree_merge_probs(vp10_ext_tx_intra_tree[s],
+ pre_fc->intra_ext_tx_prob[s][i][j],
+ counts->intra_ext_tx[s][i][j],
+ fc->intra_ext_tx_prob[s][i][j]);
+ }
+ }
+ }
+#endif // CONFIG_EXT_TX
+
if (cm->seg.temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++)
fc->seg.pred_probs[i] = mode_mv_merge_probs(pre_fc->seg.pred_probs[i],
@@ -887,7 +1218,13 @@
for (i = 0; i < PARTITION_CONTEXTS; i++)
vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
counts->partition[i], fc->partition_prob[i]);
-#endif
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ fc->ext_intra_probs[i] = mode_mv_merge_probs(
+ pre_fc->ext_intra_probs[i], counts->ext_intra[i]);
+ }
+#endif // CONFIG_EXT_INTRA
}
static void set_default_lf_deltas(struct loopfilter *lf) {
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index c9b667b..0d2ed50 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -32,7 +32,6 @@
#define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1)
#define PALETTE_Y_MODE_CONTEXTS 3
-
struct VP10Common;
struct tx_probs {
@@ -67,12 +66,21 @@
vpx_prob single_ref_prob[REF_CONTEXTS][2];
vpx_prob comp_ref_prob[REF_CONTEXTS];
struct tx_probs tx_probs;
+#if CONFIG_VAR_TX
+ vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
+#endif
vpx_prob skip_probs[SKIP_CONTEXTS];
nmv_context nmvc;
-#if CONFIG_MISC_FIXES
- struct segmentation_probs seg;
-#endif
int initialized;
+#if CONFIG_EXT_TX
+ vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
+ vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES - 1];
+#endif // CONFIG_EXT_TX
+ struct segmentation_probs seg;
+#if CONFIG_EXT_INTRA
+ vpx_prob ext_intra_probs[PLANE_TYPES];
+#endif // CONFIG_EXT_INTRA
} FRAME_CONTEXT;
typedef struct FRAME_COUNTS {
@@ -91,20 +99,24 @@
unsigned int single_ref[REF_CONTEXTS][2][2];
unsigned int comp_ref[REF_CONTEXTS][2];
struct tx_counts tx;
+#if CONFIG_VAR_TX
+ unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
+#endif
unsigned int skip[SKIP_CONTEXTS][2];
nmv_context_counts mv;
-#if CONFIG_MISC_FIXES
+#if CONFIG_EXT_TX
+ unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+ unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES];
+#endif // CONFIG_EXT_TX
struct seg_counts seg;
-#endif
+#if CONFIG_EXT_INTRA
+ unsigned int ext_intra[PLANE_TYPES][2];
+#endif // CONFIG_EXT_INTRA
} FRAME_COUNTS;
extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
[INTRA_MODES - 1];
-#if !CONFIG_MISC_FIXES
-extern const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
-extern const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS]
- [PARTITION_TYPES - 1];
-#endif
extern const vpx_prob
vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS];
extern const vpx_prob
@@ -138,6 +150,13 @@
void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]);
+#if CONFIG_EXT_TX
+extern const vpx_tree_index
+ vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER][TREE_SIZE(TX_TYPES)];
+extern const vpx_tree_index
+ vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)];
+#endif // CONFIG_EXT_TX
+
static INLINE int vp10_ceil_log2(int n) {
int i = 1, p = 2;
while (p < n) {
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 5f67e30..3f9395e 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -94,9 +94,33 @@
ADST_DCT = 1, // ADST in vertical, DCT in horizontal
DCT_ADST = 2, // DCT in vertical, ADST in horizontal
ADST_ADST = 3, // ADST in both directions
- TX_TYPES = 4
+#if CONFIG_EXT_TX
+ FLIPADST_DCT = 4,
+ DCT_FLIPADST = 5,
+ FLIPADST_FLIPADST = 6,
+ ADST_FLIPADST = 7,
+ FLIPADST_ADST = 8,
+ DST_DCT = 9,
+ DCT_DST = 10,
+ DST_ADST = 11,
+ ADST_DST = 12,
+ DST_FLIPADST = 13,
+ FLIPADST_DST = 14,
+ DST_DST = 15,
+ IDTX = 16,
+#endif // CONFIG_EXT_TX
+ TX_TYPES,
} TX_TYPE;
+#if CONFIG_EXT_TX
+#define EXT_TX_SIZES 3 // number of sizes that use extended transforms
+#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
+#define EXT_TX_SETS_INTRA 2 // Sets of transform selections for INTRA
+
+#define TX_TYPES_10 10
+#define TX_TYPES_2 2
+#endif // CONFIG_EXT_TX
+
typedef enum {
VP9_LAST_FLAG = 1 << 0,
VP9_GOLD_FLAG = 1 << 1,
@@ -132,6 +156,24 @@
PALETTE_COLORS
} PALETTE_COLOR;
+#if CONFIG_EXT_INTRA
+typedef enum {
+ FILTER_DC_PRED,
+ FILTER_V_PRED,
+ FILTER_H_PRED,
+ FILTER_D45_PRED,
+ FILTER_D135_PRED,
+ FILTER_D117_PRED,
+ FILTER_D153_PRED,
+ FILTER_D207_PRED,
+ FILTER_D63_PRED,
+ FILTER_TM_PRED,
+ EXT_INTRA_MODES,
+} EXT_INTRA_MODE;
+
+#define FILTER_INTRA_MODES (FILTER_TM_PRED + 1)
+#endif // CONFIG_EXT_INTRA
+
#define DC_PRED 0 // Average of above and left pixels
#define V_PRED 1 // Vertical
#define H_PRED 2 // Horizontal
@@ -163,6 +205,11 @@
#define COMP_INTER_CONTEXTS 5
#define REF_CONTEXTS 5
+#if CONFIG_VAR_TX
+#define TXFM_PARTITION_CONTEXTS 9
+typedef TX_SIZE TXFM_CONTEXT;
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/filter.c b/vp10/common/filter.c
index dda279f..a9225b6 100644
--- a/vp10/common/filter.c
+++ b/vp10/common/filter.c
@@ -32,9 +32,28 @@
{ 0, 0, 0, 8, 120, 0, 0, 0 }
};
-// Lagrangian interpolation filter
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+#if CONFIG_EXT_INTERP
+ // intfilt 0.575
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {0, 1, -5, 126, 8, -3, 1, 0},
+ {-1, 3, -10, 123, 18, -6, 2, -1},
+ {-1, 4, -14, 118, 27, -9, 3, 0},
+ {-1, 5, -16, 112, 37, -12, 4, -1},
+ {-1, 5, -18, 105, 48, -14, 4, -1},
+ {-1, 6, -19, 97, 58, -17, 5, -1},
+ {-1, 6, -20, 88, 68, -18, 6, -1},
+ {-1, 6, -19, 78, 78, -19, 6, -1},
+ {-1, 6, -18, 68, 88, -20, 6, -1},
+ {-1, 5, -17, 58, 97, -19, 6, -1},
+ {-1, 4, -14, 48, 105, -18, 5, -1},
+ {-1, 4, -12, 37, 112, -16, 5, -1},
+ {0, 3, -9, 27, 118, -14, 4, -1},
+ {-1, 2, -6, 18, 123, -10, 3, -1},
+ {0, 1, -3, 8, 126, -5, 1, 0},
+#else
+ // Lagrangian interpolation filter
{ 0, 0, 0, 128, 0, 0, 0, 0},
{ 0, 1, -5, 126, 8, -3, 1, 0},
{ -1, 3, -10, 122, 18, -6, 2, 0},
@@ -51,11 +70,31 @@
{ -1, 3, -9, 27, 118, -13, 4, -1},
{ 0, 2, -6, 18, 122, -10, 3, -1},
{ 0, 1, -3, 8, 126, -5, 1, 0}
+#endif // CONFIG_EXT_INTERP
};
-// DCT based filter
DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
+ sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+#if CONFIG_EXT_INTERP
+ // intfilt 0.8
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 2, -6, 127, 9, -4, 2, -1},
+ {-2, 5, -12, 124, 18, -7, 4, -2},
+ {-2, 7, -16, 119, 28, -11, 5, -2},
+ {-3, 8, -19, 114, 38, -14, 7, -3},
+ {-3, 9, -22, 107, 49, -17, 8, -3},
+ {-4, 10, -23, 99, 60, -20, 10, -4},
+ {-4, 11, -23, 90, 70, -22, 10, -4},
+ {-4, 11, -23, 80, 80, -23, 11, -4},
+ {-4, 10, -22, 70, 90, -23, 11, -4},
+ {-4, 10, -20, 60, 99, -23, 10, -4},
+ {-3, 8, -17, 49, 107, -22, 9, -3},
+ {-3, 7, -14, 38, 114, -19, 8, -3},
+ {-2, 5, -11, 28, 119, -16, 7, -2},
+ {-2, 4, -7, 18, 124, -12, 5, -2},
+ {-1, 2, -4, 9, 127, -6, 2, -1},
+#else
+ // DCT based filter
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 3, -7, 127, 8, -3, 1, 0},
{-2, 5, -13, 125, 17, -6, 3, -1},
@@ -72,11 +111,58 @@
{-2, 5, -10, 27, 121, -17, 7, -3},
{-1, 3, -6, 17, 125, -13, 5, -2},
{0, 1, -3, 8, 127, -7, 3, -1}
+#endif // CONFIG_EXT_INTERP
};
-// freqmultiplier = 0.5
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+
DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
+ sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.35
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 8, 31, 47, 34, 10, 0, -1},
+ {-1, 7, 29, 46, 36, 12, 0, -1},
+ {-1, 6, 28, 46, 37, 13, 0, -1},
+ {-1, 5, 26, 46, 38, 14, 1, -1},
+ {-1, 4, 25, 45, 39, 16, 1, -1},
+ {-1, 4, 23, 44, 41, 17, 1, -1},
+ {-1, 3, 21, 44, 42, 18, 2, -1},
+ {-1, 2, 20, 43, 43, 20, 2, -1},
+ {-1, 2, 18, 42, 44, 21, 3, -1},
+ {-1, 1, 17, 41, 44, 23, 4, -1},
+ {-1, 1, 16, 39, 45, 25, 4, -1},
+ {-1, 1, 14, 38, 46, 26, 5, -1},
+ {-1, 0, 13, 37, 46, 28, 6, -1},
+ {-1, 0, 12, 36, 46, 29, 7, -1},
+ {-1, 0, 10, 34, 47, 31, 8, -1},
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.75
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {2, -10, 19, 95, 31, -11, 2, 0},
+ {2, -9, 14, 94, 37, -12, 2, 0},
+ {2, -8, 9, 92, 43, -12, 1, 1},
+ {2, -7, 5, 90, 49, -12, 1, 0},
+ {2, -5, 1, 86, 55, -12, 0, 1},
+ {1, -4, -2, 82, 61, -11, 0, 1},
+ {1, -3, -5, 77, 67, -9, -1, 1},
+ {1, -2, -7, 72, 72, -7, -2, 1},
+ {1, -1, -9, 67, 77, -5, -3, 1},
+ {1, 0, -11, 61, 82, -2, -4, 1},
+ {1, 0, -12, 55, 86, 1, -5, 2},
+ {0, 1, -12, 49, 90, 5, -7, 2},
+ {1, 1, -12, 43, 92, 9, -8, 2},
+ {0, 2, -12, 37, 94, 14, -9, 2},
+ {0, 2, -11, 31, 95, 19, -10, 2},
+};
+
+#else
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.5
{ 0, 0, 0, 128, 0, 0, 0, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
{-2, -2, 29, 63, 41, 2, -3, 0},
@@ -95,10 +181,14 @@
{ 0, -3, 1, 38, 64, 32, -1, -3}
};
+#endif  // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
-const InterpKernel *vp10_filter_kernels[4] = {
+const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1] = {
sub_pel_filters_8,
- sub_pel_filters_8lp,
- sub_pel_filters_8s,
+ sub_pel_filters_8smooth,
+ sub_pel_filters_8sharp,
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+ sub_pel_filters_8smooth2,
+#endif
bilinear_filters
};
diff --git a/vp10/common/filter.h b/vp10/common/filter.h
index 826cd03..de26b76 100644
--- a/vp10/common/filter.h
+++ b/vp10/common/filter.h
@@ -24,16 +24,24 @@
#define EIGHTTAP 0
#define EIGHTTAP_SMOOTH 1
#define EIGHTTAP_SHARP 2
+
+#if CONFIG_EXT_INTERP
+#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn it on for experimentation */
+#define EIGHTTAP_SMOOTH2 3
+#define SWITCHABLE_FILTERS 4 /* Number of switchable filters */
+#else
#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
-#define BILINEAR 3
+#endif // CONFIG_EXT_INTERP
// The codec can operate in four possible inter prediction filter mode:
// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+
+#define BILINEAR (SWITCHABLE_FILTERS)
+#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
-#define SWITCHABLE 4 /* should be the last one */
typedef uint8_t INTERP_FILTER;
-extern const InterpKernel *vp10_filter_kernels[4];
+extern const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1];
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 5ee15c8..3b806dd 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -13,107 +13,614 @@
#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp10/common/blockd.h"
+#include "vp10/common/enums.h"
#include "vp10/common/idct.h"
#include "vpx_dsp/inv_txfm.h"
#include "vpx_ports/mem.h"
+#if CONFIG_EXT_TX
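+// Inverse discrete sine transforms backing the DST_* extended transform
+// types: each is a dense multiply against a sine basis whose values are
+// stored with 2 * DCT_CONST_BITS fractional bits, as the lookup tables note.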
+void idst4_c(const tran_low_t *input, tran_low_t *output) {
+ // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 141124871, 228344838,
+ };
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+}
+
+void idst8_c(const tran_low_t *input, tran_low_t *output) {
+ // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+ static const int32_t sinvalue_lookup[] = {
+ 86559612, 162678858, 219176632, 249238470
+ };
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = (s07 + s16 - s34)* sinvalue_lookup[2];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = (d07 - d16 + d34)* sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+}
+
+void idst16_c(const tran_low_t *input, tran_low_t *output) {
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 47852167, 94074787, 137093803, 175444254,
+ 207820161, 233119001, 250479254, 259309736
+ };
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+}
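
The sinvalue_lookup tables above are fixed-point sine constants: entry k of
the length-n table is round(sin(k*pi/(n+1)) * scale * 2^28), where the scale
factors are the ones quoted in each comment and 28 = 2 * DCT_CONST_BITS is
the shift later applied by ROUND_POWER_OF_TWO. A minimal standalone sketch
that regenerates all three tables, assuming DCT_CONST_BITS == 14:

  #include <math.h>
  #include <stdio.h>

  int main(void) {
    const int sizes[3] = { 4, 8, 16 };
    /* Normalization factors, taken from the comments above. */
    const double scales[3] = {
      sqrt(2.0 / 5.0) * sqrt(2.0),        /* idst4  */
      sqrt(2.0 / 9.0) * 2.0,              /* idst8  */
      sqrt(2.0 / 17.0) * 2.0 * sqrt(2.0)  /* idst16 */
    };
    int i, k;
    for (i = 0; i < 3; ++i) {
      const int n = sizes[i];
      printf("idst%d:", n);
      /* Entry k: sin(k*pi/(n+1)) scaled to 28-bit fixed point. */
      for (k = 1; k <= n / 2; ++k)
        printf(" %.0f",
               round(sin(M_PI * k / (n + 1)) * scales[i] * 268435456.0));
      printf("\n");
    }
    return 0;
  }

Its first output line should read "idst4: 141124871 228344838", matching the
table in highbd_idst4_c below; the idst8 and idst16 lines match the tables
above.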
+
+// Inverse identity transform and add.
+static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+ int bs) {
+ int r, c;
+ const int shift = bs < 32 ? 3 : 2;
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c)
+ dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
+ dest += stride;
+ input += bs;
+ }
+}
+
+#define FLIPUD_PTR(dest, stride, size) do { \
+ (dest) = (dest) + ((size) - 1) * (stride); \
+ (stride) = - (stride); \
+} while (0)
+
+static void maybe_flip_strides(uint8_t **dst, int *dstride,
+ tran_low_t **src, int *sstride,
+ int tx_type, int size) {
+ // Note that the transpose of src will be added to dst. In order to LR
+ // flip the addends (in dst coordinates), we UD flip the src. To UD flip
+ // the addends, we UD flip the dst.
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ break;
+ case FLIPADST_DCT:
+ case FLIPADST_ADST:
+ case FLIPADST_DST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, size);
+ break;
+ case DCT_FLIPADST:
+ case ADST_FLIPADST:
+ case DST_FLIPADST:
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, size);
+ break;
+ case FLIPADST_FLIPADST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, size);
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, size);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
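
To make the pointer arithmetic concrete: after FLIPUD_PTR runs, index
i * stride + j in the flipped view addresses row (size - 1 - i) of the
original buffer, so the add loops later walk the block bottom-up without
copying anything. A small self-checking sketch using the macro defined
above:

  #include <assert.h>
  #include <stdint.h>

  static void flipud_ptr_demo(void) {
    uint8_t buf[4 * 4];
    uint8_t *view = buf;
    int stride = 4;
    int i, j;
    for (i = 0; i < 4 * 4; ++i) buf[i] = (uint8_t)i;
    FLIPUD_PTR(view, stride, 4);  /* view -> last row, stride negated */
    for (i = 0; i < 4; ++i)
      for (j = 0; j < 4; ++j)
        assert(view[i * stride + j] == buf[(3 - i) * 4 + j]);
  }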
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 141124871, 228344838,
+ };
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+
+#if !CONFIG_EMULATE_HARDWARE
+ (void)bd;
+#endif
+
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+}
+
+void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+ static const int32_t sinvalue_lookup[] = {
+ 86559612, 162678858, 219176632, 249238470
+ };
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+
+#if !CONFIG_EMULATE_HARDWARE
+ (void)bd;
+#endif
+
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+}
+
+void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 47852167, 94074787, 137093803, 175444254,
+ 207820161, 233119001, 250479254, 259309736
+ };
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+
+#if !CONFIG_EMULATE_HARDWARE
+ (void)bd;
+#endif
+
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+}
+
+static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bs, int bd) {
+ int r, c;
+ const int shift = bs < 32 ? 3 : 2;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c)
+ dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
+ dest += stride;
+ input += bs;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
+
void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int tx_type) {
- const transform_2d IHT_4[] = {
- { idct4_c, idct4_c }, // DCT_DCT = 0
- { iadst4_c, idct4_c }, // ADST_DCT = 1
- { idct4_c, iadst4_c }, // DCT_ADST = 2
- { iadst4_c, iadst4_c } // ADST_ADST = 3
+ int tx_type) {
+ static const transform_2d IHT_4[] = {
+ { idct4_c, idct4_c }, // DCT_DCT = 0,
+ { iadst4_c, idct4_c }, // ADST_DCT = 1,
+ { idct4_c, iadst4_c }, // DCT_ADST = 2,
+ { iadst4_c, iadst4_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { iadst4_c, idct4_c }, // FLIPADST_DCT = 4,
+ { idct4_c, iadst4_c }, // DCT_FLIPADST = 5,
+ { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6,
+ { iadst4_c, iadst4_c }, // ADST_FLIPADST = 7,
+ { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8,
+ { idst4_c, idct4_c }, // DST_DCT = 9,
+ { idct4_c, idst4_c }, // DCT_DST = 10,
+ { idst4_c, iadst4_c }, // DST_ADST = 11,
+ { iadst4_c, idst4_c }, // ADST_DST = 12,
+ { idst4_c, iadst4_c }, // DST_FLIPADST = 13,
+ { iadst4_c, idst4_c }, // FLIPADST_DST = 14,
+ { idst4_c, idst4_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
int i, j;
- tran_low_t out[4 * 4];
- tran_low_t *outptr = out;
- tran_low_t temp_in[4], temp_out[4];
+ tran_low_t tmp;
+ tran_low_t out[4][4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 4;
// inverse transform row vectors
for (i = 0; i < 4; ++i) {
- IHT_4[tx_type].rows(input, outptr);
+ IHT_4[tx_type].rows(input, out[i]);
input += 4;
- outptr += 4;
+ }
+
+ // transpose
+ for (i = 1; i < 4; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
}
// inverse transform column vectors
for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j * 4 + i];
- IHT_4[tx_type].cols(temp_in, temp_out);
+ IHT_4[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) {
- dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
- ROUND_POWER_OF_TWO(temp_out[j], 4));
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
}
}
}
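
The restructured add functions are easiest to read as two 1-D passes joined
by an in-place transpose:

  out = ColTx( (RowTx(X))^T )
  dest[i][j] += clip_pixel( ROUND_POWER_OF_TWO(out[j][i], 4) )

The j/i swap in the final loop undoes the transpose; the shift becomes 5 and
6 in the 8x8 and 16x16 versions below; and under CONFIG_EXT_TX,
maybe_flip_strides implements the FLIPADST variants purely by adjusting
pointers and strides before the summation.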
-static const transform_2d IHT_8[] = {
- { idct8_c, idct8_c }, // DCT_DCT = 0
- { iadst8_c, idct8_c }, // ADST_DCT = 1
- { idct8_c, iadst8_c }, // DCT_ADST = 2
- { iadst8_c, iadst8_c } // ADST_ADST = 3
-};
-
void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
+ static const transform_2d IHT_8[] = {
+ { idct8_c, idct8_c }, // DCT_DCT = 0,
+ { iadst8_c, idct8_c }, // ADST_DCT = 1,
+ { idct8_c, iadst8_c }, // DCT_ADST = 2,
+ { iadst8_c, iadst8_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { iadst8_c, idct8_c }, // FLIPADST_DCT = 4,
+ { idct8_c, iadst8_c }, // DCT_FLIPADST = 5,
+ { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6,
+ { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7,
+ { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8,
+ { idst8_c, idct8_c }, // DST_DCT = 9,
+ { idct8_c, idst8_c }, // DCT_DST = 10,
+ { idst8_c, iadst8_c }, // DST_ADST = 11,
+ { iadst8_c, idst8_c }, // ADST_DST = 12,
+ { idst8_c, iadst8_c }, // DST_FLIPADST = 13,
+ { iadst8_c, idst8_c }, // FLIPADST_DST = 14,
+ { idst8_c, idst8_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
int i, j;
- tran_low_t out[8 * 8];
- tran_low_t *outptr = out;
- tran_low_t temp_in[8], temp_out[8];
- const transform_2d ht = IHT_8[tx_type];
+ tran_low_t tmp;
+ tran_low_t out[8][8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 8;
// inverse transform row vectors
for (i = 0; i < 8; ++i) {
- ht.rows(input, outptr);
- input += 8;
- outptr += 8;
+ IHT_8[tx_type].rows(input, out[i]);
+ input += 8;
+ }
+
+ // transpose
+ for (i = 1; i < 8; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
}
// inverse transform column vectors
for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j * 8 + i];
- ht.cols(temp_in, temp_out);
+ IHT_8[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) {
- dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
- ROUND_POWER_OF_TWO(temp_out[j], 5));
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
}
}
}
-static const transform_2d IHT_16[] = {
- { idct16_c, idct16_c }, // DCT_DCT = 0
- { iadst16_c, idct16_c }, // ADST_DCT = 1
- { idct16_c, iadst16_c }, // DCT_ADST = 2
- { iadst16_c, iadst16_c } // ADST_ADST = 3
-};
-
void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
- int i, j;
- tran_low_t out[16 * 16];
- tran_low_t *outptr = out;
- tran_low_t temp_in[16], temp_out[16];
- const transform_2d ht = IHT_16[tx_type];
+ static const transform_2d IHT_16[] = {
+ { idct16_c, idct16_c }, // DCT_DCT = 0,
+ { iadst16_c, idct16_c }, // ADST_DCT = 1,
+ { idct16_c, iadst16_c }, // DCT_ADST = 2,
+ { iadst16_c, iadst16_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { iadst16_c, idct16_c }, // FLIPADST_DCT = 4,
+ { idct16_c, iadst16_c }, // DCT_FLIPADST = 5,
+ { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6,
+ { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7,
+ { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8,
+ { idst16_c, idct16_c }, // DST_DCT = 9,
+ { idct16_c, idst16_c }, // DCT_DST = 10,
+ { idst16_c, iadst16_c }, // DST_ADST = 11,
+ { iadst16_c, idst16_c }, // ADST_DST = 12,
+ { idst16_c, iadst16_c }, // DST_FLIPADST = 13,
+ { iadst16_c, idst16_c }, // FLIPADST_DST = 14,
+ { idst16_c, idst16_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
- // Rows
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[16][16];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 16;
+
+ // inverse transform row vectors
for (i = 0; i < 16; ++i) {
- ht.rows(input, outptr);
- input += 16;
- outptr += 16;
+ IHT_16[tx_type].rows(input, out[i]);
+ input += 16;
}
- // Columns
+ // transpose
+ for (i = 1; i < 16; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j * 16 + i];
- ht.cols(temp_in, temp_out);
+ IHT_16[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) {
- dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
- ROUND_POWER_OF_TWO(temp_out[j], 6));
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
}
}
}
@@ -183,20 +690,43 @@
if (lossless) {
assert(tx_type == DCT_DCT);
vp10_iwht4x4_add(input, dest, stride, eob);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vp10_idct4x4_add(input, dest, stride, eob);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_iht4x4_16_add(input, dest, stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vp10_idct4x4_add(input, dest, stride, eob);
+ break;
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_iht4x4_16_add(input, dest, stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_iht4x4_16_add(input, dest, stride, tx_type);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
+ break;
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
}
}
@@ -211,6 +741,28 @@
case ADST_ADST:
vp10_iht8x8_64_add(input, dest, stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_iht8x8_64_add(input, dest, stride, tx_type);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
+ break;
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -228,6 +780,28 @@
case ADST_ADST:
vp10_iht16x16_256_add(input, dest, stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_iht16x16_256_add(input, dest, stride, tx_type);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
+ break;
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -240,6 +814,11 @@
case DCT_DCT:
vp10_idct32x32_add(input, dest, stride, eob);
break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
@@ -254,104 +833,201 @@
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
- const highbd_transform_2d IHT_4[] = {
- { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
- { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
- { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
+ static const highbd_transform_2d HIGH_IHT_4[] = {
+ { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0,
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1,
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4,
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8,
+ { highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 9,
+ { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 10,
+ { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 11,
+ { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 12,
+ { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13,
+ { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14,
+ { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
+
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
int i, j;
- tran_low_t out[4 * 4];
- tran_low_t *outptr = out;
- tran_low_t temp_in[4], temp_out[4];
+ tran_low_t tmp;
+ tran_low_t out[4][4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 4;
- // Inverse transform row vectors.
+ // inverse transform row vectors
for (i = 0; i < 4; ++i) {
- IHT_4[tx_type].rows(input, outptr, bd);
+ HIGH_IHT_4[tx_type].rows(input, out[i], bd);
input += 4;
- outptr += 4;
}
- // Inverse transform column vectors.
+ // transpose
+ for (i = 1; i < 4; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j * 4 + i];
- IHT_4[tx_type].cols(temp_in, temp_out, bd);
+ HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides((uint8_t**)&dest, &stride,
+ &outp, &outstride, tx_type, 4 * 2);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) {
- dest[j * stride + i] = highbd_clip_pixel_add(
- dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 4), bd);
}
}
}
-static const highbd_transform_2d HIGH_IHT_8[] = {
- { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
- { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
- { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
-};
-
void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
- int i, j;
- tran_low_t out[8 * 8];
- tran_low_t *outptr = out;
- tran_low_t temp_in[8], temp_out[8];
- const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
+ static const highbd_transform_2d HIGH_IHT_8[] = {
+ { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0,
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1,
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4,
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8,
+ { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 9,
+ { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 10,
+ { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 11,
+ { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 12,
+ { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13,
+ { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14,
+ { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- // Inverse transform row vectors.
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[8][8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 8;
+
+ // inverse transform row vectors
for (i = 0; i < 8; ++i) {
- ht.rows(input, outptr, bd);
- input += 8;
- outptr += 8;
+ HIGH_IHT_8[tx_type].rows(input, out[i], bd);
+ input += 8;
}
- // Inverse transform column vectors.
+ // transpose
+ for (i = 1; i < 8; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j * 8 + i];
- ht.cols(temp_in, temp_out, bd);
+ HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides((uint8_t**)&dest,
+ &stride, &outp, &outstride, tx_type, 8 * 2);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) {
- dest[j * stride + i] = highbd_clip_pixel_add(
- dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 5), bd);
}
}
}
-static const highbd_transform_2d HIGH_IHT_16[] = {
- { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
- { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
- { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
-};
-
void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
- int i, j;
- tran_low_t out[16 * 16];
- tran_low_t *outptr = out;
- tran_low_t temp_in[16], temp_out[16];
- const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
+ static const highbd_transform_2d HIGH_IHT_16[] = {
+ { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0,
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1,
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4,
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST = 7,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8,
+ { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 9,
+ { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 10,
+ { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 11,
+ { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 12,
+ { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13,
+ { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14,
+ { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- // Rows
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[16][16];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 16;
+
+ // inverse transform row vectors
for (i = 0; i < 16; ++i) {
- ht.rows(input, outptr, bd);
- input += 16;
- outptr += 16;
+ HIGH_IHT_16[tx_type].rows(input, out[i], bd);
+ input += 16;
}
- // Columns
+ // transpose
+ for (i = 1; i < 16; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j * 16 + i];
- ht.cols(temp_in, temp_out, bd);
+ HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides((uint8_t**)&dest, &stride,
+ &outp, &outstride, tx_type, 16 * 2);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) {
- dest[j * stride + i] = highbd_clip_pixel_add(
- dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 6), bd);
}
}
}
@@ -425,20 +1101,43 @@
if (lossless) {
assert(tx_type == DCT_DCT);
vp10_highbd_iwht4x4_add(input, dest, stride, eob, bd);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vp10_highbd_idct4x4_add(input, dest, stride, eob, bd);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
- break;
- default:
- assert(0);
- break;
- }
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vp10_highbd_idct4x4_add(input, dest, stride, eob, bd);
+ break;
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 4, bd);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
}
}
@@ -454,6 +1153,28 @@
case ADST_ADST:
vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 8, bd);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -472,6 +1193,28 @@
case ADST_ADST:
vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 16, bd);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -485,6 +1228,11 @@
case DCT_DCT:
vp10_highbd_idct32x32_add(input, dest, stride, eob, bd);
break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
+ break;
+#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index a1925de..e3c50ea 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -719,11 +719,7 @@
uint64_t *const int_4x4_y = &lfm->int_4x4_y;
uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
-#if CONFIG_MISC_FIXES
uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
-#else
- uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
-#endif
int i;
// If filter level is 0 we don't loop filter.
@@ -758,13 +754,8 @@
// If the block has no coefficients and is not intra we skip applying
// the loop filter on block edges.
-#if CONFIG_MISC_FIXES
if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi))
return;
-#else
- if (mbmi->skip && is_inter_block(mbmi))
- return;
-#endif
// Here we are adding a mask for the transform size. The transform
// size mask is set to be correct for a 64x64 prediction block size. We
@@ -821,13 +812,8 @@
*above_y |= above_prediction_mask[block_size] << shift_y;
*left_y |= left_prediction_mask[block_size] << shift_y;
-#if CONFIG_MISC_FIXES
if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi))
return;
-#else
- if (mbmi->skip && is_inter_block(mbmi))
- return;
-#endif
*above_y |= (size_mask[block_size] &
above_64x64_txform_mask[tx_size_y]) << shift_y;
@@ -1019,11 +1005,7 @@
lfm->above_uv[i] &= mask_uv;
}
lfm->int_4x4_y &= mask_y;
-#if CONFIG_MISC_FIXES
lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;
-#else
- lfm->int_4x4_uv &= mask_uv;
-#endif
// We don't apply a wide loop filter on the last uv block row. If set
// apply the shorter one instead.
@@ -1057,11 +1039,7 @@
lfm->above_uv[i] &= mask_uv;
}
lfm->int_4x4_y &= mask_y;
-#if CONFIG_MISC_FIXES
lfm->left_int_4x4_uv &= mask_uv_int;
-#else
- lfm->int_4x4_uv &= mask_uv_int;
-#endif
// We don't apply a wide loop filter on the last uv column. If set
// apply the shorter one instead.
@@ -1091,11 +1069,7 @@
assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
-#if CONFIG_MISC_FIXES
assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
-#else
- assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
-#endif
assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
@@ -1103,11 +1077,7 @@
assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
-#if CONFIG_MISC_FIXES
assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
-#else
- assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
-#endif
}
static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -1183,9 +1153,9 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp10_filter_block_plane_non420(VP10_COMMON *cm,
- struct macroblockd_plane *plane,
- MODE_INFO **mi_8x8,
- int mi_row, int mi_col) {
+ struct macroblockd_plane *plane,
+ MODE_INFO **mi_8x8,
+ int mi_row, int mi_col) {
const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
@@ -1209,49 +1179,103 @@
// Determine the vertical edges that need filtering
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c];
- const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
- const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
+ const MB_MODE_INFO *mbmi = &mi[0].mbmi;
+ const BLOCK_SIZE sb_type = mbmi->sb_type;
+ const int skip_this = mbmi->skip && is_inter_block(mbmi);
+ const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
+ const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
+
// left edge of current unit is block/partition edge -> no skip
const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
- !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
+ !blk_col : 1;
const int skip_this_c = skip_this && !block_edge_left;
// top edge of current unit is block/partition edge -> no skip
const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
- !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
+ !blk_row : 1;
const int skip_this_r = skip_this && !block_edge_above;
+
+#if CONFIG_VAR_TX
+ TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
+ ? get_uv_tx_size(mbmi, plane) : mbmi->tx_size;
+#else
const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
- ? get_uv_tx_size(&mi[0].mbmi, plane)
- : mi[0].mbmi.tx_size;
+ ? get_uv_tx_size(mbmi, plane)
+ : mbmi->tx_size;
+#endif
+
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+ TX_SIZE tx_size_c = tx_size;
+ TX_SIZE tx_size_r = tx_size;
+
+ int tx_size_mask = 0;
// Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] =
- get_filter_level(&cm->lf_info, &mi[0].mbmi)))
+ get_filter_level(&cm->lf_info, mbmi)))
continue;
+ if (tx_size == TX_32X32)
+ tx_size_mask = 3;
+ else if (tx_size == TX_16X16)
+ tx_size_mask = 1;
+ else
+ tx_size_mask = 0;
+
+#if CONFIG_VAR_TX
+ if (is_inter_block(mbmi) && !mbmi->skip)
+ tx_size = (plane->plane_type == PLANE_TYPE_UV) ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row * 8 + blk_col],
+ sb_type, ss_x, ss_y) :
+ mbmi->inter_tx_size[blk_row * 8 + blk_col];
+
+ tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]);
+ tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]);
+
+ cm->above_txfm_context[mi_col + c] = tx_size;
+ cm->left_txfm_context[(mi_row + r) & 0x07] = tx_size;
+#endif
+
// Build masks based on the transform size of each block
- if (tx_size == TX_32X32) {
- if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
+ // handle vertical mask
+ if (tx_size_c == TX_32X32) {
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
if (!skip_border_4x4_c)
mask_16x16_c |= 1 << (c >> ss_x);
else
mask_8x8_c |= 1 << (c >> ss_x);
}
- if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
+ } else if (tx_size_c == TX_16X16) {
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+ if (!skip_border_4x4_c)
+ mask_16x16_c |= 1 << (c >> ss_x);
+ else
+ mask_8x8_c |= 1 << (c >> ss_x);
+ }
+ } else {
+ // force 8x8 filtering on 32x32 boundaries
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+ if (tx_size_c == TX_8X8 || ((c >> ss_x) & 3) == 0)
+ mask_8x8_c |= 1 << (c >> ss_x);
+ else
+ mask_4x4_c |= 1 << (c >> ss_x);
+ }
+
+ if (!skip_this && tx_size_c < TX_8X8 && !skip_border_4x4_c &&
+ ((c >> ss_x) & tx_size_mask) == 0)
+ mask_4x4_int[r] |= 1 << (c >> ss_x);
+ }
+
+ // set horizontal mask
+ if (tx_size_r == TX_32X32) {
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= 1 << (c >> ss_x);
else
mask_8x8[r] |= 1 << (c >> ss_x);
}
- } else if (tx_size == TX_16X16) {
- if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
- if (!skip_border_4x4_c)
- mask_16x16_c |= 1 << (c >> ss_x);
- else
- mask_8x8_c |= 1 << (c >> ss_x);
- }
- if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
+ } else if (tx_size_r == TX_16X16) {
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= 1 << (c >> ss_x);
else
@@ -1259,21 +1283,15 @@
}
} else {
// force 8x8 filtering on 32x32 boundaries
- if (!skip_this_c) {
- if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
- mask_8x8_c |= 1 << (c >> ss_x);
- else
- mask_4x4_c |= 1 << (c >> ss_x);
- }
-
- if (!skip_this_r) {
- if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+ if (tx_size_r == TX_8X8 || ((r >> ss_y) & 3) == 0)
mask_8x8[r] |= 1 << (c >> ss_x);
else
mask_4x4[r] |= 1 << (c >> ss_x);
}
- if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
+ if (!skip_this && tx_size_r < TX_8X8 && !skip_border_4x4_c &&
+ ((r >> ss_y) & tx_size_mask) == 0)
mask_4x4_int[r] |= 1 << (c >> ss_x);
}
}
@@ -1462,11 +1480,7 @@
uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
-#if CONFIG_MISC_FIXES
uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
-#else
- uint16_t mask_4x4_int = lfm->int_4x4_uv;
-#endif
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
@@ -1518,11 +1532,7 @@
mask_16x16 = lfm->above_uv[TX_16X16];
mask_8x8 = lfm->above_uv[TX_8X8];
mask_4x4 = lfm->above_uv[TX_4X4];
-#if CONFIG_MISC_FIXES
mask_4x4_int = lfm->above_int_4x4_uv;
-#else
- mask_4x4_int = lfm->int_4x4_uv;
-#endif
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
@@ -1568,13 +1578,14 @@
}
void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
- VP10_COMMON *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only) {
+ VP10_COMMON *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+ int mi_row, mi_col;
+#if !CONFIG_VAR_TX
enum lf_path path;
LOOP_FILTER_MASK lfm;
- int mi_row, mi_col;
if (y_only)
path = LF_PATH_444;
@@ -1584,19 +1595,29 @@
path = LF_PATH_444;
else
path = LF_PATH_SLOW;
+#endif
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
+#endif
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
-
+#if CONFIG_VAR_TX
+ memset(cm->left_txfm_context, TX_SIZES, 8);
+#endif
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane;
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+#if CONFIG_VAR_TX
+ for (plane = 0; plane < num_planes; ++plane)
+ vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+ mi_row, mi_col);
+#else
// TODO(JBB): Make setup_mask work for non 420.
vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
&lfm);
-
vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
@@ -1612,6 +1633,7 @@
break;
}
}
+#endif
}
}
}
diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h
index 8db705a..513fbe3 100644
--- a/vp10/common/loopfilter.h
+++ b/vp10/common/loopfilter.h
@@ -80,12 +80,8 @@
uint64_t int_4x4_y;
uint16_t left_uv[TX_SIZES];
uint16_t above_uv[TX_SIZES];
-#if CONFIG_MISC_FIXES
uint16_t left_int_4x4_uv;
uint16_t above_int_4x4_uv;
-#else
- uint16_t int_4x4_uv;
-#endif
uint8_t lfl_y[64];
uint8_t lfl_uv[16];
} LOOP_FILTER_MASK;
diff --git a/vp10/common/mv.h b/vp10/common/mv.h
index b4971a5..4cc2638 100644
--- a/vp10/common/mv.h
+++ b/vp10/common/mv.h
@@ -48,6 +48,9 @@
mv->row = clamp(mv->row, min_row, max_row);
}
+static INLINE int mv_has_subpel(const MV *mv) {
+ return (mv->row & 0x0F) || (mv->col & 0x0F);
+}
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index 1ef80c2..a8cc216 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -30,11 +30,6 @@
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
-#if !CONFIG_MISC_FIXES
- // Blank the reference vector list
- memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
-#endif
-
// The nearest 2 blocks are treated differently
// if the size < 8x8 we get the mv from the bmi substructure,
// and we also need to keep a mode count.
@@ -133,9 +128,6 @@
}
if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
-#if !CONFIG_MISC_FIXES
- prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int &&
-#endif
prev_frame_mvs->ref_frame[1] != ref_frame) {
int_mv mv = prev_frame_mvs->mv[1];
if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
@@ -148,17 +140,9 @@
}
Done:
-
mode_context[ref_frame] = counter_to_context[context_counter];
-
-#if CONFIG_MISC_FIXES
for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i)
mv_ref_list[i].as_int = 0;
-#else
- // Clamp vectors
- for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
- clamp_mv_ref(&mv_ref_list[i].as_mv, bw, bh, xd);
-#endif
}
void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index 0a98866..c6b91ec 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -119,26 +119,13 @@
};
// clamp_mv_ref
-#if CONFIG_MISC_FIXES
#define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units
-#else
-#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-#endif
static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
-#if CONFIG_MISC_FIXES
clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,
xd->mb_to_right_edge + bw * 8 + MV_BORDER,
xd->mb_to_top_edge - bh * 8 - MV_BORDER,
xd->mb_to_bottom_edge + bh * 8 + MV_BORDER);
-#else
- (void) bw;
- (void) bh;
- clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
- xd->mb_to_right_edge + MV_BORDER,
- xd->mb_to_top_edge - MV_BORDER,
- xd->mb_to_bottom_edge + MV_BORDER);
-#endif
}
// This function returns either the appropriate sub block or block's mv
@@ -164,11 +151,7 @@
return mv;
}
-#if CONFIG_MISC_FIXES
#define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd)
-#else
-#define CLIP_IN_ADD(mv, bw, bh, xd) do {} while (0)
-#endif
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector it will also
@@ -194,8 +177,6 @@
ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
refmv_count, mv_ref_list, bw, bh, xd, Done); \
if (has_second_ref(mbmi) && \
- (CONFIG_MISC_FIXES || \
- (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) && \
(mbmi)->ref_frame[1] != ref_frame) \
ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
refmv_count, mv_ref_list, bw, bh, xd, Done); \
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index 6814133..e3fef90 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -20,6 +20,7 @@
#include "vp10/common/entropymv.h"
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
+#include "vp10/common/mv.h"
#include "vp10/common/frame_buffers.h"
#include "vp10/common/quant_common.h"
#include "vp10/common/tile_common.h"
@@ -254,9 +255,6 @@
struct loopfilter lf;
struct segmentation seg;
-#if !CONFIG_MISC_FIXES
- struct segmentation_probs segp;
-#endif
int frame_parallel_decode; // frame-based threading.
@@ -301,6 +299,10 @@
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context;
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT *above_txfm_context;
+ TXFM_CONTEXT left_txfm_context[8];
+#endif
int above_context_alloc_cols;
// scratch memory for intraonly/keyframe forward updates from default tables
@@ -397,6 +399,9 @@
}
xd->above_seg_context = cm->above_seg_context;
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context;
+#endif
xd->mi_stride = cm->mi_stride;
xd->error_info = &cm->error;
}
@@ -446,6 +451,11 @@
xd->left_mi = NULL;
xd->left_mbmi = NULL;
}
+
+#if CONFIG_VAR_TX
+ xd->n8_h = bh;
+ xd->n8_w = bw;
+#endif
}
static INLINE const vpx_prob *get_y_mode_probs(const VP10_COMMON *cm,
@@ -489,6 +499,84 @@
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
+#if CONFIG_VAR_TX
+static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx,
+ TX_SIZE tx_size,
+ int len) {
+ int i;
+ for (i = 0; i < len; ++i)
+ txfm_ctx[i] = tx_size;
+}
+
+static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
+ TXFM_CONTEXT *left_ctx,
+ TX_SIZE tx_size) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bs = num_8x8_blocks_high_lookup[bsize];
+ int i;
+ for (i = 0; i < bs; ++i) {
+ above_ctx[i] = tx_size;
+ left_ctx[i] = tx_size;
+ }
+}
+
+static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
+ TXFM_CONTEXT *left_ctx,
+ TX_SIZE tx_size) {
+ int above = *above_ctx < tx_size;
+ int left = *left_ctx < tx_size;
+ return (tx_size - 1) * 3 + above + left;
+}
+#endif
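
A sketch of how these helpers are meant to compose when coding one
transform-partition decision; the context pointers, the bool-reader call,
and the probability table name are illustrative stand-ins, not part of this
diff:

  /* Hypothetical decode-side usage. */
  int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
                                   xd->left_txfm_context + blk_row, tx_size);
  int split = vpx_read(r, cm->fc->txfm_partition_prob[ctx]);  /* assumed */
  if (!split) {
    /* Leaf: publish tx_size so later blocks see it as their neighbor. */
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size);
  } else {
    /* Recurse into the four tx_size - 1 sub-blocks (not shown). */
  }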
+
+#if CONFIG_EXT_INTERP
+static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int is_compound = has_second_ref(mbmi);
+ int intpel_mv;
+
+#if SUPPORT_NONINTERPOLATING_FILTERS
+ // TODO(debargha): This is currently only for experimentation
+ // with non-interpolating filters. Remove later.
+ // If any of the filters are non-interpolating, then always indicate
+ // the interpolation filter.
+ int i;
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+ if (!IsInterpolatingFilter(i)) return 1;
+ }
+#endif
+
+ // For scaled references, the interpolation filter is always indicated.
+ if (vp10_is_scaled(&xd->block_refs[0]->sf))
+ return 1;
+ if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf))
+ return 1;
+
+ if (bsize < BLOCK_8X8) {
+ intpel_mv =
+ !mv_has_subpel(&mi->bmi[0].as_mv[0].as_mv) &&
+ !mv_has_subpel(&mi->bmi[1].as_mv[0].as_mv) &&
+ !mv_has_subpel(&mi->bmi[2].as_mv[0].as_mv) &&
+ !mv_has_subpel(&mi->bmi[3].as_mv[0].as_mv);
+ if (is_compound && intpel_mv) {
+ intpel_mv &=
+ !mv_has_subpel(&mi->bmi[0].as_mv[1].as_mv) &&
+ !mv_has_subpel(&mi->bmi[1].as_mv[1].as_mv) &&
+ !mv_has_subpel(&mi->bmi[2].as_mv[1].as_mv) &&
+ !mv_has_subpel(&mi->bmi[3].as_mv[1].as_mv);
+ }
+ } else {
+ intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
+ if (is_compound && intpel_mv) {
+ intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+ }
+ }
+ return !intpel_mv;
+}
+#endif // CONFIG_EXT_INTERP
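
The intended bitstream behavior, as a hedged read-side sketch
(read_switchable_interp_filter is a stand-in name for whatever reads the
filter from the stream):

  if (cm->interp_filter == SWITCHABLE) {
    mbmi->interp_filter = vp10_is_interp_needed(xd)
                              ? read_switchable_interp_filter(cm, xd, r)
                              : EIGHTTAP;  /* full-pel MVs: filter is moot */
  } else {
    mbmi->interp_filter = cm->interp_filter;
  }

When every motion vector is full-pel and no reference is scaled, all
interpolation filters produce the same prediction, so a default can be
assumed instead of spending bits on the choice.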
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index d6d7146..ebba225 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -165,6 +165,71 @@
}
}
+#if CONFIG_VAR_TX
+static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ TX_SIZE max_tx_size, int ctx,
+ struct tx_counts *tx_counts) {
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ ++get_tx_counts(max_tx_size, ctx, tx_counts)[tx_size];
+ mbmi->tx_size = tx_size;
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+ update_tx_counts(cm, xd, mbmi, plane_bsize,
+ tx_size - 1, offsetr, offsetc,
+ max_tx_size, ctx, tx_counts);
+ }
+ }
+}
+
+static INLINE void inter_block_tx_count_update(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi,
+ BLOCK_SIZE plane_bsize,
+ int ctx,
+ struct tx_counts *tx_counts) {
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+
+ for (idy = 0; idy < mi_height; idy += bh)
+ for (idx = 0; idx < mi_width; idx += bh)
+ update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx,
+ max_tx_size, ctx, tx_counts);
+}
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index fdcb967..88c7569 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -64,9 +64,9 @@
}
void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
- int bw, int bh,
- int x, int y, int w, int h,
- int mi_x, int mi_y) {
+ int bw, int bh,
+ int x, int y, int w, int h,
+ int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const MODE_INFO *mi = xd->mi[0];
const int is_compound = has_second_ref(&mi->mbmi);
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 5678f47..bcfa3f6 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -28,9 +28,22 @@
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) {
+ // Interpolating filter
+ sf->predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ } else {
+ sf->predict_ni[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ }
+#else
sf->predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
}
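The interpolating/non-interpolating split keys off the zero-offset kernel:
for an interpolating filter the phase-0 kernel is the Q7 identity (a single
center tap of 128, unit gain), so full-pel samples pass through unchanged
and the cheaper predict[] path is safe. A hedged sketch of the test,
assuming libvpx's SUBPEL_TAPS == 8 and InterpKernel typedef:

    /* Sketch: a filter bank is "interpolating" when its phase-0 kernel
     * is the Q7 identity (center tap 128), so integer positions are
     * copied verbatim. Center index is SUBPEL_TAPS / 2 - 1 == 3. */
    static INLINE int kernel_is_interpolating(const InterpKernel *kernel) {
      return kernel[0][SUBPEL_TAPS / 2 - 1] == 128;
    }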
#if CONFIG_VP9_HIGHBITDEPTH
@@ -42,9 +55,22 @@
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys, int bd) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) {
+ // Interpolating filter
+ sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ } else {
+ sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ }
+#else
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -192,7 +218,6 @@
void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx,
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
const struct scale_factors *sf);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index 14af7eb..06f38f0 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
+
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
@@ -21,7 +23,6 @@
#include "vp10/common/reconintra.h"
#include "vp10/common/onyxc_int.h"
-#if CONFIG_MISC_FIXES
enum {
NEED_LEFT = 1 << 1,
NEED_ABOVE = 1 << 2,
@@ -42,28 +43,7 @@
NEED_ABOVE | NEED_ABOVERIGHT, // D63
NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM
};
-#else
-enum {
- NEED_LEFT = 1 << 1,
- NEED_ABOVE = 1 << 2,
- NEED_ABOVERIGHT = 1 << 3,
-};
-static const uint8_t extend_modes[INTRA_MODES] = {
- NEED_ABOVE | NEED_LEFT, // DC
- NEED_ABOVE, // V
- NEED_LEFT, // H
- NEED_ABOVERIGHT, // D45
- NEED_LEFT | NEED_ABOVE, // D135
- NEED_LEFT | NEED_ABOVE, // D117
- NEED_LEFT | NEED_ABOVE, // D153
- NEED_LEFT, // D207
- NEED_ABOVERIGHT, // D63
- NEED_LEFT | NEED_ABOVE, // TM
-};
-#endif
-
-#if CONFIG_MISC_FIXES
static const uint8_t orders_64x64[1] = { 0 };
static const uint8_t orders_64x32[2] = { 0, 1 };
static const uint8_t orders_32x64[2] = { 0, 1 };
@@ -188,7 +168,6 @@
return 0;
}
}
-#endif
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
@@ -216,15 +195,9 @@
INIT_ALL_SIZES(pred[V_PRED], v);
INIT_ALL_SIZES(pred[H_PRED], h);
-#if CONFIG_MISC_FIXES
- INIT_ALL_SIZES(pred[D207_PRED], d207e);
- INIT_ALL_SIZES(pred[D45_PRED], d45e);
- INIT_ALL_SIZES(pred[D63_PRED], d63e);
-#else
INIT_ALL_SIZES(pred[D207_PRED], d207);
INIT_ALL_SIZES(pred[D45_PRED], d45);
INIT_ALL_SIZES(pred[D63_PRED], d63);
-#endif
INIT_ALL_SIZES(pred[D117_PRED], d117);
INIT_ALL_SIZES(pred[D135_PRED], d135);
INIT_ALL_SIZES(pred[D153_PRED], d153);
@@ -238,15 +211,9 @@
#if CONFIG_VP9_HIGHBITDEPTH
INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
-#if CONFIG_MISC_FIXES
- INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e);
- INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e);
- INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
-#else
INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207);
INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45);
INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
-#endif
INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
@@ -261,12 +228,598 @@
#undef intra_pred_allsizes
}
-#if CONFIG_MISC_FIXES
-static inline void memset16(uint16_t *dst, int val, int n) {
+static INLINE void memset16(uint16_t *dst, int val, int n) {
while (n--)
*dst++ = val;
}
-#endif
+
+#if CONFIG_EXT_INTRA
+#define PI 3.14159265
+#define FILTER_INTRA_PREC_BITS 10
+#define FILTER_INTRA_ROUND_VAL 511
+
+static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = {
+ NEED_LEFT | NEED_ABOVE, // FILTER_DC
+ NEED_LEFT | NEED_ABOVE, // FILTER_V
+ NEED_LEFT | NEED_ABOVE, // FILTER_H
+ NEED_LEFT | NEED_ABOVE, // FILTER_D45
+ NEED_LEFT | NEED_ABOVE, // FILTER_D135
+ NEED_LEFT | NEED_ABOVE, // FILTER_D117
+ NEED_LEFT | NEED_ABOVE, // FILTER_D153
+ NEED_LEFT | NEED_ABOVE, // FILTER_D207
+ NEED_LEFT | NEED_ABOVE, // FILTER_D63
+ NEED_LEFT | NEED_ABOVE, // FILTER_TM
+};
+
+// Directional prediction, zone 1: 0 < angle < 90
+static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left,
+ int dx, int dy) {
+ int r, c, x, y, base, shift, val;
+
+ (void)left;
+ (void)dy;
+ assert(dy == 1);
+ assert(dx < 0);
+
+ for (r = 0; r < bs; ++r) {
+ y = r + 1;
+ for (c = 0; c < bs; ++c) {
+ x = c * 256 - y * dx;
+ base = x >> 8;
+ shift = x - base * 256;
+ if (base < 2 * bs - 1) {
+ val =
+ (above[base] * (256 - shift) + above[base + 1] * shift + 128) >> 8;
+ dst[c] = clip_pixel(val);
+ } else {
+ dst[c] = above[2 * bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
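All three zones share the same 8.8 fixed-point linear blend between two
reference pixels; factored out, it is just:

    /* Sketch: 8.8 fixed-point blend of neighbors a and b; shift is the
     * fractional position in [0, 255]; +128 gives round-to-nearest. */
    static INLINE int blend_8p8(int a, int b, int shift) {
      return (a * (256 - shift) + b * shift + 128) >> 8;
    }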
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left,
+ int dx, int dy) {
+ int r, c, x, y, val1, val2, shift, val, base;
+
+ assert(dx > 0);
+ assert(dy > 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ y = r + 1;
+ x = c * 256 - y * dx;
+ if (x >= -256) {
+ if (x <= 0) {
+ val1 = above[-1];
+ val2 = above[0];
+ shift = x + 256;
+ } else {
+ base = x >> 8;
+ val1 = above[base];
+ val2 = above[base + 1];
+ shift = x - base * 256;
+ }
+ } else {
+ x = c + 1;
+ y = r * 256 - x * dy;
+ base = y >> 8;
+ if (base >= 0) {
+ val1 = left[base];
+ val2 = left[base + 1];
+ shift = y - base * 256;
+ } else {
+ val1 = val2 = left[0];
+ shift = 0;
+ }
+ }
+ val = (val1 * (256 - shift) + val2 * shift + 128) >> 8;
+ dst[c] = clip_pixel(val);
+ }
+ dst += stride;
+ }
+}
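Zone 2 first projects each target pixel onto the above row; only when the
projection lands more than one full pixel left of above[-1] (x < -256) does
it fall back to projecting onto the left column. Isolated as a predicate
(sketch, hypothetical name):

    /* Sketch: true when the zone-2 projection of pixel (r, c) still lands
     * on the above row (at or right of above[-1]); dx > 0 as asserted. */
    static INLINE int z2_uses_above_row(int r, int c, int dx) {
      return c * 256 - (r + 1) * dx >= -256;
    }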
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left,
+ int dx, int dy) {
+ int r, c, x, y, base, shift, val;
+
+ (void)above;
+ (void)dx;
+ assert(dx == 1);
+ assert(dy < 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ x = c + 1;
+ y = r * 256 - x * dy;
+ base = y >> 8;
+ shift = y - base * 256;
+ if (base < bs - 1) {
+ val =
+ (left[base] * (256 - shift) + left[base + 1] * shift + 128) >> 8;
+ dst[c] = clip_pixel(val);
+ } else {
+ dst[c] = left[bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
+
+static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
+ const uint8_t *above, const uint8_t *left, int angle) {
+ double t = 0;
+ int dx, dy;
+ int bs = 4 << tx_size;
+
+ if (angle != 90 && angle != 180)
+ t = tan(angle * PI / 180.0);
+ if (angle > 0 && angle < 90) {
+ dx = -((int)(256 / t));
+ dy = 1;
+ dr_prediction_z1(dst, stride, bs, above, left, dx, dy);
+ } else if (angle > 90 && angle < 180) {
+ t = -t;
+ dx = (int)(256 / t);
+ dy = (int)(256 * t);
+ dr_prediction_z2(dst, stride, bs, above, left, dx, dy);
+ } else if (angle > 180 && angle < 270) {
+ dx = 1;
+ dy = -((int)(256 * t));
+ dr_prediction_z3(dst, stride, bs, above, left, dx, dy);
+ } else if (angle == 90) {
+ pred[V_PRED][tx_size](dst, stride, above, left);
+ } else if (angle == 180) {
+ pred[H_PRED][tx_size](dst, stride, above, left);
+ }
+}
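As a sanity check on the fixed-point step: at 45 degrees tan() is 1.0, so
dx is -256 and the zone-1 position works out to x = (c + r + 1) * 256,
i.e. base = c + r + 1 with shift = 0 -- a pure diagonal copy of the above
row. A small self-check under those assumptions:

    #include <assert.h>
    #include <math.h>
    /* Sketch: verify the 45-degree special case of the zone-1 math. */
    static void check_z1_45_degrees(void) {
      const double t = tan(45 * 3.14159265 / 180.0);  /* 1.0 */
      const int dx = -((int)(256 / t));               /* -256 */
      const int r = 2, c = 3, y = r + 1;
      const int x = c * 256 - y * dx;                 /* (c + y) * 256 */
      assert((x >> 8) == c + y && (x & 255) == 0);
    }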
+
+static const int filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = {
+ {
+ {735, 881, -537, -54},
+ {1005, 519, -488, -11},
+ {383, 990, -343, -6},
+ {442, 805, -542, 319},
+ {658, 616, -133, -116},
+ {875, 442, -141, -151},
+ {386, 741, -23, -80},
+ {390, 1027, -446, 51},
+ {679, 606, -523, 262},
+ {903, 922, -778, -23},
+ },
+ {
+ {648, 803, -444, 16},
+ {972, 620, -576, 7},
+ {561, 967, -499, -5},
+ {585, 762, -468, 144},
+ {596, 619, -182, -9},
+ {895, 459, -176, -153},
+ {557, 722, -126, -129},
+ {601, 839, -523, 105},
+ {562, 709, -499, 251},
+ {803, 872, -695, 43},
+ },
+ {
+ {423, 728, -347, 111},
+ {963, 685, -665, 23},
+ {281, 1024, -480, 216},
+ {640, 596, -437, 78},
+ {429, 669, -259, 99},
+ {740, 646, -415, 23},
+ {568, 771, -346, 40},
+ {404, 833, -486, 209},
+ {398, 712, -423, 307},
+ {939, 935, -887, 17},
+ },
+ {
+ {477, 737, -393, 150},
+ {881, 630, -546, 67},
+ {506, 984, -443, -20},
+ {114, 459, -270, 528},
+ {433, 528, 14, 3},
+ {837, 470, -301, -30},
+ {181, 777, 89, -107},
+ {-29, 716, -232, 259},
+ {589, 646, -495, 255},
+ {740, 884, -728, 77},
+ },
+};
+
+static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above,
+ const uint8_t *left,
+ int mode) {
+ int k, r, c;
+ int pred[33][65];
+ int mean, ipred;
+  const TX_SIZE tx_size = (bs == 32) ? TX_32X32 :
+                          (bs == 16) ? TX_16X16 :
+                          (bs == 8) ? TX_8X8 : TX_4X4;
+ const int c0 = filter_intra_taps_4[tx_size][mode][0];
+ const int c1 = filter_intra_taps_4[tx_size][mode][1];
+ const int c2 = filter_intra_taps_4[tx_size][mode][2];
+ const int c3 = filter_intra_taps_4[tx_size][mode][3];
+
+ k = 0;
+ mean = 0;
+ while (k < bs) {
+ mean = mean + (int)left[k];
+ mean = mean + (int)above[k];
+ k++;
+ }
+ mean = (mean + bs) / (2 * bs);
+
+ for (r = 0; r < bs; ++r)
+ pred[r + 1][0] = (int)left[r] - mean;
+
+  // Note: above[-1] (the top-left reference pixel) is read when c == 0.
+  for (c = 0; c < 2 * bs + 1; ++c)
+    pred[0][c] = (int)above[c - 1] - mean;
+
+ for (r = 1; r < bs + 1; ++r)
+ for (c = 1; c < 2 * bs + 1 - r; ++c) {
+ ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
+ c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
+ pred[r][c] = ipred < 0 ?
+ -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) :
+ ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+ }
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ ipred = pred[r + 1][c + 1] + mean;
+ dst[c] = clip_pixel(ipred);
+ }
+ dst += stride;
+ }
+}
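The taps are in Q10 (FILTER_INTRA_PREC_BITS), and the sign-symmetric
rounding with FILTER_INTRA_ROUND_VAL = 511 = 2^10/2 - 1 rounds ties toward
zero for either sign; isolated, the rounding step is:

    /* Sketch: symmetric Q10 round-to-nearest with ties toward zero. */
    static INLINE int round_q10(int v) {
      return v < 0
          ? -((-v + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS)
          : ((v + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
    }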
+
+static void dc_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED);
+}
+
+static void v_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED);
+}
+
+static void h_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED);
+}
+
+static void d45_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED);
+}
+
+static void d135_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED);
+}
+
+static void d117_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED);
+}
+
+static void d153_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED);
+}
+
+static void d207_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED);
+}
+
+static void d63_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED);
+}
+
+static void tm_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED);
+}
+
+static void (*filter_intra_predictors[EXT_INTRA_MODES])(uint8_t *dst,
+ ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) = {
+ dc_filter_predictor, v_filter_predictor, h_filter_predictor,
+ d45_filter_predictor, d135_filter_predictor, d117_filter_predictor,
+ d153_filter_predictor, d207_filter_predictor, d63_filter_predictor,
+ tm_filter_predictor,
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Directional prediction, zone 1: 0 < angle < 90
+static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int dx, int dy, int bd) {
+ int r, c, x, y, base, shift, val;
+
+ (void)left;
+ (void)dy;
+ assert(dy == 1);
+ assert(dx < 0);
+
+ for (r = 0; r < bs; ++r) {
+ y = r + 1;
+ for (c = 0; c < bs; ++c) {
+ x = c * 256 - y * dx;
+ base = x >> 8;
+ shift = x - base * 256;
+ if (base < 2 * bs - 1) {
+ val =
+ (above[base] * (256 - shift) + above[base + 1] * shift + 128) >> 8;
+ dst[c] = clip_pixel_highbd(val, bd);
+ } else {
+ dst[c] = above[2 * bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int dx, int dy, int bd) {
+ int r, c, x, y, val1, val2, shift, val, base;
+
+ assert(dx > 0);
+ assert(dy > 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ y = r + 1;
+ x = c * 256 - y * dx;
+ if (x >= -256) {
+ if (x <= 0) {
+ val1 = above[-1];
+ val2 = above[0];
+ shift = x + 256;
+ } else {
+ base = x >> 8;
+ val1 = above[base];
+ val2 = above[base + 1];
+ shift = x - base * 256;
+ }
+ } else {
+ x = c + 1;
+ y = r * 256 - x * dy;
+ base = y >> 8;
+ if (base >= 0) {
+ val1 = left[base];
+ val2 = left[base + 1];
+ shift = y - base * 256;
+ } else {
+ val1 = val2 = left[0];
+ shift = 0;
+ }
+ }
+ val = (val1 * (256 - shift) + val2 * shift + 128) >> 8;
+ dst[c] = clip_pixel_highbd(val, bd);
+ }
+ dst += stride;
+ }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int dx, int dy, int bd) {
+ int r, c, x, y, base, shift, val;
+
+ (void)above;
+ (void)dx;
+ assert(dx == 1);
+ assert(dy < 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ x = c + 1;
+ y = r * 256 - x * dy;
+ base = y >> 8;
+ shift = y - base * 256;
+ if (base < bs - 1) {
+ val =
+ (left[base] * (256 - shift) + left[base + 1] * shift + 128) >> 8;
+ dst[c] = clip_pixel_highbd(val, bd);
+ } else {
+ dst[c] = left[bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
+
+static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int r;
+ (void) left;
+ (void) bd;
+ for (r = 0; r < bs; r++) {
+ memcpy(dst, above, bs * sizeof(uint16_t));
+ dst += stride;
+ }
+}
+
+static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int r;
+ (void) above;
+ (void) bd;
+ for (r = 0; r < bs; r++) {
+ vpx_memset16(dst, left[r], bs);
+ dst += stride;
+ }
+}
+
+static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int angle, int bd) {
+ double t = 0;
+ int dx, dy;
+
+ if (angle != 90 && angle != 180)
+ t = tan(angle * PI / 180.0);
+ if (angle > 0 && angle < 90) {
+ dx = -((int)(256 / t));
+ dy = 1;
+ highbd_dr_prediction_z1(dst, stride, bs, above, left, dx, dy, bd);
+ } else if (angle > 90 && angle < 180) {
+ t = -t;
+ dx = (int)(256 / t);
+ dy = (int)(256 * t);
+ highbd_dr_prediction_z2(dst, stride, bs, above, left, dx, dy, bd);
+ } else if (angle > 180 && angle < 270) {
+ dx = 1;
+ dy = -((int)(256 * t));
+ highbd_dr_prediction_z3(dst, stride, bs, above, left, dx, dy, bd);
+ } else if (angle == 90) {
+ highbd_v_predictor(dst, stride, bs, above, left, bd);
+ } else if (angle == 180) {
+ highbd_h_predictor(dst, stride, bs, above, left, bd);
+ }
+}
+
+static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int mode,
+ int bd) {
+ int k, r, c;
+ int pred[33][65];
+ int mean, ipred;
+  const TX_SIZE tx_size = (bs == 32) ? TX_32X32 :
+                          (bs == 16) ? TX_16X16 :
+                          (bs == 8) ? TX_8X8 : TX_4X4;
+ const int c0 = filter_intra_taps_4[tx_size][mode][0];
+ const int c1 = filter_intra_taps_4[tx_size][mode][1];
+ const int c2 = filter_intra_taps_4[tx_size][mode][2];
+ const int c3 = filter_intra_taps_4[tx_size][mode][3];
+
+ k = 0;
+ mean = 0;
+ while (k < bs) {
+ mean = mean + (int)left[k];
+ mean = mean + (int)above[k];
+ k++;
+ }
+ mean = (mean + bs) / (2 * bs);
+
+ for (r = 0; r < bs; ++r)
+ pred[r + 1][0] = (int)left[r] - mean;
+
+  // Note: above[-1] (the top-left reference pixel) is read when c == 0.
+  for (c = 0; c < 2 * bs + 1; ++c)
+    pred[0][c] = (int)above[c - 1] - mean;
+
+ for (r = 1; r < bs + 1; ++r)
+ for (c = 1; c < 2 * bs + 1 - r; ++c) {
+ ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
+ c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
+ pred[r][c] = ipred < 0 ?
+ -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) :
+ ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+ }
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ ipred = pred[r + 1][c + 1] + mean;
+ dst[c] = clip_pixel_highbd(ipred, bd);
+ }
+ dst += stride;
+ }
+}
+
+static void highbd_dc_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED,
+ bd);
+}
+
+static void highbd_v_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED,
+ bd);
+}
+
+static void highbd_h_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED,
+ bd);
+}
+
+static void highbd_d45_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED,
+ bd);
+}
+
+static void highbd_d135_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED,
+ bd);
+}
+
+static void highbd_d117_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED,
+ bd);
+}
+
+static void highbd_d153_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED,
+ bd);
+}
+
+static void highbd_d207_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED,
+ bd);
+}
+
+static void highbd_d63_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED,
+ bd);
+}
+
+static void highbd_tm_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED,
+ bd);
+}
+
+static void (*highbd_filter_intra_predictors[EXT_INTRA_MODES])(uint16_t *dst,
+ ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left,
+ int bd) = {
+ highbd_dc_filter_predictor, highbd_v_filter_predictor,
+ highbd_h_filter_predictor, highbd_d45_filter_predictor,
+ highbd_d135_filter_predictor, highbd_d117_filter_predictor,
+ highbd_d153_filter_predictor, highbd_d207_filter_predictor,
+ highbd_d63_filter_predictor, highbd_tm_filter_predictor,
+};
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTRA
#if CONFIG_VP9_HIGHBITDEPTH
static void build_intra_predictors_high(const MACROBLOCKD *xd,
@@ -276,56 +829,71 @@
int dst_stride,
PREDICTION_MODE mode,
TX_SIZE tx_size,
-#if CONFIG_MISC_FIXES
int n_top_px, int n_topright_px,
int n_left_px, int n_bottomleft_px,
-#else
- int up_available,
- int left_available,
- int right_available,
-#endif
- int x, int y,
- int plane, int bd) {
+ int plane) {
int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-#if CONFIG_MISC_FIXES
- DECLARE_ALIGNED(16, uint16_t, left_col[32]);
-#else
DECLARE_ALIGNED(16, uint16_t, left_col[64]);
-#endif
DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
const int bs = 4 << tx_size;
-#if CONFIG_MISC_FIXES
+ int need_left = extend_modes[mode] & NEED_LEFT;
+ int need_above = extend_modes[mode] & NEED_ABOVE;
const uint16_t *above_ref = ref - ref_stride;
-#else
- int frame_width, frame_height;
- int x0, y0;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
-#endif
- const int need_left = extend_modes[mode] & NEED_LEFT;
- const int need_above = extend_modes[mode] & NEED_ABOVE;
- const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
- int base = 128 << (bd - 8);
+ int base = 128 << (xd->bd - 8);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
-#if CONFIG_MISC_FIXES
- (void) x;
- (void) y;
+#if CONFIG_EXT_INTRA
+ const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+ &xd->mi[0]->mbmi.ext_intra_mode_info;
+ const EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ int p_angle = 0;
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+ }
+
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ }
+#endif // CONFIG_EXT_INTRA
+
(void) plane;
- (void) need_left;
- (void) need_above;
- (void) need_aboveright;
// NEED_LEFT
- if (extend_modes[mode] & NEED_LEFT) {
+ if (need_left) {
+#if CONFIG_EXT_INTRA
+ int need_bottom;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_bottom = 0;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
+ } else {
+ need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+ }
+#else
const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif // CONFIG_EXT_INTRA
i = 0;
if (n_left_px > 0) {
for (; i < n_left_px; i++)
@@ -343,8 +911,20 @@
}
// NEED_ABOVE
- if (extend_modes[mode] & NEED_ABOVE) {
+ if (need_above) {
+#if CONFIG_EXT_INTRA
+ int need_right;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
+ } else {
+ need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+ }
+#else
const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif // CONFIG_EXT_INTRA
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px * 2);
i = n_top_px;
@@ -360,142 +940,41 @@
}
}
- if (extend_modes[mode] & NEED_ABOVELEFT) {
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
above_row[-1] = n_top_px > 0 ?
(n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
}
#else
- // Get current frame pointer, width and height.
- if (plane == 0) {
- frame_width = xd->cur_buf->y_width;
- frame_height = xd->cur_buf->y_height;
- } else {
- frame_width = xd->cur_buf->uv_width;
- frame_height = xd->cur_buf->uv_height;
+  if (extend_modes[mode] & NEED_ABOVELEFT) {
+ above_row[-1] = n_top_px > 0 ?
+ (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+ }
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ highbd_filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+ const_above_row, left_col, xd->bd);
+ return;
}
- // Get block position in current frame.
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
- y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-
- // NEED_LEFT
- if (need_left) {
- if (left_available) {
- if (xd->mb_to_bottom_edge < 0) {
- /* slower path if the block needs border extension */
- if (y0 + bs <= frame_height) {
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- } else {
- const int extend_bottom = frame_height - y0;
- for (i = 0; i < extend_bottom; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- for (; i < bs; ++i)
- left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
- }
- } else {
- /* faster path if the block does not need extension */
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- }
- } else {
- // TODO(Peter): this value should probably change for high bitdepth
- vpx_memset16(left_col, base + 1, bs);
- }
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col,
+ p_angle, xd->bd);
+ return;
}
-
- // NEED_ABOVE
- if (need_above) {
- if (up_available) {
- const uint16_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + bs <= frame_width) {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r * sizeof(above_row[0]));
- vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width);
- }
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- }
- }
- above_row[-1] = left_available ? above_ref[-1] : (base + 1);
- } else {
- vpx_memset16(above_row, base - 1, bs);
- above_row[-1] = base - 1;
- }
- }
-
- // NEED_ABOVERIGHT
- if (need_aboveright) {
- if (up_available) {
- const uint16_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + 2 * bs <= frame_width) {
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 + bs <= frame_width) {
- const int r = frame_width - x0;
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, r * sizeof(above_row[0]));
- vpx_memset16(above_row + r, above_row[r - 1],
- x0 + 2 * bs - frame_width);
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r * sizeof(above_row[0]));
- vpx_memset16(above_row + r, above_row[r - 1],
- x0 + 2 * bs - frame_width);
- }
- // TODO(Peter) this value should probably change for high bitdepth
- above_row[-1] = left_available ? above_ref[-1] : (base + 1);
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- if (bs == 4 && right_available)
- memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
- else
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
- // TODO(Peter): this value should probably change for high bitdepth
- above_row[-1] = left_available ? above_ref[-1] : (base + 1);
- }
- }
- } else {
- vpx_memset16(above_row, base - 1, bs * 2);
- // TODO(Peter): this value should probably change for high bitdepth
- above_row[-1] = base - 1;
- }
- }
-#endif
+#endif // CONFIG_EXT_INTRA
// predict
if (mode == DC_PRED) {
-#if CONFIG_MISC_FIXES
dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
const_above_row,
left_col, xd->bd);
-#else
- dc_pred_high[left_available][up_available][tx_size](dst, dst_stride,
- const_above_row,
- left_col, xd->bd);
-#endif
} else {
pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col,
xd->bd);
@@ -506,28 +985,44 @@
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int ref_stride, uint8_t *dst, int dst_stride,
PREDICTION_MODE mode, TX_SIZE tx_size,
-#if CONFIG_MISC_FIXES
int n_top_px, int n_topright_px,
int n_left_px, int n_bottomleft_px,
-#else
- int up_available, int left_available,
- int right_available,
-#endif
- int x, int y, int plane) {
+ int plane) {
int i;
-#if CONFIG_MISC_FIXES
DECLARE_ALIGNED(16, uint8_t, left_col[64]);
const uint8_t *above_ref = ref - ref_stride;
-#else
- DECLARE_ALIGNED(16, uint8_t, left_col[32]);
- int frame_width, frame_height;
- int x0, y0;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
-#endif
DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size;
+ int need_left = extend_modes[mode] & NEED_LEFT;
+ int need_above = extend_modes[mode] & NEED_ABOVE;
+#if CONFIG_EXT_INTRA
+ const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+ &xd->mi[0]->mbmi.ext_intra_mode_info;
+ const EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ int p_angle = 0;
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+ }
+
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ }
+#endif // CONFIG_EXT_INTRA
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
@@ -536,34 +1031,28 @@
// 129 G H .. S T T T T T
// ..
-#if CONFIG_MISC_FIXES
(void) xd;
- (void) x;
- (void) y;
(void) plane;
assert(n_top_px >= 0);
assert(n_topright_px >= 0);
assert(n_left_px >= 0);
assert(n_bottomleft_px >= 0);
-#else
- // Get current frame pointer, width and height.
- if (plane == 0) {
- frame_width = xd->cur_buf->y_width;
- frame_height = xd->cur_buf->y_height;
- } else {
- frame_width = xd->cur_buf->uv_width;
- frame_height = xd->cur_buf->uv_height;
- }
-
- // Get block position in current frame.
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
- y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-#endif
// NEED_LEFT
- if (extend_modes[mode] & NEED_LEFT) {
-#if CONFIG_MISC_FIXES
+ if (need_left) {
+#if CONFIG_EXT_INTRA
+ int need_bottom;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_bottom = 0;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
+ } else {
+ need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+ }
+#else
const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif // CONFIG_EXT_INTRA
i = 0;
if (n_left_px > 0) {
for (; i < n_left_px; i++)
@@ -578,35 +1067,23 @@
} else {
memset(left_col, 129, bs << need_bottom);
}
-#else
- if (left_available) {
- if (xd->mb_to_bottom_edge < 0) {
- /* slower path if the block needs border extension */
- if (y0 + bs <= frame_height) {
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- } else {
- const int extend_bottom = frame_height - y0;
- for (i = 0; i < extend_bottom; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- for (; i < bs; ++i)
- left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
- }
- } else {
- /* faster path if the block does not need extension */
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- }
- } else {
- memset(left_col, 129, bs);
- }
-#endif
}
// NEED_ABOVE
- if (extend_modes[mode] & NEED_ABOVE) {
-#if CONFIG_MISC_FIXES
+ if (need_above) {
+#if CONFIG_EXT_INTRA
+ int need_right;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
+ } else {
+ need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+ }
+#else
const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif // CONFIG_EXT_INTRA
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px);
i = n_top_px;
@@ -620,111 +1097,54 @@
} else {
memset(above_row, 127, bs << need_right);
}
-#else
- if (up_available) {
- const uint8_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + bs <= frame_width) {
- memcpy(above_row, above_ref, bs);
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r);
- memset(above_row + r, above_row[r - 1], x0 + bs - frame_width);
- }
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs);
- }
- }
- above_row[-1] = left_available ? above_ref[-1] : 129;
- } else {
- memset(above_row, 127, bs);
- above_row[-1] = 127;
- }
-#endif
}
-#if CONFIG_MISC_FIXES
- if (extend_modes[mode] & NEED_ABOVELEFT) {
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
}
#else
- // NEED_ABOVERIGHT
- if (extend_modes[mode] & NEED_ABOVERIGHT) {
- if (up_available) {
- const uint8_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + 2 * bs <= frame_width) {
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, 2 * bs);
- } else {
- memcpy(above_row, above_ref, bs);
- memset(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 + bs <= frame_width) {
- const int r = frame_width - x0;
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, r);
- memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width);
- } else {
- memcpy(above_row, above_ref, bs);
- memset(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r);
- memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width);
- }
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs);
- if (bs == 4 && right_available)
- memcpy(above_row + bs, above_ref + bs, bs);
- else
- memset(above_row + bs, above_row[bs - 1], bs);
- }
- }
- above_row[-1] = left_available ? above_ref[-1] : 129;
- } else {
- memset(above_row, 127, bs * 2);
- above_row[-1] = 127;
- }
+  if (extend_modes[mode] & NEED_ABOVELEFT) {
+ above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
}
-#endif
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+ const_above_row, left_col);
+ return;
+ }
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ dr_predictor(dst, dst_stride, tx_size, const_above_row, left_col, p_angle);
+ return;
+ }
+#endif // CONFIG_EXT_INTRA
// predict
if (mode == DC_PRED) {
-#if CONFIG_MISC_FIXES
dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
const_above_row, left_col);
-#else
- dc_pred[left_available][up_available][tx_size](dst, dst_stride,
- const_above_row, left_col);
-#endif
} else {
pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
}
}
void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
- TX_SIZE tx_size, PREDICTION_MODE mode,
- const uint8_t *ref, int ref_stride,
- uint8_t *dst, int dst_stride,
- int aoff, int loff, int plane) {
+ TX_SIZE tx_size, PREDICTION_MODE mode,
+ const uint8_t *ref, int ref_stride,
+ uint8_t *dst, int dst_stride,
+ int aoff, int loff, int plane) {
const int txw = (1 << tx_size);
const int have_top = loff || xd->up_available;
const int have_left = aoff || xd->left_available;
const int x = aoff * 4;
const int y = loff * 4;
-#if CONFIG_MISC_FIXES
const int bw = VPXMAX(2, 1 << bwl_in);
const int bh = VPXMAX(2, 1 << bhl_in);
const int mi_row = -xd->mb_to_top_edge >> 6;
@@ -748,10 +1168,6 @@
int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + (wpx - x - txpx);
int yd =
(xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + (hpx - y - txpx);
-#else
- const int bw = (1 << bwl_in);
- const int have_right = (aoff + txw) < bw;
-#endif // CONFIG_MISC_FIXES
if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
const int bs = 4 * (1 << tx_size);
@@ -787,7 +1203,6 @@
return;
}
-#if CONFIG_MISC_FIXES
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
@@ -796,7 +1211,7 @@
have_top && have_right ? VPXMIN(txpx, xr) : 0,
have_left ? VPXMIN(txpx, yd + txpx) : 0,
have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
- x, y, plane, xd->bd);
+ plane);
return;
}
#endif
@@ -806,20 +1221,7 @@
have_top && have_right ? VPXMIN(txpx, xr) : 0,
have_left ? VPXMIN(txpx, yd + txpx) : 0,
have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
- x, y, plane);
-#else // CONFIG_MISC_FIXES
- (void) bhl_in;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
- tx_size, have_top, have_left, have_right,
- x, y, plane, xd->bd);
- return;
- }
-#endif
- build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
- have_top, have_left, have_right, x, y, plane);
-#endif // CONFIG_MISC_FIXES
+ plane);
}
void vp10_init_intra_predictors(void) {
diff --git a/vp10/common/scale.c b/vp10/common/scale.c
index ce6062c..65e14a9 100644
--- a/vp10/common/scale.c
+++ b/vp10/common/scale.c
@@ -46,15 +46,15 @@
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
- int other_w, int other_h,
- int this_w, int this_h,
- int use_highbd) {
+ int other_w, int other_h,
+ int this_w, int this_h,
+ int use_highbd) {
#else
void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
- int other_w, int other_h,
- int this_w, int this_h) {
+ int other_w, int other_h,
+ int this_w, int this_h) {
#endif
- if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
+ if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
sf->x_scale_fp = REF_INVALID_SCALE;
sf->y_scale_fp = REF_INVALID_SCALE;
return;
@@ -79,6 +79,16 @@
// applied in one direction only, and not at all for 0,0, seems to give the
// best quality, but it may be worth trying an additional mode that does
// do the filtering on full-pel.
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ sf->predict_ni[0][0][0] = vpx_convolve8_c;
+ sf->predict_ni[0][0][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[0][1][0] = vpx_convolve8_c;
+ sf->predict_ni[0][1][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[1][0][0] = vpx_convolve8_c;
+ sf->predict_ni[1][0][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[1][1][0] = vpx_convolve8;
+ sf->predict_ni[1][1][1] = vpx_convolve8_avg;
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
if (sf->x_step_q4 == 16) {
if (sf->y_step_q4 == 16) {
// No scaling in either direction.
@@ -119,8 +129,19 @@
// 2D subpel motion always gets filtered in both directions
sf->predict[1][1][0] = vpx_convolve8;
sf->predict[1][1][1] = vpx_convolve8_avg;
+
#if CONFIG_VP9_HIGHBITDEPTH
if (use_highbd) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ sf->highbd_predict_ni[0][0][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[0][0][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[0][1][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[0][1][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[1][0][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[1][0][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[1][1][0] = vpx_highbd_convolve8;
+ sf->highbd_predict_ni[1][1][1] = vpx_highbd_convolve8_avg;
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
if (sf->x_step_q4 == 16) {
if (sf->y_step_q4 == 16) {
// No scaling in either direction.
@@ -162,5 +183,5 @@
sf->highbd_predict[1][1][0] = vpx_highbd_convolve8;
sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg;
}
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
diff --git a/vp10/common/scale.h b/vp10/common/scale.h
index 833f6c4..604b9d2 100644
--- a/vp10/common/scale.h
+++ b/vp10/common/scale.h
@@ -34,7 +34,15 @@
convolve_fn_t predict[2][2][2]; // horiz, vert, avg
#if CONFIG_VP9_HIGHBITDEPTH
highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Functions for non-interpolating filters (filters applied even at
+// zero/full-pel offsets)
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ convolve_fn_t predict_ni[2][2][2]; // horiz, vert, avg
+#if CONFIG_VP9_HIGHBITDEPTH
+ highbd_convolve_fn_t highbd_predict_ni[2][2][2]; // horiz, vert, avg
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
};
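For reference, the three indices of both tables select horizontal
filtering, vertical filtering, and plain-vs-averaging output, each entry
sharing the standard libvpx convolve signature. A hedged usage sketch
(hypothetical wrapper, mirroring how inter_predictor() dispatches):

    /* Sketch: dispatch one motion-compensated prediction through the
     * table. Indices: [subpel_x != 0][subpel_y != 0][ref], where ref = 1
     * averages into dst (second reference of a compound prediction). */
    static void predict_sketch(const struct scale_factors *sf,
                               const uint8_t *src, int src_stride,
                               uint8_t *dst, int dst_stride,
                               const InterpKernel *kernel,
                               int subpel_x, int subpel_y, int xs, int ys,
                               int w, int h, int ref) {
      sf->predict[subpel_x != 0][subpel_y != 0][ref](
          src, src_stride, dst, dst_stride,
          kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
    }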
MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
@@ -48,7 +56,7 @@
void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h);
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
static INLINE int vp10_is_valid_scale(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_INVALID_SCALE &&
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 7217f6d..23a7b98 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -702,7 +702,228 @@
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
};
-const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES] = {
+#if CONFIG_EXT_TX
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+ { // TX_4X4
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ }, { // TX_8X8
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ }, { // TX_16X16
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ }, { // TX_32X32
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ }
+};
+
+const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = {
+ { // TX_4X4
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ }, { // TX_8X8
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ }, { // TX_16X16
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ }, { // TX_32X32
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ }
+};
+
+#else // CONFIG_EXT_TX
+
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ // TX_4X4
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
@@ -714,14 +935,21 @@
{col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}
}, { // TX_16X16
- {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
{row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
{col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors}
}, { // TX_32X32
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
}
};
+#endif // CONFIG_EXT_TX
diff --git a/vp10/common/scan.h b/vp10/common/scan.h
index f5a020f..aadae40 100644
--- a/vp10/common/scan.h
+++ b/vp10/common/scan.h
@@ -30,7 +30,7 @@
} scan_order;
extern const scan_order vp10_default_scan_orders[TX_SIZES];
-extern const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES];
+extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES];
static INLINE int get_coef_context(const int16_t *neighbors,
const uint8_t *token_cache, int c) {
@@ -38,8 +38,31 @@
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
-static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) {
- return &vp10_scan_orders[tx_size][tx_type];
+static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size,
+ TX_TYPE tx_type) {
+ return &vp10_intra_scan_orders[tx_size][tx_type];
+}
+
+#if CONFIG_EXT_TX
+extern const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES];
+
+static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size,
+ TX_TYPE tx_type) {
+ return &vp10_inter_scan_orders[tx_size][tx_type];
+}
+#endif // CONFIG_EXT_TX
+
+static INLINE const scan_order *get_scan(TX_SIZE tx_size,
+ TX_TYPE tx_type,
+ int is_inter) {
+#if CONFIG_EXT_TX
+  return is_inter ? &vp10_inter_scan_orders[tx_size][tx_type]
+                  : &vp10_intra_scan_orders[tx_size][tx_type];
+#else
+ (void) is_inter;
+ return &vp10_intra_scan_orders[tx_size][tx_type];
+#endif // CONFIG_EXT_TX
}
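+
+// Illustrative usage (a sketch, not part of this change): a coefficient coder
+// would select the scan for a block with, e.g.,
+//   const scan_order *so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
+//   const int16_t *scan = so->scan;
+//   const int16_t *nb = so->neighbors;
+// and then walk `scan` over the coefficients while get_coef_context() above
+// consults `nb` for the entropy context.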
#ifdef __cplusplus
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index e83cb8e..55b3537 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -401,6 +401,12 @@
for (i = 0; i < TX_SIZES; i++)
cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
+#if CONFIG_VAR_TX
+ for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.txfm_partition[i][j] += counts->txfm_partition[i][j];
+#endif  // CONFIG_VAR_TX
+
for (i = 0; i < SKIP_CONTEXTS; i++)
for (j = 0; j < 2; j++)
cm->counts.skip[i][j] += counts->skip[i][j];
@@ -435,7 +441,27 @@
comps->fp[i] += comps_t->fp[i];
}
-#if CONFIG_MISC_FIXES
+#if CONFIG_EXT_TX
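+  // Accumulate the per-thread ext-tx histograms into cm->counts. The set
+  // index starts at 1 because set 0 is assumed to permit only a single
+  // transform type, leaving nothing to merge.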
+ for (i = 0; i < EXT_TX_SIZES; i++) {
+ int s, k;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (use_inter_ext_tx_for_tx[s][i]) {
+ for (k = 0; k < TX_TYPES; k++)
+ cm->counts.inter_ext_tx[s][i][k] += counts->inter_ext_tx[s][i][k];
+ }
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (use_intra_ext_tx_for_tx[s][i]) {
+ int j;
+ for (j = 0; j < INTRA_MODES; ++j)
+ for (k = 0; k < TX_TYPES; k++)
+ cm->counts.intra_ext_tx[s][i][j][k] +=
+ counts->intra_ext_tx[s][i][j][k];
+ }
+ }
+ }
+#endif // CONFIG_EXT_TX
+
for (i = 0; i < PREDICTION_PROBS; i++)
for (j = 0; j < 2; j++)
cm->counts.seg.pred[i][j] += counts->seg.pred[i][j];
@@ -444,5 +470,10 @@
cm->counts.seg.tree_total[i] += counts->seg.tree_total[i];
cm->counts.seg.tree_mispred[i] += counts->seg.tree_mispred[i];
}
-#endif
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < PLANE_TYPES; ++i)
+ for (j = 0; j < 2; j++)
+ cm->counts.ext_intra[i][j] += counts->ext_intra[i][j];
+#endif // CONFIG_EXT_INTRA
}
diff --git a/vp10/common/vp10_fwd_txfm1d.c b/vp10/common/vp10_fwd_txfm1d.c
new file mode 100644
index 0000000..6e19e27
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm1d.c
@@ -0,0 +1,1530 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_fwd_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ int i, j; \
+ for (i = 0; i < size; ++i) { \
+ int buf_bit = get_max_bit(abs(buf[i])) + 1; \
+ if (buf_bit > bit) { \
+ printf("======== %s overflow ========\n", __func__); \
+ printf("stage: %d node: %d\n", stage, i); \
+ printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+ printf("input:\n"); \
+ for (j = 0; j < size; j++) { \
+ printf("%d,", input[j]); \
+ } \
+ printf("\n"); \
+        assert(0 && "vp10_fwd_txfm1d.c: range_check overflow");      \
+ } \
+ } \
+ }
+#else
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ (void) stage; \
+ (void) input; \
+ (void) buf; \
+ (void) size; \
+ (void) bit; \
+ }
+#endif
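+
+// What the check above enforces (a descriptive note, assuming get_max_bit()
+// from vp10_txfm.h returns floor(log2(x)) for x > 0): after every stage, the
+// magnitude of each intermediate value must fit in stage_range[stage] bits,
+//   abs(buf[i]) < (1 << stage_range[stage])
+// so buf_bit = get_max_bit(abs(buf[i])) + 1 may not exceed that budget.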
+
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[3];
+ bf1[1] = input[1] + input[2];
+ bf1[2] = -input[2] + input[1];
+ bf1[3] = -input[3] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[2];
+ bf1[2] = bf0[1];
+ bf1[3] = bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
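+
+// A note on the kernel above (descriptive only): each half_btf() call is one
+// output of a butterfly rotation,
+//   half_btf(w0, in0, w1, in1, bit) == round_shift(w0 * in0 + w1 * in1, bit)
+// where cospi[j] approximates cos(j * PI / 128) scaled by (1 << cos_bit). In
+// stage 2, for instance, (bf1[0], bf1[1]) is the scaled 2-point DCT of
+// (bf0[0], bf0[1]).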
+
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[7];
+ bf1[1] = input[1] + input[6];
+ bf1[2] = input[2] + input[5];
+ bf1[3] = input[3] + input[4];
+ bf1[4] = -input[4] + input[3];
+ bf1[5] = -input[5] + input[2];
+ bf1[6] = -input[6] + input[1];
+ bf1[7] = -input[7] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[4];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[6];
+ bf1[4] = bf0[1];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[3];
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
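+
+// The last stage performs no arithmetic: it maps the butterfly network's
+// natural (bit-reversed) output order back to frequency order. Reading k as
+// a 3-bit index, output[k] = bf0[bit_reverse(k)]; e.g. output[1] = bf0[4].
+// The 16- and 32-point DCTs below end with the same permutation at 4 and 5
+// bits respectively.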
+
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[15];
+ bf1[1] = input[1] + input[14];
+ bf1[2] = input[2] + input[13];
+ bf1[3] = input[3] + input[12];
+ bf1[4] = input[4] + input[11];
+ bf1[5] = input[5] + input[10];
+ bf1[6] = input[6] + input[9];
+ bf1[7] = input[7] + input[8];
+ bf1[8] = -input[8] + input[7];
+ bf1[9] = -input[9] + input[6];
+ bf1[10] = -input[10] + input[5];
+ bf1[11] = -input[11] + input[4];
+ bf1[12] = -input[12] + input[3];
+ bf1[13] = -input[13] + input[2];
+ bf1[14] = -input[14] + input[1];
+ bf1[15] = -input[15] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[8];
+ bf1[2] = bf0[4];
+ bf1[3] = bf0[12];
+ bf1[4] = bf0[2];
+ bf1[5] = bf0[10];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[14];
+ bf1[8] = bf0[1];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[5];
+ bf1[11] = bf0[13];
+ bf1[12] = bf0[3];
+ bf1[13] = bf0[11];
+ bf1[14] = bf0[7];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[31];
+ bf1[1] = input[1] + input[30];
+ bf1[2] = input[2] + input[29];
+ bf1[3] = input[3] + input[28];
+ bf1[4] = input[4] + input[27];
+ bf1[5] = input[5] + input[26];
+ bf1[6] = input[6] + input[25];
+ bf1[7] = input[7] + input[24];
+ bf1[8] = input[8] + input[23];
+ bf1[9] = input[9] + input[22];
+ bf1[10] = input[10] + input[21];
+ bf1[11] = input[11] + input[20];
+ bf1[12] = input[12] + input[19];
+ bf1[13] = input[13] + input[18];
+ bf1[14] = input[14] + input[17];
+ bf1[15] = input[15] + input[16];
+ bf1[16] = -input[16] + input[15];
+ bf1[17] = -input[17] + input[14];
+ bf1[18] = -input[18] + input[13];
+ bf1[19] = -input[19] + input[12];
+ bf1[20] = -input[20] + input[11];
+ bf1[21] = -input[21] + input[10];
+ bf1[22] = -input[22] + input[9];
+ bf1[23] = -input[23] + input[8];
+ bf1[24] = -input[24] + input[7];
+ bf1[25] = -input[25] + input[6];
+ bf1[26] = -input[26] + input[5];
+ bf1[27] = -input[27] + input[4];
+ bf1[28] = -input[28] + input[3];
+ bf1[29] = -input[29] + input[2];
+ bf1[30] = -input[30] + input[1];
+ bf1[31] = -input[31] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = -bf0[8] + bf0[7];
+ bf1[9] = -bf0[9] + bf0[6];
+ bf1[10] = -bf0[10] + bf0[5];
+ bf1[11] = -bf0[11] + bf0[4];
+ bf1[12] = -bf0[12] + bf0[3];
+ bf1[13] = -bf0[13] + bf0[2];
+ bf1[14] = -bf0[14] + bf0[1];
+ bf1[15] = -bf0[15] + bf0[0];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = -bf0[20] + bf0[19];
+ bf1[21] = -bf0[21] + bf0[18];
+ bf1[22] = -bf0[22] + bf0[17];
+ bf1[23] = -bf0[23] + bf0[16];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[28] + bf0[27];
+ bf1[29] = bf0[29] + bf0[26];
+ bf1[30] = bf0[30] + bf0[25];
+ bf1[31] = bf0[31] + bf0[24];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = -bf0[18] + bf0[17];
+ bf1[19] = -bf0[19] + bf0[16];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[22] + bf0[21];
+ bf1[23] = bf0[23] + bf0[20];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = -bf0[26] + bf0[25];
+ bf1[27] = -bf0[27] + bf0[24];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[30] + bf0[29];
+ bf1[31] = bf0[31] + bf0[28];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = -bf0[17] + bf0[16];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[19] + bf0[18];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = -bf0[21] + bf0[20];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[23] + bf0[22];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = -bf0[25] + bf0[24];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[27] + bf0[26];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = -bf0[29] + bf0[28];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[31] + bf0[30];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[16];
+ bf1[2] = bf0[8];
+ bf1[3] = bf0[24];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[20];
+ bf1[6] = bf0[12];
+ bf1[7] = bf0[28];
+ bf1[8] = bf0[2];
+ bf1[9] = bf0[18];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[26];
+ bf1[12] = bf0[6];
+ bf1[13] = bf0[22];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[30];
+ bf1[16] = bf0[1];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[9];
+ bf1[19] = bf0[25];
+ bf1[20] = bf0[5];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[13];
+ bf1[23] = bf0[29];
+ bf1[24] = bf0[3];
+ bf1[25] = bf0[19];
+ bf1[26] = bf0[11];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[7];
+ bf1[29] = bf0[23];
+ bf1[30] = bf0[15];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[3];
+ bf1[1] = input[0];
+ bf1[2] = input[1];
+ bf1[3] = input[2];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[2];
+ bf1[2] = bf0[3];
+ bf1[3] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
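+
+// Structurally, the ADST above is: an input permutation, a stage of fixed-
+// angle rotations, butterfly add/sub stages, and a final permutation with
+// alternating sign flips. The 8/16/32-point variants below follow the same
+// skeleton with more stages.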
+
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[7];
+ bf1[1] = input[0];
+ bf1[2] = input[5];
+ bf1[3] = input[2];
+ bf1[4] = input[3];
+ bf1[5] = input[4];
+ bf1[6] = input[1];
+ bf1[7] = input[6];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[4];
+ bf1[2] = bf0[6];
+ bf1[3] = -bf0[2];
+ bf1[4] = bf0[3];
+ bf1[5] = -bf0[7];
+ bf1[6] = bf0[5];
+ bf1[7] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[15];
+ bf1[1] = input[0];
+ bf1[2] = input[13];
+ bf1[3] = input[2];
+ bf1[4] = input[11];
+ bf1[5] = input[4];
+ bf1[6] = input[9];
+ bf1[7] = input[6];
+ bf1[8] = input[7];
+ bf1[9] = input[8];
+ bf1[10] = input[5];
+ bf1[11] = input[10];
+ bf1[12] = input[3];
+ bf1[13] = input[12];
+ bf1[14] = input[1];
+ bf1[15] = input[14];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[2], bf0[1], cospi[62], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[10], bf0[3], cospi[54], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[18], bf0[5], cospi[46], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[26], bf0[7], cospi[38], bf0[6], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[34], bf0[9], cospi[30], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[42], bf0[11], cospi[22], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[50], bf0[13], cospi[14], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[58], bf0[15], cospi[6], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = -bf0[8] + bf0[0];
+ bf1[9] = -bf0[9] + bf0[1];
+ bf1[10] = -bf0[10] + bf0[2];
+ bf1[11] = -bf0[11] + bf0[3];
+ bf1[12] = -bf0[12] + bf0[4];
+ bf1[13] = -bf0[13] + bf0[5];
+ bf1[14] = -bf0[14] + bf0[6];
+ bf1[15] = -bf0[15] + bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = -bf0[12] + bf0[8];
+ bf1[13] = -bf0[13] + bf0[9];
+ bf1[14] = -bf0[14] + bf0[10];
+ bf1[15] = -bf0[15] + bf0[11];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = -bf0[10] + bf0[8];
+ bf1[11] = -bf0[11] + bf0[9];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = -bf0[14] + bf0[12];
+ bf1[15] = -bf0[15] + bf0[13];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[8];
+ bf1[2] = bf0[12];
+ bf1[3] = -bf0[4];
+ bf1[4] = bf0[6];
+ bf1[5] = -bf0[14];
+ bf1[6] = bf0[10];
+ bf1[7] = -bf0[2];
+ bf1[8] = bf0[3];
+ bf1[9] = -bf0[11];
+ bf1[10] = bf0[15];
+ bf1[11] = -bf0[7];
+ bf1[12] = bf0[5];
+ bf1[13] = -bf0[13];
+ bf1[14] = bf0[9];
+ bf1[15] = -bf0[1];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[31];
+ bf1[1] = input[0];
+ bf1[2] = input[29];
+ bf1[3] = input[2];
+ bf1[4] = input[27];
+ bf1[5] = input[4];
+ bf1[6] = input[25];
+ bf1[7] = input[6];
+ bf1[8] = input[23];
+ bf1[9] = input[8];
+ bf1[10] = input[21];
+ bf1[11] = input[10];
+ bf1[12] = input[19];
+ bf1[13] = input[12];
+ bf1[14] = input[17];
+ bf1[15] = input[14];
+ bf1[16] = input[15];
+ bf1[17] = input[16];
+ bf1[18] = input[13];
+ bf1[19] = input[18];
+ bf1[20] = input[11];
+ bf1[21] = input[20];
+ bf1[22] = input[9];
+ bf1[23] = input[22];
+ bf1[24] = input[7];
+ bf1[25] = input[24];
+ bf1[26] = input[5];
+ bf1[27] = input[26];
+ bf1[28] = input[3];
+ bf1[29] = input[28];
+ bf1[30] = input[1];
+ bf1[31] = input[30];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[21], bf0[11], cospi[43], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit[stage]);
+ bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[16];
+ bf1[1] = bf0[1] + bf0[17];
+ bf1[2] = bf0[2] + bf0[18];
+ bf1[3] = bf0[3] + bf0[19];
+ bf1[4] = bf0[4] + bf0[20];
+ bf1[5] = bf0[5] + bf0[21];
+ bf1[6] = bf0[6] + bf0[22];
+ bf1[7] = bf0[7] + bf0[23];
+ bf1[8] = bf0[8] + bf0[24];
+ bf1[9] = bf0[9] + bf0[25];
+ bf1[10] = bf0[10] + bf0[26];
+ bf1[11] = bf0[11] + bf0[27];
+ bf1[12] = bf0[12] + bf0[28];
+ bf1[13] = bf0[13] + bf0[29];
+ bf1[14] = bf0[14] + bf0[30];
+ bf1[15] = bf0[15] + bf0[31];
+ bf1[16] = -bf0[16] + bf0[0];
+ bf1[17] = -bf0[17] + bf0[1];
+ bf1[18] = -bf0[18] + bf0[2];
+ bf1[19] = -bf0[19] + bf0[3];
+ bf1[20] = -bf0[20] + bf0[4];
+ bf1[21] = -bf0[21] + bf0[5];
+ bf1[22] = -bf0[22] + bf0[6];
+ bf1[23] = -bf0[23] + bf0[7];
+ bf1[24] = -bf0[24] + bf0[8];
+ bf1[25] = -bf0[25] + bf0[9];
+ bf1[26] = -bf0[26] + bf0[10];
+ bf1[27] = -bf0[27] + bf0[11];
+ bf1[28] = -bf0[28] + bf0[12];
+ bf1[29] = -bf0[29] + bf0[13];
+ bf1[30] = -bf0[30] + bf0[14];
+ bf1[31] = -bf0[31] + bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit[stage]);
+ bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = -bf0[8] + bf0[0];
+ bf1[9] = -bf0[9] + bf0[1];
+ bf1[10] = -bf0[10] + bf0[2];
+ bf1[11] = -bf0[11] + bf0[3];
+ bf1[12] = -bf0[12] + bf0[4];
+ bf1[13] = -bf0[13] + bf0[5];
+ bf1[14] = -bf0[14] + bf0[6];
+ bf1[15] = -bf0[15] + bf0[7];
+ bf1[16] = bf0[16] + bf0[24];
+ bf1[17] = bf0[17] + bf0[25];
+ bf1[18] = bf0[18] + bf0[26];
+ bf1[19] = bf0[19] + bf0[27];
+ bf1[20] = bf0[20] + bf0[28];
+ bf1[21] = bf0[21] + bf0[29];
+ bf1[22] = bf0[22] + bf0[30];
+ bf1[23] = bf0[23] + bf0[31];
+ bf1[24] = -bf0[24] + bf0[16];
+ bf1[25] = -bf0[25] + bf0[17];
+ bf1[26] = -bf0[26] + bf0[18];
+ bf1[27] = -bf0[27] + bf0[19];
+ bf1[28] = -bf0[28] + bf0[20];
+ bf1[29] = -bf0[29] + bf0[21];
+ bf1[30] = -bf0[30] + bf0[22];
+ bf1[31] = -bf0[31] + bf0[23];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = -bf0[12] + bf0[8];
+ bf1[13] = -bf0[13] + bf0[9];
+ bf1[14] = -bf0[14] + bf0[10];
+ bf1[15] = -bf0[15] + bf0[11];
+ bf1[16] = bf0[16] + bf0[20];
+ bf1[17] = bf0[17] + bf0[21];
+ bf1[18] = bf0[18] + bf0[22];
+ bf1[19] = bf0[19] + bf0[23];
+ bf1[20] = -bf0[20] + bf0[16];
+ bf1[21] = -bf0[21] + bf0[17];
+ bf1[22] = -bf0[22] + bf0[18];
+ bf1[23] = -bf0[23] + bf0[19];
+ bf1[24] = bf0[24] + bf0[28];
+ bf1[25] = bf0[25] + bf0[29];
+ bf1[26] = bf0[26] + bf0[30];
+ bf1[27] = bf0[27] + bf0[31];
+ bf1[28] = -bf0[28] + bf0[24];
+ bf1[29] = -bf0[29] + bf0[25];
+ bf1[30] = -bf0[30] + bf0[26];
+ bf1[31] = -bf0[31] + bf0[27];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = -bf0[10] + bf0[8];
+ bf1[11] = -bf0[11] + bf0[9];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = -bf0[14] + bf0[12];
+ bf1[15] = -bf0[15] + bf0[13];
+ bf1[16] = bf0[16] + bf0[18];
+ bf1[17] = bf0[17] + bf0[19];
+ bf1[18] = -bf0[18] + bf0[16];
+ bf1[19] = -bf0[19] + bf0[17];
+ bf1[20] = bf0[20] + bf0[22];
+ bf1[21] = bf0[21] + bf0[23];
+ bf1[22] = -bf0[22] + bf0[20];
+ bf1[23] = -bf0[23] + bf0[21];
+ bf1[24] = bf0[24] + bf0[26];
+ bf1[25] = bf0[25] + bf0[27];
+ bf1[26] = -bf0[26] + bf0[24];
+ bf1[27] = -bf0[27] + bf0[25];
+ bf1[28] = bf0[28] + bf0[30];
+ bf1[29] = bf0[29] + bf0[31];
+ bf1[30] = -bf0[30] + bf0[28];
+ bf1[31] = -bf0[31] + bf0[29];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit[stage]);
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[16];
+ bf1[2] = bf0[24];
+ bf1[3] = -bf0[8];
+ bf1[4] = bf0[12];
+ bf1[5] = -bf0[28];
+ bf1[6] = bf0[20];
+ bf1[7] = -bf0[4];
+ bf1[8] = bf0[6];
+ bf1[9] = -bf0[22];
+ bf1[10] = bf0[30];
+ bf1[11] = -bf0[14];
+ bf1[12] = bf0[10];
+ bf1[13] = -bf0[26];
+ bf1[14] = bf0[18];
+ bf1[15] = -bf0[2];
+ bf1[16] = bf0[3];
+ bf1[17] = -bf0[19];
+ bf1[18] = bf0[27];
+ bf1[19] = -bf0[11];
+ bf1[20] = bf0[15];
+ bf1[21] = -bf0[31];
+ bf1[22] = bf0[23];
+ bf1[23] = -bf0[7];
+ bf1[24] = bf0[5];
+ bf1[25] = -bf0[21];
+ bf1[26] = bf0[29];
+ bf1[27] = -bf0[13];
+ bf1[28] = bf0[9];
+ bf1[29] = -bf0[25];
+ bf1[30] = bf0[17];
+ bf1[31] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/vp10/common/vp10_fwd_txfm1d.h b/vp10/common/vp10_fwd_txfm1d.h
new file mode 100644
index 0000000..d5b9f40
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm1d.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM1D_H_
+#define VP10_FWD_TXFM1D_H_
+
+#include "vp10/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP10_FWD_TXFM1D_H_
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c
new file mode 100644
index 0000000..67449ec
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_txfm.h"
+
+static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ int32_t *txfm_buf) {
+ int i, j;
+ const int txfm_size = cfg->txfm_size;
+ const int8_t *shift = cfg->shift;
+ const int8_t *stage_range_col = cfg->stage_range_col;
+ const int8_t *stage_range_row = cfg->stage_range_row;
+ const int8_t *cos_bit_col = cfg->cos_bit_col;
+ const int8_t *cos_bit_row = cfg->cos_bit_row;
+ const TxfmFunc txfm_func_col = cfg->txfm_func_col;
+ const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+
+ // txfm_buf must hold txfm_size * txfm_size + 2 * txfm_size entries;
+ // it is used to buffer intermediate data.
+ int32_t *temp_in = txfm_buf;
+ int32_t *temp_out = temp_in + txfm_size;
+ int32_t *buf = temp_out + txfm_size;
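+ // For example, with txfm_size = 4 the caller provides a 24-entry buffer:
+ // temp_in occupies txfm_buf[0..3], temp_out txfm_buf[4..7], and buf the
+ // remaining 4 * 4 = 16 entries holding the column-transformed block.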
+
+ // Columns
+ for (i = 0; i < txfm_size; ++i) {
+ for (j = 0; j < txfm_size; ++j)
+ temp_in[j] = input[j * stride + i];
+ round_shift_array(temp_in, txfm_size, -shift[0]);
+ txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+ round_shift_array(temp_out, txfm_size, -shift[1]);
+ for (j = 0; j < txfm_size; ++j)
+ buf[j * txfm_size + i] = temp_out[j];
+ }
+
+ // Rows
+ for (i = 0; i < txfm_size; ++i) {
+ for (j = 0; j < txfm_size; ++j)
+ temp_in[j] = buf[j + i * txfm_size];
+ txfm_func_row(temp_in, temp_out, cos_bit_row, stage_range_row);
+ round_shift_array(temp_out, txfm_size, -shift[2]);
+ for (j = 0; j < txfm_size; ++j)
+ output[j + i * txfm_size] = (int32_t)temp_out[j];
+ }
+}
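+
+// The wrappers below differ only in the size of the stack buffer they pass
+// to fwd_txfm2d_c; the bd (bit depth) argument is currently unused.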
+
+void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[4 * 4 + 4 + 4];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[8 * 8 + 8 + 8];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[16 * 16 + 16 + 16];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[32 * 32 + 32 + 32];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
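+
+// Usage sketch (illustrative, not part of the change): pair a wrapper with a
+// matching config from vp10_fwd_txfm2d_cfg.h, e.g. for a 4x4 DCT_DCT:
+//
+//   #include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+//
+//   int16_t input[4 * 4] = { ... };  // residual block, stride 4
+//   int32_t output[4 * 4];
+//   vp10_fwd_txfm2d_4x4(input, output, 4, &fwd_txfm_2d_cfg_dct_dct_4, 8);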
diff --git a/vp10/common/vp10_fwd_txfm2d.h b/vp10/common/vp10_fwd_txfm2d.h
new file mode 100644
index 0000000..64e6f56
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm2d.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM2D_H_
+#define VP10_FWD_TXFM2D_H_
+
+#include "vp10/common/vp10_txfm.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+#ifdef __cplusplus
+}
+#endif
+#endif // VP10_FWD_TXFM2D_H_
diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h
new file mode 100644
index 0000000..93fee6f
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm2d_cfg.h
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM2D_CFG_H_
+#define VP10_FWD_TXFM2D_CFG_H_
+#include "vp10/common/vp10_fwd_txfm1d.h"
+
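+// Each TXFM_2D_CFG below describes one separable 2D transform, applied as a
+// column pass followed by a row pass (see fwd_txfm2d_c in vp10_fwd_txfm2d.c):
+// - shift[0..2] are applied before the column pass, after the column pass,
+//   and after the row pass respectively; assuming round_shift_array's usual
+//   convention, a positive entry scales up by that many bits and a negative
+//   entry rounds down by that many bits.
+// - stage_range_col/_row give the expected coefficient bit-width after each
+//   1D stage, checked when CONFIG_COEFFICIENT_RANGE_CHECKING is enabled.
+// - cos_bit_col/_row select the cosine-table precision used at each stage.
+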
+// ---------------- config fwd_dct_dct_4 ----------------
+static int8_t fwd_shift_dct_dct_4[3] = {4, 0, -2};
+static int8_t fwd_stage_range_col_dct_dct_4[4] = {15, 16, 17, 17};
+static int8_t fwd_stage_range_row_dct_dct_4[4] = {17, 18, 18, 18};
+static int8_t fwd_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
+static int8_t fwd_cos_bit_row_dct_dct_4[4] = {15, 14, 14, 14};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 4,
+ .stage_num_row = 4,
+
+ .shift = fwd_shift_dct_dct_4,
+ .stage_range_col = fwd_stage_range_col_dct_dct_4,
+ .stage_range_row = fwd_stage_range_row_dct_dct_4,
+ .cos_bit_col = fwd_cos_bit_col_dct_dct_4,
+ .cos_bit_row = fwd_cos_bit_row_dct_dct_4,
+ .txfm_func_col = vp10_fdct4_new,
+ .txfm_func_row = vp10_fdct4_new};
+
+// ---------------- config fwd_dct_dct_8 ----------------
+static int8_t fwd_shift_dct_dct_8[3] = {5, -3, -1};
+static int8_t fwd_stage_range_col_dct_dct_8[6] = {16, 17, 18, 19, 19, 19};
+static int8_t fwd_stage_range_row_dct_dct_8[6] = {16, 17, 18, 18, 18, 18};
+static int8_t fwd_cos_bit_col_dct_dct_8[6] = {15, 15, 14, 13, 13, 13};
+static int8_t fwd_cos_bit_row_dct_dct_8[6] = {15, 15, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 6,
+ .stage_num_row = 6,
+
+ .shift = fwd_shift_dct_dct_8,
+ .stage_range_col = fwd_stage_range_col_dct_dct_8,
+ .stage_range_row = fwd_stage_range_row_dct_dct_8,
+ .cos_bit_col = fwd_cos_bit_col_dct_dct_8,
+ .cos_bit_row = fwd_cos_bit_row_dct_dct_8,
+ .txfm_func_col = vp10_fdct8_new,
+ .txfm_func_row = vp10_fdct8_new};
+
+// ---------------- config fwd_dct_dct_16 ----------------
+static int8_t fwd_shift_dct_dct_16[3] = {4, -3, -1};
+static int8_t fwd_stage_range_col_dct_dct_16[8] = {15, 16, 17, 18,
+ 19, 19, 19, 19};
+static int8_t fwd_stage_range_row_dct_dct_16[8] = {16, 17, 18, 19,
+ 19, 19, 19, 19};
+static int8_t fwd_cos_bit_col_dct_dct_16[8] = {15, 15, 15, 14, 13, 13, 13, 13};
+static int8_t fwd_cos_bit_row_dct_dct_16[8] = {15, 15, 14, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 8,
+ .stage_num_row = 8,
+
+ .shift = fwd_shift_dct_dct_16,
+ .stage_range_col = fwd_stage_range_col_dct_dct_16,
+ .stage_range_row = fwd_stage_range_row_dct_dct_16,
+ .cos_bit_col = fwd_cos_bit_col_dct_dct_16,
+ .cos_bit_row = fwd_cos_bit_row_dct_dct_16,
+ .txfm_func_col = vp10_fdct16_new,
+ .txfm_func_row = vp10_fdct16_new};
+
+// ---------------- config fwd_dct_dct_32 ----------------
+static int8_t fwd_shift_dct_dct_32[3] = {3, -3, -1};
+static int8_t fwd_stage_range_col_dct_dct_32[10] = {14, 15, 16, 17, 18,
+ 19, 19, 19, 19, 19};
+static int8_t fwd_stage_range_row_dct_dct_32[10] = {16, 17, 18, 19, 20,
+ 20, 20, 20, 20, 20};
+static int8_t fwd_cos_bit_col_dct_dct_32[10] = {15, 15, 15, 15, 14,
+ 13, 13, 13, 13, 13};
+static int8_t fwd_cos_bit_row_dct_dct_32[10] = {15, 15, 14, 13, 12,
+ 12, 12, 12, 12, 12};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 10,
+ .stage_num_row = 10,
+
+ .shift = fwd_shift_dct_dct_32,
+ .stage_range_col = fwd_stage_range_col_dct_dct_32,
+ .stage_range_row = fwd_stage_range_row_dct_dct_32,
+ .cos_bit_col = fwd_cos_bit_col_dct_dct_32,
+ .cos_bit_row = fwd_cos_bit_row_dct_dct_32,
+ .txfm_func_col = vp10_fdct32_new,
+ .txfm_func_row = vp10_fdct32_new};
+
+// ---------------- config fwd_dct_adst_4 ----------------
+static int8_t fwd_shift_dct_adst_4[3] = {5, -2, -1};
+static int8_t fwd_stage_range_col_dct_adst_4[4] = {16, 17, 18, 18};
+static int8_t fwd_stage_range_row_dct_adst_4[6] = {16, 16, 16, 17, 17, 17};
+static int8_t fwd_cos_bit_col_dct_adst_4[4] = {15, 15, 14, 14};
+static int8_t fwd_cos_bit_row_dct_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 4,
+ .stage_num_row = 6,
+
+ .shift = fwd_shift_dct_adst_4,
+ .stage_range_col = fwd_stage_range_col_dct_adst_4,
+ .stage_range_row = fwd_stage_range_row_dct_adst_4,
+ .cos_bit_col = fwd_cos_bit_col_dct_adst_4,
+ .cos_bit_row = fwd_cos_bit_row_dct_adst_4,
+ .txfm_func_col = vp10_fdct4_new,
+ .txfm_func_row = vp10_fadst4_new};
+
+// ---------------- config fwd_dct_adst_8 ----------------
+static int8_t fwd_shift_dct_adst_8[3] = {7, -3, -3};
+static int8_t fwd_stage_range_col_dct_adst_8[6] = {18, 19, 20, 21, 21, 21};
+static int8_t fwd_stage_range_row_dct_adst_8[8] = {18, 18, 18, 19,
+ 19, 20, 20, 20};
+static int8_t fwd_cos_bit_col_dct_adst_8[6] = {14, 13, 12, 11, 11, 11};
+static int8_t fwd_cos_bit_row_dct_adst_8[8] = {14, 14, 14, 13, 13, 12, 12, 12};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 6,
+ .stage_num_row = 8,
+
+ .shift = fwd_shift_dct_adst_8,
+ .stage_range_col = fwd_stage_range_col_dct_adst_8,
+ .stage_range_row = fwd_stage_range_row_dct_adst_8,
+ .cos_bit_col = fwd_cos_bit_col_dct_adst_8,
+ .cos_bit_row = fwd_cos_bit_row_dct_adst_8,
+ .txfm_func_col = vp10_fdct8_new,
+ .txfm_func_row = vp10_fadst8_new};
+
+// ---------------- config fwd_dct_adst_16 ----------------
+static int8_t fwd_shift_dct_adst_16[3] = {4, -1, -3};
+static int8_t fwd_stage_range_col_dct_adst_16[8] = {15, 16, 17, 18,
+ 19, 19, 19, 19};
+static int8_t fwd_stage_range_row_dct_adst_16[10] = {18, 18, 18, 19, 19,
+ 20, 20, 21, 21, 21};
+static int8_t fwd_cos_bit_col_dct_adst_16[8] = {15, 15, 15, 14, 13, 13, 13, 13};
+static int8_t fwd_cos_bit_row_dct_adst_16[10] = {14, 14, 14, 13, 13,
+ 12, 12, 11, 11, 11};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 8,
+ .stage_num_row = 10,
+
+ .shift = fwd_shift_dct_adst_16,
+ .stage_range_col = fwd_stage_range_col_dct_adst_16,
+ .stage_range_row = fwd_stage_range_row_dct_adst_16,
+ .cos_bit_col = fwd_cos_bit_col_dct_adst_16,
+ .cos_bit_row = fwd_cos_bit_row_dct_adst_16,
+ .txfm_func_col = vp10_fdct16_new,
+ .txfm_func_row = vp10_fadst16_new};
+
+// ---------------- config fwd_dct_adst_32 ----------------
+static int8_t fwd_shift_dct_adst_32[3] = {3, -1, -3};
+static int8_t fwd_stage_range_col_dct_adst_32[10] = {14, 15, 16, 17, 18,
+ 19, 19, 19, 19, 19};
+static int8_t fwd_stage_range_row_dct_adst_32[12] = {18, 18, 18, 19, 19, 20,
+ 20, 21, 21, 22, 22, 22};
+static int8_t fwd_cos_bit_col_dct_adst_32[10] = {15, 15, 15, 15, 14,
+ 13, 13, 13, 13, 13};
+static int8_t fwd_cos_bit_row_dct_adst_32[12] = {14, 14, 14, 13, 13, 12,
+ 12, 11, 11, 10, 10, 10};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 10,
+ .stage_num_row = 12,
+
+ .shift = fwd_shift_dct_adst_32,
+ .stage_range_col = fwd_stage_range_col_dct_adst_32,
+ .stage_range_row = fwd_stage_range_row_dct_adst_32,
+ .cos_bit_col = fwd_cos_bit_col_dct_adst_32,
+ .cos_bit_row = fwd_cos_bit_row_dct_adst_32,
+ .txfm_func_col = vp10_fdct32_new,
+ .txfm_func_row = vp10_fadst32_new};
+
+// ---------------- config fwd_adst_adst_4 ----------------
+static int8_t fwd_shift_adst_adst_4[3] = {6, 1, -5};
+static int8_t fwd_stage_range_col_adst_adst_4[6] = {17, 17, 18, 19, 19, 19};
+static int8_t fwd_stage_range_row_adst_adst_4[6] = {20, 20, 20, 21, 21, 21};
+static int8_t fwd_cos_bit_col_adst_adst_4[6] = {15, 15, 14, 13, 13, 13};
+static int8_t fwd_cos_bit_row_adst_adst_4[6] = {12, 12, 12, 11, 11, 11};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 6,
+ .stage_num_row = 6,
+
+ .shift = fwd_shift_adst_adst_4,
+ .stage_range_col = fwd_stage_range_col_adst_adst_4,
+ .stage_range_row = fwd_stage_range_row_adst_adst_4,
+ .cos_bit_col = fwd_cos_bit_col_adst_adst_4,
+ .cos_bit_row = fwd_cos_bit_row_adst_adst_4,
+ .txfm_func_col = vp10_fadst4_new,
+ .txfm_func_row = vp10_fadst4_new};
+
+// ---------------- config fwd_adst_adst_8 ----------------
+static int8_t fwd_shift_adst_adst_8[3] = {3, -1, -1};
+static int8_t fwd_stage_range_col_adst_adst_8[8] = {14, 14, 15, 16,
+ 16, 17, 17, 17};
+static int8_t fwd_stage_range_row_adst_adst_8[8] = {16, 16, 16, 17,
+ 17, 18, 18, 18};
+static int8_t fwd_cos_bit_col_adst_adst_8[8] = {15, 15, 15, 15, 15, 15, 15, 15};
+static int8_t fwd_cos_bit_row_adst_adst_8[8] = {15, 15, 15, 15, 15, 14, 14, 14};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 8,
+ .stage_num_row = 8,
+
+ .shift = fwd_shift_adst_adst_8,
+ .stage_range_col = fwd_stage_range_col_adst_adst_8,
+ .stage_range_row = fwd_stage_range_row_adst_adst_8,
+ .cos_bit_col = fwd_cos_bit_col_adst_adst_8,
+ .cos_bit_row = fwd_cos_bit_row_adst_adst_8,
+ .txfm_func_col = vp10_fadst8_new,
+ .txfm_func_row = vp10_fadst8_new};
+
+// ---------------- config fwd_adst_adst_16 ----------------
+static int8_t fwd_shift_adst_adst_16[3] = {2, 0, -2};
+static int8_t fwd_stage_range_col_adst_adst_16[10] = {13, 13, 14, 15, 15,
+ 16, 16, 17, 17, 17};
+static int8_t fwd_stage_range_row_adst_adst_16[10] = {17, 17, 17, 18, 18,
+ 19, 19, 20, 20, 20};
+static int8_t fwd_cos_bit_col_adst_adst_16[10] = {15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15};
+static int8_t fwd_cos_bit_row_adst_adst_16[10] = {15, 15, 15, 14, 14,
+ 13, 13, 12, 12, 12};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 10,
+ .stage_num_row = 10,
+
+ .shift = fwd_shift_adst_adst_16,
+ .stage_range_col = fwd_stage_range_col_adst_adst_16,
+ .stage_range_row = fwd_stage_range_row_adst_adst_16,
+ .cos_bit_col = fwd_cos_bit_col_adst_adst_16,
+ .cos_bit_row = fwd_cos_bit_row_adst_adst_16,
+ .txfm_func_col = vp10_fadst16_new,
+ .txfm_func_row = vp10_fadst16_new};
+
+// ---------------- config fwd_adst_adst_32 ----------------
+static int8_t fwd_shift_adst_adst_32[3] = {4, -2, -3};
+static int8_t fwd_stage_range_col_adst_adst_32[12] = {15, 15, 16, 17, 17, 18,
+ 18, 19, 19, 20, 20, 20};
+static int8_t fwd_stage_range_row_adst_adst_32[12] = {18, 18, 18, 19, 19, 20,
+ 20, 21, 21, 22, 22, 22};
+static int8_t fwd_cos_bit_col_adst_adst_32[12] = {15, 15, 15, 15, 15, 14,
+ 14, 13, 13, 12, 12, 12};
+static int8_t fwd_cos_bit_row_adst_adst_32[12] = {14, 14, 14, 13, 13, 12,
+ 12, 11, 11, 10, 10, 10};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 12,
+ .stage_num_row = 12,
+
+ .shift = fwd_shift_adst_adst_32,
+ .stage_range_col = fwd_stage_range_col_adst_adst_32,
+ .stage_range_row = fwd_stage_range_row_adst_adst_32,
+ .cos_bit_col = fwd_cos_bit_col_adst_adst_32,
+ .cos_bit_row = fwd_cos_bit_row_adst_adst_32,
+ .txfm_func_col = vp10_fadst32_new,
+ .txfm_func_row = vp10_fadst32_new};
+
+// ---------------- config fwd_adst_dct_4 ----------------
+static int8_t fwd_shift_adst_dct_4[3] = {5, -4, 1};
+static int8_t fwd_stage_range_col_adst_dct_4[6] = {16, 16, 17, 18, 18, 18};
+static int8_t fwd_stage_range_row_adst_dct_4[4] = {14, 15, 15, 15};
+static int8_t fwd_cos_bit_col_adst_dct_4[6] = {15, 15, 15, 14, 14, 14};
+static int8_t fwd_cos_bit_row_adst_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 6,
+ .stage_num_row = 4,
+
+ .shift = fwd_shift_adst_dct_4,
+ .stage_range_col = fwd_stage_range_col_adst_dct_4,
+ .stage_range_row = fwd_stage_range_row_adst_dct_4,
+ .cos_bit_col = fwd_cos_bit_col_adst_dct_4,
+ .cos_bit_row = fwd_cos_bit_row_adst_dct_4,
+ .txfm_func_col = vp10_fadst4_new,
+ .txfm_func_row = vp10_fdct4_new};
+
+// ---------------- config fwd_adst_dct_8 ----------------
+static int8_t fwd_shift_adst_dct_8[3] = {5, 1, -5};
+static int8_t fwd_stage_range_col_adst_dct_8[8] = {16, 16, 17, 18,
+ 18, 19, 19, 19};
+static int8_t fwd_stage_range_row_adst_dct_8[6] = {20, 21, 22, 22, 22, 22};
+static int8_t fwd_cos_bit_col_adst_dct_8[8] = {15, 15, 15, 14, 14, 13, 13, 13};
+static int8_t fwd_cos_bit_row_adst_dct_8[6] = {12, 11, 10, 10, 10, 10};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 8,
+ .stage_num_row = 6,
+
+ .shift = fwd_shift_adst_dct_8,
+ .stage_range_col = fwd_stage_range_col_adst_dct_8,
+ .stage_range_row = fwd_stage_range_row_adst_dct_8,
+ .cos_bit_col = fwd_cos_bit_col_adst_dct_8,
+ .cos_bit_row = fwd_cos_bit_row_adst_dct_8,
+ .txfm_func_col = vp10_fadst8_new,
+ .txfm_func_row = vp10_fdct8_new};
+
+// ---------------- config fwd_adst_dct_16 ----------------
+static int8_t fwd_shift_adst_dct_16[3] = {4, -3, -1};
+static int8_t fwd_stage_range_col_adst_dct_16[10] = {15, 15, 16, 17, 17,
+ 18, 18, 19, 19, 19};
+static int8_t fwd_stage_range_row_adst_dct_16[8] = {16, 17, 18, 19,
+ 19, 19, 19, 19};
+static int8_t fwd_cos_bit_col_adst_dct_16[10] = {15, 15, 15, 15, 15,
+ 14, 14, 13, 13, 13};
+static int8_t fwd_cos_bit_row_adst_dct_16[8] = {15, 15, 14, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 10,
+ .stage_num_row = 8,
+
+ .shift = fwd_shift_adst_dct_16,
+ .stage_range_col = fwd_stage_range_col_adst_dct_16,
+ .stage_range_row = fwd_stage_range_row_adst_dct_16,
+ .cos_bit_col = fwd_cos_bit_col_adst_dct_16,
+ .cos_bit_row = fwd_cos_bit_row_adst_dct_16,
+ .txfm_func_col = vp10_fadst16_new,
+ .txfm_func_row = vp10_fdct16_new};
+
+// ---------------- config fwd_adst_dct_32 ----------------
+static int8_t fwd_shift_adst_dct_32[3] = {5, -4, -2};
+static int8_t fwd_stage_range_col_adst_dct_32[12] = {16, 16, 17, 18, 18, 19,
+ 19, 20, 20, 21, 21, 21};
+static int8_t fwd_stage_range_row_adst_dct_32[10] = {17, 18, 19, 20, 21,
+ 21, 21, 21, 21, 21};
+static int8_t fwd_cos_bit_col_adst_dct_32[12] = {15, 15, 15, 14, 14, 13,
+ 13, 12, 12, 11, 11, 11};
+static int8_t fwd_cos_bit_row_adst_dct_32[10] = {15, 14, 13, 12, 11,
+ 11, 11, 11, 11, 11};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 12,
+ .stage_num_row = 10,
+
+ .shift = fwd_shift_adst_dct_32,
+ .stage_range_col = fwd_stage_range_col_adst_dct_32,
+ .stage_range_row = fwd_stage_range_row_adst_dct_32,
+ .cos_bit_col = fwd_cos_bit_col_adst_dct_32,
+ .cos_bit_row = fwd_cos_bit_row_adst_dct_32,
+ .txfm_func_col = vp10_fadst32_new,
+ .txfm_func_row = vp10_fdct32_new};
+
+#endif // VP10_FWD_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_inv_txfm1d.c b/vp10/common/vp10_inv_txfm1d.c
new file mode 100644
index 0000000..b64b601
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm1d.c
@@ -0,0 +1,1536 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_inv_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ int i, j; \
+ for (i = 0; i < size; ++i) { \
+ int buf_bit = get_max_bit(abs(buf[i])) + 1; \
+ if (buf_bit > bit) { \
+ printf("======== %s overflow ========\n", __func__); \
+ printf("stage: %d node: %d\n", stage, i); \
+ printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+ printf("input:\n"); \
+ for (j = 0; j < size; j++) { \
+ printf("%d,", input[j]); \
+ } \
+ printf("\n"); \
+ assert(0 && "vp10_inv_txfm1d.c: range_check overflow"); \
+ } \
+ } \
+ }
+#else
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ (void) stage; \
+ (void) input; \
+ (void) buf; \
+ (void) size; \
+ (void) bit; \
+ }
+#endif
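+
+// For reference: buf_bit above (get_max_bit(abs(buf[i])) + 1) is the
+// magnitude bit-width of buf[i], so a stage_range entry of b flags any
+// value whose magnitude needs more than b bits, i.e. |buf[i]| > (1 << b) - 1.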
+
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[2];
+ bf1[2] = input[1];
+ bf1[3] = input[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
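+
+// Note: each half_btf(w0, in0, w1, in1, bit) above computes one butterfly
+// output, round_shift(w0 * in0 + w1 * in1, bit), where cospi[j] holds
+// round(cos(j * PI / 128) * (1 << cos_bit)) from the cospi_arr row selected
+// by cos_bit[stage]. The same pattern is used by all 1D transforms below.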
+
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[4];
+ bf1[2] = input[2];
+ bf1[3] = input[6];
+ bf1[4] = input[1];
+ bf1[5] = input[5];
+ bf1[6] = input[3];
+ bf1[7] = input[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[8];
+ bf1[2] = input[4];
+ bf1[3] = input[12];
+ bf1[4] = input[2];
+ bf1[5] = input[10];
+ bf1[6] = input[6];
+ bf1[7] = input[14];
+ bf1[8] = input[1];
+ bf1[9] = input[9];
+ bf1[10] = input[5];
+ bf1[11] = input[13];
+ bf1[12] = input[3];
+ bf1[13] = input[11];
+ bf1[14] = input[7];
+ bf1[15] = input[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[16];
+ bf1[2] = input[8];
+ bf1[3] = input[24];
+ bf1[4] = input[4];
+ bf1[5] = input[20];
+ bf1[6] = input[12];
+ bf1[7] = input[28];
+ bf1[8] = input[2];
+ bf1[9] = input[18];
+ bf1[10] = input[10];
+ bf1[11] = input[26];
+ bf1[12] = input[6];
+ bf1[13] = input[22];
+ bf1[14] = input[14];
+ bf1[15] = input[30];
+ bf1[16] = input[1];
+ bf1[17] = input[17];
+ bf1[18] = input[9];
+ bf1[19] = input[25];
+ bf1[20] = input[5];
+ bf1[21] = input[21];
+ bf1[22] = input[13];
+ bf1[23] = input[29];
+ bf1[24] = input[3];
+ bf1[25] = input[19];
+ bf1[26] = input[11];
+ bf1[27] = input[27];
+ bf1[28] = input[7];
+ bf1[29] = input[23];
+ bf1[30] = input[15];
+ bf1[31] = input[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = bf0[16] - bf0[17];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[18] + bf0[19];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = bf0[20] - bf0[21];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[22] + bf0[23];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = bf0[24] - bf0[25];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[26] + bf0[27];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = bf0[28] - bf0[29];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[30] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = bf0[17] - bf0[18];
+ bf1[19] = bf0[16] - bf0[19];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[21] + bf0[22];
+ bf1[23] = bf0[20] + bf0[23];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = bf0[25] - bf0[26];
+ bf1[27] = bf0[24] - bf0[27];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[29] + bf0[30];
+ bf1[31] = bf0[28] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = bf0[19] - bf0[20];
+ bf1[21] = bf0[18] - bf0[21];
+ bf1[22] = bf0[17] - bf0[22];
+ bf1[23] = bf0[16] - bf0[23];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[27] + bf0[28];
+ bf1[29] = bf0[26] + bf0[29];
+ bf1[30] = bf0[25] + bf0[30];
+ bf1[31] = bf0[24] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = bf0[15] - bf0[16];
+ bf1[17] = bf0[14] - bf0[17];
+ bf1[18] = bf0[13] - bf0[18];
+ bf1[19] = bf0[12] - bf0[19];
+ bf1[20] = bf0[11] - bf0[20];
+ bf1[21] = bf0[10] - bf0[21];
+ bf1[22] = bf0[9] - bf0[22];
+ bf1[23] = bf0[8] - bf0[23];
+ bf1[24] = bf0[7] - bf0[24];
+ bf1[25] = bf0[6] - bf0[25];
+ bf1[26] = bf0[5] - bf0[26];
+ bf1[27] = bf0[4] - bf0[27];
+ bf1[28] = bf0[3] - bf0[28];
+ bf1[29] = bf0[2] - bf0[29];
+ bf1[30] = bf0[1] - bf0[30];
+ bf1[31] = bf0[0] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[3];
+ bf1[2] = -input[1];
+ bf1[3] = input[2];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[56], bf0[0], -cospi[8], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[24], bf0[2], -cospi[40], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[2];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[7];
+ bf1[2] = -input[3];
+ bf1[3] = input[4];
+ bf1[4] = -input[1];
+ bf1[5] = input[6];
+ bf1[6] = input[2];
+ bf1[7] = -input[5];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[6];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[4];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[2];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[15];
+ bf1[2] = -input[7];
+ bf1[3] = input[8];
+ bf1[4] = -input[3];
+ bf1[5] = input[12];
+ bf1[6] = input[4];
+ bf1[7] = -input[11];
+ bf1[8] = -input[1];
+ bf1[9] = input[14];
+ bf1[10] = input[6];
+ bf1[11] = -input[9];
+ bf1[12] = input[2];
+ bf1[13] = -input[13];
+ bf1[14] = -input[5];
+ bf1[15] = input[10];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = bf0[8] - bf0[10];
+ bf1[11] = bf0[9] - bf0[11];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = bf0[12] - bf0[14];
+ bf1[15] = bf0[13] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = bf0[8] - bf0[12];
+ bf1[13] = bf0[9] - bf0[13];
+ bf1[14] = bf0[10] - bf0[14];
+ bf1[15] = bf0[11] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = bf0[0] - bf0[8];
+ bf1[9] = bf0[1] - bf0[9];
+ bf1[10] = bf0[2] - bf0[10];
+ bf1[11] = bf0[3] - bf0[11];
+ bf1[12] = bf0[4] - bf0[12];
+ bf1[13] = bf0[5] - bf0[13];
+ bf1[14] = bf0[6] - bf0[14];
+ bf1[15] = bf0[7] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[14];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[12];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[10];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[8];
+ bf1[8] = bf0[9];
+ bf1[9] = bf0[6];
+ bf1[10] = bf0[11];
+ bf1[11] = bf0[4];
+ bf1[12] = bf0[13];
+ bf1[13] = bf0[2];
+ bf1[14] = bf0[15];
+ bf1[15] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[31];
+ bf1[2] = -input[15];
+ bf1[3] = input[16];
+ bf1[4] = -input[7];
+ bf1[5] = input[24];
+ bf1[6] = input[8];
+ bf1[7] = -input[23];
+ bf1[8] = -input[3];
+ bf1[9] = input[28];
+ bf1[10] = input[12];
+ bf1[11] = -input[19];
+ bf1[12] = input[4];
+ bf1[13] = -input[27];
+ bf1[14] = -input[11];
+ bf1[15] = input[20];
+ bf1[16] = -input[1];
+ bf1[17] = input[30];
+ bf1[18] = input[14];
+ bf1[19] = -input[17];
+ bf1[20] = input[6];
+ bf1[21] = -input[25];
+ bf1[22] = -input[9];
+ bf1[23] = input[22];
+ bf1[24] = input[2];
+ bf1[25] = -input[29];
+ bf1[26] = -input[13];
+ bf1[27] = input[18];
+ bf1[28] = -input[5];
+ bf1[29] = input[26];
+ bf1[30] = input[10];
+ bf1[31] = -input[21];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[32], bf0[18], -cospi[32], bf0[19], cos_bit[stage]);
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = bf0[8] - bf0[10];
+ bf1[11] = bf0[9] - bf0[11];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = bf0[12] - bf0[14];
+ bf1[15] = bf0[13] - bf0[15];
+ bf1[16] = bf0[16] + bf0[18];
+ bf1[17] = bf0[17] + bf0[19];
+ bf1[18] = bf0[16] - bf0[18];
+ bf1[19] = bf0[17] - bf0[19];
+ bf1[20] = bf0[20] + bf0[22];
+ bf1[21] = bf0[21] + bf0[23];
+ bf1[22] = bf0[20] - bf0[22];
+ bf1[23] = bf0[21] - bf0[23];
+ bf1[24] = bf0[24] + bf0[26];
+ bf1[25] = bf0[25] + bf0[27];
+ bf1[26] = bf0[24] - bf0[26];
+ bf1[27] = bf0[25] - bf0[27];
+ bf1[28] = bf0[28] + bf0[30];
+ bf1[29] = bf0[29] + bf0[31];
+ bf1[30] = bf0[28] - bf0[30];
+ bf1[31] = bf0[29] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = bf0[8] - bf0[12];
+ bf1[13] = bf0[9] - bf0[13];
+ bf1[14] = bf0[10] - bf0[14];
+ bf1[15] = bf0[11] - bf0[15];
+ bf1[16] = bf0[16] + bf0[20];
+ bf1[17] = bf0[17] + bf0[21];
+ bf1[18] = bf0[18] + bf0[22];
+ bf1[19] = bf0[19] + bf0[23];
+ bf1[20] = bf0[16] - bf0[20];
+ bf1[21] = bf0[17] - bf0[21];
+ bf1[22] = bf0[18] - bf0[22];
+ bf1[23] = bf0[19] - bf0[23];
+ bf1[24] = bf0[24] + bf0[28];
+ bf1[25] = bf0[25] + bf0[29];
+ bf1[26] = bf0[26] + bf0[30];
+ bf1[27] = bf0[27] + bf0[31];
+ bf1[28] = bf0[24] - bf0[28];
+ bf1[29] = bf0[25] - bf0[29];
+ bf1[30] = bf0[26] - bf0[30];
+ bf1[31] = bf0[27] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = bf0[0] - bf0[8];
+ bf1[9] = bf0[1] - bf0[9];
+ bf1[10] = bf0[2] - bf0[10];
+ bf1[11] = bf0[3] - bf0[11];
+ bf1[12] = bf0[4] - bf0[12];
+ bf1[13] = bf0[5] - bf0[13];
+ bf1[14] = bf0[6] - bf0[14];
+ bf1[15] = bf0[7] - bf0[15];
+ bf1[16] = bf0[16] + bf0[24];
+ bf1[17] = bf0[17] + bf0[25];
+ bf1[18] = bf0[18] + bf0[26];
+ bf1[19] = bf0[19] + bf0[27];
+ bf1[20] = bf0[20] + bf0[28];
+ bf1[21] = bf0[21] + bf0[29];
+ bf1[22] = bf0[22] + bf0[30];
+ bf1[23] = bf0[23] + bf0[31];
+ bf1[24] = bf0[16] - bf0[24];
+ bf1[25] = bf0[17] - bf0[25];
+ bf1[26] = bf0[18] - bf0[26];
+ bf1[27] = bf0[19] - bf0[27];
+ bf1[28] = bf0[20] - bf0[28];
+ bf1[29] = bf0[21] - bf0[29];
+ bf1[30] = bf0[22] - bf0[30];
+ bf1[31] = bf0[23] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit[stage]);
+ bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[16];
+ bf1[1] = bf0[1] + bf0[17];
+ bf1[2] = bf0[2] + bf0[18];
+ bf1[3] = bf0[3] + bf0[19];
+ bf1[4] = bf0[4] + bf0[20];
+ bf1[5] = bf0[5] + bf0[21];
+ bf1[6] = bf0[6] + bf0[22];
+ bf1[7] = bf0[7] + bf0[23];
+ bf1[8] = bf0[8] + bf0[24];
+ bf1[9] = bf0[9] + bf0[25];
+ bf1[10] = bf0[10] + bf0[26];
+ bf1[11] = bf0[11] + bf0[27];
+ bf1[12] = bf0[12] + bf0[28];
+ bf1[13] = bf0[13] + bf0[29];
+ bf1[14] = bf0[14] + bf0[30];
+ bf1[15] = bf0[15] + bf0[31];
+ bf1[16] = bf0[0] - bf0[16];
+ bf1[17] = bf0[1] - bf0[17];
+ bf1[18] = bf0[2] - bf0[18];
+ bf1[19] = bf0[3] - bf0[19];
+ bf1[20] = bf0[4] - bf0[20];
+ bf1[21] = bf0[5] - bf0[21];
+ bf1[22] = bf0[6] - bf0[22];
+ bf1[23] = bf0[7] - bf0[23];
+ bf1[24] = bf0[8] - bf0[24];
+ bf1[25] = bf0[9] - bf0[25];
+ bf1[26] = bf0[10] - bf0[26];
+ bf1[27] = bf0[11] - bf0[27];
+ bf1[28] = bf0[12] - bf0[28];
+ bf1[29] = bf0[13] - bf0[29];
+ bf1[30] = bf0[14] - bf0[30];
+ bf1[31] = bf0[15] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit[stage]);
+ bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[30];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[28];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[26];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[24];
+ bf1[8] = bf0[9];
+ bf1[9] = bf0[22];
+ bf1[10] = bf0[11];
+ bf1[11] = bf0[20];
+ bf1[12] = bf0[13];
+ bf1[13] = bf0[18];
+ bf1[14] = bf0[15];
+ bf1[15] = bf0[16];
+ bf1[16] = bf0[17];
+ bf1[17] = bf0[14];
+ bf1[18] = bf0[19];
+ bf1[19] = bf0[12];
+ bf1[20] = bf0[21];
+ bf1[21] = bf0[10];
+ bf1[22] = bf0[23];
+ bf1[23] = bf0[8];
+ bf1[24] = bf0[25];
+ bf1[25] = bf0[6];
+ bf1[26] = bf0[27];
+ bf1[27] = bf0[4];
+ bf1[28] = bf0[29];
+ bf1[29] = bf0[2];
+ bf1[30] = bf0[31];
+ bf1[31] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
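
The 1D inverse transforms in this file all share one skeleton: stages ping-pong
between the caller's output buffer and the local step[] scratch, each stage
consumes one cos_bit/stage_range entry, and range_check() runs after every
stage. A minimal two-point sketch of that skeleton (illustrative only;
sketch_txfm2 is a hypothetical name, not part of the patch):

    #include <stdint.h>

    /* Buffers alternate each stage so the final stage always lands in
     * the caller's output, as in vp10_iadst32_new above. */
    static void sketch_txfm2(const int32_t *input, int32_t *output,
                             const int8_t *cos_bit,
                             const int8_t *stage_range) {
      int32_t step[2];   /* scratch, same role as step[32] above */
      int32_t *bf0, *bf1;
      (void)cos_bit;     /* a real stage feeds these to half_btf() */
      (void)stage_range; /* ...and to range_check() */

      /* stage 1: input -> output (butterfly) */
      bf1 = output;
      bf1[0] = input[0] + input[1];
      bf1[1] = input[0] - input[1];

      /* stage 2: output -> step (pass-through here) */
      bf0 = output;
      bf1 = step;
      bf1[0] = bf0[0];
      bf1[1] = bf0[1];

      /* stage 3: step -> output (final permutation, cf. stage 11 above) */
      bf0 = step;
      bf1 = output;
      bf1[0] = bf0[1];
      bf1[1] = bf0[0];
    }
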
diff --git a/vp10/common/vp10_inv_txfm1d.h b/vp10/common/vp10_inv_txfm1d.h
new file mode 100644
index 0000000..0609b65
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm1d.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM1D_H_
+#define VP10_INV_TXFM1D_H_
+
+#include "vp10/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP10_INV_TXFM1D_H_
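
A minimal usage sketch for these declarations; the cos_bit/stage_range values
below are copied from the dct_dct_4 column config in vp10_inv_txfm2d_cfg.h,
which is where real callers obtain them:

    #include <stdint.h>
    #include "vp10/common/vp10_inv_txfm1d.h"

    /* Run a 4-point inverse DCT on one column of coefficients. */
    void sketch_run_idct4(void) {
      const int32_t in[4] = {64, 0, 0, 0};             /* DC-only column */
      int32_t out[4];
      const int8_t cos_bit[4] = {15, 15, 15, 15};      /* one entry per stage */
      const int8_t stage_range[4] = {17, 17, 16, 16};  /* max bits per stage */
      vp10_idct4_new(in, out, cos_bit, stage_range);   /* out[] = 1D iDCT */
    }
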
diff --git a/vp10/common/vp10_inv_txfm2d_cfg.h b/vp10/common/vp10_inv_txfm2d_cfg.h
new file mode 100644
index 0000000..8cd76b5
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm2d_cfg.h
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_CFG_H_
+#define VP10_INV_TXFM2D_CFG_H_
+#include "vp10/common/vp10_inv_txfm1d.h"
+
+// ---------------- config inv_dct_dct_4 ----------------
+static const int8_t inv_shift_dct_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_dct_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 4,
+ .stage_num_row = 4,
+
+ .shift = inv_shift_dct_dct_4,
+ .stage_range_col = inv_stage_range_col_dct_dct_4,
+ .stage_range_row = inv_stage_range_row_dct_dct_4,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_4,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_4,
+ .txfm_func_col = vp10_idct4_new,
+ .txfm_func_row = vp10_idct4_new};
+
+// ---------------- config inv_dct_dct_8 ----------------
+static const int8_t inv_shift_dct_dct_8[2] = {0, -5};
+static const int8_t inv_stage_range_col_dct_dct_8[6] = {17, 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_8[6] = {17, 17, 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 6,
+ .stage_num_row = 6,
+
+ .shift = inv_shift_dct_dct_8,
+ .stage_range_col = inv_stage_range_col_dct_dct_8,
+ .stage_range_row = inv_stage_range_row_dct_dct_8,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_8,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_8,
+ .txfm_func_col = vp10_idct8_new,
+ .txfm_func_row = vp10_idct8_new};
+
+// ---------------- config inv_dct_dct_16 ----------------
+static const int8_t inv_shift_dct_dct_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_dct_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_dct_16[8] = {14, 14, 14, 14,
+ 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_16[8] = {14, 14, 14, 14,
+ 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 8,
+ .stage_num_row = 8,
+
+ .shift = inv_shift_dct_dct_16,
+ .stage_range_col = inv_stage_range_col_dct_dct_16,
+ .stage_range_row = inv_stage_range_row_dct_dct_16,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_16,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_16,
+ .txfm_func_col = vp10_idct16_new,
+ .txfm_func_row = vp10_idct16_new};
+
+// ---------------- config inv_dct_dct_32 ----------------
+static const int8_t inv_shift_dct_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_dct_32[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_32[10] = {19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_dct_32[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_32[10] = {13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 10,
+ .stage_num_row = 10,
+
+ .shift = inv_shift_dct_dct_32,
+ .stage_range_col = inv_stage_range_col_dct_dct_32,
+ .stage_range_row = inv_stage_range_row_dct_dct_32,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_32,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_32,
+ .txfm_func_col = vp10_idct32_new,
+ .txfm_func_row = vp10_idct32_new};
+
+// ---------------- config inv_dct_adst_4 ----------------
+static const int8_t inv_shift_dct_adst_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_adst_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_adst_4[6] = {16, 16, 16,
+ 16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_adst_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 4,
+ .stage_num_row = 6,
+
+ .shift = inv_shift_dct_adst_4,
+ .stage_range_col = inv_stage_range_col_dct_adst_4,
+ .stage_range_row = inv_stage_range_row_dct_adst_4,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_4,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_4,
+ .txfm_func_col = vp10_idct4_new,
+ .txfm_func_row = vp10_iadst4_new};
+
+// ---------------- config inv_dct_adst_8 ----------------
+static const int8_t inv_shift_dct_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_dct_adst_8[6] = {16, 16, 16,
+ 16, 15, 15};
+static const int8_t inv_stage_range_row_dct_adst_8[8] = {17, 17, 17, 17,
+ 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_adst_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 6,
+ .stage_num_row = 8,
+
+ .shift = inv_shift_dct_adst_8,
+ .stage_range_col = inv_stage_range_col_dct_adst_8,
+ .stage_range_row = inv_stage_range_row_dct_adst_8,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_8,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_8,
+ .txfm_func_col = vp10_idct8_new,
+ .txfm_func_row = vp10_iadst8_new};
+
+// ---------------- config inv_dct_adst_16 ----------------
+static const int8_t inv_shift_dct_adst_16[2] = {1, -7};
+static const int8_t inv_stage_range_col_dct_adst_16[8] = {19, 19, 19, 19,
+ 19, 19, 18, 18};
+static const int8_t inv_stage_range_row_dct_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_adst_16[8] = {13, 13, 13, 13,
+ 13, 13, 13, 14};
+static const int8_t inv_cos_bit_row_dct_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 8,
+ .stage_num_row = 10,
+
+ .shift = inv_shift_dct_adst_16,
+ .stage_range_col = inv_stage_range_col_dct_adst_16,
+ .stage_range_row = inv_stage_range_row_dct_adst_16,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_16,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_16,
+ .txfm_func_col = vp10_idct16_new,
+ .txfm_func_row = vp10_iadst16_new};
+
+// ---------------- config inv_dct_adst_32 ----------------
+static const int8_t inv_shift_dct_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_adst_32[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_adst_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_adst_32[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_adst_32[12] = {13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 10,
+ .stage_num_row = 12,
+
+ .shift = inv_shift_dct_adst_32,
+ .stage_range_col = inv_stage_range_col_dct_adst_32,
+ .stage_range_row = inv_stage_range_row_dct_adst_32,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_32,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_32,
+ .txfm_func_col = vp10_idct32_new,
+ .txfm_func_row = vp10_iadst32_new};
+
+// ---------------- config inv_adst_adst_4 ----------------
+static const int8_t inv_shift_adst_adst_4[2] = {0, -4};
+static const int8_t inv_stage_range_col_adst_adst_4[6] = {16, 16, 16,
+ 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_4[6] = {16, 16, 16,
+ 16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 6,
+ .stage_num_row = 6,
+
+ .shift = inv_shift_adst_adst_4,
+ .stage_range_col = inv_stage_range_col_adst_adst_4,
+ .stage_range_row = inv_stage_range_row_adst_adst_4,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_4,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_4,
+ .txfm_func_col = vp10_iadst4_new,
+ .txfm_func_row = vp10_iadst4_new};
+
+// ---------------- config inv_adst_adst_8 ----------------
+static const int8_t inv_shift_adst_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_adst_8[8] = {16, 16, 16, 16,
+ 16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_8[8] = {17, 17, 17, 17,
+ 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 8,
+ .stage_num_row = 8,
+
+ .shift = inv_shift_adst_adst_8,
+ .stage_range_col = inv_stage_range_col_adst_adst_8,
+ .stage_range_row = inv_stage_range_row_adst_adst_8,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_8,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_8,
+ .txfm_func_col = vp10_iadst8_new,
+ .txfm_func_row = vp10_iadst8_new};
+
+// ---------------- config inv_adst_adst_16 ----------------
+static const int8_t inv_shift_adst_adst_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_adst_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 10,
+ .stage_num_row = 10,
+
+ .shift = inv_shift_adst_adst_16,
+ .stage_range_col = inv_stage_range_col_adst_adst_16,
+ .stage_range_row = inv_stage_range_row_adst_adst_16,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_16,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_16,
+ .txfm_func_col = vp10_iadst16_new,
+ .txfm_func_row = vp10_iadst16_new};
+
+// ---------------- config inv_adst_adst_32 ----------------
+static const int8_t inv_shift_adst_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_adst_32[12] = {
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_adst_32[12] = {14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_32[12] = {13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 12,
+ .stage_num_row = 12,
+
+ .shift = inv_shift_adst_adst_32,
+ .stage_range_col = inv_stage_range_col_adst_adst_32,
+ .stage_range_row = inv_stage_range_row_adst_adst_32,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_32,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_32,
+ .txfm_func_col = vp10_iadst32_new,
+ .txfm_func_row = vp10_iadst32_new};
+
+// ---------------- config inv_adst_dct_4 ----------------
+static const int8_t inv_shift_adst_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_adst_dct_4[6] = {17, 17, 17,
+                                                         17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_dct_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 6,
+ .stage_num_row = 4,
+
+ .shift = inv_shift_adst_dct_4,
+ .stage_range_col = inv_stage_range_col_adst_dct_4,
+ .stage_range_row = inv_stage_range_row_adst_dct_4,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_4,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_4,
+ .txfm_func_col = vp10_iadst4_new,
+ .txfm_func_row = vp10_idct4_new};
+
+// ---------------- config inv_adst_dct_8 ----------------
+static const int8_t inv_shift_adst_dct_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_dct_8[8] = {16, 16, 16, 16,
+ 16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_dct_8[6] = {17, 17, 17,
+                                                         17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_dct_8[8] = {15, 15, 15, 15,
+                                                     15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 8,
+ .stage_num_row = 6,
+
+ .shift = inv_shift_adst_dct_8,
+ .stage_range_col = inv_stage_range_col_adst_dct_8,
+ .stage_range_row = inv_stage_range_row_adst_dct_8,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_8,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_8,
+ .txfm_func_col = vp10_iadst8_new,
+ .txfm_func_row = vp10_idct8_new};
+
+// ---------------- config inv_adst_dct_16 ----------------
+static const int8_t inv_shift_adst_dct_16[2] = {-1, -5};
+static const int8_t inv_stage_range_col_adst_dct_16[10] = {17, 17, 17, 17, 17,
+ 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_dct_16[10] = {15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_16[8] = {14, 14, 14, 14,
+                                                      14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 10,
+ .stage_num_row = 8,
+
+ .shift = inv_shift_adst_dct_16,
+ .stage_range_col = inv_stage_range_col_adst_dct_16,
+ .stage_range_row = inv_stage_range_row_adst_dct_16,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_16,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_16,
+ .txfm_func_col = vp10_iadst16_new,
+ .txfm_func_row = vp10_idct16_new};
+
+// ---------------- config inv_adst_dct_32 ----------------
+static const int8_t inv_shift_adst_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_dct_32[12] = {
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_dct_32[10] = {19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_dct_32[12] = {14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_dct_32[10] = {13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 12,
+ .stage_num_row = 10,
+
+ .shift = inv_shift_adst_dct_32,
+ .stage_range_col = inv_stage_range_col_adst_dct_32,
+ .stage_range_row = inv_stage_range_row_adst_dct_32,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_32,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_32,
+ .txfm_func_col = vp10_iadst32_new,
+ .txfm_func_row = vp10_idct32_new};
+
+#endif // VP10_INV_TXFM2D_CFG_H_
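
Each config above pairs a column transform with a row transform plus per-pass
precision schedules (shift, stage_range, cos_bit), consumed through the
TXFM_2D_CFG struct defined in vp10/common/vp10_txfm.h below. The real 2D
driver lives elsewhere in this change; the sketch below only shows roughly how
the fields would be used. The function name, the row-then-column order, and
the sign handling of shift[] (negated here, so a table entry of -5 becomes a
right shift by 5) are all assumptions:

    #include <stdint.h>
    #include "vp10/common/vp10_txfm.h"

    /* Hypothetical cfg-driven inverse 2D pass over a size x size
     * coefficient buffer; omits clamping and adding into prediction. */
    static void inv_txfm2d_sketch(int32_t *buf, const TXFM_2D_CFG *cfg) {
      const int size = cfg->txfm_size;
      int32_t tmp[32]; /* txfm_size is at most 32 in these tables */
      int r, c;

      /* rows, then the first shift */
      for (r = 0; r < size; ++r) {
        cfg->txfm_func_row(buf + r * size, tmp, cfg->cos_bit_row,
                           cfg->stage_range_row);
        for (c = 0; c < size; ++c) buf[r * size + c] = tmp[c];
      }
      round_shift_array(buf, size * size, -cfg->shift[0]);

      /* columns, then the second shift */
      for (c = 0; c < size; ++c) {
        int32_t col[32];
        for (r = 0; r < size; ++r) col[r] = buf[r * size + c];
        cfg->txfm_func_col(col, tmp, cfg->cos_bit_col,
                           cfg->stage_range_col);
        for (r = 0; r < size; ++r) buf[r * size + c] = tmp[r];
      }
      round_shift_array(buf, size * size, -cfg->shift[1]);
    }
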
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h
new file mode 100644
index 0000000..427bccb
--- /dev/null
+++ b/vp10/common/vp10_txfm.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_TXFM_H_
+#define VP10_TXFM_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <math.h>
+#include <assert.h>
+
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+static const int cos_bit_min = 10;
+static const int cos_bit_max = 16;
+
+// cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
+static const int32_t cospi_arr[7][64] =
+ {{ 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009,
+ 1004, 999, 993, 987, 980, 972, 964, 955,
+ 946, 936, 926, 915, 903, 891, 878, 865,
+ 851, 837, 822, 807, 792, 775, 759, 742,
+ 724, 706, 688, 669, 650, 630, 610, 590,
+ 569, 548, 526, 505, 483, 460, 438, 415,
+ 392, 369, 345, 321, 297, 273, 249, 224,
+ 200, 175, 150, 125, 100, 75, 50, 25},
+ { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018,
+ 2009, 1998, 1987, 1974, 1960, 1945, 1928, 1911,
+ 1892, 1872, 1851, 1829, 1806, 1782, 1757, 1730,
+ 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483,
+ 1448, 1412, 1375, 1338, 1299, 1260, 1220, 1179,
+ 1138, 1096, 1053, 1009, 965, 921, 876, 830,
+ 784, 737, 690, 642, 595, 546, 498, 449,
+ 400, 350, 301, 251, 201, 151, 100, 50},
+ { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036,
+ 4017, 3996, 3973, 3948, 3920, 3889, 3857, 3822,
+ 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
+ 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967,
+ 2896, 2824, 2751, 2675, 2598, 2520, 2440, 2359,
+ 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660,
+ 1567, 1474, 1380, 1285, 1189, 1092, 995, 897,
+ 799, 700, 601, 501, 401, 301, 201, 101},
+ { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071,
+ 8035, 7993, 7946, 7895, 7839, 7779, 7713, 7643,
+ 7568, 7489, 7405, 7317, 7225, 7128, 7027, 6921,
+ 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933,
+ 5793, 5649, 5501, 5351, 5197, 5040, 4880, 4717,
+ 4551, 4383, 4212, 4038, 3862, 3683, 3503, 3320,
+ 3135, 2948, 2760, 2570, 2378, 2185, 1990, 1795,
+ 1598, 1401, 1202, 1003, 803, 603, 402, 201},
+ { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143,
+ 16069, 15986, 15893, 15791, 15679, 15557, 15426, 15286,
+ 15137, 14978, 14811, 14635, 14449, 14256, 14053, 13842,
+ 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866,
+ 11585, 11297, 11003, 10702, 10394, 10080, 9760, 9434,
+ 9102, 8765, 8423, 8076, 7723, 7366, 7005, 6639,
+ 6270, 5897, 5520, 5139, 4756, 4370, 3981, 3590,
+ 3196, 2801, 2404, 2006, 1606, 1205, 804, 402},
+ { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286,
+ 32138, 31972, 31786, 31581, 31357, 31114, 30853, 30572,
+ 30274, 29957, 29622, 29269, 28899, 28511, 28106, 27684,
+ 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732,
+ 23170, 22595, 22006, 21403, 20788, 20160, 19520, 18868,
+ 18205, 17531, 16846, 16151, 15447, 14733, 14010, 13279,
+ 12540, 11793, 11039, 10279, 9512, 8740, 7962, 7180,
+ 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804},
+ { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571,
+ 64277, 63944, 63572, 63162, 62714, 62228, 61705, 61145,
+ 60547, 59914, 59244, 58538, 57798, 57022, 56212, 55368,
+ 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464,
+ 46341, 45190, 44011, 42806, 41576, 40320, 39040, 37736,
+ 36410, 35062, 33692, 32303, 30893, 29466, 28020, 26558,
+ 25080, 23586, 22078, 20557, 19024, 17479, 15924, 14359,
+ 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608}};
+
+static INLINE int32_t round_shift(int32_t value, int bit) {
+  // For value >= 0,
+  // there are two versions of rounding:
+  // 1) (value + (1 << (bit - 1)) - 1) >> bit
+  // 2) (value + (1 << (bit - 1))) >> bit
+  // Both methods are mildly unbiased; however, the first version has
+  // a slight advantage because it rounds numbers toward zero.
+  // For value < 0, we also choose the version that rounds numbers
+  // toward zero.
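+  // Worked examples of both branches:
+  //   round_shift( 5,  1) = ( 5 + 1 - 1) >> 1   =  2   ( 5/2 =  2.5 ->  2)
+  //   round_shift(-5,  1) = ((-5 - 1) >> 1) + 1 = -2   (-5/2 = -2.5 -> -2)
+  //   round_shift( 3, -2) = 3 << 2              = 12   (bit <= 0 left-shifts)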
+ if (bit > 0) {
+ if (value >= 0)
+ return (value + (1 << (bit - 1)) - 1) >> bit;
+ else
+ return ((value - (1 << (bit - 1))) >> bit) + 1;
+ } else {
+ return value << (-bit);
+ }
+}
+
+static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
+ int i;
+ if (bit == 0) {
+ return;
+ } else {
+ for (i = 0; i < size; i++) {
+ arr[i] = round_shift(arr[i], bit);
+ }
+ }
+}
+
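+// half_btf() computes one output of a two-point rotation; the 1D transforms
+// call it in pairs. With cospi = cospi_arr[bit - cos_bit_min] and an angle
+// index 0 < a < 64, a butterfly on inputs (x, y) is:
+//   out0 = half_btf(cospi[a], x, cospi[64 - a], y, bit);   // x*cos + y*sin
+//   out1 = half_btf(cospi[64 - a], x, -cospi[a], y, bit);  // x*sin - y*cos
+// using cos((64 - a) * PI / 128) == sin(a * PI / 128).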
+static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
+ int bit) {
+ int32_t result_32 = w0 * in0 + w1 * in1;
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+ int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+ if (result_32 != result_64) {
+    printf(
+      "%s overflow result_32: %d result_64: %" PRId64
+      " w0: %d in0: %d w1: %d in1: %d\n",
+      __func__, result_32, result_64, w0, in0, w1, in1);
+ assert(0 && "half_btf overflow");
+ }
+#endif
+ return round_shift(result_32, bit);
+}
+
+static INLINE int get_max_bit(int x) {
+ int max_bit = -1;
+ while (x) {
+ x = x >> 1;
+ max_bit++;
+ }
+ return max_bit;
+}
+
+// TODO(angiebird): implement SSE
+static INLINE void clamp_block(int16_t *block, int block_size, int stride,
+ int low, int high) {
+ int i, j;
+ for (i = 0; i < block_size; ++i) {
+ for (j = 0; j < block_size; ++j) {
+ block[i * stride + j] = clamp(block[i * stride + j], low, high);
+ }
+ }
+}
+
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+typedef struct TXFM_2D_CFG {
+ int txfm_size;
+ int stage_num_col;
+ int stage_num_row;
+
+  const int8_t *shift;
+  const int8_t *stage_range_col;
+  const int8_t *stage_range_row;
+  const int8_t *cos_bit_col;
+  const int8_t *cos_bit_row;
+ TxfmFunc txfm_func_col;
+ TxfmFunc txfm_func_row;
+} TXFM_2D_CFG;
+
+#endif // VP10_TXFM_H_
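
For reference, a standalone sketch that regenerates the cospi_arr table above
from the formula in its comment (verification aid only; the M_PI fallback is
for toolchains whose <math.h> does not define it):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    #ifndef M_PI
    #define M_PI 3.14159265358979323846
    #endif

    int main(void) {
      int i, j; /* i spans cos_bit_min = 10 .. cos_bit_max = 16 */
      for (i = 0; i < 7; i++) {
        for (j = 0; j < 64; j++)
          printf("%d%c",
                 (int32_t)round(cos(M_PI * j / 128) * (1 << (10 + i))),
                 j == 63 ? '\n' : ' ');
      }
      return 0;
    }
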
diff --git a/vp10/common/x86/idct_intrin_sse2.c b/vp10/common/x86/idct_intrin_sse2.c
index a2c674b..900f091 100644
--- a/vp10/common/x86/idct_intrin_sse2.c
+++ b/vp10/common/x86/idct_intrin_sse2.c
@@ -11,6 +11,54 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
+#include "vp10/common/enums.h"
+
+#if CONFIG_EXT_TX
+// Reverse the eight 16-bit words in an __m128i.
+static INLINE __m128i mm_reverse_epi16(const __m128i x) {
+ const __m128i a = _mm_shufflelo_epi16(x, 0x1b);
+ const __m128i b = _mm_shufflehi_epi16(a, 0x1b);
+ return _mm_shuffle_epi32(b, 0x4e);
+}
+
+static INLINE void fliplr_4x4(__m128i in[2]) {
+ in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
+ in[0] = _mm_shufflehi_epi16(in[0], 0x1b);
+ in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
+ in[1] = _mm_shufflehi_epi16(in[1], 0x1b);
+}
+
+static INLINE void fliplr_8x8(__m128i in[8]) {
+ in[0] = mm_reverse_epi16(in[0]);
+ in[1] = mm_reverse_epi16(in[1]);
+ in[2] = mm_reverse_epi16(in[2]);
+ in[3] = mm_reverse_epi16(in[3]);
+
+ in[4] = mm_reverse_epi16(in[4]);
+ in[5] = mm_reverse_epi16(in[5]);
+ in[6] = mm_reverse_epi16(in[6]);
+ in[7] = mm_reverse_epi16(in[7]);
+}
+
+static INLINE void fliplr_16x8(__m128i in[16]) {
+ fliplr_8x8(&in[0]);
+ fliplr_8x8(&in[8]);
+}
+
+#define FLIPLR_16x16(in0, in1) do { \
+ __m128i *tmp; \
+ fliplr_16x8(in0); \
+ fliplr_16x8(in1); \
+ tmp = (in0); \
+ (in0) = (in1); \
+ (in1) = tmp; \
+} while (0)
+
+#define FLIPUD_PTR(dest, stride, size) do { \
+ (dest) = (dest) + ((size) - 1) * (stride); \
+ (stride) = - (stride); \
+} while (0)
+#endif  // CONFIG_EXT_TX
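
The helpers above realize the FLIPADST variants cheaply: a left-right flip
reverses the coefficients within each row before the store, and an up-down
flip never touches the data at all. A scalar sketch of both (hypothetical
names, equivalent logic):

    #include <stdint.h>

    /* Reverse each row of a size x size block in place -- what
     * fliplr_4x4/fliplr_8x8/fliplr_16x8 do with SSE2 shuffles. */
    static void fliplr_scalar(int16_t *block, int size, int stride) {
      int r, c;
      for (r = 0; r < size; ++r)
        for (c = 0; c < size / 2; ++c) {
          const int16_t t = block[r * stride + c];
          block[r * stride + c] = block[r * stride + (size - 1 - c)];
          block[r * stride + (size - 1 - c)] = t;
        }
    }

    /* Up-down flip: retarget the destination at its last row and walk
     * upward -- exactly what the FLIPUD_PTR macro does to dest/stride. */
    static void flipud_ptr_scalar(uint8_t **dest, int *stride, int size) {
      *dest += (size - 1) * (*stride);
      *stride = -(*stride);
    }
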
void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
@@ -22,22 +70,50 @@
in[1] = load_input_data(input + 8);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct4_sse2(in);
idct4_sse2(in);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct4_sse2(in);
iadst4_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst4_sse2(in);
idct4_sse2(in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
iadst4_sse2(in);
iadst4_sse2(in);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ break;
+ case DCT_FLIPADST:
+ iadst4_sse2(in);
+ idct4_sse2(in);
+ fliplr_4x4(in);
+ break;
+ case FLIPADST_FLIPADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ fliplr_4x4(in);
+ break;
+ case ADST_FLIPADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ fliplr_4x4(in);
+ break;
+ case FLIPADST_ADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -52,12 +128,12 @@
// Reconstruction and Store
{
- __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
+ __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
__m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
- d0 = _mm_unpacklo_epi32(d0,
- _mm_cvtsi32_si128(*(const int *)(dest + stride)));
- d2 = _mm_unpacklo_epi32(
- d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)));
+ __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
+ d0 = _mm_unpacklo_epi32(d0, d1);
+ d2 = _mm_unpacklo_epi32(d2, d3);
d0 = _mm_unpacklo_epi8(d0, zero);
d2 = _mm_unpacklo_epi8(d2, zero);
d0 = _mm_add_epi16(d0, in[0]);
@@ -94,22 +170,50 @@
in[7] = load_input_data(input + 8 * 7);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct8_sse2(in);
idct8_sse2(in);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct8_sse2(in);
iadst8_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst8_sse2(in);
idct8_sse2(in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
iadst8_sse2(in);
iadst8_sse2(in);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ break;
+ case DCT_FLIPADST:
+ iadst8_sse2(in);
+ idct8_sse2(in);
+ fliplr_8x8(in);
+ break;
+ case FLIPADST_FLIPADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ fliplr_8x8(in);
+ break;
+ case ADST_FLIPADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ fliplr_8x8(in);
+ break;
+ case FLIPADST_ADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -146,29 +250,59 @@
void vp10_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride, int tx_type) {
- __m128i in0[16], in1[16];
+ __m128i in[32];
+ __m128i *in0 = &in[0];
+ __m128i *in1 = &in[16];
load_buffer_8x16(input, in0);
input += 8;
load_buffer_8x16(input, in1);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
iadst16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ break;
+ case DCT_FLIPADST:
+ iadst16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case FLIPADST_FLIPADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case ADST_FLIPADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case FLIPADST_ADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 5c95e16..531be1f 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -83,18 +83,9 @@
return data > max ? max : data;
}
-#if CONFIG_MISC_FIXES
static TX_MODE read_tx_mode(struct vpx_read_bit_buffer *rb) {
return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2);
}
-#else
-static TX_MODE read_tx_mode(vpx_reader *r) {
- TX_MODE tx_mode = vpx_read_literal(r, 2);
- if (tx_mode == ALLOW_32X32)
- tx_mode += vpx_read_bit(r);
- return tx_mode;
-}
-#endif
static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) {
int i, j;
@@ -126,7 +117,6 @@
vp10_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
}
-#if CONFIG_MISC_FIXES
static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm,
struct vpx_read_bit_buffer *rb) {
if (is_compound_reference_allowed(cm)) {
@@ -137,18 +127,6 @@
return SINGLE_REFERENCE;
}
}
-#else
-static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm,
- vpx_reader *r) {
- if (is_compound_reference_allowed(cm)) {
- return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT
- : COMPOUND_REFERENCE)
- : SINGLE_REFERENCE;
- } else {
- return SINGLE_REFERENCE;
- }
-}
-#endif
static void read_frame_reference_mode_probs(VP10_COMMON *cm, vpx_reader *r) {
FRAME_CONTEXT *const fc = cm->fc;
@@ -172,12 +150,7 @@
static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) {
int i;
for (i = 0; i < n; ++i)
-#if CONFIG_MISC_FIXES
vp10_diff_update_prob(r, &p[i]);
-#else
- if (vpx_read(r, MV_UPDATE_PROB))
- p[i] = (vpx_read_literal(r, 7) << 1) | 1;
-#endif
}
static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) {
@@ -214,7 +187,7 @@
uint8_t *dst, int stride,
int eob, int block) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block);
+ TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
const int seg_id = xd->mi[0]->mbmi.segment_id;
if (eob > 0) {
tran_low_t *const dqcoeff = pd->dqcoeff;
@@ -369,8 +342,8 @@
col, row, plane);
if (!mbmi->skip) {
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx);
- const scan_order *sc = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 0);
const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size,
r, mbmi->segment_id);
inverse_transform_block_intra(xd, plane, tx_type, tx_size,
@@ -378,14 +351,71 @@
}
}
+#if CONFIG_VAR_TX
+static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r,
+ MB_MODE_INFO *const mbmi,
+ int plane, BLOCK_SIZE plane_bsize,
+ int block, int blk_row, int blk_col,
+ TX_SIZE tx_size, int *eob_total) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_idx];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 1);
+ const int eob = vp10_decode_block_tokens(xd, plane, sc,
+ blk_col, blk_row, tx_size,
+ r, mbmi->segment_id);
+ inverse_transform_block_inter(xd, plane, tx_size,
+ &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
+ pd->dst.stride, eob, block);
+ *eob_total += eob;
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block + i * step,
+ offsetr, offsetc, tx_size - 1, eob_total);
+ }
+ }
+}
+#endif  // CONFIG_VAR_TX
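
The recursion above splits a transform block into four quadrants until the
signalled per-block transform size is reached; step counts the 4x4 coefficient
blocks covered by each child. A standalone sketch of the child indexing,
assuming the TX_4X4 = 0 size convention:

    #include <stdio.h>

    int main(void) {
      const int tx_size = 2;                     /* parent: 16x16 */
      const int half = 1 << (tx_size - 1);       /* child width, 4x4 units */
      const int step = 1 << (2 * (tx_size - 1)); /* 4x4 blocks per child */
      int i;
      for (i = 0; i < 4; ++i) /* TL, TR, BL, BR, as in the loop above */
        printf("child %d: blk_row+%d blk_col+%d block+%d\n",
               i, (i >> 1) * half, (i & 1) * half, i * step);
      return 0;
    }
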
+
static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r,
MB_MODE_INFO *const mbmi, int plane,
int row, int col, TX_SIZE tx_size) {
struct macroblockd_plane *const pd = &xd->plane[plane];
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
int block_idx = (row << 1) + col;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx);
- const scan_order *sc = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 1);
const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size, r,
mbmi->segment_id);
@@ -555,6 +585,9 @@
int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
buf_stride, subpel_x, subpel_y;
uint8_t *ref_frame, *buf_ptr;
+#if CONFIG_EXT_INTERP
+ const int i_filter = IsInterpolatingFilter(xd->mi[0]->mbmi.interp_filter);
+#endif // CONFIG_EXT_INTERP
// Get reference frame pointer, width and height.
if (plane == 0) {
@@ -624,6 +657,9 @@
// Do border extension if there is motion or the
// width/height is not a multiple of 8 pixels.
if (is_scaled || scaled_mv.col || scaled_mv.row ||
+#if CONFIG_EXT_INTERP
+ !i_filter ||
+#endif
(frame_width & 0x7) || (frame_height & 0x7)) {
int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
@@ -631,13 +667,21 @@
int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
int x_pad = 0, y_pad = 0;
- if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
+ if (subpel_x ||
+#if CONFIG_EXT_INTERP
+ !i_filter ||
+#endif
+ (sf->x_step_q4 != SUBPEL_SHIFTS)) {
x0 -= VP9_INTERP_EXTEND - 1;
x1 += VP9_INTERP_EXTEND;
x_pad = 1;
}
- if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
+ if (subpel_y ||
+#if CONFIG_EXT_INTERP
+ !i_filter ||
+#endif
+ (sf->y_step_q4 != SUBPEL_SHIFTS)) {
y0 -= VP9_INTERP_EXTEND - 1;
y1 += VP9_INTERP_EXTEND;
y_pad = 1;
@@ -799,6 +843,11 @@
set_skip_context(xd, mi_row, mi_col);
+
+#if CONFIG_VAR_TX
+ xd->max_tx_size = max_txsize_lookup[bsize];
+#endif
+
// Distance of Mb to the various image edges. These are specified to 8th pel
// as they are always compared to values that are in 1/8th pel units
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
@@ -870,13 +919,31 @@
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ int row, col;
+#if CONFIG_VAR_TX
+ // TODO(jingning): This can be simplified for decoder performance.
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+ int block = 0;
+ const int step = 1 << (max_tx_size << 1);
+
+ for (row = 0; row < num_4x4_h; row += bw) {
+ for (col = 0; col < num_4x4_w; col += bw) {
+ decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize,
+ block, row, col, max_tx_size, &eobtotal);
+ block += step;
+ }
+ }
+#else
const TX_SIZE tx_size =
plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
: mbmi->tx_size;
- const int num_4x4_w = pd->n4_w;
- const int num_4x4_h = pd->n4_h;
const int step = (1 << tx_size);
- int row, col;
const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
@@ -886,14 +953,11 @@
for (col = 0; col < max_blocks_wide; col += step)
eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col,
tx_size);
+#endif
}
if (!less8x8 && eobtotal == 0)
-#if CONFIG_MISC_FIXES
mbmi->has_no_coeffs = 1; // skip loopfilter
-#else
- mbmi->skip = 1; // skip loopfilter
-#endif
}
}
@@ -1051,9 +1115,6 @@
static void setup_segmentation(VP10_COMMON *const cm,
struct vpx_read_bit_buffer *rb) {
struct segmentation *const seg = &cm->seg;
-#if !CONFIG_MISC_FIXES
- struct segmentation_probs *const segp = &cm->segp;
-#endif
int i, j;
seg->update_map = 0;
@@ -1070,26 +1131,11 @@
seg->update_map = vpx_rb_read_bit(rb);
}
if (seg->update_map) {
-#if !CONFIG_MISC_FIXES
- for (i = 0; i < SEG_TREE_PROBS; i++)
- segp->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
- : MAX_PROB;
-#endif
if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
seg->temporal_update = 0;
} else {
seg->temporal_update = vpx_rb_read_bit(rb);
}
-#if !CONFIG_MISC_FIXES
- if (seg->temporal_update) {
- for (i = 0; i < PREDICTION_PROBS; i++)
- segp->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
- : MAX_PROB;
- } else {
- for (i = 0; i < PREDICTION_PROBS; i++)
- segp->pred_probs[i] = MAX_PROB;
- }
-#endif
}
// Segmentation data update
@@ -1143,7 +1189,7 @@
static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
return vpx_rb_read_bit(rb) ?
- vpx_rb_read_inv_signed_literal(rb, CONFIG_MISC_FIXES ? 6 : 4) : 0;
+ vpx_rb_read_inv_signed_literal(rb, 6) : 0;
}
static void setup_quantization(VP10_COMMON *const cm, MACROBLOCKD *const xd,
@@ -1156,15 +1202,9 @@
cm->uv_ac_delta_q = read_delta_q(rb);
cm->dequant_bit_depth = cm->bit_depth;
for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
-#if CONFIG_MISC_FIXES
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
-#endif
xd->lossless[i] = cm->y_dc_delta_q == 0 &&
-#if CONFIG_MISC_FIXES
qindex == 0 &&
-#else
- cm->base_qindex == 0 &&
-#endif
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
}
@@ -1202,7 +1242,8 @@
}
static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
- return vpx_rb_read_bit(rb) ? SWITCHABLE : vpx_rb_read_literal(rb, 2);
+ return vpx_rb_read_bit(rb) ?
+ SWITCHABLE : vpx_rb_read_literal(rb, 2 + CONFIG_EXT_INTERP);
}
static void setup_render_size(VP10_COMMON *cm,
@@ -1305,10 +1346,8 @@
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
width = buf->y_crop_width;
height = buf->y_crop_height;
-#if CONFIG_MISC_FIXES
cm->render_width = buf->render_width;
cm->render_height = buf->render_height;
-#endif
found = 1;
break;
}
@@ -1316,9 +1355,7 @@
if (!found) {
vp10_read_frame_size(rb, &width, &height);
-#if CONFIG_MISC_FIXES
setup_render_size(cm, rb);
-#endif
}
if (width <= 0 || height <= 0)
@@ -1350,9 +1387,6 @@
}
resize_context_buffers(cm, width, height);
-#if !CONFIG_MISC_FIXES
- setup_render_size(cm, rb);
-#endif
lock_buffer_pool(pool);
if (vpx_realloc_frame_buffer(
@@ -1399,14 +1433,10 @@
if (cm->log2_tile_rows)
cm->log2_tile_rows += vpx_rb_read_bit(rb);
-#if CONFIG_MISC_FIXES
// tile size magnitude
if (cm->log2_tile_rows > 0 || cm->log2_tile_cols > 0) {
cm->tile_sz_mag = vpx_rb_read_literal(rb, 2);
}
-#else
- cm->tile_sz_mag = 3;
-#endif
}
typedef struct TileBuffer {
@@ -1450,9 +1480,9 @@
if (decrypt_cb) {
uint8_t be_data[4];
decrypt_cb(decrypt_state, *data, be_data, tile_sz_mag + 1);
- size = mem_get_varsize(be_data, tile_sz_mag) + CONFIG_MISC_FIXES;
+ size = mem_get_varsize(be_data, tile_sz_mag) + 1;
} else {
- size = mem_get_varsize(*data, tile_sz_mag) + CONFIG_MISC_FIXES;
+ size = mem_get_varsize(*data, tile_sz_mag) + 1;
}
*data += tile_sz_mag + 1;
@@ -1530,6 +1560,11 @@
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_cols);
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*cm->above_txfm_context) * aligned_cols);
+#endif
+
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
if (pbi->tile_data == NULL ||
@@ -1576,6 +1611,9 @@
vp10_tile_set_col(&tile, tile_data->cm, col);
vp10_zero(tile_data->xd.left_context);
vp10_zero(tile_data->xd.left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(tile_data->xd.left_txfm_context_buffer);
+#endif
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(pbi, &tile_data->xd, mi_row,
@@ -1586,6 +1624,7 @@
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Failed to decode tile data");
}
+#if !CONFIG_VAR_TX
// Loopfilter one row.
if (cm->lf.filter_level && !cm->skip_loop_filter) {
const int lf_start = mi_row - MI_BLOCK_SIZE;
@@ -1612,10 +1651,15 @@
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf,
mi_row << MI_BLOCK_SIZE_LOG2);
+#endif
}
}
// Loopfilter remaining rows in the frame.
+#if CONFIG_VAR_TX
+ vp10_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
+ cm->lf.filter_level, 0, 0);
+#else
if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
winterface->sync(&pbi->lf_worker);
@@ -1623,6 +1667,7 @@
lf_data->stop = cm->mi_rows;
winterface->execute(&pbi->lf_worker);
}
+#endif
// Get last tile data.
tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
@@ -1649,6 +1694,9 @@
mi_row += MI_BLOCK_SIZE) {
vp10_zero(tile_data->xd.left_context);
vp10_zero(tile_data->xd.left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(tile_data->xd.left_txfm_context_buffer);
+#endif
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->pbi, &tile_data->xd,
@@ -1726,7 +1774,10 @@
sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_mi_cols);
-
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*cm->above_txfm_context) * aligned_mi_cols);
+#endif
// Load tile data into tile_buffers
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -1881,9 +1932,7 @@
static size_t read_uncompressed_header(VP10Decoder *pbi,
struct vpx_read_bit_buffer *rb) {
VP10_COMMON *const cm = &pbi->common;
-#if CONFIG_MISC_FIXES
MACROBLOCKD *const xd = &pbi->mb;
-#endif
BufferPool *const pool = cm->buffer_pool;
RefCntBuffer *const frame_bufs = pool->frame_bufs;
int i, mask, ref_index = 0;
@@ -1962,7 +2011,6 @@
if (cm->error_resilient_mode) {
cm->reset_frame_context = RESET_FRAME_CONTEXT_ALL;
} else {
-#if CONFIG_MISC_FIXES
if (cm->intra_only) {
cm->reset_frame_context =
vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL
@@ -1976,40 +2024,14 @@
vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL
: RESET_FRAME_CONTEXT_CURRENT;
}
-#else
- static const RESET_FRAME_CONTEXT_MODE reset_frame_context_conv_tbl[4] = {
- RESET_FRAME_CONTEXT_NONE, RESET_FRAME_CONTEXT_NONE,
- RESET_FRAME_CONTEXT_CURRENT, RESET_FRAME_CONTEXT_ALL
- };
-
- cm->reset_frame_context =
- reset_frame_context_conv_tbl[vpx_rb_read_literal(rb, 2)];
-#endif
}
if (cm->intra_only) {
if (!vp10_read_sync_code(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
-#if CONFIG_MISC_FIXES
+
read_bitdepth_colorspace_sampling(cm, rb);
-#else
- if (cm->profile > PROFILE_0) {
- read_bitdepth_colorspace_sampling(cm, rb);
- } else {
- // NOTE: The intra-only frame header does not include the specification
- // of either the color format or color sub-sampling in profile 0. VP9
- // specifies that the default color format should be YUV 4:2:0 in this
- // case (normative).
- cm->color_space = VPX_CS_BT_601;
- cm->color_range = 0;
- cm->subsampling_y = cm->subsampling_x = 1;
- cm->bit_depth = VPX_BITS_8;
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth = 0;
-#endif
- }
-#endif
pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
setup_frame_size(cm, rb);
@@ -2072,10 +2094,6 @@
cm->refresh_frame_context =
vpx_rb_read_bit(rb) ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
-#if !CONFIG_MISC_FIXES
- } else {
- vpx_rb_read_bit(rb); // parallel decoding mode flag
-#endif
}
} else {
cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_OFF;
@@ -2116,11 +2134,9 @@
setup_quantization(cm, &pbi->mb, rb);
setup_segmentation(cm, rb);
setup_segmentation_dequant(cm);
-#if CONFIG_MISC_FIXES
cm->tx_mode = (!cm->seg.enabled && xd->lossless[0]) ? ONLY_4X4
: read_tx_mode(rb);
cm->reference_mode = read_frame_reference_mode(cm, rb);
-#endif
setup_tile_info(cm, rb);
sz = vpx_rb_read_literal(rb, 16);
@@ -2132,12 +2148,36 @@
return sz;
}
+#if CONFIG_EXT_TX
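+// Reads forward updates to the extended-transform probabilities. Set 0 of
+// each group codes DCT_DCT only and carries no probabilities, so both loops
+// start at set 1; a one-bit GROUP_DIFF_UPDATE_PROB flag gates each set, so
+// skipping an update costs a single bit.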
+static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
+ int i, j, k;
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_tx[s][i]) continue;
+ for (j = 0; j < num_ext_tx_set_inter[s] - 1; ++j)
+ vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[s][i][j]);
+ }
+ }
+ }
+
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_tx[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ for (k = 0; k < num_ext_tx_set_intra[s] - 1; ++k)
+ vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[s][i][j][k]);
+ }
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
size_t partition_size) {
VP10_COMMON *const cm = &pbi->common;
-#if !CONFIG_MISC_FIXES
- MACROBLOCKD *const xd = &pbi->mb;
-#endif
FRAME_CONTEXT *const fc = cm->fc;
vpx_reader r;
int k, i, j;
@@ -2147,17 +2187,18 @@
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
-#if !CONFIG_MISC_FIXES
- cm->tx_mode = xd->lossless[0] ? ONLY_4X4 : read_tx_mode(&r);
-#endif
if (cm->tx_mode == TX_MODE_SELECT)
read_tx_mode_probs(&fc->tx_probs, &r);
read_coef_probs(fc, cm->tx_mode, &r);
+#if CONFIG_VAR_TX
+ for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
+ vp10_diff_update_prob(&r, &fc->txfm_partition_prob[k]);
+#endif
+
for (k = 0; k < SKIP_CONTEXTS; ++k)
vp10_diff_update_prob(&r, &fc->skip_probs[k]);
-#if CONFIG_MISC_FIXES
if (cm->seg.enabled) {
if (cm->seg.temporal_update) {
for (k = 0; k < PREDICTION_PROBS; k++)
@@ -2174,16 +2215,13 @@
for (j = 0; j < PARTITION_CONTEXTS; ++j)
for (i = 0; i < PARTITION_TYPES - 1; ++i)
vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
-#endif
if (frame_is_intra_only(cm)) {
vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob);
-#if CONFIG_MISC_FIXES
for (k = 0; k < INTRA_MODES; k++)
for (j = 0; j < INTRA_MODES; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
vp10_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]);
-#endif
} else {
nmv_context *const nmvc = &fc->nmvc;
@@ -2195,9 +2233,6 @@
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp10_diff_update_prob(&r, &fc->intra_inter_prob[i]);
-#if !CONFIG_MISC_FIXES
- cm->reference_mode = read_frame_reference_mode(cm, &r);
-#endif
if (cm->reference_mode != SINGLE_REFERENCE)
setup_compound_reference_mode(cm);
read_frame_reference_mode_probs(cm, &r);
@@ -2206,13 +2241,10 @@
for (i = 0; i < INTRA_MODES - 1; ++i)
vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
-#if !CONFIG_MISC_FIXES
- for (j = 0; j < PARTITION_CONTEXTS; ++j)
- for (i = 0; i < PARTITION_TYPES - 1; ++i)
- vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
-#endif
-
read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
+#if CONFIG_EXT_TX
+ read_ext_tx_probs(fc, &r);
+#endif
}
return vpx_reader_has_error(&r);
@@ -2253,6 +2285,13 @@
assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
+
+#if CONFIG_EXT_TX
+ assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
+ sizeof(cm->counts.inter_ext_tx)));
+ assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
+ sizeof(cm->counts.intra_ext_tx)));
+#endif // CONFIG_EXT_TX
}
#endif // NDEBUG
@@ -2393,14 +2432,9 @@
if (!xd->corrupted) {
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
vp10_adapt_coef_probs(cm);
-#if CONFIG_MISC_FIXES
vp10_adapt_intra_frame_probs(cm);
-#endif
if (!frame_is_intra_only(cm)) {
-#if !CONFIG_MISC_FIXES
- vp10_adapt_intra_frame_probs(cm);
-#endif
vp10_adapt_inter_frame_probs(cm);
vp10_adapt_mv_probs(cm, cm->allow_high_precision_mv);
}
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 38ea073..34d2c73 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -78,6 +78,68 @@
return vpx_read_tree(r, vp10_segment_tree, segp->tree_probs);
}
+#if CONFIG_VAR_TX
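+// Recursively reads the transform partitioning of an inter block: at each
+// node one bit chooses between coding the current square tx_size (leaf)
+// and splitting into four quadrants, bottoming out at TX_4X4.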
+static void read_tx_size_inter(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, FRAME_COUNTS *counts,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ vpx_reader *r) {
+ int is_split = 0;
+ const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1),
+ tx_size);
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ is_split = vpx_read(r, cm->fc->txfm_partition_prob[ctx]);
+
+ if (is_split) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ if (counts)
+ ++counts->txfm_partition[ctx][1];
+
+ if (tx_size == TX_8X8) {
+ mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {
+ int offsetr = blk_row + ((i >> 1) << bsl);
+ int offsetc = blk_col + ((i & 0x01) << bsl);
+ read_tx_size_inter(cm, xd, mbmi, counts,
+ tx_size - 1, offsetr, offsetc, r);
+ }
+ } else {
+ int idx, idy;
+ mbmi->inter_tx_size[tx_idx] = tx_size;
+ for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
+ for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
+ mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
+ mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
+ if (counts)
+ ++counts->txfm_partition[ctx][0];
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+ }
+}
+#endif
+
static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
TX_SIZE max_tx_size, vpx_reader *r) {
FRAME_COUNTS *counts = xd->counts;
@@ -134,28 +196,18 @@
int mi_offset, int x_mis, int y_mis,
vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
FRAME_COUNTS *counts = xd->counts;
struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- struct segmentation_probs *const segp = &cm->segp;
-#endif
int segment_id;
-#if !CONFIG_MISC_FIXES
- (void) xd;
-#endif
-
if (!seg->enabled)
return 0; // Default for disabled segmentation
assert(seg->update_map && !seg->temporal_update);
segment_id = read_segment_id(r, segp);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.tree_total[segment_id];
-#endif
set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
return segment_id;
}
@@ -175,12 +227,8 @@
static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
int mi_row, int mi_col, vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
FRAME_COUNTS *counts = xd->counts;
struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- struct segmentation_probs *const segp = &cm->segp;
-#endif
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int predicted_segment_id, segment_id;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
@@ -208,25 +256,19 @@
const int ctx = vp10_get_pred_context_seg_id(xd);
const vpx_prob pred_prob = segp->pred_probs[ctx];
mbmi->seg_id_predicted = vpx_read(r, pred_prob);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.pred[ctx][mbmi->seg_id_predicted];
-#endif
if (mbmi->seg_id_predicted) {
segment_id = predicted_segment_id;
} else {
segment_id = read_segment_id(r, segp);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.tree_mispred[segment_id];
-#endif
}
} else {
segment_id = read_segment_id(r, segp);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.tree_total[segment_id];
-#endif
}
set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
return segment_id;
@@ -278,6 +320,39 @@
}
}
+#if CONFIG_EXT_INTRA
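+// Reads the filter-intra (ext-intra) signaling. It is only coded when the
+// corresponding luma/chroma prediction mode is DC_PRED: one use/not-use
+// bit followed by a uniformly coded mode index.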
+static void read_ext_intra_mode_info(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd, vpx_reader *r) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ FRAME_COUNTS *counts = xd->counts;
+
+#if !ALLOW_FILTER_INTRA_MODES
+ return;
+#endif
+ if (mbmi->mode == DC_PRED) {
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+ vpx_read(r, cm->fc->ext_intra_probs[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+ read_uniform(r, FILTER_INTRA_MODES);
+ }
+ if (counts)
+ ++counts->ext_intra[0][mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
+ }
+ if (mbmi->uv_mode == DC_PRED) {
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ vpx_read(r, cm->fc->ext_intra_probs[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ read_uniform(r, FILTER_INTRA_MODES);
+ }
+ if (counts)
+ ++counts->ext_intra[1][mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
static void read_intra_frame_mode_info(VP10_COMMON *const cm,
MACROBLOCKD *const xd,
int mi_row, int mi_col, vpx_reader *r) {
@@ -323,15 +398,52 @@
default:
mbmi->mode = read_intra_mode(r,
get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
+ mbmi->angle_delta[0] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif // CONFIG_EXT_INTRA
}
mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ mbmi->angle_delta[1] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools &&
mbmi->mode == DC_PRED)
read_palette_mode_info(cm, xd, r);
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, 0) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ FRAME_COUNTS *counts = xd->counts;
+ int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, 0);
+ if (eset > 0) {
+ mbmi->tx_type = vpx_read_tree(
+ r, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]);
+ if (counts)
+ ++counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode]
+ [mbmi->tx_type];
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ if (bsize >= BLOCK_8X8)
+ read_ext_intra_mode_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
}
static int read_mv_component(vpx_reader *r,
@@ -453,10 +565,13 @@
VP10_COMMON *const cm, MACROBLOCKD *const xd,
vpx_reader *r) {
const int ctx = vp10_get_pred_context_switchable_interp(xd);
- const INTERP_FILTER type =
- (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree,
- cm->fc->switchable_interp_prob[ctx]);
FRAME_COUNTS *counts = xd->counts;
+ INTERP_FILTER type;
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd)) return EIGHTTAP;
+#endif
+ type = (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree,
+ cm->fc->switchable_interp_prob[ctx]);
if (counts)
++counts->switchable_interp[ctx][type];
return type;
@@ -492,12 +607,30 @@
break;
default:
mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
+#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[0] = 0;
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
+ mbmi->angle_delta[0] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif // CONFIG_EXT_INTRA
}
mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ mbmi->angle_delta[1] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif // CONFIG_EXT_INTRA
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ if (bsize >= BLOCK_8X8)
+ read_ext_intra_mode_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
}
static INLINE int is_mv_valid(const MV *mv) {
@@ -619,9 +752,11 @@
}
}
+#if !CONFIG_EXT_INTERP
mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
- ? read_switchable_interp_filter(cm, xd, r)
- : cm->interp_filter;
+ ? read_switchable_interp_filter(cm, xd, r)
+ : cm->interp_filter;
+#endif // !CONFIG_EXT_INTERP
if (bsize < BLOCK_8X8) {
const int num_4x4_w = 1 << xd->bmode_blocks_wl;
@@ -670,6 +805,11 @@
xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv,
nearestmv, nearmv, is_compound, allow_hp, r);
}
+#if CONFIG_EXT_INTERP
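+ // With CONFIG_EXT_INTERP the filter is read after the MVs, since
+ // vp10_is_interp_needed() can only be evaluated once the block's motion
+ // is known.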
+ mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
+ ? read_switchable_interp_filter(cm, xd, r)
+ : cm->interp_filter;
+#endif // CONFIG_EXT_INTERP
}
static void read_inter_frame_mode_info(VP10Decoder *const pbi,
@@ -679,18 +819,88 @@
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
int inter_block;
+#if CONFIG_VAR_TX
+ BLOCK_SIZE bsize = mbmi->sb_type;
+#endif
mbmi->mv[0].as_int = 0;
mbmi->mv[1].as_int = 0;
mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
+
+#if CONFIG_VAR_TX
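+ // Point the transform-size contexts at this block before parsing: above
+ // is indexed by mi column, left by the row offset within the current
+ // 64x64 superblock (8 MI units).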
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+ !mbmi->skip && inter_block) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ const int bs = num_4x4_blocks_wide_lookup[txb_size];
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; idy += bs)
+ for (idx = 0; idx < width; idx += bs)
+ read_tx_size_inter(cm, xd, mbmi, xd->counts, max_tx_size,
+ idy, idx, r);
+ if (xd->counts) {
+ const int ctx = get_tx_size_context(xd);
+ ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size];
+ }
+ } else {
+ mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+ if (inter_block) {
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; ++idy)
+ for (idx = 0; idx < width; ++idx)
+ mbmi->inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = mbmi->tx_size;
+ }
+
+ set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
+ }
+#else
mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+#endif
if (inter_block)
read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r);
else
read_intra_block_mode_info(cm, xd, mi, r);
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, inter_block) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type,
+ inter_block);
+ FRAME_COUNTS *counts = xd->counts;
+
+ if (inter_block) {
+ if (eset > 0) {
+ mbmi->tx_type =
+ vpx_read_tree(r, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size]);
+ if (counts)
+ ++counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type];
+ }
+ } else {
+ if (eset > 0) {
+ mbmi->tx_type = vpx_read_tree(r, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset]
+ [mbmi->tx_size][mbmi->mode]);
+ if (counts)
+ ++counts->intra_ext_tx[eset][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
+#endif // CONFIG_EXT_TX
}
void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd,
@@ -705,7 +915,6 @@
read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
} else {
read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r);
-
for (h = 0; h < y_mis; ++h) {
MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
for (w = 0; w < x_mis; ++w) {
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index d8864d2..7499230 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -459,9 +459,7 @@
// an invalid bitstream and need to return an error.
uint8_t marker;
-#if CONFIG_MISC_FIXES
size_t frame_sz_sum = 0;
-#endif
assert(data_sz);
marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
@@ -470,7 +468,7 @@
if ((marker & 0xe0) == 0xc0) {
const uint32_t frames = (marker & 0x7) + 1;
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
- const size_t index_sz = 2 + mag * (frames - CONFIG_MISC_FIXES);
+ const size_t index_sz = 2 + mag * (frames - 1);
// This chunk is marked as having a superframe index but doesn't have
// enough data for it, thus it's an invalid superframe index.
@@ -501,20 +499,16 @@
x = clear_buffer;
}
- for (i = 0; i < frames - CONFIG_MISC_FIXES; ++i) {
+ for (i = 0; i < frames - 1; ++i) {
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j)
this_sz |= (*x++) << (j * 8);
- this_sz += CONFIG_MISC_FIXES;
+ this_sz += 1;
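+ // Sizes in the superframe index are stored minus one.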
sizes[i] = this_sz;
-#if CONFIG_MISC_FIXES
frame_sz_sum += this_sz;
-#endif
}
-#if CONFIG_MISC_FIXES
sizes[i] = data_sz - index_sz - frame_sz_sum;
-#endif
*count = frames;
}
}
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c
index cda9888..011c45a 100644
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -164,11 +164,7 @@
val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r);
break;
case CATEGORY6_TOKEN: {
-#if CONFIG_MISC_FIXES
const int skip_bits = TX_SIZES - 1 - tx_size;
-#else
- const int skip_bits = 0;
-#endif
const uint8_t *cat6p = cat6_prob + skip_bits;
#if CONFIG_VP9_HIGHBITDEPTH
switch (xd->bd) {
diff --git a/vp10/decoder/dsubexp.c b/vp10/decoder/dsubexp.c
index 36c1917..d0b8d7d 100644
--- a/vp10/decoder/dsubexp.c
+++ b/vp10/decoder/dsubexp.c
@@ -23,13 +23,13 @@
static int decode_uniform(vpx_reader *r) {
const int l = 8;
- const int m = (1 << l) - 191 + CONFIG_MISC_FIXES;
+ const int m = (1 << l) - 190;  // -191 + CONFIG_MISC_FIXES, with the flag folded to 1
const int v = vpx_read_literal(r, l - 1);
return v < m ? v : (v << 1) - m + vpx_read_bit(r);
}
static int inv_remap_prob(int v, int m) {
- static uint8_t inv_map_table[MAX_PROB - CONFIG_MISC_FIXES] = {
+ static uint8_t inv_map_table[MAX_PROB - 1] = {
7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189,
202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27,
@@ -47,9 +47,6 @@
207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222,
223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
-#if !CONFIG_MISC_FIXES
- 253
-#endif
};
assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0])));
v = inv_map_table[v];
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index d9b4be4..59edec2 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -38,8 +38,13 @@
static const struct vp10_token intra_mode_encodings[INTRA_MODES] = {
{0, 1}, {6, 3}, {28, 5}, {30, 5}, {58, 6}, {59, 6}, {126, 7}, {127, 7},
{62, 6}, {2, 2}};
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
+ {{0, 1}, {4, 3}, {3, 2}, {5, 3}};
+#else
static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
{{0, 1}, {2, 2}, {3, 2}};
+#endif // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
static const struct vp10_token partition_encodings[PARTITION_TYPES] =
{{0, 1}, {2, 2}, {6, 3}, {7, 3}};
static const struct vp10_token inter_mode_encodings[INTER_MODES] =
@@ -72,6 +77,23 @@
}
}
+#if CONFIG_EXT_TX
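+// Token tables for the extended-transform trees, derived from the coding
+// trees once at encoder startup by vp10_encode_token_init().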
+static struct vp10_token ext_tx_inter_encodings[EXT_TX_SETS_INTER][TX_TYPES];
+static struct vp10_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES];
+#endif // CONFIG_EXT_TX
+
+void vp10_encode_token_init(void) {
+#if CONFIG_EXT_TX
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ vp10_tokens_from_tree(ext_tx_inter_encodings[s], vp10_ext_tx_inter_tree[s]);
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ vp10_tokens_from_tree(ext_tx_intra_encodings[s], vp10_ext_tx_intra_tree[s]);
+ }
+#endif // CONFIG_EXT_TX
+}
+
static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode,
const vpx_prob *probs) {
vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]);
@@ -104,6 +126,80 @@
vp10_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
}
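+// Returns the estimated bit savings from updating every probability on one
+// tree; used below to decide whether a grouped update is worth its flag.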
+static int prob_diff_update_savings(const vpx_tree_index *tree,
+ vpx_prob probs[/*n - 1*/],
+ const unsigned int counts[/*n - 1*/],
+ int n) {
+ int i;
+ unsigned int branch_ct[32][2];
+ int savings = 0;
+
+ // Assuming max number of probabilities <= 32
+ assert(n <= 32);
+ vp10_tree_probs_from_distribution(tree, branch_ct, counts);
+ for (i = 0; i < n - 1; ++i) {
+ savings += vp10_cond_prob_diff_update_savings(&probs[i],
+ branch_ct[i]);
+ }
+ return savings;
+}
+
+#if CONFIG_VAR_TX
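+// Encoder-side mirror of read_tx_size_inter() in decodemv.c: writes one
+// bit per node of the transform partition tree (0 = keep this tx_size,
+// 1 = split) and updates the same above/left contexts the decoder keeps.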
+static void write_tx_size_inter(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd,
+ const MB_MODE_INFO *mbmi,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ vpx_writer *w) {
+ const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1),
+ tx_size);
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == mbmi->inter_tx_size[tx_idx]) {
+ vpx_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+ } else {
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+ vpx_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
+
+ if (tx_size == TX_8X8) {
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {
+ int offsetr = blk_row + ((i >> 1) << bsl);
+ int offsetc = blk_col + ((i & 0x01) << bsl);
+ write_tx_size_inter(cm, xd, mbmi, tx_size - 1, offsetr, offsetc, w);
+ }
+ }
+}
+
+static void update_txfm_partition_probs(VP10_COMMON *cm, vpx_writer *w,
+ FRAME_COUNTS *counts) {
+ int k;
+ for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
+ vp10_cond_prob_diff_update(w, &cm->fc->txfm_partition_prob[k],
+ counts->txfm_partition[k]);
+}
+#endif
+
static void write_selected_tx_size(const VP10_COMMON *cm,
const MACROBLOCKD *xd, vpx_writer *w) {
TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
@@ -147,6 +243,60 @@
counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
}
+#if CONFIG_EXT_TX
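+// Writes the extended-transform probability updates. For each set, the
+// accumulated rate savings are weighed against the cost of the one-bit
+// group flag before any per-probability diff update is coded.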
+static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) {
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
+ int i, j;
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_tx[s][i]) continue;
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i],
+ cm->counts.inter_ext_tx[s][i], num_ext_tx_set_inter[s]);
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_tx[s][i]) continue;
+ prob_diff_update(vp10_ext_tx_inter_tree[s],
+ cm->fc->inter_ext_tx_prob[s][i],
+ cm->counts.inter_ext_tx[s][i],
+ num_ext_tx_set_inter[s], w);
+ }
+ }
+ }
+
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_tx[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j],
+ cm->counts.intra_ext_tx[s][i][j], num_ext_tx_set_intra[s]);
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_tx[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ prob_diff_update(vp10_ext_tx_intra_tree[s],
+ cm->fc->intra_ext_tx_prob[s][i][j],
+ cm->counts.intra_ext_tx[s][i][j],
+ num_ext_tx_set_intra[s], w);
+ }
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
static void pack_palette_tokens(vpx_writer *w, TOKENEXTRA **tp,
BLOCK_SIZE bsize, int n) {
int rows = 4 * num_4x4_blocks_high_lookup[bsize];
@@ -167,8 +317,9 @@
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth, const TX_SIZE tx) {
TOKENEXTRA *p = *tp;
-#if !CONFIG_MISC_FIXES
- (void) tx;
+#if CONFIG_VAR_TX
+ int count = 0;
+ const int seg_eob = 16 << (tx << 1);
#endif
while (p < stop && p->token != EOSB_TOKEN) {
@@ -217,12 +368,8 @@
if (b->base_val) {
const int e = p->extra, l = b->len;
-#if CONFIG_MISC_FIXES
int skip_bits =
(b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0;
-#else
- int skip_bits = 0;
-#endif
if (l) {
const unsigned char *pb = b->prob;
@@ -245,11 +392,68 @@
vpx_write_bit(w, e & 1);
}
++p;
+
+#if CONFIG_VAR_TX
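+ // Each call packs a single transform block: stop at its EOB token, or
+ // after seg_eob coefficient tokens when the block is fully coded and
+ // carries no EOB token.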
+ ++count;
+ if (t == EOB_TOKEN || count == seg_eob)
+ break;
+#endif
}
*tp = p;
}
+#if CONFIG_VAR_TX
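+// Walks the transform partition tree of one plane block, emitting the
+// tokens of each leaf transform block via pack_mb_tokens() in the same
+// order the decoder traverses them.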
+static void pack_txb_tokens(vpx_writer *w,
+ TOKENEXTRA **tp, const TOKENEXTRA *const tok_end,
+ MACROBLOCKD *xd, MB_MODE_INFO *mbmi, int plane,
+ BLOCK_SIZE plane_bsize,
+ vpx_bit_depth_t bit_depth,
+ int block,
+ int blk_row, int blk_col, TX_SIZE tx_size) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_idx];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ pack_mb_tokens(w, tp, tok_end, bit_depth, tx_size);
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ pack_txb_tokens(w, tp, tok_end, xd, mbmi, plane,
+ plane_bsize, bit_depth, block + i * step,
+ offsetr, offsetc, tx_size - 1);
+ }
+ }
+}
+#endif
+
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
const struct segmentation_probs *segp,
int segment_id) {
@@ -293,18 +497,62 @@
}
}
+#if CONFIG_EXT_INTRA
+static void write_ext_intra_mode_info(const VP10_COMMON *const cm,
+ const MB_MODE_INFO *const mbmi,
+ vpx_writer *w) {
+#if !ALLOW_FILTER_INTRA_MODES
+ return;
+#endif
+ if (mbmi->mode == DC_PRED) {
+ vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[0],
+ cm->fc->ext_intra_probs[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[0];
+ write_uniform(w, FILTER_INTRA_MODES, mode);
+ }
+ }
+ if (mbmi->uv_mode == DC_PRED) {
+ vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[1],
+ cm->fc->ext_intra_probs[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
+ EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[1];
+ write_uniform(w, FILTER_INTRA_MODES, mode);
+ }
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
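+// Writes the switchable filter for one block. With CONFIG_EXT_INTERP this
+// runs after the MVs have been written, and nothing is coded when
+// vp10_is_interp_needed() says the filter cannot matter.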
+static void write_switchable_interp_filter(VP10_COMP *cpi,
+ const MACROBLOCKD *xd,
+ vpx_writer *w) {
+ VP10_COMMON *const cm = &cpi->common;
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp10_get_pred_context_switchable_interp(xd);
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd)) {
+ assert(mbmi->interp_filter == EIGHTTAP);
+ return;
+ }
+#endif
+ vp10_write_token(w, vp10_switchable_interp_tree,
+ cm->fc->switchable_interp_prob[ctx],
+ &switchable_interp_encodings[mbmi->interp_filter]);
+ ++cpi->interp_filter_selected[0][mbmi->interp_filter];
+ }
+}
+
static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
vpx_writer *w) {
VP10_COMMON *const cm = &cpi->common;
const nmv_context *nmvc = &cm->fc->nmvc;
- const MACROBLOCK *const x = &cpi->td.mb;
- const MACROBLOCKD *const xd = &x->e_mbd;
+ const MACROBLOCK *x = &cpi->td.mb;
+ const MACROBLOCKD *xd = &x->e_mbd;
const struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
const struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- const struct segmentation_probs *const segp = &cm->segp;
-#endif
const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const PREDICTION_MODE mode = mbmi->mode;
@@ -334,12 +582,40 @@
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
!(is_inter && skip)) {
- write_selected_tx_size(cm, xd, w);
+#if CONFIG_VAR_TX
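+ // Inter blocks code a full transform partition tree instead of a
+ // single tx_size symbol; intra blocks keep the old signaling but must
+ // still update the above/left transform contexts.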
+ if (is_inter) { // This implies skip flag is 0.
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const int txb_size = txsize_to_bsize[max_tx_size];
+ const int bs = num_4x4_blocks_wide_lookup[txb_size];
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; idy += bs)
+ for (idx = 0; idx < width; idx += bs)
+ write_tx_size_inter(cm, xd, mbmi, max_tx_size, idy, idx, w);
+ } else {
+ set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
+
+ write_selected_tx_size(cm, xd, w);
+ }
+ } else {
+ set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
+#else
+ write_selected_tx_size(cm, xd, w);
+#endif
}
if (!is_inter) {
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
+#if CONFIG_EXT_INTRA
+ if (mode != DC_PRED && mode != TM_PRED) {
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+ }
+#endif // CONFIG_EXT_INTRA
} else {
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -352,6 +628,15 @@
}
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+
+ if (bsize >= BLOCK_8X8)
+ write_ext_intra_mode_info(cm, mbmi, w);
+#endif // CONFIG_EXT_INTRA
} else {
const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
@@ -364,15 +649,9 @@
}
}
- if (cm->interp_filter == SWITCHABLE) {
- const int ctx = vp10_get_pred_context_switchable_interp(xd);
- vp10_write_token(w, vp10_switchable_interp_tree,
- cm->fc->switchable_interp_prob[ctx],
- &switchable_interp_encodings[mbmi->interp_filter]);
- ++cpi->interp_filter_selected[0][mbmi->interp_filter];
- } else {
- assert(mbmi->interp_filter == cm->interp_filter);
- }
+#if !CONFIG_EXT_INTERP
+ write_switchable_interp_filter(cpi, xd, w);
+#endif // !CONFIG_EXT_INTERP
if (bsize < BLOCK_8X8) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -399,7 +678,30 @@
allow_hp);
}
}
+#if CONFIG_EXT_INTERP
+ write_switchable_interp_filter(cpi, xd, w);
+#endif // CONFIG_EXT_INTERP
}
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize, is_inter);
+ if (is_inter) {
+ if (eset > 0)
+ vp10_write_token(w, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size],
+ &ext_tx_inter_encodings[eset][mbmi->tx_type]);
+ } else {
+ if (eset > 0)
+ vp10_write_token(
+ w, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+ &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+ }
+ }
+#endif // CONFIG_EXT_TX
}
static void write_palette_mode_info(const VP10_COMMON *cm,
@@ -435,11 +737,7 @@
static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO **mi_8x8, vpx_writer *w) {
const struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
const struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- const struct segmentation_probs *const segp = &cm->segp;
-#endif
const MODE_INFO *const mi = mi_8x8[0];
const MODE_INFO *const above_mi = xd->above_mi;
const MODE_INFO *const left_mi = xd->left_mi;
@@ -457,6 +755,11 @@
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mbmi->mode,
get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+#endif // CONFIG_EXT_INTRA
} else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -472,10 +775,35 @@
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+#endif // CONFIG_EXT_INTRA
if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools &&
mbmi->mode == DC_PRED)
write_palette_mode_info(cm, xd, mi, w);
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, 0) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize, 0);
+ if (eset > 0)
+ vp10_write_token(
+ w, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+ &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+ }
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+ if (bsize >= BLOCK_8X8)
+ write_ext_intra_mode_info(cm, mbmi, w);
+#endif // CONFIG_EXT_INTRA
}
static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
@@ -499,6 +827,10 @@
if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cm, xd, xd->mi, w);
} else {
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+#endif
pack_inter_mode_mvs(cpi, m, w);
}
@@ -512,9 +844,45 @@
if (!m->mbmi.skip) {
assert(*tok < tok_end);
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ MB_MODE_INFO *mbmi = &m->mbmi;
+ BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ int row, col;
+
+ if (is_inter_block(mbmi)) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+ int block = 0;
+ const int step = 1 << (max_tx_size << 1);
+ for (row = 0; row < num_4x4_h; row += bw) {
+ for (col = 0; col < num_4x4_w; col += bw) {
+ pack_txb_tokens(w, tok, tok_end, xd, mbmi, plane, plane_bsize,
+ cm->bit_depth, block, row, col, max_tx_size);
+ block += step;
+ }
+ }
+ } else {
+ TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
+ : m->mbmi.tx_size;
+ BLOCK_SIZE txb_size = txsize_to_bsize[tx];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+
+ for (row = 0; row < num_4x4_h; row += bw)
+ for (col = 0; col < num_4x4_w; col += bw)
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+ }
+#else
TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
: m->mbmi.tx_size;
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
}
@@ -610,6 +978,9 @@
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
vp10_zero(xd->left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(xd->left_txfm_context_buffer);
+#endif
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col,
@@ -688,7 +1059,6 @@
}
}
- // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
/* Is coef updated at all */
if (update[1] == 0 || savings < 0) {
vpx_write_bit(bc, 0);
@@ -849,7 +1219,7 @@
static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
if (delta_q != 0) {
vpx_wb_write_bit(wb, 1);
- vpx_wb_write_inv_signed_literal(wb, delta_q, CONFIG_MISC_FIXES ? 6 : 4);
+ vpx_wb_write_inv_signed_literal(wb, delta_q, 6);
} else {
vpx_wb_write_bit(wb, 0);
}
@@ -866,11 +1236,7 @@
static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
struct vpx_write_bit_buffer *wb) {
int i, j;
-
const struct segmentation *seg = &cm->seg;
-#if !CONFIG_MISC_FIXES
- const struct segmentation_probs *segp = &cm->segp;
-#endif
vpx_wb_write_bit(wb, seg->enabled);
if (!seg->enabled)
@@ -885,16 +1251,6 @@
if (seg->update_map) {
// Select the coding strategy (temporal or spatial)
vp10_choose_segmap_coding_method(cm, xd);
-#if !CONFIG_MISC_FIXES
- // Write out probabilities used to decode unpredicted macro-block segments
- for (i = 0; i < SEG_TREE_PROBS; i++) {
- const int prob = segp->tree_probs[i];
- const int update = prob != MAX_PROB;
- vpx_wb_write_bit(wb, update);
- if (update)
- vpx_wb_write_literal(wb, prob, 8);
- }
-#endif
// Write out the chosen coding method.
if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
@@ -902,18 +1258,6 @@
} else {
assert(seg->temporal_update == 0);
}
-
-#if !CONFIG_MISC_FIXES
- if (seg->temporal_update) {
- for (i = 0; i < PREDICTION_PROBS; i++) {
- const int prob = segp->pred_probs[i];
- const int update = prob != MAX_PROB;
- vpx_wb_write_bit(wb, update);
- if (update)
- vpx_wb_write_literal(wb, prob, 8);
- }
- }
-#endif
}
// Segmentation data
@@ -941,7 +1285,6 @@
}
}
-#if CONFIG_MISC_FIXES
static void update_seg_probs(VP10_COMP *cpi, vpx_writer *w) {
VP10_COMMON *cm = &cpi->common;
@@ -968,19 +1311,9 @@
if (mode != TX_MODE_SELECT)
vpx_wb_write_literal(wb, mode, 2);
}
-#endif
static void update_txfm_probs(VP10_COMMON *cm, vpx_writer *w,
FRAME_COUNTS *counts) {
-#if !CONFIG_MISC_FIXES
- // Mode
- vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2);
- if (cm->tx_mode >= ALLOW_32X32)
- vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
-
- // Probabilities
-#endif
-
if (cm->tx_mode == TX_MODE_SELECT) {
int i, j;
unsigned int ct_8x8p[TX_SIZES - 3][2];
@@ -1014,7 +1347,7 @@
struct vpx_write_bit_buffer *wb) {
vpx_wb_write_bit(wb, filter == SWITCHABLE);
if (filter != SWITCHABLE)
- vpx_wb_write_literal(wb, filter, 2);
+ vpx_wb_write_literal(wb, filter, 2 + CONFIG_EXT_INTERP);
}
static void fix_interp_filter(VP10_COMMON *cm, FRAME_COUNTS *counts) {
@@ -1098,6 +1431,10 @@
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*cm->above_txfm_context) * mi_cols_aligned_to_sb(cm->mi_cols));
+#endif
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
@@ -1121,7 +1458,7 @@
// size of this tile
assert(residual_bc.pos > 0);
- tile_sz = residual_bc.pos - CONFIG_MISC_FIXES;
+ tile_sz = residual_bc.pos - 1;
mem_put_le32(data_ptr + total_size, tile_sz);
max_tile = max_tile > tile_sz ? max_tile : tile_sz;
total_size += 4;
@@ -1166,10 +1503,8 @@
if (cfg != NULL) {
found = cm->width == cfg->y_crop_width &&
cm->height == cfg->y_crop_height;
-#if CONFIG_MISC_FIXES
found &= cm->render_width == cfg->render_width &&
cm->render_height == cfg->render_height;
-#endif
}
vpx_wb_write_bit(wb, found);
if (found) {
@@ -1180,15 +1515,8 @@
if (!found) {
vpx_wb_write_literal(wb, cm->width - 1, 16);
vpx_wb_write_literal(wb, cm->height - 1, 16);
-
-#if CONFIG_MISC_FIXES
write_render_size(cm, wb);
-#endif
}
-
-#if !CONFIG_MISC_FIXES
- write_render_size(cm, wb);
-#endif
}
static void write_sync_code(struct vpx_write_bit_buffer *wb) {
@@ -1266,7 +1594,6 @@
vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode) {
-#if CONFIG_MISC_FIXES
if (cm->intra_only) {
vpx_wb_write_bit(wb,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
@@ -1277,25 +1604,11 @@
vpx_wb_write_bit(wb,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
}
-#else
- static const int reset_frame_context_conv_tbl[3] = { 0, 2, 3 };
-
- vpx_wb_write_literal(wb,
- reset_frame_context_conv_tbl[cm->reset_frame_context], 2);
-#endif
}
if (cm->intra_only) {
write_sync_code(wb);
-
-#if CONFIG_MISC_FIXES
write_bitdepth_colorspace_sampling(cm, wb);
-#else
- // Note for profile 0, 420 8bpp is assumed.
- if (cm->profile > PROFILE_0) {
- write_bitdepth_colorspace_sampling(cm, wb);
- }
-#endif
vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
write_frame_size(cm, wb);
@@ -1321,11 +1634,9 @@
if (!cm->error_resilient_mode) {
vpx_wb_write_bit(wb,
cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF);
-#if CONFIG_MISC_FIXES
if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF)
-#endif
vpx_wb_write_bit(wb, cm->refresh_frame_context !=
- REFRESH_FRAME_CONTEXT_BACKWARD);
+ REFRESH_FRAME_CONTEXT_BACKWARD);
}
vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
@@ -1333,7 +1644,6 @@
encode_loopfilter(&cm->lf, wb);
encode_quantization(cm, wb);
encode_segmentation(cm, xd, wb);
-#if CONFIG_MISC_FIXES
if (!cm->seg.enabled && xd->lossless[0])
cm->tx_mode = TX_4X4;
else
@@ -1346,7 +1656,6 @@
if (!use_hybrid_pred)
vpx_wb_write_bit(wb, use_compound_pred);
}
-#endif
write_tile_info(cm, wb);
}
@@ -1356,24 +1665,17 @@
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = cpi->td.counts;
vpx_writer header_bc;
- int i;
-#if CONFIG_MISC_FIXES
- int j;
-#endif
+ int i, j;
vpx_start_encode(&header_bc, data);
-
-#if !CONFIG_MISC_FIXES
- if (cpi->td.mb.e_mbd.lossless[0])
- cm->tx_mode = TX_4X4;
- else
- update_txfm_probs(cm, &header_bc, counts);
-#else
update_txfm_probs(cm, &header_bc, counts);
-#endif
update_coef_probs(cpi, &header_bc);
+
+#if CONFIG_VAR_TX
+ update_txfm_partition_probs(cm, &header_bc, counts);
+#endif
+
update_skip_probs(cm, &header_bc, counts);
-#if CONFIG_MISC_FIXES
update_seg_probs(cpi, &header_bc);
for (i = 0; i < INTRA_MODES; ++i)
@@ -1383,16 +1685,13 @@
for (i = 0; i < PARTITION_CONTEXTS; ++i)
prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
counts->partition[i], PARTITION_TYPES, &header_bc);
-#endif
if (frame_is_intra_only(cm)) {
vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob);
-#if CONFIG_MISC_FIXES
for (i = 0; i < INTRA_MODES; ++i)
for (j = 0; j < INTRA_MODES; ++j)
prob_diff_update(vp10_intra_mode_tree, cm->kf_y_prob[i][j],
counts->kf_y_mode[i][j], INTRA_MODES, &header_bc);
-#endif
} else {
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
prob_diff_update(vp10_inter_mode_tree, cm->fc->inter_mode_probs[i],
@@ -1407,23 +1706,10 @@
if (cpi->allow_comp_inter_inter) {
const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
-#if !CONFIG_MISC_FIXES
- const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
-
- vpx_write_bit(&header_bc, use_compound_pred);
- if (use_compound_pred) {
- vpx_write_bit(&header_bc, use_hybrid_pred);
- if (use_hybrid_pred)
- for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
- counts->comp_inter[i]);
- }
-#else
if (use_hybrid_pred)
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
counts->comp_inter[i]);
-#endif
}
if (cm->reference_mode != COMPOUND_REFERENCE) {
@@ -1444,14 +1730,11 @@
prob_diff_update(vp10_intra_mode_tree, cm->fc->y_mode_prob[i],
counts->y_mode[i], INTRA_MODES, &header_bc);
-#if !CONFIG_MISC_FIXES
- for (i = 0; i < PARTITION_CONTEXTS; ++i)
- prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
- counts->partition[i], PARTITION_TYPES, &header_bc);
-#endif
-
vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
&counts->mv);
+#if CONFIG_EXT_TX
+ update_ext_tx_probs(cm, &header_bc);
+#endif // CONFIG_EXT_TX
}
vpx_stop_encode(&header_bc);
@@ -1460,7 +1743,6 @@
return header_bc.pos;
}
-#if CONFIG_MISC_FIXES
static int remux_tiles(uint8_t *dest, const int sz,
const int n_tiles, const int mag) {
int rpos = 0, wpos = 0, n;
@@ -1500,7 +1782,6 @@
return wpos;
}
-#endif
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size) {
uint8_t *data = dest;
@@ -1508,14 +1789,9 @@
struct vpx_write_bit_buffer wb = {data, 0};
struct vpx_write_bit_buffer saved_wb;
unsigned int max_tile;
-#if CONFIG_MISC_FIXES
VP10_COMMON *const cm = &cpi->common;
const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
const int have_tiles = n_log2_tiles > 0;
-#else
- const int have_tiles = 0; // we have tiles, but we don't want to write a
- // tile size marker in the header
-#endif
write_uncompressed_header(cpi, &wb);
saved_wb = wb;
@@ -1531,7 +1807,6 @@
data += first_part_size;
data_sz = encode_tiles(cpi, data, &max_tile);
-#if CONFIG_MISC_FIXES
if (max_tile > 0) {
int mag;
unsigned int mask;
@@ -1550,7 +1825,6 @@
} else {
assert(n_log2_tiles == 0);
}
-#endif
data += data_sz;
// TODO(jbb): Figure out what to do if first_part_size > 16 bits.
diff --git a/vp10/encoder/bitstream.h b/vp10/encoder/bitstream.h
index aa0ed2f..0284920 100644
--- a/vp10/encoder/bitstream.h
+++ b/vp10/encoder/bitstream.h
@@ -20,6 +20,8 @@
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
+void vp10_encode_token_init(void);
+
static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) {
return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
cpi->rc.is_src_frame_alt_ref;
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index df3830c..55ec46e 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -111,6 +111,9 @@
// Notes transform blocks where no coefficents are coded.
// Set during mode selection. Read during block encoding.
uint8_t zcoeff_blk[TX_SIZES][256];
+#if CONFIG_VAR_TX
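+ // Per-plane skip flags for variable-size transform blocks (analogous
+ // to zcoeff_blk above).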
+ uint8_t blk_skip[MAX_MB_PLANE][256];
+#endif
int skip;
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c
index 6c056d2..1ac802f 100644
--- a/vp10/encoder/context_tree.c
+++ b/vp10/encoder/context_tree.c
@@ -28,6 +28,10 @@
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VAR_TX
+ CHECK_MEM_ERROR(cm, ctx->blk_skip[i],
+ vpx_calloc(num_blk, sizeof(uint8_t)));
+#endif
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
@@ -50,6 +54,10 @@
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VAR_TX
+ vpx_free(ctx->blk_skip[i]);
+ ctx->blk_skip[i] = 0;
+#endif
for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0;
diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h
index 2a0fffb..55ae471 100644
--- a/vp10/encoder/context_tree.h
+++ b/vp10/encoder/context_tree.h
@@ -28,6 +28,9 @@
MB_MODE_INFO_EXT mbmi_ext;
uint8_t *zcoeff_blk;
uint8_t *color_index_map[2];
+#if CONFIG_VAR_TX
+ uint8_t *blk_skip[MAX_MB_PLANE];
+#endif
tran_low_t *coeff[MAX_MB_PLANE][3];
tran_low_t *qcoeff[MAX_MB_PLANE][3];
tran_low_t *dqcoeff[MAX_MB_PLANE][3];
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index c61babe..c77e143 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -34,12 +34,177 @@
#endif
}
+#if CONFIG_EXT_TX
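+// Forward DSTs used by the extended transform set. The N-point basis is
+// sin(pi*(k+1)*(n+1)/(N+1)); since sin((N+1-j)*pi/(N+1)) == sin(j*pi/(N+1)),
+// only N/2 distinct multipliers occur, hence the butterflies on input
+// sums and differences below.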
+void fdst4(const tran_low_t *input, tran_low_t *output) {
+ // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 141124871, 228344838,
+ };
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+}
+
+void fdst8(const tran_low_t *input, tran_low_t *output) {
+ // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+ static const int sinvalue_lookup[] = {
+ 86559612, 162678858, 219176632, 249238470
+ };
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+ output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+}
+
+void fdst16(const tran_low_t *input, tran_low_t *output) {
+ // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+ static const int sinvalue_lookup[] = {
+ 47852167, 94074787, 137093803, 175444254,
+ 207820161, 233119001, 250479254, 259309736
+ };
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+}
+#endif // CONFIG_EXT_TX
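
For reference, these fixed-point constants follow directly from the comment above fdst16(); a minimal stand-alone sketch that reproduces them, assuming DCT_CONST_BITS == 14 as in the rest of libvpx (the generator itself is hypothetical, not part of the patch):

#include <math.h>
#include <stdio.h>

int main(void) {
  int i;
  for (i = 0; i < 8; ++i) {
    double v = sin(M_PI * (i + 1) / 17) * sqrt(2.0 / 17) * 2.0 * sqrt(2.0);
    /* Scale by 2^(2 * DCT_CONST_BITS) == 1 << 28, matching the later
     * ROUND_POWER_OF_TWO(sum, 2 * DCT_CONST_BITS) in fdst16(). */
    printf("%.0f\n", v * (1 << 28));
  }
  return 0;
}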
+
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
// stage 0
- range_check(input, 4, 11);
+ range_check(input, 4, 14);
// stage 1
output[0] = input[0] + input[3];
@@ -47,7 +212,7 @@
output[2] = input[1] - input[2];
output[3] = input[0] - input[3];
- range_check(output, 4, 12);
+ range_check(output, 4, 15);
// stage 2
temp = output[0] * cospi_16_64 + output[1] * cospi_16_64;
@@ -59,7 +224,7 @@
temp = output[3] * cospi_24_64 + output[2] * -cospi_8_64;
step[3] = (tran_low_t)fdct_round_shift(temp);
- range_check(step, 4, 13);
+ range_check(step, 4, 16);
// stage 3
output[0] = step[0];
@@ -67,7 +232,7 @@
output[2] = step[1];
output[3] = step[3];
- range_check(output, 4, 13);
+ range_check(output, 4, 16);
}
static void fdct8(const tran_low_t *input, tran_low_t *output) {
@@ -75,7 +240,7 @@
tran_low_t step[8];
// stage 0
- range_check(input, 8, 12);
+ range_check(input, 8, 13);
// stage 1
output[0] = input[0] + input[7];
@@ -87,7 +252,7 @@
output[6] = input[1] - input[6];
output[7] = input[0] - input[7];
- range_check(output, 8, 13);
+ range_check(output, 8, 14);
// stage 2
step[0] = output[0] + output[3];
@@ -101,7 +266,7 @@
step[6] = (tran_low_t)fdct_round_shift(temp);
step[7] = output[7];
- range_check(step, 8, 14);
+ range_check(step, 8, 15);
// stage 3
temp = step[0] * cospi_16_64 + step[1] * cospi_16_64;
@@ -117,7 +282,7 @@
output[6] = step[7] - step[6];
output[7] = step[7] + step[6];
- range_check(output, 8, 14);
+ range_check(output, 8, 16);
// stage 4
step[0] = output[0];
@@ -133,7 +298,7 @@
temp = output[7] * cospi_28_64 + output[4] * -cospi_4_64;
step[7] = (tran_low_t)fdct_round_shift(temp);
- range_check(step, 8, 14);
+ range_check(step, 8, 16);
// stage 5
output[0] = step[0];
@@ -145,7 +310,7 @@
output[6] = step[3];
output[7] = step[7];
- range_check(output, 8, 14);
+ range_check(output, 8, 16);
}
static void fdct16(const tran_low_t *input, tran_low_t *output) {
@@ -996,29 +1161,171 @@
output[15] = (tran_low_t)-x1;
}
+#if CONFIG_EXT_TX
+static void copy_block(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ int i;
+ for (i = 0; i < l; ++i) {
+ memcpy(dest + dest_stride * i, src + src_stride * i,
+ l * sizeof(int16_t));
+ }
+}
+
+static void fliplr(int16_t *dest, int stride, int l) {
+ int i, j;
+ for (i = 0; i < l; ++i) {
+ for (j = 0; j < l / 2; ++j) {
+ const int16_t tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[i * stride + l - 1 - j];
+ dest[i * stride + l - 1 - j] = tmp;
+ }
+ }
+}
+
+static void flipud(int16_t *dest, int stride, int l) {
+ int i, j;
+ for (j = 0; j < l; ++j) {
+ for (i = 0; i < l / 2; ++i) {
+ const int16_t tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[(l - 1 - i) * stride + j];
+ dest[(l - 1 - i) * stride + j] = tmp;
+ }
+ }
+}
+
+static void fliplrud(int16_t *dest, int stride, int l) {
+ int i, j;
+ for (i = 0; i < l / 2; ++i) {
+ for (j = 0; j < l; ++j) {
+ const int16_t tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j];
+ dest[(l - 1 - i) * stride + l - 1 - j] = tmp;
+ }
+ }
+}
+
+static void copy_fliplr(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, dest, dest_stride);
+ fliplr(dest, dest_stride, l);
+}
+
+static void copy_flipud(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, dest, dest_stride);
+ flipud(dest, dest_stride, l);
+}
+
+static void copy_fliplrud(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, dest, dest_stride);
+ fliplrud(dest, dest_stride, l);
+}
+
+static void maybe_flip_input(const int16_t **src, int *src_stride, int l,
+ int16_t *buff, int tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ break;
+ case FLIPADST_DCT:
+ case FLIPADST_ADST:
+ case FLIPADST_DST:
+ copy_flipud(*src, *src_stride, l, buff, l);
+ *src = buff;
+ *src_stride = l;
+ break;
+ case DCT_FLIPADST:
+ case ADST_FLIPADST:
+ case DST_FLIPADST:
+ copy_fliplr(*src, *src_stride, l, buff, l);
+ *src = buff;
+ *src_stride = l;
+ break;
+ case FLIPADST_FLIPADST:
+ copy_fliplrud(*src, *src_stride, l, buff, l);
+ *src = buff;
+ *src_stride = l;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+#endif // CONFIG_EXT_TX
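
maybe_flip_input() is what lets the FLIPADST entries in the tables below reuse the plain fadst kernels. A minimal usage sketch for a 4x4 FLIPADST_DCT block, mirroring the call sites in vp10_fht4x4_c and friends (variable names are the caller's):

const int16_t *src = input;    /* caller-provided residual block */
int src_stride = stride;       /* caller-provided stride */
int16_t flipped_input[4 * 4];  /* scratch on the caller's stack */
/* FLIPADST_DCT flips the block vertically: src/src_stride are redirected
 * into flipped_input, and the usual fadst4/fdct4 pair then runs unchanged. */
maybe_flip_input(&src, &src_stride, 4, flipped_input, FLIPADST_DCT);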
+
static const transform_2d FHT_4[] = {
- { fdct4, fdct4 }, // DCT_DCT = 0
- { fadst4, fdct4 }, // ADST_DCT = 1
- { fdct4, fadst4 }, // DCT_ADST = 2
- { fadst4, fadst4 } // ADST_ADST = 3
+ { fdct4, fdct4 }, // DCT_DCT = 0,
+ { fadst4, fdct4 }, // ADST_DCT = 1,
+ { fdct4, fadst4 }, // DCT_ADST = 2,
+ { fadst4, fadst4 }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { fadst4, fdct4 }, // FLIPADST_DCT = 4,
+ { fdct4, fadst4 }, // DCT_FLIPADST = 5,
+ { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6,
+ { fadst4, fadst4 }, // ADST_FLIPADST = 7,
+ { fadst4, fadst4 }, // FLIPADST_ADST = 8,
+ { fdst4, fdct4 }, // DST_DCT = 9,
+ { fdct4, fdst4 }, // DCT_DST = 10,
+ { fdst4, fadst4 }, // DST_ADST = 11,
+ { fadst4, fdst4 }, // ADST_DST = 12,
+ { fdst4, fadst4 }, // DST_FLIPADST = 13,
+ { fadst4, fdst4 }, // FLIPADST_DST = 14,
+ { fdst4, fdst4 }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_8[] = {
- { fdct8, fdct8 }, // DCT_DCT = 0
- { fadst8, fdct8 }, // ADST_DCT = 1
- { fdct8, fadst8 }, // DCT_ADST = 2
- { fadst8, fadst8 } // ADST_ADST = 3
+ { fdct8, fdct8 }, // DCT_DCT = 0,
+ { fadst8, fdct8 }, // ADST_DCT = 1,
+ { fdct8, fadst8 }, // DCT_ADST = 2,
+ { fadst8, fadst8 }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { fadst8, fdct8 }, // FLIPADST_DCT = 4,
+ { fdct8, fadst8 }, // DCT_FLIPADST = 5,
+ { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6,
+ { fadst8, fadst8 }, // ADST_FLIPADST = 7,
+ { fadst8, fadst8 }, // FLIPADST_ADST = 8,
+ { fdst8, fdct8 }, // DST_DCT = 9,
+ { fdct8, fdst8 }, // DCT_DST = 10,
+ { fdst8, fadst8 }, // DST_ADST = 11,
+ { fadst8, fdst8 }, // ADST_DST = 12,
+ { fdst8, fadst8 }, // DST_FLIPADST = 13,
+ { fadst8, fdst8 }, // FLIPADST_DST = 14,
+ { fdst8, fdst8 }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_16[] = {
- { fdct16, fdct16 }, // DCT_DCT = 0
- { fadst16, fdct16 }, // ADST_DCT = 1
- { fdct16, fadst16 }, // DCT_ADST = 2
- { fadst16, fadst16 } // ADST_ADST = 3
+ { fdct16, fdct16 }, // DCT_DCT = 0,
+ { fadst16, fdct16 }, // ADST_DCT = 1,
+ { fdct16, fadst16 }, // DCT_ADST = 2,
+ { fadst16, fadst16 }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { fadst16, fdct16 }, // FLIPADST_DCT = 4,
+ { fdct16, fadst16 }, // DCT_FLIPADST = 5,
+ { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6,
+ { fadst16, fadst16 }, // ADST_FLIPADST = 7,
+ { fadst16, fadst16 }, // FLIPADST_ADST = 8,
+ { fdst16, fdct16 }, // DST_DCT = 9,
+ { fdct16, fdst16 }, // DCT_DST = 10,
+ { fdst16, fadst16 }, // DST_ADST = 11,
+ { fadst16, fdst16 }, // ADST_DST = 12,
+ { fdst16, fadst16 }, // DST_FLIPADST = 13,
+ { fadst16, fdst16 }, // FLIPADST_DST = 14,
+ { fdst16, fdst16 }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
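
In all three tables the FLIPADST rows map to the plain fadst kernels, since the flip has already been applied to the input by maybe_flip_input(); the DST rows use the direct matrix-multiply fdst kernels defined above, so no additional 1-D transforms are needed.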
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct4x4_c(input, output, stride);
} else {
@@ -1027,6 +1334,11 @@
tran_low_t temp_in[4], temp_out[4];
const transform_2d ht = FHT_4[tx_type];
+#if CONFIG_EXT_TX
+ int16_t flipped_input[4 * 4];
+ maybe_flip_input(&input, &stride, 4, flipped_input, tx_type);
+#endif
+
// Columns
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
@@ -1050,15 +1362,15 @@
}
void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
- tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
int eob = -1;
int i, j;
@@ -1162,7 +1474,7 @@
}
void vp10_fht8x8_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct8x8_c(input, output, stride);
} else {
@@ -1171,6 +1483,11 @@
tran_low_t temp_in[8], temp_out[8];
const transform_2d ht = FHT_8[tx_type];
+#if CONFIG_EXT_TX
+ int16_t flipped_input[8 * 8];
+ maybe_flip_input(&input, &stride, 8, flipped_input, tx_type);
+#endif
+
// Columns
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
@@ -1248,7 +1565,7 @@
}
void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct16x16_c(input, output, stride);
} else {
@@ -1257,6 +1574,11 @@
tran_low_t temp_in[16], temp_out[16];
const transform_2d ht = FHT_16[tx_type];
+#if CONFIG_EXT_TX
+ int16_t flipped_input[16 * 16];
+ maybe_flip_input(&input, &stride, 16, flipped_input, tx_type);
+#endif
+
// Columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index f303b01..127cd04 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -196,6 +196,12 @@
set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ xd->max_tx_size = max_txsize_lookup[bsize];
+#endif
+
mbmi = &xd->mi[0]->mbmi;
// Set up destination pointers.
@@ -1026,8 +1032,15 @@
}
x->skip = ctx->skip;
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip[i], ctx->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
if (!output_enabled)
return;
@@ -1056,7 +1069,11 @@
if (is_inter_block(mbmi)) {
vp10_update_mv_count(td);
- if (cm->interp_filter == SWITCHABLE) {
+ if (cm->interp_filter == SWITCHABLE
+#if CONFIG_EXT_INTERP
+ && vp10_is_interp_needed(xd)
+#endif
+ ) {
const int ctx = vp10_get_pred_context_switchable_interp(xd);
++td->counts->switchable_interp[ctx][mbmi->interp_filter];
}
@@ -1302,6 +1319,9 @@
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
+#endif
BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
int p;
@@ -1326,12 +1346,21 @@
sizeof(*xd->above_seg_context) * mi_width);
memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
sizeof(xd->left_seg_context[0]) * mi_height);
+#if CONFIG_VAR_TX
+ memcpy(xd->above_txfm_context, ta,
+ sizeof(*xd->above_txfm_context) * mi_width);
+ memcpy(xd->left_txfm_context, tl,
+ sizeof(*xd->left_txfm_context) * mi_height);
+#endif
}
static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
+#endif
BLOCK_SIZE bsize) {
const MACROBLOCKD *const xd = &x->e_mbd;
int p;
@@ -1358,6 +1387,12 @@
sizeof(*xd->above_seg_context) * mi_width);
memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
sizeof(xd->left_seg_context[0]) * mi_height);
+#if CONFIG_VAR_TX
+ memcpy(ta, xd->above_txfm_context,
+ sizeof(*xd->above_txfm_context) * mi_width);
+ memcpy(tl, xd->left_txfm_context,
+ sizeof(*xd->left_txfm_context) * mi_height);
+#endif
}
static void encode_b(VP10_COMP *cpi, const TileInfo *const tile,
@@ -1542,6 +1577,9 @@
BLOCK_SIZE subsize;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT tl[8], ta[8];
+#endif
RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
@@ -1562,8 +1600,16 @@
partition = partition_lookup[bsl][bs_type];
subsize = get_subsize(bsize, partition);
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+#endif
pc_tree->partitioning = partition;
- save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ save_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -1603,7 +1649,11 @@
none_rdc.dist);
}
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
mi_8x8[0]->mbmi.sb_type = bs_type;
pc_tree->partitioning = partition;
}
@@ -1714,7 +1764,11 @@
BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
chosen_rdc.rate = 0;
chosen_rdc.dist = 0;
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
pc_tree->partitioning = PARTITION_SPLIT;
// Split partition.
@@ -1724,17 +1778,28 @@
RD_COST tmp_rdc;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT tl[8], ta[8];
+#endif
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
- save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ save_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
pc_tree->split[i]->partitioning = PARTITION_NONE;
rd_pick_sb_modes(cpi, tile_data, x,
mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
split_subsize, &pc_tree->split[i]->none, INT64_MAX);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp10_rd_cost_reset(&chosen_rdc);
@@ -1774,7 +1839,15 @@
chosen_rdc = none_rdc;
}
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+#endif
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
@@ -2046,6 +2119,9 @@
const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT tl[8], ta[8];
+#endif
TOKENEXTRA *tp_orig = *tp;
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
int i, pl;
@@ -2111,7 +2187,13 @@
partition_vert_allowed &= force_vert_split;
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -2257,7 +2339,13 @@
#endif
}
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
}
// store estimated motion vector
@@ -2322,7 +2410,13 @@
if (cpi->sf.less_rectangular_check)
do_rect &= !partition_none_allowed;
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
}
// PARTITION_HORZ
@@ -2371,7 +2465,13 @@
pc_tree->partitioning = PARTITION_HORZ;
}
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
}
// PARTITION_VERT
if (partition_vert_allowed &&
@@ -2420,7 +2520,13 @@
pc_tree->partitioning = PARTITION_VERT;
}
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
}
// TODO(jbb): This code added so that we avoid static analysis
@@ -2430,7 +2536,6 @@
(void) best_rd;
*rd_cost = best_rdc;
-
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
int output_enabled = (bsize == BLOCK_64X64);
@@ -2462,7 +2567,10 @@
// Initialize the left context for the new SB row
memset(&xd->left_context, 0, sizeof(xd->left_context));
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
-
+#if CONFIG_VAR_TX
+ memset(xd->left_txfm_context_buffer, 0,
+ sizeof(xd->left_txfm_context_buffer));
+#endif
// Code each SB in the row
for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
@@ -2550,6 +2658,10 @@
2 * aligned_mi_cols * MAX_MB_PLANE);
memset(xd->above_seg_context, 0,
sizeof(*xd->above_seg_context) * aligned_mi_cols);
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*xd->above_txfm_context) * aligned_mi_cols);
+#endif
}
static int check_dual_ref_flags(VP10_COMP *cpi) {
@@ -2706,15 +2818,9 @@
vp10_zero(rdc->filter_diff);
for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
-#if CONFIG_MISC_FIXES
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
-#endif
xd->lossless[i] = cm->y_dc_delta_q == 0 &&
-#if CONFIG_MISC_FIXES
qindex == 0 &&
-#else
- cm->base_qindex == 0 &&
-#endif
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
}
@@ -2741,6 +2847,9 @@
x->quant_fp = cpi->sf.use_quant_fp;
vp10_zero(x->skip_txfm);
+#if CONFIG_VAR_TX
+ vp10_zero(x->blk_skip);
+#endif
{
struct vpx_usec_timer emr_timer;
@@ -2843,8 +2952,9 @@
else
cm->reference_mode = REFERENCE_MODE_SELECT;
- if (cm->interp_filter == SWITCHABLE)
+ if (cm->interp_filter == SWITCHABLE) {
cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);
+ }
encode_frame_internal(cpi);
@@ -2872,6 +2982,7 @@
}
}
+#if !CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT) {
int count4x4 = 0;
int count8x8_lp = 0, count8x8_8x8p = 0;
@@ -2906,6 +3017,7 @@
reset_skip_tx_size(cm, TX_16X16);
}
}
+#endif
} else {
cm->reference_mode = SINGLE_REFERENCE;
encode_frame_internal(cpi);
@@ -2948,6 +3060,140 @@
++counts->uv_mode[y_mode][uv_mode];
}
+#if CONFIG_VAR_TX
+static void update_txfm_count(MACROBLOCKD *xd,
+ FRAME_COUNTS *counts,
+ TX_SIZE tx_size, int blk_row, int blk_col) {
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1),
+ tx_size);
+ TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ ++counts->txfm_partition[ctx][0];
+ mbmi->tx_size = tx_size;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+ } else {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bh = num_4x4_blocks_high_lookup[bsize];
+ int i;
+ ++counts->txfm_partition[ctx][1];
+
+ if (tx_size == TX_8X8) {
+ mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->tx_size = TX_4X4;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ for (i = 0; i < 4; ++i) {
+ int offsetr = (i >> 1) * bh / 2;
+ int offsetc = (i & 0x01) * bh / 2;
+ update_txfm_count(xd, counts, tx_size - 1,
+ blk_row + offsetr, blk_col + offsetc);
+ }
+ }
+}
+
+static void tx_partition_count_update(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ BLOCK_SIZE plane_bsize,
+ int mi_row, int mi_col,
+ FRAME_COUNTS *td_counts) {
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+
+ for (idy = 0; idy < mi_height; idy += bh)
+ for (idx = 0; idx < mi_width; idx += bh)
+ update_txfm_count(xd, td_counts, max_tx_size, idy, idx);
+}
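
As a concrete walk-through of the loop bounds: a 64x64 inter block gives mi_width = mi_height = 16 (in 4x4 units) and max_tx_size TX_32X32, so txb_size is BLOCK_32X32 and bh = 8; the loops then recurse into the four 32x32 transform blocks anchored at (0,0), (0,8), (8,0) and (8,8).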
+
+static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size,
+ int blk_row, int blk_col) {
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ mbmi->tx_size = tx_size;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+
+ } else {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ if (tx_size == TX_8X8) {
+ mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->tx_size = TX_4X4;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ set_txfm_context(xd, tx_size - 1,
+ blk_row + offsetr, blk_col + offsetc);
+ }
+ }
+}
+
+static void tx_partition_set_contexts(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ BLOCK_SIZE plane_bsize,
+ int mi_row, int mi_col) {
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+
+ for (idy = 0; idy < mi_height; idy += bh)
+ for (idx = 0; idx < mi_width; idx += bh)
+ set_txfm_context(xd, max_tx_size, idy, idx);
+}
+#endif
+
static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
TOKENEXTRA **t, int output_enabled,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -3014,25 +3260,33 @@
VPXMAX(bsize, BLOCK_8X8));
vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_VAR_TX
+ vp10_tokenize_sb_inter(cpi, td, t, !output_enabled,
+ mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8));
+#else
vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+#endif
}
if (output_enabled) {
if (cm->tx_mode == TX_MODE_SELECT &&
mbmi->sb_type >= BLOCK_8X8 &&
!(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
+#if CONFIG_VAR_TX
+ if (is_inter_block(mbmi))
+ tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
+#endif
++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
&td->counts->tx)[mbmi->tx_size];
} else {
int x, y;
TX_SIZE tx_size;
// The new intra coding scheme requires no change of transform size
- if (is_inter_block(&mi->mbmi)) {
+ if (is_inter_block(&mi->mbmi))
tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
max_txsize_lookup[bsize]);
- } else {
+ else
tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
- }
for (y = 0; y < mi_height; y++)
for (x = 0; x < mi_width; x++)
@@ -3041,5 +3295,50 @@
}
++td->counts->tx.tx_totals[mbmi->tx_size];
++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize,
+ is_inter_block(mbmi));
+ if (eset > 0) {
+ if (is_inter_block(mbmi)) {
+ ++td->counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type];
+ } else {
+ ++td->counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode]
+ [mbmi->tx_type];
+ }
+ }
+ }
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+ if (bsize >= BLOCK_8X8 && !is_inter_block(mbmi)) {
+ if (mbmi->mode == DC_PRED)
+ ++td->counts->ext_intra[0]
+ [mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
+ if (mbmi->uv_mode == DC_PRED)
+ ++td->counts->ext_intra[1]
+ [mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
+ }
+#endif // CONFIG_EXT_INTRA
}
+
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 &&
+ is_inter_block(mbmi) && !(mbmi->skip || seg_skip)) {
+ if (!output_enabled)
+ tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
+ } else {
+ TX_SIZE tx_size;
+ // The new intra coding scheme requires no change of transform size
+ if (is_inter_block(mbmi))
+ tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
+ max_txsize_lookup[bsize]);
+ else
+ tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
+ mbmi->tx_size = tx_size;
+ set_txfm_ctx(xd->left_txfm_context, tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, tx_size, xd->n8_w);
+ }
+#endif
}
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 92ba4dd..00e3780 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -104,8 +104,9 @@
const int mul = 1 + (tx_size == TX_32X32);
const int16_t *dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
- TX_TYPE tx_type = get_tx_type(type, xd, block);
- const scan_order *const so = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const scan_order *const so =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
int next = eob, sz = 0;
@@ -301,6 +302,7 @@
final_eob++;
mb->plane[plane].eobs[block] = final_eob;
+ assert(final_eob <= default_eob);
return final_eob;
}
@@ -323,24 +325,62 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_TX
+// Forward identity transform.
+static void fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
+ int bs) {
+ int r, c;
+ const int shift = bs < 32 ? 3 : 2;
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c)
+ coeff[c] = src_diff[c] << shift;
+ src_diff += stride;
+ coeff += bs;
+ }
+}
+#endif // CONFIG_EXT_TX
+
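A quick sanity sketch of the identity-transform scaling (a hypothetical test helper, not part of the patch; assumes <assert.h> and the usual vpx types are in scope):

static void check_idtx_scaling(void) {
  int16_t src_diff[4 * 4] = { 5 };  /* remaining entries are zero */
  tran_low_t coeff[4 * 4];
  fwd_idtx_c(src_diff, coeff, 4 /* stride */, 4 /* bs */);
  assert(coeff[0] == (5 << 3));     /* bs < 32, so the shift is 3 */
  assert(coeff[1] == 0);
}
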
void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless) {
if (lossless) {
+ assert(tx_type == DCT_DCT);
vp10_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vpx_fdct4x4(src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
}
}
@@ -353,6 +393,65 @@
case ADST_ADST:
vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
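The *_1 helpers introduced below (and their 16x16/32x32 counterparts later in the file) keep the fast DC-only vpx_fdct*_1 kernels for the DCT-family types; the extended types have no DC-only kernel, so they fall back to the full transform, trading speed for correctness.
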
+static void fwd_txfm_8x8_1(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vpx_fdct8x8_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -368,6 +467,65 @@
case ADST_ADST:
vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void fwd_txfm_16x16_1(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vpx_fdct16x16_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -381,6 +539,34 @@
case DCT_DCT:
fdct32x32(rd_transform, src_diff, coeff, diff_stride);
break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ assert(0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
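
Note that at 32x32 only DCT_DCT (plus IDTX under CONFIG_EXT_TX) is reachable; the ADST hybrids deliberately assert, so callers must not request them at this size.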
+
+static void fwd_txfm_32x32_1(const int16_t *src_diff,
+ tran_low_t *coeff, int diff_stride,
+ TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_fdct32x32_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
@@ -398,34 +584,112 @@
if (lossless) {
assert(tx_type == DCT_DCT);
vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
}
}
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type) {
+ int diff_stride, TX_TYPE tx_type) {
switch (tx_type) {
case DCT_DCT:
- vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
- break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void highbd_fwd_txfm_8x8_1(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -433,16 +697,73 @@
}
static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type) {
+ int diff_stride, TX_TYPE tx_type) {
switch (tx_type) {
case DCT_DCT:
- vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
- break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void highbd_fwd_txfm_16x16_1(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -456,6 +777,34 @@
case DCT_DCT:
highbd_fdct32x32(rd_transform, src_diff, coeff, diff_stride);
break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ assert(0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void highbd_fwd_txfm_32x32_1(const int16_t *src_diff,
+ tran_low_t *coeff, int diff_stride,
+ TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
@@ -475,8 +824,9 @@
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
- const scan_order *const scan_order = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -489,37 +839,35 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
- highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+ highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride,
+ tx_type);
vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff, dqcoeff, pd->dequant,
- eob, scan_order->scan,
- scan_order->iscan);
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff, dqcoeff, pd->dequant,
+ eob, scan_order->scan,
+ scan_order->iscan);
break;
case TX_16X16:
- vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
+ highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
- vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
+ highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
- }
+ vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
+ xd->lossless[xd->mi[0]->mbmi.segment_id]);
vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
@@ -530,36 +878,33 @@
switch (tx_size) {
case TX_32X32:
- fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+ fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, tx_type);
vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan,
- scan_order->iscan);
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, eob, scan_order->scan,
+ scan_order->iscan);
break;
case TX_16X16:
- vpx_fdct16x16(src_diff, coeff, diff_stride);
+ fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
- vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
- x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
+ vp10_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- vpx_fdct4x4(src_diff, coeff, diff_stride);
- }
+ vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
+ xd->lossless[xd->mi[0]->mbmi.segment_id]);
vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
@@ -573,6 +918,8 @@
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -585,29 +932,26 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
- vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+ highbd_fwd_txfm_32x32_1(src_diff, coeff, diff_stride, tx_type);
vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_16X16:
- vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
+ highbd_fwd_txfm_16x16_1(src_diff, coeff, diff_stride, tx_type);
vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
- vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
+ highbd_fwd_txfm_8x8_1(src_diff, coeff, diff_stride, tx_type);
vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
- }
+ vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
+ xd->lossless[xd->mi[0]->mbmi.segment_id]);
vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
@@ -621,29 +965,26 @@
switch (tx_size) {
case TX_32X32:
- vpx_fdct32x32_1(src_diff, coeff, diff_stride);
+ fwd_txfm_32x32_1(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_dc_32x32(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_16X16:
- vpx_fdct16x16_1(src_diff, coeff, diff_stride);
+ fwd_txfm_16x16_1(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
- vpx_fdct8x8_1(src_diff, coeff, diff_stride);
+ fwd_txfm_8x8_1(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- vpx_fdct4x4(src_diff, coeff, diff_stride);
- }
+ vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
+ xd->lossless[xd->mi[0]->mbmi.segment_id]);
vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
@@ -654,8 +995,6 @@
}
}
-
-
void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
@@ -663,8 +1002,9 @@
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
- const scan_order *const scan_order = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -678,7 +1018,7 @@
switch (tx_size) {
case TX_32X32:
highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride,
- tx_type);
+ tx_type);
vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff,
dqcoeff, pd->dequant, eob,
@@ -761,20 +1101,31 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
- TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block);
+ TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+#if CONFIG_VAR_TX
+ int i;
+ const int bwl = b_width_log2_lookup[plane_bsize];
+#endif
dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
a = &ctx->ta[plane][blk_col];
l = &ctx->tl[plane][blk_row];
// TODO(jingning): Per-transformed-block zero forcing is only enabled for the
// luma component; chroma components will be integrated as well.
- if (x->zcoeff_blk[tx_size][block] && plane == 0) {
- p->eobs[block] = 0;
- *a = *l = 0;
- return;
- }
+ // Turn this back on when the rate-distortion loop is synchronized with
+ // the recursive transform block coding.
+// if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+// p->eobs[block] = 0;
+// *a = *l = 0;
+// return;
+// }
+#if CONFIG_VAR_TX
+ if (!x->skip_recode &&
+ x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) {
+#else
if (!x->skip_recode) {
+#endif
if (x->quant_fp) {
// Encoding process for rtc mode
if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
@@ -801,7 +1152,9 @@
// skip forward transform
p->eobs[block] = 0;
*a = *l = 0;
+#if !CONFIG_VAR_TX
return;
+#endif
}
} else {
vp10_xform_quant(x, plane, block, blk_row, blk_col,
@@ -809,14 +1162,49 @@
}
}
}
+#if CONFIG_VAR_TX
+ else {
+ if (!x->skip_recode)
+ p->eobs[block] = 0;
+ }
+#endif
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
- const int ctx = combine_entropy_contexts(*a, *l);
+ int ctx;
+#if CONFIG_VAR_TX
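+ // Collapse the per-4x4 entropy-context bytes covered by this transform
+ // block into a single nonzero flag by reading 2/4/8 adjacent bytes at
+ // once for TX_8X8/TX_16X16/TX_32X32.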
+ switch (tx_size) {
+ case TX_4X4:
+ break;
+ case TX_8X8:
+ a[0] = !!*(const uint16_t *)&a[0];
+ l[0] = !!*(const uint16_t *)&l[0];
+ break;
+ case TX_16X16:
+ a[0] = !!*(const uint32_t *)&a[0];
+ l[0] = !!*(const uint32_t *)&l[0];
+ break;
+ case TX_32X32:
+ a[0] = !!*(const uint64_t *)&a[0];
+ l[0] = !!*(const uint64_t *)&l[0];
+ break;
+ default:
+ assert(0 && "Invalid transform size.");
+ break;
+ }
+#endif
+ ctx = combine_entropy_contexts(*a, *l);
*a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
} else {
*a = *l = p->eobs[block] > 0;
}
+#if CONFIG_VAR_TX
+ for (i = 0; i < (1 << tx_size); ++i) {
+ a[i] = a[0];
+ l[i] = l[0];
+ }
+#endif
+
if (p->eobs[block])
*(args->skip) = 0;
@@ -880,6 +1268,59 @@
}
}
+#if CONFIG_VAR_TX
+static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg) {
+ struct encode_b_args *const args = arg;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize,
+ 0, 0) :
+ mbmi->inter_tx_size[blk_idx];
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ encode_block(plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, arg);
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
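+ // Each child transform block spans 1 << (2 * (tx_size - 1)) 4x4 units;
+ // step is the stride between sibling coefficient-block indices.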
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ encode_block_inter(plane, block + i * step, offsetr, offsetc,
+ plane_bsize, tx_size - 1, arg);
+ }
+ }
+}
+#endif
+
static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
@@ -896,7 +1337,7 @@
if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (xd->lossless[0]) {
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
} else {
@@ -906,7 +1347,7 @@
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (xd->lossless[0]) {
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
} else {
vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
@@ -917,7 +1358,7 @@
void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
vp10_subtract_plane(x, bsize, 0);
vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
- encode_block_pass1, x);
+ encode_block_pass1, x);
}
void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
@@ -933,18 +1374,46 @@
return;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ // TODO(jingning): Clean this up.
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ const int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_tx_size * 2);
+#endif
if (!x->skip_recode)
vp10_subtract_plane(x, bsize, plane);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
+#if CONFIG_VAR_TX
+ vp10_get_entropy_contexts(bsize, TX_4X4, pd,
+ ctx.ta[plane], ctx.tl[plane]);
+#else
const struct macroblockd_plane* const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
vp10_get_entropy_contexts(bsize, tx_size, pd,
- ctx.ta[plane], ctx.tl[plane]);
+ ctx.ta[plane], ctx.tl[plane]);
+#endif
}
+#if CONFIG_VAR_TX
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ encode_block_inter(plane, block, idy, idx, plane_bsize,
+ max_tx_size, &arg);
+ block += step;
+ }
+ }
+#else
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
- &arg);
+ &arg);
+#endif
}
}
@@ -961,8 +1430,8 @@
tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
- const scan_order *const scan_order = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order = get_scan(tx_size, tx_type, 0);
PREDICTION_MODE mode;
const int bwl = b_width_log2_lookup[plane_bsize];
const int bhl = b_height_log2_lookup[plane_bsize];
@@ -978,7 +1447,7 @@
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride,
- dst, dst_stride, blk_col, blk_row, plane);
+ dst, dst_stride, blk_col, blk_row, plane);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -1113,7 +1582,7 @@
// which is significant (not just an optimization) for the lossless
// case.
vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type,
- xd->lossless[mbmi->segment_id]);
+ xd->lossless[xd->mi[0]->mbmi.segment_id]);
}
break;
default:
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c
index 0736c65..623e6f6 100644
--- a/vp10/encoder/encodemv.c
+++ b/vp10/encoder/encodemv.c
@@ -137,19 +137,8 @@
static void update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
vpx_prob upd_p) {
-#if CONFIG_MISC_FIXES
(void) upd_p;
vp10_cond_prob_diff_update(w, cur_p, ct);
-#else
- const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
- const int update = cost_branch256(ct, *cur_p) + vp10_cost_zero(upd_p) >
- cost_branch256(ct, new_p) + vp10_cost_one(upd_p) + 7 * 256;
- vpx_write(w, update, upd_p);
- if (update) {
- *cur_p = new_p;
- vpx_write_literal(w, new_p >> 1, 7);
- }
-#endif
}
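
For reference, the branch deleted above implemented a cost-driven update: derive a new probability from the branch counts, and keep it only when the rate saved on the branch outweighs the 7-bit payload (the real code also charges a one-bit update flag under upd_p). A toy sketch of that criterion using an idealized -log2 bit cost rather than the vp10_prob_cost table:

#include <math.h>
#include <stdio.h>

/* Toy branch cost in 1/256-bit units for ct[0]/ct[1] events under a
 * probability p of branch 0, scaled to 1..255. Idealized model only. */
double branch_cost(const unsigned ct[2], int p) {
  double p0 = p / 256.0;
  return 256.0 * (-(double)ct[0] * log2(p0) - (double)ct[1] * log2(1.0 - p0));
}

int main(void) {
  const unsigned ct[2] = {120, 40};
  int cur_p = 128;
  int new_p = (int)(256.0 * ct[0] / (ct[0] + ct[1])) | 1; /* forced odd, as the old code did */
  double old_cost = branch_cost(ct, cur_p);
  double new_cost = branch_cost(ct, new_p) + 7 * 256;  /* payload cost */
  printf("update? %s (old %.0f vs new %.0f)\n",
         old_cost > new_cost ? "yes" : "no", old_cost, new_cost);
  return 0;
}
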
static void write_mv_update(const vpx_tree_index *tree,
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 5b646a2..c9911f8 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -328,6 +328,7 @@
vp10_rc_init_minq_luts();
vp10_entropy_mv_init();
vp10_temporal_filter_init();
+ vp10_encode_token_init();
init_done = 1;
}
}
@@ -418,10 +419,6 @@
memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
-#if !CONFIG_MISC_FIXES
- vp10_copy(cc->segment_pred_probs, cm->segp.pred_probs);
-#endif
-
memcpy(cpi->coding_context.last_frame_seg_map_copy,
cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols));
@@ -446,10 +443,6 @@
memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
-#if !CONFIG_MISC_FIXES
- vp10_copy(cm->segp.pred_probs, cc->segment_pred_probs);
-#endif
-
memcpy(cm->last_frame_seg_map,
cpi->coding_context.last_frame_seg_map_copy,
(cm->mi_rows * cm->mi_cols));
@@ -2679,6 +2672,9 @@
}
if (lf->filter_level > 0) {
+#if CONFIG_VAR_TX
+ vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#else
if (cpi->num_workers > 1)
vp10_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
lf->filter_level, 0, 0,
@@ -2686,6 +2682,7 @@
&cpi->lf_row_sync);
else
vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#endif
}
vpx_extend_frame_inner_borders(cm->frame_to_show);
@@ -3256,7 +3253,6 @@
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
save_coding_context(cpi);
vp10_pack_bitstream(cpi, dest, size);
-
rc->projected_frame_size = (int)(*size) << 3;
restore_coding_context(cpi);
@@ -3673,12 +3669,7 @@
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
vp10_adapt_coef_probs(cm);
-#if CONFIG_MISC_FIXES
vp10_adapt_intra_frame_probs(cm);
-#else
- if (!frame_is_intra_only(cm))
- vp10_adapt_intra_frame_probs(cm);
-#endif
}
if (!frame_is_intra_only(cm)) {
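
Note that this encoder.c hunk and the picklpf.c hunk below both route around the multithreaded loop filter whenever CONFIG_VAR_TX is compiled in, presumably because the MT path had not yet been updated for variable transform sizes. The gating pattern, sketched with illustrative names:

#include <stdio.h>

#define CONFIG_VAR_TX 1

void filter_single(void) { puts("single-threaded loop filter"); }
void filter_mt(int workers) { printf("MT loop filter, %d workers\n", workers); }

void apply_loop_filter(int num_workers) {
#if CONFIG_VAR_TX
  (void)num_workers;       /* always take the single-threaded path */
  filter_single();
#else
  if (num_workers > 1) filter_mt(num_workers);
  else filter_single();
#endif
}

int main(void) { apply_loop_filter(4); return 0; }
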
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 7028803..e20b569 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -55,10 +55,6 @@
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
-#if !CONFIG_MISC_FIXES
- vpx_prob segment_pred_probs[PREDICTION_PROBS];
-#endif
-
unsigned char *last_frame_seg_map_copy;
// 0 = Intra, Last, GF, ARF
@@ -466,6 +462,11 @@
[PALETTE_COLORS];
int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
[PALETTE_COLORS];
+#if CONFIG_EXT_TX
+ int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+ int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES];
+#endif // CONFIG_EXT_TX
int multi_arf_allowed;
int multi_arf_enabled;
diff --git a/vp10/encoder/picklpf.c b/vp10/encoder/picklpf.c
index 045e03d..1f5711d 100644
--- a/vp10/encoder/picklpf.c
+++ b/vp10/encoder/picklpf.c
@@ -41,6 +41,10 @@
VP10_COMMON *const cm = &cpi->common;
int64_t filt_err;
+#if CONFIG_VAR_TX
+ vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
+ 1, partial_frame);
+#else
if (cpi->num_workers > 1)
vp10_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
filt_level, 1, partial_frame,
@@ -48,6 +52,7 @@
else
vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
1, partial_frame);
+#endif
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 4ed1ae2..cb32252 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -102,6 +102,26 @@
vp10_default_palette_uv_color_prob[i][j],
vp10_palette_color_tree[i]);
}
+#if CONFIG_EXT_TX
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (use_inter_ext_tx_for_tx[s][i]) {
+ vp10_cost_tokens(cpi->inter_tx_type_costs[s][i],
+ fc->inter_ext_tx_prob[s][i],
+ vp10_ext_tx_inter_tree[s]);
+ }
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (use_intra_ext_tx_for_tx[s][i]) {
+ for (j = 0; j < INTRA_MODES; ++j)
+ vp10_cost_tokens(cpi->intra_tx_type_costs[s][i][j],
+ fc->intra_ext_tx_prob[s][i][j],
+ vp10_ext_tx_intra_tree[s]);
+ }
+ }
+ }
+#endif // CONFIG_EXT_TX
}
static void fill_token_costs(vp10_coeff_cost *c,
@@ -570,8 +590,11 @@
const MACROBLOCKD *const xd) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = vp10_get_pred_context_switchable_interp(xd);
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd)) return 0;
+#endif // CONFIG_EXT_INTERP
return SWITCHABLE_INTERP_RATE_FACTOR *
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+ cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
}
void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) {
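
The new EXT_TX block above pre-computes per-(set, size, mode) rate tables by walking a token tree with vp10_cost_tokens: each transform type's rate is the sum of the branch costs along its tree path. A toy analogue for a three-leaf binary tree, using an idealized -256*log2 bit cost instead of the vp10_prob_cost table:

#include <math.h>
#include <stdio.h>

/* Cost of one binary decision in 1/256-bit units (idealized model). */
int bit_cost(int prob, int bit) {
  double p = prob / 256.0;
  return (int)(-256.0 * log2(bit ? 1.0 - p : p));
}

int main(void) {
  /* tree: root splits {leaf0} vs internal; internal splits leaf1 vs leaf2 */
  int p_root = 200, p_internal = 128;
  int cost[3];
  cost[0] = bit_cost(p_root, 0);
  cost[1] = bit_cost(p_root, 1) + bit_cost(p_internal, 0);
  cost[2] = bit_cost(p_root, 1) + bit_cost(p_internal, 1);
  for (int i = 0; i < 3; ++i)
    printf("token %d costs %d (1/256 bits)\n", i, cost[i]);
  return 0;
}
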
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 8ba2110..e0912b5 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -55,6 +55,10 @@
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
+#if CONFIG_EXT_TX
+const double ext_tx_th = 0.98;
+#endif
+
typedef struct {
PREDICTION_MODE mode;
MV_REFERENCE_FRAME ref_frame[2];
@@ -65,6 +69,9 @@
} REF_DEFINITION;
struct rdcost_block_args {
+#if CONFIG_VAR_TX
+ const VP10_COMP *cpi;
+#endif
MACROBLOCK *x;
ENTROPY_CONTEXT t_above[16];
ENTROPY_CONTEXT t_left[16];
@@ -345,7 +352,11 @@
};
static int cost_coeffs(MACROBLOCK *x,
int plane, int block,
+#if CONFIG_VAR_TX
+ int coeff_ctx,
+#else
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
+#endif
TX_SIZE tx_size,
const int16_t *scan, const int16_t *nb,
int use_fast_coef_costing) {
@@ -360,7 +371,11 @@
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size][type][is_inter_block(mbmi)];
uint8_t token_cache[32 * 32];
+#if CONFIG_VAR_TX
+ int pt = coeff_ctx;
+#else
int pt = combine_entropy_contexts(*A, *L);
+#endif
int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
@@ -368,9 +383,11 @@
const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
+#if !CONFIG_VAR_TX
// Check for consistency of tx_size with mode info
assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
: get_uv_tx_size(mbmi, pd) == tx_size);
+#endif
if (eob == 0) {
// single eob token
@@ -424,8 +441,10 @@
}
}
+#if !CONFIG_VAR_TX
// is eob first coefficient;
*A = *L = (c > 0);
+#endif
return cost;
}
@@ -453,10 +472,23 @@
static int rate_block(int plane, int block, int blk_row, int blk_col,
TX_SIZE tx_size, struct rdcost_block_args* args) {
- return cost_coeffs(args->x, plane, block, args->t_above + blk_col,
- args->t_left + blk_row, tx_size,
- args->so->scan, args->so->neighbors,
+#if CONFIG_VAR_TX
+ int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
+ *(args->t_left + blk_row));
+ int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx,
+ tx_size, args->so->scan, args->so->neighbors,
+ args->use_fast_coef_costing);
+ const struct macroblock_plane *p = &args->x->plane[plane];
+ *(args->t_above + blk_col) = !(p->eobs[block] == 0);
+ *(args->t_left + blk_row) = !(p->eobs[block] == 0);
+ return coeff_cost;
+#else
+ return cost_coeffs(args->x, plane, block,
+ args->t_above + blk_col,
+ args->t_left + blk_row,
+ tx_size, args->so->scan, args->so->neighbors,
args->use_fast_coef_costing);
+#endif
}
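
Under CONFIG_VAR_TX, cost_coeffs above takes a precomputed coeff_ctx instead of the above/left context pointers, so rate_block becomes responsible for deriving the context and writing the eob flags back afterwards. A standalone sketch of that contract (combine_entropy_contexts has the same shape as the libvpx helper; the buffers and eob value are illustrative):

#include <stdio.h>

typedef char ENTROPY_CONTEXT;

/* Maps the above/left nonzero flags to a coefficient context in {0, 1, 2}. */
int combine_entropy_contexts(ENTROPY_CONTEXT a, ENTROPY_CONTEXT l) {
  return (a != 0) + (l != 0);
}

int main(void) {
  ENTROPY_CONTEXT above[4] = {0}, left[4] = {0};
  int blk_col = 1, blk_row = 2, eob = 5;  /* pretend this block coded 5 coeffs */
  int ctx = combine_entropy_contexts(above[blk_col], left[blk_row]);
  /* With VAR_TX the caller, not cost_coeffs, updates the contexts from the
   * eob, exactly as rate_block does above. */
  above[blk_col] = left[blk_row] = (eob != 0);
  printf("coeff ctx %d, new above/left %d/%d\n", ctx, above[blk_col], left[blk_row]);
  return 0;
}
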
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
@@ -475,10 +507,41 @@
return;
if (!is_inter_block(mbmi)) {
+#if CONFIG_VAR_TX
+ struct encode_b_args arg = {x, NULL, &mbmi->skip};
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp10_encode_block_intra(plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, &arg);
+ dist_block(x, plane, block, tx_size, &dist, &sse);
+#else
+ uint8_t *dst, *src;
+ int src_stride = x->plane[plane].src.stride;
+ int dst_stride = xd->plane[plane].dst.stride;
+ unsigned int tmp_sse;
+ PREDICTION_MODE mode = (plane == 0) ?
+ get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
+
+ src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)];
+ dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)];
+ vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
+ b_height_log2_lookup[plane_bsize],
+ tx_size, mode, dst, dst_stride,
+ dst, dst_stride, blk_col, blk_row, plane);
+ args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
+ dst, dst_stride, &tmp_sse);
+ sse = (int64_t)tmp_sse * 16;
+ vp10_encode_block_intra(plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, &arg);
+ args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
+ dst, dst_stride, &tmp_sse);
+ dist = (int64_t)tmp_sse * 16;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else
struct encode_b_args arg = {x, NULL, &mbmi->skip};
vp10_encode_block_intra(plane, block, blk_row, blk_col,
plane_bsize, tx_size, &arg);
dist_block(x, plane, block, tx_size, &dist, &sse);
+#endif
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
SKIP_TXFM_NONE) {
@@ -550,6 +613,9 @@
}
static void txfm_rd_in_plane(MACROBLOCK *x,
+#if CONFIG_VAR_TX
+ const VP10_COMP *cpi,
+#endif
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane,
@@ -561,6 +627,9 @@
struct rdcost_block_args args;
vp10_zero(args);
args.x = x;
+#if CONFIG_VAR_TX
+ args.cpi = cpi;
+#endif
args.best_rd = ref_best_rd;
args.use_fast_coef_costing = use_fast_coef_casting;
args.skippable = 1;
@@ -570,11 +639,11 @@
vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
- tx_type = get_tx_type(pd->plane_type, xd, 0);
- args.so = get_scan(tx_size, tx_type);
+ tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
+ args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
- block_rd_txfm, &args);
+ block_rd_txfm, &args);
if (args.exit_early) {
*rate = INT_MAX;
*distortion = INT64_MAX;
@@ -598,12 +667,101 @@
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_EXT_TX
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ int r, s;
+ int64_t d, psse, this_rd, best_rd = INT64_MAX;
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0 = vp10_cost_bit(skip_prob, 0);
+ int s1 = vp10_cost_bit(skip_prob, 1);
+ int ext_tx_set;
+ const int is_inter = is_inter_block(mbmi);
+#endif // CONFIG_EXT_TX
mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
- txfm_rd_in_plane(x, rate, distortion, skip,
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
+
+ if (is_inter &&
+ get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[mbmi->segment_id]) {
+ for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ } else {
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+
+ mbmi->tx_type = tx_type;
+ if (ext_tx_set == 1 &&
+ mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
+
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
+ &psse, ref_best_rd, 0, bs, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+
+ if (r == INT_MAX)
+ continue;
+ if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ r += cpi->inter_tx_type_costs[ext_tx_set]
+ [mbmi->tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0)
+ r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+
+ if (s)
+ this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
+ else
+ this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
+ if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
+ this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));
+
+ if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
+ best_rd = this_rd;
+ best_tx_type = mbmi->tx_type;
+ }
+ }
+ }
+
+ mbmi->tx_type = best_tx_type;
+#endif // CONFIG_EXT_TX
+
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ rate, distortion, skip,
sse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) {
+ int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
+ if (is_inter)
+ *rate += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->tx_type];
+ else
+ *rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+#endif // CONFIG_EXT_TX
}
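
choose_largest_tx_size above biases the transform-type search with ext_tx_th = 0.98: while DCT_DCT is still the incumbent, a challenger must beat 98% of the best RD cost, so near-ties stay with the cheaper-to-signal default transform. A small self-contained illustration (RD values are made up):

#include <stdio.h>

int main(void) {
  const double ext_tx_th = 0.98;
  long long best_rd = 1000000;
  int best_tx_type = 0;                          /* 0 = DCT_DCT */
  long long candidates[3] = {995000, 970000, 990000};
  for (int t = 1; t <= 3; ++t) {
    long long this_rd = candidates[t - 1];
    /* only shrink the bar while the incumbent is the default transform */
    double scale = (best_tx_type == 0) ? ext_tx_th : 1.0;
    if (this_rd < scale * best_rd) { best_rd = this_rd; best_tx_type = t; }
  }
  /* type 1 (995000) loses despite beating 1000000; type 2 wins; type 3 loses */
  printf("selected tx_type %d, rd %lld\n", best_tx_type, best_rd);
  return 0;
}
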
static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
@@ -616,7 +774,11 @@
mbmi->tx_size = TX_4X4;
- txfm_rd_in_plane(x, rate, distortion, skip,
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ rate, distortion, skip,
sse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}
@@ -633,87 +795,162 @@
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
- int r[TX_SIZES][2], s[TX_SIZES];
- int64_t d[TX_SIZES], sse[TX_SIZES];
- int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX}};
+ int r, s;
+ int64_t d, sse;
+ int64_t rd = INT64_MAX;
int n, m;
int s0, s1;
- int64_t best_rd = INT64_MAX;
+ int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+#if CONFIG_EXT_TX
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+ const int is_inter = is_inter_block(mbmi);
const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
assert(skip_prob > 0);
s0 = vp10_cost_bit(skip_prob, 0);
s1 = vp10_cost_bit(skip_prob, 1);
- if (cm->tx_mode == TX_MODE_SELECT) {
+ if (tx_select) {
start_tx = max_tx_size;
end_tx = 0;
} else {
- TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
- tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ const TX_SIZE chosen_tx_size =
+ VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
start_tx = chosen_tx_size;
end_tx = chosen_tx_size;
}
- for (n = start_tx; n >= end_tx; n--) {
- int r_tx_size = 0;
- for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
- if (m == n)
- r_tx_size += vp10_cost_zero(tx_probs[m]);
- else
- r_tx_size += vp10_cost_one(tx_probs[m]);
- }
- txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
- &sse[n], ref_best_rd, 0, bs, n,
- cpi->sf.use_fast_coef_costing);
- r[n][1] = r[n][0];
- if (r[n][0] < INT_MAX) {
- r[n][1] += r_tx_size;
- }
- if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
- rd[n][0] = rd[n][1] = INT64_MAX;
- } else if (s[n]) {
- if (is_inter_block(mbmi)) {
- rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
- r[n][1] -= r_tx_size;
- } else {
- rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
- rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
+ *distortion = INT64_MAX;
+ *rate = INT_MAX;
+ *skip = 0;
+ *psse = INT64_MAX;
+
+#if CONFIG_EXT_TX
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+#endif // CONFIG_EXT_TX
+ last_rd = INT64_MAX;
+ for (n = start_tx; n >= end_tx; --n) {
+ int r_tx_size = 0;
+ for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
+ if (m == n)
+ r_tx_size += vp10_cost_zero(tx_probs[m]);
+ else
+ r_tx_size += vp10_cost_one(tx_probs[m]);
}
- } else {
- rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
- rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
- }
- if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
- !s[n] && sse[n] != INT64_MAX) {
- rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
- rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
- }
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(n, bs, is_inter);
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ } else {
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+ mbmi->tx_type = tx_type;
+ if (ext_tx_set == 1 &&
+ mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
+ &sse, ref_best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
+ if (get_ext_tx_types(n, bs, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ r != INT_MAX) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ r += cpi->inter_tx_type_costs[ext_tx_set]
+ [mbmi->tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0)
+ r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+#else // CONFIG_EXT_TX
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
+ &sse, ref_best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
+#endif // CONFIG_EXT_TX
- // Early termination in transform size search.
- if (cpi->sf.tx_size_search_breakout &&
- (rd[n][1] == INT64_MAX ||
- (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
- s[n] == 1))
- break;
+ if (r == INT_MAX)
+ continue;
- if (rd[n][1] < best_rd) {
- best_tx = n;
- best_rd = rd[n][1];
+ if (s) {
+ if (is_inter) {
+ rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
+ }
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
+ }
+
+ if (tx_select && !(s && is_inter))
+ r += r_tx_size;
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));
+
+ // Early termination in transform size search.
+ if (cpi->sf.tx_size_search_breakout &&
+ (rd == INT64_MAX ||
+#if CONFIG_EXT_TX
+ (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
+#else
+ (s == 1 && n < start_tx) ||
+#endif
+ (n < (int) max_tx_size && rd > last_rd)))
+ break;
+
+ last_rd = rd;
+ if (rd <
+#if CONFIG_EXT_TX
+ (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
+#endif // CONFIG_EXT_TX
+ best_rd) {
+ best_tx = n;
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+#if CONFIG_EXT_TX
+ best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+ }
}
+#if CONFIG_EXT_TX
}
- mbmi->tx_size = best_tx;
+#endif // CONFIG_EXT_TX
- *distortion = d[mbmi->tx_size];
- *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
- *skip = s[mbmi->tx_size];
- *psse = sse[mbmi->tx_size];
+ mbmi->tx_size = best_tx;
+#if CONFIG_EXT_TX
+ mbmi->tx_type = best_tx_type;
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
+ &sse, ref_best_rd, 0, bs, best_tx,
+ cpi->sf.use_fast_coef_costing);
+#endif // CONFIG_EXT_TX
}
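
The rewrite of the transform-size loop above replaces the per-size r/d/s/rd arrays with streaming bookkeeping: each candidate is scored as it is produced, the winner is recorded immediately, and the search breaks once the cost curve turns upward relative to last_rd. The control flow, reduced to its skeleton with made-up costs:

#include <limits.h>
#include <stdio.h>

int main(void) {
  long long rd_for_size[4] = {900, 850, 870, 990}; /* index: largest -> smallest */
  long long best_rd = LLONG_MAX, last_rd = LLONG_MAX;
  int best_tx = -1;
  for (int n = 0; n < 4; ++n) {
    long long rd = rd_for_size[n];
    if (n > 0 && rd > last_rd) break;  /* early termination: curve turned up */
    last_rd = rd;
    if (rd < best_rd) { best_rd = rd; best_tx = n; }
  }
  printf("best size index %d, rd %lld\n", best_tx, best_rd);
  return 0;
}
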
static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -726,11 +963,10 @@
assert(bs == xd->mi[0]->mbmi.sb_type);
- if (CONFIG_MISC_FIXES && xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
ref_best_rd, bs);
- } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
- xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
bs);
} else {
@@ -964,7 +1200,7 @@
// one of the neighboring directional modes
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
if (conditional_skipintra(mode, *best_mode))
- continue;
+ continue;
}
memcpy(tempa, ta, sizeof(ta));
@@ -976,8 +1212,8 @@
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8,
- block,
- p->src_diff);
+ block,
+ p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0]->bmi[block].as_mode = mode;
vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
@@ -986,11 +1222,21 @@
vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
dst, dst_stride, xd->bd);
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ ratey += cost_coeffs(x, 0, block,
+#if CONFIG_VAR_TX
+ coeff_ctx,
+#else
+ tempa + idx, templ + idy,
+#endif
+ TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -1000,11 +1246,21 @@
xd->bd, DCT_DCT, 1);
} else {
int64_t unused;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ ratey += cost_coeffs(x, 0, block,
+#if CONFIG_VAR_TX
+ coeff_ctx,
+#else
+ tempa + idx, templ + idy,
+#endif
+ TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
distortion += vp10_highbd_block_error(
@@ -1036,7 +1292,7 @@
num_4x4_blocks_wide * 4 * sizeof(uint16_t));
}
}
- next_highbd:
+next_highbd:
{}
}
if (best_rd >= rd_thresh)
@@ -1065,7 +1321,7 @@
// one of the neighboring directional modes
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
if (conditional_skipintra(mode, *best_mode))
- continue;
+ continue;
}
memcpy(tempa, ta, sizeof(ta));
@@ -1085,26 +1341,49 @@
vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif
vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
+ TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
+#endif
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
dst, dst_stride, p->eobs[block], DCT_DCT, 1);
} else {
int64_t unused;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif
vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
- so->scan, so->neighbors,
- cpi->sf.use_fast_coef_costing);
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
+ TX_4X4, so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif
distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &unused) >> 2;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -1167,6 +1446,10 @@
memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
+#if CONFIG_EXT_INTRA
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+#endif // CONFIG_EXT_INTRA
+
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1211,6 +1494,200 @@
return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
+#if CONFIG_EXT_INTRA
+// Return 1 if an ext intra mode is selected; return 0 otherwise.
+static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int mode_cost,
+ int64_t *best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ MB_MODE_INFO *mbmi = &mic->mbmi;
+ int this_rate, this_rate_tokenonly, s;
+ int ext_intra_selected_flag = 0;
+ int64_t this_distortion, this_rd;
+ EXT_INTRA_MODE mode;
+ TX_SIZE best_tx_size = TX_4X4;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+#if CONFIG_EXT_TX
+ TX_TYPE best_tx_type;
+#endif // CONFIG_EXT_TX
+
+ vp10_zero(ext_intra_mode_info);
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
+ mbmi->mode = DC_PRED;
+
+ for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
+
+ this_rate = this_rate_tokenonly +
+ vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+ write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ best_tx_size = mic->mbmi.tx_size;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+#if CONFIG_EXT_TX
+ best_tx_type = mic->mbmi.tx_type;
+#endif // CONFIG_EXT_TX
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ ext_intra_selected_flag = 1;
+ }
+ }
+
+ if (ext_intra_selected_flag) {
+ mbmi->mode = DC_PRED;
+ mbmi->tx_size = best_tx_size;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+ ext_intra_mode_info.use_ext_intra_mode[0];
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+ ext_intra_mode_info.ext_intra_mode[0];
+#if CONFIG_EXT_TX
+ mbmi->tx_type = best_tx_type;
+#endif // CONFIG_EXT_TX
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int64_t rd_pick_intra_angle_sby(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ MB_MODE_INFO *mbmi = &mic->mbmi;
+ int this_rate, this_rate_tokenonly, s;
+ int angle_delta, best_angle_delta = 0;
+ const double rd_adjust = 1.2;
+ int64_t this_distortion, this_rd, sse_dummy;
+ TX_SIZE best_tx_size = mic->mbmi.tx_size;
+#if CONFIG_EXT_TX
+ TX_TYPE best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+
+ if (ANGLE_FAST_SEARCH) {
+ int deltas_level1[3] = {0, -2, 2};
+ int deltas_level2[3][2] = {
+ {-1, 1}, {-3, -1}, {1, 3},
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;
+
+ for (i = 0; i < level1; ++i) {
+ mic->mbmi.angle_delta[0] = deltas_level1[i];
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize,
+ (i == 0 && best_rd < INT64_MAX) ? best_rd * rd_adjust :
+ best_rd);
+ if (this_rate_tokenonly == INT_MAX) {
+ if (i == 0)
+ break;
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
+ break;
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+#if CONFIG_EXT_TX
+ best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+
+ if (best_i >= 0) {
+ for (j = 0; j < level2; ++j) {
+ mic->mbmi.angle_delta[0] = deltas_level2[best_i][j];
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+#if CONFIG_EXT_TX
+ best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+ } else {
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {
+ mic->mbmi.angle_delta[0] = angle_delta;
+
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
+
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+#if CONFIG_EXT_TX
+ best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+
+ mbmi->tx_size = best_tx_size;
+ mbmi->angle_delta[0] = best_angle_delta;
+#if CONFIG_EXT_TX
+ mbmi->tx_type = best_tx_type;
+#endif // CONFIG_EXT_TX
+
+ if (*rate_tokenonly < INT_MAX) {
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &this_rate_tokenonly, &this_distortion, &s,
+ &sse_dummy, INT64_MAX, 0, bsize, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+ }
+
+ return best_rd;
+}
+#endif // CONFIG_EXT_INTRA
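
rd_pick_intra_angle_sby above implements a two-level fast search over angle deltas: probe {0, -2, +2} first, then refine around the coarse winner with a fixed neighbor pair (the exhaustive loop over -MAX_ANGLE_DELTAS..MAX_ANGLE_DELTAS is the fallback path). It also bails out if the very first probe already exceeds best_rd * 1.2. A standalone sketch of the two-level structure, with eval() standing in for the super_block_yrd evaluation:

#include <limits.h>
#include <stdio.h>

/* Stand-in RD evaluation: an arbitrary convex cost with its minimum at +1. */
long long eval(int delta) { long long d = delta - 1; return 100 + d * d; }

int main(void) {
  const int level1[3] = {0, -2, 2};
  const int level2[3][2] = {{-1, 1}, {-3, -1}, {1, 3}};
  long long best_rd = LLONG_MAX;
  int best_delta = 0, best_i = -1;
  for (int i = 0; i < 3; ++i) {              /* coarse pass */
    long long rd = eval(level1[i]);
    if (rd < best_rd) { best_rd = rd; best_delta = level1[i]; best_i = i; }
  }
  if (best_i >= 0) {                         /* refine around the coarse winner */
    for (int j = 0; j < 2; ++j) {
      int delta = level2[best_i][j];
      long long rd = eval(delta);
      if (rd < best_rd) { best_rd = rd; best_delta = delta; }
    }
  }
  printf("best angle delta %d, rd %lld\n", best_delta, best_rd);
  return 0;
}
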
+
// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
@@ -1224,6 +1701,13 @@
int this_rate, this_rate_tokenonly, s;
int64_t this_distortion, this_rd;
TX_SIZE best_tx = TX_4X4;
+#if CONFIG_EXT_INTRA
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_TX
+ TX_TYPE best_tx_type = DCT_DCT;
+#endif // CONFIG_EXT_TX
int *bmode_costs;
PALETTE_MODE_INFO palette_mode_info;
uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
@@ -1237,6 +1721,11 @@
const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
bmode_costs = cpi->y_mode_costs[A][L];
+#if CONFIG_EXT_INTRA
+ ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.angle_delta[0] = 0;
+#endif // CONFIG_EXT_INTRA
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
palette_mode_info.palette_size[0] = 0;
mic->mbmi.palette_mode_info.palette_size[0] = 0;
@@ -1246,11 +1735,26 @@
palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
/* Y Search for intra prediction mode */
- for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
mic->mbmi.mode = mode;
-
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
+ if (is_directional_mode) {
+ rate_overhead = bmode_costs[mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ this_rate_tokenonly = INT_MAX;
+ this_rd =
+ rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rate_tokenonly,
+ &this_distortion, &s, bsize, rate_overhead,
+ best_rd);
+ } else {
+ mic->mbmi.angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, best_rd);
+ }
+#endif // CONFIG_EXT_INTRA
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
- &s, NULL, bsize, best_rd);
+ &s, NULL, bsize, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1260,12 +1764,26 @@
this_rate +=
vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
[palette_ctx], 0);
+#if CONFIG_EXT_INTRA
+ if (mode == DC_PRED && ALLOW_FILTER_INTRA_MODES)
+ this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
+ if (is_directional_mode)
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mic->mbmi.angle_delta[0]);
+#endif // CONFIG_EXT_INTRA
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
mode_selected = mode;
best_rd = this_rd;
best_tx = mic->mbmi.tx_size;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mic->mbmi.angle_delta[0];
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_TX
+ best_tx_type = mic->mbmi.tx_type;
+#endif // CONFIG_EXT_TX
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
@@ -1278,8 +1796,36 @@
&palette_mode_info, best_palette_color_map,
&best_tx, &mode_selected, &best_rd);
+#if CONFIG_EXT_INTRA
+  if (palette_mode_info.palette_size[0] == 0 && ALLOW_FILTER_INTRA_MODES) {
+  // Equivalent to the original `!palette_size[0] > 0`, which parses as
+  // `(!palette_size[0]) > 0`; spelled as `== 0` to make the intent plain.
+ if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
+ skippable, bsize, bmode_costs[DC_PRED],
+ &best_rd)) {
+ mode_selected = mic->mbmi.mode;
+ best_tx = mic->mbmi.tx_size;
+ ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
+#if CONFIG_EXT_TX
+ best_tx_type = mic->mbmi.tx_type;
+#endif // CONFIG_EXT_TX
+ }
+ }
+
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] =
+ ext_intra_mode_info.use_ext_intra_mode[0];
+ if (ext_intra_mode_info.use_ext_intra_mode[0]) {
+ mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
+ ext_intra_mode_info.ext_intra_mode[0];
+ }
+#endif // CONFIG_EXT_INTRA
+
mic->mbmi.mode = mode_selected;
mic->mbmi.tx_size = best_tx;
+#if CONFIG_EXT_INTRA
+ mic->mbmi.angle_delta[0] = best_angle_delta;
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_TX
+ mic->mbmi.tx_type = best_tx_type;
+#endif // CONFIG_EXT_TX
mic->mbmi.palette_mode_info.palette_size[0] =
palette_mode_info.palette_size[0];
if (palette_mode_info.palette_size[0] > 0) {
@@ -1293,6 +1839,619 @@
return best_rd;
}
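
The angle delta chosen above is signalled with a near-uniform code over 2 * MAX_ANGLE_DELTAS + 1 symbols, costed via write_uniform_cost. That is a truncated binary code: for n symbols and l = ceil(log2(n)), the first 2^l - n symbols take l - 1 bits and the rest take l. A sketch in whole bits (libvpx scales costs by 256; MAX_ANGLE_DELTAS = 3 is assumed for the example):

#include <stdio.h>

int bits_for(unsigned n) { int l = 0; while ((1u << l) < n) ++l; return l; }

/* Truncated-binary cost for symbol v in an alphabet of n symbols. */
int uniform_cost_bits(int n, int v) {
  int l = bits_for(n), m = (1 << l) - n;
  if (l == 0) return 0;
  return (v < m) ? l - 1 : l;
}

int main(void) {
  const int max_angle_deltas = 3, n = 2 * max_angle_deltas + 1; /* 7 symbols */
  for (int delta = -max_angle_deltas; delta <= max_angle_deltas; ++delta)
    printf("delta %+d -> %d bits\n", delta,
           uniform_cost_bits(n, max_angle_deltas + delta));
  return 0;
}
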
+#if CONFIG_VAR_TX
+static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
+ int blk_row, int blk_col, int plane, int block,
+ int plane_bsize, int coeff_ctx,
+ int *rate, int64_t *dist, int64_t *bsse, int *skip) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int ss_txfrm_size = tx_size << 1;
+ int64_t this_sse;
+ int shift = tx_size == TX_32X32 ? 0 : 2;
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+#endif
+ unsigned int tmp_sse = 0;
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+
+ BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
+ int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize];
+ int src_stride = p->src.stride;
+ uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
+ uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+ DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
+
+ vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
+ NULL, 0, NULL, 0, bh, bh);
+
+ if (blk_row + (bh >> 2) > max_blocks_high ||
+ blk_col + (bh >> 2) > max_blocks_wide) {
+ int idx, idy;
+ unsigned int this_sse;
+ int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
+ int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
+ for (idy = 0; idy < blocks_height; idy += 2) {
+ for (idx = 0; idx < blocks_width; idx += 2) {
+ cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
+ src_stride,
+ rec_buffer + 4 * idy * 32 + 4 * idx,
+ 32, &this_sse);
+ tmp_sse += this_sse;
+ }
+ }
+ } else {
+ cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
+ &this_sse, xd->bd) >> shift;
+ *bsse += this_sse >> shift;
+#else
+ *bsse += (int64_t)tmp_sse * 16;
+
+ if (p->eobs[block] > 0) {
+ switch (tx_size) {
+ case TX_32X32:
+ vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type);
+ break;
+ case TX_16X16:
+ vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type);
+ break;
+ case TX_8X8:
+ vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type);
+ break;
+ case TX_4X4:
+ vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type,
+ xd->lossless[xd->mi[0]->mbmi.segment_id]);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ break;
+ }
+
+ if ((bh >> 2) + blk_col > max_blocks_wide ||
+ (bh >> 2) + blk_row > max_blocks_high) {
+ int idx, idy;
+ unsigned int this_sse;
+ int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
+ int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
+ tmp_sse = 0;
+ for (idy = 0; idy < blocks_height; idy += 2) {
+ for (idx = 0; idx < blocks_width; idx += 2) {
+ cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
+ src_stride,
+ rec_buffer + 4 * idy * 32 + 4 * idx,
+ 32, &this_sse);
+ tmp_sse += this_sse;
+ }
+ }
+ } else {
+ cpi->fn_ptr[txm_bsize].vf(src, src_stride,
+ rec_buffer, 32, &tmp_sse);
+ }
+ }
+ *dist += (int64_t)tmp_sse * 16;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
+ scan_order->scan, scan_order->neighbors, 0);
+ *skip &= (p->eobs[block] == 0);
+}
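
When a transform block in tx_block_rd_b overhangs the frame edge, the distortion above is accumulated from 8x8 variance calls clamped to the visible extent instead of one full-block call. The clamping logic in isolation (sse8x8 stands in for cpi->fn_ptr[...].vf; the geometry is illustrative):

#include <stdio.h>

unsigned sse8x8(int r, int c) { (void)r; (void)c; return 64; } /* stub */

int main(void) {
  const int bh = 16;                 /* 16x16 transform -> 4 4x4 units per side */
  const int blk_row = 2, blk_col = 2;
  const int max_blocks_high = 5, max_blocks_wide = 6; /* frame-clipped extent */
  int blocks_height = bh >> 2, blocks_width = bh >> 2;
  unsigned tmp_sse = 0;
  if (blk_row + blocks_height > max_blocks_high)
    blocks_height = max_blocks_high - blk_row;
  if (blk_col + blocks_width > max_blocks_wide)
    blocks_width = max_blocks_wide - blk_col;
  for (int idy = 0; idy < blocks_height; idy += 2)    /* 8x8 = 2 units */
    for (int idx = 0; idx < blocks_width; idx += 2)
      tmp_sse += sse8x8(blk_row + idy, blk_col + idx);
  printf("visible %dx%d units, sse %u\n", blocks_width, blocks_height, tmp_sse);
  return 0;
}
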
+
+static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
+ int blk_row, int blk_col, int plane, int block,
+ TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+ ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
+ TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
+ int *rate, int64_t *dist,
+ int64_t *bsse, int *skip,
+ int64_t ref_best_rd, int *is_cost_valid) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+ int64_t this_rd = INT64_MAX;
+ ENTROPY_CONTEXT *pta = ta + blk_col;
+ ENTROPY_CONTEXT *ptl = tl + blk_row;
+ ENTROPY_CONTEXT stxa = 0, stxl = 0;
+ int coeff_ctx, i;
+ int ctx = txfm_partition_context(tx_above + (blk_col >> 1),
+ tx_left + (blk_row >> 1), tx_size);
+
+ int64_t sum_dist = 0, sum_bsse = 0;
+ int64_t sum_rd = INT64_MAX;
+ int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
+ int all_skip = 1;
+ int tmp_eob = 0;
+ int zero_blk_rate;
+
+ if (ref_best_rd < 0) {
+ *is_cost_valid = 0;
+ return;
+ }
+
+ switch (tx_size) {
+ case TX_4X4:
+ stxa = pta[0];
+ stxl = ptl[0];
+ break;
+ case TX_8X8:
+ stxa = !!*(const uint16_t *)&pta[0];
+ stxl = !!*(const uint16_t *)&ptl[0];
+ break;
+ case TX_16X16:
+ stxa = !!*(const uint32_t *)&pta[0];
+ stxl = !!*(const uint32_t *)&ptl[0];
+ break;
+ case TX_32X32:
+ stxa = !!*(const uint64_t *)&pta[0];
+ stxl = !!*(const uint64_t *)&ptl[0];
+ break;
+ default:
+ assert(0 && "Invalid transform size.");
+ break;
+ }
+ coeff_ctx = combine_entropy_contexts(stxa, stxl);
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ *rate = 0;
+ *dist = 0;
+ *bsse = 0;
+ *skip = 1;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ zero_blk_rate =
+ x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];
+
+ if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
+ mbmi->inter_tx_size[tx_idx] = tx_size;
+ tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+
+ if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
+ RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) || *skip == 1) &&
+ !xd->lossless[mbmi->segment_id]) {
+ *rate = zero_blk_rate;
+ *dist = *bsse;
+ *skip = 1;
+ x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1;
+ p->eobs[block] = 0;
+ } else {
+ x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0;
+ *skip = 0;
+ }
+
+ if (tx_size > TX_4X4)
+ *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
+ this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
+ tmp_eob = p->eobs[block];
+ }
+
+ if (tx_size > TX_4X4) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_height_log2_lookup[bsize];
+ int sub_step = 1 << (2 * (tx_size - 1));
+ int i;
+ int this_rate;
+ int64_t this_dist;
+ int64_t this_bsse;
+ int this_skip;
+ int this_cost_valid = 1;
+ int64_t tmp_rd = 0;
+
+ --bsl;
+ for (i = 0; i < 4 && this_cost_valid; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
+ plane, block + i * sub_step, tx_size - 1,
+ plane_bsize, ta, tl, tx_above, tx_left,
+ &this_rate, &this_dist,
+ &this_bsse, &this_skip,
+ ref_best_rd - tmp_rd, &this_cost_valid);
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_bsse += this_bsse;
+ all_skip &= this_skip;
+ tmp_rd += RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
+ if (this_rd < tmp_rd)
+ break;
+ }
+ if (this_cost_valid)
+ sum_rd = tmp_rd;
+ }
+
+ if (this_rd < sum_rd) {
+ int idx, idy;
+ for (i = 0; i < (1 << tx_size); ++i)
+ pta[i] = ptl[i] = !(tmp_eob == 0);
+ txfm_partition_update(tx_above + (blk_col >> 1),
+ tx_left + (blk_row >> 1), tx_size);
+ mbmi->inter_tx_size[tx_idx] = tx_size;
+
+ for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
+ for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
+ mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
+ mbmi->tx_size = tx_size;
+ if (this_rd == INT64_MAX)
+ *is_cost_valid = 0;
+ x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip;
+ } else {
+ *rate = sum_rate;
+ *dist = sum_dist;
+ *bsse = sum_bsse;
+ *skip = all_skip;
+ if (sum_rd == INT64_MAX)
+ *is_cost_valid = 0;
+ }
+}
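
select_tx_block above is the heart of the variable-transform search: at every node it compares coding the block with a single transform (this_rd) against the sum over four children one size down (sum_rd), recursing until TX_4X4. Stripped of the context and bitstream bookkeeping, the decision is a quadtree minimization; a sketch with made-up leaf costs:

#include <stdio.h>

/* Stand-in for tx_block_rd_b + RDCOST at a given quadtree level/quadrant. */
long long leaf_cost(int level, int quadrant) {
  return 100LL * (level + 1) + 7 * quadrant;
}

long long best_cost(int level, int quadrant) {
  long long this_rd = leaf_cost(level, quadrant);
  if (level == 0) return this_rd;            /* TX_4X4: cannot split further */
  long long sum_rd = 0;
  for (int i = 0; i < 4; ++i)                /* four children, one size down */
    sum_rd += best_cost(level - 1, i);
  return this_rd < sum_rd ? this_rd : sum_rd;
}

int main(void) {
  printf("best rd from level 2: %lld\n", best_cost(2, 0));
  return 0;
}
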
+
+static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int is_cost_valid = 1;
+ int64_t this_rd = 0;
+
+ if (ref_best_rd < 0)
+ is_cost_valid = 0;
+
+ *rate = 0;
+ *distortion = 0;
+ *sse = 0;
+ *skippable = 1;
+
+ if (is_cost_valid) {
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
+ ENTROPY_CONTEXT ctxa[16], ctxl[16];
+ TXFM_CONTEXT tx_above[8], tx_left[8];
+
+ int pnrate = 0, pnskip = 1;
+ int64_t pndist = 0, pnsse = 0;
+
+ vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
+ memcpy(tx_above, xd->above_txfm_context,
+ sizeof(TXFM_CONTEXT) * (mi_width >> 1));
+ memcpy(tx_left, xd->left_txfm_context,
+ sizeof(TXFM_CONTEXT) * (mi_height >> 1));
+
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ select_tx_block(cpi, x, idy, idx, 0, block,
+ max_txsize_lookup[plane_bsize], plane_bsize,
+ ctxa, ctxl, tx_above, tx_left,
+ &pnrate, &pndist, &pnsse, &pnskip,
+ ref_best_rd - this_rd, &is_cost_valid);
+ *rate += pnrate;
+ *distortion += pndist;
+ *sse += pnsse;
+ *skippable &= pnskip;
+ this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
+ RDCOST(x->rdmult, x->rddiv, 0, pnsse));
+ block += step;
+ }
+ }
+ }
+
+ this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
+ RDCOST(x->rdmult, x->rddiv, 0, *sse));
+ if (this_rd > ref_best_rd)
+ is_cost_valid = 0;
+
+ if (!is_cost_valid) {
+ // reset cost value
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ }
+}
+
+#if CONFIG_EXT_TX
+static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int64_t rd = INT64_MAX;
+ int64_t best_rd = INT64_MAX;
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ int ext_tx_set;
+ const int is_inter = is_inter_block(mbmi);
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0 = vp10_cost_bit(skip_prob, 0);
+ int s1 = vp10_cost_bit(skip_prob, 1);
+ TX_SIZE best_tx_size[64];
+ TX_SIZE best_tx = TX_SIZES;
+ uint8_t best_blk_skip[256];
+ const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
+ int idx, idy;
+
+ *distortion = INT64_MAX;
+ *rate = INT_MAX;
+ *skippable = 0;
+ *sse = INT64_MAX;
+
+ ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
+
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+ int this_rate = 0;
+ int this_skip = 1;
+ int64_t this_dist = 0;
+ int64_t this_sse = 0;
+
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ } else {
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+
+ mbmi->tx_type = tx_type;
+
+ if (ext_tx_set == 1 &&
+ mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
+
+ inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
+ bsize, ref_best_rd);
+
+ if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ this_rate != INT_MAX) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ this_rate += cpi->inter_tx_type_costs[ext_tx_set]
+ [max_tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0)
+ this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+
+ if (this_rate == INT_MAX)
+ continue;
+
+ if (this_skip)
+ rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse);
+ else
+ rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist);
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip)
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse));
+
+ if (rd <
+ (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
+ best_rd) {
+ best_rd = rd;
+ *distortion = this_dist;
+ *rate = this_rate;
+ *skippable = this_skip;
+ *sse = this_sse;
+ best_tx_type = mbmi->tx_type;
+ best_tx = mbmi->tx_size;
+ memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx];
+ }
+ }
+
+ mbmi->tx_type = best_tx_type;
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx];
+ mbmi->tx_size = best_tx;
+ memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
+}
+#endif
+
+static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int blk_row, int blk_col, int plane, int block,
+ TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+ ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
+ int *rate, int64_t *dist, int64_t *bsse, int *skip) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
+ 0, 0) :
+ mbmi->inter_tx_size[tx_idx];
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ int coeff_ctx, i;
+ ENTROPY_CONTEXT *ta = above_ctx + blk_col;
+ ENTROPY_CONTEXT *tl = left_ctx + blk_row;
+ switch (tx_size) {
+ case TX_4X4:
+ break;
+ case TX_8X8:
+ ta[0] = !!*(const uint16_t *)&ta[0];
+ tl[0] = !!*(const uint16_t *)&tl[0];
+ break;
+ case TX_16X16:
+ ta[0] = !!*(const uint32_t *)&ta[0];
+ tl[0] = !!*(const uint32_t *)&tl[0];
+ break;
+ case TX_32X32:
+ ta[0] = !!*(const uint64_t *)&ta[0];
+ tl[0] = !!*(const uint64_t *)&tl[0];
+ break;
+ default:
+ assert(0 && "Invalid transform size.");
+ break;
+ }
+ coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
+ tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+ for (i = 0; i < (1 << tx_size); ++i) {
+ ta[i] = !(p->eobs[block] == 0);
+ tl[i] = !(p->eobs[block] == 0);
+ }
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int step = 1 << (2 * (tx_size - 1));
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
+ block + i * step, tx_size - 1, plane_bsize,
+ above_ctx, left_ctx, rate, dist, bsse, skip);
+ }
+ }
+}
+
+// Return value 0: early termination triggered, no valid rd cost available;
+// 1: rd cost values are valid.
+static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int plane;
+ int is_cost_valid = 1;
+ int64_t this_rd;
+
+ if (ref_best_rd < 0)
+ is_cost_valid = 0;
+
+ if (is_inter_block(mbmi) && is_cost_valid) {
+ int plane;
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane)
+ vp10_subtract_plane(x, bsize, plane);
+ }
+
+ *rate = 0;
+ *distortion = 0;
+ *sse = 0;
+ *skippable = 1;
+
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
+ int pnrate = 0, pnskip = 1;
+ int64_t pndist = 0, pnsse = 0;
+ ENTROPY_CONTEXT ta[16], tl[16];
+
+ vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
+
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ tx_block_rd(cpi, x, idy, idx, plane, block,
+ max_txsize_lookup[plane_bsize], plane_bsize, ta, tl,
+ &pnrate, &pndist, &pnsse, &pnskip);
+ block += step;
+ }
+ }
+
+ if (pnrate == INT_MAX) {
+ is_cost_valid = 0;
+ break;
+ }
+
+ *rate += pnrate;
+ *distortion += pndist;
+ *sse += pnsse;
+ *skippable &= pnskip;
+
+ this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
+ RDCOST(x->rdmult, x->rddiv, 0, *sse));
+
+ if (this_rd > ref_best_rd) {
+ is_cost_valid = 0;
+ break;
+ }
+ }
+
+ if (!is_cost_valid) {
+ // reset cost value
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ }
+
+ return is_cost_valid;
+}
+#endif
+
// Return value 0: early termination triggered, no valid rd cost available;
// 1: rd cost values are valid.
static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
@@ -1322,7 +2481,11 @@
*skippable = 1;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &pnrate, &pndist, &pnskip, &pnsse,
ref_best_rd, plane, bsize, uv_tx_size,
cpi->sf.use_fast_coef_costing);
if (pnrate == INT_MAX) {
@@ -1346,35 +2509,221 @@
return is_cost_valid;
}
+#if CONFIG_EXT_INTRA
+// Return 1 if an ext intra mode is selected; return 0 otherwise.
+static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int64_t *best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int ext_intra_selected_flag = 0;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+ EXT_INTRA_MODE mode;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+
+ vp10_zero(ext_intra_mode_info);
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
+ mbmi->uv_mode = DC_PRED;
+
+ for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, *best_rd))
+ continue;
+
+ this_rate = this_rate_tokenonly +
+ vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+ cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
+ write_uniform_cost(FILTER_INTRA_MODES, mode);
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ ext_intra_selected_flag = 1;
+ if (!x->select_tx_size)
+ swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+ }
+ }
+
+ if (ext_intra_selected_flag) {
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info.use_ext_intra_mode[1];
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info.ext_intra_mode[1];
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int rd_pick_intra_angle_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+ int angle_delta, best_angle_delta = 0;
+ const double rd_adjust = 1.2;
+
+ (void)ctx;
+ *rate_tokenonly = INT_MAX;
+ if (ANGLE_FAST_SEARCH) {
+ int deltas_level1[3] = {0, -2, 2};
+ int deltas_level2[3][2] = {
+ {-1, 1}, {-3, -1}, {1, 3},
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;
+
+ for (i = 0; i < level1; ++i) {
+ mbmi->angle_delta[1] = deltas_level1[i];
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize,
+ (i == 0 && best_rd < INT64_MAX) ?
+ best_rd * rd_adjust : best_rd)) {
+ if (i == 0)
+ break;
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
+ break;
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+
+ if (best_i >= 0) {
+ for (j = 0; j < level2; ++j) {
+ mbmi->angle_delta[1] = deltas_level2[best_i][j];
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
+ continue;
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+ } else {
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {
+ mbmi->angle_delta[1] = angle_delta;
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
+ continue;
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+
+ mbmi->angle_delta[1] = best_angle_delta;
+ if (*rate_tokenonly != INT_MAX)
+ super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, INT64_MAX);
+ return *rate_tokenonly != INT_MAX;
+}
+#endif // CONFIG_EXT_INTRA
+
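The fast path of rd_pick_intra_angle_sbuv() above is a two-level coarse-to-fine probe: level 1 tries the deltas {0, -2, +2} and level 2 refines around the winner with a paired offset table. A self-contained sketch of the same search shape, with cost() standing in for the super_block_uvrd()+RDCOST probe (the real code additionally bails out early when the first probe already exceeds best_rd * 1.2):

    #include <stdint.h>

    /* Coarse-to-fine delta search mirroring ANGLE_FAST_SEARCH above. */
    static int pick_delta_sketch(int64_t (*cost)(int delta)) {
      static const int level1[3] = { 0, -2, 2 };
      static const int level2[3][2] = { { -1, 1 }, { -3, -1 }, { 1, 3 } };
      int64_t best_rd = INT64_MAX;
      int best_delta = 0, best_i = 0, i, j;

      for (i = 0; i < 3; ++i) {            /* level 1: coarse probes */
        const int64_t rd = cost(level1[i]);
        if (rd < best_rd) {
          best_rd = rd;
          best_delta = level1[i];
          best_i = i;
        }
      }
      for (j = 0; j < 2; ++j) {            /* level 2: refine the winner */
        const int64_t rd = cost(level2[best_i][j]);
        if (rd < best_rd) {
          best_rd = rd;
          best_delta = level2[best_i][j];
        }
      }
      return best_delta;
    }
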
static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
PREDICTION_MODE mode;
PREDICTION_MODE mode_selected = DC_PRED;
int64_t best_rd = INT64_MAX, this_rd;
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
+#if CONFIG_EXT_INTRA
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
- xd->mi[0]->mbmi.uv_mode = mode;
-
+ mbmi->uv_mode = mode;
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
+ rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ mbmi->angle_delta[1] = 0;
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode) {
+ if (!rd_pick_intra_angle_sbuv(cpi, x, ctx, &this_rate,
+ &this_rate_tokenonly, &this_distortion, &s,
+ bsize, rate_overhead, best_rd))
+ continue;
+ } else {
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
+ continue;
+ }
+ this_rate = this_rate_tokenonly +
+ cpi->intra_uv_mode_cost[mbmi->mode][mode];
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[1]);
+ // Note: the "&& 0" deliberately disables this ext-intra cost term for
+ // now, so the branch below is currently unreachable.
+ if (mode == DC_PRED && 0)
+ this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
+#else
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd))
continue;
this_rate = this_rate_tokenonly +
cpi->intra_uv_mode_cost[xd->mi[0]->mbmi.mode][mode];
+#endif // CONFIG_EXT_INTRA
+
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
mode_selected = mode;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
best_rd = this_rd;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
@@ -1385,7 +2734,23 @@
}
}
- xd->mi[0]->mbmi.uv_mode = mode_selected;
+#if CONFIG_EXT_INTRA
+ if (mbmi->sb_type >= BLOCK_8X8 && ALLOW_FILTER_INTRA_MODES) {
+ if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion,
+ skippable, bsize, &best_rd)) {
+ mode_selected = mbmi->uv_mode;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ }
+ }
+
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info.use_ext_intra_mode[1];
+ if (ext_intra_mode_info.use_ext_intra_mode[1])
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info.ext_intra_mode[1];
+ mbmi->angle_delta[1] = best_angle_delta;
+#endif // CONFIG_EXT_INTRA
+ mbmi->uv_mode = mode_selected;
return best_rd;
}
@@ -1514,8 +2879,8 @@
pd->dst.stride)];
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 1);
vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
@@ -1551,8 +2916,14 @@
for (idx = 0; idx < width / 4; ++idx) {
int64_t ssz, rd, rd1, rd2;
tran_low_t* coeff;
-
+#if CONFIG_VAR_TX
+ int coeff_ctx;
+#endif
k += (idy * 2 + idx);
+#if CONFIG_VAR_TX
+ coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)),
+ *(tl + (k >> 1)));
+#endif
coeff = BLOCK_OFFSET(p->coeff, k);
fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
@@ -1571,9 +2942,19 @@
16, &ssz);
#endif // CONFIG_VP9_HIGHBITDEPTH
thissse += ssz;
- thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
+#if CONFIG_VAR_TX
+ thisrate += cost_coeffs(x, 0, k, coeff_ctx,
+ TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
+ *(ta + (k & 1)) = !(p->eobs[k] == 0);
+ *(tl + (k >> 1)) = !(p->eobs[k] == 0);
+#else
+ thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1),
+ TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif
rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
rd = VPXMIN(rd1, rd2);
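Under CONFIG_VAR_TX the above/left entropy contexts are folded into a single index before costing and refreshed from the block's eob afterwards. Assuming the usual libvpx helper, the fold is simply a count of non-zero neighbours:

    /* Sketch of combine_entropy_contexts(): yields 0, 1 or 2 depending on
     * how many of the above (a) and left (l) contexts are non-zero. */
    static int combine_entropy_contexts_sketch(char a, char l) {
      return (a != 0) + (l != 0);
    }
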
@@ -1646,10 +3027,6 @@
x->e_mbd.plane[0].pre[1] = orig_pre[1];
}
-static INLINE int mv_has_subpel(const MV *mv) {
- return (mv->row & 0x0F) || (mv->col & 0x0F);
-}
-
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive, then clean up or remove
static int check_best_zero_mv(
@@ -1742,11 +3119,11 @@
// frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
- cm->width, cm->height,
- cm->use_highbitdepth);
+ cm->width, cm->height,
+ cm->use_highbitdepth);
#else
vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
- cm->width, cm->height);
+ cm->width, cm->height);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Allow joint search multiple times iteratively for each reference frame
@@ -2731,6 +4108,10 @@
if (cm->interp_filter != BILINEAR) {
if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
best_filter = EIGHTTAP;
+#if CONFIG_EXT_INTERP
+ } else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
+ best_filter = EIGHTTAP;
+#endif
} else if (best_filter == SWITCHABLE) {
int newbest;
int tmp_rate_sum = 0;
@@ -2746,7 +4127,7 @@
rs = vp10_get_switchable_rate(cpi, xd);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- if (i > 0 && intpel_mv) {
+ if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
filter_cache[i] = rd;
filter_cache[SWITCHABLE_FILTERS] =
@@ -2768,7 +4149,7 @@
(!i || best_needs_copy)) ||
(cm->interp_filter != SWITCHABLE &&
(cm->interp_filter == mbmi->interp_filter ||
- (i == 0 && intpel_mv)))) {
+ (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
restore_dst_buf(xd, orig_dst, orig_dst_stride);
} else {
for (j = 0; j < MAX_MB_PLANE; j++) {
@@ -2788,7 +4169,7 @@
rd += rs_rd;
*mask_filter = VPXMAX(*mask_filter, rd);
- if (i == 0 && intpel_mv) {
+ if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
tmp_rate_sum = rate_sum;
tmp_dist_sum = dist_sum;
}
@@ -2805,7 +4186,8 @@
if (newbest) {
best_rd = rd;
best_filter = mbmi->interp_filter;
- if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
+ if (cm->interp_filter == SWITCHABLE && i &&
+ !(intpel_mv && IsInterpolatingFilter(i)))
best_needs_copy = !best_needs_copy;
}
@@ -2824,6 +4206,7 @@
restore_dst_buf(xd, orig_dst, orig_dst_stride);
}
}
+
// Set the appropriate filter
mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
cm->interp_filter : best_filter;
@@ -2883,8 +4266,27 @@
// Y cost and distortion
vp10_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
+#if CONFIG_EXT_TX
+ select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+ bsize, ref_best_rd);
+#else
+ inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+ bsize, ref_best_rd);
+#endif
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+ bsize, ref_best_rd);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy * 8 + idx] = mbmi->tx_size;
+ }
+#else
super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
bsize, ref_best_rd);
+#endif
if (*rate_y == INT_MAX) {
*rate2 = INT_MAX;
@@ -2899,8 +4301,13 @@
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
+#if CONFIG_VAR_TX
+ if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
+ &sseuv, bsize, ref_best_rd - rdcosty)) {
+#else
if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
&sseuv, bsize, ref_best_rd - rdcosty)) {
+#endif
*rate2 = INT_MAX;
*distortion = INT64_MAX;
restore_dst_buf(xd, orig_dst, orig_dst_stride);
@@ -3151,6 +4558,12 @@
int64_t dist_uv[TX_SIZES];
int skip_uv[TX_SIZES];
PREDICTION_MODE mode_uv[TX_SIZES];
+#if CONFIG_EXT_INTRA
+ EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
+ int8_t uv_angle_delta[TX_SIZES];
+ int is_directional_mode;
+ int rate_overhead, rate_dummy;
+#endif // CONFIG_EXT_INTRA
const int intra_cost_penalty = vp10_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
int best_skip2 = 0;
@@ -3410,6 +4823,10 @@
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = ref_frame;
mbmi->ref_frame[1] = second_ref_frame;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
@@ -3430,25 +4847,92 @@
TX_SIZE uv_tx;
struct macroblockd_plane *const pd = &xd->plane[1];
memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
+
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED);
+ if (is_directional_mode) {
+ rate_overhead = write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0) +
+ cpi->mbmode_cost[mbmi->mode];
+ rate_y = INT_MAX;
+ this_rd =
+ rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize, rate_overhead, best_rd);
+ } else {
+ mbmi->angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
+ NULL, bsize, best_rd);
+ }
+
+ // TODO(huisu): ext-intra is turned off in lossless mode for now to
+ // avoid a unit test failure
+ if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] &&
+ ALLOW_FILTER_INTRA_MODES) {
+ MB_MODE_INFO mbmi_copy = *mbmi;
+
+ if (rate_y != INT_MAX) {
+ int this_rate = rate_y +
+ cpi->mbmode_cost[mbmi->mode] +
+ vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
+ } else {
+ this_rd = best_rd;
+ }
+
+ if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize,
+ cpi->mbmode_cost[mbmi->mode], &this_rd))
+ *mbmi = mbmi_copy;
+ }
+#else
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
NULL, bsize, best_rd);
+#endif // CONFIG_EXT_INTRA
+
if (rate_y == INT_MAX)
continue;
-
uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
pd->subsampling_y);
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
&rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
&dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
+#if CONFIG_EXT_INTRA
+ ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+ uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
}
rate_uv = rate_uv_tokenonly[uv_tx];
distortion_uv = dist_uv[uv_tx];
skippable = skippable && skip_uv[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
+#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+ if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+ }
+#endif // CONFIG_EXT_INTRA
rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+#if CONFIG_EXT_INTRA
+ if (is_directional_mode)
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[0]);
+
+ if (mbmi->mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) {
+ rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ EXT_INTRA_MODE ext_intra_mode =
+ mbmi->ext_intra_mode_info.ext_intra_mode[0];
+ rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
+ }
+ }
+#endif // CONFIG_EXT_INTRA
if (this_mode != DC_PRED && this_mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
@@ -3461,6 +4945,7 @@
single_newmv, single_inter_filter,
single_skippable, &total_sse, best_rd,
&mask_filter, filter_cache);
+
if (this_rd == INT64_MAX)
continue;
@@ -3485,6 +4970,7 @@
// Cost the skip mb case
rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+
} else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
@@ -3552,8 +5038,15 @@
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(ctx->blk_skip[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
// TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history
@@ -3775,6 +5268,10 @@
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
mbmi->mode = ZEROMV;
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = LAST_FRAME;
@@ -3785,6 +5282,9 @@
if (cm->interp_filter != BILINEAR) {
best_filter = EIGHTTAP;
if (cm->interp_filter == SWITCHABLE &&
+#if CONFIG_EXT_INTERP
+ vp10_is_interp_needed(xd) &&
+#endif // CONFIG_EXT_INTERP
x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
int rs;
int best_rs = INT_MAX;
@@ -3889,6 +5389,11 @@
memset(x->zcoeff_blk[TX_4X4], 0, 4);
vp10_zero(best_mbmode);
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
+
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
@@ -4044,6 +5549,10 @@
xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
+#if CONFIG_VAR_TX
+ mbmi->inter_tx_size[0] = mbmi->tx_size;
+#endif
+
if (ref_frame == INTRA_FRAME) {
int rate;
if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
@@ -4116,7 +5625,11 @@
(int) this_rd_thresh, seg_mvs,
bsi, switchable_filter_index,
mi_row, mi_col);
-
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ mbmi->interp_filter != EIGHTTAP) // invalid configuration
+ continue;
+#endif // CONFIG_EXT_INTERP
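These CONFIG_EXT_INTERP guards (here and in the two paths below) enforce one invariant; the rationale is hedged from the surrounding code and the removed mv_has_subpel() helper:

    /* When vp10_is_interp_needed(xd) is 0 -- presumably because every MV
     * is full-pel, so no subpel filter taps are applied -- and the frame
     * filter is SWITCHABLE, only the default EIGHTTAP may be signaled:
     * any other filter would cost bits without changing the prediction. */
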
if (tmp_rd == INT64_MAX)
continue;
rs = vp10_get_switchable_rate(cpi, xd);
@@ -4170,15 +5683,30 @@
mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
tmp_best_filter : cm->interp_filter);
+
if (!pred_exists) {
// Handles the special case when a filter that is not in the
- // switchable list (bilinear, 6-tap) is indicated at the frame level
+ // switchable list (bilinear) is indicated at the frame level
tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
&x->mbmi_ext->ref_mvs[ref_frame][0],
second_ref, best_yrd, &rate, &rate_y,
&distortion, &skippable, &total_sse,
(int) this_rd_thresh, seg_mvs, bsi, 0,
mi_row, mi_col);
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ mbmi->interp_filter != EIGHTTAP) {
+ mbmi->interp_filter = EIGHTTAP;
+ tmp_rd = rd_pick_best_sub8x8_mode(
+ cpi, x,
+ &x->mbmi_ext->ref_mvs[ref_frame][0],
+ second_ref, best_yrd, &rate, &rate_y,
+ &distortion, &skippable, &total_sse,
+ (int) this_rd_thresh, seg_mvs, bsi, 0,
+ mi_row, mi_col);
+ }
+#endif // CONFIG_EXT_INTERP
if (tmp_rd == INT64_MAX)
continue;
} else {
@@ -4214,10 +5742,15 @@
vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_8X8);
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
+#if CONFIG_VAR_TX
+ if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ &uv_sse, BLOCK_8X8, tmp_best_rdu))
+ continue;
+#else
if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
&uv_sse, BLOCK_8X8, tmp_best_rdu))
continue;
-
+#endif
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -4294,8 +5827,14 @@
best_skip2 = this_skip2;
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
for (i = 0; i < 4; i++)
best_bmodes[i] = xd->mi[0]->bmi[i];
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c
index 6a20ee4..e936775 100644
--- a/vp10/encoder/segmentation.c
+++ b/vp10/encoder/segmentation.c
@@ -58,9 +58,7 @@
segcounts[4] + segcounts[5], segcounts[6] + segcounts[7]
};
const unsigned ccc[2] = { cc[0] + cc[1], cc[2] + cc[3] };
-#if CONFIG_MISC_FIXES
int i;
-#endif
segment_tree_probs[0] = get_binary_prob(ccc[0], ccc[1]);
segment_tree_probs[1] = get_binary_prob(cc[0], cc[1]);
@@ -70,16 +68,12 @@
segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]);
segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]);
-#if CONFIG_MISC_FIXES
for (i = 0; i < 7; i++) {
const unsigned *ct = i == 0 ? ccc : i < 3 ? cc + (i & 2)
: segcounts + (i - 3) * 2;
vp10_prob_diff_update_savings_search(ct,
cur_tree_probs[i], &segment_tree_probs[i], DIFF_UPDATE_PROB);
}
-#else
- (void) cur_tree_probs;
-#endif
}
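For reference, the mapping from tree node i to its count pair in the update loop above:

    /* i == 0    -> ccc            (root: segments 0-3 vs 4-7)
     * i == 1, 2 -> cc + 0, cc + 2 (sums of adjacent segment pairs)
     * i == 3..6 -> segcounts + 0, 2, 4, 6 (leaf pairs) */
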
// Based on set of segment counts and probabilities calculate a cost estimate
@@ -214,39 +208,22 @@
void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
struct segmentation *seg = &cm->seg;
-#if CONFIG_MISC_FIXES
struct segmentation_probs *segp = &cm->fc->seg;
-#else
- struct segmentation_probs *segp = &cm->segp;
-#endif
int no_pred_cost;
int t_pred_cost = INT_MAX;
int i, tile_col, mi_row, mi_col;
-#if CONFIG_MISC_FIXES
unsigned (*temporal_predictor_count)[2] = cm->counts.seg.pred;
unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
unsigned *t_unpred_seg_counts = cm->counts.seg.tree_mispred;
-#else
- unsigned temporal_predictor_count[PREDICTION_PROBS][2] = { { 0 } };
- unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 };
- unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
-#endif
vpx_prob no_pred_tree[SEG_TREE_PROBS];
vpx_prob t_pred_tree[SEG_TREE_PROBS];
vpx_prob t_nopred_prob[PREDICTION_PROBS];
-#if CONFIG_MISC_FIXES
(void) xd;
-#else
- // Set default state for the segment tree probabilities and the
- // temporal coding probabilities
- memset(segp->tree_probs, 255, sizeof(segp->tree_probs));
- memset(segp->pred_probs, 255, sizeof(segp->pred_probs));
-#endif
// First of all generate stats regarding how well the last segment map
// predicts this one
@@ -284,13 +261,9 @@
const int count0 = temporal_predictor_count[i][0];
const int count1 = temporal_predictor_count[i][1];
-#if CONFIG_MISC_FIXES
vp10_prob_diff_update_savings_search(temporal_predictor_count[i],
segp->pred_probs[i],
&t_nopred_prob[i], DIFF_UPDATE_PROB);
-#else
- t_nopred_prob[i] = get_binary_prob(count0, count1);
-#endif
// Add in the predictor signaling cost
t_pred_cost += count0 * vp10_cost_zero(t_nopred_prob[i]) +
@@ -301,30 +274,17 @@
// Now choose which coding method to use.
if (t_pred_cost < no_pred_cost) {
seg->temporal_update = 1;
-#if !CONFIG_MISC_FIXES
- memcpy(segp->tree_probs, t_pred_tree, sizeof(t_pred_tree));
- memcpy(segp->pred_probs, t_nopred_prob, sizeof(t_nopred_prob));
-#endif
} else {
seg->temporal_update = 0;
-#if !CONFIG_MISC_FIXES
- memcpy(segp->tree_probs, no_pred_tree, sizeof(no_pred_tree));
-#endif
}
}
void vp10_reset_segment_features(VP10_COMMON *cm) {
struct segmentation *seg = &cm->seg;
-#if !CONFIG_MISC_FIXES
- struct segmentation_probs *segp = &cm->segp;
-#endif
// Set up default state for MB feature flags
seg->enabled = 0;
seg->update_map = 0;
seg->update_data = 0;
-#if !CONFIG_MISC_FIXES
- memset(segp->tree_probs, 255, sizeof(segp->tree_probs));
-#endif
vp10_clearall_segfeatures(seg);
}
diff --git a/vp10/encoder/subexp.c b/vp10/encoder/subexp.c
index 67e820b..fd82231 100644
--- a/vp10/encoder/subexp.c
+++ b/vp10/encoder/subexp.c
@@ -25,8 +25,7 @@
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 11 - CONFIG_MISC_FIXES,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
@@ -86,7 +85,7 @@
static void encode_uniform(vpx_writer *w, int v) {
const int l = 8;
- const int m = (1 << l) - 191 + CONFIG_MISC_FIXES;
+ const int m = (1 << l) - 192;
if (v < m) {
vpx_write_literal(w, v, l - 1);
} else {
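encode_uniform() is a truncated binary code: with l = 8 and m = (1 << 8) - 192 = 64, the first 64 values cost 7 bits and the remaining 128 cost 8, covering 192 symbols in all (the escape branch is elided by this hunk). A hedged sketch of the bit cost in the general case, matching the shape of write_uniform_cost() used in the rdopt changes above:

    /* Bits used by a truncated binary code over n symbols. */
    static int uniform_code_bits(int n, int v) {
      int l = 1;
      while ((1 << l) < n) ++l;            /* l = ceil(log2(n)) */
      return v < (1 << l) - n ? l - 1 : l; /* short codes come first */
    }
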
@@ -212,3 +211,12 @@
vpx_write(w, 0, upd);
}
}
+
+int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
+ const unsigned int ct[2]) {
+ const vpx_prob upd = DIFF_UPDATE_PROB;
+ vpx_prob newp = get_binary_prob(ct[0], ct[1]);
+ const int savings = vp10_prob_diff_update_savings_search(ct, *oldp, &newp,
+ upd);
+ return savings;
+}
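The new helper seeds the savings search with the maximum-likelihood probability implied by the counts. A sketch of that seed, assuming the usual libvpx get_binary_prob() (rounded 8-bit probability of the zero branch, clamped to [1, 255], neutral 128 when there are no counts):

    #include <stdint.h>

    static uint8_t binary_prob_sketch(unsigned int n0, unsigned int n1) {
      const unsigned int den = n0 + n1;
      uint64_t p;
      if (den == 0) return 128;
      p = ((uint64_t)n0 * 256 + (den >> 1)) / den;
      return (uint8_t)(p > 255 ? 255 : (p < 1 ? 1 : p));
    }
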
diff --git a/vp10/encoder/subexp.h b/vp10/encoder/subexp.h
index 04b96c0..bad23d6 100644
--- a/vp10/encoder/subexp.h
+++ b/vp10/encoder/subexp.h
@@ -36,6 +36,8 @@
vpx_prob *bestp,
vpx_prob upd,
int stepsize);
+int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
+ const unsigned int ct[2]);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c
index 5278d3b..d5f896a 100644
--- a/vp10/encoder/temporal_filter.c
+++ b/vp10/encoder/temporal_filter.c
@@ -135,15 +135,38 @@
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
+ int pixel_value = *frame2;
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
- modifier *= 3;
+ // non-local means approach: weight each pixel by the SSE of its 3x3 patch
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
+ modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
+
modifier += rounding;
modifier >>= strength;
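A worked sketch of the resulting per-pixel weight for an interior pixel, where all nine patch positions are valid (index == 9); the final 16 - min(16, modifier) mapping is assumed from the removed comment and the surrounding, unchanged code:

    static int nlm_weight_sketch(const int diff_sse[9], int strength,
                                 int rounding) {
      int i, modifier = 0;
      for (i = 0; i < 9; ++i) modifier += diff_sse[i]; /* 3x3 patch SSE */
      modifier = modifier * 3 / 9;                     /* 3 * average SSE */
      modifier = (modifier + rounding) >> strength;
      return modifier > 16 ? 0 : 16 - modifier;        /* assumed mapping */
    }
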
@@ -416,16 +439,16 @@
}
#else
// Apply the filter (YUV)
- vp10_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
+ vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
- vp10_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
+ vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
- vp10_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
+ vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index e568c0b..725b857 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -541,8 +541,8 @@
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
- const TX_TYPE tx_type = get_tx_type(type, xd, block);
- const scan_order *const so = get_scan(tx_size, tx_type);
+ const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
const int ref = is_inter_block(mbmi);
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[tx_size][type][ref];
@@ -645,6 +645,118 @@
return result;
}
+#if CONFIG_VAR_TX
+void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
+ int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+ int blk_row, int blk_col, int block, int plane,
+ void *arg) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, 0, 0) :
+ mbmi->inter_tx_size[blk_idx];
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+ if (!dry_run)
+ tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+ else
+ set_entropy_context_b(plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, arg);
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize,
+ offsetr, offsetc, block + i * step, plane, arg);
+ }
+ }
+}
+
+void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+ int dry_run, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ TOKENEXTRA *t_backup = *t;
+ const int ctx = vp10_get_skip_context(xd);
+ const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
+ struct tokenize_b_args arg = {cpi, td, t};
+ int plane;
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ if (mbmi->skip) {
+ if (!dry_run)
+ td->counts->skip[ctx][1] += skip_inc;
+ reset_skip_context(xd, bsize);
+ if (dry_run)
+ *t = t_backup;
+ return;
+ }
+
+ if (!dry_run)
+ td->counts->skip[ctx][0] += skip_inc;
+ else
+ *t = t_backup;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_tx_size * 2);
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
+ block, plane, &arg);
+ block += step;
+ }
+ }
+
+ if (!dry_run) {
+ (*t)->token = EOSB_TOKEN;
+ (*t)++;
+ }
+ }
+}
+#endif
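A quick consistency check on the strides in the plane walk above: bh is the tx block width in 4x4 units and step the number of 4x4 coefficient blocks one tx block covers, assuming the usual TX_SIZE enum scale:

    #include <assert.h>

    /* With TX_4X4 = 0 .. TX_32X32 = 3, a tx block spans (1 << tx_size)
     * 4x4 units per side, so the raster index advances by bh * bh. */
    static void check_step_sketch(int tx_size) {
      const int bh = 1 << tx_size;
      const int step = 1 << (2 * tx_size);
      assert(step == bh * bh);
    }
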
+
void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, BLOCK_SIZE bsize) {
VP10_COMMON *const cm = &cpi->common;
diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h
index 1614add..0c9f7da 100644
--- a/vp10/encoder/tokenize.h
+++ b/vp10/encoder/tokenize.h
@@ -51,6 +51,12 @@
struct VP10_COMP;
struct ThreadData;
+#if CONFIG_VAR_TX
+void vp10_tokenize_sb_inter(struct VP10_COMP *cpi, struct ThreadData *td,
+ TOKENEXTRA **t, int dry_run, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+#endif
+
void vp10_tokenize_palette_sb(struct ThreadData *const td,
BLOCK_SIZE bsize, int plane,
TOKENEXTRA **t);
diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c
index e111157..976fe45 100644
--- a/vp10/encoder/x86/dct_sse2.c
+++ b/vp10/encoder/x86/dct_sse2.c
@@ -18,16 +18,37 @@
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
+// Reverse the order of the eight 16-bit words in an __m128i
+static INLINE __m128i mm_reverse_epi16(const __m128i x) {
+ const __m128i a = _mm_shufflelo_epi16(x, 0x1b);
+ const __m128i b = _mm_shufflehi_epi16(a, 0x1b);
+ return _mm_shuffle_epi32(b, 0x4e);
+}
+
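A trace of the three shuffles for reference, with word 0 in the lowest lane:

    /* x = [w0 w1 w2 w3 w4 w5 w6 w7]
     * _mm_shufflelo_epi16(x, 0x1b) -> [w3 w2 w1 w0 w4 w5 w6 w7]
     * _mm_shufflehi_epi16(a, 0x1b) -> [w3 w2 w1 w0 w7 w6 w5 w4]
     * _mm_shuffle_epi32(b, 0x4e)   -> [w7 w6 w5 w4 w3 w2 w1 w0]
     * 0x1b reverses one 4-word half; 0x4e swaps the 64-bit halves. */
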
static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
- int stride) {
+ int stride, int flipud, int fliplr) {
const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
__m128i mask;
- in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
- in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
- in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
- in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ if (!flipud) {
+ in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
+ in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ } else {
+ in[0] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ in[1] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
+ in[2] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in[3] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ }
+
+ if (fliplr) {
+ in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
+ in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
+ in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
+ in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
+ }
in[0] = _mm_slli_epi16(in[0], 4);
in[1] = _mm_slli_epi16(in[1], 4);
@@ -160,23 +181,55 @@
vpx_fdct4x4_sse2(input, output, stride);
break;
case ADST_DCT:
- load_buffer_4x4(input, in, stride);
+ load_buffer_4x4(input, in, stride, 0, 0);
fadst4_sse2(in);
fdct4_sse2(in);
write_buffer_4x4(output, in);
break;
case DCT_ADST:
- load_buffer_4x4(input, in, stride);
+ load_buffer_4x4(input, in, stride, 0, 0);
fdct4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
case ADST_ADST:
- load_buffer_4x4(input, in, stride);
+ load_buffer_4x4(input, in, stride, 0, 0);
fadst4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ load_buffer_4x4(input, in, stride, 1, 0);
+ fadst4_sse2(in);
+ fdct4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_4x4(input, in, stride, 0, 1);
+ fdct4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_4x4(input, in, stride, 1, 1);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_4x4(input, in, stride, 0, 1);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_4x4(input, in, stride, 1, 0);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -627,15 +680,37 @@
// load 8x8 array
static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
- int stride) {
- in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
- in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
- in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
- in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
- in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
- in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
- in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
- in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ int stride, int flipud, int fliplr) {
+ if (!flipud) {
+ in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ } else {
+ in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ }
+
+ if (fliplr) {
+ in[0] = mm_reverse_epi16(in[0]);
+ in[1] = mm_reverse_epi16(in[1]);
+ in[2] = mm_reverse_epi16(in[2]);
+ in[3] = mm_reverse_epi16(in[3]);
+ in[4] = mm_reverse_epi16(in[4]);
+ in[5] = mm_reverse_epi16(in[5]);
+ in[6] = mm_reverse_epi16(in[6]);
+ in[7] = mm_reverse_epi16(in[7]);
+ }
in[0] = _mm_slli_epi16(in[0], 2);
in[1] = _mm_slli_epi16(in[1], 2);
@@ -1144,26 +1219,63 @@
vpx_fdct8x8_sse2(input, output, stride);
break;
case ADST_DCT:
- load_buffer_8x8(input, in, stride);
+ load_buffer_8x8(input, in, stride, 0, 0);
fadst8_sse2(in);
fdct8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
case DCT_ADST:
- load_buffer_8x8(input, in, stride);
+ load_buffer_8x8(input, in, stride, 0, 0);
fdct8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
case ADST_ADST:
- load_buffer_8x8(input, in, stride);
+ load_buffer_8x8(input, in, stride, 0, 0);
fadst8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ load_buffer_8x8(input, in, stride, 1, 0);
+ fadst8_sse2(in);
+ fdct8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_8x8(input, in, stride, 0, 1);
+ fdct8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_8x8(input, in, stride, 1, 1);
+ fadst8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_8x8(input, in, stride, 0, 1);
+ fadst8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_8x8(input, in, stride, 1, 0);
+ fadst8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -1171,15 +1283,37 @@
}
static INLINE void load_buffer_16x16(const int16_t* input, __m128i *in0,
- __m128i *in1, int stride) {
- // load first 8 columns
- load_buffer_8x8(input, in0, stride);
- load_buffer_8x8(input + 8 * stride, in0 + 8, stride);
+ __m128i *in1, int stride,
+ int flipud, int fliplr) {
+ // Load 4 8x8 blocks
+ const int16_t *topL = input;
+ const int16_t *topR = input + 8;
+ const int16_t *botL = input + 8 * stride;
+ const int16_t *botR = input + 8 * stride + 8;
- input += 8;
+ const int16_t *tmp;
+
+ if (flipud) {
+ // Swap the two 8x8 blocks in the left column
+ tmp = topL; topL = botL; botL = tmp;
+ // Swap the two 8x8 blocks in the right column
+ tmp = topR; topR = botR; botR = tmp;
+ }
+
+ if (fliplr) {
+ // Swap the two 8x8 blocks in the top row
+ tmp = topL; topL = topR; topR = tmp;
+ // Swap the two 8x8 blocks in the bottom row
+ tmp = botL; botL = botR; botR = tmp;
+ }
+
+ // load first 8 columns
+ load_buffer_8x8(topL, in0, stride, flipud, fliplr);
+ load_buffer_8x8(botL, in0 + 8, stride, flipud, fliplr);
+
// load second 8 columns
- load_buffer_8x8(input, in1, stride);
- load_buffer_8x8(input + 8 * stride, in1 + 8, stride);
+ load_buffer_8x8(topR, in1, stride, flipud, fliplr);
+ load_buffer_8x8(botR, in1 + 8, stride, flipud, fliplr);
}
static INLINE void write_buffer_16x16(tran_low_t *output, __m128i *in0,
@@ -2031,26 +2165,63 @@
vpx_fdct16x16_sse2(input, output, stride);
break;
case ADST_DCT:
- load_buffer_16x16(input, in0, in1, stride);
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fdct16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
case DCT_ADST:
- load_buffer_16x16(input, in0, in1, stride);
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
case ADST_ADST:
- load_buffer_16x16(input, in0, in1, stride);
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ load_buffer_16x16(input, in0, in1, stride, 1, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fdct16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 1);
+ fdct16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 1, 1);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 1);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_16x16(input, in0, in1, stride, 1, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
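As in the 4x4 and 8x8 paths, the flipped transform types reuse the plain ADST/DCT kernels and only flip the residual at load time. A hedged sketch of the dispatch pattern (the enum values are placeholders for illustration; the real ones live in the CONFIG_EXT_TX transform-type enum):

    enum { DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST, FLIPADST_DCT,
           DCT_FLIPADST, FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST };

    typedef struct { int flipud, fliplr; } flip_cfg;

    /* Map a transform type to the input flips applied by load_buffer_*. */
    static flip_cfg flip_for_tx_type(int tx_type) {
      flip_cfg f = { 0, 0 };
      switch (tx_type) {
        case FLIPADST_DCT: case FLIPADST_ADST: f.flipud = 1; break;
        case DCT_FLIPADST: case ADST_FLIPADST: f.fliplr = 1; break;
        case FLIPADST_FLIPADST: f.flipud = f.fliplr = 1; break;
        default: break;
      }
      return f;
    }
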
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 2eb3488..461815c 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -63,6 +63,15 @@
VP10_COMMON_SRCS-yes += common/scan.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c
+VP10_COMMON_SRCS-yes += common/vp10_txfm.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.c
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.c
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.c
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index 21c9c03..c90b936 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -91,9 +91,6 @@
size_t pending_cx_data_sz;
int pending_frame_count;
size_t pending_frame_sizes[8];
-#if !CONFIG_MISC_FIXES
- size_t pending_frame_magnitude;
-#endif
vpx_image_t preview_img;
vpx_enc_frame_flags_t next_frame_flags;
vp8_postproc_cfg_t preview_ppcfg;
@@ -783,39 +780,30 @@
uint8_t marker = 0xc0;
unsigned int mask;
int mag, index_sz;
-#if CONFIG_MISC_FIXES
int i;
size_t max_frame_sz = 0;
-#endif
assert(ctx->pending_frame_count);
assert(ctx->pending_frame_count <= 8);
// Add the number of frames to the marker byte
marker |= ctx->pending_frame_count - 1;
-#if CONFIG_MISC_FIXES
for (i = 0; i < ctx->pending_frame_count - 1; i++) {
const size_t frame_sz = (unsigned int) ctx->pending_frame_sizes[i] - 1;
max_frame_sz = frame_sz > max_frame_sz ? frame_sz : max_frame_sz;
}
-#endif
// Choose the magnitude
for (mag = 0, mask = 0xff; mag < 4; mag++) {
-#if CONFIG_MISC_FIXES
if (max_frame_sz <= mask)
break;
-#else
- if (ctx->pending_frame_magnitude < mask)
- break;
-#endif
mask <<= 8;
mask |= 0xff;
}
marker |= mag << 3;
// Write the index
- index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - CONFIG_MISC_FIXES);
+ index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - 1);
if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) {
uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz;
int i, j;
@@ -835,11 +823,11 @@
#endif
*x++ = marker;
- for (i = 0; i < ctx->pending_frame_count - CONFIG_MISC_FIXES; i++) {
+ for (i = 0; i < ctx->pending_frame_count - 1; i++) {
unsigned int this_sz;
assert(ctx->pending_frame_sizes[i] > 0);
- this_sz = (unsigned int)ctx->pending_frame_sizes[i] - CONFIG_MISC_FIXES;
+ this_sz = (unsigned int)ctx->pending_frame_sizes[i] - 1;
for (j = 0; j <= mag; j++) {
*x++ = this_sz & 0xff;
this_sz >>= 8;
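A worked example of the index this writes, using the formulas above: two frames of 200 and 1,000 bytes. Sizes are stored minus one and the last frame's size is implicit, so max_frame_sz = 199 fits in one byte and mag = 0:

    /* marker = 0xc0 | (mag << 3) | (count - 1) = 0xc0 | 0 | 1 = 0xc1
     * index  = { 0xc1, 199, 0xc1 }
     * index_sz = 2 + (mag + 1) * (count - 1) = 3 bytes */
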
@@ -993,9 +981,6 @@
ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude |= size;
-#endif
cx_data += size;
cx_data_sz -= size;
@@ -1012,9 +997,6 @@
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
ctx->pending_frame_count = 0;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude = 0;
-#endif
ctx->output_cx_pkt_cb.output_cx_pkt(
&pkt, ctx->output_cx_pkt_cb.user_priv);
}
@@ -1031,9 +1013,6 @@
if (ctx->pending_cx_data) {
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude |= size;
-#endif
ctx->pending_cx_data_sz += size;
// write the superframe index only when no output callback is registered
if (!ctx->output_cx_pkt_cb.output_cx_pkt)
@@ -1043,9 +1022,6 @@
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
ctx->pending_frame_count = 0;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude = 0;
-#endif
} else {
pkt.data.frame.buf = cx_data;
pkt.data.frame.sz = size;
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 61eb591..41994dc 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -98,8 +98,13 @@
return mbmi->ref_frame[0] > INTRA_FRAME;
}
+static INLINE int is_compound_ref(const MV_REFERENCE_FRAME *ref_frame) {
+ assert(ref_frame != NULL);
+ return ref_frame[1] > INTRA_FRAME;
+}
+
static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
- return mbmi->ref_frame[1] > INTRA_FRAME;
+ return is_compound_ref(mbmi->ref_frame);
}
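The refactor lets vp9_rdopt.c below test compound prediction on a bare ref_frame pair (vp9_mode_order / vp9_ref_order entries) without materializing an MB_MODE_INFO:

    /* Equivalent spellings after this change:
     *   comp_pred = second_ref_frame > INTRA_FRAME;  (old)
     *   comp_pred = is_compound_ref(refs);           (new) */
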
PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 0def2cf..2270a06 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -20,7 +20,6 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
-
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
size_t consec_zero_mv_size;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index ae14464..e32130e 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1545,8 +1545,8 @@
const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- const int refs[2] = {mbmi->ref_frame[0],
- mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
+ const int refs[2] = { mbmi->ref_frame[0],
+ mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
int_mv ref_mv[2];
int ite, ref;
const InterpKernel *kernel = vp9_filter_kernels[mbmi->interp_filter];
@@ -2387,8 +2387,8 @@
const int this_mode = mbmi->mode;
int_mv *frame_mv = mode_mv[this_mode];
int i;
- int refs[2] = { mbmi->ref_frame[0],
- (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
+ const int refs[2] = { mbmi->ref_frame[0],
+ mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
int_mv cur_mv[2];
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
@@ -3121,10 +3121,14 @@
int this_skip2 = 0;
int64_t total_sse = INT64_MAX;
int early_term = 0;
+ const MV_REFERENCE_FRAME refs[2] = {
+ vp9_mode_order[mode_index].ref_frame[0],
+ vp9_mode_order[mode_index].ref_frame[1]
+ };
this_mode = vp9_mode_order[mode_index].mode;
- ref_frame = vp9_mode_order[mode_index].ref_frame[0];
- second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
+ ref_frame = refs[0];
+ second_ref_frame = refs[1];
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
@@ -3213,7 +3217,7 @@
continue;
}
- comp_pred = second_ref_frame > INTRA_FRAME;
+ comp_pred = is_compound_ref(refs);
if (comp_pred) {
if (!cpi->allow_comp_inter_inter)
continue;
@@ -3506,7 +3510,7 @@
if (best_mbmode.mode == NEWMV) {
const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
best_mbmode.ref_frame[1]};
- int comp_pred_mode = refs[1] > INTRA_FRAME;
+ int comp_pred_mode = is_compound_ref(refs);
if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
@@ -3800,9 +3804,13 @@
int64_t total_sse = INT_MAX;
int early_term = 0;
struct buf_2d backup_yv12[2][MAX_MB_PLANE];
+ const MV_REFERENCE_FRAME refs[2] = {
+ vp9_ref_order[ref_index].ref_frame[0],
+ vp9_ref_order[ref_index].ref_frame[1]
+ };
- ref_frame = vp9_ref_order[ref_index].ref_frame[0];
- second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
+ ref_frame = refs[0];
+ second_ref_frame = refs[1];
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
@@ -3841,7 +3849,7 @@
tile_data->thresh_freq_fact[bsize][ref_index]))
continue;
- comp_pred = second_ref_frame > INTRA_FRAME;
+ comp_pred = is_compound_ref(refs);
if (comp_pred) {
if (!cpi->allow_comp_inter_inter)
continue;
@@ -4119,7 +4127,7 @@
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
- if (second_ref_frame > INTRA_FRAME) {
+ if (is_compound_ref(mbmi->ref_frame)) {
rate2 += ref_costs_comp[ref_frame];
} else {
rate2 += ref_costs_single[ref_frame];
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index bd99c6d..7504c0e 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -806,9 +806,12 @@
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *)
#define VPX_CTRL_VP9E_SET_SVC_REF_FRAME_CONFIG
-VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
+/*!\brief VP9E_SET_RENDER_SIZE control type
+ *
+ * TODO(rbultje): add support for this control in ffmpeg
+ */
#define VPX_CTRL_VP9E_SET_RENDER_SIZE
-
+VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
/*!\endcond */
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
diff --git a/vpx_dsp/bitreader_buffer.c b/vpx_dsp/bitreader_buffer.c
index bb91726..8c47d54 100644
--- a/vpx_dsp/bitreader_buffer.c
+++ b/vpx_dsp/bitreader_buffer.c
@@ -43,11 +43,7 @@
int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb,
int bits) {
-#if CONFIG_MISC_FIXES
const int nbits = sizeof(unsigned) * 8 - bits - 1;
const unsigned value = vpx_rb_read_literal(rb, bits + 1) << nbits;
return ((int) value) >> nbits;
-#else
- return vpx_rb_read_signed_literal(rb, bits);
-#endif
}
diff --git a/vpx_dsp/bitwriter_buffer.c b/vpx_dsp/bitwriter_buffer.c
index 6182a72..8633372 100644
--- a/vpx_dsp/bitwriter_buffer.c
+++ b/vpx_dsp/bitwriter_buffer.c
@@ -39,10 +39,5 @@
void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb,
int data, int bits) {
-#if CONFIG_MISC_FIXES
vpx_wb_write_literal(wb, data, bits + 1);
-#else
- vpx_wb_write_literal(wb, abs(data), bits);
- vpx_wb_write_bit(wb, data < 0);
-#endif
}
diff --git a/vpx_dsp/intrapred.c b/vpx_dsp/intrapred.c
index a9669e5..b03168f 100644
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -832,11 +832,6 @@
intra_pred_no_4x4(d207)
intra_pred_no_4x4(d63)
intra_pred_no_4x4(d45)
-#if CONFIG_MISC_FIXES
-intra_pred_allsizes(d207e)
-intra_pred_allsizes(d63e)
-intra_pred_no_4x4(d45e)
-#endif
intra_pred_no_4x4(d117)
intra_pred_no_4x4(d135)
intra_pred_no_4x4(d153)