Merge branch 'masterbase' into nextgenv2
Conflicts:
vp10/encoder/rdopt.c
Change-Id: If720e7f9810378d24bf9fd51a95fd29c3bc5d774
diff --git a/configure b/configure
index 095cddf..9769880 100755
--- a/configure
+++ b/configure
@@ -270,7 +270,19 @@
spatial_svc
fp_mb_stats
emulate_hardware
+ var_tx
+ ref_mv
+ ext_tx
misc_fixes
+ ext_intra
+ ext_inter
+ ext_interp
+ ext_refs
+ supertx
+ ans
+ loop_restoration
+ ext_partition
+ obmc
"
CONFIG_LIST="
dependency_tracking
diff --git a/test/hbd_metrics_test.cc b/test/hbd_metrics_test.cc
new file mode 100644
index 0000000..bf75a29
--- /dev/null
+++ b/test/hbd_metrics_test.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <new>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./vpx_config.h"
+#include "vpx_dsp/ssim.h"
+#include "vpx_ports/mem.h"
+#include "vpx_ports/msvc.h"
+#include "vpx_scale/yv12config.h"
+
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest);
+typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
+ uint32_t bd);
+
+
+double compute_hbd_psnrhvs(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
+ uint32_t bit_depth) {
+ double tempy, tempu, tempv;
+ return vpx_psnrhvs(source, dest,
+ &tempy, &tempu, &tempv, bit_depth);
+}
+
+double compute_psnrhvs(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest) {
+ double tempy, tempu, tempv;
+ return vpx_psnrhvs(source, dest,
+ &tempy, &tempu, &tempv, 8);
+}
+
+double compute_hbd_fastssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
+ uint32_t bit_depth) {
+ double tempy, tempu, tempv;
+ return vpx_calc_fastssim(source, dest,
+ &tempy, &tempu, &tempv, bit_depth);
+}
+
+double compute_fastssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest) {
+ double tempy, tempu, tempv;
+ return vpx_calc_fastssim(source, dest,
+ &tempy, &tempu, &tempv, 8);
+}
+
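+// vpx_calc_ssim()/vpx_highbd_calc_ssim() return an SSIM value plus a weight;
+// ssim / weight is the mean SSIM in [0, 1], and 100 * pow(mean, 8.0) is the
+// 0-100 scale used by the encoder's internal stats, which expands
+// differences near 1.0.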
+double compute_hbd_vpxssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest,
+ uint32_t bit_depth) {
+ double ssim, weight;
+ ssim = vpx_highbd_calc_ssim(source, dest, &weight, bit_depth);
+ return 100 * pow(ssim / weight, 8.0);
+}
+
+double compute_vpxssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest) {
+ double ssim, weight;
+ ssim = vpx_calc_ssim(source, dest, &weight);
+ return 100 * pow(ssim / weight, 8.0);
+}
+
+
+class HBDMetricsTestBase {
+ public:
+ virtual ~HBDMetricsTestBase() {}
+
+ protected:
+ void RunAccuracyCheck() {
+ const int width = 1920;
+ const int height = 1080;
+ int i = 0;
+ const uint8_t kPixFiller = 128;
+ YV12_BUFFER_CONFIG lbd_src, lbd_dst;
+ YV12_BUFFER_CONFIG hbd_src, hbd_dst;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ double lbd_db, hbd_db;
+
+ memset(&lbd_src, 0, sizeof(lbd_src));
+ memset(&lbd_dst, 0, sizeof(lbd_dst));
+ memset(&hbd_src, 0, sizeof(hbd_src));
+ memset(&hbd_dst, 0, sizeof(hbd_dst));
+
+ vpx_alloc_frame_buffer(&lbd_src, width, height, 1, 1, 0, 32, 16);
+ vpx_alloc_frame_buffer(&lbd_dst, width, height, 1, 1, 0, 32, 16);
+ vpx_alloc_frame_buffer(&hbd_src, width, height, 1, 1, 1, 32, 16);
+ vpx_alloc_frame_buffer(&hbd_dst, width, height, 1, 1, 1, 32, 16);
+
+ memset(lbd_src.buffer_alloc, kPixFiller, lbd_src.buffer_alloc_sz);
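+    // Mirror each 8-bit pixel into the high-bit-depth buffers, shifted left
+    // by (bit_depth_ - 8), so both buffer pairs carry the same signal at
+    // different precisions; LBD and HBD metrics should agree within
+    // threshold_.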
+ while (i < lbd_src.buffer_alloc_sz) {
+ uint16_t spel, dpel;
+ spel = lbd_src.buffer_alloc[i];
+ // Create some distortion for dst buffer.
+ dpel = rnd.Rand8();
+ lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+ ((uint16_t*)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8);
+ ((uint16_t*)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+ i++;
+ }
+
+ lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+ hbd_db = hbd_metric_(&hbd_src, &hbd_dst, bit_depth_);
+ EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+ i = 0;
+ while (i < lbd_src.buffer_alloc_sz) {
+ uint16_t dpel;
+ // Create some small distortion for dst buffer.
+ dpel = 120 + (rnd.Rand8() >> 4);
+ lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+ ((uint16_t*)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+ i++;
+ }
+
+ lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+ hbd_db = hbd_metric_(&hbd_src, &hbd_dst, bit_depth_);
+ EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+ i = 0;
+ while (i < lbd_src.buffer_alloc_sz) {
+ uint16_t dpel;
+ // Create some small distortion for dst buffer.
+ dpel = 126 + (rnd.Rand8() >> 6);
+ lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+ ((uint16_t*)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+ i++;
+ }
+
+ lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+ hbd_db = hbd_metric_(&hbd_src, &hbd_dst, bit_depth_);
+ EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+ vpx_free_frame_buffer(&lbd_src);
+ vpx_free_frame_buffer(&lbd_dst);
+ vpx_free_frame_buffer(&hbd_src);
+ vpx_free_frame_buffer(&hbd_dst);
+ }
+
+ int bit_depth_;
+ double threshold_;
+ LBDMetricFunc lbd_metric_;
+ HBDMetricFunc hbd_metric_;
+};
+
+typedef std::tr1::tuple<LBDMetricFunc,
+ HBDMetricFunc, int, double> MetricTestTParam;
+class HBDMetricsTest
+ : public HBDMetricsTestBase,
+ public ::testing::TestWithParam<MetricTestTParam> {
+ public:
+ virtual void SetUp() {
+ lbd_metric_ = GET_PARAM(0);
+ hbd_metric_ = GET_PARAM(1);
+ bit_depth_ = GET_PARAM(2);
+ threshold_ = GET_PARAM(3);
+ }
+ virtual void TearDown() {}
+};
+
+TEST_P(HBDMetricsTest, RunAccuracyCheck) {
+ RunAccuracyCheck();
+}
+
+// Allow small variation due to floating point operations.
+static const double kSsim_thresh = 0.001;
+// Allow some additional errors accumulated in floating point operations.
+static const double kFSsim_thresh = 0.03;
+// Allow some extra variation due to rounding error accumulated in dct.
+static const double kPhvs_thresh = 0.3;
+
+INSTANTIATE_TEST_CASE_P(
+ VPXSSIM, HBDMetricsTest,
+ ::testing::Values(
+ MetricTestTParam(&compute_vpxssim, &compute_hbd_vpxssim, 10,
+ kSsim_thresh),
+ MetricTestTParam(&compute_vpxssim, &compute_hbd_vpxssim, 12,
+ kSsim_thresh)));
+INSTANTIATE_TEST_CASE_P(
+ FASTSSIM, HBDMetricsTest,
+ ::testing::Values(
+ MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, 10,
+ kFSsim_thresh),
+ MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, 12,
+ kFSsim_thresh)));
+INSTANTIATE_TEST_CASE_P(
+ PSNRHVS, HBDMetricsTest,
+ ::testing::Values(
+ MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, 10,
+ kPhvs_thresh),
+ MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, 12,
+ kPhvs_thresh)));
+
+} // namespace
+
diff --git a/test/test.mk b/test/test.mk
index e8e8304..7926cae 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -166,16 +166,24 @@
LIBVPX_TEST_SRCS-yes += vp10_inv_txfm_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc
endif # VP10
## Multi-codec / unconditional whitebox tests.
-
ifeq ($(findstring yes,$(CONFIG_VP9_ENCODER)$(CONFIG_VP10_ENCODER)),yes)
LIBVPX_TEST_SRCS-yes += avg_test.cc
endif
-
+ifeq ($(CONFIG_INTERNAL_STATS),yes)
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_HIGHBITDEPTH) += hbd_metrics_test.cc
+endif
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_txfm_test.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm1d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm2d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_convolve_test.cc
TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc
new file mode 100644
index 0000000..441583a
--- /dev/null
+++ b/test/vp10_ans_test.cc
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <ctime>
+#include <utility>
+#include <vector>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "vp10/common/ans.h"
+#include "vp10/encoder/treewriter.h"
+#include "vpx_dsp/bitreader.h"
+#include "vpx_dsp/bitwriter.h"
+
+namespace {
+typedef std::vector<std::pair<uint8_t, bool> > PvVec;
+
+PvVec abs_encode_build_vals(int iters) {
+ PvVec ret;
+ libvpx_test::ACMRandom gen(0x30317076);
+ double entropy = 0;
+ for (int i = 0; i < iters; ++i) {
+ uint8_t p;
+ do {
+ p = gen.Rand8();
+ } while (p == 0); // zero is not a valid coding probability
+ bool b = gen.Rand8() < p;
+ ret.push_back(std::make_pair(static_cast<uint8_t>(p), b));
+ double d = p / 256.;
+ entropy += -d * log2(d) - (1 - d) * log2(1 - d);
+ }
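+  // The accumulated binary entropy is the ideal coded size, in bits, of the
+  // generated sequence -- the baseline for the coder sizes printed below.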
+ printf("entropy %f\n", entropy);
+ return ret;
+}
+
+bool check_rabs(const PvVec &pv_vec, uint8_t *buf) {
+ AnsCoder a;
+ ans_write_init(&a, buf);
+
+ std::clock_t start = std::clock();
+ for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
+ ++it) {
+ rabs_write(&a, it->second, 256 - it->first);
+ }
+ std::clock_t enc_time = std::clock() - start;
+ int offset = ans_write_end(&a);
+ bool okay = true;
+ AnsDecoder d;
+ if (ans_read_init(&d, buf, offset)) return false;
+ start = std::clock();
+ for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+ okay &= rabs_read(&d, 256 - it->first) == it->second;
+ }
+ std::clock_t dec_time = std::clock() - start;
+ if (!okay) return false;
+ printf("rABS size %d enc_time %f dec_time %f\n", offset,
+ static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+ static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+ return ans_read_end(&d);
+}
+
+bool check_rabs_asc(const PvVec &pv_vec, uint8_t *buf) {
+ AnsCoder a;
+ ans_write_init(&a, buf);
+
+ std::clock_t start = std::clock();
+ for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
+ ++it) {
+ rabs_asc_write(&a, it->second, 256 - it->first);
+ }
+ std::clock_t enc_time = std::clock() - start;
+ int offset = ans_write_end(&a);
+ bool okay = true;
+ AnsDecoder d;
+ if (ans_read_init(&d, buf, offset)) return false;
+ start = std::clock();
+ for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+ okay &= rabs_asc_read(&d, 256 - it->first) == it->second;
+ }
+ std::clock_t dec_time = std::clock() - start;
+ if (!okay) return false;
+ printf("rABS (asc) size %d enc_time %f dec_time %f\n", offset,
+ static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+ static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+ return ans_read_end(&d);
+}
+
+bool check_uabs(const PvVec &pv_vec, uint8_t *buf) {
+ AnsCoder a;
+ ans_write_init(&a, buf);
+
+ std::clock_t start = std::clock();
+ for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
+ ++it) {
+ uabs_write(&a, it->second, 256 - it->first);
+ }
+ std::clock_t enc_time = std::clock() - start;
+ int offset = ans_write_end(&a);
+ bool okay = true;
+ AnsDecoder d;
+ if (ans_read_init(&d, buf, offset)) return false;
+ start = std::clock();
+ for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+ okay &= uabs_read(&d, 256 - it->first) == it->second;
+ }
+ std::clock_t dec_time = std::clock() - start;
+ if (!okay) return false;
+ printf("uABS size %d enc_time %f dec_time %f\n", offset,
+ static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+ static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+ return ans_read_end(&d);
+}
+
+bool check_vpxbool(const PvVec &pv_vec, uint8_t *buf) {
+ vpx_writer w;
+ vpx_reader r;
+ vpx_start_encode(&w, buf);
+
+ std::clock_t start = std::clock();
+ for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+ vpx_write(&w, it->second, 256 - it->first);
+ }
+ std::clock_t enc_time = std::clock() - start;
+ vpx_stop_encode(&w);
+ bool okay = true;
+ vpx_reader_init(&r, buf, w.pos, NULL, NULL);
+ start = std::clock();
+ for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+ okay &= vpx_read(&r, 256 - it->first) == it->second;
+ }
+ std::clock_t dec_time = std::clock() - start;
+ printf("VPX size %d enc_time %f dec_time %f\n", w.pos,
+ static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+ static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+ return okay;
+}
+
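+// Each entry is {prob, cum_prob} with the probs summing to ans_p8_precision
+// (70 + 70 + 100 + 16 == 256); cum_prob[i] is the total probability of all
+// higher-indexed symbols, the descending layout rans_build_dec_tab() expects.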
+const rans_sym rans_sym_tab[] = {
+ {70, 186}, {70, 116}, {100, 16}, {16, 0},
+};
+const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]);
+
+std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
+ std::vector<int> p_to_sym;
+ int i = 0;
+ while (p_to_sym.size() < 256) {
+ p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
+ ++i;
+ }
+ assert(p_to_sym.size() == 256);
+ std::vector<int> ret;
+ libvpx_test::ACMRandom gen(18543637);
+ for (int i = 0; i < iters; ++i) {
+ int sym = p_to_sym[gen.Rand8()];
+ ret.push_back(sym);
+ }
+ return ret;
+}
+
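+// Expands the {prob, cum_prob} table into a 256-entry LUT that maps each
+// state remainder to the symbol whose [cum_prob, cum_prob + prob) interval
+// contains it.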
+void rans_build_dec_tab(const struct rans_sym sym_tab[],
+ rans_dec_lut dec_tab) {
+ int val = 0;
+ int i;
+ for (i = ans_p8_precision - 1; i >= 0; --i) {
+ dec_tab[i].val = val;
+ dec_tab[i].prob = sym_tab[val].prob;
+ dec_tab[i].cum_prob = sym_tab[val].cum_prob;
+ if (i == sym_tab[val].cum_prob) ++val;
+ }
+}
+
+bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab,
+ uint8_t *buf) {
+ AnsCoder a;
+ ans_write_init(&a, buf);
+ rans_dec_lut dec_tab;
+ rans_build_dec_tab(tab, dec_tab);
+
+ std::clock_t start = std::clock();
+ for (std::vector<int>::const_reverse_iterator it = sym_vec.rbegin();
+ it != sym_vec.rend(); ++it) {
+ rans_write(&a, &tab[*it]);
+ }
+ std::clock_t enc_time = std::clock() - start;
+ int offset = ans_write_end(&a);
+ bool okay = true;
+ AnsDecoder d;
+ if (ans_read_init(&d, buf, offset)) return false;
+ start = std::clock();
+ for (std::vector<int>::const_iterator it = sym_vec.begin();
+ it != sym_vec.end(); ++it) {
+ okay &= rans_read(&d, dec_tab) == *it;
+ }
+ std::clock_t dec_time = std::clock() - start;
+ if (!okay) return false;
+ printf("rANS size %d enc_time %f dec_time %f\n", offset,
+ static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+ static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+ return ans_read_end(&d);
+}
+
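+// Builds a maximally unbalanced ("comb") tree: node i has leaf -i on the
+// left and the next node on the right, so symbol i codes in i + 1 bits and
+// the last symbol shares the maximum depth.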
+void build_tree(vpx_tree_index *tree, int num_syms) {
+ vpx_tree_index i;
+ int sym = 0;
+ for (i = 0; i < num_syms - 1; ++i) {
+ tree[2 * i] = sym--;
+ tree[2 * i + 1] = 2 * (i + 1);
+ }
+ tree[2 * i - 1] = sym;
+}
+
+// treep are the probabilities of tree nodes like:
+// *
+// / \
+// -sym0 *
+// / \
+// -sym1 *
+// / \
+// -sym2 -sym3
+void tab2tree(const rans_sym *tab, int tab_size, vpx_prob *treep) {
+ const unsigned basep = 256;
+ unsigned pleft = basep;
+ for (int i = 0; i < tab_size - 1; ++i) {
+ unsigned prob = (tab[i].prob * basep + (basep / 2)) / pleft;
+ assert(prob > 0 && prob < 256);
+ treep[i] = prob;
+ pleft -= tab[i].prob;
+ }
+}
+
+struct sym_bools {
+ unsigned bits;
+ int len;
+};
+
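+// In the comb tree above, symbol i's code is i ones followed by a zero,
+// except the last symbol, which is all ones at the same length as its
+// predecessor.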
+static void make_tree_bits_tab(sym_bools *tab, int num_syms) {
+ unsigned bits = 0;
+ int len = 0;
+ int i;
+ for (i = 0; i < num_syms - 1; ++i) {
+ bits *= 2;
+ ++len;
+ tab[i].bits = bits;
+ tab[i].len = len;
+ ++bits;
+ }
+ tab[i].bits = bits;
+ tab[i].len = len;
+}
+
+void build_tpb(vpx_prob probs[/*num_syms*/],
+ vpx_tree_index tree[/*2*num_syms*/],
+ sym_bools bit_len[/*num_syms*/],
+ const rans_sym sym_tab[/*num_syms*/], int num_syms) {
+ tab2tree(sym_tab, num_syms, probs);
+ build_tree(tree, num_syms);
+ make_tree_bits_tab(bit_len, num_syms);
+}
+
+bool check_vpxtree(const std::vector<int> &sym_vec, const rans_sym *sym_tab,
+ uint8_t *buf) {
+ vpx_writer w;
+ vpx_reader r;
+ vpx_start_encode(&w, buf);
+
+ vpx_prob probs[kDistinctSyms];
+ vpx_tree_index tree[2 * kDistinctSyms];
+ sym_bools bit_len[kDistinctSyms];
+ build_tpb(probs, tree, bit_len, sym_tab, kDistinctSyms);
+
+ std::clock_t start = std::clock();
+ for (std::vector<int>::const_iterator it = sym_vec.begin();
+ it != sym_vec.end(); ++it) {
+ vp10_write_tree(&w, tree, probs, bit_len[*it].bits, bit_len[*it].len, 0);
+ }
+ std::clock_t enc_time = std::clock() - start;
+ vpx_stop_encode(&w);
+ vpx_reader_init(&r, buf, w.pos, NULL, NULL);
+ start = std::clock();
+ for (std::vector<int>::const_iterator it = sym_vec.begin();
+ it != sym_vec.end(); ++it) {
+ if (vpx_read_tree(&r, tree, probs) != *it) return false;
+ }
+ std::clock_t dec_time = std::clock() - start;
+ printf("VPXtree size %u enc_time %f dec_time %f\n", w.pos,
+ static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+ static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+ return true;
+}
+
+class Vp10AbsTest : public ::testing::Test {
+ protected:
+ static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); }
+ virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; }
+ virtual void TearDown() { delete[] buf_; }
+ static const int kNumBools = 100000000;
+ static PvVec pv_vec_;
+ uint8_t *buf_;
+};
+PvVec Vp10AbsTest::pv_vec_;
+
+class Vp10AnsTest : public ::testing::Test {
+ protected:
+ static void SetUpTestCase() {
+ sym_vec_ = ans_encode_build_vals(rans_sym_tab, kNumSyms);
+ }
+ virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; }
+ virtual void TearDown() { delete[] buf_; }
+ static const int kNumSyms = 25000000;
+ static std::vector<int> sym_vec_;
+ uint8_t *buf_;
+};
+std::vector<int> Vp10AnsTest::sym_vec_;
+
+TEST_F(Vp10AbsTest, Vpxbool) { EXPECT_TRUE(check_vpxbool(pv_vec_, buf_)); }
+TEST_F(Vp10AbsTest, Rabs) { EXPECT_TRUE(check_rabs(pv_vec_, buf_)); }
+TEST_F(Vp10AbsTest, RabsAsc) { EXPECT_TRUE(check_rabs_asc(pv_vec_, buf_)); }
+TEST_F(Vp10AbsTest, Uabs) { EXPECT_TRUE(check_uabs(pv_vec_, buf_)); }
+
+TEST_F(Vp10AnsTest, Rans) {
+ EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab, buf_));
+}
+TEST_F(Vp10AnsTest, Vpxtree) {
+ EXPECT_TRUE(check_vpxtree(sym_vec_, rans_sym_tab, buf_));
+}
+} // namespace
diff --git a/test/vp10_convolve_test.cc b/test/vp10_convolve_test.cc
new file mode 100644
index 0000000..eea7068
--- /dev/null
+++ b/test/vp10_convolve_test.cc
@@ -0,0 +1,250 @@
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "vp10/common/filter.h"
+#include "vp10/common/vp10_convolve.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
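+// Each test below filters a filter-tap x filter-tap source patch down to a
+// single output pixel (w == h == 1) so the result can be checked against a
+// directly computed horizontal-then-vertical reference.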
+TEST(VP10ConvolveTest, vp10_convolve8) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ INTERP_FILTER interp_filter = EIGHTTAP;
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ ptrdiff_t filter_size = filter_params.tap;
+ int filter_center = filter_size / 2 - 1;
+ uint8_t src[12 * 12];
+ int src_stride = filter_size;
+ uint8_t dst[1] = {0};
+ uint8_t dst1[1] = {0};
+ int dst_stride = 1;
+ int x_step_q4 = 16;
+ int y_step_q4 = 16;
+ int subpel_x_q4 = 3;
+ int subpel_y_q4 = 2;
+ int avg = 0;
+
+ int w = 1;
+ int h = 1;
+
+ for (int i = 0; i < filter_size * filter_size; i++) {
+ src[i] = rnd.Rand16() % (1 << 8);
+ }
+
+ vp10_convolve(src + src_stride * filter_center + filter_center, src_stride,
+ dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4,
+ subpel_y_q4, y_step_q4, avg);
+
+ const int16_t* x_filter =
+ vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+ const int16_t* y_filter =
+ vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+
+ vpx_convolve8_c(src + src_stride * filter_center + filter_center, src_stride,
+ dst1, dst_stride, x_filter, 16, y_filter, 16, w, h);
+ EXPECT_EQ(dst[0], dst1[0]);
+}
+TEST(VP10ConvolveTest, vp10_convolve) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ INTERP_FILTER interp_filter = EIGHTTAP;
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ ptrdiff_t filter_size = filter_params.tap;
+ int filter_center = filter_size / 2 - 1;
+ uint8_t src[12 * 12];
+ int src_stride = filter_size;
+ uint8_t dst[1] = {0};
+ int dst_stride = 1;
+ int x_step_q4 = 16;
+ int y_step_q4 = 16;
+ int subpel_x_q4 = 3;
+ int subpel_y_q4 = 2;
+ int avg = 0;
+
+ int w = 1;
+ int h = 1;
+
+ for (int i = 0; i < filter_size * filter_size; i++) {
+ src[i] = rnd.Rand16() % (1 << 8);
+ }
+
+ vp10_convolve(src + src_stride * filter_center + filter_center, src_stride,
+ dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4,
+ subpel_y_q4, y_step_q4, avg);
+
+ const int16_t* x_filter =
+ vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+ const int16_t* y_filter =
+ vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+
+ int temp[12];
+ int dst_ref = 0;
+ for (int r = 0; r < filter_size; r++) {
+ temp[r] = 0;
+ for (int c = 0; c < filter_size; c++) {
+ temp[r] += x_filter[c] * src[r * filter_size + c];
+ }
+ temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
+ dst_ref += temp[r] * y_filter[r];
+ }
+ dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
+ EXPECT_EQ(dst[0], dst_ref);
+}
+
+TEST(VP10ConvolveTest, vp10_convolve_avg) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ INTERP_FILTER interp_filter = EIGHTTAP;
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ ptrdiff_t filter_size = filter_params.tap;
+ int filter_center = filter_size / 2 - 1;
+ uint8_t src0[12 * 12];
+ uint8_t src1[12 * 12];
+ int src_stride = filter_size;
+ uint8_t dst0[1] = {0};
+ uint8_t dst1[1] = {0};
+ uint8_t dst[1] = {0};
+ int dst_stride = 1;
+ int x_step_q4 = 16;
+ int y_step_q4 = 16;
+ int subpel_x_q4 = 3;
+ int subpel_y_q4 = 2;
+ int avg = 0;
+
+ int w = 1;
+ int h = 1;
+
+ for (int i = 0; i < filter_size * filter_size; i++) {
+ src0[i] = rnd.Rand16() % (1 << 8);
+ src1[i] = rnd.Rand16() % (1 << 8);
+ }
+
+ int offset = filter_size * filter_center + filter_center;
+
+ avg = 0;
+ vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
+ filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
+ avg);
+ avg = 0;
+ vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
+ filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
+ avg);
+
+ avg = 0;
+ vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
+ avg = 1;
+ vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
+
+ EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+TEST(VP10ConvolveTest, vp10_highbd_convolve) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ INTERP_FILTER interp_filter = EIGHTTAP;
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ ptrdiff_t filter_size = filter_params.tap;
+ int filter_center = filter_size / 2 - 1;
+ uint16_t src[12 * 12];
+ int src_stride = filter_size;
+ uint16_t dst[1] = {0};
+ int dst_stride = 1;
+ int x_step_q4 = 16;
+ int y_step_q4 = 16;
+ int subpel_x_q4 = 8;
+ int subpel_y_q4 = 6;
+ int avg = 0;
+ int bd = 10;
+
+ int w = 1;
+ int h = 1;
+
+ for (int i = 0; i < filter_size * filter_size; i++) {
+ src[i] = rnd.Rand16() % (1 << bd);
+ }
+
+ vp10_highbd_convolve(
+ CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
+ src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+
+ const int16_t* x_filter =
+ vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+ const int16_t* y_filter =
+ vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+
+ int temp[12];
+ int dst_ref = 0;
+ for (int r = 0; r < filter_size; r++) {
+ temp[r] = 0;
+ for (int c = 0; c < filter_size; c++) {
+ temp[r] += x_filter[c] * src[r * filter_size + c];
+ }
+ temp[r] = clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
+ dst_ref += temp[r] * y_filter[r];
+ }
+ dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
+ EXPECT_EQ(dst[0], dst_ref);
+}
+
+TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ INTERP_FILTER interp_filter = EIGHTTAP;
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ ptrdiff_t filter_size = filter_params.tap;
+ int filter_center = filter_size / 2 - 1;
+ uint16_t src0[12 * 12];
+ uint16_t src1[12 * 12];
+ int src_stride = filter_size;
+ uint16_t dst0[1] = {0};
+ uint16_t dst1[1] = {0};
+ uint16_t dst[1] = {0};
+ int dst_stride = 1;
+ int x_step_q4 = 16;
+ int y_step_q4 = 16;
+ int subpel_x_q4 = 3;
+ int subpel_y_q4 = 2;
+ int avg = 0;
+ int bd = 10;
+
+ int w = 1;
+ int h = 1;
+
+ for (int i = 0; i < filter_size * filter_size; i++) {
+ src0[i] = rnd.Rand16() % (1 << bd);
+ src1[i] = rnd.Rand16() % (1 << bd);
+ }
+
+ int offset = filter_size * filter_center + filter_center;
+
+ avg = 0;
+ vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+ CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
+ filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+ y_step_q4, avg, bd);
+ avg = 0;
+ vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+ CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
+ filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+ y_step_q4, avg, bd);
+
+ avg = 0;
+ vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+ CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+ avg = 1;
+ vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+ CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+
+ EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+} // namespace
diff --git a/test/vp10_fwd_txfm1d_test.cc b/test/vp10_fwd_txfm1d_test.cc
new file mode 100644
index 0000000..a39e0ef
--- /dev/null
+++ b/test/vp10_fwd_txfm1d_test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm1d.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+static int txfm_type_num = 2;
+static TYPE_TXFM txfm_type_ls[2] = {TYPE_DCT, TYPE_ADST};
+
+static int txfm_size_num = 4;
+static int txfm_size_ls[4] = {4, 8, 16, 32};
+
+static TxfmFunc fwd_txfm_func_ls[2][4] = {
+ {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new},
+ {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}};
+
+// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
+static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14};
+static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
+
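+// round_shift(value, bit) divides by 2^bit and rounds to the nearest
+// integer, breaking ties toward zero: 7/2 = 3.5 -> 3 and -7/2 -> -3 below.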
+TEST(vp10_fwd_txfm1d, round_shift) {
+ EXPECT_EQ(round_shift(7, 1), 3);
+ EXPECT_EQ(round_shift(-7, 1), -3);
+
+ EXPECT_EQ(round_shift(7, 2), 2);
+ EXPECT_EQ(round_shift(-7, 2), -2);
+
+ EXPECT_EQ(round_shift(8, 2), 2);
+ EXPECT_EQ(round_shift(-8, 2), -2);
+}
+
+TEST(vp10_fwd_txfm1d, get_max_bit) {
+ int max_bit = get_max_bit(8);
+ EXPECT_EQ(max_bit, 3);
+}
+
+TEST(vp10_fwd_txfm1d, half_btf) {
+ int32_t max = (1 << 15) - 1;
+ int32_t w0 = max;
+ int32_t in0 = max;
+ int32_t w1 = max;
+ int32_t in1 = max;
+ int32_t result_32 = half_btf(w0, in0, w1, in1, 0);
+ int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+ EXPECT_EQ(result_32, result_64);
+}
+
+TEST(vp10_fwd_txfm1d, cospi_arr) {
+ for (int i = 0; i < 7; i++) {
+ for (int j = 0; j < 64; j++) {
+ EXPECT_EQ(cospi_arr[i][j],
+ (int32_t)round(cos(M_PI * j / 128) * (1 << (cos_bit_min + i))));
+ }
+ }
+}
+
+TEST(vp10_fwd_txfm1d, clamp_block) {
+ int16_t block[5][5] = {{7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9}};
+
+ int16_t ref_block[5][5] = {{7, -5, 6, -3, 9},
+ {7, -5, 6, -3, 9},
+ {7, -4, 2, -3, 9},
+ {7, -4, 2, -3, 9},
+ {7, -4, 2, -3, 9}};
+
+ int row = 2;
+ int col = 1;
+ int block_size = 3;
+ int stride = 5;
+ clamp_block(block[row] + col, block_size, stride, -4, 2);
+ for (int r = 0; r < stride; r++) {
+ for (int c = 0; c < stride; c++) {
+ EXPECT_EQ(block[r][c], ref_block[r][c]);
+ }
+ }
+}
+
+TEST(vp10_fwd_txfm1d, accuracy) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int si = 0; si < txfm_size_num; ++si) {
+ int txfm_size = txfm_size_ls[si];
+ int32_t *input = new int32_t[txfm_size];
+ int32_t *output = new int32_t[txfm_size];
+ double *ref_input = new double[txfm_size];
+ double *ref_output = new double[txfm_size];
+
+ for (int ti = 0; ti < txfm_type_num; ++ti) {
+ TYPE_TXFM txfm_type = txfm_type_ls[ti];
+ TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si];
+ int max_error = 7;
+
+ const int count_test_block = 5000;
+ for (int ti = 0; ti < count_test_block; ++ti) {
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
+ ref_input[ni] = static_cast<double>(input[ni]);
+ }
+
+ fwd_txfm_func(input, output, cos_bit, range_bit);
+ reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type);
+
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ EXPECT_LE(
+ abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
+ max_error);
+ }
+ }
+ }
+
+ delete[] input;
+ delete[] output;
+ delete[] ref_input;
+ delete[] ref_output;
+ }
+}
+} // namespace
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc
new file mode 100644
index 0000000..e6416cc
--- /dev/null
+++ b/test/vp10_fwd_txfm2d_test.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm2d.h"
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+const int txfm_size_num = 4;
+const int txfm_size_ls[4] = {4, 8, 16, 32};
+const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = {
+ {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4,
+ fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4},
+ {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8,
+ fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8},
+ {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16,
+ fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16},
+ {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32,
+ fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}};
+
+const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = {
+ vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16,
+ vp10_fwd_txfm2d_32x32};
+
+const int txfm_type_num = 4;
+const TYPE_TXFM type_ls_0[4] = {TYPE_DCT, TYPE_DCT, TYPE_ADST, TYPE_ADST};
+const TYPE_TXFM type_ls_1[4] = {TYPE_DCT, TYPE_ADST, TYPE_ADST, TYPE_DCT};
+
+TEST(vp10_fwd_txfm2d, accuracy) {
+ for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) {
+ int txfm_size = txfm_size_ls[txfm_size_idx];
+ int sqr_txfm_size = txfm_size * txfm_size;
+ int16_t* input = new int16_t[sqr_txfm_size];
+ int32_t* output = new int32_t[sqr_txfm_size];
+ double* ref_input = new double[sqr_txfm_size];
+ double* ref_output = new double[sqr_txfm_size];
+
+ for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
+ ++txfm_type_idx) {
+ TXFM_2D_CFG fwd_txfm_cfg = fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+ Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
+ TYPE_TXFM type0 = type_ls_0[txfm_type_idx];
+ TYPE_TXFM type1 = type_ls_1[txfm_type_idx];
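+      // shift[0..2] are applied between the stages of the 2-D transform, so
+      // their sum is the transform's net power-of-two gain over the float
+      // reference; ref_output is scaled by it before comparison.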
+ int amplify_bit =
+ fwd_txfm_cfg.shift[0] + fwd_txfm_cfg.shift[1] + fwd_txfm_cfg.shift[2];
+ double amplify_factor =
+ amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
+
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int count = 5000;
+ double avg_abs_error = 0;
+ for (int ci = 0; ci < count; ci++) {
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base;
+ ref_input[ni] = static_cast<double>(input[ni]);
+ output[ni] = 0;
+ ref_output[ni] = 0;
+ }
+
+ fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd);
+ reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
+
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ ref_output[ni] = round(ref_output[ni] * amplify_factor);
+ EXPECT_LE(fabs(output[ni] - ref_output[ni]) / amplify_factor, 30);
+ }
+ avg_abs_error += compute_avg_abs_error<int32_t, double>(
+ output, ref_output, sqr_txfm_size);
+ }
+
+ avg_abs_error /= amplify_factor;
+ avg_abs_error /= count;
+      // max_abs_avg_error comes from the upper bound of avg_abs_error:
+      // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error: %f\n",
+      //        type0, type1, txfm_size, avg_abs_error);
+ double max_abs_avg_error = 1.5;
+ EXPECT_LE(avg_abs_error, max_abs_avg_error);
+ }
+
+ delete[] input;
+ delete[] output;
+ delete[] ref_input;
+ delete[] ref_output;
+ }
+}
+
+} // anonymous namespace
diff --git a/test/vp10_inv_txfm1d_test.cc b/test/vp10_inv_txfm1d_test.cc
new file mode 100644
index 0000000..3b716c8
--- /dev/null
+++ b/test/vp10_inv_txfm1d_test.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm1d.h"
+#include "vp10/common/vp10_inv_txfm1d.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+static int txfm_type_num = 2;
+static int txfm_size_num = 4;
+static int txfm_size_ls[4] = {4, 8, 16, 32};
+
+static TxfmFunc fwd_txfm_func_ls[2][4] = {
+ {vp10_fdct4_new, vp10_fdct8_new, vp10_fdct16_new, vp10_fdct32_new},
+ {vp10_fadst4_new, vp10_fadst8_new, vp10_fadst16_new, vp10_fadst32_new}};
+
+static TxfmFunc inv_txfm_func_ls[2][4] = {
+ {vp10_idct4_new, vp10_idct8_new, vp10_idct16_new, vp10_idct32_new},
+ {vp10_iadst4_new, vp10_iadst8_new, vp10_iadst16_new, vp10_iadst32_new}};
+
+// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
+static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14};
+static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
+
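+// A fwd + inv pass through these unnormalized transforms gains a factor of
+// 2^(get_max_bit(txfm_size) - 1), i.e. txfm_size / 2, which the round_shift
+// below removes before comparing against the original input.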
+TEST(vp10_inv_txfm1d, round_trip) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int si = 0; si < txfm_size_num; ++si) {
+ int txfm_size = txfm_size_ls[si];
+ int32_t *input = new int32_t[txfm_size];
+ int32_t *output = new int32_t[txfm_size];
+ int32_t *round_trip_output = new int32_t[txfm_size];
+
+ for (int ti = 0; ti < txfm_type_num; ++ti) {
+ TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si];
+ TxfmFunc inv_txfm_func = inv_txfm_func_ls[ti][si];
+ int max_error = 2;
+
+ const int count_test_block = 5000;
+ for (int ci = 0; ci < count_test_block; ++ci) {
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base - rnd.Rand16() % base;
+ }
+
+ fwd_txfm_func(input, output, cos_bit, range_bit);
+ inv_txfm_func(output, round_trip_output, cos_bit, range_bit);
+
+ for (int ni = 0; ni < txfm_size; ++ni) {
+ EXPECT_LE(abs(input[ni] - round_shift(round_trip_output[ni],
+ get_max_bit(txfm_size) - 1)),
+ max_error);
+ }
+ }
+ }
+ delete[] input;
+ delete[] output;
+ delete[] round_trip_output;
+ }
+}
+
+} // namespace
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc
new file mode 100644
index 0000000..603821e
--- /dev/null
+++ b/test/vp10_inv_txfm2d_test.cc
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm2d.h"
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+#include "vp10/common/vp10_inv_txfm2d.h"
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+const int txfm_size_num = 4;
+const int txfm_size_ls[4] = {4, 8, 16, 32};
+const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = {
+ {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4,
+ fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4},
+ {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8,
+ fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8},
+ {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16,
+ fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16},
+ {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32,
+ fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}};
+
+const TXFM_2D_CFG inv_txfm_cfg_ls[4][4] = {
+ {inv_txfm_2d_cfg_dct_dct_4, inv_txfm_2d_cfg_dct_adst_4,
+ inv_txfm_2d_cfg_adst_adst_4, inv_txfm_2d_cfg_adst_dct_4},
+ {inv_txfm_2d_cfg_dct_dct_8, inv_txfm_2d_cfg_dct_adst_8,
+ inv_txfm_2d_cfg_adst_adst_8, inv_txfm_2d_cfg_adst_dct_8},
+ {inv_txfm_2d_cfg_dct_dct_16, inv_txfm_2d_cfg_dct_adst_16,
+ inv_txfm_2d_cfg_adst_adst_16, inv_txfm_2d_cfg_adst_dct_16},
+ {inv_txfm_2d_cfg_dct_dct_32, inv_txfm_2d_cfg_dct_adst_32,
+ inv_txfm_2d_cfg_adst_adst_32, inv_txfm_2d_cfg_adst_dct_32}};
+
+const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = {
+ vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16,
+ vp10_fwd_txfm2d_32x32};
+const Inv_Txfm2d_Func inv_txfm_func_ls[4] = {
+ vp10_inv_txfm2d_add_4x4, vp10_inv_txfm2d_add_8x8, vp10_inv_txfm2d_add_16x16,
+ vp10_inv_txfm2d_add_32x32};
+
+const int txfm_type_num = 4;
+
+TEST(vp10_inv_txfm2d, round_trip) {
+ for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) {
+ const int txfm_size = txfm_size_ls[txfm_size_idx];
+ const int sqr_txfm_size = txfm_size * txfm_size;
+ int16_t* input = new int16_t[sqr_txfm_size];
+ uint16_t* ref_input = new uint16_t[sqr_txfm_size];
+ int32_t* output = new int32_t[sqr_txfm_size];
+
+ for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
+ ++txfm_type_idx) {
+ const TXFM_2D_CFG fwd_txfm_cfg =
+ fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+ const TXFM_2D_CFG inv_txfm_cfg =
+ inv_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+ const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
+ const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx];
+ const int count = 5000;
+ double avg_abs_error = 0;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int ci = 0; ci < count; ci++) {
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ if (ci == 0) {
+ int extreme_input = base - 1;
+ input[ni] = extreme_input; // extreme case
+ ref_input[ni] = 0;
+ } else {
+ input[ni] = rnd.Rand16() % base;
+ ref_input[ni] = 0;
+ }
+ }
+
+ fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd);
+ inv_txfm_func(output, ref_input, txfm_size, &inv_txfm_cfg, bd);
+
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ EXPECT_LE(abs(input[ni] - ref_input[ni]), 2);
+ }
+ avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
+ input, ref_input, sqr_txfm_size);
+ }
+
+ avg_abs_error /= count;
+      // max_abs_avg_error comes from the upper bound of avg_abs_error:
+ // printf("txfm_size: %d accuracy_avg_abs_error: %f\n", txfm_size,
+ // avg_abs_error);
+ // TODO(angiebird): this upper bound is from adst_adst_8
+ const double max_abs_avg_error = 0.024;
+ EXPECT_LE(avg_abs_error, max_abs_avg_error);
+ }
+
+ delete[] input;
+ delete[] ref_input;
+ delete[] output;
+ }
+}
+
+} // anonymous namespace
diff --git a/test/vp10_inv_txfm_test.cc b/test/vp10_inv_txfm_test.cc
index c49081e..6c0a3d2 100644
--- a/test/vp10_inv_txfm_test.cc
+++ b/test/vp10_inv_txfm_test.cc
@@ -203,7 +203,7 @@
// quantization with maximum allowed step sizes
test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
for (int j = 1; j < last_nonzero_; ++j)
- test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]]
+ test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]]
= (output_ref_block[j] / 1828) * 1828;
}
@@ -265,7 +265,7 @@
max_energy_leftover = 0;
coef = 0;
}
- test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef;
+ test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]] = coef;
}
memcpy(test_coef_block2, test_coef_block1,
diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h
new file mode 100644
index 0000000..a3a4258
--- /dev/null
+++ b/test/vp10_txfm_test.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_TXFM_TEST_H_
+#define VP10_TXFM_TEST_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _MSC_VER
+#define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "vp10/common/vp10_txfm.h"
+
+typedef enum {
+ TYPE_DCT = 0,
+ TYPE_ADST,
+ TYPE_IDCT,
+ TYPE_IADST,
+ TYPE_LAST
+} TYPE_TXFM;
+
+static double invSqrt2 = 1 / pow(2, 0.5);
+
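+// Unnormalized type-II DCT reference:
+//   out[k] = sum_n in[n] * cos(pi * (2n + 1) * k / (2 * size)),
+// with the DC (k == 0) term scaled by 1/sqrt(2); the ADST reference below
+// uses the matching sine kernel.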
+static void reference_dct_1d(const double* in, double* out, int size) {
+ for (int k = 0; k < size; ++k) {
+ out[k] = 0;
+ for (int n = 0; n < size; ++n) {
+ out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (2 * size));
+ }
+ if (k == 0) out[k] = out[k] * invSqrt2;
+ }
+}
+
+static void reference_adst_1d(const double* in, double* out, int size) {
+ for (int k = 0; k < size; ++k) {
+ out[k] = 0;
+ for (int n = 0; n < size; ++n) {
+ out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
+ }
+ }
+}
+
+static void reference_hybrid_1d(double* in, double* out, int size, int type) {
+ if (type == TYPE_DCT)
+ reference_dct_1d(in, out, size);
+ else
+ reference_adst_1d(in, out, size);
+}
+
+static INLINE void reference_hybrid_2d(double* in, double* out, int size,
+ int type0, int type1) {
+ double* tempOut = new double[size * size];
+
+ for (int r = 0; r < size; r++) {
+ // out ->tempOut
+ for (int c = 0; c < size; c++) {
+ tempOut[r * size + c] = in[c * size + r];
+ }
+ }
+
+ // dct each row: in -> out
+ for (int r = 0; r < size; r++) {
+ reference_hybrid_1d(tempOut + r * size, out + r * size, size, type0);
+ }
+
+ for (int r = 0; r < size; r++) {
+ // out ->tempOut
+ for (int c = 0; c < size; c++) {
+ tempOut[r * size + c] = out[c * size + r];
+ }
+ }
+
+ for (int r = 0; r < size; r++) {
+ reference_hybrid_1d(tempOut + r * size, out + r * size, size, type1);
+ }
+ delete[] tempOut;
+}
+
+template <typename Type1, typename Type2>
+static double compute_avg_abs_error(const Type1* a, const Type2* b,
+ const int size) {
+ double error = 0;
+ for (int i = 0; i < size; i++) {
+ error += fabs(static_cast<double>(a[i]) - static_cast<double>(b[i]));
+ }
+ error = error / size;
+ return error;
+}
+
+typedef void (*TxfmFunc)(const int32_t* in, int32_t* out, const int8_t* cos_bit,
+ const int8_t* range_bit);
+
+typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, const int,
+ const TXFM_2D_CFG*, const int);
+typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, const int,
+ const TXFM_2D_CFG*, const int);
+
+static const int bd = 10;
+static const int base = (1 << bd);
+
+#endif // VP10_TXFM_TEST_H_
diff --git a/test/vp9_arf_freq_test.cc b/test/vp9_arf_freq_test.cc
index 89200d4..670529c 100644
--- a/test/vp9_arf_freq_test.cc
+++ b/test/vp9_arf_freq_test.cc
@@ -78,19 +78,19 @@
return !strcmp(dot, ".y4m");
}
-class ArfFreqTest
+class ArfFreqTestLarge
: public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith3Params<TestVideoParam, \
TestEncodeParam, int> {
protected:
- ArfFreqTest()
+ ArfFreqTestLarge()
: EncoderTest(GET_PARAM(0)),
test_video_param_(GET_PARAM(1)),
test_encode_param_(GET_PARAM(2)),
min_arf_requested_(GET_PARAM(3)) {
}
- virtual ~ArfFreqTest() {}
+ virtual ~ArfFreqTestLarge() {}
virtual void SetUp() {
InitializeConfig();
@@ -190,7 +190,7 @@
int run_of_visible_frames_;
};
-TEST_P(ArfFreqTest, MinArfFreqTest) {
+TEST_P(ArfFreqTestLarge, MinArfFreqTest) {
cfg_.rc_target_bitrate = kBitrate;
cfg_.g_error_resilient = 0;
cfg_.g_profile = test_video_param_.profile;
@@ -225,26 +225,26 @@
}
VP9_INSTANTIATE_TEST_CASE(
- ArfFreqTest,
+ ArfFreqTestLarge,
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors));
#if CONFIG_VP9_HIGHBITDEPTH
-# if CONFIG_VP10_ENCODER
+#if CONFIG_VP10_ENCODER
// TODO(angiebird): 25-29 fail in high bitdepth mode.
INSTANTIATE_TEST_CASE_P(
- DISABLED_VP10, ArfFreqTest,
+ DISABLED_VP10, ArfFreqTestLarge,
::testing::Combine(
::testing::Values(static_cast<const libvpx_test::CodecFactory *>(
&libvpx_test::kVP10)),
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors)));
-# endif // CONFIG_VP10_ENCODER
+#endif // CONFIG_VP10_ENCODER
#else
VP10_INSTANTIATE_TEST_CASE(
- ArfFreqTest,
+ ArfFreqTestLarge,
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors));
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc
index 63f6dfe..8ac5c33 100644
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -108,7 +108,7 @@
TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
std::vector<std::string> single_thr_md5, multi_thr_md5;
- ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
+ ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 18);
cfg_.rc_target_bitrate = 1000;
@@ -138,5 +138,5 @@
VP10_INSTANTIATE_TEST_CASE(
VPxEncoderThreadTest,
::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
- ::testing::Range(1, 3));
+ ::testing::Range(1, 2));
} // namespace
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c
index 9ca86e5..e14aee7 100644
--- a/vp10/common/alloccommon.c
+++ b/vp10/common/alloccommon.c
@@ -81,6 +81,12 @@
}
}
+#if CONFIG_LOOP_RESTORATION
+void vp10_free_restoration_buffers(VP10_COMMON *cm) {
+ vpx_free_frame_buffer(&cm->tmp_loop_buf);
+}
+#endif // CONFIG_LOOP_RESTORATION
+
void vp10_free_postproc_buffers(VP10_COMMON *cm) {
#if CONFIG_VP9_POSTPROC
vpx_free_frame_buffer(&cm->post_proc_buffer);
@@ -97,6 +103,10 @@
cm->above_context = NULL;
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
+#if CONFIG_VAR_TX
+ vpx_free(cm->above_txfm_context);
+ cm->above_txfm_context = NULL;
+#endif
}
int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
@@ -128,6 +138,14 @@
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context));
if (!cm->above_seg_context) goto fail;
+
+#if CONFIG_VAR_TX
+ vpx_free(cm->above_txfm_context);
+ cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
+ mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_txfm_context));
+ if (!cm->above_txfm_context) goto fail;
+#endif
+
cm->above_context_alloc_cols = cm->mi_cols;
}
diff --git a/vp10/common/alloccommon.h b/vp10/common/alloccommon.h
index 5cfe660..f77833b 100644
--- a/vp10/common/alloccommon.h
+++ b/vp10/common/alloccommon.h
@@ -29,6 +29,9 @@
void vp10_free_ref_frame_buffers(struct BufferPool *pool);
void vp10_free_postproc_buffers(struct VP10Common *cm);
+#if CONFIG_LOOP_RESTORATION
+void vp10_free_restoration_buffers(struct VP10Common *cm);
+#endif // CONFIG_LOOP_RESTORATION
int vp10_alloc_state_buffers(struct VP10Common *cm, int width, int height);
void vp10_free_state_buffers(struct VP10Common *cm);
diff --git a/vp10/common/ans.h b/vp10/common/ans.h
new file mode 100644
index 0000000..a1862f3
--- /dev/null
+++ b/vp10/common/ans.h
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ANS_H_
+#define VP10_COMMON_ANS_H_
+// An implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <assert.h>
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem_ops.h"
+
+#define ANS_DIVIDE_BY_MULTIPLY 1
+#if ANS_DIVIDE_BY_MULTIPLY
+#include "vp10/common/divide.h"
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+ do { \
+ quotient = fastdiv(dividend, divisor); \
+ remainder = dividend - quotient * divisor; \
+ } while (0)
+#define ANS_DIV(dividend, divisor) \
+ fastdiv(dividend, divisor)
+#else
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+ do { \
+ quotient = dividend / divisor; \
+ remainder = dividend % divisor; \
+ } while (0)
+#define ANS_DIV(dividend, divisor) \
+ ((dividend) / (divisor))
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+struct AnsCoder {
+ uint8_t *buf;
+ int buf_offset;
+ uint32_t state;
+};
+
+struct AnsDecoder {
+ const uint8_t *buf;
+ int buf_offset;
+ uint32_t state;
+};
+
+typedef uint8_t AnsP8;
+#define ans_p8_precision 256u
+#define ans_p8_shift 8
+#define l_base (ans_p8_precision * 4) // l_base % precision must be 0
+#define io_base 256
+// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
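+// With ans_p8_precision == 256, l_base == 1024 and the state stays inside
+// [1024, 262144) between symbols: writers emit a byte and divide by io_base
+// before a symbol would push the state past the top, and readers reverse it.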
+
+static INLINE void ans_write_init(struct AnsCoder *const ans,
+ uint8_t *const buf) {
+ ans->buf = buf;
+ ans->buf_offset = 0;
+ ans->state = l_base;
+}
+
+static INLINE int ans_write_end(struct AnsCoder *const ans) {
+ uint32_t state;
+ assert(ans->state >= l_base);
+ assert(ans->state < l_base * io_base);
+ state = ans->state - l_base;
+ if (state < (1 << 6)) {
+ ans->buf[ans->buf_offset] = (0 << 6) + state;
+ return ans->buf_offset + 1;
+ } else if (state < (1 << 14)) {
+ mem_put_le16(ans->buf + ans->buf_offset, (1 << 14) + state);
+ return ans->buf_offset + 2;
+ } else {
+ mem_put_le24(ans->buf + ans->buf_offset, (1 << 23) + state);
+ return ans->buf_offset + 3;
+ }
+}
+
+// rABS with descending spread
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+ const AnsP8 p = ans_p8_precision - p0;
+ const unsigned l_s = val ? p : p0;
+ unsigned quot, rem;
+ if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base;
+ ans->state /= io_base;
+ }
+ ANS_DIVREM(quot, rem, ans->state, l_s);
+ ans->state = quot * ans_p8_precision + rem + (val ? 0 : p);
+}
+
+#define ANS_IMPL1 0
+#define UNPREDICTABLE(x) x
+static INLINE int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) {
+ int val;
+#if ANS_IMPL1
+ unsigned l_s;
+#else
+ unsigned quot, rem, x, xn;
+#endif
+ const AnsP8 p = ans_p8_precision - p0;
+ if (ans->state < l_base) {
+ ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+ }
+#if ANS_IMPL1
+ val = ans->state % ans_p8_precision < p;
+ l_s = val ? p : p0;
+ ans->state = (ans->state / ans_p8_precision) * l_s +
+ ans->state % ans_p8_precision - (!val * p);
+#else
+ x = ans->state;
+ quot = x / ans_p8_precision;
+ rem = x % ans_p8_precision;
+ xn = quot * p;
+ val = rem < p;
+ if (UNPREDICTABLE(val)) {
+ ans->state = xn + rem;
+ } else {
+ // ans->state = quot * p0 + rem - p;
+ ans->state = x - xn - p;
+ }
+#endif
+ return val;
+}
+
+// rABS with ascending spread
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+ const AnsP8 p = ans_p8_precision - p0;
+ const unsigned l_s = val ? p : p0;
+ unsigned quot, rem;
+ if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base;
+ ans->state /= io_base;
+ }
+ ANS_DIVREM(quot, rem, ans->state, l_s);
+ ans->state = quot * ans_p8_precision + rem + (val ? p0 : 0);
+}
+
+static INLINE int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) {
+ int val;
+#if ANS_IMPL1
+ unsigned l_s;
+#else
+ unsigned quot, rem, x, xn;
+#endif
+ const AnsP8 p = ans_p8_precision - p0;
+ if (ans->state < l_base) {
+ ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+ }
+#if ANS_IMPL1
+ val = ans->state % ans_p8_precision < p;
+ l_s = val ? p : p0;
+ ans->state = (ans->state / ans_p8_precision) * l_s +
+ ans->state % ans_p8_precision - (!val * p);
+#else
+ x = ans->state;
+ quot = x / ans_p8_precision;
+ rem = x % ans_p8_precision;
+ xn = quot * p;
+ val = rem >= p0;
+ if (UNPREDICTABLE(val)) {
+ ans->state = xn + rem - p0;
+ } else {
+ // ans->state = quot * p0 + rem - p0;
+ ans->state = x - xn;
+ }
+#endif
+ return val;
+}
+
+#define rabs_read rabs_desc_read
+#define rabs_write rabs_desc_write
+
+// uABS with normalization
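+// Coding a bit with P(one) = p / 256 scales the state by roughly 256 / p0
+// for a zero and 256 / p for a one, so likely bits grow the state more
+// slowly -- the ANS analogue of interval scaling in arithmetic coding.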
+static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+ AnsP8 p = ans_p8_precision - p0;
+ const unsigned l_s = val ? p : p0;
+ if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base;
+ ans->state /= io_base;
+ }
+ if (!val)
+ ans->state = ANS_DIV(ans->state * ans_p8_precision, p0);
+ else
+ ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1;
+}
+
+static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
+ AnsP8 p = ans_p8_precision - p0;
+ int s;
+ // unsigned int xp1;
+ unsigned xp, sp;
+ unsigned state = ans->state;
+ if (state < l_base && ans->buf_offset > 0) {
+ state = state * io_base + ans->buf[--ans->buf_offset];
+ }
+ sp = state * p;
+ // xp1 = (sp + p) / ans_p8_precision;
+ xp = sp / ans_p8_precision;
+ // s = xp1 - xp;
+ s = (sp & 0xFF) >= p0;
+ if (UNPREDICTABLE(s))
+ ans->state = xp;
+ else
+ ans->state = state - xp;
+ return s;
+}
+
+static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
+ int s;
+ unsigned state = ans->state;
+ if (state < l_base && ans->buf_offset > 0) {
+ state = state * io_base + ans->buf[--ans->buf_offset];
+ }
+ s = (int)(state & 1);
+ ans->state = state >> 1;
+ return s;
+}
+
+struct rans_sym {
+ AnsP8 prob;
+ AnsP8 cum_prob; // not-inclusive
+};
+
+struct rans_dec_sym {
+ uint8_t val;
+ AnsP8 prob;
+ AnsP8 cum_prob; // not-inclusive
+};
+
+typedef struct rans_dec_sym rans_dec_lut[ans_p8_precision];
+
+static INLINE void rans_build_dec_tab(const AnsP8 token_probs[],
+ rans_dec_lut dec_tab) {
+ int val = 0;
+ int cum_prob = 0;
+ int sym_end = token_probs[0];
+ int i;
+ for (i = 0; i < 256; ++i) {
+ if (i == sym_end) {
+ ++val;
+ cum_prob = sym_end;
+ sym_end += token_probs[val];
+ }
+ dec_tab[i].val = val;
+ dec_tab[i].prob = token_probs[val];
+ dec_tab[i].cum_prob = cum_prob;
+ }
+}
+
+// rANS with normalization
+// sym->prob takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rans_write(struct AnsCoder *ans,
+ const struct rans_sym *const sym) {
+ const AnsP8 p = sym->prob;
+ if (ans->state >= l_base / ans_p8_precision * io_base * p) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base;
+ ans->state /= io_base;
+ }
+ ans->state =
+ (ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob;
+}
+
+static INLINE int rans_read(struct AnsDecoder *ans,
+ const rans_dec_lut tab) {
+ unsigned rem;
+ unsigned quo;
+ int val;
+ if (ans->state < l_base && ans->buf_offset > 0) {
+ ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+ }
+ quo = ans->state / ans_p8_precision;
+ rem = ans->state % ans_p8_precision;
+ val = tab[rem].val;
+
+ ans->state = quo * tab[rem].prob + rem - tab[rem].cum_prob;
+ return val;
+}
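+// Worked example of the pair above: writing a symbol with prob 64 and
+// cum_prob 128 from state 1000 gives (1000 / 64) * 256 + 1000 % 64 + 128 =
+// 4008; reading 4008 gives quo = 15, rem = 168 (inside that symbol's slot
+// [128, 192)), and restores the state to 15 * 64 + 168 - 128 = 1000.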
+
+static INLINE int ans_read_init(struct AnsDecoder *const ans,
+ const uint8_t *const buf,
+ int offset) {
+ unsigned x;
+ if (offset < 1) return 1;
+ ans->buf = buf;
+ x = buf[offset - 1] >> 6;
+ if (x == 0) {
+ ans->buf_offset = offset - 1;
+ ans->state = buf[offset - 1] & 0x3F;
+ } else if (x == 1) {
+ if (offset < 2) return 1;
+ ans->buf_offset = offset - 2;
+ ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
+ } else if (x == 2) {
+ if (offset < 3) return 1;
+ ans->buf_offset = offset - 3;
+ ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
+ } else {
+ // x == 3 implies this byte is a superframe marker
+ return 1;
+ }
+ ans->state += l_base;
+ if (ans->state >= l_base * io_base)
+ return 1;
+ return 0;
+}
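+// The top two bits of the trailing byte select how many bytes hold the
+// final state: e.g. a trailing byte of 0x42 has marker 01, so the state is
+// the low 14 bits of the last two bytes, read little-endian.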
+
+static INLINE int ans_read_end(struct AnsDecoder *const ans) {
+ return ans->state == l_base;
+}
+#undef ANS_DIVREM
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // VP10_COMMON_ANS_H_
diff --git a/vp10/common/blockd.c b/vp10/common/blockd.c
index b6f910f..5f45077 100644
--- a/vp10/common/blockd.c
+++ b/vp10/common/blockd.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
#include "vp10/common/blockd.h"
PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
@@ -134,3 +135,23 @@
xd->plane[i].subsampling_y = i ? ss_y : 0;
}
}
+
+#if CONFIG_EXT_INTRA
+#define PI 3.14159265
+// Returns whether filter selection is needed for a given
+// intra prediction angle.
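+// For example, an 84-degree angle gives t = 1 / tan(84) ~= 0.105, whose
+// fractional part scaled by 1024 exceeds 1, so a filter is selected.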
+int pick_intra_filter(int angle) {
+ if (angle % 45 == 0)
+ return 0;
+ if (angle > 90 && angle < 180) {
+ return 1;
+ } else {
+ double t = tan(angle * PI / 180.0);
+ double n;
+ if (angle < 90)
+ t = 1 / t;
+ n = floor(t);
+ return (t - n) * 1024 > 1;
+ }
+}
+#endif // CONFIG_EXT_INTRA
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index fce1767..4a3e67c 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -38,10 +38,40 @@
FRAME_TYPES,
} FRAME_TYPE;
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+#define IsInterpolatingFilter(filter) \
+ (vp10_filter_kernels[filter][0][SUBPEL_TAPS / 2 - 1] == 128)
+#else
+#define IsInterpolatingFilter(filter) (1)
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+
+#define MAXTXLEN 32
+
static INLINE int is_inter_mode(PREDICTION_MODE mode) {
+#if CONFIG_EXT_INTER
+ return mode >= NEARESTMV && mode <= NEW_NEWMV;
+#else
return mode >= NEARESTMV && mode <= NEWMV;
+#endif // CONFIG_EXT_INTER
}
+#if CONFIG_EXT_INTER
+static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
+ return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
+}
+
+static INLINE int is_inter_compound_mode(PREDICTION_MODE mode) {
+ return mode >= NEAREST_NEARESTMV && mode <= NEW_NEWMV;
+}
+
+static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
+ return (mode == NEWMV || mode == NEWFROMNEARMV ||
+ mode == NEW_NEWMV ||
+ mode == NEAREST_NEWMV || mode == NEW_NEARESTMV ||
+ mode == NEAR_NEWMV || mode == NEW_NEARMV);
+}
+#endif // CONFIG_EXT_INTER
+
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
@@ -49,6 +79,9 @@
typedef struct {
PREDICTION_MODE as_mode;
int_mv as_mv[2]; // first, second inter predictor motion vectors
+#if CONFIG_EXT_INTER
+ int_mv ref_mv[2];
+#endif // CONFIG_EXT_INTER
} b_mode_info;
// Note that the rate-distortion optimization loop, bit-stream writer, and
@@ -58,34 +91,87 @@
#define NONE -1
#define INTRA_FRAME 0
#define LAST_FRAME 1
+#if CONFIG_EXT_REFS
+#define LAST2_FRAME 2
+#define LAST3_FRAME 3
+#define LAST4_FRAME 4
+#define GOLDEN_FRAME 5
+#define ALTREF_FRAME 6
+#define MAX_REF_FRAMES 7
+#define LAST_REF_FRAMES (LAST4_FRAME - LAST_FRAME + 1)
+#else
#define GOLDEN_FRAME 2
#define ALTREF_FRAME 3
#define MAX_REF_FRAMES 4
+#endif // CONFIG_EXT_REFS
+
typedef int8_t MV_REFERENCE_FRAME;
+#if CONFIG_REF_MV
+#define MODE_CTX_REF_FRAMES (MAX_REF_FRAMES + (ALTREF_FRAME - LAST_FRAME))
+#else
+#define MODE_CTX_REF_FRAMES MAX_REF_FRAMES
+#endif
+
+typedef struct {
+ // Number of base colors for Y (0) and UV (1)
+ uint8_t palette_size[2];
+ // Value of base colors for Y, U, and V
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t palette_colors[3 * PALETTE_MAX_SIZE];
+#else
+ uint8_t palette_colors[3 * PALETTE_MAX_SIZE];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Only used by encoder to store the color index of the top left pixel.
+ // TODO(huisu): move this to encoder
+ uint8_t palette_first_color_idx[2];
+} PALETTE_MODE_INFO;
+
+#if CONFIG_EXT_INTRA
+typedef struct {
+ // 1: an ext intra mode is used; 0: otherwise.
+ uint8_t use_ext_intra_mode[PLANE_TYPES];
+ EXT_INTRA_MODE ext_intra_mode[PLANE_TYPES];
+} EXT_INTRA_MODE_INFO;
+#endif // CONFIG_EXT_INTRA
+
// This structure now relates to 8x8 block regions.
typedef struct {
// Common for both INTER and INTRA blocks
BLOCK_SIZE sb_type;
PREDICTION_MODE mode;
TX_SIZE tx_size;
- int8_t skip;
-#if CONFIG_MISC_FIXES
- int8_t has_no_coeffs;
+#if CONFIG_VAR_TX
+  // TODO(jingning): This effectively assigns 64 entries to each 8x8 block.
+  // It takes much more space than needed.
+ TX_SIZE inter_tx_size[64];
#endif
+ int8_t skip;
+ int8_t has_no_coeffs;
int8_t segment_id;
int8_t seg_id_predicted; // valid only when temporal_update is enabled
// Only for INTRA blocks
PREDICTION_MODE uv_mode;
+ PALETTE_MODE_INFO palette_mode_info;
// Only for INTER blocks
INTERP_FILTER interp_filter;
MV_REFERENCE_FRAME ref_frame[2];
TX_TYPE tx_type;
- // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
+#if CONFIG_EXT_INTRA
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int8_t angle_delta[2];
+  // TODO(huisu): this may be replaced by interp_filter
+ INTRA_FILTER intra_filter;
+#endif // CONFIG_EXT_INTRA
+
int_mv mv[2];
+ int_mv pred_mv[2];
+#if CONFIG_REF_MV
+ uint8_t ref_mv_idx;
+#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
@@ -122,7 +208,7 @@
int stride;
};
-struct macroblockd_plane {
+typedef struct macroblockd_plane {
tran_low_t *dqcoeff;
PLANE_TYPE plane_type;
int subsampling_x;
@@ -141,7 +227,7 @@
// encoder
const int16_t *dequant;
-};
+} MACROBLOCKD_PLANE;
#define BLOCK_OFFSET(x, i) ((x) + (i) * 16)
@@ -172,6 +258,8 @@
int up_available;
int left_available;
+ const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
+
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
@@ -192,6 +280,26 @@
PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT *above_txfm_context;
+ TXFM_CONTEXT *left_txfm_context;
+ TXFM_CONTEXT left_txfm_context_buffer[8];
+
+ TX_SIZE max_tx_size;
+#if CONFIG_SUPERTX
+ TX_SIZE supertx_size;
+#endif
+#endif
+
+  // dimensions of the current block, in units of 8x8 blocks
+ uint8_t n8_w, n8_h;
+
+#if CONFIG_REF_MV
+ uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
+ CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+ uint8_t is_sec_rect;
+#endif
+
#if CONFIG_VP9_HIGHBITDEPTH
/* Bit depth: 8, 10, 12 */
int bd;
@@ -221,17 +329,183 @@
ADST_ADST, // TM
};
-static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd,
- int block_idx) {
+#if CONFIG_SUPERTX
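+// Supertx codes a single transform across an entire partitioned block, so
+// it is in effect whenever the transform size exceeds the smaller of the
+// block's width and height.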
+static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
+ return (int)mbmi->tx_size >
+ VPXMIN(b_width_log2_lookup[mbmi->sb_type],
+ b_height_log2_lookup[mbmi->sb_type]);
+}
+#endif // CONFIG_SUPERTX
+
+#if CONFIG_EXT_TX
+#define ALLOW_INTRA_EXT_TX 1
+
+static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
+ 1, 17, 10, 2
+};
+static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
+ 1, 17, 10
+};
+
+#define USE_IDTX_FOR_32X32 0
+static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs,
+ int is_inter) {
+ (void) is_inter;
+ if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
+#if USE_IDTX_FOR_32X32
+ if (tx_size == TX_32X32) return is_inter ? 3 : 0;
+#else
+ if (tx_size == TX_32X32) return 0;
+#endif
+ return tx_size == TX_16X16 ? 2 : 1;
+}
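+// For example, 4x4 and 8x8 blocks map to set 1 (all 17 transform types per
+// the tables above), 16x16 maps to set 2 (10 types), and 32x32 maps to set
+// 0 (DCT_DCT only) unless USE_IDTX_FOR_32X32 enables set 3 for inter.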
+
+static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
+ int is_inter) {
+ const int set = get_ext_tx_set(tx_size, bs, is_inter);
+ return is_inter ? num_ext_tx_set_inter[set] : num_ext_tx_set_intra[set];
+}
+
+static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = {
+ { 0, 0, 0, 0, }, // unused
+ { 1, 1, 0, 0, },
+ { 0, 0, 1, 0, },
+};
+
+static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = {
+ { 0, 0, 0, 0, }, // unused
+ { 1, 1, 0, 0, },
+ { 0, 0, 1, 0, },
+ { 0, 0, 0, USE_IDTX_FOR_32X32, },
+};
+
+// Transform types used in each intra set
+static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, },
+};
+
+// Transform types used in each inter set
+static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, },
+};
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+#define ALLOW_FILTER_INTRA_MODES 1
+#define ANGLE_STEP 3
+#define MAX_ANGLE_DELTAS 3
+#define ANGLE_FAST_SEARCH 1
+#define ANGLE_SKIP_THRESH 0.10
+#define FILTER_FAST_SEARCH 1
+
+static uint8_t mode_to_angle_map[INTRA_MODES] = {
+ 0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
+};
+
+static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = {
+ DCT_DCT, // FILTER_DC
+ ADST_DCT, // FILTER_V
+ DCT_ADST, // FILTER_H
+ DCT_DCT, // FILTER_D45
+ ADST_ADST, // FILTER_D135
+ ADST_DCT, // FILTER_D117
+ DCT_ADST, // FILTER_D153
+ DCT_ADST, // FILTER_D207
+ ADST_DCT, // FILTER_D63
+ ADST_ADST, // FILTER_TM
+};
+
+int pick_intra_filter(int angle);
+#endif // CONFIG_EXT_INTRA
+
+static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
+ const MACROBLOCKD *xd,
+ int block_idx, TX_SIZE tx_size) {
const MODE_INFO *const mi = xd->mi[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
+#if CONFIG_EXT_INTRA
+ if (!is_inter_block(mbmi)) {
+ const int use_ext_intra_mode_info =
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type];
+ const EXT_INTRA_MODE ext_intra_mode =
+ mbmi->ext_intra_mode_info.ext_intra_mode[plane_type];
+ const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y) ?
+ get_y_mode(mi, block_idx) : mbmi->uv_mode;
+
+ if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+ return DCT_DCT;
+
+#if CONFIG_EXT_TX
+#if ALLOW_INTRA_EXT_TX
+ if (mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y)
+ return mbmi->tx_type;
+#endif // ALLOW_INTRA_EXT_TX
+#endif // CONFIG_EXT_TX
+
+ if (use_ext_intra_mode_info)
+ return filter_intra_mode_to_tx_type_lookup[ext_intra_mode];
+
+ if (mode == DC_PRED) {
+ return DCT_DCT;
+ } else if (mode == TM_PRED) {
+ return ADST_ADST;
+ } else {
+ int angle = mode_to_angle_map[mode];
+ if (mbmi->sb_type >= BLOCK_8X8)
+ angle += mbmi->angle_delta[plane_type] * ANGLE_STEP;
+ assert(angle > 0 && angle < 270);
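+      // Near-vertical angles (around 90) pick ADST_DCT, near-horizontal
+      // ones (around 180) pick DCT_ADST, the 135-degree diagonal picks
+      // ADST_ADST, and angles close to 45/225 fall back to DCT_DCT.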
+ if (angle == 135)
+ return ADST_ADST;
+ else if (angle < 45 || angle > 225)
+ return DCT_DCT;
+ else if (angle < 135)
+ return ADST_DCT;
+ else
+ return DCT_ADST;
+ }
+ }
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_TX
+#if USE_IDTX_FOR_32X32
+ if (xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 ||
+ (tx_size >= TX_32X32 && !is_inter_block(mbmi)))
+#else
+ if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+#endif
+ return DCT_DCT;
+ if (mbmi->sb_type >= BLOCK_8X8) {
+ if (plane_type == PLANE_TYPE_Y) {
+#if !ALLOW_INTRA_EXT_TX
+ if (is_inter_block(mbmi))
+#endif // ALLOW_INTRA_EXT_TX
+ return mbmi->tx_type;
+ }
+ if (is_inter_block(mbmi))
+ // UV Inter only
+ return (mbmi->tx_type == IDTX && tx_size == TX_32X32 ?
+ DCT_DCT : mbmi->tx_type);
+ }
+
+ // Sub8x8-Inter/Intra OR UV-Intra
+ if (is_inter_block(mbmi)) // Sub8x8-Inter
+ return DCT_DCT;
+ else // Sub8x8 Intra OR UV-Intra
+ return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ?
+ get_y_mode(mi, block_idx) : mbmi->uv_mode];
+#else
(void) block_idx;
if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
- mbmi->tx_size >= TX_32X32)
+ tx_size >= TX_32X32)
return DCT_DCT;
-
return mbmi->tx_type;
+#endif // CONFIG_EXT_TX
}
void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
@@ -248,8 +522,18 @@
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
const struct macroblockd_plane *pd) {
+#if CONFIG_SUPERTX
+ if (!supertx_enabled(mbmi)) {
+ return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
+ pd->subsampling_y);
+ } else {
+ return uvsupertx_size_lookup[mbmi->tx_size][pd->subsampling_x]
+ [pd->subsampling_y];
+ }
+#else
return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
pd->subsampling_y);
+#endif // CONFIG_SUPERTX
}
static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
@@ -279,7 +563,6 @@
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
foreach_transformed_block_visitor visit, void *arg);
-
void vp10_foreach_transformed_block(
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit, void *arg);
diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h
index 334489c..84476fa 100644
--- a/vp10/common/common_data.h
+++ b/vp10/common/common_data.h
@@ -170,6 +170,21 @@
{0, 0 }, // 64X64 - {0b0000, 0b0000}
};
+#if CONFIG_SUPERTX
+static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
+ // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
+ // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
+ {{TX_4X4, TX_4X4}, {TX_4X4, TX_4X4}},
+ {{TX_8X8, TX_4X4}, {TX_4X4, TX_4X4}},
+ {{TX_16X16, TX_8X8}, {TX_8X8, TX_8X8}},
+ {{TX_32X32, TX_16X16}, {TX_16X16, TX_16X16}},
+};
+
+static const int partition_supertx_context_lookup[PARTITION_TYPES] = {
+ -1, 0, 0, 1
+};
+#endif // CONFIG_SUPERTX
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/divide.c b/vp10/common/divide.c
new file mode 100644
index 0000000..00b43a0
--- /dev/null
+++ b/vp10/common/divide.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/divide.h"
+
+/* Constants for divide by multiply for small divisors generated with:
+void init_fastdiv() {
+ int i;
+ for (i = 3; i < 256; ++i) {
+ const int s = 31 ^ __builtin_clz(2 * i + 1);
+ const unsigned long long base = (1ull << (sizeof(unsigned) * 8 + s)) - 1;
+ fastdiv_tab[i].mult = (base / i + 1) & 0xFFFFFFFF;
+ fastdiv_tab[i].shift = s;
+ }
+ for (i = 0; i < 8; ++i) {
+ fastdiv_tab[1 << i].mult = 0;
+ fastdiv_tab[1 << i].shift = i;
+ }
+}
+*/
+const struct fastdiv_elem vp10_fastdiv_tab[256] = {
+ {0, 0}, {0, 0}, {0, 1}, {1431655766, 2},
+ {0, 2}, {2576980378, 3}, {1431655766, 3}, {613566757, 3},
+ {0, 3}, {3340530120, 4}, {2576980378, 4}, {1952257862, 4},
+ {1431655766, 4}, {991146300, 4}, {613566757, 4}, {286331154, 4},
+ {0, 4}, {3789677026, 5}, {3340530120, 5}, {2938661835, 5},
+ {2576980378, 5}, {2249744775, 5}, {1952257862, 5}, {1680639377, 5},
+ {1431655766, 5}, {1202590843, 5}, {991146300, 5}, {795364315, 5},
+ {613566757, 5}, {444306962, 5}, {286331154, 5}, {138547333, 5},
+ {0, 5}, {4034666248, 6}, {3789677026, 6}, {3558687189, 6},
+ {3340530120, 6}, {3134165325, 6}, {2938661835, 6}, {2753184165, 6},
+ {2576980378, 6}, {2409371898, 6}, {2249744775, 6}, {2097542168, 6},
+ {1952257862, 6}, {1813430637, 6}, {1680639377, 6}, {1553498810, 6},
+ {1431655766, 6}, {1314785907, 6}, {1202590843, 6}, {1094795586, 6},
+ {991146300, 6}, {891408307, 6}, {795364315, 6}, {702812831, 6},
+ {613566757, 6}, {527452125, 6}, {444306962, 6}, {363980280, 6},
+ {286331154, 6}, {211227900, 6}, {138547333, 6}, {68174085, 6},
+ {0, 6}, {4162814457, 7}, {4034666248, 7}, {3910343360, 7},
+ {3789677026, 7}, {3672508268, 7}, {3558687189, 7}, {3448072337, 7},
+ {3340530120, 7}, {3235934265, 7}, {3134165325, 7}, {3035110223, 7},
+ {2938661835, 7}, {2844718599, 7}, {2753184165, 7}, {2663967058, 7},
+ {2576980378, 7}, {2492141518, 7}, {2409371898, 7}, {2328596727, 7},
+ {2249744775, 7}, {2172748162, 7}, {2097542168, 7}, {2024065048, 7},
+ {1952257862, 7}, {1882064321, 7}, {1813430637, 7}, {1746305385, 7},
+ {1680639377, 7}, {1616385542, 7}, {1553498810, 7}, {1491936009, 7},
+ {1431655766, 7}, {1372618415, 7}, {1314785907, 7}, {1258121734, 7},
+ {1202590843, 7}, {1148159575, 7}, {1094795586, 7}, {1042467791, 7},
+ {991146300, 7}, {940802361, 7}, {891408307, 7}, {842937507, 7},
+ {795364315, 7}, {748664025, 7}, {702812831, 7}, {657787785, 7},
+ {613566757, 7}, {570128403, 7}, {527452125, 7}, {485518043, 7},
+ {444306962, 7}, {403800345, 7}, {363980280, 7}, {324829460, 7},
+ {286331154, 7}, {248469183, 7}, {211227900, 7}, {174592167, 7},
+ {138547333, 7}, {103079216, 7}, {68174085, 7}, {33818641, 7},
+ {0, 7}, {4228378656, 8}, {4162814457, 8}, {4098251237, 8},
+ {4034666248, 8}, {3972037425, 8}, {3910343360, 8}, {3849563281, 8},
+ {3789677026, 8}, {3730665024, 8}, {3672508268, 8}, {3615188300, 8},
+ {3558687189, 8}, {3502987511, 8}, {3448072337, 8}, {3393925206, 8},
+ {3340530120, 8}, {3287871517, 8}, {3235934265, 8}, {3184703642, 8},
+ {3134165325, 8}, {3084305374, 8}, {3035110223, 8}, {2986566663, 8},
+ {2938661835, 8}, {2891383213, 8}, {2844718599, 8}, {2798656110, 8},
+ {2753184165, 8}, {2708291480, 8}, {2663967058, 8}, {2620200175, 8},
+ {2576980378, 8}, {2534297473, 8}, {2492141518, 8}, {2450502814, 8},
+ {2409371898, 8}, {2368739540, 8}, {2328596727, 8}, {2288934667, 8},
+ {2249744775, 8}, {2211018668, 8}, {2172748162, 8}, {2134925265, 8},
+ {2097542168, 8}, {2060591247, 8}, {2024065048, 8}, {1987956292, 8},
+ {1952257862, 8}, {1916962805, 8}, {1882064321, 8}, {1847555765, 8},
+ {1813430637, 8}, {1779682582, 8}, {1746305385, 8}, {1713292966, 8},
+ {1680639377, 8}, {1648338801, 8}, {1616385542, 8}, {1584774030, 8},
+ {1553498810, 8}, {1522554545, 8}, {1491936009, 8}, {1461638086, 8},
+ {1431655766, 8}, {1401984144, 8}, {1372618415, 8}, {1343553873, 8},
+ {1314785907, 8}, {1286310003, 8}, {1258121734, 8}, {1230216764, 8},
+ {1202590843, 8}, {1175239808, 8}, {1148159575, 8}, {1121346142, 8},
+ {1094795586, 8}, {1068504060, 8}, {1042467791, 8}, {1016683080, 8},
+ {991146300, 8}, {965853890, 8}, {940802361, 8}, {915988286, 8},
+ {891408307, 8}, {867059126, 8}, {842937507, 8}, {819040276, 8},
+ {795364315, 8}, {771906565, 8}, {748664025, 8}, {725633745, 8},
+ {702812831, 8}, {680198441, 8}, {657787785, 8}, {635578121, 8},
+ {613566757, 8}, {591751050, 8}, {570128403, 8}, {548696263, 8},
+ {527452125, 8}, {506393524, 8}, {485518043, 8}, {464823301, 8},
+ {444306962, 8}, {423966729, 8}, {403800345, 8}, {383805589, 8},
+ {363980280, 8}, {344322273, 8}, {324829460, 8}, {305499766, 8},
+ {286331154, 8}, {267321616, 8}, {248469183, 8}, {229771913, 8},
+ {211227900, 8}, {192835267, 8}, {174592167, 8}, {156496785, 8},
+ {138547333, 8}, {120742053, 8}, {103079216, 8}, {85557118, 8},
+ {68174085, 8}, {50928466, 8}, {33818641, 8}, {16843010, 8},
+};
diff --git a/vp10/common/divide.h b/vp10/common/divide.h
new file mode 100644
index 0000000..2f3c35c
--- /dev/null
+++ b/vp10/common/divide.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_DIVIDE_H_
+#define VP10_COMMON_DIVIDE_H_
+// An implementation of the divide-by-multiply algorithm:
+// https://gmplib.org/~tege/divcnst-pldi94.pdf
+
+#include <limits.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+struct fastdiv_elem {
+ unsigned mult;
+ unsigned shift;
+};
+
+extern const struct fastdiv_elem vp10_fastdiv_tab[256];
+
+static INLINE unsigned fastdiv(unsigned x, int y) {
+ unsigned t =
+ ((uint64_t)x * vp10_fastdiv_tab[y].mult) >> (sizeof(x) * CHAR_BIT);
+ return (t + x) >> vp10_fastdiv_tab[y].shift;
+}
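+// Example: fastdiv(100, 7) computes t = (100 * 613566757) >> 32 = 14 and
+// returns (14 + 100) >> 3 = 14 == 100 / 7. For powers of two, mult is 0 and
+// the result reduces to x >> shift.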
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // VP10_COMMON_DIVIDE_H_
diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c
index 3da08a6..f60bcf5 100644
--- a/vp10/common/entropy.c
+++ b/vp10/common/entropy.c
@@ -133,7 +133,7 @@
0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5
};
-// Model obtained from a 2-sided zero-centerd distribuition derived
+// Model obtained from a 2-sided zero-centered distribution derived
// from a Pareto distribution. The cdf of the distribution is:
// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
//
@@ -405,6 +405,287 @@
{255, 246, 247, 255, 239, 255, 253, 255},
};
+#if CONFIG_ANS
+// Model obtained from a 2-sided zero-centered distribution derived
+// from a Pareto distribution. The cdf of the distribution is:
+// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
+//
+// For a given beta and a given probability of the 1-node, the alpha
+// is first solved, and then the {alpha, beta} pair is used to generate
+// the probabilities for the rest of the nodes.
+//
+// beta = 8
+// Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here.
+// ZERO_TOKEN and EOB_TOKEN are coded as flags outside this coder.
+const vpx_prob vp10_pareto8_token_probs[COEFF_PROB_MODELS]
+ [ENTROPY_TOKENS - 2] = {
+ {1, 1, 1, 1, 2, 4, 8, 14, 26, 198},
+ {2, 2, 2, 2, 4, 7, 14, 26, 42, 155},
+ {3, 3, 3, 3, 6, 11, 20, 34, 51, 122},
+ {4, 4, 4, 4, 7, 14, 25, 41, 56, 97},
+ {5, 5, 5, 5, 9, 17, 30, 46, 58, 76},
+ {6, 6, 6, 5, 11, 20, 34, 50, 57, 61},
+ {7, 7, 7, 6, 12, 22, 37, 53, 56, 49},
+ {8, 8, 7, 7, 14, 25, 40, 54, 53, 40},
+ {9, 9, 8, 8, 15, 27, 43, 55, 50, 32},
+ {10, 10, 9, 9, 16, 29, 45, 55, 47, 26},
+ {11, 10, 10, 10, 18, 31, 47, 55, 43, 21},
+ {12, 11, 11, 10, 19, 32, 48, 55, 40, 18},
+ {13, 12, 12, 11, 20, 34, 49, 54, 37, 14},
+ {14, 13, 12, 12, 21, 35, 50, 53, 34, 12},
+ {15, 14, 13, 12, 22, 37, 51, 51, 31, 10},
+ {16, 15, 14, 13, 23, 38, 51, 50, 28, 8},
+ {17, 16, 15, 13, 24, 39, 51, 48, 26, 7},
+ {18, 17, 15, 14, 25, 40, 52, 46, 23, 6},
+ {19, 17, 16, 15, 26, 41, 51, 45, 21, 5},
+ {20, 18, 17, 15, 27, 42, 51, 43, 19, 4},
+ {21, 19, 17, 16, 28, 42, 51, 41, 18, 3},
+ {22, 20, 18, 16, 28, 43, 51, 39, 16, 3},
+ {23, 21, 19, 17, 29, 43, 50, 37, 14, 3},
+ {24, 22, 19, 17, 30, 44, 49, 36, 13, 2},
+ {25, 22, 20, 18, 30, 44, 49, 34, 12, 2},
+ {26, 23, 20, 18, 31, 44, 48, 33, 11, 2},
+ {27, 24, 21, 19, 31, 45, 47, 31, 10, 1},
+ {28, 25, 22, 19, 32, 45, 46, 29, 9, 1},
+ {29, 25, 22, 20, 32, 45, 46, 28, 8, 1},
+ {30, 26, 23, 20, 33, 45, 45, 26, 7, 1},
+ {31, 27, 23, 20, 33, 45, 44, 25, 7, 1},
+ {32, 27, 24, 21, 33, 45, 43, 24, 6, 1},
+ {33, 28, 24, 21, 34, 44, 42, 23, 6, 1},
+ {34, 29, 25, 21, 34, 44, 41, 22, 5, 1},
+ {35, 30, 25, 22, 34, 44, 40, 20, 5, 1},
+ {36, 30, 26, 22, 35, 44, 39, 19, 4, 1},
+ {37, 31, 26, 22, 35, 44, 38, 18, 4, 1},
+ {38, 32, 27, 22, 35, 43, 37, 17, 4, 1},
+ {39, 33, 27, 23, 35, 43, 36, 16, 3, 1},
+ {40, 33, 27, 23, 35, 43, 35, 16, 3, 1},
+ {41, 34, 28, 23, 35, 42, 34, 15, 3, 1},
+ {42, 35, 28, 23, 36, 42, 33, 14, 2, 1},
+ {43, 35, 29, 24, 35, 42, 32, 13, 2, 1},
+ {44, 36, 29, 24, 36, 41, 31, 12, 2, 1},
+ {45, 36, 29, 24, 36, 41, 30, 12, 2, 1},
+ {46, 37, 30, 24, 35, 40, 30, 11, 2, 1},
+ {47, 37, 30, 24, 36, 40, 29, 10, 2, 1},
+ {48, 38, 30, 24, 36, 40, 28, 10, 1, 1},
+ {49, 39, 31, 24, 36, 39, 27, 9, 1, 1},
+ {50, 39, 31, 25, 35, 39, 26, 9, 1, 1},
+ {51, 40, 31, 25, 36, 38, 25, 8, 1, 1},
+ {52, 40, 31, 25, 35, 38, 25, 8, 1, 1},
+ {53, 41, 32, 25, 35, 37, 24, 7, 1, 1},
+ {54, 41, 32, 25, 35, 37, 23, 7, 1, 1},
+ {55, 42, 32, 25, 35, 36, 22, 7, 1, 1},
+ {56, 42, 33, 25, 35, 35, 22, 6, 1, 1},
+ {57, 43, 33, 25, 34, 35, 21, 6, 1, 1},
+ {58, 43, 33, 25, 35, 34, 20, 6, 1, 1},
+ {59, 44, 33, 25, 34, 34, 20, 5, 1, 1},
+ {60, 45, 33, 25, 34, 33, 19, 5, 1, 1},
+ {61, 45, 33, 25, 34, 33, 18, 5, 1, 1},
+ {62, 45, 34, 25, 34, 32, 18, 4, 1, 1},
+ {63, 46, 34, 25, 33, 32, 17, 4, 1, 1},
+ {64, 46, 34, 25, 33, 31, 17, 4, 1, 1},
+ {65, 47, 34, 25, 33, 30, 16, 4, 1, 1},
+ {66, 47, 34, 25, 33, 30, 15, 4, 1, 1},
+ {67, 48, 34, 25, 33, 29, 15, 3, 1, 1},
+ {68, 48, 35, 25, 32, 29, 14, 3, 1, 1},
+ {69, 48, 35, 25, 32, 28, 14, 3, 1, 1},
+ {70, 49, 35, 25, 32, 27, 13, 3, 1, 1},
+ {71, 49, 35, 25, 31, 27, 13, 3, 1, 1},
+ {72, 49, 35, 25, 31, 27, 12, 3, 1, 1},
+ {73, 50, 35, 25, 31, 26, 12, 2, 1, 1},
+ {74, 50, 35, 25, 31, 25, 12, 2, 1, 1},
+ {75, 51, 35, 25, 30, 25, 11, 2, 1, 1},
+ {76, 51, 35, 25, 30, 24, 11, 2, 1, 1},
+ {77, 51, 35, 25, 30, 24, 10, 2, 1, 1},
+ {78, 52, 35, 24, 29, 24, 10, 2, 1, 1},
+ {79, 52, 35, 24, 29, 23, 10, 2, 1, 1},
+ {80, 52, 35, 24, 29, 23, 9, 2, 1, 1},
+ {81, 53, 35, 24, 28, 22, 9, 2, 1, 1},
+ {82, 53, 35, 24, 28, 22, 9, 1, 1, 1},
+ {83, 54, 35, 24, 28, 21, 8, 1, 1, 1},
+ {84, 54, 35, 24, 27, 21, 8, 1, 1, 1},
+ {85, 54, 35, 24, 27, 20, 8, 1, 1, 1},
+ {86, 54, 35, 24, 27, 20, 7, 1, 1, 1},
+ {87, 55, 35, 23, 27, 19, 7, 1, 1, 1},
+ {88, 55, 35, 23, 26, 19, 7, 1, 1, 1},
+ {89, 55, 35, 23, 26, 18, 7, 1, 1, 1},
+ {90, 55, 35, 23, 26, 18, 6, 1, 1, 1},
+ {91, 56, 35, 23, 25, 17, 6, 1, 1, 1},
+ {92, 56, 35, 22, 25, 17, 6, 1, 1, 1},
+ {93, 56, 35, 22, 24, 17, 6, 1, 1, 1},
+ {94, 57, 35, 22, 24, 16, 5, 1, 1, 1},
+ {95, 56, 35, 22, 24, 16, 5, 1, 1, 1},
+ {96, 57, 35, 22, 23, 15, 5, 1, 1, 1},
+ {97, 56, 35, 22, 23, 15, 5, 1, 1, 1},
+ {98, 57, 34, 21, 23, 15, 5, 1, 1, 1},
+ {99, 57, 35, 21, 23, 14, 4, 1, 1, 1},
+ {100, 58, 34, 21, 22, 14, 4, 1, 1, 1},
+ {101, 57, 34, 21, 22, 14, 4, 1, 1, 1},
+ {102, 58, 34, 21, 21, 13, 4, 1, 1, 1},
+ {103, 57, 34, 21, 21, 13, 4, 1, 1, 1},
+ {104, 57, 34, 20, 21, 13, 4, 1, 1, 1},
+ {105, 58, 34, 20, 20, 12, 4, 1, 1, 1},
+ {106, 58, 34, 20, 20, 12, 3, 1, 1, 1},
+ {107, 58, 33, 20, 20, 12, 3, 1, 1, 1},
+ {108, 59, 33, 20, 19, 11, 3, 1, 1, 1},
+ {109, 59, 33, 19, 19, 11, 3, 1, 1, 1},
+ {110, 58, 33, 19, 19, 11, 3, 1, 1, 1},
+ {111, 59, 33, 19, 18, 10, 3, 1, 1, 1},
+ {112, 58, 33, 19, 18, 10, 3, 1, 1, 1},
+ {113, 58, 32, 19, 18, 10, 3, 1, 1, 1},
+ {114, 59, 32, 18, 18, 10, 2, 1, 1, 1},
+ {115, 60, 32, 18, 17, 9, 2, 1, 1, 1},
+ {116, 59, 32, 18, 17, 9, 2, 1, 1, 1},
+ {117, 59, 32, 18, 16, 9, 2, 1, 1, 1},
+ {118, 59, 31, 18, 16, 9, 2, 1, 1, 1},
+ {119, 59, 32, 17, 16, 8, 2, 1, 1, 1},
+ {120, 59, 31, 17, 16, 8, 2, 1, 1, 1},
+ {121, 59, 31, 17, 15, 8, 2, 1, 1, 1},
+ {122, 59, 30, 17, 15, 8, 2, 1, 1, 1},
+ {123, 59, 30, 17, 15, 7, 2, 1, 1, 1},
+ {124, 59, 30, 16, 15, 7, 2, 1, 1, 1},
+ {125, 59, 30, 16, 14, 7, 2, 1, 1, 1},
+ {126, 59, 30, 16, 14, 7, 1, 1, 1, 1},
+ {127, 59, 30, 16, 14, 6, 1, 1, 1, 1},
+ {128, 59, 30, 16, 13, 6, 1, 1, 1, 1},
+ {129, 59, 30, 15, 13, 6, 1, 1, 1, 1},
+ {130, 59, 29, 15, 13, 6, 1, 1, 1, 1},
+ {131, 59, 29, 15, 12, 6, 1, 1, 1, 1},
+ {132, 59, 28, 15, 12, 6, 1, 1, 1, 1},
+ {133, 59, 28, 15, 12, 5, 1, 1, 1, 1},
+ {134, 59, 28, 14, 12, 5, 1, 1, 1, 1},
+ {135, 59, 28, 14, 11, 5, 1, 1, 1, 1},
+ {136, 58, 28, 14, 11, 5, 1, 1, 1, 1},
+ {137, 58, 27, 14, 11, 5, 1, 1, 1, 1},
+ {138, 58, 27, 13, 11, 5, 1, 1, 1, 1},
+ {139, 58, 27, 13, 11, 4, 1, 1, 1, 1},
+ {140, 58, 27, 13, 10, 4, 1, 1, 1, 1},
+ {141, 58, 26, 13, 10, 4, 1, 1, 1, 1},
+ {142, 57, 26, 13, 10, 4, 1, 1, 1, 1},
+ {143, 57, 26, 12, 10, 4, 1, 1, 1, 1},
+ {144, 57, 26, 12, 9, 4, 1, 1, 1, 1},
+ {145, 57, 25, 12, 9, 4, 1, 1, 1, 1},
+ {146, 57, 25, 12, 9, 3, 1, 1, 1, 1},
+ {147, 57, 25, 11, 9, 3, 1, 1, 1, 1},
+ {148, 57, 25, 11, 8, 3, 1, 1, 1, 1},
+ {149, 57, 24, 11, 8, 3, 1, 1, 1, 1},
+ {150, 56, 24, 11, 8, 3, 1, 1, 1, 1},
+ {151, 56, 23, 11, 8, 3, 1, 1, 1, 1},
+ {152, 56, 23, 10, 8, 3, 1, 1, 1, 1},
+ {153, 56, 23, 10, 7, 3, 1, 1, 1, 1},
+ {154, 55, 23, 10, 7, 3, 1, 1, 1, 1},
+ {155, 55, 22, 10, 7, 3, 1, 1, 1, 1},
+ {156, 55, 22, 10, 7, 2, 1, 1, 1, 1},
+ {157, 54, 22, 10, 7, 2, 1, 1, 1, 1},
+ {158, 54, 22, 9, 7, 2, 1, 1, 1, 1},
+ {159, 55, 21, 9, 6, 2, 1, 1, 1, 1},
+ {160, 54, 21, 9, 6, 2, 1, 1, 1, 1},
+ {161, 53, 21, 9, 6, 2, 1, 1, 1, 1},
+ {162, 53, 20, 9, 6, 2, 1, 1, 1, 1},
+ {163, 53, 20, 8, 6, 2, 1, 1, 1, 1},
+ {164, 53, 20, 8, 5, 2, 1, 1, 1, 1},
+ {165, 52, 20, 8, 5, 2, 1, 1, 1, 1},
+ {166, 52, 19, 8, 5, 2, 1, 1, 1, 1},
+ {167, 51, 19, 8, 5, 2, 1, 1, 1, 1},
+ {168, 51, 19, 7, 5, 2, 1, 1, 1, 1},
+ {169, 51, 19, 7, 5, 1, 1, 1, 1, 1},
+ {170, 51, 18, 7, 5, 1, 1, 1, 1, 1},
+ {171, 51, 18, 7, 4, 1, 1, 1, 1, 1},
+ {172, 50, 18, 7, 4, 1, 1, 1, 1, 1},
+ {173, 50, 17, 7, 4, 1, 1, 1, 1, 1},
+ {174, 49, 17, 7, 4, 1, 1, 1, 1, 1},
+ {175, 49, 17, 6, 4, 1, 1, 1, 1, 1},
+ {176, 49, 16, 6, 4, 1, 1, 1, 1, 1},
+ {177, 48, 16, 6, 4, 1, 1, 1, 1, 1},
+ {178, 47, 16, 6, 4, 1, 1, 1, 1, 1},
+ {179, 47, 16, 6, 3, 1, 1, 1, 1, 1},
+ {180, 47, 15, 6, 3, 1, 1, 1, 1, 1},
+ {181, 47, 15, 5, 3, 1, 1, 1, 1, 1},
+ {182, 46, 15, 5, 3, 1, 1, 1, 1, 1},
+ {183, 46, 14, 5, 3, 1, 1, 1, 1, 1},
+ {184, 45, 14, 5, 3, 1, 1, 1, 1, 1},
+ {185, 44, 14, 5, 3, 1, 1, 1, 1, 1},
+ {186, 44, 13, 5, 3, 1, 1, 1, 1, 1},
+ {187, 43, 13, 5, 3, 1, 1, 1, 1, 1},
+ {188, 44, 13, 4, 2, 1, 1, 1, 1, 1},
+ {189, 43, 13, 4, 2, 1, 1, 1, 1, 1},
+ {190, 43, 12, 4, 2, 1, 1, 1, 1, 1},
+ {191, 42, 12, 4, 2, 1, 1, 1, 1, 1},
+ {192, 41, 12, 4, 2, 1, 1, 1, 1, 1},
+ {193, 41, 11, 4, 2, 1, 1, 1, 1, 1},
+ {194, 40, 11, 4, 2, 1, 1, 1, 1, 1},
+ {195, 39, 11, 4, 2, 1, 1, 1, 1, 1},
+ {196, 39, 11, 3, 2, 1, 1, 1, 1, 1},
+ {197, 39, 10, 3, 2, 1, 1, 1, 1, 1},
+ {198, 38, 10, 3, 2, 1, 1, 1, 1, 1},
+ {199, 37, 10, 3, 2, 1, 1, 1, 1, 1},
+ {200, 37, 10, 3, 1, 1, 1, 1, 1, 1},
+ {201, 37, 9, 3, 1, 1, 1, 1, 1, 1},
+ {202, 36, 9, 3, 1, 1, 1, 1, 1, 1},
+ {203, 35, 9, 3, 1, 1, 1, 1, 1, 1},
+ {204, 35, 8, 3, 1, 1, 1, 1, 1, 1},
+ {205, 35, 8, 2, 1, 1, 1, 1, 1, 1},
+ {206, 34, 8, 2, 1, 1, 1, 1, 1, 1},
+ {207, 33, 8, 2, 1, 1, 1, 1, 1, 1},
+ {208, 32, 8, 2, 1, 1, 1, 1, 1, 1},
+ {209, 32, 7, 2, 1, 1, 1, 1, 1, 1},
+ {210, 31, 7, 2, 1, 1, 1, 1, 1, 1},
+ {211, 30, 7, 2, 1, 1, 1, 1, 1, 1},
+ {212, 30, 6, 2, 1, 1, 1, 1, 1, 1},
+ {213, 29, 6, 2, 1, 1, 1, 1, 1, 1},
+ {214, 28, 6, 2, 1, 1, 1, 1, 1, 1},
+ {215, 27, 6, 2, 1, 1, 1, 1, 1, 1},
+ {216, 27, 6, 1, 1, 1, 1, 1, 1, 1},
+ {217, 27, 5, 1, 1, 1, 1, 1, 1, 1},
+ {218, 26, 5, 1, 1, 1, 1, 1, 1, 1},
+ {219, 25, 5, 1, 1, 1, 1, 1, 1, 1},
+ {220, 24, 5, 1, 1, 1, 1, 1, 1, 1},
+ {221, 24, 4, 1, 1, 1, 1, 1, 1, 1},
+ {222, 23, 4, 1, 1, 1, 1, 1, 1, 1},
+ {223, 22, 4, 1, 1, 1, 1, 1, 1, 1},
+ {224, 21, 4, 1, 1, 1, 1, 1, 1, 1},
+ {225, 20, 4, 1, 1, 1, 1, 1, 1, 1},
+ {226, 20, 3, 1, 1, 1, 1, 1, 1, 1},
+ {227, 19, 3, 1, 1, 1, 1, 1, 1, 1},
+ {228, 18, 3, 1, 1, 1, 1, 1, 1, 1},
+ {229, 17, 3, 1, 1, 1, 1, 1, 1, 1},
+ {230, 16, 3, 1, 1, 1, 1, 1, 1, 1},
+ {231, 16, 2, 1, 1, 1, 1, 1, 1, 1},
+ {232, 15, 2, 1, 1, 1, 1, 1, 1, 1},
+ {233, 14, 2, 1, 1, 1, 1, 1, 1, 1},
+ {234, 13, 2, 1, 1, 1, 1, 1, 1, 1},
+ {235, 12, 2, 1, 1, 1, 1, 1, 1, 1},
+ {236, 11, 2, 1, 1, 1, 1, 1, 1, 1},
+ {237, 11, 1, 1, 1, 1, 1, 1, 1, 1},
+ {238, 10, 1, 1, 1, 1, 1, 1, 1, 1},
+ {239, 9, 1, 1, 1, 1, 1, 1, 1, 1},
+ {240, 8, 1, 1, 1, 1, 1, 1, 1, 1},
+ {241, 7, 1, 1, 1, 1, 1, 1, 1, 1},
+ {242, 6, 1, 1, 1, 1, 1, 1, 1, 1},
+ {243, 5, 1, 1, 1, 1, 1, 1, 1, 1},
+ {244, 4, 1, 1, 1, 1, 1, 1, 1, 1},
+ {245, 3, 1, 1, 1, 1, 1, 1, 1, 1},
+ {246, 2, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+};
+
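+// Expands each of the 256 Pareto token models into a rans_dec_lut so that
+// rans_read() can resolve a coefficient token from state % ans_p8_precision
+// with a single table lookup.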
+void vp10_build_pareto8_dec_tab(
+ const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2],
+ rans_dec_lut dec_tab[COEFF_PROB_MODELS]) {
+ int p;
+ for (p = 0; p < COEFF_PROB_MODELS; ++p) {
+ rans_build_dec_tab(token_probs[p], dec_tab[p]);
+ }
+}
+#endif // CONFIG_ANS
+
static const vp10_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
{ // Y plane
{ // Intra
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index 9a471c8..4fa330e 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -14,6 +14,9 @@
#include "vpx/vpx_integer.h"
#include "vpx_dsp/prob.h"
+#if CONFIG_ANS
+#include "vp10/common/ans.h"
+#endif // CONFIG_ANS
#include "vp10/common/common.h"
#include "vp10/common/enums.h"
@@ -21,8 +24,8 @@
extern "C" {
#endif
-#define DIFF_UPDATE_PROB 252
-#define GROUP_DIFF_UPDATE_PROB 252
+#define DIFF_UPDATE_PROB 252
+#define GROUP_DIFF_UPDATE_PROB 252
// Coefficient token alphabet
#define ZERO_TOKEN 0 // 0 Extra Bits 0+0
@@ -163,6 +166,14 @@
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
extern const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
+#if CONFIG_ANS
+extern const vpx_prob
+ vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2];
+
+void vp10_build_pareto8_dec_tab(
+ const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2],
+ rans_dec_lut dec_tab[COEFF_PROB_MODELS]);
+#endif // CONFIG_ANS
typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS]
[COEFF_CONTEXTS][UNCONSTRAINED_NODES];
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 78f3650..4d9f773 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -127,21 +127,6 @@
}
};
-#if !CONFIG_MISC_FIXES
-const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = {
- { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc
- { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v
- { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h
- { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45
- { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135
- { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117
- { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153
- { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207
- { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63
- { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm
-};
-#endif
-
static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
{ 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8
{ 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16
@@ -162,32 +147,6 @@
{ 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm
};
-#if !CONFIG_MISC_FIXES
-const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS]
- [PARTITION_TYPES - 1] = {
- // 8x8 -> 4x4
- { 158, 97, 94 }, // a/l both not split
- { 93, 24, 99 }, // a split, l not split
- { 85, 119, 44 }, // l split, a not split
- { 62, 59, 67 }, // a/l both split
- // 16x16 -> 8x8
- { 149, 53, 53 }, // a/l both not split
- { 94, 20, 48 }, // a split, l not split
- { 83, 53, 24 }, // l split, a not split
- { 52, 18, 18 }, // a/l both split
- // 32x32 -> 16x16
- { 150, 40, 39 }, // a/l both not split
- { 78, 12, 26 }, // a split, l not split
- { 67, 33, 11 }, // l split, a not split
- { 24, 7, 5 }, // a/l both split
- // 64x64 -> 32x32
- { 174, 35, 49 }, // a/l both not split
- { 68, 11, 27 }, // a split, l not split
- { 57, 15, 9 }, // l split, a not split
- { 12, 3, 3 }, // a/l both split
-};
-#endif
-
static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
// 8x8 -> 4x4
@@ -212,8 +171,40 @@
{ 10, 7, 6 }, // a/l both split
};
+#if CONFIG_REF_MV
+static const vpx_prob default_newmv_prob[NEWMV_MODE_CONTEXTS] = {
+ 200, 180, 150, 150, 110, 70, 60,
+};
+
+static const vpx_prob default_zeromv_prob[ZEROMV_MODE_CONTEXTS] = {
+ 192, 64,
+};
+
+static const vpx_prob default_refmv_prob[REFMV_MODE_CONTEXTS] = {
+ 220, 220, 200, 200, 180, 128, 30, 220, 30,
+};
+
+static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = {
+ 128, 128, 128,
+};
+
+#if CONFIG_EXT_INTER
+static const vpx_prob default_new2mv_prob = 180;
+#endif
+#endif
+
static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
[INTER_MODES - 1] = {
+#if CONFIG_EXT_INTER
+  // TODO(zoeliu): Adjust the initial default probs
+ {2, 173, 34, 173}, // 0 = both zero mv
+ {7, 145, 85, 145}, // 1 = one zero mv + one a predicted mv
+ {7, 166, 63, 166}, // 2 = two predicted mvs
+ {7, 94, 66, 128}, // 3 = one predicted/zero and one new mv
+ {8, 64, 46, 128}, // 4 = two new mvs
+ {17, 81, 31, 128}, // 5 = one intra neighbour + x
+ {25, 29, 30, 96}, // 6 = two intra neighbours
+#else
{2, 173, 34}, // 0 = both zero mv
{7, 145, 85}, // 1 = one zero mv + one a predicted mv
{7, 166, 63}, // 2 = two predicted mvs
@@ -221,8 +212,22 @@
{8, 64, 46}, // 4 = two new mvs
{17, 81, 31}, // 5 = one intra neighbour + x
{25, 29, 30}, // 6 = two intra neighbours
+#endif // CONFIG_EXT_INTER
};
+#if CONFIG_EXT_INTER
+static const vpx_prob default_inter_compound_mode_probs
+ [INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1] = {
+ { 2, 173, 68, 192, 192, 128, 180, 180}, // 0 = both zero mv
+ { 7, 145, 160, 192, 192, 128, 180, 180}, // 1 = 1 zero + 1 predicted
+ { 7, 166, 126, 192, 192, 128, 180, 180}, // 2 = two predicted mvs
+ { 7, 94, 132, 192, 192, 128, 180, 180}, // 3 = 1 pred/zero, 1 new
+ { 8, 64, 64, 192, 192, 128, 180, 180}, // 4 = two new mvs
+ {17, 81, 52, 192, 192, 128, 180, 180}, // 5 = one intra neighbour
+ {25, 29, 50, 192, 192, 128, 180, 180}, // 6 = two intra neighbours
+};
+#endif // CONFIG_EXT_INTER
+
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
-DC_PRED, 2, /* 0 = DC_NODE */
@@ -239,9 +244,29 @@
const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
-INTER_OFFSET(ZEROMV), 2,
-INTER_OFFSET(NEARESTMV), 4,
+#if CONFIG_EXT_INTER
+ -INTER_OFFSET(NEARMV), 6,
+ -INTER_OFFSET(NEWMV), -INTER_OFFSET(NEWFROMNEARMV)
+#else
-INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV)
+#endif // CONFIG_EXT_INTER
};
+#if CONFIG_EXT_INTER
+const vpx_tree_index vp10_inter_compound_mode_tree
+ [TREE_SIZE(INTER_COMPOUND_MODES)] = {
+ -INTER_COMPOUND_OFFSET(ZERO_ZEROMV), 2,
+ -INTER_COMPOUND_OFFSET(NEAREST_NEARESTMV), 4,
+ 6, -INTER_COMPOUND_OFFSET(NEW_NEWMV),
+ 8, 10,
+ -INTER_COMPOUND_OFFSET(NEAREST_NEARMV),
+ -INTER_COMPOUND_OFFSET(NEAR_NEARESTMV),
+ 12, 14,
+ -INTER_COMPOUND_OFFSET(NEAREST_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARESTMV),
+ -INTER_COMPOUND_OFFSET(NEAR_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARMV)
+};
+#endif // CONFIG_EXT_INTER
+
const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
-PARTITION_NONE, 2,
-PARTITION_HORZ, 4,
@@ -256,16 +281,33 @@
239, 183, 119, 96, 41
};
-static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = {
- 50, 126, 123, 221, 226
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = {
+#if CONFIG_EXT_REFS
+  // TODO(zoeliu): Adjust the initial prob values.
+ { 33, 16, 16, 16 },
+ { 77, 74, 74, 74 },
+ { 142, 142, 142, 142 },
+ { 172, 170, 170, 170 },
+ { 238, 247, 247, 247 }
+#else
+ { 50 }, { 126 }, { 123 }, { 221 }, { 226 }
+#endif // CONFIG_EXT_REFS
};
-static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = {
+static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
+#if CONFIG_EXT_REFS
+ { 33, 16, 16, 16, 16 },
+ { 77, 74, 74, 74, 74 },
+ { 142, 142, 142, 142, 142 },
+ { 172, 170, 170, 170, 170 },
+ { 238, 247, 247, 247, 247 }
+#else
{ 33, 16 },
{ 77, 74 },
{ 142, 142 },
{ 172, 170 },
{ 238, 247 }
+#endif // CONFIG_EXT_REFS
};
static const struct tx_probs default_tx_probs = {
@@ -279,6 +321,442 @@
{ 66 } }
};
+const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)] = {
+ -TWO_COLORS, 2,
+ -THREE_COLORS, 4,
+ -FOUR_COLORS, 6,
+ -FIVE_COLORS, 8,
+ -SIX_COLORS, 10,
+ -SEVEN_COLORS, -EIGHT_COLORS,
+};
+
+// TODO(huisu): tune these probs
+const vpx_prob
+vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
+ { 96, 89, 100, 64, 77, 130},
+ { 22, 15, 44, 16, 34, 82},
+ { 30, 19, 57, 18, 38, 86},
+ { 94, 36, 104, 23, 43, 92},
+ { 116, 76, 107, 46, 65, 105},
+ { 112, 82, 94, 40, 70, 112},
+ { 147, 124, 123, 58, 69, 103},
+ { 180, 113, 136, 49, 45, 114},
+ { 107, 70, 87, 49, 154, 156},
+ { 98, 105, 142, 63, 64, 152},
+};
+
+const vpx_prob
+vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
+ { 160, 196, 228, 213, 175, 230},
+ { 87, 148, 208, 141, 166, 163},
+ { 72, 151, 204, 139, 155, 161},
+ { 78, 135, 171, 104, 120, 173},
+ { 59, 92, 131, 78, 92, 142},
+ { 75, 118, 149, 84, 90, 128},
+ { 89, 87, 92, 66, 66, 128},
+ { 67, 53, 54, 55, 66, 93},
+ { 120, 130, 83, 171, 75, 214},
+ { 72, 55, 66, 68, 79, 107},
+};
+
+const vpx_prob
+vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
+ = {
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+ { 240, 180, 100, },
+};
+
+const vpx_prob default_uv_palette_mode_prob[2] = {
+ 253, 229
+};
+
+const vpx_tree_index
+vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)] = {
+ { // 2 colors
+ -PALETTE_COLOR_ONE, -PALETTE_COLOR_TWO,
+ },
+ { // 3 colors
+ -PALETTE_COLOR_ONE, 2,
+ -PALETTE_COLOR_TWO, -PALETTE_COLOR_THREE,
+ },
+ { // 4 colors
+ -PALETTE_COLOR_ONE, 2,
+ -PALETTE_COLOR_TWO, 4,
+ -PALETTE_COLOR_THREE, -PALETTE_COLOR_FOUR,
+ },
+ { // 5 colors
+ -PALETTE_COLOR_ONE, 2,
+ -PALETTE_COLOR_TWO, 4,
+ -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, -PALETTE_COLOR_FIVE,
+ },
+ { // 6 colors
+ -PALETTE_COLOR_ONE, 2,
+ -PALETTE_COLOR_TWO, 4,
+ -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, 8,
+ -PALETTE_COLOR_FIVE, -PALETTE_COLOR_SIX,
+ },
+ { // 7 colors
+ -PALETTE_COLOR_ONE, 2,
+ -PALETTE_COLOR_TWO, 4,
+ -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, 8,
+ -PALETTE_COLOR_FIVE, 10,
+ -PALETTE_COLOR_SIX, -PALETTE_COLOR_SEVEN,
+ },
+ { // 8 colors
+ -PALETTE_COLOR_ONE, 2,
+ -PALETTE_COLOR_TWO, 4,
+ -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, 8,
+ -PALETTE_COLOR_FIVE, 10,
+ -PALETTE_COLOR_SIX, 12,
+ -PALETTE_COLOR_SEVEN, -PALETTE_COLOR_EIGHT,
+ },
+};
+
+const vpx_prob vp10_default_palette_y_color_prob
+[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = {
+ { // 2 colors
+ { 230, 255, 128, 128, 128, 128, 128 },
+ { 214, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 240, 255, 128, 128, 128, 128, 128 },
+ { 73, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 130, 255, 128, 128, 128, 128, 128 },
+ { 227, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 188, 255, 128, 128, 128, 128, 128 },
+ { 75, 255, 128, 128, 128, 128, 128 },
+ { 250, 255, 128, 128, 128, 128, 128 },
+ { 223, 255, 128, 128, 128, 128, 128 },
+ { 252, 255, 128, 128, 128, 128, 128 },
+ }, { // 3 colors
+ { 229, 137, 255, 128, 128, 128, 128 },
+ { 197, 120, 255, 128, 128, 128, 128 },
+ { 107, 195, 255, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 27, 151, 255, 128, 128, 128, 128 },
+ { 230, 130, 255, 128, 128, 128, 128 },
+ { 37, 230, 255, 128, 128, 128, 128 },
+ { 67, 221, 255, 128, 128, 128, 128 },
+ { 124, 230, 255, 128, 128, 128, 128 },
+ { 195, 109, 255, 128, 128, 128, 128 },
+ { 99, 122, 255, 128, 128, 128, 128 },
+ { 205, 208, 255, 128, 128, 128, 128 },
+ { 40, 235, 255, 128, 128, 128, 128 },
+ { 251, 132, 255, 128, 128, 128, 128 },
+ { 237, 186, 255, 128, 128, 128, 128 },
+ { 253, 112, 255, 128, 128, 128, 128 },
+ }, { // 4 colors
+ { 195, 87, 128, 255, 128, 128, 128 },
+ { 143, 100, 123, 255, 128, 128, 128 },
+ { 94, 124, 119, 255, 128, 128, 128 },
+ { 77, 91, 130, 255, 128, 128, 128 },
+ { 39, 114, 178, 255, 128, 128, 128 },
+ { 222, 94, 125, 255, 128, 128, 128 },
+ { 44, 203, 132, 255, 128, 128, 128 },
+ { 68, 175, 122, 255, 128, 128, 128 },
+ { 110, 187, 124, 255, 128, 128, 128 },
+ { 152, 91, 128, 255, 128, 128, 128 },
+ { 70, 109, 181, 255, 128, 128, 128 },
+ { 133, 113, 164, 255, 128, 128, 128 },
+ { 47, 205, 133, 255, 128, 128, 128 },
+ { 247, 94, 136, 255, 128, 128, 128 },
+ { 205, 122, 146, 255, 128, 128, 128 },
+ { 251, 100, 141, 255, 128, 128, 128 },
+ }, { // 5 colors
+ { 195, 65, 84, 125, 255, 128, 128 },
+ { 150, 76, 84, 121, 255, 128, 128 },
+ { 94, 110, 81, 117, 255, 128, 128 },
+ { 79, 85, 91, 139, 255, 128, 128 },
+ { 26, 102, 139, 127, 255, 128, 128 },
+ { 220, 73, 91, 119, 255, 128, 128 },
+ { 38, 203, 86, 127, 255, 128, 128 },
+ { 61, 186, 72, 124, 255, 128, 128 },
+ { 132, 199, 84, 128, 255, 128, 128 },
+ { 172, 52, 62, 120, 255, 128, 128 },
+ { 102, 89, 121, 122, 255, 128, 128 },
+ { 182, 48, 69, 186, 255, 128, 128 },
+ { 36, 206, 87, 126, 255, 128, 128 },
+ { 249, 55, 67, 122, 255, 128, 128 },
+ { 218, 88, 75, 122, 255, 128, 128 },
+ { 253, 64, 80, 119, 255, 128, 128 },
+ }, { // 6 colors
+ { 182, 54, 64, 75, 118, 255, 128 },
+ { 126, 67, 70, 76, 116, 255, 128 },
+ { 79, 92, 67, 85, 120, 255, 128 },
+ { 63, 61, 81, 118, 132, 255, 128 },
+ { 21, 80, 105, 83, 119, 255, 128 },
+ { 215, 72, 74, 74, 111, 255, 128 },
+ { 50, 176, 63, 79, 120, 255, 128 },
+ { 72, 148, 66, 77, 120, 255, 128 },
+ { 105, 177, 57, 78, 130, 255, 128 },
+ { 150, 66, 66, 80, 127, 255, 128 },
+ { 81, 76, 109, 85, 116, 255, 128 },
+ { 113, 81, 62, 96, 148, 255, 128 },
+ { 54, 179, 69, 82, 121, 255, 128 },
+ { 244, 47, 48, 67, 118, 255, 128 },
+ { 198, 83, 53, 65, 121, 255, 128 },
+ { 250, 42, 51, 69, 110, 255, 128 },
+ }, { // 7 colors
+ { 182, 45, 54, 62, 74, 113, 255 },
+ { 124, 63, 57, 62, 77, 114, 255 },
+ { 77, 80, 56, 66, 76, 117, 255 },
+ { 63, 57, 69, 98, 85, 131, 255 },
+ { 19, 81, 98, 63, 80, 116, 255 },
+ { 215, 56, 60, 63, 68, 105, 255 },
+ { 50, 174, 50, 60, 79, 118, 255 },
+ { 68, 151, 50, 58, 73, 117, 255 },
+ { 104, 182, 53, 57, 79, 127, 255 },
+ { 156, 50, 51, 63, 77, 111, 255 },
+ { 88, 67, 97, 59, 82, 120, 255 },
+ { 114, 81, 46, 65, 103, 132, 255 },
+ { 55, 166, 57, 66, 82, 120, 255 },
+ { 245, 34, 38, 43, 63, 114, 255 },
+ { 203, 68, 45, 47, 60, 118, 255 },
+ { 250, 35, 37, 47, 66, 110, 255 },
+ }, { // 8 colors
+ { 180, 43, 46, 50, 56, 69, 109 },
+ { 116, 53, 51, 49, 57, 73, 115 },
+ { 79, 70, 49, 50, 59, 74, 117 },
+ { 60, 54, 57, 70, 62, 83, 129 },
+ { 20, 73, 85, 52, 66, 81, 119 },
+ { 213, 56, 52, 49, 53, 62, 104 },
+ { 48, 161, 41, 45, 56, 77, 116 },
+ { 68, 139, 40, 47, 54, 71, 116 },
+ { 123, 166, 42, 43, 52, 76, 130 },
+ { 153, 44, 44, 47, 54, 79, 129 },
+ { 87, 64, 83, 49, 60, 75, 127 },
+ { 131, 68, 43, 48, 73, 96, 130 },
+ { 55, 152, 45, 51, 64, 77, 113 },
+ { 243, 30, 28, 33, 41, 65, 114 },
+ { 202, 56, 35, 36, 42, 63, 123 },
+ { 249, 31, 29, 32, 45, 68, 111 },
+ }
+};
+
+const vpx_prob vp10_default_palette_uv_color_prob
+[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = {
+ { // 2 colors
+ { 228, 255, 128, 128, 128, 128, 128 },
+ { 195, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 228, 255, 128, 128, 128, 128, 128 },
+ { 71, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 129, 255, 128, 128, 128, 128, 128 },
+ { 206, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 136, 255, 128, 128, 128, 128, 128 },
+ { 98, 255, 128, 128, 128, 128, 128 },
+ { 236, 255, 128, 128, 128, 128, 128 },
+ { 222, 255, 128, 128, 128, 128, 128 },
+ { 249, 255, 128, 128, 128, 128, 128 },
+ }, { // 3 colors
+ { 198, 136, 255, 128, 128, 128, 128 },
+ { 178, 105, 255, 128, 128, 128, 128 },
+ { 100, 206, 255, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 136, 255, 128, 128, 128, 128 },
+ { 219, 134, 255, 128, 128, 128, 128 },
+ { 50, 198, 255, 128, 128, 128, 128 },
+ { 61, 231, 255, 128, 128, 128, 128 },
+ { 110, 209, 255, 128, 128, 128, 128 },
+ { 173, 106, 255, 128, 128, 128, 128 },
+ { 145, 166, 255, 128, 128, 128, 128 },
+ { 156, 175, 255, 128, 128, 128, 128 },
+ { 69, 183, 255, 128, 128, 128, 128 },
+ { 241, 163, 255, 128, 128, 128, 128 },
+ { 224, 160, 255, 128, 128, 128, 128 },
+ { 246, 154, 255, 128, 128, 128, 128 },
+ }, { // 4 colors
+ { 173, 88, 143, 255, 128, 128, 128 },
+ { 146, 81, 127, 255, 128, 128, 128 },
+ { 84, 134, 102, 255, 128, 128, 128 },
+ { 69, 138, 140, 255, 128, 128, 128 },
+ { 31, 103, 200, 255, 128, 128, 128 },
+ { 217, 101, 139, 255, 128, 128, 128 },
+ { 51, 174, 121, 255, 128, 128, 128 },
+ { 64, 177, 109, 255, 128, 128, 128 },
+ { 96, 179, 145, 255, 128, 128, 128 },
+ { 164, 77, 114, 255, 128, 128, 128 },
+ { 87, 94, 156, 255, 128, 128, 128 },
+ { 105, 57, 173, 255, 128, 128, 128 },
+ { 63, 158, 137, 255, 128, 128, 128 },
+ { 236, 102, 156, 255, 128, 128, 128 },
+ { 197, 115, 153, 255, 128, 128, 128 },
+ { 245, 106, 154, 255, 128, 128, 128 },
+ }, { // 5 colors
+ { 179, 64, 97, 129, 255, 128, 128 },
+ { 137, 56, 88, 125, 255, 128, 128 },
+ { 82, 107, 61, 118, 255, 128, 128 },
+ { 59, 113, 86, 115, 255, 128, 128 },
+ { 23, 88, 118, 130, 255, 128, 128 },
+ { 213, 66, 90, 125, 255, 128, 128 },
+ { 37, 181, 103, 121, 255, 128, 128 },
+ { 47, 188, 61, 131, 255, 128, 128 },
+ { 104, 185, 103, 144, 255, 128, 128 },
+ { 163, 39, 76, 112, 255, 128, 128 },
+ { 94, 74, 131, 126, 255, 128, 128 },
+ { 142, 42, 103, 163, 255, 128, 128 },
+ { 53, 162, 99, 149, 255, 128, 128 },
+ { 239, 54, 84, 108, 255, 128, 128 },
+ { 203, 84, 110, 147, 255, 128, 128 },
+ { 248, 70, 105, 151, 255, 128, 128 },
+ }, { // 6 colors
+ { 189, 50, 67, 90, 130, 255, 128 },
+ { 114, 50, 55, 90, 123, 255, 128 },
+ { 66, 76, 54, 82, 128, 255, 128 },
+ { 43, 69, 69, 80, 129, 255, 128 },
+ { 22, 59, 87, 88, 141, 255, 128 },
+ { 203, 49, 68, 87, 122, 255, 128 },
+ { 43, 157, 74, 104, 146, 255, 128 },
+ { 54, 138, 51, 95, 138, 255, 128 },
+ { 82, 171, 58, 102, 146, 255, 128 },
+ { 129, 38, 59, 64, 168, 255, 128 },
+ { 56, 67, 119, 92, 112, 255, 128 },
+ { 96, 62, 53, 132, 82, 255, 128 },
+ { 60, 147, 77, 108, 145, 255, 128 },
+ { 238, 76, 73, 93, 148, 255, 128 },
+ { 189, 86, 73, 103, 157, 255, 128 },
+ { 246, 62, 75, 83, 167, 255, 128 },
+ }, { // 7 colors
+ { 179, 42, 51, 73, 99, 134, 255 },
+ { 119, 52, 52, 61, 64, 114, 255 },
+ { 53, 77, 35, 65, 71, 131, 255 },
+ { 38, 70, 51, 68, 89, 144, 255 },
+ { 23, 65, 128, 73, 97, 131, 255 },
+ { 210, 47, 52, 63, 81, 143, 255 },
+ { 42, 159, 57, 68, 98, 143, 255 },
+ { 49, 153, 45, 82, 93, 143, 255 },
+ { 81, 169, 52, 72, 113, 151, 255 },
+ { 136, 46, 35, 56, 75, 96, 255 },
+ { 57, 84, 109, 47, 107, 131, 255 },
+ { 128, 78, 57, 36, 128, 85, 255 },
+ { 54, 149, 68, 77, 94, 153, 255 },
+ { 243, 58, 50, 71, 81, 167, 255 },
+ { 189, 92, 64, 70, 121, 173, 255 },
+ { 248, 35, 38, 51, 82, 201, 255 },
+ }, { // 8 colors
+ { 201, 40, 36, 42, 64, 92, 123 },
+ { 116, 43, 33, 43, 73, 102, 128 },
+ { 46, 77, 37, 69, 62, 78, 150 },
+ { 40, 65, 52, 50, 76, 89, 133 },
+ { 28, 48, 91, 17, 64, 77, 133 },
+ { 218, 43, 43, 37, 56, 72, 163 },
+ { 41, 155, 44, 83, 82, 129, 180 },
+ { 44, 141, 29, 55, 64, 89, 147 },
+ { 92, 166, 48, 45, 59, 126, 179 },
+ { 169, 35, 49, 41, 36, 99, 139 },
+ { 55, 77, 77, 56, 60, 75, 156 },
+ { 155, 81, 51, 64, 57, 182, 255 },
+ { 60, 134, 49, 49, 93, 128, 174 },
+ { 244, 98, 51, 46, 22, 73, 238 },
+ { 189, 70, 40, 87, 93, 79, 201 },
+ { 248, 54, 49, 40, 29, 42, 227 },
+ }
+};
+
+static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = {
+ // (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2),
+ 3993, 4235, 4378, 4380,
+ // (4, 3, 3, 0), (5, 0, 0, 0), (5, 3, 0, 0), (5, 3, 2, 0),
+ 5720, 6655, 7018, 7040,
+ // (5, 5, 0, 0), (6, 2, 0, 0), (6, 2, 2, 0), (6, 4, 0, 0),
+ 7260, 8228, 8250, 8470,
+ // (7, 3, 0, 0), (8, 0, 0, 0), (8, 2, 0, 0), (10, 0, 0, 0)
+ 9680, 10648, 10890, 13310
+};
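+// Each entry is the base-11 number formed by the four sorted neighbour
+// scores in the comments, e.g. (5, 3, 2, 0) maps to
+// 5 * 11^3 + 3 * 11^2 + 2 * 11 = 7040.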
+
+int vp10_get_palette_color_context(const uint8_t *color_map, int cols,
+ int r, int c, int n, int *color_order) {
+ int i, j, max, max_idx, temp;
+ int scores[PALETTE_MAX_SIZE + 10];
+ int weights[4] = {3, 2, 3, 2};
+ int color_ctx = 0;
+ int color_neighbors[4];
+
+ assert(n <= PALETTE_MAX_SIZE);
+
+ if (c - 1 >= 0)
+ color_neighbors[0] = color_map[r * cols + c - 1];
+ else
+ color_neighbors[0] = -1;
+ if (c - 1 >= 0 && r - 1 >= 0)
+ color_neighbors[1] = color_map[(r - 1) * cols + c - 1];
+ else
+ color_neighbors[1] = -1;
+ if (r - 1 >= 0)
+ color_neighbors[2] = color_map[(r - 1) * cols + c];
+ else
+ color_neighbors[2] = -1;
+ if (r - 1 >= 0 && c + 1 <= cols - 1)
+ color_neighbors[3] = color_map[(r - 1) * cols + c + 1];
+ else
+ color_neighbors[3] = -1;
+
+ for (i = 0; i < PALETTE_MAX_SIZE; ++i)
+ color_order[i] = i;
+ memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0]));
+ for (i = 0; i < 4; ++i) {
+ if (color_neighbors[i] >= 0)
+ scores[color_neighbors[i]] += weights[i];
+ }
+
+ for (i = 0; i < 4; ++i) {
+ max = scores[i];
+ max_idx = i;
+ j = i + 1;
+ while (j < n) {
+ if (scores[j] > max) {
+ max = scores[j];
+ max_idx = j;
+ }
+ ++j;
+ }
+
+ if (max_idx != i) {
+ temp = scores[i];
+ scores[i] = scores[max_idx];
+ scores[max_idx] = temp;
+
+ temp = color_order[i];
+ color_order[i] = color_order[max_idx];
+ color_order[max_idx] = temp;
+ }
+ }
+
+ for (i = 0; i < 4; ++i)
+ color_ctx = color_ctx * 11 + scores[i];
+
+ for (i = 0; i < PALETTE_COLOR_CONTEXTS; ++i)
+ if (color_ctx == palette_color_context_lookup[i]) {
+ color_ctx = i;
+ break;
+ }
+
+ if (color_ctx >= PALETTE_COLOR_CONTEXTS)
+ color_ctx = 0;
+
+ return color_ctx;
+}
+
void vp10_tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
unsigned int (*ct_32x32p)[2]) {
ct_32x32p[0][0] = tx_count_32x32p[TX_4X4];
@@ -306,10 +784,26 @@
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
}
+#if CONFIG_VAR_TX
+static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
+ 192, 128, 64, 192, 128, 64, 192, 128, 64,
+};
+#endif
+
static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = {
192, 128, 64
};
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+ [SWITCHABLE_FILTERS - 1] = {
+ { 235, 192, 128},
+ { 36, 243, 208},
+ { 34, 16, 128},
+ { 36, 243, 48},
+ { 149, 160, 128},
+};
+#else
static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1] = {
{ 235, 162, },
@@ -317,15 +811,420 @@
{ 34, 3, },
{ 149, 144, },
};
-
-#if CONFIG_MISC_FIXES
-// FIXME(someone) need real defaults here
-static const struct segmentation_probs default_seg_probs = {
- { 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128 },
-};
#endif
+#if CONFIG_EXT_TX
+const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
+ [TREE_SIZE(TX_TYPES)] = {
+  { // TODO(yaowu): remove unused entry 0.
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ 6, 12,
+ 8, 10,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 14, 16,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
+ }, {
+ -IDTX, -DCT_DCT,
+ }
+};
+
+const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
+ [TREE_SIZE(TX_TYPES)] = {
+  { // TODO(yaowu): remove unused entry 0.
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ 6, 12,
+ 8, 10,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 14, 16,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
+ }
+};
+
+static const vpx_prob
+default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
+  { // TODO(yaowu): remove unused entry 0.
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#if EXT_TX_SIZES == 4
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#endif
+ }, {
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#if EXT_TX_SIZES == 4
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#endif
+ }, {
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+#if EXT_TX_SIZES == 4
+ { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+#endif
+ }, {
+ { 12, },
+ { 12, },
+ { 12, },
+#if EXT_TX_SIZES == 4
+ { 12, },
+#endif
+ }
+};
+
+static const vpx_prob
+default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES]
+ [INTRA_MODES][TX_TYPES - 1] = {
+  { // TODO(yaowu): remove unused entry 0.
+ {
+ { 8, 11, 24, 112, 87, 137, 127, 134,
+ 128, 86, 128, 124, 125, 133, 176, 123, },
+ { 10, 9, 39, 106, 73, 155, 163, 228,
+ 35, 62, 129, 127, 133, 114, 213, 234, },
+ { 10, 9, 14, 88, 91, 127, 151, 51,
+ 210, 89, 126, 58, 52, 116, 217, 24, },
+ { 9, 6, 29, 113, 98, 131, 149, 210,
+ 119, 60, 124, 93, 90, 143, 170, 197, },
+ { 8, 8, 38, 101, 111, 166, 167, 141,
+ 130, 105, 128, 75, 75, 118, 197, 117, },
+ { 7, 8, 39, 91, 101, 153, 166, 200,
+ 99, 77, 123, 90, 83, 144, 224, 192, },
+ { 7, 10, 26, 86, 119, 154, 130, 101,
+ 152, 91, 129, 75, 79, 137, 219, 77, },
+ { 10, 13, 20, 86, 102, 162, 112, 76,
+ 171, 86, 134, 122, 106, 124, 196, 44, },
+ { 8, 9, 33, 108, 100, 144, 148, 215,
+ 77, 60, 125, 125, 128, 126, 198, 220, },
+ { 3, 10, 29, 111, 69, 141, 204, 141,
+ 139, 93, 120, 75, 77, 163, 242, 124, },
+ }, {
+ { 2, 53, 18, 147, 96, 98, 136, 133,
+ 131, 120, 153, 163, 169, 137, 173, 124, },
+ { 4, 18, 34, 133, 54, 130, 179, 228,
+ 28, 72, 153, 164, 168, 118, 227, 239, },
+ { 4, 18, 13, 125, 72, 110, 176, 36,
+ 221, 104, 148, 75, 72, 117, 225, 19, },
+ { 8, 33, 24, 162, 113, 99, 147, 226,
+ 103, 85, 153, 143, 153, 124, 155, 210, },
+ { 2, 15, 35, 107, 127, 158, 192, 128,
+ 126, 116, 151, 95, 88, 182, 241, 119, },
+ { 3, 15, 36, 112, 100, 146, 194, 189,
+ 90, 98, 152, 99, 100, 165, 235, 175, },
+ { 3, 16, 29, 109, 103, 140, 182, 76,
+ 173, 104, 147, 82, 85, 159, 235, 70, },
+ { 9, 24, 14, 120, 86, 156, 161, 34,
+ 177, 121, 142, 128, 128, 126, 185, 37, },
+ { 5, 24, 29, 152, 98, 99, 174, 228,
+ 82, 76, 147, 149, 128, 132, 191, 225, },
+ { 2, 15, 29, 111, 77, 126, 200, 135,
+ 117, 93, 152, 96, 84, 191, 245, 135, },
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#if EXT_TX_SIZES == 4
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#endif
+ },
+ }, {
+ {
+ { 8, 11, 24, 112, 87, 137, 127, 134,
+ 128, 86, 128, 124, 125, 133, 176, 123, },
+ { 10, 9, 39, 106, 73, 155, 163, 228,
+ 35, 62, 129, 127, 133, 114, 213, 234, },
+ { 10, 9, 14, 88, 91, 127, 151, 51,
+ 210, 89, 126, 58, 52, 116, 217, 24, },
+ { 9, 6, 29, 113, 98, 131, 149, 210,
+ 119, 60, 124, 93, 90, 143, 170, 197, },
+ { 8, 8, 38, 101, 111, 166, 167, 141,
+ 130, 105, 128, 75, 75, 118, 197, 117, },
+ { 7, 8, 39, 91, 101, 153, 166, 200,
+ 99, 77, 123, 90, 83, 144, 224, 192, },
+ { 7, 10, 26, 86, 119, 154, 130, 101,
+ 152, 91, 129, 75, 79, 137, 219, 77, },
+ { 10, 13, 20, 86, 102, 162, 112, 76,
+ 171, 86, 134, 122, 106, 124, 196, 44, },
+ { 8, 9, 33, 108, 100, 144, 148, 215,
+ 77, 60, 125, 125, 128, 126, 198, 220, },
+ { 3, 10, 29, 111, 69, 141, 204, 141,
+ 139, 93, 120, 75, 77, 163, 242, 124, },
+ }, {
+ { 2, 53, 18, 147, 96, 98, 136, 133,
+ 131, 120, 153, 163, 169, 137, 173, 124, },
+ { 4, 18, 34, 133, 54, 130, 179, 228,
+ 28, 72, 153, 164, 168, 118, 227, 239, },
+ { 4, 18, 13, 125, 72, 110, 176, 36,
+ 221, 104, 148, 75, 72, 117, 225, 19, },
+ { 8, 33, 24, 162, 113, 99, 147, 226,
+ 103, 85, 153, 143, 153, 124, 155, 210, },
+ { 2, 15, 35, 107, 127, 158, 192, 128,
+ 126, 116, 151, 95, 88, 182, 241, 119, },
+ { 3, 15, 36, 112, 100, 146, 194, 189,
+ 90, 98, 152, 99, 100, 165, 235, 175, },
+ { 3, 16, 29, 109, 103, 140, 182, 76,
+ 173, 104, 147, 82, 85, 159, 235, 70, },
+ { 9, 24, 14, 120, 86, 156, 161, 34,
+ 177, 121, 142, 128, 128, 126, 185, 37, },
+ { 5, 24, 29, 152, 98, 99, 174, 228,
+ 82, 76, 147, 149, 128, 132, 191, 225, },
+ { 2, 15, 29, 111, 77, 126, 200, 135,
+ 117, 93, 152, 96, 84, 191, 245, 135, },
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#if EXT_TX_SIZES == 4
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#endif
+ },
+ }, {
+ {
+ { 8, 176, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 10, 28, 176, 192, 208, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 10, 28, 176, 192, 48, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 9, 160, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 28, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 7, 28, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 7, 20, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 10, 23, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 29, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 3, 20, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ }, {
+ { 2, 176, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 28, 176, 192, 208, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 28, 176, 192, 48, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 160, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 3, 28, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 3, 26, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 9, 24, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 5, 24, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 25, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ }, {
+ { 2, 176, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 28, 176, 192, 208, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 28, 176, 192, 48, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 160, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 3, 29, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 27, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 34, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 25, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+#if EXT_TX_SIZES == 4
+ }, {
+ { 2, 176, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 12, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 17, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 41, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 17, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 14, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 3, 19, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 27, 160, 176, 64, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 34, 160, 176, 192, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 15, 96, 128, 128, 128, 160, 192,
+ 128, 128, 128, 128, 128, 128, 128, 128, },
+#endif
+ },
+ },
+};
+#else
const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = {
-DCT_DCT, 2,
-ADST_ADST, 4,
@@ -345,6 +1244,39 @@
{176, 85, 128},
{192, 85, 128},
};
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+static const vpx_prob
+default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1] = {
+ { 98, 63, 60, },
+ { 98, 82, 80, },
+ { 94, 65, 103, },
+ { 49, 25, 24, },
+ { 72, 38, 50, },
+};
+static const vpx_prob default_ext_intra_probs[2] = {230, 230};
+
+const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)] = {
+ -INTRA_FILTER_LINEAR, 2,
+ -INTRA_FILTER_8TAP, 4,
+ -INTRA_FILTER_8TAP_SHARP, -INTRA_FILTER_8TAP_SMOOTH,
+};
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_SUPERTX
+static const vpx_prob default_supertx_prob[PARTITION_SUPERTX_CONTEXTS]
+ [TX_SIZES] = {
+ { 1, 160, 160, 170 },
+ { 1, 200, 200, 210 },
+};
+#endif // CONFIG_SUPERTX
+
+// FIXME(someone) need real defaults here
+static const struct segmentation_probs default_seg_probs = {
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128 },
+};
static void init_mode_probs(FRAME_CONTEXT *fc) {
vp10_copy(fc->uv_mode_prob, default_uv_probs);
@@ -356,21 +1288,51 @@
vp10_copy(fc->comp_ref_prob, default_comp_ref_p);
vp10_copy(fc->single_ref_prob, default_single_ref_p);
fc->tx_probs = default_tx_probs;
+#if CONFIG_VAR_TX
+ vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
+#endif
vp10_copy(fc->skip_probs, default_skip_probs);
+#if CONFIG_REF_MV
+ vp10_copy(fc->newmv_prob, default_newmv_prob);
+ vp10_copy(fc->zeromv_prob, default_zeromv_prob);
+ vp10_copy(fc->refmv_prob, default_refmv_prob);
+ vp10_copy(fc->drl_prob0, default_drl_prob);
+ vp10_copy(fc->drl_prob1, default_drl_prob);
+#if CONFIG_EXT_INTER
+ fc->new2mv_prob = default_new2mv_prob;
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
vp10_copy(fc->inter_mode_probs, default_inter_mode_probs);
-#if CONFIG_MISC_FIXES
+#if CONFIG_EXT_INTER
+ vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs);
+#endif // CONFIG_EXT_INTER
+#if CONFIG_SUPERTX
+ vp10_copy(fc->supertx_prob, default_supertx_prob);
+#endif // CONFIG_SUPERTX
vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
-#endif
- vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
+#if CONFIG_EXT_INTRA
+ vp10_copy(fc->ext_intra_probs, default_ext_intra_probs);
+ vp10_copy(fc->intra_filter_probs, default_intra_filter_probs);
+#endif // CONFIG_EXT_INTRA
vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
+ vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
}
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
const vpx_tree_index vp10_switchable_interp_tree
- [TREE_SIZE(SWITCHABLE_FILTERS)] = {
+[TREE_SIZE(SWITCHABLE_FILTERS)] = {
+ -EIGHTTAP, 2,
+ 4, -EIGHTTAP_SHARP,
+ -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2,
+};
+#else
+const vpx_tree_index vp10_switchable_interp_tree
+[TREE_SIZE(SWITCHABLE_FILTERS)] = {
-EIGHTTAP, 2,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
};
+#endif // CONFIG_EXT_INTERP
void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) {
int i, j;
@@ -385,31 +1347,64 @@
fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i],
counts->comp_inter[i]);
for (i = 0; i < REF_CONTEXTS; i++)
- fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i],
- counts->comp_ref[i]);
+ for (j = 0; j < (COMP_REFS - 1); j++)
+ fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j],
+ counts->comp_ref[i][j]);
for (i = 0; i < REF_CONTEXTS; i++)
- for (j = 0; j < 2; j++)
+ for (j = 0; j < (SINGLE_REFS - 1); j++)
fc->single_ref_prob[i][j] = mode_mv_merge_probs(
pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);
+#if CONFIG_REF_MV
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
+ fc->newmv_prob[i] = mode_mv_merge_probs(pre_fc->newmv_prob[i],
+ counts->newmv_mode[i]);
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+ fc->zeromv_prob[i] = mode_mv_merge_probs(pre_fc->zeromv_prob[i],
+ counts->zeromv_mode[i]);
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+ fc->refmv_prob[i] = mode_mv_merge_probs(pre_fc->refmv_prob[i],
+ counts->refmv_mode[i]);
+
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ fc->drl_prob0[i] = mode_mv_merge_probs(pre_fc->drl_prob0[i],
+ counts->drl_mode0[i]);
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ fc->drl_prob1[i] = mode_mv_merge_probs(pre_fc->drl_prob1[i],
+ counts->drl_mode1[i]);
+
+#if CONFIG_EXT_INTER
+ fc->new2mv_prob = mode_mv_merge_probs(pre_fc->new2mv_prob,
+ counts->new2mv_mode);
+#endif // CONFIG_EXT_INTER
+#else
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
vpx_tree_merge_probs(vp10_inter_mode_tree, pre_fc->inter_mode_probs[i],
counts->inter_mode[i], fc->inter_mode_probs[i]);
+#endif
+
+#if CONFIG_SUPERTX
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+ int j;
+ for (j = 1; j < TX_SIZES; ++j) {
+ fc->supertx_prob[i][j] = mode_mv_merge_probs(pre_fc->supertx_prob[i][j],
+ counts->supertx[i][j]);
+ }
+ }
+#endif // CONFIG_SUPERTX
+
+#if CONFIG_EXT_INTER
+ for (i = 0; i < INTER_MODE_CONTEXTS; i++)
+ vpx_tree_merge_probs(vp10_inter_compound_mode_tree,
+ pre_fc->inter_compound_mode_probs[i],
+ counts->inter_compound_mode[i],
+ fc->inter_compound_mode_probs[i]);
+#endif // CONFIG_EXT_INTER
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->y_mode_prob[i],
counts->y_mode[i], fc->y_mode_prob[i]);
-#if !CONFIG_MISC_FIXES
- for (i = 0; i < INTRA_MODES; ++i)
- vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i],
- counts->uv_mode[i], fc->uv_mode_prob[i]);
-
- for (i = 0; i < PARTITION_CONTEXTS; i++)
- vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
- counts->partition[i], fc->partition_prob[i]);
-#endif
-
if (cm->interp_filter == SWITCHABLE) {
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
vpx_tree_merge_probs(vp10_switchable_interp_tree,
@@ -449,10 +1444,41 @@
}
}
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT)
+ for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
+ fc->txfm_partition_prob[i] =
+ mode_mv_merge_probs(pre_fc->txfm_partition_prob[i],
+ counts->txfm_partition[i]);
+#endif
+
for (i = 0; i < SKIP_CONTEXTS; ++i)
fc->skip_probs[i] = mode_mv_merge_probs(
pre_fc->skip_probs[i], counts->skip[i]);
+#if CONFIG_EXT_TX
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (use_inter_ext_tx_for_txsize[s][i]) {
+ vpx_tree_merge_probs(vp10_ext_tx_inter_tree[s],
+ pre_fc->inter_ext_tx_prob[s][i],
+ counts->inter_ext_tx[s][i],
+ fc->inter_ext_tx_prob[s][i]);
+ }
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (use_intra_ext_tx_for_txsize[s][i]) {
+ int j;
+ for (j = 0; j < INTRA_MODES; ++j)
+ vpx_tree_merge_probs(vp10_ext_tx_intra_tree[s],
+ pre_fc->intra_ext_tx_prob[s][i][j],
+ counts->intra_ext_tx[s][i][j],
+ fc->intra_ext_tx_prob[s][i][j]);
+ }
+ }
+ }
+#else
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
int j;
for (j = 0; j < TX_TYPES; ++j)
@@ -467,8 +1493,8 @@
counts->inter_ext_tx[i],
fc->inter_ext_tx_prob[i]);
}
+#endif // CONFIG_EXT_TX
-#if CONFIG_MISC_FIXES
if (cm->seg.temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++)
fc->seg.pred_probs[i] = mode_mv_merge_probs(pre_fc->seg.pred_probs[i],
@@ -488,7 +1514,17 @@
for (i = 0; i < PARTITION_CONTEXTS; i++)
vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
counts->partition[i], fc->partition_prob[i]);
-#endif
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ fc->ext_intra_probs[i] = mode_mv_merge_probs(
+ pre_fc->ext_intra_probs[i], counts->ext_intra[i]);
+ }
+
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ vpx_tree_merge_probs(vp10_intra_filter_tree, pre_fc->intra_filter_probs[i],
+ counts->intra_filter[i], fc->intra_filter_probs[i]);
+#endif // CONFIG_EXT_INTRA
}
static void set_default_lf_deltas(struct loopfilter *lf) {
@@ -497,6 +1533,11 @@
lf->ref_deltas[INTRA_FRAME] = 1;
lf->ref_deltas[LAST_FRAME] = 0;
+#if CONFIG_EXT_REFS
+ lf->ref_deltas[LAST2_FRAME] = lf->ref_deltas[LAST_FRAME];
+ lf->ref_deltas[LAST3_FRAME] = lf->ref_deltas[LAST_FRAME];
+ lf->ref_deltas[LAST4_FRAME] = lf->ref_deltas[LAST_FRAME];
+#endif // CONFIG_EXT_REFS
lf->ref_deltas[GOLDEN_FRAME] = -1;
lf->ref_deltas[ALTREF_FRAME] = -1;
@@ -526,6 +1567,10 @@
// To force update of the sharpness
lf->last_sharpness_level = -1;
+#if CONFIG_LOOP_RESTORATION
+ lf->restoration_level = 0;
+ lf->last_restoration_level = 0;
+#endif // CONFIG_LOOP_RESTORATION
vp10_default_coef_probs(cm);
init_mode_probs(cm->fc);
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 611d3ad..4b4bdf1 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -26,6 +26,14 @@
#define TX_SIZE_CONTEXTS 2
#define INTER_OFFSET(mode) ((mode) - NEARESTMV)
+#if CONFIG_EXT_INTER
+#define INTER_COMPOUND_OFFSET(mode) ((mode) - NEAREST_NEARESTMV)
+#endif // CONFIG_EXT_INTER
+
+#define PALETTE_COLOR_CONTEXTS 16
+#define PALETTE_MAX_SIZE 8
+#define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1)
+#define PALETTE_Y_MODE_CONTEXTS 3
struct VP10Common;
@@ -55,20 +63,51 @@
vp10_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1];
+
+#if CONFIG_REF_MV
+ vpx_prob newmv_prob[NEWMV_MODE_CONTEXTS];
+ vpx_prob zeromv_prob[ZEROMV_MODE_CONTEXTS];
+ vpx_prob refmv_prob[REFMV_MODE_CONTEXTS];
+ vpx_prob drl_prob0[DRL_MODE_CONTEXTS];
+ vpx_prob drl_prob1[DRL_MODE_CONTEXTS];
+
+#if CONFIG_EXT_INTER
+ vpx_prob new2mv_prob;
+#endif // CONFIG_EXT_INTER
+#endif
+
vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
+#if CONFIG_EXT_INTER
+ vpx_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS]
+ [INTER_COMPOUND_MODES - 1];
+#endif // CONFIG_EXT_INTER
vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
- vpx_prob single_ref_prob[REF_CONTEXTS][2];
- vpx_prob comp_ref_prob[REF_CONTEXTS];
+ vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS - 1];
+ vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS - 1];
struct tx_probs tx_probs;
+#if CONFIG_VAR_TX
+ vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
+#endif
vpx_prob skip_probs[SKIP_CONTEXTS];
nmv_context nmvc;
-#if CONFIG_MISC_FIXES
- struct segmentation_probs seg;
-#endif
+ int initialized;
+#if CONFIG_EXT_TX
+ vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
+ vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES - 1];
+#else
vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1];
vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1];
- int initialized;
+#endif // CONFIG_EXT_TX
+#if CONFIG_SUPERTX
+ vpx_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES];
+#endif // CONFIG_SUPERTX
+ struct segmentation_probs seg;
+#if CONFIG_EXT_INTRA
+ vpx_prob ext_intra_probs[PLANE_TYPES];
+ vpx_prob intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1];
+#endif // CONFIG_EXT_INTRA
} FRAME_CONTEXT;
typedef struct FRAME_COUNTS {
@@ -81,35 +120,87 @@
[COEF_BANDS][COEFF_CONTEXTS];
unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS];
+#if CONFIG_REF_MV
+ unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
+ unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2];
+ unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
+ unsigned int drl_mode0[DRL_MODE_CONTEXTS][2];
+ unsigned int drl_mode1[DRL_MODE_CONTEXTS][2];
+#if CONFIG_EXT_INTER
+ unsigned int new2mv_mode[2];
+#endif // CONFIG_EXT_INTER
+#endif
+
unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
+#if CONFIG_EXT_INTER
+ unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
+#endif // CONFIG_EXT_INTER
unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
- unsigned int single_ref[REF_CONTEXTS][2][2];
- unsigned int comp_ref[REF_CONTEXTS][2];
+ unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2];
+ unsigned int comp_ref[REF_CONTEXTS][COMP_REFS - 1][2];
struct tx_counts tx;
+#if CONFIG_VAR_TX
+ unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
+#endif
unsigned int skip[SKIP_CONTEXTS][2];
nmv_context_counts mv;
-#if CONFIG_MISC_FIXES
- struct seg_counts seg;
-#endif
+#if CONFIG_EXT_TX
+ unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+ unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES];
+#else
unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES];
+#endif // CONFIG_EXT_TX
+#if CONFIG_SUPERTX
+ unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2];
+ unsigned int supertx_size[TX_SIZES];
+#endif // CONFIG_SUPERTX
+ struct seg_counts seg;
+#if CONFIG_EXT_INTRA
+ unsigned int ext_intra[PLANE_TYPES][2];
+ unsigned int intra_filter[INTRA_FILTERS + 1][INTRA_FILTERS];
+#endif // CONFIG_EXT_INTRA
} FRAME_COUNTS;
extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
[INTRA_MODES - 1];
-#if !CONFIG_MISC_FIXES
-extern const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
-extern const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS]
- [PARTITION_TYPES - 1];
-#endif
+extern const vpx_prob
+vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS];
+extern const vpx_prob
+vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1];
+extern const vpx_prob
+vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1];
+extern const vpx_prob vp10_default_palette_y_color_prob
+[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1];
+extern const vpx_prob vp10_default_palette_uv_color_prob
+[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1];
extern const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
extern const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)];
+#if CONFIG_EXT_INTER
+extern const vpx_tree_index vp10_inter_compound_mode_tree
+ [TREE_SIZE(INTER_COMPOUND_MODES)];
+#endif // CONFIG_EXT_INTER
extern const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)];
extern const vpx_tree_index vp10_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)];
-
+extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)];
+extern const vpx_tree_index
+vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)];
+#if CONFIG_EXT_INTRA
+extern const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_TX
+extern const vpx_tree_index
+ vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER][TREE_SIZE(TX_TYPES)];
+extern const vpx_tree_index
+ vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)];
+#else
+extern const vpx_tree_index
+ vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)];
+#endif // CONFIG_EXT_TX
void vp10_setup_past_independence(struct VP10Common *cm);
@@ -123,9 +214,6 @@
void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]);
-extern const vpx_tree_index
- vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)];
-
static INLINE int vp10_ceil_log2(int n) {
int i = 1, p = 2;
while (p < n) {
@@ -135,6 +223,9 @@
return i;
}
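+// Returns the palette color context for position (r, c) in an n-color index
+// map with 'cols' columns; 'color_order' receives the color indices ranked
+// by weighted neighbor frequency (most likely first).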
+int vp10_get_palette_color_context(const uint8_t *color_map, int cols,
+ int r, int c, int n, int *color_order);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 18c7d16..f41b8d9 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -94,15 +94,44 @@
ADST_DCT = 1, // ADST in vertical, DCT in horizontal
DCT_ADST = 2, // DCT in vertical, ADST in horizontal
ADST_ADST = 3, // ADST in both directions
- TX_TYPES = 4
+#if CONFIG_EXT_TX
+ FLIPADST_DCT = 4,
+ DCT_FLIPADST = 5,
+ FLIPADST_FLIPADST = 6,
+ ADST_FLIPADST = 7,
+ FLIPADST_ADST = 8,
+ DST_DCT = 9,
+ DCT_DST = 10,
+ DST_ADST = 11,
+ ADST_DST = 12,
+ DST_FLIPADST = 13,
+ FLIPADST_DST = 14,
+ DST_DST = 15,
+ IDTX = 16,
+#endif // CONFIG_EXT_TX
+ TX_TYPES,
} TX_TYPE;
#define EXT_TX_SIZES 3 // number of sizes that use extended transforms
+#if CONFIG_EXT_TX
+#define USE_DST2 1
+#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
+#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA
+#endif // CONFIG_EXT_TX
+
typedef enum {
VP9_LAST_FLAG = 1 << 0,
+#if CONFIG_EXT_REFS
+ VP9_LAST2_FLAG = 1 << 1,
+ VP9_LAST3_FLAG = 1 << 2,
+ VP9_LAST4_FLAG = 1 << 3,
+ VP9_GOLD_FLAG = 1 << 4,
+ VP9_ALT_FLAG = 1 << 5,
+#else
VP9_GOLD_FLAG = 1 << 1,
VP9_ALT_FLAG = 1 << 2,
+#endif // CONFIG_EXT_REFS
} VP9_REFFRAME;
typedef enum {
@@ -111,6 +140,29 @@
PLANE_TYPES
} PLANE_TYPE;
+typedef enum {
+ TWO_COLORS,
+ THREE_COLORS,
+ FOUR_COLORS,
+ FIVE_COLORS,
+ SIX_COLORS,
+ SEVEN_COLORS,
+ EIGHT_COLORS,
+ PALETTE_SIZES
+} PALETTE_SIZE;
+
+typedef enum {
+ PALETTE_COLOR_ONE,
+ PALETTE_COLOR_TWO,
+ PALETTE_COLOR_THREE,
+ PALETTE_COLOR_FOUR,
+ PALETTE_COLOR_FIVE,
+ PALETTE_COLOR_SIX,
+ PALETTE_COLOR_SEVEN,
+ PALETTE_COLOR_EIGHT,
+ PALETTE_COLORS
+} PALETTE_COLOR;
+
#define DC_PRED 0 // Average of above and left pixels
#define V_PRED 1 // Vertical
#define H_PRED 2 // Horizontal
@@ -125,23 +177,106 @@
#define NEARMV 11
#define ZEROMV 12
#define NEWMV 13
+#if CONFIG_EXT_INTER
+#define NEWFROMNEARMV 14
+#define NEAREST_NEARESTMV 15
+#define NEAREST_NEARMV 16
+#define NEAR_NEARESTMV 17
+#define NEAREST_NEWMV 18
+#define NEW_NEARESTMV 19
+#define NEAR_NEWMV 20
+#define NEW_NEARMV 21
+#define ZERO_ZEROMV 22
+#define NEW_NEWMV 23
+#define MB_MODE_COUNT 24
+#else
#define MB_MODE_COUNT 14
+#endif // CONFIG_EXT_INTER
typedef uint8_t PREDICTION_MODE;
#define INTRA_MODES (TM_PRED + 1)
+#if CONFIG_EXT_INTRA
+typedef enum {
+ FILTER_DC_PRED,
+ FILTER_V_PRED,
+ FILTER_H_PRED,
+ FILTER_D45_PRED,
+ FILTER_D135_PRED,
+ FILTER_D117_PRED,
+ FILTER_D153_PRED,
+ FILTER_D207_PRED,
+ FILTER_D63_PRED,
+ FILTER_TM_PRED,
+ EXT_INTRA_MODES,
+} EXT_INTRA_MODE;
+
+#define FILTER_INTRA_MODES (FILTER_TM_PRED + 1)
+#define DIRECTIONAL_MODES (INTRA_MODES - 2)
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTER
+#define INTER_MODES (1 + NEWFROMNEARMV - NEARESTMV)
+#else
#define INTER_MODES (1 + NEWMV - NEARESTMV)
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_EXT_INTER
+#define INTER_COMPOUND_MODES (1 + NEW_NEWMV - NEAREST_NEARESTMV)
+#endif // CONFIG_EXT_INTER
#define SKIP_CONTEXTS 3
+
+#if CONFIG_REF_MV
+#define NEWMV_MODE_CONTEXTS 7
+#define ZEROMV_MODE_CONTEXTS 2
+#define REFMV_MODE_CONTEXTS 9
+#define DRL_MODE_CONTEXTS 3
+
+#define ZEROMV_OFFSET 3
+#define REFMV_OFFSET 4
+
+#define NEWMV_CTX_MASK ((1 << ZEROMV_OFFSET) - 1)
+#define ZEROMV_CTX_MASK ((1 << (REFMV_OFFSET - ZEROMV_OFFSET)) - 1)
+#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1)
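+
+// The inter mode context is bit-packed: bits [0, ZEROMV_OFFSET) carry the
+// NEWMV context, [ZEROMV_OFFSET, REFMV_OFFSET) the ZEROMV context, and
+// [REFMV_OFFSET, 8) the REFMV context; the masks above extract each field
+// after the corresponding shift.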
+
+#define ALL_ZERO_FLAG_OFFSET 8
+#define SKIP_NEARESTMV_OFFSET 9
+#define SKIP_NEARMV_OFFSET 10
+#define SKIP_NEARESTMV_SUB8X8_OFFSET 11
+#endif
+
#define INTER_MODE_CONTEXTS 7
/* Segment Feature Masks */
#define MAX_MV_REF_CANDIDATES 2
+#if CONFIG_REF_MV
+#define MAX_REF_MV_STACK_SIZE 16
+#define REF_CAT_LEVEL 160
+#endif
#define INTRA_INTER_CONTEXTS 4
#define COMP_INTER_CONTEXTS 5
#define REF_CONTEXTS 5
+#if CONFIG_VAR_TX
+#define TXFM_PARTITION_CONTEXTS 9
+typedef TX_SIZE TXFM_CONTEXT;
+#endif
+
+#if CONFIG_EXT_REFS
+#define SINGLE_REFS 6
+#define COMP_REFS 5
+#else
+#define SINGLE_REFS 3
+#define COMP_REFS 2
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_SUPERTX
+#define PARTITION_SUPERTX_CONTEXTS 2
+#define MAX_SUPERTX_BLOCK_SIZE BLOCK_32X32
+#endif // CONFIG_SUPERTX
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/filter.c b/vp10/common/filter.c
index dda279f..a3aa3cf 100644
--- a/vp10/common/filter.c
+++ b/vp10/common/filter.c
@@ -12,6 +12,8 @@
#include "vp10/common/filter.h"
+#define USE_12_SHARP_FILTER 0
+
DECLARE_ALIGNED(256, static const InterpKernel,
bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
@@ -32,9 +34,28 @@
{ 0, 0, 0, 8, 120, 0, 0, 0 }
};
-// Lagrangian interpolation filter
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+#if CONFIG_EXT_INTERP
+ // intfilt 0.575
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {0, 1, -5, 126, 8, -3, 1, 0},
+ {-1, 3, -10, 123, 18, -6, 2, -1},
+ {-1, 4, -14, 118, 27, -9, 3, 0},
+ {-1, 5, -16, 112, 37, -12, 4, -1},
+ {-1, 5, -18, 105, 48, -14, 4, -1},
+ {-1, 6, -19, 97, 58, -17, 5, -1},
+ {-1, 6, -20, 88, 68, -18, 6, -1},
+ {-1, 6, -19, 78, 78, -19, 6, -1},
+ {-1, 6, -18, 68, 88, -20, 6, -1},
+ {-1, 5, -17, 58, 97, -19, 6, -1},
+ {-1, 4, -14, 48, 105, -18, 5, -1},
+ {-1, 4, -12, 37, 112, -16, 5, -1},
+ {0, 3, -9, 27, 118, -14, 4, -1},
+ {-1, 2, -6, 18, 123, -10, 3, -1},
+ {0, 1, -3, 8, 126, -5, 1, 0},
+#else
+ // Lagrangian interpolation filter
{ 0, 0, 0, 128, 0, 0, 0, 0},
{ 0, 1, -5, 126, 8, -3, 1, 0},
{ -1, 3, -10, 122, 18, -6, 2, 0},
@@ -51,11 +72,54 @@
{ -1, 3, -9, 27, 118, -13, 4, -1},
{ 0, 2, -6, 18, 122, -10, 3, -1},
{ 0, 1, -3, 8, 126, -5, 1, 0}
+#endif // CONFIG_EXT_INTERP
};
-// DCT based filter
+#if USE_12_SHARP_FILTER
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_12sharp[16][12]) = {
+ // intfilt 0.8
+ {0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0},
+ {0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0},
+ {0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0},
+ {-1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1},
+ {-1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1},
+ {-1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1},
+ {-1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1},
+ {-1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1},
+ {-1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1},
+ {-1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1},
+ {-1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1},
+ {-1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1},
+ {-1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1},
+ {-1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1},
+ {0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0},
+ {0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0},
+};
+#endif // USE_12_SHARP_FILTER
+
DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
+ sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+#if CONFIG_EXT_INTERP
+ // intfilt 0.8
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 2, -6, 127, 9, -4, 2, -1},
+ {-2, 5, -12, 124, 18, -7, 4, -2},
+ {-2, 7, -16, 119, 28, -11, 5, -2},
+ {-3, 8, -19, 114, 38, -14, 7, -3},
+ {-3, 9, -22, 107, 49, -17, 8, -3},
+ {-4, 10, -23, 99, 60, -20, 10, -4},
+ {-4, 11, -23, 90, 70, -22, 10, -4},
+ {-4, 11, -23, 80, 80, -23, 11, -4},
+ {-4, 10, -22, 70, 90, -23, 11, -4},
+ {-4, 10, -20, 60, 99, -23, 10, -4},
+ {-3, 8, -17, 49, 107, -22, 9, -3},
+ {-3, 7, -14, 38, 114, -19, 8, -3},
+ {-2, 5, -11, 28, 119, -16, 7, -2},
+ {-2, 4, -7, 18, 124, -12, 5, -2},
+ {-1, 2, -4, 9, 127, -6, 2, -1},
+#else
+ // DCT based filter
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 3, -7, 127, 8, -3, 1, 0},
{-2, 5, -13, 125, 17, -6, 3, -1},
@@ -72,11 +136,58 @@
{-2, 5, -10, 27, 121, -17, 7, -3},
{-1, 3, -6, 17, 125, -13, 5, -2},
{0, 1, -3, 8, 127, -7, 3, -1}
+#endif // CONFIG_EXT_INTERP
};
-// freqmultiplier = 0.5
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+
DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
+ sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.35
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 8, 31, 47, 34, 10, 0, -1},
+ {-1, 7, 29, 46, 36, 12, 0, -1},
+ {-1, 6, 28, 46, 37, 13, 0, -1},
+ {-1, 5, 26, 46, 38, 14, 1, -1},
+ {-1, 4, 25, 45, 39, 16, 1, -1},
+ {-1, 4, 23, 44, 41, 17, 1, -1},
+ {-1, 3, 21, 44, 42, 18, 2, -1},
+ {-1, 2, 20, 43, 43, 20, 2, -1},
+ {-1, 2, 18, 42, 44, 21, 3, -1},
+ {-1, 1, 17, 41, 44, 23, 4, -1},
+ {-1, 1, 16, 39, 45, 25, 4, -1},
+ {-1, 1, 14, 38, 46, 26, 5, -1},
+ {-1, 0, 13, 37, 46, 28, 6, -1},
+ {-1, 0, 12, 36, 46, 29, 7, -1},
+ {-1, 0, 10, 34, 47, 31, 8, -1},
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.75
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {2, -10, 19, 95, 31, -11, 2, 0},
+ {2, -9, 14, 94, 37, -12, 2, 0},
+ {2, -8, 9, 92, 43, -12, 1, 1},
+ {2, -7, 5, 90, 49, -12, 1, 0},
+ {2, -5, 1, 86, 55, -12, 0, 1},
+ {1, -4, -2, 82, 61, -11, 0, 1},
+ {1, -3, -5, 77, 67, -9, -1, 1},
+ {1, -2, -7, 72, 72, -7, -2, 1},
+ {1, -1, -9, 67, 77, -5, -3, 1},
+ {1, 0, -11, 61, 82, -2, -4, 1},
+ {1, 0, -12, 55, 86, 1, -5, 2},
+ {0, 1, -12, 49, 90, 5, -7, 2},
+ {1, 1, -12, 43, 92, 9, -8, 2},
+ {0, 2, -12, 37, 94, 14, -9, 2},
+ {0, 2, -11, 31, 95, 19, -10, 2},
+};
+
+#else
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.5
{ 0, 0, 0, 128, 0, 0, 0, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
{-2, -2, 29, 63, 41, 2, -3, 0},
@@ -95,10 +206,43 @@
{ 0, -3, 1, 38, 64, 32, -1, -3}
};
+#endif // CONFIG_EXT_INTERP
-const InterpKernel *vp10_filter_kernels[4] = {
+const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1] = {
sub_pel_filters_8,
- sub_pel_filters_8lp,
- sub_pel_filters_8s,
+ sub_pel_filters_8smooth,
+ sub_pel_filters_8sharp,
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+ sub_pel_filters_8smooth2,
+#endif
bilinear_filters
};
+
+#if CONFIG_EXT_INTRA
+const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS] = {
+ NULL, // INTRA_FILTER_LINEAR
+ sub_pel_filters_8, // INTRA_FILTER_8TAP
+ sub_pel_filters_8sharp, // INTRA_FILTER_8TAP_SHARP
+ sub_pel_filters_8smooth, // INTRA_FILTER_8TAP_SMOOTH
+};
+#endif // CONFIG_EXT_INTRA
+
+static const InterpFilterParams
+vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
+ {(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS},
+ {(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS},
+#if USE_12_SHARP_FILTER
+ {(const int16_t*)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS},
+#else // USE_12_SHARP_FILTER
+ {(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS},
+#endif // USE_12_SHARP_FILTER
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+ {(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS},
+#endif
+ {(const int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS}
+};
+
+InterpFilterParams vp10_get_interp_filter_params(
+ const INTERP_FILTER interp_filter) {
+ return vp10_interp_filter_params_list[interp_filter];
+}
diff --git a/vp10/common/filter.h b/vp10/common/filter.h
index 826cd03..afebee0 100644
--- a/vp10/common/filter.h
+++ b/vp10/common/filter.h
@@ -24,17 +24,49 @@
#define EIGHTTAP 0
#define EIGHTTAP_SMOOTH 1
#define EIGHTTAP_SHARP 2
+
+#if CONFIG_EXT_INTERP
+#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn it on for experimentation */
+#define EIGHTTAP_SMOOTH2 3
+#define SWITCHABLE_FILTERS 4 /* Number of switchable filters */
+#else
#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
-#define BILINEAR 3
+#endif // CONFIG_EXT_INTERP
 // The codec can operate in four possible inter prediction filter modes:
// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+
+#define BILINEAR (SWITCHABLE_FILTERS)
+#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
-#define SWITCHABLE 4 /* should be the last one */
typedef uint8_t INTERP_FILTER;
-extern const InterpKernel *vp10_filter_kernels[4];
+extern const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1];
+#if CONFIG_EXT_INTRA
+typedef enum {
+ INTRA_FILTER_LINEAR,
+ INTRA_FILTER_8TAP,
+ INTRA_FILTER_8TAP_SHARP,
+ INTRA_FILTER_8TAP_SMOOTH,
+ INTRA_FILTERS,
+} INTRA_FILTER;
+
+extern const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS];
+#endif // CONFIG_EXT_INTRA
+
+typedef struct InterpFilterParams {
+ const int16_t* filter_ptr;
+ uint16_t tap;
+ uint16_t subpel_shifts;
+} InterpFilterParams;
+
+InterpFilterParams vp10_get_interp_filter_params(
+ const INTERP_FILTER interp_filter);
+static INLINE const int16_t* vp10_get_interp_filter_kernel(
+ const InterpFilterParams filter_params, const int subpel) {
+ return filter_params.filter_ptr + filter_params.tap * subpel;
+}
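+
+// Illustrative (hypothetical) usage: pick a filter, then fetch its kernel
+// for a subpel offset in [0, SUBPEL_SHIFTS):
+//   InterpFilterParams params = vp10_get_interp_filter_params(EIGHTTAP);
+//   const int16_t *kernel = vp10_get_interp_filter_kernel(params, subpel_x);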
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 5ee15c8..d42f5f5 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -13,107 +13,1138 @@
#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp10/common/blockd.h"
+#include "vp10/common/enums.h"
#include "vp10/common/idct.h"
#include "vpx_dsp/inv_txfm.h"
#include "vpx_ports/mem.h"
+#if CONFIG_EXT_TX
+void idst4_c(const tran_low_t *input, tran_low_t *output) {
+#if USE_DST2
+ tran_low_t step[4];
+ tran_high_t temp1, temp2;
+ // stage 1
+ temp1 = (input[3] + input[1]) * cospi_16_64;
+ temp2 = (input[3] - input[1]) * cospi_16_64;
+ step[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64;
+ temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64;
+ step[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
+
+ // stage 2
+ output[0] = WRAPLOW(step[0] + step[3], 8);
+ output[1] = WRAPLOW(-step[1] - step[2], 8);
+ output[2] = WRAPLOW(step[1] - step[2], 8);
+ output[3] = WRAPLOW(step[3] - step[0], 8);
+#else
+ // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 141124871, 228344838,
+ };
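+  // The constants are in Q(2 * DCT_CONST_BITS) fixed point; the
+  // ROUND_POWER_OF_TWO() calls below remove that scaling.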
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+#endif // USE_DST2
+}
+
+void idst8_c(const tran_low_t *input, tran_low_t *output) {
+#if USE_DST2
+ tran_low_t step1[8], step2[8];
+ tran_high_t temp1, temp2;
+ // stage 1
+ step1[0] = input[7];
+ step1[2] = input[3];
+ step1[1] = input[5];
+ step1[3] = input[1];
+ temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
+ temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
+ temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+
+ // stage 2
+ temp1 = (step1[0] + step1[2]) * cospi_16_64;
+ temp2 = (step1[0] - step1[2]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[4] = WRAPLOW(step1[4] + step1[5], 8);
+ step2[5] = WRAPLOW(step1[4] - step1[5], 8);
+ step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
+ step2[7] = WRAPLOW(step1[6] + step1[7], 8);
+
+ // stage 3
+ step1[0] = WRAPLOW(step2[0] + step2[3], 8);
+ step1[1] = WRAPLOW(step2[1] + step2[2], 8);
+ step1[2] = WRAPLOW(step2[1] - step2[2], 8);
+ step1[3] = WRAPLOW(step2[0] - step2[3], 8);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[7] = step2[7];
+
+ // stage 4
+ output[0] = WRAPLOW(step1[0] + step1[7], 8);
+ output[1] = WRAPLOW(-step1[1] - step1[6], 8);
+ output[2] = WRAPLOW(step1[2] + step1[5], 8);
+ output[3] = WRAPLOW(-step1[3] - step1[4], 8);
+ output[4] = WRAPLOW(step1[3] - step1[4], 8);
+ output[5] = WRAPLOW(-step1[2] + step1[5], 8);
+ output[6] = WRAPLOW(step1[1] - step1[6], 8);
+ output[7] = WRAPLOW(-step1[0] + step1[7], 8);
+#else
+ // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+ static const int32_t sinvalue_lookup[] = {
+ 86559612, 162678858, 219176632, 249238470
+ };
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+#endif // USE_DST2
+}
+
+void idst16_c(const tran_low_t *input, tran_low_t *output) {
+#if USE_DST2
+ tran_low_t step1[16], step2[16];
+ tran_high_t temp1, temp2;
+
+ // stage 1
+ step1[0] = input[15];
+ step1[1] = input[7];
+ step1[2] = input[11];
+ step1[3] = input[3];
+ step1[4] = input[13];
+ step1[5] = input[5];
+ step1[6] = input[9];
+ step1[7] = input[1];
+ step1[8] = input[14];
+ step1[9] = input[6];
+ step1[10] = input[10];
+ step1[11] = input[2];
+ step1[12] = input[12];
+ step1[13] = input[4];
+ step1[14] = input[8];
+ step1[15] = input[0];
+
+ // stage 2
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8);
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
+
+ // stage 3
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+
+ step1[8] = WRAPLOW(step2[8] + step2[9], 8);
+ step1[9] = WRAPLOW(step2[8] - step2[9], 8);
+ step1[10] = WRAPLOW(-step2[10] + step2[11], 8);
+ step1[11] = WRAPLOW(step2[10] + step2[11], 8);
+ step1[12] = WRAPLOW(step2[12] + step2[13], 8);
+ step1[13] = WRAPLOW(step2[12] - step2[13], 8);
+ step1[14] = WRAPLOW(-step2[14] + step2[15], 8);
+ step1[15] = WRAPLOW(step2[14] + step2[15], 8);
+
+ // stage 4
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[4] = WRAPLOW(step1[4] + step1[5], 8);
+ step2[5] = WRAPLOW(step1[4] - step1[5], 8);
+ step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
+ step2[7] = WRAPLOW(step1[6] + step1[7], 8);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5
+ step1[0] = WRAPLOW(step2[0] + step2[3], 8);
+ step1[1] = WRAPLOW(step2[1] + step2[2], 8);
+ step1[2] = WRAPLOW(step2[1] - step2[2], 8);
+ step1[3] = WRAPLOW(step2[0] - step2[3], 8);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[7] = step2[7];
+
+ step1[8] = WRAPLOW(step2[8] + step2[11], 8);
+ step1[9] = WRAPLOW(step2[9] + step2[10], 8);
+ step1[10] = WRAPLOW(step2[9] - step2[10], 8);
+ step1[11] = WRAPLOW(step2[8] - step2[11], 8);
+ step1[12] = WRAPLOW(-step2[12] + step2[15], 8);
+ step1[13] = WRAPLOW(-step2[13] + step2[14], 8);
+ step1[14] = WRAPLOW(step2[13] + step2[14], 8);
+ step1[15] = WRAPLOW(step2[12] + step2[15], 8);
+
+ // stage 6
+ step2[0] = WRAPLOW(step1[0] + step1[7], 8);
+ step2[1] = WRAPLOW(step1[1] + step1[6], 8);
+ step2[2] = WRAPLOW(step1[2] + step1[5], 8);
+ step2[3] = WRAPLOW(step1[3] + step1[4], 8);
+ step2[4] = WRAPLOW(step1[3] - step1[4], 8);
+ step2[5] = WRAPLOW(step1[2] - step1[5], 8);
+ step2[6] = WRAPLOW(step1[1] - step1[6], 8);
+ step2[7] = WRAPLOW(step1[0] - step1[7], 8);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7
+ output[0] = WRAPLOW(step2[0] + step2[15], 8);
+ output[1] = WRAPLOW(-step2[1] - step2[14], 8);
+ output[2] = WRAPLOW(step2[2] + step2[13], 8);
+ output[3] = WRAPLOW(-step2[3] - step2[12], 8);
+ output[4] = WRAPLOW(step2[4] + step2[11], 8);
+ output[5] = WRAPLOW(-step2[5] - step2[10], 8);
+ output[6] = WRAPLOW(step2[6] + step2[9], 8);
+ output[7] = WRAPLOW(-step2[7] - step2[8], 8);
+ output[8] = WRAPLOW(step2[7] - step2[8], 8);
+ output[9] = WRAPLOW(-step2[6] + step2[9], 8);
+ output[10] = WRAPLOW(step2[5] - step2[10], 8);
+ output[11] = WRAPLOW(-step2[4] + step2[11], 8);
+ output[12] = WRAPLOW(step2[3] - step2[12], 8);
+ output[13] = WRAPLOW(-step2[2] + step2[13], 8);
+ output[14] = WRAPLOW(step2[1] - step2[14], 8);
+ output[15] = WRAPLOW(-step2[0] + step2[15], 8);
+#else
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 47852167, 94074787, 137093803, 175444254,
+ 207820161, 233119001, 250479254, 259309736
+ };
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+#endif // USE_DST2
+}
+
+// Inverse identity transform and add.
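+// The coefficients only need rescaling: shift right by 3 (by 2 for 32x32
+// blocks) and sum the result with the prediction in dest.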
+static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+ int bs) {
+ int r, c;
+ const int shift = bs < 32 ? 3 : 2;
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c)
+ dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
+ dest += stride;
+ input += bs;
+ }
+}
+
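+// Repoint the buffer at its last row and negate the stride, so the usual
+// top-to-bottom iteration walks the rows bottom-to-top (an up/down flip).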
+#define FLIPUD_PTR(dest, stride, size) do { \
+ (dest) = (dest) + ((size) - 1) * (stride); \
+ (stride) = - (stride); \
+} while (0)
+
+static void maybe_flip_strides(uint8_t **dst, int *dstride,
+ tran_low_t **src, int *sstride,
+ int tx_type, int size) {
+ // Note that the transpose of src will be added to dst. In order to LR
+ // flip the addends (in dst coordinates), we UD flip the src. To UD flip
+ // the addends, we UD flip the dst.
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ break;
+ case FLIPADST_DCT:
+ case FLIPADST_ADST:
+ case FLIPADST_DST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, size);
+ break;
+ case DCT_FLIPADST:
+ case ADST_FLIPADST:
+ case DST_FLIPADST:
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, size);
+ break;
+ case FLIPADST_FLIPADST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, size);
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, size);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+#if USE_DST2
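+  // With USE_DST2, the 4-point DST is evaluated through the idct4
+  // butterflies: the inputs are taken in reversed order and the
+  // odd-indexed outputs are negated.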
+ tran_low_t step[4];
+ tran_high_t temp1, temp2;
+ (void) bd;
+ // stage 1
+ temp1 = (input[3] + input[1]) * cospi_16_64;
+ temp2 = (input[3] - input[1]) * cospi_16_64;
+ step[0] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step[1] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64;
+ temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64;
+ step[2] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step[3] = WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ // stage 2
+ output[0] = WRAPLOW(step[0] + step[3], bd);
+ output[1] = WRAPLOW(-step[1] - step[2], bd);
+ output[2] = WRAPLOW(step[1] - step[2], bd);
+ output[3] = WRAPLOW(step[3] - step[0], bd);
+#else
+ // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
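+  // Entries are fixed-point, scaled by 2^(2 * DCT_CONST_BITS) (= 2^28);
+  // ROUND_POWER_OF_TWO(sum, 2 * DCT_CONST_BITS) strips the scale again.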
+ static const int32_t sinvalue_lookup[] = {
+ 141124871, 228344838,
+ };
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+
+#if !CONFIG_EMULATE_HARDWARE
+ (void)bd;
+#endif
+
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+#endif // USE_DST2
+}
+
+void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+#if USE_DST2
+ tran_low_t step1[8], step2[8];
+ tran_high_t temp1, temp2;
+ (void) bd;
+ // stage 1
+ step1[0] = input[7];
+ step1[2] = input[3];
+ step1[1] = input[5];
+ step1[3] = input[1];
+ temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
+ temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
+ temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ // stage 2
+ temp1 = (step1[0] + step1[2]) * cospi_16_64;
+ temp2 = (step1[0] - step1[2]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[4] = WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = WRAPLOW(step1[6] + step1[7], bd);
+
+ // stage 3
+ step1[0] = WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = WRAPLOW(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ step1[7] = step2[7];
+
+ // stage 4
+ output[0] = WRAPLOW(step1[0] + step1[7], bd);
+ output[1] = WRAPLOW(-step1[1] - step1[6], bd);
+ output[2] = WRAPLOW(step1[2] + step1[5], bd);
+ output[3] = WRAPLOW(-step1[3] - step1[4], bd);
+ output[4] = WRAPLOW(step1[3] - step1[4], bd);
+ output[5] = WRAPLOW(-step1[2] + step1[5], bd);
+ output[6] = WRAPLOW(step1[1] - step1[6], bd);
+ output[7] = WRAPLOW(-step1[0] + step1[7], bd);
+#else
+ // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
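+  // (fixed-point, scaled by 2^(2 * DCT_CONST_BITS) like the 4-point table)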
+ static const int32_t sinvalue_lookup[] = {
+ 86559612, 162678858, 219176632, 249238470
+ };
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+
+#if !CONFIG_EMULATE_HARDWARE
+ (void)bd;
+#endif
+
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+#endif // USE_DST2
+}
+
+void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+#if USE_DST2
+ tran_low_t step1[16], step2[16];
+ tran_high_t temp1, temp2;
+ (void) bd;
+
+ // stage 1
+ step1[0] = input[15];
+ step1[1] = input[7];
+ step1[2] = input[11];
+ step1[3] = input[3];
+ step1[4] = input[13];
+ step1[5] = input[5];
+ step1[6] = input[9];
+ step1[7] = input[1];
+ step1[8] = input[14];
+ step1[9] = input[6];
+ step1[10] = input[10];
+ step1[11] = input[2];
+ step1[12] = input[12];
+ step1[13] = input[4];
+ step1[14] = input[8];
+ step1[15] = input[0];
+
+ // stage 2
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ // stage 3
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ step1[8] = WRAPLOW(step2[8] + step2[9], bd);
+ step1[9] = WRAPLOW(step2[8] - step2[9], bd);
+ step1[10] = WRAPLOW(-step2[10] + step2[11], bd);
+ step1[11] = WRAPLOW(step2[10] + step2[11], bd);
+ step1[12] = WRAPLOW(step2[12] + step2[13], bd);
+ step1[13] = WRAPLOW(step2[12] - step2[13], bd);
+ step1[14] = WRAPLOW(-step2[14] + step2[15], bd);
+ step1[15] = WRAPLOW(step2[14] + step2[15], bd);
+
+ // stage 4
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[4] = WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = WRAPLOW(step1[6] + step1[7], bd);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5
+ step1[0] = WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = WRAPLOW(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ step1[7] = step2[7];
+
+ step1[8] = WRAPLOW(step2[8] + step2[11], bd);
+ step1[9] = WRAPLOW(step2[9] + step2[10], bd);
+ step1[10] = WRAPLOW(step2[9] - step2[10], bd);
+ step1[11] = WRAPLOW(step2[8] - step2[11], bd);
+ step1[12] = WRAPLOW(-step2[12] + step2[15], bd);
+ step1[13] = WRAPLOW(-step2[13] + step2[14], bd);
+ step1[14] = WRAPLOW(step2[13] + step2[14], bd);
+ step1[15] = WRAPLOW(step2[12] + step2[15], bd);
+
+ // stage 6
+ step2[0] = WRAPLOW(step1[0] + step1[7], bd);
+ step2[1] = WRAPLOW(step1[1] + step1[6], bd);
+ step2[2] = WRAPLOW(step1[2] + step1[5], bd);
+ step2[3] = WRAPLOW(step1[3] + step1[4], bd);
+ step2[4] = WRAPLOW(step1[3] - step1[4], bd);
+ step2[5] = WRAPLOW(step1[2] - step1[5], bd);
+ step2[6] = WRAPLOW(step1[1] - step1[6], bd);
+ step2[7] = WRAPLOW(step1[0] - step1[7], bd);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7
+ output[0] = WRAPLOW(step2[0] + step2[15], bd);
+ output[1] = WRAPLOW(-step2[1] - step2[14], bd);
+ output[2] = WRAPLOW(step2[2] + step2[13], bd);
+ output[3] = WRAPLOW(-step2[3] - step2[12], bd);
+ output[4] = WRAPLOW(step2[4] + step2[11], bd);
+ output[5] = WRAPLOW(-step2[5] - step2[10], bd);
+ output[6] = WRAPLOW(step2[6] + step2[9], bd);
+ output[7] = WRAPLOW(-step2[7] - step2[8], bd);
+ output[8] = WRAPLOW(step2[7] - step2[8], bd);
+ output[9] = WRAPLOW(-step2[6] + step2[9], bd);
+ output[10] = WRAPLOW(step2[5] - step2[10], bd);
+ output[11] = WRAPLOW(-step2[4] + step2[11], bd);
+ output[12] = WRAPLOW(step2[3] - step2[12], bd);
+ output[13] = WRAPLOW(-step2[2] + step2[13], bd);
+ output[14] = WRAPLOW(step2[1] - step2[14], bd);
+ output[15] = WRAPLOW(-step2[0] + step2[15], bd);
+#else
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
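+  // (fixed-point, scaled by 2^(2 * DCT_CONST_BITS) like the 4-point table)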
+ static const int32_t sinvalue_lookup[] = {
+ 47852167, 94074787, 137093803, 175444254,
+ 207820161, 233119001, 250479254, 259309736
+ };
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+
+#if !CONFIG_EMULATE_HARDWARE
+ (void)bd;
+#endif
+
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+#endif // USE_DST2
+}
+
+static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bs, int bd) {
+ int r, c;
+ const int shift = bs < 32 ? 3 : 2;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c)
+ dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
+ dest += stride;
+ input += bs;
+ }
+}
+
+static void maybe_flip_strides16(uint16_t **dst, int *dstride,
+ tran_low_t **src, int *sstride,
+ int tx_type, int size) {
+ // Note that the transpose of src will be added to dst. In order to LR
+ // flip the addends (in dst coordinates), we UD flip the src. To UD flip
+ // the addends, we UD flip the dst.
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ break;
+ case FLIPADST_DCT:
+ case FLIPADST_ADST:
+ case FLIPADST_DST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, size);
+ break;
+ case DCT_FLIPADST:
+ case ADST_FLIPADST:
+ case DST_FLIPADST:
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, size);
+ break;
+ case FLIPADST_FLIPADST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, size);
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, size);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
+
void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int tx_type) {
- const transform_2d IHT_4[] = {
- { idct4_c, idct4_c }, // DCT_DCT = 0
- { iadst4_c, idct4_c }, // ADST_DCT = 1
- { idct4_c, iadst4_c }, // DCT_ADST = 2
- { iadst4_c, iadst4_c } // ADST_ADST = 3
+ int tx_type) {
+ static const transform_2d IHT_4[] = {
+ { idct4_c, idct4_c }, // DCT_DCT = 0,
+ { iadst4_c, idct4_c }, // ADST_DCT = 1,
+ { idct4_c, iadst4_c }, // DCT_ADST = 2,
+ { iadst4_c, iadst4_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { iadst4_c, idct4_c }, // FLIPADST_DCT = 4,
+ { idct4_c, iadst4_c }, // DCT_FLIPADST = 5,
+ { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6,
+ { iadst4_c, iadst4_c }, // ADST_FLIPADST = 7,
+ { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8,
+ { idst4_c, idct4_c }, // DST_DCT = 9,
+ { idct4_c, idst4_c }, // DCT_DST = 10,
+ { idst4_c, iadst4_c }, // DST_ADST = 11,
+ { iadst4_c, idst4_c }, // ADST_DST = 12,
+ { idst4_c, iadst4_c }, // DST_FLIPADST = 13,
+ { iadst4_c, idst4_c }, // FLIPADST_DST = 14,
+ { idst4_c, idst4_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
int i, j;
- tran_low_t out[4 * 4];
- tran_low_t *outptr = out;
- tran_low_t temp_in[4], temp_out[4];
+ tran_low_t tmp;
+ tran_low_t out[4][4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 4;
// inverse transform row vectors
for (i = 0; i < 4; ++i) {
- IHT_4[tx_type].rows(input, outptr);
+ IHT_4[tx_type].rows(input, out[i]);
input += 4;
- outptr += 4;
+ }
+
+ // transpose
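+  // (in place, so each out[i] then holds column i and the column pass
+  // below can run on contiguous rows; the final sum re-transposes via
+  // its indexing)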
+  for (i = 1; i < 4; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
}
// inverse transform column vectors
for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j * 4 + i];
- IHT_4[tx_type].cols(temp_in, temp_out);
+ IHT_4[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) {
- dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
- ROUND_POWER_OF_TWO(temp_out[j], 4));
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
}
}
}
-static const transform_2d IHT_8[] = {
- { idct8_c, idct8_c }, // DCT_DCT = 0
- { iadst8_c, idct8_c }, // ADST_DCT = 1
- { idct8_c, iadst8_c }, // DCT_ADST = 2
- { iadst8_c, iadst8_c } // ADST_ADST = 3
-};
-
void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
+ static const transform_2d IHT_8[] = {
+ { idct8_c, idct8_c }, // DCT_DCT = 0,
+ { iadst8_c, idct8_c }, // ADST_DCT = 1,
+ { idct8_c, iadst8_c }, // DCT_ADST = 2,
+ { iadst8_c, iadst8_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { iadst8_c, idct8_c }, // FLIPADST_DCT = 4,
+ { idct8_c, iadst8_c }, // DCT_FLIPADST = 5,
+ { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6,
+ { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7,
+ { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8,
+ { idst8_c, idct8_c }, // DST_DCT = 9,
+ { idct8_c, idst8_c }, // DCT_DST = 10,
+ { idst8_c, iadst8_c }, // DST_ADST = 11,
+ { iadst8_c, idst8_c }, // ADST_DST = 12,
+ { idst8_c, iadst8_c }, // DST_FLIPADST = 13,
+ { iadst8_c, idst8_c }, // FLIPADST_DST = 14,
+ { idst8_c, idst8_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
int i, j;
- tran_low_t out[8 * 8];
- tran_low_t *outptr = out;
- tran_low_t temp_in[8], temp_out[8];
- const transform_2d ht = IHT_8[tx_type];
+ tran_low_t tmp;
+ tran_low_t out[8][8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 8;
// inverse transform row vectors
for (i = 0; i < 8; ++i) {
- ht.rows(input, outptr);
- input += 8;
- outptr += 8;
+ IHT_8[tx_type].rows(input, out[i]);
+ input += 8;
+ }
+
+ // transpose
+  for (i = 1; i < 8; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
}
// inverse transform column vectors
for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j * 8 + i];
- ht.cols(temp_in, temp_out);
+ IHT_8[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) {
- dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
- ROUND_POWER_OF_TWO(temp_out[j], 5));
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
}
}
}
-static const transform_2d IHT_16[] = {
- { idct16_c, idct16_c }, // DCT_DCT = 0
- { iadst16_c, idct16_c }, // ADST_DCT = 1
- { idct16_c, iadst16_c }, // DCT_ADST = 2
- { iadst16_c, iadst16_c } // ADST_ADST = 3
-};
-
void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
- int i, j;
- tran_low_t out[16 * 16];
- tran_low_t *outptr = out;
- tran_low_t temp_in[16], temp_out[16];
- const transform_2d ht = IHT_16[tx_type];
+ static const transform_2d IHT_16[] = {
+ { idct16_c, idct16_c }, // DCT_DCT = 0,
+ { iadst16_c, idct16_c }, // ADST_DCT = 1,
+ { idct16_c, iadst16_c }, // DCT_ADST = 2,
+ { iadst16_c, iadst16_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { iadst16_c, idct16_c }, // FLIPADST_DCT = 4,
+ { idct16_c, iadst16_c }, // DCT_FLIPADST = 5,
+ { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6,
+ { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7,
+ { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8,
+ { idst16_c, idct16_c }, // DST_DCT = 9,
+ { idct16_c, idst16_c }, // DCT_DST = 10,
+ { idst16_c, iadst16_c }, // DST_ADST = 11,
+ { iadst16_c, idst16_c }, // ADST_DST = 12,
+ { idst16_c, iadst16_c }, // DST_FLIPADST = 13,
+ { iadst16_c, idst16_c }, // FLIPADST_DST = 14,
+ { idst16_c, idst16_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
- // Rows
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[16][16];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 16;
+
+ // inverse transform row vectors
for (i = 0; i < 16; ++i) {
- ht.rows(input, outptr);
- input += 16;
- outptr += 16;
+ IHT_16[tx_type].rows(input, out[i]);
+ input += 16;
}
- // Columns
+ // transpose
+  for (i = 1; i < 16; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j * 16 + i];
- ht.cols(temp_in, temp_out);
+ IHT_16[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) {
- dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
- ROUND_POWER_OF_TWO(temp_out[j], 6));
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
}
}
}
@@ -183,20 +1214,43 @@
if (lossless) {
assert(tx_type == DCT_DCT);
vp10_iwht4x4_add(input, dest, stride, eob);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vp10_idct4x4_add(input, dest, stride, eob);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_iht4x4_16_add(input, dest, stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vp10_idct4x4_add(input, dest, stride, eob);
+ break;
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_iht4x4_16_add(input, dest, stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_iht4x4_16_add(input, dest, stride, tx_type);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
+ break;
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
}
}
@@ -211,6 +1265,28 @@
case ADST_ADST:
vp10_iht8x8_64_add(input, dest, stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_iht8x8_64_add(input, dest, stride, tx_type);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
+ break;
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -228,6 +1304,28 @@
case ADST_ADST:
vp10_iht16x16_256_add(input, dest, stride, tx_type);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_iht16x16_256_add(input, dest, stride, tx_type);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
+ break;
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -240,6 +1338,11 @@
case DCT_DCT:
vp10_idct32x32_add(input, dest, stride, eob);
break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
@@ -254,104 +1357,198 @@
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
- const highbd_transform_2d IHT_4[] = {
- { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
- { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
- { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
+ static const highbd_transform_2d HIGH_IHT_4[] = {
+ { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0,
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1,
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4,
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7,
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8,
+ { highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 9,
+ { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 10,
+ { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 11,
+ { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 12,
+ { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13,
+ { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14,
+ { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
+
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
int i, j;
- tran_low_t out[4 * 4];
- tran_low_t *outptr = out;
- tran_low_t temp_in[4], temp_out[4];
+ tran_low_t tmp;
+ tran_low_t out[4][4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 4;
- // Inverse transform row vectors.
+ // inverse transform row vectors
for (i = 0; i < 4; ++i) {
- IHT_4[tx_type].rows(input, outptr, bd);
+ HIGH_IHT_4[tx_type].rows(input, out[i], bd);
input += 4;
- outptr += 4;
}
- // Inverse transform column vectors.
+ // transpose
+  for (i = 1; i < 4; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j * 4 + i];
- IHT_4[tx_type].cols(temp_in, temp_out, bd);
+ HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) {
- dest[j * stride + i] = highbd_clip_pixel_add(
- dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 4), bd);
}
}
}
-static const highbd_transform_2d HIGH_IHT_8[] = {
- { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
- { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
- { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
-};
-
void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
- int i, j;
- tran_low_t out[8 * 8];
- tran_low_t *outptr = out;
- tran_low_t temp_in[8], temp_out[8];
- const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
+ static const highbd_transform_2d HIGH_IHT_8[] = {
+ { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0,
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1,
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4,
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7,
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8,
+ { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 9,
+ { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 10,
+ { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 11,
+ { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 12,
+ { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13,
+ { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14,
+ { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- // Inverse transform row vectors.
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[8][8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 8;
+
+ // inverse transform row vectors
for (i = 0; i < 8; ++i) {
- ht.rows(input, outptr, bd);
- input += 8;
- outptr += 8;
+ HIGH_IHT_8[tx_type].rows(input, out[i], bd);
+ input += 8;
}
- // Inverse transform column vectors.
+ // transpose
+  for (i = 1; i < 8; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j * 8 + i];
- ht.cols(temp_in, temp_out, bd);
+ HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) {
- dest[j * stride + i] = highbd_clip_pixel_add(
- dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 5), bd);
}
}
}
-static const highbd_transform_2d HIGH_IHT_16[] = {
- { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
- { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
- { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
-};
-
void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
- int i, j;
- tran_low_t out[16 * 16];
- tran_low_t *outptr = out;
- tran_low_t temp_in[16], temp_out[16];
- const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
+ static const highbd_transform_2d HIGH_IHT_16[] = {
+ { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0,
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1,
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4,
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST = 7,
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8,
+ { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 9,
+ { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 10,
+ { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 11,
+ { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 12,
+ { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13,
+ { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14,
+ { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- // Rows
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[16][16];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 16;
+
+ // inverse transform row vectors
for (i = 0; i < 16; ++i) {
- ht.rows(input, outptr, bd);
- input += 16;
- outptr += 16;
+ HIGH_IHT_16[tx_type].rows(input, out[i], bd);
+ input += 16;
}
- // Columns
+ // transpose
+  for (i = 1; i < 16; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j * 16 + i];
- ht.cols(temp_in, temp_out, bd);
+ HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) {
- dest[j * stride + i] = highbd_clip_pixel_add(
- dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 6), bd);
}
}
}
@@ -425,20 +1622,43 @@
if (lossless) {
assert(tx_type == DCT_DCT);
vp10_highbd_iwht4x4_add(input, dest, stride, eob, bd);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vp10_highbd_idct4x4_add(input, dest, stride, eob, bd);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
- break;
- default:
- assert(0);
- break;
- }
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vp10_highbd_idct4x4_add(input, dest, stride, eob, bd);
+ break;
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 4, bd);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
}
}
@@ -454,6 +1674,28 @@
case ADST_ADST:
vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 8, bd);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -472,6 +1714,28 @@
case ADST_ADST:
vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
+ break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 16, bd);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -485,6 +1749,11 @@
case DCT_DCT:
vp10_highbd_idct32x32_add(input, dest, stride, eob, bd);
break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
+ break;
+#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
@@ -496,3 +1765,66 @@
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+ INV_TXFM_PARAM *inv_txfm_param) {
+ const TX_TYPE tx_type = inv_txfm_param->tx_type;
+ const TX_SIZE tx_size = inv_txfm_param->tx_size;
+ const int eob = inv_txfm_param->eob;
+ const int lossless = inv_txfm_param->lossless;
+
+ switch (tx_size) {
+ case TX_32X32:
+ vp10_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
+ break;
+ case TX_16X16:
+ vp10_inv_txfm_add_16x16(input, dest, stride, eob, tx_type);
+ break;
+ case TX_8X8:
+ vp10_inv_txfm_add_8x8(input, dest, stride, eob, tx_type);
+ break;
+ case TX_4X4:
+ // this is like vp10_short_idct4x4 but has a special case around eob<=1
+ // which is significant (not just an optimization) for the lossless
+ // case.
+ vp10_inv_txfm_add_4x4(input, dest, stride, eob, tx_type,
+ lossless);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ break;
+ }
+}
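+
+// A minimal usage sketch (hypothetical caller; identifiers like dqcoeff
+// and dst_stride are illustrative):
+//   INV_TXFM_PARAM param;
+//   param.tx_type = DCT_DCT;
+//   param.tx_size = TX_8X8;
+//   param.eob = eob;        // end-of-block from the decoded coefficients
+//   param.lossless = 0;     // or the per-segment lossless flag
+//   inv_txfm_add(dqcoeff, dst, dst_stride, &param);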
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+ INV_TXFM_PARAM *inv_txfm_param) {
+ const TX_TYPE tx_type = inv_txfm_param->tx_type;
+ const TX_SIZE tx_size = inv_txfm_param->tx_size;
+ const int eob = inv_txfm_param->eob;
+ const int bd = inv_txfm_param->bd;
+ const int lossless = inv_txfm_param->lossless;
+
+ switch (tx_size) {
+ case TX_32X32:
+ vp10_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
+ break;
+ case TX_16X16:
+ vp10_highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type);
+ break;
+ case TX_8X8:
+ vp10_highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
+ break;
+ case TX_4X4:
+ // this is like vp10_short_idct4x4 but has a special case around eob<=1
+ // which is significant (not just an optimization) for the lossless
+ // case.
+ vp10_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
+ lossless);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ break;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/common/idct.h b/vp10/common/idct.h
index 0883398..31b26b8 100644
--- a/vp10/common/idct.h
+++ b/vp10/common/idct.h
@@ -24,6 +24,16 @@
extern "C" {
#endif
+typedef struct INV_TXFM_PARAM {
+ TX_TYPE tx_type;
+ TX_SIZE tx_size;
+ int eob;
+ int lossless;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int bd;
+#endif
+} INV_TXFM_PARAM;
+
typedef void (*transform_1d)(const tran_low_t*, tran_low_t*);
typedef struct {
@@ -51,7 +61,8 @@
int stride, int eob, TX_TYPE tx_type);
void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
int stride, int eob, TX_TYPE tx_type);
-
+void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+ INV_TXFM_PARAM *inv_txfm_param);
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
@@ -74,6 +85,8 @@
void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd,
TX_TYPE tx_type);
+void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+ INV_TXFM_PARAM *inv_txfm_param);
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index a1925de..125b5bf 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -8,11 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
+
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vp10/common/loopfilter.h"
#include "vp10/common/onyxc_int.h"
#include "vp10/common/reconinter.h"
+#include "vp10/common/restoration.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -207,6 +210,10 @@
static const int mode_lf_lut[MB_MODE_COUNT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
1, 1, 0, 1 // INTER_MODES (ZEROMV == 0)
+#if CONFIG_EXT_INTER
+ , 1, // NEWFROMNEARMV mode
+ 1, 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
+#endif // CONFIG_EXT_INTER
};
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
@@ -719,11 +726,7 @@
uint64_t *const int_4x4_y = &lfm->int_4x4_y;
uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
-#if CONFIG_MISC_FIXES
uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
-#else
- uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
-#endif
int i;
// If filter level is 0 we don't loop filter.
@@ -758,13 +761,8 @@
// If the block has no coefficients and is not intra we skip applying
// the loop filter on block edges.
-#if CONFIG_MISC_FIXES
if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi))
return;
-#else
- if (mbmi->skip && is_inter_block(mbmi))
- return;
-#endif
// Here we are adding a mask for the transform size. The transform
// size mask is set to be correct for a 64x64 prediction block size. We
@@ -796,10 +794,18 @@
// we only update u and v masks on the first block.
static void build_y_mask(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
LOOP_FILTER_MASK *lfm) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
- const BLOCK_SIZE block_size = mbmi->sb_type;
const TX_SIZE tx_size_y = mbmi->tx_size;
+#if CONFIG_SUPERTX
+ const BLOCK_SIZE block_size =
+ supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
+#else
+ const BLOCK_SIZE block_size = mbmi->sb_type;
+#endif
const int filter_level = get_filter_level(lfi_n, mbmi);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
uint64_t *const above_y = &lfm->above_y[tx_size_y];
@@ -821,13 +827,8 @@
*above_y |= above_prediction_mask[block_size] << shift_y;
*left_y |= left_prediction_mask[block_size] << shift_y;
-#if CONFIG_MISC_FIXES
if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi))
return;
-#else
- if (mbmi->skip && is_inter_block(mbmi))
- return;
-#endif
*above_y |= (size_mask[block_size] &
above_64x64_txform_mask[tx_size_y]) << shift_y;
@@ -913,6 +914,10 @@
break;
case BLOCK_32X16:
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi))
+ break;
+#endif
if (mi_32_row_offset + 2 >= max_rows)
continue;
mip2 = mip + mode_info_stride * 2;
@@ -920,12 +925,22 @@
break;
case BLOCK_16X32:
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi))
+ break;
+#endif
if (mi_32_col_offset + 2 >= max_cols)
continue;
mip2 = mip + 2;
build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
break;
default:
+#if CONFIG_SUPERTX
+ if (mip[0]->mbmi.tx_size == TX_32X32) {
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ break;
+ }
+#endif
for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
@@ -942,23 +957,45 @@
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
break;
case BLOCK_16X8:
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi))
+ break;
+#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
if (mi_16_row_offset + 1 >= max_rows)
continue;
mip2 = mip + mode_info_stride;
- build_y_mask(lfi_n, mip2[0], shift_y+8, lfm);
+            build_y_mask(lfi_n, mip2[0], shift_y + 8,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ lfm);
break;
case BLOCK_8X16:
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi))
+ break;
+#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
if (mi_16_col_offset +1 >= max_cols)
continue;
mip2 = mip + 1;
- build_y_mask(lfi_n, mip2[0], shift_y+1, lfm);
+            build_y_mask(lfi_n, mip2[0], shift_y + 1,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ lfm);
break;
default: {
const int shift_y = shift_32_y[idx_32] +
shift_16_y[idx_16] +
shift_8_y[0];
+#if CONFIG_SUPERTX
+ if (mip[0]->mbmi.tx_size == TX_16X16) {
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ break;
+ }
+#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
mip += offset[0];
for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
@@ -973,7 +1010,11 @@
if (mi_8_col_offset >= max_cols ||
mi_8_row_offset >= max_rows)
continue;
- build_y_mask(lfi_n, mip[0], shift_y, lfm);
+ build_y_mask(lfi_n, mip[0], shift_y,
+#if CONFIG_SUPERTX
+ supertx_enabled(&mip[0]->mbmi),
+#endif
+ lfm);
}
break;
}
@@ -1019,11 +1060,7 @@
lfm->above_uv[i] &= mask_uv;
}
lfm->int_4x4_y &= mask_y;
-#if CONFIG_MISC_FIXES
lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;
-#else
- lfm->int_4x4_uv &= mask_uv;
-#endif
// We don't apply a wide loop filter on the last uv block row. If set
// apply the shorter one instead.
@@ -1057,11 +1094,7 @@
lfm->above_uv[i] &= mask_uv;
}
lfm->int_4x4_y &= mask_y;
-#if CONFIG_MISC_FIXES
lfm->left_int_4x4_uv &= mask_uv_int;
-#else
- lfm->int_4x4_uv &= mask_uv_int;
-#endif
// We don't apply a wide loop filter on the last uv column. If set
// apply the shorter one instead.
@@ -1091,11 +1124,7 @@
assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
-#if CONFIG_MISC_FIXES
assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
-#else
- assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
-#endif
assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
@@ -1103,11 +1132,7 @@
assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
-#if CONFIG_MISC_FIXES
assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
-#else
- assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
-#endif
}
static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -1183,9 +1208,9 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp10_filter_block_plane_non420(VP10_COMMON *cm,
- struct macroblockd_plane *plane,
- MODE_INFO **mi_8x8,
- int mi_row, int mi_col) {
+ struct macroblockd_plane *plane,
+ MODE_INFO **mi_8x8,
+ int mi_row, int mi_col) {
const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
@@ -1209,49 +1234,103 @@
// Determine the vertical edges that need filtering
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c];
- const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
- const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
+ const MB_MODE_INFO *mbmi = &mi[0].mbmi;
+ const BLOCK_SIZE sb_type = mbmi->sb_type;
+ const int skip_this = mbmi->skip && is_inter_block(mbmi);
+ const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
+ const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
+
// left edge of current unit is block/partition edge -> no skip
const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
- !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
+ !blk_col : 1;
const int skip_this_c = skip_this && !block_edge_left;
// top edge of current unit is block/partition edge -> no skip
const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
- !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
+ !blk_row : 1;
const int skip_this_r = skip_this && !block_edge_above;
+
+#if CONFIG_VAR_TX
+ TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
+ ? get_uv_tx_size(mbmi, plane) : mbmi->tx_size;
+#else
const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
- ? get_uv_tx_size(&mi[0].mbmi, plane)
- : mi[0].mbmi.tx_size;
+ ? get_uv_tx_size(mbmi, plane)
+ : mbmi->tx_size;
+#endif
+
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+ TX_SIZE tx_size_c = tx_size;
+ TX_SIZE tx_size_r = tx_size;
+
+ int tx_size_mask = 0;
// Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] =
- get_filter_level(&cm->lf_info, &mi[0].mbmi)))
+ get_filter_level(&cm->lf_info, mbmi)))
continue;
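+      // Edges are only filtered where they align with the transform grid:
+      // tx_size_mask keeps every 4th 8x8 position for 32x32 transforms and
+      // every 2nd for 16x16 (tested as `(pos & tx_size_mask) == 0` below).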
+ if (tx_size == TX_32X32)
+ tx_size_mask = 3;
+ else if (tx_size == TX_16X16)
+ tx_size_mask = 1;
+ else
+ tx_size_mask = 0;
+
+#if CONFIG_VAR_TX
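+      // With recursive transform partitioning, fetch the size this 8x8
+      // unit actually used; each shared edge is then filtered at the
+      // smaller of the two adjacent sizes (VPXMIN against the saved
+      // above/left size contexts).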
+ if (is_inter_block(mbmi) && !mbmi->skip)
+ tx_size = (plane->plane_type == PLANE_TYPE_UV) ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row * 8 + blk_col],
+ sb_type, ss_x, ss_y) :
+ mbmi->inter_tx_size[blk_row * 8 + blk_col];
+
+ tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]);
+ tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]);
+
+ cm->above_txfm_context[mi_col + c] = tx_size;
+ cm->left_txfm_context[(mi_row + r) & 0x07] = tx_size;
+#endif
+
// Build masks based on the transform size of each block
- if (tx_size == TX_32X32) {
- if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
+ // handle vertical mask
+ if (tx_size_c == TX_32X32) {
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
if (!skip_border_4x4_c)
mask_16x16_c |= 1 << (c >> ss_x);
else
mask_8x8_c |= 1 << (c >> ss_x);
}
- if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
+ } else if (tx_size_c == TX_16X16) {
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+ if (!skip_border_4x4_c)
+ mask_16x16_c |= 1 << (c >> ss_x);
+ else
+ mask_8x8_c |= 1 << (c >> ss_x);
+ }
+ } else {
+ // force 8x8 filtering on 32x32 boundaries
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+ if (tx_size_c == TX_8X8 || ((c >> ss_x) & 3) == 0)
+ mask_8x8_c |= 1 << (c >> ss_x);
+ else
+ mask_4x4_c |= 1 << (c >> ss_x);
+ }
+
+ if (!skip_this && tx_size_c < TX_8X8 && !skip_border_4x4_c &&
+ ((c >> ss_x) & tx_size_mask) == 0)
+ mask_4x4_int[r] |= 1 << (c >> ss_x);
+ }
+
+ // set horizontal mask
+ if (tx_size_r == TX_32X32) {
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= 1 << (c >> ss_x);
else
mask_8x8[r] |= 1 << (c >> ss_x);
}
- } else if (tx_size == TX_16X16) {
- if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
- if (!skip_border_4x4_c)
- mask_16x16_c |= 1 << (c >> ss_x);
- else
- mask_8x8_c |= 1 << (c >> ss_x);
- }
- if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
+ } else if (tx_size_r == TX_16X16) {
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= 1 << (c >> ss_x);
else
@@ -1259,21 +1338,15 @@
}
} else {
// force 8x8 filtering on 32x32 boundaries
- if (!skip_this_c) {
- if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
- mask_8x8_c |= 1 << (c >> ss_x);
- else
- mask_4x4_c |= 1 << (c >> ss_x);
- }
-
- if (!skip_this_r) {
- if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+ if (tx_size_r == TX_8X8 || ((r >> ss_y) & 3) == 0)
mask_8x8[r] |= 1 << (c >> ss_x);
else
mask_4x4[r] |= 1 << (c >> ss_x);
}
- if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
+ if (!skip_this && tx_size_r < TX_8X8 && !skip_border_4x4_c &&
+ ((r >> ss_y) & tx_size_mask) == 0)
mask_4x4_int[r] |= 1 << (c >> ss_x);
}
}
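The tx_size_mask chosen above (3 for TX_32X32, 1 for TX_16X16, 0 otherwise) restricts mask bits to transform-aligned positions: a column or row participates only when its 8x8-unit coordinate is a multiple of the transform dimension. A minimal standalone sketch of that test, using illustrative widths rather than the codec's TX_SIZE enum:

#include <stdio.h>

/* Illustrative transform widths in 8x8-block units (assumed values). */
enum { W_8X8 = 1, W_16X16 = 2, W_32X32 = 4 };

/* A coordinate c (in 8x8 units) sits on a transform boundary iff the
 * low bits selected by the mask are zero; mask = width - 1. */
static int on_tx_boundary(int c, int tx_w) {
  return (c & (tx_w - 1)) == 0;
}

int main(void) {
  int c;
  for (c = 0; c < 8; ++c)
    printf("col %d: 8x8:%d 16x16:%d 32x32:%d\n", c,
           on_tx_boundary(c, W_8X8), on_tx_boundary(c, W_16X16),
           on_tx_boundary(c, W_32X32));
  return 0;  /* 32x32 boundaries fall only on columns 0 and 4 */
}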
@@ -1462,11 +1535,7 @@
uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
-#if CONFIG_MISC_FIXES
uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
-#else
- uint16_t mask_4x4_int = lfm->int_4x4_uv;
-#endif
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
@@ -1518,11 +1587,7 @@
mask_16x16 = lfm->above_uv[TX_16X16];
mask_8x8 = lfm->above_uv[TX_8X8];
mask_4x4 = lfm->above_uv[TX_4X4];
-#if CONFIG_MISC_FIXES
mask_4x4_int = lfm->above_int_4x4_uv;
-#else
- mask_4x4_int = lfm->int_4x4_uv;
-#endif
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
@@ -1568,13 +1633,14 @@
}
void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
- VP10_COMMON *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only) {
+ VP10_COMMON *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+ int mi_row, mi_col;
+#if !CONFIG_VAR_TX
enum lf_path path;
LOOP_FILTER_MASK lfm;
- int mi_row, mi_col;
if (y_only)
path = LF_PATH_444;
@@ -1584,19 +1650,29 @@
path = LF_PATH_444;
else
path = LF_PATH_SLOW;
+#endif
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
+#endif
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
-
+#if CONFIG_VAR_TX
+ memset(cm->left_txfm_context, TX_SIZES, 8);
+#endif
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane;
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+#if CONFIG_VAR_TX
+ for (plane = 0; plane < num_planes; ++plane)
+ vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+ mi_row, mi_col);
+#else
// TODO(JBB): Make setup_mask work for non 420.
vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
&lfm);
-
vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
@@ -1612,6 +1688,7 @@
break;
}
}
+#endif
}
}
}
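Under CONFIG_VAR_TX the precomputed 64x64 masks are bypassed and every plane goes through vp10_filter_block_plane_non420, which consults the above/left transform-size contexts reset above; the memset to TX_SIZES (one past the largest valid size) ensures the VPXMIN clamp is a no-op at frame and superblock edges. A rough sketch of the bookkeeping pattern, with hypothetical names rather than the library's API:

typedef unsigned char TXFM_CTX;

static int min_int(int a, int b) { return a < b ? a : b; }

/* Sketch: the filter size for each edge is the smaller of the current
 * block's tx size and the neighbor's recorded tx size; afterwards the
 * current size is recorded for the blocks that follow. */
static void filter_edges_var_tx(TXFM_CTX *above, TXFM_CTX *left,
                                int col, int row, int cur_tx) {
  int tx_r = min_int(cur_tx, above[col]);     /* horizontal edge */
  int tx_c = min_int(cur_tx, left[row & 7]);  /* vertical edge */
  (void)tx_r; (void)tx_c;  /* ...masks would be built from these... */
  above[col] = (TXFM_CTX)cur_tx;
  left[row & 7] = (TXFM_CTX)cur_tx;
}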
diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h
index 8db705a..5a16baa 100644
--- a/vp10/common/loopfilter.h
+++ b/vp10/common/loopfilter.h
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "vp10/common/blockd.h"
+#include "vp10/common/restoration.h"
#include "vp10/common/seg_common.h"
#ifdef __cplusplus
@@ -43,13 +44,19 @@
uint8_t mode_ref_delta_enabled;
uint8_t mode_ref_delta_update;
- // 0 = Intra, Last, GF, ARF
+  // 0 = Intra, Last, Last2 + Last3 + Last4 (CONFIG_EXT_REFS),
+  //     GF, ARF
signed char ref_deltas[MAX_REF_FRAMES];
signed char last_ref_deltas[MAX_REF_FRAMES];
// 0 = ZERO_MV, MV
signed char mode_deltas[MAX_MODE_LF_DELTAS];
signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
+
+#if CONFIG_LOOP_RESTORATION
+ int restoration_level;
+ int last_restoration_level;
+#endif // CONFIG_LOOP_RESTORATION
};
// Need to align this structure so when it is declared and
@@ -80,12 +87,8 @@
uint64_t int_4x4_y;
uint16_t left_uv[TX_SIZES];
uint16_t above_uv[TX_SIZES];
-#if CONFIG_MISC_FIXES
uint16_t left_int_4x4_uv;
uint16_t above_int_4x4_uv;
-#else
- uint16_t int_4x4_uv;
-#endif
uint8_t lfl_y[64];
uint8_t lfl_uv[16];
} LOOP_FILTER_MASK;
@@ -125,16 +128,16 @@
void vp10_loop_filter_frame_init(struct VP10Common *cm, int default_filt_lvl);
void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
- struct VP10Common *cm,
- struct macroblockd *mbd,
- int filter_level,
- int y_only, int partial_frame);
+ struct VP10Common *cm,
+ struct macroblockd *mbd,
+ int filter_level,
+ int y_only, int partial_frame);
// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
- struct VP10Common *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only);
+ struct VP10Common *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only);
typedef struct LoopFilterWorkerData {
YV12_BUFFER_CONFIG *frame_buffer;
diff --git a/vp10/common/mv.h b/vp10/common/mv.h
index b4971a5..904d372 100644
--- a/vp10/common/mv.h
+++ b/vp10/common/mv.h
@@ -34,6 +34,14 @@
int32_t col;
} MV32;
+#if CONFIG_REF_MV
+typedef struct candidate_mv {
+ int_mv this_mv;
+ int_mv comp_mv;
+ int weight;
+} CANDIDATE_MV;
+#endif
+
static INLINE int is_zero_mv(const MV *mv) {
return *((const uint32_t *)mv) == 0;
}
@@ -48,6 +56,9 @@
mv->row = clamp(mv->row, min_row, max_row);
}
+static INLINE int mv_has_subpel(const MV *mv) {
+ return (mv->row & 0x0F) || (mv->col & 0x0F);
+}
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index 1ef80c2..1b7fb7d 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -11,6 +11,492 @@
#include "vp10/common/mvref_common.h"
+#if CONFIG_REF_MV
+static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi,
+ const MB_MODE_INFO *const candidate,
+ const MV_REFERENCE_FRAME rf[2],
+ uint8_t *refmv_count,
+ CANDIDATE_MV *ref_mv_stack,
+ int len, int block, int col) {
+ const int weight = len;
+ int index = 0, ref;
+ int newmv_count = 0;
+
+ if (rf[1] == NONE) {
+ // single reference frame
+ for (ref = 0; ref < 2; ++ref) {
+ if (candidate->ref_frame[ref] == rf[0]) {
+ int_mv this_refmv =
+ get_sub_block_mv(candidate_mi, ref, col, block);
+ for (index = 0; index < *refmv_count; ++index)
+ if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int)
+ break;
+
+ if (index < *refmv_count)
+ ref_mv_stack[index].weight += 2 * weight;
+
+ // Add a new item to the list.
+ if (index == *refmv_count) {
+ ref_mv_stack[index].this_mv = this_refmv;
+ ref_mv_stack[index].weight = 2 * weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+
+ if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) {
+ int alt_block = 3 - block;
+ this_refmv =
+ get_sub_block_mv(candidate_mi, ref, col, alt_block);
+ for (index = 0; index < *refmv_count; ++index)
+ if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int)
+ break;
+
+ if (index < *refmv_count)
+ ref_mv_stack[index].weight += weight;
+
+ // Add a new item to the list.
+ if (index == *refmv_count) {
+ ref_mv_stack[index].this_mv = this_refmv;
+ ref_mv_stack[index].weight = weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+ }
+ }
+ }
+ } else {
+ // compound reference frame
+ if (candidate->ref_frame[0] == rf[0] &&
+ candidate->ref_frame[1] == rf[1]) {
+ int_mv this_refmv[2] = {
+ get_sub_block_mv(candidate_mi, 0, col, block),
+ get_sub_block_mv(candidate_mi, 1, col, block)
+ };
+
+ for (index = 0; index < *refmv_count; ++index)
+ if ((ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int) &&
+ (ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int))
+ break;
+
+ if (index < *refmv_count)
+ ref_mv_stack[index].weight += 2 * weight;
+
+ // Add a new item to the list.
+ if (index == *refmv_count) {
+ ref_mv_stack[index].this_mv = this_refmv[0];
+ ref_mv_stack[index].comp_mv = this_refmv[1];
+ ref_mv_stack[index].weight = 2 * weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEW_NEWMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+
+ if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) {
+ int alt_block = 3 - block;
+ this_refmv[0] = get_sub_block_mv(candidate_mi, 0, col, alt_block);
+ this_refmv[1] = get_sub_block_mv(candidate_mi, 1, col, alt_block);
+
+ for (index = 0; index < *refmv_count; ++index)
+ if (ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int &&
+ ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int)
+ break;
+
+ if (index < *refmv_count)
+ ref_mv_stack[index].weight += weight;
+
+ // Add a new item to the list.
+ if (index == *refmv_count) {
+ ref_mv_stack[index].this_mv = this_refmv[0];
+ ref_mv_stack[index].comp_mv = this_refmv[1];
+ ref_mv_stack[index].weight = weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEW_NEWMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+ }
+ }
+ }
+ return newmv_count;
+}
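The candidate stack above follows a simple merge-or-append policy: an MV already on the stack has its weight reinforced (2 * weight for the primary sub-block, weight for the alternate), while an unseen MV is appended and NEWMV-coded candidates are counted. A self-contained sketch of that policy, with toy types standing in for CANDIDATE_MV:

#define TOY_STACK_SIZE 16  /* stands in for MAX_REF_MV_STACK_SIZE */

typedef struct { int mv; int weight; } ToyCand;

/* Returns 1 if a new entry was appended, 0 if an existing entry was
 * reinforced or the stack was already full. */
static int toy_add_candidate(ToyCand *stack, int *count,
                             int mv, int weight) {
  int i;
  for (i = 0; i < *count; ++i) {
    if (stack[i].mv == mv) {  /* duplicate: reinforce its weight */
      stack[i].weight += weight;
      return 0;
    }
  }
  if (*count >= TOY_STACK_SIZE) return 0;
  stack[*count].mv = mv;      /* new candidate: append */
  stack[*count].weight = weight;
  ++*count;
  return 1;
}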
+
+static uint8_t scan_row_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ const int mi_row, const int mi_col, int block,
+ const MV_REFERENCE_FRAME rf[2],
+ int row_offset,
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *refmv_count) {
+ const TileInfo *const tile = &xd->tile;
+ int i;
+ uint8_t newmv_count = 0;
+
+ for (i = 0; i < xd->n8_w && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+ POSITION mi_pos;
+ mi_pos.row = row_offset;
+ mi_pos.col = i;
+
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int len = VPXMIN(xd->n8_w,
+ num_8x8_blocks_wide_lookup[candidate->sb_type]);
+
+ newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
+ refmv_count, ref_mv_stack, len,
+ block, mi_pos.col);
+ i += len;
+ } else {
+ ++i;
+ }
+ }
+
+ return newmv_count;
+}
+
+static uint8_t scan_col_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ const int mi_row, const int mi_col, int block,
+ const MV_REFERENCE_FRAME rf[2],
+ int col_offset,
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *refmv_count) {
+ const TileInfo *const tile = &xd->tile;
+ int i;
+ uint8_t newmv_count = 0;
+
+ for (i = 0; i < xd->n8_h && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+ POSITION mi_pos;
+ mi_pos.row = i;
+ mi_pos.col = col_offset;
+
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int len = VPXMIN(xd->n8_h,
+ num_8x8_blocks_high_lookup[candidate->sb_type]);
+
+ newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
+ refmv_count, ref_mv_stack, len,
+ block, mi_pos.col);
+ i += len;
+ } else {
+ ++i;
+ }
+ }
+
+ return newmv_count;
+}
+
+static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ const int mi_row, const int mi_col, int block,
+ const MV_REFERENCE_FRAME rf[2],
+ int row_offset, int col_offset,
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *refmv_count) {
+ const TileInfo *const tile = &xd->tile;
+ POSITION mi_pos;
+ uint8_t newmv_count = 0;
+
+ mi_pos.row = row_offset;
+ mi_pos.col = col_offset;
+
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos) &&
+ *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int len = 1;
+
+ newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
+ refmv_count, ref_mv_stack, len,
+ block, mi_pos.col);
+  }  // Analyzes the motion information of a single 8x8 block.
+ return newmv_count;
+}
+
+static int has_top_right(const MACROBLOCKD *xd,
+ int mi_row, int mi_col, int bs) {
+ int has_tr = !((mi_row & bs) & (bs * 2 - 1)) ||
+ !((mi_col & bs) & (bs * 2 - 1));
+
+ // Filter out partial right-most boundaries
+ if ((mi_col & bs) & (bs * 2 - 1)) {
+ if (((mi_col & (2 * bs)) & (bs * 4 - 1)) &&
+ ((mi_row & (2 * bs)) & (bs * 4 - 1)))
+ has_tr = 0;
+ }
+
+ if (has_tr)
+ if (((mi_col + xd->n8_w) & 0x07) == 0)
+ if ((mi_row & 0x07) > 0)
+ has_tr = 0;
+
+ if (xd->n8_w < xd->n8_h)
+ if (!xd->is_sec_rect)
+ has_tr = 1;
+
+ if (xd->n8_w > xd->n8_h)
+ if (xd->is_sec_rect)
+ has_tr = 0;
+
+ return has_tr;
+}
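Since bs is a power of two, the opening test reduces to !(mi_row & bs) || !(mi_col & bs): the decoded top-right neighbor is unavailable when the block is the bottom-right child of its parent, and the follow-up checks extend that one level up and special-case superblock rows and rectangular (is_sec_rect) blocks. A simplified sketch of the core rule only, with those special cases omitted:

/* bs = square block size in 8x8 units (power of two). */
static int toy_has_top_right(int mi_row, int mi_col, int bs) {
  /* Available unless this is the bottom-right child of its parent... */
  int has_tr = !(mi_row & bs) || !(mi_col & bs);
  /* ...or a right-hand child whose parent is itself bottom-right. */
  if ((mi_col & bs) && (mi_col & 2 * bs) && (mi_row & 2 * bs))
    has_tr = 0;
  return has_tr;
}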
+
+static void handle_sec_rect_block(const MB_MODE_INFO * const candidate,
+ uint8_t refmv_count,
+ CANDIDATE_MV *ref_mv_stack,
+ MV_REFERENCE_FRAME ref_frame,
+ int16_t *mode_context) {
+ int rf, idx;
+
+ for (rf = 0; rf < 2; ++rf) {
+ if (candidate->ref_frame[rf] == ref_frame) {
+ const int list_range = VPXMIN(refmv_count, MAX_MV_REF_CANDIDATES);
+
+ const int_mv pred_mv = candidate->mv[rf];
+ for (idx = 0; idx < list_range; ++idx)
+ if (pred_mv.as_int == ref_mv_stack[idx].this_mv.as_int)
+ break;
+
+ if (idx < list_range) {
+ if (idx == 0)
+ mode_context[ref_frame] |= (1 << SKIP_NEARESTMV_OFFSET);
+ else if (idx == 1)
+ mode_context[ref_frame] |= (1 << SKIP_NEARMV_OFFSET);
+ }
+ }
+ }
+}
+
+static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ MV_REFERENCE_FRAME ref_frame,
+ uint8_t *refmv_count,
+ CANDIDATE_MV *ref_mv_stack,
+ int_mv *mv_ref_list,
+ int block, int mi_row, int mi_col,
+ int16_t *mode_context) {
+ int idx, nearest_refmv_count = 0;
+ uint8_t newmv_count = 0;
+
+ CANDIDATE_MV tmp_mv;
+ int len, nr_len;
+
+ const MV_REF *const prev_frame_mvs_base = cm->use_prev_frame_mvs ?
+ cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
+
+ int bs = VPXMAX(xd->n8_w, xd->n8_h);
+ int has_tr = has_top_right(xd, mi_row, mi_col, bs);
+
+ MV_REFERENCE_FRAME rf[2];
+ vp10_set_ref_frame(rf, ref_frame);
+
+ mode_context[ref_frame] = 0;
+ *refmv_count = 0;
+
+ // Scan the first above row mode info.
+ newmv_count = scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -1, ref_mv_stack, refmv_count);
+ // Scan the first left column mode info.
+ newmv_count += scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -1, ref_mv_stack, refmv_count);
+
+ // Check top-right boundary
+ if (has_tr)
+ newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -1, 1, ref_mv_stack, refmv_count);
+
+ nearest_refmv_count = *refmv_count;
+
+ for (idx = 0; idx < nearest_refmv_count; ++idx)
+ ref_mv_stack[idx].weight += REF_CAT_LEVEL;
+
+ if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame
+ && rf[1] == NONE) {
+ int ref;
+ int blk_row, blk_col;
+
+ for (blk_row = 0; blk_row < xd->n8_h; ++blk_row) {
+ for (blk_col = 0; blk_col < xd->n8_w; ++blk_col) {
+ const MV_REF *prev_frame_mvs =
+ prev_frame_mvs_base + blk_row * cm->mi_cols + blk_col;
+
+ POSITION mi_pos;
+ mi_pos.row = blk_row;
+ mi_pos.col = blk_col;
+
+ if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, &mi_pos))
+ continue;
+
+ for (ref = 0; ref < 2; ++ref) {
+ if (prev_frame_mvs->ref_frame[ref] == ref_frame) {
+ for (idx = 0; idx < *refmv_count; ++idx)
+ if (prev_frame_mvs->mv[ref].as_int ==
+ ref_mv_stack[idx].this_mv.as_int)
+ break;
+
+ if (idx < *refmv_count)
+ ref_mv_stack[idx].weight += 2;
+
+ if (idx == *refmv_count &&
+ *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[idx].this_mv.as_int = prev_frame_mvs->mv[ref].as_int;
+ ref_mv_stack[idx].weight = 2;
+ ++(*refmv_count);
+
+ if (abs(ref_mv_stack[idx].this_mv.as_mv.row) >= 8 ||
+ abs(ref_mv_stack[idx].this_mv.as_mv.col) >= 8)
+ mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (*refmv_count == nearest_refmv_count)
+ mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+
+ // Analyze the top-left corner block mode info.
+// scan_blk_mbmi(cm, xd, mi_row, mi_col, block, ref_frame,
+// -1, -1, ref_mv_stack, refmv_count);
+
+ // Scan the second outer area.
+ scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -2, ref_mv_stack, refmv_count);
+ scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -2, ref_mv_stack, refmv_count);
+
+ // Scan the third outer area.
+ scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -3, ref_mv_stack, refmv_count);
+ scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -3, ref_mv_stack, refmv_count);
+
+ // Scan the fourth outer area.
+ scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -4, ref_mv_stack, refmv_count);
+  // Scan the fourth left column mode info.
+ scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -4, ref_mv_stack, refmv_count);
+
+ switch (nearest_refmv_count) {
+ case 0:
+ mode_context[ref_frame] |= 0;
+ if (*refmv_count >= 1)
+ mode_context[ref_frame] |= 1;
+
+ if (*refmv_count == 1)
+ mode_context[ref_frame] |= (1 << REFMV_OFFSET);
+ else if (*refmv_count >= 2)
+ mode_context[ref_frame] |= (2 << REFMV_OFFSET);
+ break;
+ case 1:
+ mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
+
+ if (*refmv_count == 1)
+ mode_context[ref_frame] |= (3 << REFMV_OFFSET);
+ else if (*refmv_count >= 2)
+ mode_context[ref_frame] |= (4 << REFMV_OFFSET);
+ break;
+
+ case 2:
+ default:
+ if (newmv_count >= 2)
+ mode_context[ref_frame] |= 4;
+ else if (newmv_count == 1)
+ mode_context[ref_frame] |= 5;
+ else
+ mode_context[ref_frame] |= 6;
+
+ mode_context[ref_frame] |= (5 << REFMV_OFFSET);
+ break;
+ }
+
+ // Rank the likelihood and assign nearest and near mvs.
+ len = nearest_refmv_count;
+ while (len > 0) {
+ nr_len = 0;
+ for (idx = 1; idx < len; ++idx) {
+ if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
+ tmp_mv = ref_mv_stack[idx - 1];
+ ref_mv_stack[idx - 1] = ref_mv_stack[idx];
+ ref_mv_stack[idx] = tmp_mv;
+ nr_len = idx;
+ }
+ }
+ len = nr_len;
+ }
+
+ len = *refmv_count;
+ while (len > nearest_refmv_count) {
+ nr_len = nearest_refmv_count;
+ for (idx = nearest_refmv_count + 1; idx < len; ++idx) {
+ if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
+ tmp_mv = ref_mv_stack[idx - 1];
+ ref_mv_stack[idx - 1] = ref_mv_stack[idx];
+ ref_mv_stack[idx] = tmp_mv;
+ nr_len = idx;
+ }
+ }
+ len = nr_len;
+ }
+
+ // TODO(jingning): Clean-up needed.
+ if (xd->is_sec_rect) {
+ if (xd->n8_w < xd->n8_h) {
+ const MODE_INFO *const candidate_mi = xd->mi[-1];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack,
+ ref_frame, mode_context);
+ }
+
+ if (xd->n8_w > xd->n8_h) {
+ const MODE_INFO *const candidate_mi = xd->mi[-xd->mi_stride];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack,
+ ref_frame, mode_context);
+ }
+ }
+
+ if (rf[1] > NONE) {
+ for (idx = 0; idx < *refmv_count; ++idx) {
+      clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv,
+                   xd->n8_w << 3, xd->n8_h << 3, xd);
+      clamp_mv_ref(&ref_mv_stack[idx].comp_mv.as_mv,
+                   xd->n8_w << 3, xd->n8_h << 3, xd);
+ }
+ } else {
+ for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) {
+ mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
+ clamp_mv_ref(&mv_ref_list[idx].as_mv,
+ xd->n8_w << 3, xd->n8_h << 3, xd);
+ }
+ }
+}
+#endif
+
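setup_ref_mv_list then orders the stack by descending weight in two bounded bubble passes, one over the nearest-neighbor prefix (whose weights were boosted by REF_CAT_LEVEL) and one over the remainder, so extended-scan candidates can never displace spatially nearest ones. The same pass, factored into a standalone helper:

typedef struct { int mv; int weight; } SortCand;

/* Bubble larger weights toward the front of [lo, hi); mirrors the
 * len/nr_len loop structure used in setup_ref_mv_list. */
static void toy_sort_by_weight(SortCand *stack, int lo, int hi) {
  int len = hi;
  while (len > lo) {
    int nr_len = lo, idx;
    for (idx = lo + 1; idx < len; ++idx) {
      if (stack[idx - 1].weight < stack[idx].weight) {
        SortCand tmp = stack[idx - 1];
        stack[idx - 1] = stack[idx];
        stack[idx] = tmp;
        nr_len = idx;
      }
    }
    len = nr_len;
  }
}

/* Usage: toy_sort_by_weight(stack, 0, nearest_count);
 *        toy_sort_by_weight(stack, nearest_count, total_count); */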
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd,
@@ -18,7 +504,7 @@
int_mv *mv_ref_list,
int block, int mi_row, int mi_col,
find_mv_refs_sync sync, void *const data,
- uint8_t *mode_context) {
+ int16_t *mode_context) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
@@ -30,11 +516,6 @@
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
-#if !CONFIG_MISC_FIXES
- // Blank the reference vector list
- memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
-#endif
-
// The nearest 2 blocks are treated differently
// if the size < 8x8 we get the mv from the bmi substructure,
// and we also need to keep a mode count.
@@ -133,9 +614,6 @@
}
if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
-#if !CONFIG_MISC_FIXES
- prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int &&
-#endif
prev_frame_mvs->ref_frame[1] != ref_frame) {
int_mv mv = prev_frame_mvs->mv[1];
if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
@@ -147,38 +625,101 @@
}
}
- Done:
-
- mode_context[ref_frame] = counter_to_context[context_counter];
-
-#if CONFIG_MISC_FIXES
+Done:
+ if (mode_context)
+ mode_context[ref_frame] = counter_to_context[context_counter];
for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i)
mv_ref_list[i].as_int = 0;
-#else
- // Clamp vectors
- for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
- clamp_mv_ref(&mv_ref_list[i].as_mv, bw, bh, xd);
-#endif
}
+#if CONFIG_EXT_INTER
+// This function keeps a mode count for a given MB/SB
+void vp10_update_mv_context(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list,
+ int block, int mi_row, int mi_col,
+ int16_t *mode_context) {
+ int i, refmv_count = 0;
+ const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+ int context_counter = 0;
+ const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
+ const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
+ const TileInfo *const tile = &xd->tile;
+
+ // Blank the reference vector list
+ memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+  // Only the nearest 2 blocks are examined.
+  // If the size < 8x8, the mv is taken from the bmi substructure.
+ for (i = 0; i < 2; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+
+ // Keep counts for entropy encoding.
+ context_counter += mode_2_counter[candidate->mode];
+
+ if (candidate->ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
+ refmv_count, mv_ref_list, bw, bh, xd, Done);
+ } else if (candidate->ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
+ refmv_count, mv_ref_list, bw, bh, xd, Done);
+ }
+ }
+ }
+
+ Done:
+
+ if (mode_context)
+ mode_context[ref_frame] = counter_to_context[context_counter];
+}
+#endif // CONFIG_EXT_INTER
+
void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+#if CONFIG_REF_MV
+ uint8_t *ref_mv_count,
+ CANDIDATE_MV *ref_mv_stack,
+#if CONFIG_EXT_INTER
+ int16_t *compound_mode_context,
+#endif // CONFIG_EXT_INTER
+#endif
int_mv *mv_ref_list,
int mi_row, int mi_col,
find_mv_refs_sync sync, void *const data,
- uint8_t *mode_context) {
+ int16_t *mode_context) {
+#if CONFIG_REF_MV
+ int idx, all_zero = 1;
+#endif
+#if CONFIG_EXT_INTER
+ vp10_update_mv_context(cm, xd, mi, ref_frame, mv_ref_list, -1,
+ mi_row, mi_col,
+#if CONFIG_REF_MV
+ compound_mode_context);
+#else
+ mode_context);
+#endif // CONFIG_REF_MV
+ find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1,
+ mi_row, mi_col, sync, data, NULL);
+#else
find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1,
mi_row, mi_col, sync, data, mode_context);
-}
+#endif // CONFIG_EXT_INTER
-static void lower_mv_precision(MV *mv, int allow_hp) {
- const int use_hp = allow_hp && vp10_use_mv_hp(mv);
- if (!use_hp) {
- if (mv->row & 1)
- mv->row += (mv->row > 0 ? -1 : 1);
- if (mv->col & 1)
- mv->col += (mv->col > 0 ? -1 : 1);
- }
+#if CONFIG_REF_MV
+ setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack,
+ mv_ref_list, -1, mi_row, mi_col, mode_context);
+
+ for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
+ if (mv_ref_list[idx].as_int != 0)
+ all_zero = 0;
+
+ if (all_zero)
+ mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET);
+#endif
}
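With CONFIG_REF_MV the mode context widens from uint8_t to int16_t because it now packs several independent fields: a NEWMV-style context in the low bits plus flag/context groups at REFMV_OFFSET, ZEROMV_OFFSET, SKIP_NEARESTMV_OFFSET and ALL_ZERO_FLAG_OFFSET. Purely for illustration, with assumed bit positions (the real offsets are defined elsewhere in this patch and may differ):

/* Hypothetical bit positions; not the codec's actual constants. */
enum { TOY_REFMV_OFFSET = 4, TOY_ZEROMV_OFFSET = 7,
       TOY_ALL_ZERO_FLAG_OFFSET = 8 };

static short toy_pack_mode_ctx(int newmv_ctx, int refmv_ctx,
                               int zeromv, int all_zero) {
  return (short)(newmv_ctx |                       /* low bits */
                 (refmv_ctx << TOY_REFMV_OFFSET) |
                 (zeromv << TOY_ZEROMV_OFFSET) |
                 (all_zero << TOY_ALL_ZERO_FLAG_OFFSET));
}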
void vp10_find_best_ref_mvs(int allow_hp,
@@ -194,18 +735,51 @@
}
void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd,
- int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv,
- uint8_t *mode_context) {
+ int block, int ref, int mi_row, int mi_col,
+#if CONFIG_EXT_INTER
+ int_mv *mv_list,
+#endif // CONFIG_EXT_INTER
+ int_mv *nearest_mv, int_mv *near_mv) {
+#if !CONFIG_EXT_INTER
int_mv mv_list[MAX_MV_REF_CANDIDATES];
+#endif // !CONFIG_EXT_INTER
MODE_INFO *const mi = xd->mi[0];
b_mode_info *bmi = mi->bmi;
int n;
+#if CONFIG_REF_MV
+ CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE];
+ CANDIDATE_MV tmp_mv;
+ uint8_t ref_mv_count = 0, idx;
+ uint8_t above_count = 0, left_count = 0;
+ MV_REFERENCE_FRAME rf[2] = { mi->mbmi.ref_frame[ref], NONE };
+#endif
assert(MAX_MV_REF_CANDIDATES == 2);
find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block,
- mi_row, mi_col, NULL, NULL, mode_context);
+ mi_row, mi_col, NULL, NULL, NULL);
+
+#if CONFIG_REF_MV
+ scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ -1, 0, ref_mv_stack, &ref_mv_count);
+ above_count = ref_mv_count;
+
+ scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf,
+ 0, -1, ref_mv_stack, &ref_mv_count);
+ left_count = ref_mv_count - above_count;
+
+ if (above_count > 1 && left_count > 0) {
+ tmp_mv = ref_mv_stack[1];
+ ref_mv_stack[1] = ref_mv_stack[above_count];
+ ref_mv_stack[above_count] = tmp_mv;
+ }
+
+ for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, ref_mv_count); ++idx) {
+ mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
+ clamp_mv_ref(&mv_list[idx].as_mv,
+ xd->n8_w << 3, xd->n8_h << 3, xd);
+ }
+#endif
near_mv->as_int = 0;
switch (block) {
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index 0a98866..224c5ed 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -55,6 +55,18 @@
0, // NEARMV
3, // ZEROMV
1, // NEWMV
+#if CONFIG_EXT_INTER
+ 1, // NEWFROMNEARMV
+ 0, // NEAREST_NEARESTMV
+ 0, // NEAREST_NEARMV
+ 0, // NEAR_NEARESTMV
+ 1, // NEAREST_NEWMV
+ 1, // NEW_NEARESTMV
+ 1, // NEAR_NEWMV
+ 1, // NEW_NEARMV
+ 3, // ZERO_ZEROMV
+ 1, // NEW_NEWMV
+#endif // CONFIG_EXT_INTER
};
// There are 3^3 different combinations of 3 counts that can be either 0,1 or
@@ -119,26 +131,13 @@
};
// clamp_mv_ref
-#if CONFIG_MISC_FIXES
#define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units
-#else
-#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-#endif
static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
-#if CONFIG_MISC_FIXES
clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,
xd->mb_to_right_edge + bw * 8 + MV_BORDER,
xd->mb_to_top_edge - bh * 8 - MV_BORDER,
xd->mb_to_bottom_edge + bh * 8 + MV_BORDER);
-#else
- (void) bw;
- (void) bh;
- clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
- xd->mb_to_right_edge + MV_BORDER,
- xd->mb_to_top_edge - MV_BORDER,
- xd->mb_to_bottom_edge + MV_BORDER);
-#endif
}
// This function returns either the appropriate sub block or block's mv
@@ -164,11 +163,7 @@
return mv;
}
-#if CONFIG_MISC_FIXES
#define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd)
-#else
-#define CLIP_IN_ADD(mv, bw, bh, xd) do {} while (0)
-#endif
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector it will also
@@ -194,8 +189,6 @@
ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
refmv_count, mv_ref_list, bw, bh, xd, Done); \
if (has_second_ref(mbmi) && \
- (CONFIG_MISC_FIXES || \
- (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) && \
(mbmi)->ref_frame[1] != ref_frame) \
ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
refmv_count, mv_ref_list, bw, bh, xd, Done); \
@@ -214,12 +207,89 @@
mi_col + mi_pos->col >= tile->mi_col_end);
}
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+ const int use_hp = allow_hp && vp10_use_mv_hp(mv);
+ if (!use_hp) {
+ if (mv->row & 1)
+ mv->row += (mv->row > 0 ? -1 : 1);
+ if (mv->col & 1)
+ mv->col += (mv->col > 0 ? -1 : 1);
+ }
+}
+
+#if CONFIG_REF_MV
+static INLINE int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
+ if (rf[1] > INTRA_FRAME)
+ return rf[0] + ALTREF_FRAME;
+
+ return rf[0];
+}
+
+static INLINE void vp10_set_ref_frame(MV_REFERENCE_FRAME *rf,
+ int8_t ref_frame_type) {
+ if (ref_frame_type > ALTREF_FRAME) {
+ rf[0] = ref_frame_type - ALTREF_FRAME;
+ rf[1] = ALTREF_FRAME;
+ } else {
+ rf[0] = ref_frame_type;
+ rf[1] = NONE;
+ assert(ref_frame_type > INTRA_FRAME && ref_frame_type < MAX_REF_FRAMES);
+ }
+}
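vp10_ref_frame_type folds a reference pair into a single int8 index; this is invertible because the only compound pairs used here have ALTREF_FRAME as the fixed second reference, so offsetting rf[0] by ALTREF_FRAME suffices. A round-trip check with placeholder enum values (the real MV_REFERENCE_FRAME values live in the codec headers and differ under CONFIG_EXT_REFS):

#include <assert.h>

/* Placeholder values, for illustration only. */
enum { TOY_NONE = -1, TOY_INTRA = 0, TOY_LAST = 1, TOY_GOLDEN = 2,
       TOY_ALTREF = 3 };

static signed char toy_pack_rf(const signed char rf[2]) {
  return (signed char)(rf[1] > TOY_INTRA ? rf[0] + TOY_ALTREF : rf[0]);
}

static void toy_unpack_rf(signed char rf[2], signed char type) {
  if (type > TOY_ALTREF) {
    rf[0] = (signed char)(type - TOY_ALTREF);
    rf[1] = TOY_ALTREF;
  } else {
    rf[0] = type;
    rf[1] = TOY_NONE;
  }
}

int main(void) {
  signed char rf[2] = { TOY_LAST, TOY_ALTREF }, out[2];
  toy_unpack_rf(out, toy_pack_rf(rf));
  assert(out[0] == rf[0] && out[1] == rf[1]);  /* compound round-trips */
  return 0;
}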
+
+static INLINE int16_t vp10_mode_context_analyzer(
+ const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf,
+ BLOCK_SIZE bsize, int block) {
+ int16_t mode_ctx = 0;
+ if (block >= 0) {
+ mode_ctx = mode_context[rf[0]] & 0x00ff;
+
+ if (block > 0 && bsize < BLOCK_8X8 && bsize > BLOCK_4X4)
+ mode_ctx |= (1 << SKIP_NEARESTMV_SUB8X8_OFFSET);
+
+ return mode_ctx;
+ }
+
+ if (rf[1] > INTRA_FRAME)
+ return mode_context[rf[0]] & (mode_context[rf[1]] | 0x00ff);
+ else if (rf[0] != ALTREF_FRAME)
+ return mode_context[rf[0]] & ~(mode_context[ALTREF_FRAME] & 0xfe00);
+ else
+ return mode_context[rf[0]];
+}
+
+static INLINE uint8_t vp10_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
+ int ref_idx) {
+ if (ref_mv_stack[ref_idx + 1].weight > REF_CAT_LEVEL &&
+ ref_mv_stack[ref_idx + 2].weight > REF_CAT_LEVEL)
+ return 0;
+
+ if (ref_mv_stack[ref_idx + 1].weight > REF_CAT_LEVEL &&
+ ref_mv_stack[ref_idx + 2].weight < REF_CAT_LEVEL)
+ return 1;
+
+ if (ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL &&
+ ref_mv_stack[ref_idx + 2].weight < REF_CAT_LEVEL)
+ return 2;
+
+ assert(0);
+ return 0;
+}
+#endif
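vp10_drl_ctx reads the weights of the two stack entries after ref_idx relative to REF_CAT_LEVEL, the boost that separates nearest-neighbor candidates from extended-scan ones, and maps them to three contexts. A condensed restatement (equality with REF_CAT_LEVEL is assumed never to occur, which is what the assert(0) above enforces):

/* t = REF_CAT_LEVEL; w1/w2 = weights of the two entries after ref_idx. */
static int toy_drl_ctx(int w1, int w2, int t) {
  if (w1 > t && w2 > t) return 0;  /* both from the nearest scan */
  if (w1 > t && w2 < t) return 1;  /* only the first one is */
  return 2;                        /* neither is (w1 < t, w2 < t) */
}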
+
typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data,
- uint8_t *mode_context);
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+#if CONFIG_REF_MV
+ uint8_t *ref_mv_count,
+ CANDIDATE_MV *ref_mv_stack,
+#if CONFIG_EXT_INTER
+ int16_t *compound_mode_context,
+#endif // CONFIG_EXT_INTER
+#endif
+ int_mv *mv_ref_list, int mi_row, int mi_col,
+ find_mv_refs_sync sync, void *const data,
+ int16_t *mode_context);
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
@@ -228,9 +298,20 @@
int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv);
void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd,
- int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv,
- uint8_t *mode_context);
+ int block, int ref, int mi_row, int mi_col,
+#if CONFIG_EXT_INTER
+ int_mv *mv_list,
+#endif // CONFIG_EXT_INTER
+ int_mv *nearest_mv, int_mv *near_mv);
+
+#if CONFIG_EXT_INTER
+// This function keeps a mode count for a given MB/SB
+void vp10_update_mv_context(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list,
+ int block, int mi_row, int mi_col,
+ int16_t *mode_context);
+#endif // CONFIG_EXT_INTER
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index ffef733..b6051fd 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -20,9 +20,11 @@
#include "vp10/common/entropymv.h"
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
+#include "vp10/common/mv.h"
#include "vp10/common/frame_buffers.h"
#include "vp10/common/quant_common.h"
#include "vp10/common/tile_common.h"
+#include "vp10/common/restoration.h"
#if CONFIG_VP9_POSTPROC
#include "vp10/common/postproc.h"
@@ -144,7 +146,8 @@
int subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
- int use_highbitdepth; // Marks if we need to use 16bit frame buffers.
+ // Marks if we need to use 16bit frame buffers (1: yes, 0: no).
+ int use_highbitdepth;
#endif
YV12_BUFFER_CONFIG *frame_to_show;
@@ -171,8 +174,17 @@
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
#endif
+#if CONFIG_LOOP_RESTORATION
+ YV12_BUFFER_CONFIG tmp_loop_buf;
+#endif // CONFIG_LOOP_RESTORATION
FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
+#if CONFIG_EXT_REFS
+ // frame type of the frame before last frame
+ FRAME_TYPE last2_frame_type;
+ // frame type of the frame two frames before last frame
+ FRAME_TYPE last3_frame_type;
+#endif // CONFIG_EXT_REFS
FRAME_TYPE frame_type;
int show_frame;
@@ -185,6 +197,8 @@
int allow_high_precision_mv;
+ int allow_screen_content_tools;
+
// Flag signaling which frame contexts should be reset to default values.
RESET_FRAME_CONTEXT_MODE reset_frame_context;
@@ -243,6 +257,9 @@
INTERP_FILTER interp_filter;
loop_filter_info_n lf_info;
+#if CONFIG_LOOP_RESTORATION
+ restoration_info_n rst_info;
+#endif // CONFIG_LOOP_RESTORATION
// Flag signaling how frame contexts should be updated at the end of
// a frame decode
@@ -252,15 +269,12 @@
struct loopfilter lf;
struct segmentation seg;
-#if !CONFIG_MISC_FIXES
- struct segmentation_probs segp;
-#endif
int frame_parallel_decode; // frame-based threading.
// Context probabilities for reference frame prediction
MV_REFERENCE_FRAME comp_fixed_ref;
- MV_REFERENCE_FRAME comp_var_ref[2];
+ MV_REFERENCE_FRAME comp_var_ref[COMP_REFS];
REFERENCE_MODE reference_mode;
FRAME_CONTEXT *fc; /* this frame entropy */
@@ -299,6 +313,10 @@
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context;
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT *above_txfm_context;
+ TXFM_CONTEXT left_txfm_context[8];
+#endif
int above_context_alloc_cols;
// scratch memory for intraonly/keyframe forward updates from default tables
@@ -395,6 +413,9 @@
}
xd->above_seg_context = cm->above_seg_context;
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context;
+#endif
xd->mi_stride = cm->mi_stride;
xd->error_info = &cm->error;
}
@@ -444,6 +465,19 @@
xd->left_mi = NULL;
xd->left_mbmi = NULL;
}
+
+ xd->n8_h = bh;
+ xd->n8_w = bw;
+#if CONFIG_REF_MV
+ xd->is_sec_rect = 0;
+ if (xd->n8_w < xd->n8_h)
+ if (mi_col & (xd->n8_h - 1))
+ xd->is_sec_rect = 1;
+
+ if (xd->n8_w > xd->n8_h)
+ if (mi_row & (xd->n8_w - 1))
+ xd->is_sec_rect = 1;
+#endif
}
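The is_sec_rect flag marks the second half of a rectangular partition: a tall block (n8_w < n8_h) with a nonzero column offset inside the enclosing square is the right half, and a wide block with a nonzero row offset is the bottom half. A toy check mirroring the logic, with dimensions in 8x8 units:

static int toy_is_sec_rect(int mi_row, int mi_col, int n8_w, int n8_h) {
  if (n8_w < n8_h && (mi_col & (n8_h - 1))) return 1;  /* right half */
  if (n8_w > n8_h && (mi_row & (n8_w - 1))) return 1;  /* bottom half */
  return 0;
}
/* Example: a 32x64 block (n8_w = 4, n8_h = 8) at mi_col = 4 is the
 * right half of its 64x64 parent, so toy_is_sec_rect(...) == 1. */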
static INLINE const vpx_prob *get_y_mode_probs(const VP10_COMMON *cm,
@@ -487,6 +521,36 @@
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
+#if CONFIG_VAR_TX
+static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx,
+ TX_SIZE tx_size,
+ int len) {
+ int i;
+ for (i = 0; i < len; ++i)
+ txfm_ctx[i] = tx_size;
+}
+
+static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
+ TXFM_CONTEXT *left_ctx,
+ TX_SIZE tx_size) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bs = num_8x8_blocks_high_lookup[bsize];
+ int i;
+ for (i = 0; i < bs; ++i) {
+ above_ctx[i] = tx_size;
+ left_ctx[i] = tx_size;
+ }
+}
+
+static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
+ TXFM_CONTEXT *left_ctx,
+ TX_SIZE tx_size) {
+ int above = *above_ctx < tx_size;
+ int left = *left_ctx < tx_size;
+ return (tx_size - 1) * 3 + above + left;
+}
+#endif
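txfm_partition_context allots three contexts per transform size and adds one for each neighbor coded with a smaller transform, so the context reflects both the current size and how often the neighborhood split further. A worked call, assuming the usual small-to-large TX_SIZE ordering (TX_4X4 = 0, TX_8X8 = 1, TX_16X16 = 2, ...):

static int toy_txfm_partition_context(int above_tx, int left_tx,
                                      int tx_size) {
  const int above = above_tx < tx_size;
  const int left = left_tx < tx_size;
  return (tx_size - 1) * 3 + above + left;
}
/* Example: above = TX_8X8 (1), left = TX_16X16 (2), tx_size = TX_16X16
 * (2) gives (2 - 1) * 3 + 1 + 0 = 4. */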
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/pred_common.c b/vp10/common/pred_common.c
index 236ae54..4dd6841 100644
--- a/vp10/common/pred_common.c
+++ b/vp10/common/pred_common.c
@@ -21,10 +21,10 @@
// The prediction flags in these dummy entries are initialized to 0.
const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
const int left_type = xd->left_available && is_inter_block(left_mbmi) ?
- left_mbmi->interp_filter : SWITCHABLE_FILTERS;
+ left_mbmi->interp_filter : SWITCHABLE_FILTERS;
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
const int above_type = xd->up_available && is_inter_block(above_mbmi) ?
- above_mbmi->interp_filter : SWITCHABLE_FILTERS;
+ above_mbmi->interp_filter : SWITCHABLE_FILTERS;
if (left_type == above_type)
return left_type;
@@ -36,6 +36,85 @@
return SWITCHABLE_FILTERS;
}
+#if CONFIG_EXT_INTRA
+int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd) {
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ int left_type = INTRA_FILTERS, above_type = INTRA_FILTERS;
+
+ if (xd->left_available && left_mbmi->sb_type >= BLOCK_8X8) {
+ PREDICTION_MODE mode = left_mbmi->mode;
+ if (is_inter_block(left_mbmi)) {
+ switch (left_mbmi->interp_filter) {
+ case EIGHTTAP:
+ left_type = INTRA_FILTER_8TAP;
+ break;
+ case EIGHTTAP_SMOOTH:
+ left_type = INTRA_FILTER_8TAP_SMOOTH;
+ break;
+ case EIGHTTAP_SHARP:
+ left_type = INTRA_FILTER_8TAP_SHARP;
+ break;
+ case BILINEAR:
+ left_type = INTRA_FILTERS;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (mode != DC_PRED && mode != TM_PRED) {
+ int p_angle;
+ p_angle = mode_to_angle_map[mode] +
+ left_mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle)) {
+ left_type = left_mbmi->intra_filter;
+ }
+ }
+ }
+ }
+
+ if (xd->up_available && above_mbmi->sb_type >= BLOCK_8X8) {
+ if (is_inter_block(above_mbmi)) {
+ switch (above_mbmi->interp_filter) {
+ case EIGHTTAP:
+ above_type = INTRA_FILTER_8TAP;
+ break;
+ case EIGHTTAP_SMOOTH:
+ above_type = INTRA_FILTER_8TAP_SMOOTH;
+ break;
+ case EIGHTTAP_SHARP:
+ above_type = INTRA_FILTER_8TAP_SHARP;
+ break;
+ case BILINEAR:
+ above_type = INTRA_FILTERS;
+ break;
+ default:
+ break;
+ }
+ } else {
+ PREDICTION_MODE mode = above_mbmi->mode;
+ if (mode != DC_PRED && mode != TM_PRED) {
+ int p_angle;
+ p_angle = mode_to_angle_map[mode] +
+ above_mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle)) {
+ above_type = above_mbmi->intra_filter;
+ }
+ }
+ }
+ }
+
+ if (left_type == above_type)
+ return left_type;
+ else if (left_type == INTRA_FILTERS && above_type != INTRA_FILTERS)
+ return above_type;
+ else if (left_type != INTRA_FILTERS && above_type == INTRA_FILTERS)
+ return left_type;
+ else
+ return INTRA_FILTERS;
+}
+#endif // CONFIG_EXT_INTRA
+
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
@@ -103,9 +182,424 @@
return ctx;
}
+#if CONFIG_EXT_REFS
+
+// TODO(zoeliu): Optimize the context design for the coding of the
+// reference frames.
+
+#define CHECK_LAST_OR_LAST2(ref_frame) \
+ ((ref_frame == LAST_FRAME) || (ref_frame == LAST2_FRAME))
+
+#define CHECK_GOLDEN_LAST3_LAST4(ref_frame) \
+ ((ref_frame == GOLDEN_FRAME) || (ref_frame == LAST3_FRAME) || \
+ (ref_frame == LAST4_FRAME))
+
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is
+// either GOLDEN/LAST3/LAST4, or LAST/LAST2.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is one of
+// GOLDEN_FRAME/LAST3_FRAME/LAST4_FRAME.
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ const int var_ref_idx = !fix_ref_idx;
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 +
+ 2 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[0]));
+ else // comp pred (1/3)
+ pred_context = 1 +
+ 2 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[var_ref_idx]));
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[var_ref_idx];
+ const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[var_ref_idx];
+
+ if (vrfa == vrfl && CHECK_GOLDEN_LAST3_LAST4(vrfa)) {
+ pred_context = 0;
+ } else if (l_sg && a_sg) { // single/single
+ if ((vrfa == ALTREF_FRAME && CHECK_LAST_OR_LAST2(vrfl)) ||
+ (vrfl == ALTREF_FRAME && CHECK_LAST_OR_LAST2(vrfa))) {
+ pred_context = 4;
+ } else if (vrfa == vrfl || (CHECK_LAST_OR_LAST2(vrfa) &&
+ CHECK_LAST_OR_LAST2(vrfl))) {
+ pred_context = 3;
+ } else { // Either vrfa or vrfl is GOLDEN / LAST3 / LAST4
+        // NOTE(zoeliu): The following assert may be removed once confirmed.
+ assert(CHECK_GOLDEN_LAST3_LAST4(vrfa) ||
+ CHECK_GOLDEN_LAST3_LAST4(vrfl));
+ pred_context = 1;
+ }
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;
+ const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;
+
+ if (CHECK_GOLDEN_LAST3_LAST4(vrfc) && !CHECK_GOLDEN_LAST3_LAST4(rfs))
+ pred_context = 1;
+ else if (CHECK_GOLDEN_LAST3_LAST4(rfs) &&
+ !CHECK_GOLDEN_LAST3_LAST4(vrfc))
+ pred_context = 2;
+ else
+ pred_context = 4;
+ } else { // comp/comp
+ if ((CHECK_LAST_OR_LAST2(vrfa) && CHECK_LAST_OR_LAST2(vrfl))) {
+ pred_context = 4;
+ } else {
+        // NOTE(zoeliu): The following assert may be removed once confirmed.
+ assert(CHECK_GOLDEN_LAST3_LAST4(vrfa) ||
+ CHECK_GOLDEN_LAST3_LAST4(vrfl));
+ pred_context = 2;
+ }
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi))
+ pred_context =
+ 4 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[var_ref_idx]));
+ else
+ pred_context = 3 * (!CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[0]));
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
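All of these derivations land in one of REF_CONTEXTS buckets (0..4), ordered roughly from "neighbors agree with the tested reference" (0) through neutral (2) to "neighbors agree against it" (4). The one-edge branch above condenses to:

/* Sketch of the single-available-edge case: intra gives the neutral
 * context; otherwise a compound neighbor is a stronger signal (0/4)
 * than a single-reference neighbor (0/3). */
static int toy_one_edge_ctx(int edge_is_inter, int edge_is_comp,
                            int edge_matches_tested_ref) {
  if (!edge_is_inter) return 2;
  if (edge_is_comp) return 4 * !edge_matches_tested_ref;
  return 3 * !edge_matches_tested_ref;
}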
+
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is LAST,
+// given that it is known to be either LAST or LAST2.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is LAST_FRAME,
+// conditioned on it being either LAST_FRAME or LAST2_FRAME.
+int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ const int var_ref_idx = !fix_ref_idx;
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST_FRAME);
+ else // comp pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx]
+ != LAST_FRAME);
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[var_ref_idx];
+ const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[var_ref_idx];
+
+ if (vrfa == vrfl && vrfa == LAST_FRAME)
+ pred_context = 0;
+ else if (l_sg && a_sg) { // single/single
+ if (vrfa == LAST_FRAME || vrfl == LAST_FRAME)
+ pred_context = 1;
+ else if (CHECK_GOLDEN_LAST3_LAST4(vrfa) ||
+ CHECK_GOLDEN_LAST3_LAST4(vrfl))
+ pred_context = 2 + (vrfa != vrfl);
+ else if (vrfa == vrfl)
+ pred_context = 3;
+ else
+ pred_context = 4;
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;
+ const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;
+
+ if (vrfc == LAST_FRAME && rfs != LAST_FRAME)
+ pred_context = 1;
+ else if (rfs == LAST_FRAME && vrfc != LAST_FRAME)
+ pred_context = 2;
+ else
+ pred_context = 3 +
+ (vrfc == LAST2_FRAME || CHECK_GOLDEN_LAST3_LAST4(rfs));
+ } else { // comp/comp
+ if (vrfa == LAST_FRAME || vrfl == LAST_FRAME)
+ pred_context = 2;
+ else
+ pred_context = 3 + (CHECK_GOLDEN_LAST3_LAST4(vrfa) ||
+ CHECK_GOLDEN_LAST3_LAST4(vrfl));
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi)) {
+ pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] != LAST_FRAME);
+ } else {
+ if (edge_mbmi->ref_frame[0] == LAST_FRAME)
+ pred_context = 0;
+ else
+ pred_context = 2 + CHECK_GOLDEN_LAST3_LAST4(edge_mbmi->ref_frame[0]);
+ }
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+#define CHECK_LAST3_OR_LAST4(ref_frame) \
+ ((ref_frame == LAST3_FRAME) || (ref_frame == LAST4_FRAME))
+
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is GOLDEN,
+// given that it is known to be either GOLDEN, LAST3 or LAST4.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is GOLDEN_FRAME,
+// conditioned on it being either GOLDEN, LAST3 or LAST4.
+int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ const int var_ref_idx = !fix_ref_idx;
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != GOLDEN_FRAME);
+ else // comp pred (1/3)
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[var_ref_idx] != GOLDEN_FRAME);
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[var_ref_idx];
+ const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[var_ref_idx];
+
+ if (vrfa == vrfl && vrfa == GOLDEN_FRAME)
+ pred_context = 0;
+ else if (l_sg && a_sg) { // single/single
+ if (vrfa == GOLDEN_FRAME || vrfl == GOLDEN_FRAME)
+ pred_context = 1;
+ else if (CHECK_LAST_OR_LAST2(vrfa) || CHECK_LAST_OR_LAST2(vrfl))
+ pred_context = 2 + (vrfa != vrfl);
+ else if (vrfa == vrfl)
+ pred_context = 3;
+ else
+ pred_context = 4;
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;
+ const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;
+
+ if (vrfc == GOLDEN_FRAME && rfs != GOLDEN_FRAME)
+ pred_context = 1;
+ else if (rfs == GOLDEN_FRAME && vrfc != GOLDEN_FRAME)
+ pred_context = 2;
+ else
+ pred_context = 3 +
+ (CHECK_LAST3_OR_LAST4(vrfc) || CHECK_LAST_OR_LAST2(rfs));
+ } else { // comp/comp
+ if (vrfa == GOLDEN_FRAME || vrfl == GOLDEN_FRAME)
+ pred_context = 2;
+ else
+ pred_context = 3 +
+ (CHECK_LAST_OR_LAST2(vrfa) || CHECK_LAST_OR_LAST2(vrfl));
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi)) {
+ pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] != GOLDEN_FRAME);
+ } else {
+ if (edge_mbmi->ref_frame[0] == GOLDEN_FRAME)
+ pred_context = 0;
+ else
+ pred_context = 2 + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
+ }
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+#define CHECK_LAST_LAST2_GOLDEN(ref_frame) \
+ ((ref_frame == LAST_FRAME) || (ref_frame == LAST2_FRAME) || \
+ (ref_frame == GOLDEN_FRAME))
+
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is LAST3,
+// given that it is known to be either LAST3 or LAST4.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is LAST3_FRAME,
+// conditioned on it being either LAST3 or LAST4.
+int vp10_get_pred_context_comp_ref_p3(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries correpsonding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ const int var_ref_idx = !fix_ref_idx;
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST3_FRAME);
+ else // comp pred (1/3)
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[var_ref_idx] != LAST3_FRAME);
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[var_ref_idx];
+ const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[var_ref_idx];
+
+ if (vrfa == vrfl && vrfa == LAST3_FRAME)
+ pred_context = 0;
+ else if (l_sg && a_sg) { // single/single
+ if (vrfa == LAST3_FRAME || vrfl == LAST3_FRAME)
+ pred_context = 1;
+ else if (CHECK_LAST_LAST2_GOLDEN(vrfa) || CHECK_LAST_LAST2_GOLDEN(vrfl))
+ pred_context = 2 + (vrfa != vrfl);
+ else if (vrfa == vrfl)
+ pred_context = 3;
+ else
+ pred_context = 4;
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;
+ const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;
+
+ if (vrfc == LAST3_FRAME && rfs != LAST3_FRAME)
+ pred_context = 1;
+ else if (rfs == LAST3_FRAME && vrfc != LAST3_FRAME)
+ pred_context = 2;
+ else
+ pred_context = 3 +
+ (vrfc == LAST4_FRAME || CHECK_LAST_LAST2_GOLDEN(rfs));
+ } else { // comp/comp
+ if (vrfa == LAST3_FRAME || vrfl == LAST3_FRAME)
+ pred_context = 2;
+ else
+ pred_context = 3 +
+ (CHECK_LAST_LAST2_GOLDEN(vrfa) || CHECK_LAST_LAST2_GOLDEN(vrfl));
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi)) {
+ pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] != LAST3_FRAME);
+ } else {
+ if (edge_mbmi->ref_frame[0] == LAST3_FRAME)
+ pred_context = 0;
+ else
+ pred_context = 2 + CHECK_LAST_LAST2_GOLDEN(edge_mbmi->ref_frame[0]);
+ }
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+#else // CONFIG_EXT_REFS
+
// Returns a context number for the given MB prediction signal
int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
- const MACROBLOCKD *xd) {
+ const MACROBLOCKD *xd) {
int pred_context;
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
@@ -186,6 +680,472 @@
return pred_context;
}
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+
+#define CHECK_GOLDEN_OR_ALTREF(ref_frame) \
+ ((ref_frame == GOLDEN_FRAME) || (ref_frame == ALTREF_FRAME))
+
+// For the bit to signal whether the single reference is an ALTREF_FRAME
+// or a GOLDEN_FRAME.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is ALTREF/GOLDEN.
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+ if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]));
+ else
+ pred_context = 1 + (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) ||
+ !CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[1]));
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {
+ pred_context = 1 + (!CHECK_GOLDEN_OR_ALTREF(above0) ||
+ !CHECK_GOLDEN_OR_ALTREF(above1) ||
+ !CHECK_GOLDEN_OR_ALTREF(left0) ||
+ !CHECK_GOLDEN_OR_ALTREF(left1));
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+ if (!CHECK_GOLDEN_OR_ALTREF(rfs))
+ pred_context = 3 + (!CHECK_GOLDEN_OR_ALTREF(crf1) ||
+ !CHECK_GOLDEN_OR_ALTREF(crf2));
+ else
+ pred_context = !CHECK_GOLDEN_OR_ALTREF(crf1) ||
+ !CHECK_GOLDEN_OR_ALTREF(crf2);
+ } else {
+ pred_context = 2 * (!CHECK_GOLDEN_OR_ALTREF(above0)) +
+ 2 * (!CHECK_GOLDEN_OR_ALTREF(left0));
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+ if (!is_inter_block(edge_mbmi)) { // intra
+ pred_context = 2;
+ } else { // inter
+ if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]));
+ else
+ pred_context = 1 + (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) ||
+ !CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[1]));
+ }
+ } else { // no edges available
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
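
As a standalone sketch outside this patch, the inter/inter fall-through
above (both neighbors are single-reference inter blocks) weights each
neighbor by 0 or 2, so pred_context lands on 0, 2, or 4. The enum values
below are illustrative stand-ins for the real MV_REFERENCE_FRAME ordering:

#include <stdio.h>

typedef enum {
  LAST_FRAME, LAST2_FRAME, LAST3_FRAME, LAST4_FRAME,
  GOLDEN_FRAME, ALTREF_FRAME
} REF;  /* hypothetical stand-in for MV_REFERENCE_FRAME */

#define CHECK_GOLDEN_OR_ALTREF(ref_frame) \
  ((ref_frame == GOLDEN_FRAME) || (ref_frame == ALTREF_FRAME))

int main(void) {
  const REF refs[3] = { LAST_FRAME, GOLDEN_FRAME, ALTREF_FRAME };
  int i, j;
  for (i = 0; i < 3; ++i) {
    for (j = 0; j < 3; ++j) {
      /* Same arithmetic as the single-ref/single-ref branch above. */
      const int ctx = 2 * (!CHECK_GOLDEN_OR_ALTREF(refs[i])) +
                      2 * (!CHECK_GOLDEN_OR_ALTREF(refs[j]));
      printf("above=%d left=%d -> pred_context=%d\n", refs[i], refs[j], ctx);
    }
  }
  return 0;
}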
+
+// For the bit to signal whether the single reference is ALTREF_FRAME or
+// GOLDEN_FRAME, knowing that it is one of these two choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is ALTREF_FRAME,
+// conditioned on it being either ALTREF_FRAME or GOLDEN_FRAME.
+int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ } else {
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (above0 == GOLDEN_FRAME ||
+ above1 == GOLDEN_FRAME ||
+ left0 == GOLDEN_FRAME ||
+ left1 == GOLDEN_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+ if (rfs == GOLDEN_FRAME)
+ pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
+ else if (rfs == ALTREF_FRAME)
+ pred_context = (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
+ else
+ pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
+ } else {
+ if (!CHECK_GOLDEN_OR_ALTREF(above0) && !CHECK_GOLDEN_OR_ALTREF(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (!CHECK_GOLDEN_OR_ALTREF(above0) ||
+ !CHECK_GOLDEN_OR_ALTREF(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ !CHECK_GOLDEN_OR_ALTREF(above0) ? left0 : above0;
+ pred_context = 4 * (edge0 == GOLDEN_FRAME);
+ } else {
+ pred_context = 2 * (above0 == GOLDEN_FRAME) +
+ 2 * (left0 == GOLDEN_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (!CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+// For the bit to signal whether the single reference is LAST3/LAST4 or
+// LAST2/LAST, knowing that it is one of these two choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is LAST3_FRAME or
+// LAST4_FRAME, conditioned on it being one of LAST_FRAME, LAST2_FRAME,
+// LAST3_FRAME, or LAST4_FRAME.
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
+ } else {
+ pred_context = 1 +
+ 2 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
+ CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (CHECK_LAST_OR_LAST2(above0) ||
+ CHECK_LAST_OR_LAST2(above1) ||
+ CHECK_LAST_OR_LAST2(left0) ||
+ CHECK_LAST_OR_LAST2(left1));
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+ if (CHECK_LAST_OR_LAST2(rfs))
+ pred_context = 3 + (CHECK_LAST_OR_LAST2(crf1) ||
+ CHECK_LAST_OR_LAST2(crf2));
+ else if (rfs == LAST3_FRAME || rfs == LAST4_FRAME)
+ pred_context = (CHECK_LAST_OR_LAST2(crf1) ||
+ CHECK_LAST_OR_LAST2(crf2));
+ else
+ pred_context = 1 + 2 * (CHECK_LAST_OR_LAST2(crf1) ||
+ CHECK_LAST_OR_LAST2(crf2));
+ } else {
+ if (CHECK_GOLDEN_OR_ALTREF(above0) && CHECK_GOLDEN_OR_ALTREF(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (CHECK_GOLDEN_OR_ALTREF(above0) ||
+ CHECK_GOLDEN_OR_ALTREF(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ CHECK_GOLDEN_OR_ALTREF(above0) ? left0 : above0;
+ pred_context = 4 * CHECK_LAST_OR_LAST2(edge0);
+ } else {
+ pred_context = 2 * CHECK_LAST_OR_LAST2(above0) +
+ 2 * CHECK_LAST_OR_LAST2(left0);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (CHECK_GOLDEN_OR_ALTREF(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]));
+ else
+ pred_context = 3 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
+ CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+// For the bit to signal whether the single reference is LAST2_FRAME or
+// LAST_FRAME, knowing that it is one of these two choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is LAST2_FRAME,
+// conditioned on it being either LAST2_FRAME or LAST_FRAME.
+int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ } else {
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+ left0 == LAST_FRAME || left1 == LAST_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+ if (rfs == LAST_FRAME)
+ pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ else if (rfs == LAST2_FRAME)
+ pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ else
+ pred_context = 1 + 2 * (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ } else {
+ if (!CHECK_LAST_OR_LAST2(above0) &&
+ !CHECK_LAST_OR_LAST2(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (!CHECK_LAST_OR_LAST2(above0) ||
+ !CHECK_LAST_OR_LAST2(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ !CHECK_LAST_OR_LAST2(above0) ? left0 : above0;
+ pred_context = 4 * (edge0 == LAST_FRAME);
+ } else {
+ pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+// For the bit to signal whether the single reference is LAST4_FRAME or
+// LAST3_FRAME, knowing that it is one of these two choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is LAST4_FRAME,
+// conditioned on it being either LAST4_FRAME or LAST3_FRAME.
+int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (!CHECK_LAST3_OR_LAST4(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
+ } else {
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST3_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (above0 == LAST3_FRAME || above1 == LAST3_FRAME ||
+ left0 == LAST3_FRAME || left1 == LAST3_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+ if (rfs == LAST3_FRAME)
+ pred_context = 3 + (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+ else if (rfs == LAST4_FRAME)
+ pred_context = (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+ else
+ pred_context = 1 + 2 * (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+ } else {
+ if (!CHECK_LAST3_OR_LAST4(above0) &&
+ !CHECK_LAST3_OR_LAST4(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (!CHECK_LAST3_OR_LAST4(above0) ||
+ !CHECK_LAST3_OR_LAST4(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ !CHECK_LAST3_OR_LAST4(above0) ? left0 : above0;
+ pred_context = 4 * (edge0 == LAST3_FRAME);
+ } else {
+ pred_context = 2 * (above0 == LAST3_FRAME) +
+ 2 * (left0 == LAST3_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (!CHECK_LAST3_OR_LAST4(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST3_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+#else // CONFIG_EXT_REFS
+
int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
int pred_context;
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
@@ -337,3 +1297,5 @@
assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
return pred_context;
}
+
+#endif // CONFIG_EXT_REFS
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index d6d7146..7d2f28a8 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -68,6 +68,10 @@
int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
+#if CONFIG_EXT_INTRA
+int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd);
+#endif // CONFIG_EXT_INTRA
+
int vp10_get_intra_inter_context(const MACROBLOCKD *xd);
static INLINE vpx_prob vp10_get_intra_inter_prob(const VP10_COMMON *cm,
@@ -87,25 +91,77 @@
const MACROBLOCKD *xd);
static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p(const VP10_COMMON *cm,
- const MACROBLOCKD *xd) {
+ const MACROBLOCKD *xd) {
const int pred_context = vp10_get_pred_context_comp_ref_p(cm, xd);
- return cm->fc->comp_ref_prob[pred_context];
+ return cm->fc->comp_ref_prob[pred_context][0];
}
+#if CONFIG_EXT_REFS
+int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p1(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_ref_p1(cm, xd);
+ return cm->fc->comp_ref_prob[pred_context][1];
+}
+
+int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p2(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_ref_p2(cm, xd);
+ return cm->fc->comp_ref_prob[pred_context][2];
+}
+
+int vp10_get_pred_context_comp_ref_p3(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p3(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_ref_p3(cm, xd);
+ return cm->fc->comp_ref_prob[pred_context][3];
+}
+#endif // CONFIG_EXT_REFS
+
int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
static INLINE vpx_prob vp10_get_pred_prob_single_ref_p1(const VP10_COMMON *cm,
- const MACROBLOCKD *xd) {
+ const MACROBLOCKD *xd) {
return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p1(xd)][0];
}
int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd);
static INLINE vpx_prob vp10_get_pred_prob_single_ref_p2(const VP10_COMMON *cm,
- const MACROBLOCKD *xd) {
+ const MACROBLOCKD *xd) {
return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p2(xd)][1];
}
+#if CONFIG_EXT_REFS
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p3(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p3(xd)][2];
+}
+
+int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p4(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p4(xd)][3];
+}
+
+int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p5(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p5(xd)][4];
+}
+#endif // CONFIG_EXT_REFS
+
// Returns a context number for the given MB prediction signal
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real blocks.
@@ -165,6 +221,71 @@
}
}
+#if CONFIG_VAR_TX
+static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ TX_SIZE max_tx_size, int ctx,
+ struct tx_counts *tx_counts) {
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ ++get_tx_counts(max_tx_size, ctx, tx_counts)[tx_size];
+ mbmi->tx_size = tx_size;
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+ update_tx_counts(cm, xd, mbmi, plane_bsize,
+ tx_size - 1, offsetr, offsetc,
+ max_tx_size, ctx, tx_counts);
+ }
+ }
+}
+
+static INLINE void inter_block_tx_count_update(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi,
+ BLOCK_SIZE plane_bsize,
+ int ctx,
+ struct tx_counts *tx_counts) {
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+
+ for (idy = 0; idy < mi_height; idy += bh)
+ for (idx = 0; idx < mi_width; idx += bh)
+ update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx,
+ max_tx_size, ctx, tx_counts);
+}
+#endif
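
update_tx_counts recurses by decrementing bsl and visiting four quadrants
whose offsets come from the two low bits of the loop index. A standalone
sketch of that offset arithmetic (bsl = 1, i.e. sub-blocks two 4x4 units
wide, as when a 16x16 transform splits into four 8x8s):

#include <stdio.h>

int main(void) {
  const int bsl = 1;  /* sub-block width, log2, in 4x4 units */
  int i;
  for (i = 0; i < 4; ++i) {
    const int offsetr = (i >> 1) << bsl;   /* bit 1 selects top/bottom */
    const int offsetc = (i & 0x01) << bsl; /* bit 0 selects left/right */
    printf("sub-block %d -> (row %d, col %d)\n", i, offsetr, offsetc);
  }
  return 0;
}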
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index fdcb967..60592fd 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -25,7 +25,7 @@
const MV *src_mv,
const struct scale_factors *sf,
int w, int h, int ref,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
enum mv_precision precision,
int x, int y, int bd) {
const int is_q4 = precision == MV_PRECISION_Q4;
@@ -38,7 +38,8 @@
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd);
+ sf, w, h, ref, interp_filter, sf->x_step_q4,
+ sf->y_step_q4, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -47,7 +48,7 @@
const MV *src_mv,
const struct scale_factors *sf,
int w, int h, int ref,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
enum mv_precision precision,
int x, int y) {
const int is_q4 = precision == MV_PRECISION_Q4;
@@ -60,17 +61,17 @@
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4);
+ sf, w, h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4);
}
void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
- int bw, int bh,
- int x, int y, int w, int h,
- int mi_x, int mi_y) {
+ int bw, int bh,
+ int x, int y, int w, int h,
+ int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const MODE_INFO *mi = xd->mi[0];
const int is_compound = has_second_ref(&mi->mbmi);
- const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter];
+ const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
int ref;
for (ref = 0; ref < 1 + is_compound; ++ref) {
@@ -115,15 +116,15 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
- xd->bd);
+ subpel_x, subpel_y, sf, w, h, ref,
+ interp_filter, xs, ys, xd->bd);
} else {
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
+ subpel_x, subpel_y, sf, w, h, ref, interp_filter, xs, ys);
}
#else
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
+ subpel_x, subpel_y, sf, w, h, ref, interp_filter, xs, ys);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
@@ -140,7 +141,7 @@
uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2];
int ref;
const int is_compound = has_second_ref(&mi->mbmi);
- const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter];
+ const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre =
@@ -151,7 +152,7 @@
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height,
- ref, kernel, MV_PRECISION_Q3,
+ ref, interp_filter, MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * ic,
mi_row * MI_SIZE + 4 * ir, xd->bd);
} else {
@@ -159,7 +160,7 @@
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
- kernel, MV_PRECISION_Q3,
+ interp_filter, MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * ic,
mi_row * MI_SIZE + 4 * ir);
}
@@ -168,7 +169,7 @@
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
- kernel, MV_PRECISION_Q3,
+ interp_filter, MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * ic,
mi_row * MI_SIZE + 4 * ir);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -264,3 +265,227 @@
}
}
}
+
+#if CONFIG_SUPERTX
+static const uint8_t mask_8[8] = {
+ 64, 64, 62, 52, 12, 2, 0, 0
+};
+
+static const uint8_t mask_16[16] = {
+ 63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1
+};
+
+static const uint8_t mask_32[32] = {
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36,
+ 28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static const uint8_t mask_8_uv[8] = {
+ 64, 64, 62, 52, 12, 2, 0, 0
+};
+
+static const uint8_t mask_16_uv[16] = {
+ 64, 64, 64, 64, 61, 53, 45, 36, 28, 19, 11, 3, 0, 0, 0, 0
+};
+
+static const uint8_t mask_32_uv[32] = {
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 60, 54, 46, 36,
+ 28, 18, 10, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static void generate_1dmask(int length, uint8_t *mask, int plane) {
+ switch (length) {
+ case 8:
+ memcpy(mask, plane ? mask_8_uv : mask_8, length);
+ break;
+ case 16:
+ memcpy(mask, plane ? mask_16_uv : mask_16, length);
+ break;
+ case 32:
+ memcpy(mask, plane ? mask_32_uv : mask_32, length);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+void vp10_build_masked_inter_predictor_complex(
+ MACROBLOCKD *xd,
+ uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride,
+ const struct macroblockd_plane *pd, int mi_row, int mi_col,
+ int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ PARTITION_TYPE partition, int plane) {
+ int i, j;
+ uint8_t mask[MAXTXLEN];
+ int top_w = 4 << b_width_log2_lookup[top_bsize],
+ top_h = 4 << b_height_log2_lookup[top_bsize];
+ int w = 4 << b_width_log2_lookup[bsize], h = 4 << b_height_log2_lookup[bsize];
+ int w_offset = (mi_col - mi_col_ori) << 3,
+ h_offset = (mi_row - mi_row_ori) << 3;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+ uint16_t *dst216 = CONVERT_TO_SHORTPTR(dst2);
+ int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ top_w >>= pd->subsampling_x;
+ top_h >>= pd->subsampling_y;
+ w >>= pd->subsampling_x;
+ h >>= pd->subsampling_y;
+ w_offset >>= pd->subsampling_x;
+ h_offset >>= pd->subsampling_y;
+
+ switch (partition) {
+ case PARTITION_HORZ:
+ {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (b_hdb) {
+ uint16_t *dst_tmp = dst16 + h_offset * dst_stride;
+ uint16_t *dst2_tmp = dst216 + h_offset * dst2_stride;
+ generate_1dmask(h, mask + h_offset,
+ plane && xd->plane[plane].subsampling_y);
+
+ for (i = h_offset; i < h_offset + h; i++) {
+ for (j = 0; j < top_w; j++) {
+          const int m = mask[i];
+          assert(m >= 0 && m <= 64);
+ if (m == 64)
+ continue;
+
+ if (m == 0)
+ dst_tmp[j] = dst2_tmp[j];
+ else
+ dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
+ }
+ dst_tmp += dst_stride;
+ dst2_tmp += dst2_stride;
+ }
+
+      for (; i < top_h; i++) {
+ memcpy(dst_tmp, dst2_tmp, top_w * sizeof(uint16_t));
+ dst_tmp += dst_stride;
+ dst2_tmp += dst2_stride;
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *dst_tmp = dst + h_offset * dst_stride;
+ uint8_t *dst2_tmp = dst2 + h_offset * dst2_stride;
+ generate_1dmask(h, mask + h_offset,
+ plane && xd->plane[plane].subsampling_y);
+
+ for (i = h_offset; i < h_offset + h; i++) {
+ for (j = 0; j < top_w; j++) {
+          const int m = mask[i];
+          assert(m >= 0 && m <= 64);
+ if (m == 64)
+ continue;
+
+ if (m == 0)
+ dst_tmp[j] = dst2_tmp[j];
+ else
+ dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
+ }
+ dst_tmp += dst_stride;
+ dst2_tmp += dst2_stride;
+ }
+
+      for (; i < top_h; i++) {
+ memcpy(dst_tmp, dst2_tmp, top_w * sizeof(uint8_t));
+ dst_tmp += dst_stride;
+ dst2_tmp += dst2_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+
+ break;
+ case PARTITION_VERT:
+ {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (b_hdb) {
+ uint16_t *dst_tmp = dst16;
+ uint16_t *dst2_tmp = dst216;
+ generate_1dmask(w, mask + w_offset,
+ plane && xd->plane[plane].subsampling_x);
+
+ for (i = 0; i < top_h; i++) {
+ for (j = w_offset; j < w_offset + w; j++) {
+          const int m = mask[j];
+          assert(m >= 0 && m <= 64);
+ if (m == 64)
+ continue;
+
+ if (m == 0)
+ dst_tmp[j] = dst2_tmp[j];
+ else
+ dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
+ }
+ memcpy(dst_tmp + j, dst2_tmp + j,
+ (top_w - w_offset - w) * sizeof(uint16_t));
+ dst_tmp += dst_stride;
+ dst2_tmp += dst2_stride;
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *dst_tmp = dst;
+ uint8_t *dst2_tmp = dst2;
+ generate_1dmask(w, mask + w_offset,
+ plane && xd->plane[plane].subsampling_x);
+
+ for (i = 0; i < top_h; i++) {
+ for (j = w_offset; j < w_offset + w; j++) {
+          const int m = mask[j];
+          assert(m >= 0 && m <= 64);
+ if (m == 64)
+ continue;
+
+ if (m == 0)
+ dst_tmp[j] = dst2_tmp[j];
+ else
+ dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
+ }
+ memcpy(dst_tmp + j, dst2_tmp + j,
+ (top_w - w_offset - w) * sizeof(uint8_t));
+ dst_tmp += dst_stride;
+ dst2_tmp += dst2_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ break;
+ default:
+ assert(0);
+ }
+ (void) xd;
+}
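
Every blending loop above reduces to the same Q6 weighted average with
rounding, with the mask weight m in [0, 64]. A standalone sketch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* m == 64 keeps the first prediction, m == 0 takes the second. */
static uint8_t blend_q6(uint8_t a, uint8_t b, int m) {
  assert(m >= 0 && m <= 64);
  return (uint8_t)((a * m + b * (64 - m) + 32) >> 6);
}

int main(void) {
  printf("%d\n", blend_q6(100, 200, 64));  /* 100 */
  printf("%d\n", blend_q6(100, 200, 0));   /* 200 */
  printf("%d\n", blend_q6(100, 200, 32));  /* 150 */
  return 0;
}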
+
+void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int block) {
+  // Prediction function used in supertx:
+  // Uses the mv of the current block (which is smaller than 8x8) to build
+  // the prediction for a block located at (mi_row, mi_col) of size bsize,
+  // where bsize can be larger than 8x8.
+  // block (0-3): the sub8x8 location of the current block.
+ int plane;
+ const int mi_x = mi_col * MI_SIZE;
+ const int mi_y = mi_row * MI_SIZE;
+
+  // For sub8x8 uv:
+  // Skip the uv prediction in supertx except for the first block (block = 0).
+ int max_plane = block ? 1 : MAX_MB_PLANE;
+
+ for (plane = 0; plane < max_plane; plane++) {
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+ &xd->plane[plane]);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const int bw = 4 * num_4x4_w;
+ const int bh = 4 * num_4x4_h;
+
+ build_inter_predictors(xd, plane, block, bw, bh,
+ 0, 0, bw, bh,
+ mi_x, mi_y);
+ }
+}
+#endif // CONFIG_SUPERTX
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 5678f47..3fcdb97 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -13,8 +13,8 @@
#include "vp10/common/filter.h"
#include "vp10/common/onyxc_int.h"
+#include "vp10/common/vp10_convolve.h"
#include "vpx/vpx_integer.h"
-#include "vpx_dsp/vpx_filter.h"
#ifdef __cplusplus
extern "C" {
@@ -26,11 +26,36 @@
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
int xs, int ys) {
- sf->predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ InterpFilterParams interp_filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ if (interp_filter_params.tap == SUBPEL_TAPS) {
+ const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ if (IsInterpolatingFilter(interp_filter)) {
+ // Interpolating filter
+ sf->predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ } else {
+ sf->predict_ni[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ }
+#else
+ sf->predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ } else {
+    // ref > 0 means this is the second reference frame, whose prediction
+    // must be averaged with the first reference frame's result, which is
+    // already in dst.
+ int avg = ref > 0;
+ vp10_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter_params,
+ subpel_x, xs, subpel_y, ys, avg);
+ }
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -40,11 +65,37 @@
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
int xs, int ys, int bd) {
- sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ InterpFilterParams interp_filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ if (interp_filter_params.tap == SUBPEL_TAPS) {
+ const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ if (IsInterpolatingFilter(interp_filter)) {
+ // Interpolating filter
+ sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ } else {
+ sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ }
+#else
+ sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ } else {
+    // ref > 0 means this is the second reference frame, whose prediction
+    // must be averaged with the first reference frame's result, which is
+    // already in dst.
+ int avg = ref > 0;
+ vp10_highbd_convolve(src, src_stride, dst, dst_stride, w, h,
+ interp_filter_params, subpel_x, xs, subpel_y, ys, avg,
+ bd);
+ }
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -127,32 +178,46 @@
}
void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
- int bw, int bh,
- int x, int y, int w, int h,
- int mi_x, int mi_y);
+ int bw, int bh,
+ int x, int y, int w, int h,
+ int mi_x, int mi_y);
void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane,
int i, int ir, int ic,
int mi_row, int mi_col);
void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize);
-
-void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int plane);
-
-void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize);
+void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int plane);
+
+void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+
void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize);
+ BLOCK_SIZE bsize);
+
+#if CONFIG_SUPERTX
+void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int block);
+struct macroblockd_plane;
+void vp10_build_masked_inter_predictor_complex(
+ MACROBLOCKD *xd,
+ uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride,
+ const struct macroblockd_plane *pd, int mi_row, int mi_col,
+ int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ PARTITION_TYPE partition, int plane);
+
+#endif // CONFIG_SUPERTX
void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const MV *mv_q3,
const struct scale_factors *sf,
int w, int h, int do_avg,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
enum mv_precision precision,
int x, int y);
@@ -162,7 +227,7 @@
const MV *mv_q3,
const struct scale_factors *sf,
int w, int h, int do_avg,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
enum mv_precision precision,
int x, int y, int bd);
#endif
@@ -193,6 +258,100 @@
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
const struct scale_factors *sf);
+#if CONFIG_EXT_INTERP
+static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int is_compound = has_second_ref(mbmi);
+ int intpel_mv;
+ int plane;
+
+#if SUPPORT_NONINTERPOLATING_FILTERS
+  // TODO(debargha): This is currently only for experimentation
+  // with non-interpolating filters. Remove later.
+  // If any of the filters are non-interpolating, then always signal the
+  // interpolation filter.
+ int i;
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+ if (!IsInterpolatingFilter(i)) return 1;
+ }
+#endif
+
+  // For scaled references, the interpolation filter is always signaled.
+ if (vp10_is_scaled(&xd->block_refs[0]->sf))
+ return 1;
+ if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf))
+ return 1;
+
+ if (bsize == BLOCK_4X4) {
+ for (plane = 0; plane < 2; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ MV mv0 = average_split_mvs(pd, mi, 0, 0);
+ MV mv1 = average_split_mvs(pd, mi, 0, 1);
+ MV mv2 = average_split_mvs(pd, mi, 0, 2);
+ MV mv3 = average_split_mvs(pd, mi, 0, 3);
+ intpel_mv =
+ !mv_has_subpel(&mv0) &&
+ !mv_has_subpel(&mv1) &&
+ !mv_has_subpel(&mv2) &&
+ !mv_has_subpel(&mv3);
+ if (is_compound && intpel_mv) {
+ mv0 = average_split_mvs(pd, mi, 1, 0);
+ mv1 = average_split_mvs(pd, mi, 1, 1);
+ mv2 = average_split_mvs(pd, mi, 1, 2);
+ mv3 = average_split_mvs(pd, mi, 1, 3);
+ intpel_mv =
+ !mv_has_subpel(&mv0) &&
+ !mv_has_subpel(&mv1) &&
+ !mv_has_subpel(&mv2) &&
+ !mv_has_subpel(&mv3);
+ }
+ if (!intpel_mv) break;
+ }
+ } else if (bsize == BLOCK_4X8) {
+ for (plane = 0; plane < 2; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ MV mv0 = average_split_mvs(pd, mi, 0, 0);
+ MV mv1 = average_split_mvs(pd, mi, 0, 1);
+ intpel_mv =
+ !mv_has_subpel(&mv0) &&
+ !mv_has_subpel(&mv1);
+ if (is_compound && intpel_mv) {
+ mv0 = average_split_mvs(pd, mi, 1, 0);
+ mv1 = average_split_mvs(pd, mi, 1, 1);
+ intpel_mv =
+ !mv_has_subpel(&mv0) &&
+ !mv_has_subpel(&mv1);
+ }
+ if (!intpel_mv) break;
+ }
+ } else if (bsize == BLOCK_8X4) {
+ for (plane = 0; plane < 2; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ MV mv0 = average_split_mvs(pd, mi, 0, 0);
+ MV mv1 = average_split_mvs(pd, mi, 0, 2);
+ intpel_mv =
+ !mv_has_subpel(&mv0) &&
+ !mv_has_subpel(&mv1);
+ if (is_compound && intpel_mv) {
+ mv0 = average_split_mvs(pd, mi, 1, 0);
+ mv1 = average_split_mvs(pd, mi, 1, 2);
+ intpel_mv =
+ !mv_has_subpel(&mv0) &&
+ !mv_has_subpel(&mv1);
+ }
+ if (!intpel_mv) break;
+ }
+ } else {
+ intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
+ if (is_compound && intpel_mv) {
+ intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+ }
+ }
+ return !intpel_mv;
+}
+#endif // CONFIG_EXT_INTERP
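
vp10_is_interp_needed thus signals a filter only when a reference is
scaled or some motion vector has a fractional component. A standalone
sketch of the integer-pel test, with mv_is_subpel as a hypothetical
stand-in for the library's mv_has_subpel, assuming MV components are
stored in 1/8-pel units (low three bits fractional):

#include <stdio.h>

typedef struct { short row, col; } MV;

static int mv_is_subpel(const MV *mv) {  /* hypothetical stand-in */
  return (mv->row & 7) || (mv->col & 7);
}

int main(void) {
  const MV full_pel = { 16, -8 };  /* (2.0, -1.0) pels: no filter needed */
  const MV sub_pel = { 17, -8 };   /* 2.125 pels on the row: filter needed */
  printf("full=%d sub=%d\n", mv_is_subpel(&full_pel), mv_is_subpel(&sub_pel));
  return 0;
}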
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index e9e3949..f257200 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
+
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
@@ -21,7 +23,6 @@
#include "vp10/common/reconintra.h"
#include "vp10/common/onyxc_int.h"
-#if CONFIG_MISC_FIXES
enum {
NEED_LEFT = 1 << 1,
NEED_ABOVE = 1 << 2,
@@ -42,28 +43,7 @@
NEED_ABOVE | NEED_ABOVERIGHT, // D63
NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM
};
-#else
-enum {
- NEED_LEFT = 1 << 1,
- NEED_ABOVE = 1 << 2,
- NEED_ABOVERIGHT = 1 << 3,
-};
-static const uint8_t extend_modes[INTRA_MODES] = {
- NEED_ABOVE | NEED_LEFT, // DC
- NEED_ABOVE, // V
- NEED_LEFT, // H
- NEED_ABOVERIGHT, // D45
- NEED_LEFT | NEED_ABOVE, // D135
- NEED_LEFT | NEED_ABOVE, // D117
- NEED_LEFT | NEED_ABOVE, // D153
- NEED_LEFT, // D207
- NEED_ABOVERIGHT, // D63
- NEED_LEFT | NEED_ABOVE, // TM
-};
-#endif
-
-#if CONFIG_MISC_FIXES
static const uint8_t orders_64x64[1] = { 0 };
static const uint8_t orders_64x32[2] = { 0, 1 };
static const uint8_t orders_32x64[2] = { 0, 1 };
@@ -123,11 +103,12 @@
static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
int right_available,
TX_SIZE txsz, int y, int x, int ss_x) {
+ const int wl = mi_width_log2_lookup[bsize];
+ const int w = VPXMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
+ const int step = 1 << txsz;
+
if (y == 0) {
- int wl = mi_width_log2_lookup[bsize];
- int hl = mi_height_log2_lookup[bsize];
- int w = 1 << (wl + 1 - ss_x);
- int step = 1 << txsz;
+ const int hl = mi_height_log2_lookup[bsize];
const uint8_t *order = orders[bsize];
int my_order, tr_order;
@@ -148,10 +129,6 @@
return my_order > tr_order && right_available;
} else {
- int wl = mi_width_log2_lookup[bsize];
- int w = 1 << (wl + 1 - ss_x);
- int step = 1 << txsz;
-
return x + step < w;
}
}
@@ -160,10 +137,10 @@
int bottom_available, TX_SIZE txsz,
int y, int x, int ss_y) {
if (x == 0) {
- int wl = mi_width_log2_lookup[bsize];
- int hl = mi_height_log2_lookup[bsize];
- int h = 1 << (hl + 1 - ss_y);
- int step = 1 << txsz;
+ const int wl = mi_width_log2_lookup[bsize];
+ const int hl = mi_height_log2_lookup[bsize];
+ const int h = 1 << (hl + 1 - ss_y);
+ const int step = 1 << txsz;
const uint8_t *order = orders[bsize];
int my_order, bl_order;
@@ -188,7 +165,6 @@
return 0;
}
}
-#endif
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
@@ -216,15 +192,9 @@
INIT_ALL_SIZES(pred[V_PRED], v);
INIT_ALL_SIZES(pred[H_PRED], h);
-#if CONFIG_MISC_FIXES
INIT_ALL_SIZES(pred[D207_PRED], d207e);
INIT_ALL_SIZES(pred[D45_PRED], d45e);
INIT_ALL_SIZES(pred[D63_PRED], d63e);
-#else
- INIT_ALL_SIZES(pred[D207_PRED], d207);
- INIT_ALL_SIZES(pred[D45_PRED], d45);
- INIT_ALL_SIZES(pred[D63_PRED], d63);
-#endif
INIT_ALL_SIZES(pred[D117_PRED], d117);
INIT_ALL_SIZES(pred[D135_PRED], d135);
INIT_ALL_SIZES(pred[D153_PRED], d153);
@@ -238,15 +208,9 @@
#if CONFIG_VP9_HIGHBITDEPTH
INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
-#if CONFIG_MISC_FIXES
INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e);
INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e);
- INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
-#else
- INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207);
- INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45);
- INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
-#endif
+ INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63e);
INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
@@ -261,12 +225,638 @@
#undef intra_pred_allsizes
}
-#if CONFIG_MISC_FIXES
-static INLINE void memset16(uint16_t *dst, int val, int n) {
- while (n--)
- *dst++ = val;
+#if CONFIG_EXT_INTRA
+#define PI 3.14159265
+#define FILTER_INTRA_PREC_BITS 10
+#define FILTER_INTRA_ROUND_VAL 511
+
+static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = {
+ NEED_LEFT | NEED_ABOVE, // FILTER_DC
+ NEED_LEFT | NEED_ABOVE, // FILTER_V
+ NEED_LEFT | NEED_ABOVE, // FILTER_H
+ NEED_LEFT | NEED_ABOVE, // FILTER_D45
+ NEED_LEFT | NEED_ABOVE, // FILTER_D135
+ NEED_LEFT | NEED_ABOVE, // FILTER_D117
+ NEED_LEFT | NEED_ABOVE, // FILTER_D153
+ NEED_LEFT | NEED_ABOVE, // FILTER_D207
+ NEED_LEFT | NEED_ABOVE, // FILTER_D63
+ NEED_LEFT | NEED_ABOVE, // FILTER_TM
+};
+
+static int intra_subpel_interp(int base, int shift, const uint8_t *ref,
+ int ref_start_idx, int ref_end_idx,
+ INTRA_FILTER filter_type) {
+ int val, k, idx, filter_idx = 0;
+ const int16_t *filter = NULL;
+
+ if (filter_type == INTRA_FILTER_LINEAR) {
+ val = ref[base] * (256 - shift) + ref[base + 1] * shift;
+ val = ROUND_POWER_OF_TWO(val, 8);
+ } else {
+ filter_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+ filter = vp10_intra_filter_kernels[filter_type][filter_idx];
+
+ if (filter_idx < (1 << SUBPEL_BITS)) {
+ val = 0;
+ for (k = 0; k < SUBPEL_TAPS; ++k) {
+ idx = base + 1 - (SUBPEL_TAPS / 2) + k;
+ idx = VPXMAX(VPXMIN(idx, ref_end_idx), ref_start_idx);
+ val += ref[idx] * filter[k];
+ }
+ val = ROUND_POWER_OF_TWO(val, FILTER_BITS);
+ } else {
+ val = ref[base + 1];
+ }
+ }
+
+ return val;
}
-#endif
+
+// Directional prediction, zone 1: 0 < angle < 90
+static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left,
+ int dx, int dy, INTRA_FILTER filter_type) {
+ int r, c, x, y, base, shift, val;
+
+ (void)left;
+ (void)dy;
+ assert(dy == 1);
+ assert(dx < 0);
+
+ for (r = 0; r < bs; ++r) {
+ y = r + 1;
+ for (c = 0; c < bs; ++c) {
+ x = (c << 8) - y * dx;
+ base = x >> 8;
+ shift = x - (base << 8);
+ if (base < 2 * bs - 1) {
+ val = intra_subpel_interp(base, shift, above, 0, 2 * bs - 1,
+ filter_type);
+ dst[c] = clip_pixel(val);
+ } else {
+ dst[c] = above[2 * bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
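
The zone-1 loop walks the above row in 8.8 fixed point: x carries the
sample position, base its integer part, and shift the fraction in 256ths
fed to the sub-pel filter. A standalone sketch of the linear-filter case:

#include <stdint.h>
#include <stdio.h>

/* x_q8 is a position along the above row in Q8 fixed point. */
static int linear_interp(const uint8_t *ref, int x_q8) {
  const int base = x_q8 >> 8;
  const int shift = x_q8 - (base << 8);  /* fractional part, 0..255 */
  const int val = ref[base] * (256 - shift) + ref[base + 1] * shift;
  return (val + 128) >> 8;  /* ROUND_POWER_OF_TWO(val, 8) */
}

int main(void) {
  const uint8_t above[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };
  const int dx = -256;  /* 45 degrees: one whole pixel of shift per row */
  const int r = 0, c = 1;
  const int x = (c << 8) - (r + 1) * dx;  /* 512, i.e. position 2.0 */
  printf("pred = %d\n", linear_interp(above, x));  /* above[2] = 30 */
  return 0;
}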
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left,
+ int dx, int dy, INTRA_FILTER filter_type) {
+ int r, c, x, y, shift, val, base;
+
+ assert(dx > 0);
+ assert(dy > 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ y = r + 1;
+ x = (c << 8) - y * dx;
+ base = x >> 8;
+ if (base >= -1) {
+ shift = x - (base << 8);
+ val = intra_subpel_interp(base, shift, above, -1, bs - 1, filter_type);
+ } else {
+ x = c + 1;
+ y = (r << 8) - x * dy;
+ base = y >> 8;
+ if (base >= 0) {
+ shift = y - (base << 8);
+ val = intra_subpel_interp(base, shift, left, 0, bs - 1, filter_type);
+ } else {
+ val = left[0];
+ }
+ }
+ dst[c] = clip_pixel(val);
+ }
+ dst += stride;
+ }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left,
+ int dx, int dy, INTRA_FILTER filter_type) {
+ int r, c, x, y, base, shift, val;
+
+ (void)above;
+ (void)dx;
+
+ assert(dx == 1);
+ assert(dy < 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ x = c + 1;
+ y = (r << 8) - x * dy;
+ base = y >> 8;
+ shift = y - (base << 8);
+ if (base < 2 * bs - 1) {
+ val = intra_subpel_interp(base, shift, left, 0, 2 * bs - 1,
+ filter_type);
+ dst[c] = clip_pixel(val);
+ } else {
+        dst[c] = left[2 * bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
+
+static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
+ const uint8_t *above, const uint8_t *left, int angle,
+ INTRA_FILTER filter_type) {
+ double t = 0;
+ int dx, dy;
+ int bs = 4 << tx_size;
+
+ if (angle != 90 && angle != 180)
+ t = tan(angle * PI / 180.0);
+ if (angle > 0 && angle < 90) {
+ dx = -((int)(256 / t));
+ dy = 1;
+ dr_prediction_z1(dst, stride, bs, above, left, dx, dy, filter_type);
+ } else if (angle > 90 && angle < 180) {
+ t = -t;
+ dx = (int)(256 / t);
+ dy = (int)(256 * t);
+ dr_prediction_z2(dst, stride, bs, above, left, dx, dy, filter_type);
+ } else if (angle > 180 && angle < 270) {
+ dx = 1;
+ dy = -((int)(256 * t));
+ dr_prediction_z3(dst, stride, bs, above, left, dx, dy, filter_type);
+ } else if (angle == 90) {
+ pred[V_PRED][tx_size](dst, stride, above, left);
+ } else if (angle == 180) {
+ pred[H_PRED][tx_size](dst, stride, above, left);
+ }
+}
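
dr_predictor converts the prediction angle into per-row and per-column
steps in Q8 via the tangent; a standalone sketch of that mapping, with
the same float-to-int truncation as the code above:

#include <math.h>
#include <stdio.h>

#define PI 3.14159265

int main(void) {
  const int angles[3] = { 45, 135, 225 };
  int i;
  for (i = 0; i < 3; ++i) {
    const int angle = angles[i];
    double t = tan(angle * PI / 180.0);
    if (angle < 90) {
      printf("zone 1: %d deg -> dx=%d, dy=1\n", angle, -(int)(256 / t));
    } else if (angle < 180) {
      t = -t;
      printf("zone 2: %d deg -> dx=%d, dy=%d\n", angle,
             (int)(256 / t), (int)(256 * t));
    } else {
      printf("zone 3: %d deg -> dx=1, dy=%d\n", angle, -(int)(256 * t));
    }
  }
  return 0;
}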
+
+static int filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = {
+ {
+ {735, 881, -537, -54},
+ {1005, 519, -488, -11},
+ {383, 990, -343, -6},
+ {442, 805, -542, 319},
+ {658, 616, -133, -116},
+ {875, 442, -141, -151},
+ {386, 741, -23, -80},
+ {390, 1027, -446, 51},
+ {679, 606, -523, 262},
+ {903, 922, -778, -23},
+ },
+ {
+ {648, 803, -444, 16},
+ {972, 620, -576, 7},
+ {561, 967, -499, -5},
+ {585, 762, -468, 144},
+ {596, 619, -182, -9},
+ {895, 459, -176, -153},
+ {557, 722, -126, -129},
+ {601, 839, -523, 105},
+ {562, 709, -499, 251},
+ {803, 872, -695, 43},
+ },
+ {
+ {423, 728, -347, 111},
+ {963, 685, -665, 23},
+ {281, 1024, -480, 216},
+ {640, 596, -437, 78},
+ {429, 669, -259, 99},
+ {740, 646, -415, 23},
+ {568, 771, -346, 40},
+ {404, 833, -486, 209},
+ {398, 712, -423, 307},
+ {939, 935, -887, 17},
+ },
+ {
+ {477, 737, -393, 150},
+ {881, 630, -546, 67},
+ {506, 984, -443, -20},
+ {114, 459, -270, 528},
+ {433, 528, 14, 3},
+ {837, 470, -301, -30},
+ {181, 777, 89, -107},
+ {-29, 716, -232, 259},
+ {589, 646, -495, 255},
+ {740, 884, -728, 77},
+ },
+};
+
+static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above,
+ const uint8_t *left,
+ int mode) {
+ int k, r, c;
+ int pred[33][65];
+ int mean, ipred;
+ const TX_SIZE tx_size = (bs == 32) ? TX_32X32 :
+ ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
+ const int c0 = filter_intra_taps_4[tx_size][mode][0];
+ const int c1 = filter_intra_taps_4[tx_size][mode][1];
+ const int c2 = filter_intra_taps_4[tx_size][mode][2];
+ const int c3 = filter_intra_taps_4[tx_size][mode][3];
+
+ k = 0;
+ mean = 0;
+ while (k < bs) {
+ mean = mean + (int)left[k];
+ mean = mean + (int)above[k];
+ k++;
+ }
+ mean = (mean + bs) / (2 * bs);
+
+ for (r = 0; r < bs; ++r)
+ pred[r + 1][0] = (int)left[r] - mean;
+
+ for (c = 0; c < 2 * bs + 1; ++c)
+ pred[0][c] = (int)above[c - 1] - mean;
+
+ for (r = 1; r < bs + 1; ++r)
+ for (c = 1; c < 2 * bs + 1 - r; ++c) {
+ ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
+ c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
+ pred[r][c] = ipred < 0 ?
+ -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) :
+ ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+ }
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ ipred = pred[r + 1][c + 1] + mean;
+ dst[c] = clip_pixel(ipred);
+ }
+ dst += stride;
+ }
+}
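
The recursion output is a Q10 value (each row of filter_intra_taps_4 sums
to roughly 1 << FILTER_INTRA_PREC_BITS), folded back with a symmetric
round-toward-zero. A standalone sketch of that rounding step:

#include <stdio.h>

#define FILTER_INTRA_PREC_BITS 10
#define FILTER_INTRA_ROUND_VAL 511

static int round_q10(int ipred) {
  return ipred < 0
             ? -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS)
             : (ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS;
}

int main(void) {
  printf("%d\n", round_q10(1025));   /*  1 */
  printf("%d\n", round_q10(-1025));  /* -1 */
  printf("%d\n", round_q10(512));    /*  0: exact halves round toward zero */
  printf("%d\n", round_q10(513));    /*  1 */
  return 0;
}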
+
+static void dc_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED);
+}
+
+static void v_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED);
+}
+
+static void h_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED);
+}
+
+static void d45_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED);
+}
+
+static void d135_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED);
+}
+
+static void d117_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED);
+}
+
+static void d153_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED);
+}
+
+static void d207_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED);
+}
+
+static void d63_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED);
+}
+
+static void tm_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED);
+}
+
+static void (*filter_intra_predictors[EXT_INTRA_MODES])(uint8_t *dst,
+ ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) = {
+ dc_filter_predictor, v_filter_predictor, h_filter_predictor,
+ d45_filter_predictor, d135_filter_predictor, d117_filter_predictor,
+ d153_filter_predictor, d207_filter_predictor, d63_filter_predictor,
+ tm_filter_predictor,
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static int highbd_intra_subpel_interp(int base, int shift, const uint16_t *ref,
+ int ref_start_idx, int ref_end_idx,
+ INTRA_FILTER filter_type) {
+ int val, k, idx, filter_idx = 0;
+ const int16_t *filter = NULL;
+
+ if (filter_type == INTRA_FILTER_LINEAR) {
+ val = ref[base] * (256 - shift) + ref[base + 1] * shift;
+ val = ROUND_POWER_OF_TWO(val, 8);
+ } else {
+ filter_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+ filter = vp10_intra_filter_kernels[filter_type][filter_idx];
+
+ if (filter_idx < (1 << SUBPEL_BITS)) {
+ val = 0;
+ for (k = 0; k < SUBPEL_TAPS; ++k) {
+ idx = base + 1 - (SUBPEL_TAPS / 2) + k;
+ idx = VPXMAX(VPXMIN(idx, ref_end_idx), ref_start_idx);
+ val += ref[idx] * filter[k];
+ }
+ val = ROUND_POWER_OF_TWO(val, FILTER_BITS);
+ } else {
+ val = ref[base + 1];
+ }
+ }
+
+ return val;
+}
+
+// Directional prediction, zone 1: 0 < angle < 90
+static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int dx, int dy, int bd,
+ INTRA_FILTER filter_type) {
+ int r, c, x, y, base, shift, val;
+
+ (void)left;
+ (void)dy;
+ assert(dy == 1);
+ assert(dx < 0);
+
+ for (r = 0; r < bs; ++r) {
+ y = r + 1;
+ for (c = 0; c < bs; ++c) {
+ x = (c << 8) - y * dx;
+ base = x >> 8;
+ shift = x - (base << 8);
+ if (base < 2 * bs - 1) {
+ val = highbd_intra_subpel_interp(base, shift, above, 0, 2 * bs - 1,
+ filter_type);
+ dst[c] = clip_pixel_highbd(val, bd);
+ } else {
+ dst[c] = above[2 * bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int dx, int dy, int bd,
+ INTRA_FILTER filter_type) {
+ int r, c, x, y, shift, val, base;
+
+ assert(dx > 0);
+ assert(dy > 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ y = r + 1;
+ x = (c << 8) - y * dx;
+ base = x >> 8;
+ if (base >= -1) {
+ shift = x - (base << 8);
+ val = highbd_intra_subpel_interp(base, shift, above, -1, bs - 1,
+ filter_type);
+ } else {
+ x = c + 1;
+ y = (r << 8) - x * dy;
+ base = y >> 8;
+ if (base >= 0) {
+ shift = y - (base << 8);
+ val = highbd_intra_subpel_interp(base, shift, left, 0, bs - 1,
+ filter_type);
+ } else {
+ val = left[0];
+ }
+ }
+ dst[c] = clip_pixel_highbd(val, bd);
+ }
+ dst += stride;
+ }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int dx, int dy, int bd,
+ INTRA_FILTER filter_type) {
+ int r, c, x, y, base, shift, val;
+
+ (void)above;
+ (void)dx;
+ assert(dx == 1);
+ assert(dy < 0);
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ x = c + 1;
+ y = (r << 8) - x * dy;
+ base = y >> 8;
+ shift = y - (base << 8);
+ if (base < 2 * bs - 1) {
+ val = highbd_intra_subpel_interp(base, shift, left, 0, 2 * bs - 1,
+ filter_type);
+ dst[c] = clip_pixel_highbd(val, bd);
+ } else {
+ dst[c] = left[2 * bs - 1];
+ }
+ }
+ dst += stride;
+ }
+}
+
+static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int r;
+ (void) left;
+ (void) bd;
+ for (r = 0; r < bs; r++) {
+ memcpy(dst, above, bs * sizeof(uint16_t));
+ dst += stride;
+ }
+}
+
+static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int r;
+ (void) above;
+ (void) bd;
+ for (r = 0; r < bs; r++) {
+ vpx_memset16(dst, left[r], bs);
+ dst += stride;
+ }
+}
+
+static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above, const uint16_t *left,
+ int angle, int bd, INTRA_FILTER filter) {
+ double t = 0;
+ int dx, dy;
+
+ if (angle != 90 && angle != 180)
+ t = tan(angle * PI / 180.0);
+ if (angle > 0 && angle < 90) {
+ dx = -((int)(256 / t));
+ dy = 1;
+ highbd_dr_prediction_z1(dst, stride, bs, above, left, dx, dy, bd, filter);
+ } else if (angle > 90 && angle < 180) {
+ t = -t;
+ dx = (int)(256 / t);
+ dy = (int)(256 * t);
+ highbd_dr_prediction_z2(dst, stride, bs, above, left, dx, dy, bd, filter);
+ } else if (angle > 180 && angle < 270) {
+ dx = 1;
+ dy = -((int)(256 * t));
+ highbd_dr_prediction_z3(dst, stride, bs, above, left, dx, dy, bd, filter);
+ } else if (angle == 90) {
+ highbd_v_predictor(dst, stride, bs, above, left, bd);
+ } else if (angle == 180) {
+ highbd_h_predictor(dst, stride, bs, above, left, bd);
+ }
+}
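
The angle-to-step mapping above can be sanity-checked in isolation. A small hypothetical driver (not part of the patch) mirroring the zone arithmetic; note that floating-point truncation can land a step one unit off the ideal +/-256.

    #include <math.h>
    #include <stdio.h>

    #define PI 3.141592653589793

    int main(void) {
      int angle;
      for (angle = 45; angle <= 225; angle += 90) {
        const double t = tan(angle * PI / 180.0);
        if (angle < 90)        // zone 1: step along the above row only
          printf("%d: dx=%d dy=1\n", angle, -(int)(256 / t));
        else if (angle < 180)  // zone 2: both axes; tan is negative here
          printf("%d: dx=%d dy=%d\n", angle, (int)(256 / -t), (int)(256 * -t));
        else                   // zone 3: step along the left column only
          printf("%d: dx=1 dy=%d\n", angle, -(int)(256 * t));
      }
      return 0;  // roughly: 45 -> dx=-256; 135 -> dx=dy=256; 225 -> dy=-256
    }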
+
+static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int mode,
+ int bd) {
+ int k, r, c;
+ int pred[33][65];
+ int mean, ipred;
+ const TX_SIZE tx_size = (bs == 32) ? TX_32X32 :
+ ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
+ const int c0 = filter_intra_taps_4[tx_size][mode][0];
+ const int c1 = filter_intra_taps_4[tx_size][mode][1];
+ const int c2 = filter_intra_taps_4[tx_size][mode][2];
+ const int c3 = filter_intra_taps_4[tx_size][mode][3];
+
+ k = 0;
+ mean = 0;
+ while (k < bs) {
+ mean = mean + (int)left[k];
+ mean = mean + (int)above[k];
+ k++;
+ }
+ mean = (mean + bs) / (2 * bs);
+
+ for (r = 0; r < bs; ++r)
+ pred[r + 1][0] = (int)left[r] - mean;
+
+ for (c = 0; c < 2 * bs + 1; ++c)
+ pred[0][c] = (int)above[c - 1] - mean;
+
+ for (r = 1; r < bs + 1; ++r)
+ for (c = 1; c < 2 * bs + 1 - r; ++c) {
+ ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
+ c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
+ pred[r][c] = ipred < 0 ?
+ -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) :
+ ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+ }
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ ipred = pred[r + 1][c + 1] + mean;
+ dst[c] = clip_pixel_highbd(ipred, bd);
+ }
+ dst += stride;
+ }
+}
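
Each inner-loop step above predicts one mean-removed sample from four causal neighbours and rounds symmetrically toward zero. A self-contained sketch of a single step, with hypothetical taps and a 9-bit precision standing in for filter_intra_taps_4, FILTER_INTRA_ROUND_VAL and FILTER_INTRA_PREC_BITS:

    #include <assert.h>

    static int filter_one(int above, int left, int above_left, int above_right,
                          const int taps[4], int round_val, int prec_bits) {
      const int ipred = taps[0] * above + taps[1] * left +
                        taps[2] * above_left + taps[3] * above_right;
      // Symmetric rounding toward zero, as in the loop above.
      return ipred < 0 ? -((-ipred + round_val) >> prec_bits)
                       : (ipred + round_val) >> prec_bits;
    }

    int main(void) {
      static const int taps[4] = {338, 410, -256, 20};  // sum to 512 = 1 << 9
      // Taps summing to one (in fixed point) preserve a flat neighbourhood:
      assert(filter_one(10, 10, 10, 10, taps, 256, 9) == 10);
      return 0;
    }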
+
+static void highbd_dc_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED,
+ bd);
+}
+
+static void highbd_v_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED,
+ bd);
+}
+
+static void highbd_h_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED,
+ bd);
+}
+
+static void highbd_d45_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED,
+ bd);
+}
+
+static void highbd_d135_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED,
+ bd);
+}
+
+static void highbd_d117_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED,
+ bd);
+}
+
+static void highbd_d153_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED,
+ bd);
+}
+
+static void highbd_d207_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED,
+ bd);
+}
+
+static void highbd_d63_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED,
+ bd);
+}
+
+static void highbd_tm_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED,
+ bd);
+}
+
+static void (*highbd_filter_intra_predictors[EXT_INTRA_MODES])(uint16_t *dst,
+ ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left,
+ int bd) = {
+ highbd_dc_filter_predictor, highbd_v_filter_predictor,
+ highbd_h_filter_predictor, highbd_d45_filter_predictor,
+ highbd_d135_filter_predictor, highbd_d117_filter_predictor,
+ highbd_d153_filter_predictor, highbd_d207_filter_predictor,
+ highbd_d63_filter_predictor, highbd_tm_filter_predictor,
+};
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTRA
#if CONFIG_VP9_HIGHBITDEPTH
static void build_intra_predictors_high(const MACROBLOCKD *xd,
@@ -276,56 +866,71 @@
int dst_stride,
PREDICTION_MODE mode,
TX_SIZE tx_size,
-#if CONFIG_MISC_FIXES
int n_top_px, int n_topright_px,
int n_left_px, int n_bottomleft_px,
-#else
- int up_available,
- int left_available,
- int right_available,
-#endif
- int x, int y,
- int plane, int bd) {
+ int plane) {
int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-#if CONFIG_MISC_FIXES
- DECLARE_ALIGNED(16, uint16_t, left_col[32]);
-#else
DECLARE_ALIGNED(16, uint16_t, left_col[64]);
-#endif
DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
const int bs = 4 << tx_size;
-#if CONFIG_MISC_FIXES
+ int need_left = extend_modes[mode] & NEED_LEFT;
+ int need_above = extend_modes[mode] & NEED_ABOVE;
const uint16_t *above_ref = ref - ref_stride;
-#else
- int frame_width, frame_height;
- int x0, y0;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
-#endif
- const int need_left = extend_modes[mode] & NEED_LEFT;
- const int need_above = extend_modes[mode] & NEED_ABOVE;
- const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
- int base = 128 << (bd - 8);
+ int base = 128 << (xd->bd - 8);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
-#if CONFIG_MISC_FIXES
- (void) x;
- (void) y;
+#if CONFIG_EXT_INTRA
+ const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+ &xd->mi[0]->mbmi.ext_intra_mode_info;
+ const EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ int p_angle = 0;
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+ }
+
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ }
+#endif // CONFIG_EXT_INTRA
+
(void) plane;
- (void) need_left;
- (void) need_above;
- (void) need_aboveright;
// NEED_LEFT
- if (extend_modes[mode] & NEED_LEFT) {
+ if (need_left) {
+#if CONFIG_EXT_INTRA
+ int need_bottom;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_bottom = 0;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
+ } else {
+ need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+ }
+#else
const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif // CONFIG_EXT_INTRA
i = 0;
if (n_left_px > 0) {
for (; i < n_left_px; i++)
@@ -336,15 +941,27 @@
left_col[i] = ref[i * ref_stride - 1];
}
if (i < (bs << need_bottom))
- memset16(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+ vpx_memset16(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
} else {
- memset16(left_col, base + 1, bs << need_bottom);
+ vpx_memset16(left_col, base + 1, bs << need_bottom);
}
}
// NEED_ABOVE
- if (extend_modes[mode] & NEED_ABOVE) {
+ if (need_above) {
+#if CONFIG_EXT_INTRA
+ int need_right;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
+ } else {
+ need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+ }
+#else
const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif // CONFIG_EXT_INTRA
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px * 2);
i = n_top_px;
@@ -354,148 +971,50 @@
i += n_topright_px;
}
if (i < (bs << need_right))
- memset16(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+ vpx_memset16(&above_row[i], above_row[i - 1], (bs << need_right) - i);
} else {
- memset16(above_row, base - 1, bs << need_right);
+ vpx_memset16(above_row, base - 1, bs << need_right);
}
}
- if (extend_modes[mode] & NEED_ABOVELEFT) {
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
above_row[-1] = n_top_px > 0 ?
(n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
}
#else
- // Get current frame pointer, width and height.
- if (plane == 0) {
- frame_width = xd->cur_buf->y_width;
- frame_height = xd->cur_buf->y_height;
- } else {
- frame_width = xd->cur_buf->uv_width;
- frame_height = xd->cur_buf->uv_height;
+ if ((extend_modes[mode] & NEED_ABOVELEFT)) {
+ above_row[-1] = n_top_px > 0 ?
+ (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+ }
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ highbd_filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+ const_above_row, left_col, xd->bd);
+ return;
}
- // Get block position in current frame.
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
- y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-
- // NEED_LEFT
- if (need_left) {
- if (left_available) {
- if (xd->mb_to_bottom_edge < 0) {
- /* slower path if the block needs border extension */
- if (y0 + bs <= frame_height) {
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- } else {
- const int extend_bottom = frame_height - y0;
- for (i = 0; i < extend_bottom; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- for (; i < bs; ++i)
- left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
- }
- } else {
- /* faster path if the block does not need extension */
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- }
- } else {
- // TODO(Peter): this value should probably change for high bitdepth
- vpx_memset16(left_col, base + 1, bs);
- }
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ INTRA_FILTER filter = INTRA_FILTER_LINEAR;
+ if (plane == 0 && pick_intra_filter(p_angle))
+ filter = xd->mi[0]->mbmi.intra_filter;
+ highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col,
+ p_angle, xd->bd, filter);
+ return;
}
-
- // NEED_ABOVE
- if (need_above) {
- if (up_available) {
- const uint16_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + bs <= frame_width) {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r * sizeof(above_row[0]));
- vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width);
- }
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- }
- }
- above_row[-1] = left_available ? above_ref[-1] : (base + 1);
- } else {
- vpx_memset16(above_row, base - 1, bs);
- above_row[-1] = base - 1;
- }
- }
-
- // NEED_ABOVERIGHT
- if (need_aboveright) {
- if (up_available) {
- const uint16_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + 2 * bs <= frame_width) {
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 + bs <= frame_width) {
- const int r = frame_width - x0;
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, r * sizeof(above_row[0]));
- vpx_memset16(above_row + r, above_row[r - 1],
- x0 + 2 * bs - frame_width);
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r * sizeof(above_row[0]));
- vpx_memset16(above_row + r, above_row[r - 1],
- x0 + 2 * bs - frame_width);
- }
- // TODO(Peter) this value should probably change for high bitdepth
- above_row[-1] = left_available ? above_ref[-1] : (base + 1);
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- if (bs == 4 && right_available)
- memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
- else
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
- // TODO(Peter): this value should probably change for high bitdepth
- above_row[-1] = left_available ? above_ref[-1] : (base + 1);
- }
- }
- } else {
- vpx_memset16(above_row, base - 1, bs * 2);
- // TODO(Peter): this value should probably change for high bitdepth
- above_row[-1] = base - 1;
- }
- }
-#endif
+#endif // CONFIG_EXT_INTRA
// predict
if (mode == DC_PRED) {
-#if CONFIG_MISC_FIXES
dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
const_above_row,
left_col, xd->bd);
-#else
- dc_pred_high[left_available][up_available][tx_size](dst, dst_stride,
- const_above_row,
- left_col, xd->bd);
-#endif
} else {
pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col,
xd->bd);
@@ -506,28 +1025,44 @@
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int ref_stride, uint8_t *dst, int dst_stride,
PREDICTION_MODE mode, TX_SIZE tx_size,
-#if CONFIG_MISC_FIXES
int n_top_px, int n_topright_px,
int n_left_px, int n_bottomleft_px,
-#else
- int up_available, int left_available,
- int right_available,
-#endif
- int x, int y, int plane) {
+ int plane) {
int i;
-#if CONFIG_MISC_FIXES
DECLARE_ALIGNED(16, uint8_t, left_col[64]);
const uint8_t *above_ref = ref - ref_stride;
-#else
- DECLARE_ALIGNED(16, uint8_t, left_col[32]);
- int frame_width, frame_height;
- int x0, y0;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
-#endif
DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size;
+ int need_left = extend_modes[mode] & NEED_LEFT;
+ int need_above = extend_modes[mode] & NEED_ABOVE;
+#if CONFIG_EXT_INTRA
+ const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+ &xd->mi[0]->mbmi.ext_intra_mode_info;
+ const EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ int p_angle = 0;
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+ }
+
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ }
+#endif // CONFIG_EXT_INTRA
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
@@ -536,34 +1071,28 @@
// 129 G H .. S T T T T T
// ..
-#if CONFIG_MISC_FIXES
(void) xd;
- (void) x;
- (void) y;
(void) plane;
assert(n_top_px >= 0);
assert(n_topright_px >= 0);
assert(n_left_px >= 0);
assert(n_bottomleft_px >= 0);
-#else
- // Get current frame pointer, width and height.
- if (plane == 0) {
- frame_width = xd->cur_buf->y_width;
- frame_height = xd->cur_buf->y_height;
- } else {
- frame_width = xd->cur_buf->uv_width;
- frame_height = xd->cur_buf->uv_height;
- }
-
- // Get block position in current frame.
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
- y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-#endif
// NEED_LEFT
- if (extend_modes[mode] & NEED_LEFT) {
-#if CONFIG_MISC_FIXES
+ if (need_left) {
+#if CONFIG_EXT_INTRA
+ int need_bottom;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_bottom = 0;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
+ } else {
+ need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+ }
+#else
const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif // CONFIG_EXT_INTRA
i = 0;
if (n_left_px > 0) {
for (; i < n_left_px; i++)
@@ -578,35 +1107,23 @@
} else {
memset(left_col, 129, bs << need_bottom);
}
-#else
- if (left_available) {
- if (xd->mb_to_bottom_edge < 0) {
- /* slower path if the block needs border extension */
- if (y0 + bs <= frame_height) {
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- } else {
- const int extend_bottom = frame_height - y0;
- for (i = 0; i < extend_bottom; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- for (; i < bs; ++i)
- left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
- }
- } else {
- /* faster path if the block does not need extension */
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
- }
- } else {
- memset(left_col, 129, bs);
- }
-#endif
}
// NEED_ABOVE
- if (extend_modes[mode] & NEED_ABOVE) {
-#if CONFIG_MISC_FIXES
+ if (need_above) {
+#if CONFIG_EXT_INTRA
+ int need_right;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
+ } else {
+ need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+ }
+#else
const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif // CONFIG_EXT_INTRA
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px);
i = n_top_px;
@@ -620,115 +1137,62 @@
} else {
memset(above_row, 127, bs << need_right);
}
-#else
- if (up_available) {
- const uint8_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + bs <= frame_width) {
- memcpy(above_row, above_ref, bs);
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r);
- memset(above_row + r, above_row[r - 1], x0 + bs - frame_width);
- }
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs);
- }
- }
- above_row[-1] = left_available ? above_ref[-1] : 129;
- } else {
- memset(above_row, 127, bs);
- above_row[-1] = 127;
- }
-#endif
}
-#if CONFIG_MISC_FIXES
- if (extend_modes[mode] & NEED_ABOVELEFT) {
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
}
#else
- // NEED_ABOVERIGHT
- if (extend_modes[mode] & NEED_ABOVERIGHT) {
- if (up_available) {
- const uint8_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + 2 * bs <= frame_width) {
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, 2 * bs);
- } else {
- memcpy(above_row, above_ref, bs);
- memset(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 + bs <= frame_width) {
- const int r = frame_width - x0;
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, r);
- memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width);
- } else {
- memcpy(above_row, above_ref, bs);
- memset(above_row + bs, above_row[bs - 1], bs);
- }
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r);
- memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width);
- }
- } else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs);
- if (bs == 4 && right_available)
- memcpy(above_row + bs, above_ref + bs, bs);
- else
- memset(above_row + bs, above_row[bs - 1], bs);
- }
- }
- above_row[-1] = left_available ? above_ref[-1] : 129;
- } else {
- memset(above_row, 127, bs * 2);
- above_row[-1] = 127;
- }
+ if ((extend_modes[mode] & NEED_ABOVELEFT)) {
+ above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
}
-#endif
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+ const_above_row, left_col);
+ return;
+ }
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ INTRA_FILTER filter = INTRA_FILTER_LINEAR;
+ if (plane == 0 && pick_intra_filter(p_angle))
+ filter = xd->mi[0]->mbmi.intra_filter;
+ dr_predictor(dst, dst_stride, tx_size, const_above_row, left_col, p_angle,
+ filter);
+ return;
+ }
+#endif // CONFIG_EXT_INTRA
// predict
if (mode == DC_PRED) {
-#if CONFIG_MISC_FIXES
dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
const_above_row, left_col);
-#else
- dc_pred[left_available][up_available][tx_size](dst, dst_stride,
- const_above_row, left_col);
-#endif
} else {
pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
}
}
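
Both builders classify p_angle into the three directional zones before deciding which borders to fetch. A compact sketch equivalent to the branch ladders above (hypothetical helper, directional modes only):

    #include <assert.h>

    static void angle_to_needs(int p_angle, int *need_above, int *need_left) {
      *need_above = p_angle < 180;  // zones 1 and 2 read the above row
      *need_left = p_angle > 90;    // zones 2 and 3 read the left column
    }

    int main(void) {
      int a, l;
      angle_to_needs(45, &a, &l);   assert(a == 1 && l == 0);  // zone 1
      angle_to_needs(135, &a, &l);  assert(a == 1 && l == 1);  // zone 2
      angle_to_needs(225, &a, &l);  assert(a == 0 && l == 1);  // zone 3
      return 0;
    }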
void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
- TX_SIZE tx_size, PREDICTION_MODE mode,
- const uint8_t *ref, int ref_stride,
- uint8_t *dst, int dst_stride,
- int aoff, int loff, int plane) {
+ TX_SIZE tx_size, PREDICTION_MODE mode,
+ const uint8_t *ref, int ref_stride,
+ uint8_t *dst, int dst_stride,
+ int aoff, int loff, int plane) {
const int txw = (1 << tx_size);
const int have_top = loff || xd->up_available;
const int have_left = aoff || xd->left_available;
const int x = aoff * 4;
const int y = loff * 4;
-#if CONFIG_MISC_FIXES
const int bw = VPXMAX(2, 1 << bwl_in);
const int bh = VPXMAX(2, 1 << bhl_in);
- const int mi_row = -xd->mb_to_top_edge >> 6;
- const int mi_col = -xd->mb_to_left_edge >> 6;
+ const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
+ const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int right_available =
@@ -744,16 +1208,51 @@
const int wpx = 4 * bw;
const int hpx = 4 * bh;
const int txpx = 4 * txw;
-  int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + (wpx - x - txpx);
-  int yd =
-      (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + (hpx - y - txpx);
+  // Distance between the right edge of this prediction block to
+  // the frame right edge
+  const int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) +
+      (wpx - x - txpx);
+  // Distance between the bottom edge of this prediction block to
+  // the frame bottom edge
+  const int yd = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) +
+      (hpx - y - txpx);
+ if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
+ const int bs = 4 * (1 << tx_size);
+ const int stride = 4 * (1 << bwl_in);
+ int r, c;
+ uint8_t *map = NULL;
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors +
+ plane * PALETTE_MAX_SIZE;
#else
-  const int bw = (1 << bwl_in);
-  const int have_right = (aoff + txw) < bw;
-#endif  // CONFIG_MISC_FIXES
-#if CONFIG_MISC_FIXES
+  uint8_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors +
+      plane * PALETTE_MAX_SIZE;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+ map = xd->plane[plane != 0].color_index_map;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+ for (r = 0; r < bs; ++r)
+ for (c = 0; c < bs; ++c)
+ dst16[r * dst_stride + c] =
+ palette[map[(r + y) * stride + c + x]];
+ } else {
+ for (r = 0; r < bs; ++r)
+ for (c = 0; c < bs; ++c)
+ dst[r * dst_stride + c] =
+ (uint8_t)(palette[map[(r + y) * stride + c + x]]);
+ }
+#else
+ for (r = 0; r < bs; ++r)
+ for (c = 0; c < bs; ++c)
+ dst[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ return;
+ }
+
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
@@ -762,7 +1261,7 @@
have_top && have_right ? VPXMIN(txpx, xr) : 0,
have_left ? VPXMIN(txpx, yd + txpx) : 0,
have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
- x, y, plane, xd->bd);
+ plane);
return;
}
#endif
@@ -772,20 +1271,7 @@
have_top && have_right ? VPXMIN(txpx, xr) : 0,
have_left ? VPXMIN(txpx, yd + txpx) : 0,
have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
- x, y, plane);
-#else // CONFIG_MISC_FIXES
- (void) bhl_in;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
- tx_size, have_top, have_left, have_right,
- x, y, plane, xd->bd);
- return;
- }
-#endif
- build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
- have_top, have_left, have_right, x, y, plane);
-#endif // CONFIG_MISC_FIXES
+ plane);
}
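
When palette_size is nonzero, the block above reduces prediction to a lookup: color_index_map holds a per-pixel index into the plane's slice of palette_colors. A toy standalone example (hypothetical 2x2 block and palette):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      const uint8_t palette[3] = {30, 128, 220};  // per-plane palette slice
      const uint8_t map[4] = {0, 1, 1, 2};        // row-major 2x2 index map
      uint8_t dst[4];
      int r, c;
      for (r = 0; r < 2; ++r)
        for (c = 0; c < 2; ++c)
          dst[r * 2 + c] = palette[map[r * 2 + c]];
      assert(dst[0] == 30 && dst[3] == 220);
      return 0;
    }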
void vp10_init_intra_predictors(void) {
diff --git a/vp10/common/reconintra.h b/vp10/common/reconintra.h
index f451fb8..77489c1 100644
--- a/vp10/common/reconintra.h
+++ b/vp10/common/reconintra.h
@@ -25,6 +25,9 @@
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
int aoff, int loff, int plane);
+#if CONFIG_EXT_INTRA
+int pick_intra_filter(int angle);
+#endif // CONFIG_EXT_INTRA
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/common/restoration.c b/vp10/common/restoration.c
new file mode 100644
index 0000000..c73a2f9
--- /dev/null
+++ b/vp10/common/restoration.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vp10/common/onyxc_int.h"
+#include "vp10/common/restoration.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
+
+#define RESTORATION_RANGE 256
+#define RESTORATION_RANGE_SYM (2 * RESTORATION_RANGE + 1)
+static double restoration_filters_r_kf[RESTORATION_LEVELS_KF + 1]
+ [RESTORATION_RANGE_SYM];
+static double restoration_filters_r[RESTORATION_LEVELS + 1]
+ [RESTORATION_RANGE_SYM];
+static double restoration_filters_s_kf[RESTORATION_LEVELS_KF + 1]
+ [RESTORATION_WIN][RESTORATION_WIN];
+static double restoration_filters_s[RESTORATION_LEVELS + 1]
+ [RESTORATION_WIN][RESTORATION_WIN];
+
+void vp10_loop_restoration_precal(void) {
+ int i;
+  for (i = 1; i < RESTORATION_LEVELS_KF + 1; i++) {
+ const restoration_params_t param = vp10_restoration_level_to_params(i, 1);
+ const int sigma_x = param.sigma_x;
+ const int sigma_y = param.sigma_y;
+ const int sigma_r = param.sigma_r;
+ const double sigma_r_d = (double)sigma_r / RESTORATION_PRECISION;
+ const double sigma_x_d = (double)sigma_x / RESTORATION_PRECISION;
+ const double sigma_y_d = (double)sigma_y / RESTORATION_PRECISION;
+
+ double *fr = restoration_filters_r_kf[i] + RESTORATION_RANGE;
+ int j, x, y;
+ for (j = 0; j <= RESTORATION_RANGE; j++) {
+ fr[j] = exp(-(j * j) / (2 * sigma_r_d * sigma_r_d));
+ fr[-j] = fr[j];
+ }
+ for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) {
+ for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) {
+ restoration_filters_s_kf[i][y + RESTORATION_HALFWIN]
+ [x + RESTORATION_HALFWIN] =
+ exp(-(x * x) / (2 * sigma_x_d * sigma_x_d)
+ -(y * y) / (2 * sigma_y_d * sigma_y_d));
+ }
+ }
+ }
+  for (i = 1; i < RESTORATION_LEVELS + 1; i++) {
+ const restoration_params_t param = vp10_restoration_level_to_params(i, 0);
+ const int sigma_x = param.sigma_x;
+ const int sigma_y = param.sigma_y;
+ const int sigma_r = param.sigma_r;
+ const double sigma_r_d = (double)sigma_r / RESTORATION_PRECISION;
+ const double sigma_x_d = (double)sigma_x / RESTORATION_PRECISION;
+ const double sigma_y_d = (double)sigma_y / RESTORATION_PRECISION;
+
+ double *fr = restoration_filters_r[i] + RESTORATION_RANGE;
+ int j, x, y;
+ for (j = 0; j <= RESTORATION_RANGE; j++) {
+ fr[j] = exp(-(j * j) / (2 * sigma_r_d * sigma_r_d));
+ fr[-j] = fr[j];
+ }
+ for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) {
+ for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) {
+ restoration_filters_s
+ [i][y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] =
+ exp(-(x * x) / (2 * sigma_x_d * sigma_x_d)
+ -(y * y) / (2 * sigma_y_d * sigma_y_d));
+ }
+ }
+ }
+}
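
vp10_loop_restoration_precal trades memory for speed: the filter loops index these tables instead of calling exp() per pixel pair. A minimal sketch of the range-kernel half (hypothetical sigma_r of 30/16, the level-1 value from the non-keyframe table in restoration.h):

    #include <math.h>
    #include <stdio.h>

    #define RANGE 256  // mirrors RESTORATION_RANGE

    int main(void) {
      static double fr_store[2 * RANGE + 1];
      double *fr = fr_store + RANGE;     // indexable by signed difference
      const double sigma_r = 30.0 / 16;  // sigma_r / RESTORATION_PRECISION
      int j;
      for (j = 0; j <= RANGE; j++) {
        fr[j] = exp(-(j * j) / (2 * sigma_r * sigma_r));
        fr[-j] = fr[j];
      }
      printf("w(0)=%f w(5)=%f\n", fr[0], fr[5]);  // 1.0 and roughly 0.0286
      return 0;
    }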
+
+int vp10_restoration_level_bits(const VP10_COMMON *const cm) {
+ return cm->frame_type == KEY_FRAME ?
+ RESTORATION_LEVEL_BITS_KF : RESTORATION_LEVEL_BITS;
+}
+
+int vp10_loop_restoration_used(int level, int kf) {
+ const restoration_params_t param =
+ vp10_restoration_level_to_params(level, kf);
+ return (param.sigma_x && param.sigma_y && param.sigma_r);
+}
+
+void vp10_loop_restoration_init(restoration_info_n *rst,
+ int level, int kf) {
+ rst->restoration_used = vp10_loop_restoration_used(level, kf);
+
+ if (rst->restoration_used) {
+ int i;
+ rst->wr_lut = kf ? restoration_filters_r_kf[level] :
+ restoration_filters_r[level];
+ for (i = 0; i < RESTORATION_WIN; i++)
+ rst->wx_lut[i] = kf ? restoration_filters_s_kf[level][i] :
+ restoration_filters_s[level][i];
+ }
+}
+
+static int is_in_image(int x, int y, int width, int height) {
+ return (x >= 0 && x < width && y >= 0 && y < height);
+}
+
+static void loop_restoration_filter(uint8_t *data, int width, int height,
+ int stride, restoration_info_n *rst,
+ uint8_t *tmpdata, int tmpstride) {
+ int i, j;
+ const double *wr_lut_ = rst->wr_lut + RESTORATION_RANGE;
+
+ uint8_t *data_p = data;
+ uint8_t *tmpdata_p = tmpdata;
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ int x, y;
+ double flsum = 0, wtsum = 0, wt;
+ uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
+ for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+ for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+ if (!is_in_image(j + x, i + y, width, height))
+ continue;
+ wt = rst->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] *
+ wr_lut_[data_p2[x] - data_p[j]];
+ wtsum += wt;
+ flsum += wt * data_p2[x];
+ }
+ data_p2 += stride;
+ }
+ assert(wtsum > 0);
+ tmpdata_p[j] = clip_pixel((int)(flsum / wtsum + 0.5));
+ }
+ tmpdata_p += tmpstride;
+ data_p += stride;
+ }
+
+ for (i = 0; i < height; ++i) {
+ memcpy(data + i * stride, tmpdata + i * tmpstride,
+ width * sizeof(*data));
+ }
+}
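
Per pixel, the filter above computes a normalized weighted average, sum(w * I) / sum(w), with each weight the product of the spatial and range kernels; the centre tap always contributes weight 1, so wtsum stays positive. A tiny numeric check of that normalization (hypothetical weights and samples):

    #include <assert.h>

    int main(void) {
      const double wt[3] = {1.0, 0.5, 0.5};  // spatial * range weights
      const double px[3] = {100, 104, 96};   // centre and two neighbours
      double flsum = 0, wtsum = 0;
      int k;
      for (k = 0; k < 3; ++k) {
        wtsum += wt[k];
        flsum += wt[k] * px[k];
      }
      assert((int)(flsum / wtsum + 0.5) == 100);  // (100 + 52 + 48) / 2
      return 0;
    }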
+#if 0 // TODO(yaowu): remove when the experiment is finalized
+// Normalized non-separable filter where weights all sum to 1
+static void loop_restoration_filter_norm(uint8_t *data, int width, int height,
+ int stride, restoration_info_n *rst,
+ uint8_t *tmpdata, int tmpstride) {
+ int i, j;
+ uint8_t *data_p = data;
+ uint8_t *tmpdata_p = tmpdata;
+ for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) {
+ for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; ++j) {
+ int x, y;
+ double flsum = 0;
+ uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
+ for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+ for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+ flsum += data_p2[x] *
+ rst->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN];
+ }
+ data_p2 += stride;
+ }
+ tmpdata_p[j] = clip_pixel((int)(flsum + 0.5));
+ }
+ tmpdata_p += tmpstride;
+ data_p += stride;
+ }
+ for (i = 0; i < height; ++i) {
+ memcpy(data + i * stride, tmpdata + i * tmpstride,
+ width * sizeof(*data));
+ }
+}
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void loop_restoration_filter_highbd(
+ uint8_t *data8, int width, int height,
+ int stride, restoration_info_n *rst,
+ uint8_t *tmpdata8, int tmpstride, int bit_depth) {
+ int i, j;
+ const double *wr_lut_ = rst->wr_lut + RESTORATION_RANGE;
+
+ uint16_t *data = CONVERT_TO_SHORTPTR(data8);
+ uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+ uint16_t *data_p = data;
+ uint16_t *tmpdata_p = tmpdata;
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ int x, y, diff_r;
+ double flsum = 0, wtsum = 0, wt;
+ uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
+
+ for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+ for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+ if (!is_in_image(j + x, i + y, width, height))
+ continue;
+
+ diff_r = (data_p2[x] - data_p[j]) >> (bit_depth - 8);
+ assert(diff_r >= -RESTORATION_RANGE && diff_r <= RESTORATION_RANGE);
+
+ wt = rst->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] *
+ wr_lut_[diff_r];
+ wtsum += wt;
+ flsum += wt * data_p2[x];
+ }
+ data_p2 += stride;
+ }
+
+ assert(wtsum > 0);
+ tmpdata_p[j] = (int)(flsum / wtsum + 0.5);
+ }
+ tmpdata_p += tmpstride;
+ data_p += stride;
+ }
+ for (i = 0; i < height; ++i) {
+ memcpy(data + i * stride, tmpdata + i * tmpstride,
+ width * sizeof(*data));
+ }
+}
+
+#if 0 // TODO(yaowu): remove when the experiment is finalized
+// Normalized non-separable filter where weights all sum to 1
+static void loop_restoration_filter_norm_highbd(
+ uint8_t *data8, int width, int height,
+ int stride, restoration_info_n *rst,
+ uint8_t *tmpdata8, int tmpstride) {
+ int i, j;
+ uint16_t *data = CONVERT_TO_SHORTPTR(data8);
+ uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+ uint16_t *data_p = data;
+ uint16_t *tmpdata_p = tmpdata;
+ for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) {
+ for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; ++j) {
+ int x, y;
+ double flsum = 0;
+ uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
+ for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+ for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+ flsum += data_p2[x] *
+ rst->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN];
+ }
+ data_p2 += stride;
+ }
+ tmpdata_p[j] = (int)(flsum + 0.5);
+ }
+ tmpdata_p += tmpstride;
+ data_p += stride;
+ }
+ for (i = 0; i < height; ++i) {
+ memcpy(data + i * stride, tmpdata + i * tmpstride,
+ width * sizeof(*data));
+ }
+}
+#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame,
+ VP10_COMMON *cm,
+ int start_mi_row, int end_mi_row,
+ int y_only) {
+ const int ywidth = frame->y_crop_width;
+ const int ystride = frame->y_stride;
+ const int uvwidth = frame->uv_crop_width;
+ const int uvstride = frame->uv_stride;
+ const int ystart = start_mi_row << MI_SIZE_LOG2;
+ const int uvstart = ystart >> cm->subsampling_y;
+ int yend = end_mi_row << MI_SIZE_LOG2;
+ int uvend = yend >> cm->subsampling_y;
+ YV12_BUFFER_CONFIG *tmp_buf;
+ yend = VPXMIN(yend, cm->height);
+ uvend = VPXMIN(uvend, cm->subsampling_y ? (cm->height + 1) >> 1 : cm->height);
+
+ if (vpx_realloc_frame_buffer(&cm->tmp_loop_buf, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VP9_DEC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL) < 0)
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate tmp restoration buffer");
+
+ tmp_buf = &cm->tmp_loop_buf;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ loop_restoration_filter_highbd(
+ frame->y_buffer + ystart * ystride,
+ ywidth, yend - ystart, ystride, &cm->rst_info,
+ tmp_buf->y_buffer + ystart * tmp_buf->y_stride,
+ tmp_buf->y_stride, cm->bit_depth);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ loop_restoration_filter(
+ frame->y_buffer + ystart * ystride,
+ ywidth, yend - ystart, ystride, &cm->rst_info,
+ tmp_buf->y_buffer + ystart * tmp_buf->y_stride,
+ tmp_buf->y_stride);
+ if (!y_only) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ loop_restoration_filter_highbd(
+ frame->u_buffer + uvstart * uvstride,
+ uvwidth, uvend - uvstart, uvstride, &cm->rst_info,
+ tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride,
+ tmp_buf->uv_stride, cm->bit_depth);
+ loop_restoration_filter_highbd(
+ frame->v_buffer + uvstart * uvstride,
+ uvwidth, uvend - uvstart, uvstride, &cm->rst_info,
+ tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride,
+ tmp_buf->uv_stride, cm->bit_depth);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ loop_restoration_filter(
+ frame->u_buffer + uvstart * uvstride,
+ uvwidth, uvend - uvstart, uvstride, &cm->rst_info,
+ tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride,
+ tmp_buf->uv_stride);
+ loop_restoration_filter(
+ frame->v_buffer + uvstart * uvstride,
+ uvwidth, uvend - uvstart, uvstride, &cm->rst_info,
+ tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride,
+ tmp_buf->uv_stride);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+}
+
+void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame,
+ VP10_COMMON *cm,
+ int restoration_level,
+ int y_only, int partial_frame) {
+ int start_mi_row, end_mi_row, mi_rows_to_filter;
+ vp10_loop_restoration_init(&cm->rst_info, restoration_level,
+ cm->frame_type == KEY_FRAME);
+ if (!cm->rst_info.restoration_used)
+ return;
+ start_mi_row = 0;
+ mi_rows_to_filter = cm->mi_rows;
+ if (partial_frame && cm->mi_rows > 8) {
+ start_mi_row = cm->mi_rows >> 1;
+ start_mi_row &= 0xfffffff8;
+ mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
+ }
+ end_mi_row = start_mi_row + mi_rows_to_filter;
+ vp10_loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, y_only);
+}
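
A worked example of the partial_frame row selection above (hypothetical frame size): the probe starts mid-frame, aligned down to a multiple of 8 MI rows, and spans at least 8 rows.

    #include <stdio.h>

    #define VPXMAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void) {
      const int mi_rows = 69;                   // hypothetical, in MI units
      const int start = (mi_rows >> 1) & ~7;    // 34 aligned down -> 32
      const int rows = VPXMAX(mi_rows / 8, 8);  // 8
      printf("restore mi rows [%d, %d)\n", start, start + rows);  // [32, 40)
      return 0;
    }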
diff --git a/vp10/common/restoration.h b/vp10/common/restoration.h
new file mode 100644
index 0000000..3859191
--- /dev/null
+++ b/vp10/common/restoration.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_RESTORATION_H_
+#define VP10_COMMON_RESTORATION_H_
+
+#include "vpx_ports/mem.h"
+#include "./vpx_config.h"
+
+#include "vp10/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RESTORATION_LEVEL_BITS_KF 4
+#define RESTORATION_LEVELS_KF (1 << RESTORATION_LEVEL_BITS_KF)
+#define RESTORATION_LEVEL_BITS 3
+#define RESTORATION_LEVELS (1 << RESTORATION_LEVEL_BITS)
+#define DEF_RESTORATION_LEVEL 2
+
+#define RESTORATION_PRECISION 16
+#define RESTORATION_HALFWIN 3
+#define RESTORATION_WIN (2 * RESTORATION_HALFWIN + 1)
+
+typedef struct restoration_params {
+  int sigma_x;  // spatial standard deviation, x (in 1/16th units)
+  int sigma_y;  // spatial standard deviation, y (in 1/16th units)
+  int sigma_r;  // range (intensity) standard deviation
+} restoration_params_t;
+
+static const restoration_params_t
+ restoration_level_to_params_arr[RESTORATION_LEVELS + 1] = {
+ // Values are rounded to 1/16 th precision
+ {0, 0, 0}, // 0 - default
+ {8, 9, 30},
+ {9, 8, 30},
+ {9, 11, 32},
+ {11, 9, 32},
+ {14, 14, 32},
+ {18, 18, 36},
+ {24, 24, 40},
+ {32, 32, 40},
+};
+
+static const restoration_params_t
+ restoration_level_to_params_arr_kf[RESTORATION_LEVELS_KF + 1] = {
+  // Values are rounded to 1/16th precision
+ {0, 0, 0}, // 0 - default
+ {8, 8, 30},
+ {9, 9, 32},
+ {10, 10, 32},
+ {12, 12, 32},
+ {14, 14, 32},
+ {18, 18, 36},
+ {24, 24, 40},
+ {30, 30, 44},
+ {36, 36, 48},
+ {42, 42, 48},
+ {48, 48, 48},
+ {48, 48, 56},
+ {56, 56, 48},
+ {56, 56, 56},
+ {56, 56, 64},
+ {64, 64, 48},
+};
+
+typedef struct {
+ double *wx_lut[RESTORATION_WIN];
+ double *wr_lut;
+ int restoration_sigma_x_set;
+ int restoration_sigma_y_set;
+ int restoration_sigma_r_set;
+ int restoration_used;
+} restoration_info_n;
+
+int vp10_restoration_level_bits(const struct VP10Common *const cm);
+int vp10_loop_restoration_used(int level, int kf);
+
+static INLINE restoration_params_t vp10_restoration_level_to_params(
+ int index, int kf) {
+ return kf ? restoration_level_to_params_arr_kf[index] :
+ restoration_level_to_params_arr[index];
+}
+
+void vp10_loop_restoration_init(restoration_info_n *rst, int level, int kf);
+void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame,
+ struct VP10Common *cm,
+ int restoration_level,
+ int y_only, int partial_frame);
+void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame,
+ struct VP10Common *cm,
+ int start_mi_row, int end_mi_row,
+ int y_only);
+void vp10_loop_restoration_precal(void);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_RESTORATION_H_
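
A usage sketch for the new API (hypothetical caller): a transmitted restoration level indexes the sigma tables, and level 0 leaves the filter disabled.

    #include "vp10/common/restoration.h"

    static int example_level_params(void) {
      // Non-keyframe level 2 -> {9, 8, 30} per the table above
      // (sigmas in 1/16th units).
      const restoration_params_t p = vp10_restoration_level_to_params(2, 0);
      return p.sigma_x && p.sigma_y && p.sigma_r;  // as in ..._used()
    }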
diff --git a/vp10/common/scale.c b/vp10/common/scale.c
index ce6062c..65e14a9 100644
--- a/vp10/common/scale.c
+++ b/vp10/common/scale.c
@@ -46,15 +46,15 @@
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
- int other_w, int other_h,
- int this_w, int this_h,
- int use_highbd) {
+ int other_w, int other_h,
+ int this_w, int this_h,
+ int use_highbd) {
#else
void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
- int other_w, int other_h,
- int this_w, int this_h) {
+ int other_w, int other_h,
+ int this_w, int this_h) {
#endif
- if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
+ if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
sf->x_scale_fp = REF_INVALID_SCALE;
sf->y_scale_fp = REF_INVALID_SCALE;
return;
@@ -79,6 +79,16 @@
// applied in one direction only, and not at all for 0,0, seems to give the
// best quality, but it may be worth trying an additional mode that does
// do the filtering on full-pel.
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ sf->predict_ni[0][0][0] = vpx_convolve8_c;
+ sf->predict_ni[0][0][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[0][1][0] = vpx_convolve8_c;
+ sf->predict_ni[0][1][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[1][0][0] = vpx_convolve8_c;
+ sf->predict_ni[1][0][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[1][1][0] = vpx_convolve8;
+ sf->predict_ni[1][1][1] = vpx_convolve8_avg;
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
if (sf->x_step_q4 == 16) {
if (sf->y_step_q4 == 16) {
// No scaling in either direction.
@@ -119,8 +129,19 @@
// 2D subpel motion always gets filtered in both directions
sf->predict[1][1][0] = vpx_convolve8;
sf->predict[1][1][1] = vpx_convolve8_avg;
+
#if CONFIG_VP9_HIGHBITDEPTH
if (use_highbd) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ sf->highbd_predict_ni[0][0][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[0][0][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[0][1][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[0][1][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[1][0][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[1][0][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[1][1][0] = vpx_highbd_convolve8;
+ sf->highbd_predict_ni[1][1][1] = vpx_highbd_convolve8_avg;
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
if (sf->x_step_q4 == 16) {
if (sf->y_step_q4 == 16) {
// No scaling in either direction.
@@ -162,5 +183,5 @@
sf->highbd_predict[1][1][0] = vpx_highbd_convolve8;
sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg;
}
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
diff --git a/vp10/common/scale.h b/vp10/common/scale.h
index 833f6c4..604b9d2 100644
--- a/vp10/common/scale.h
+++ b/vp10/common/scale.h
@@ -34,7 +34,15 @@
convolve_fn_t predict[2][2][2]; // horiz, vert, avg
#if CONFIG_VP9_HIGHBITDEPTH
highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Functions for non-interpolating filters (those that filter zero offsets)
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ convolve_fn_t predict_ni[2][2][2]; // horiz, vert, avg
+#if CONFIG_VP9_HIGHBITDEPTH
+ highbd_convolve_fn_t highbd_predict_ni[2][2][2]; // horiz, vert, avg
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
};
MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
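
The predict tables are indexed on the three axes named in the struct comment (horiz, vert, avg). A hypothetical dispatch sketch showing the convention, with a stand-in function-pointer type:

    typedef void (*convolve_fn_t)(void);  // stand-in signature for this sketch

    static convolve_fn_t pick_predictor(convolve_fn_t predict[2][2][2],
                                        int subpel_x, int subpel_y, int avg) {
      // e.g. 2-D subpel motion with compound averaging -> predict[1][1][1]
      return predict[subpel_x != 0][subpel_y != 0][avg != 0];
    }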
@@ -48,7 +56,7 @@
void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h);
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
static INLINE int vp10_is_valid_scale(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_INVALID_SCALE &&
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 7217f6d..23a7b98 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -702,7 +702,228 @@
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
};
-const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES] = {
+#if CONFIG_EXT_TX
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+ { // TX_4X4
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ }, { // TX_8X8
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ }, { // TX_16X16
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ }, { // TX_32X32
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ }
+};
+
+const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = {
+ { // TX_4X4
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ }, { // TX_8X8
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ }, { // TX_16X16
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
+ }, { // TX_32X32
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ }
+};
+
+#else // CONFIG_EXT_TX
+
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ // TX_4X4
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
@@ -714,14 +935,21 @@
{col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}
}, { // TX_16X16
- {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors},
{row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
{col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}
+ {default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors}
}, { // TX_32X32
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors},
}
};
+#endif // CONFIG_EXT_TX
diff --git a/vp10/common/scan.h b/vp10/common/scan.h
index f5a020f..aadae40 100644
--- a/vp10/common/scan.h
+++ b/vp10/common/scan.h
@@ -30,7 +30,7 @@
} scan_order;
extern const scan_order vp10_default_scan_orders[TX_SIZES];
-extern const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES];
+extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES];
static INLINE int get_coef_context(const int16_t *neighbors,
const uint8_t *token_cache, int c) {
@@ -38,8 +38,31 @@
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
-static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) {
- return &vp10_scan_orders[tx_size][tx_type];
+static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size,
+ TX_TYPE tx_type) {
+ return &vp10_intra_scan_orders[tx_size][tx_type];
+}
+
+#if CONFIG_EXT_TX
+extern const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES];
+
+static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size,
+ TX_TYPE tx_type) {
+ return &vp10_inter_scan_orders[tx_size][tx_type];
+}
+#endif // CONFIG_EXT_TX
+
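+// Selects the scan order for a transform block. With CONFIG_EXT_TX, inter
+// and intra blocks use separate per-tx-type tables; otherwise both share the
+// intra table and is_inter is ignored.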
+static INLINE const scan_order *get_scan(TX_SIZE tx_size,
+ TX_TYPE tx_type,
+ int is_inter) {
+#if CONFIG_EXT_TX
+  return is_inter ? &vp10_inter_scan_orders[tx_size][tx_type]
+                  : &vp10_intra_scan_orders[tx_size][tx_type];
+#else
+ (void) is_inter;
+ return &vp10_intra_scan_orders[tx_size][tx_type];
+#endif // CONFIG_EXT_TX
}
#ifdef __cplusplus
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index 0c7a1c2..6e959ed 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -366,10 +366,45 @@
for (j = 0; j < SWITCHABLE_FILTERS; j++)
cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
+#if CONFIG_REF_MV
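+ // Accumulate the per-thread counters added by the REF_MV experiment into
+ // the common frame-level counts.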
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.newmv_mode[i][j] += counts->newmv_mode[i][j];
+
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.zeromv_mode[i][j] += counts->zeromv_mode[i][j];
+
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.refmv_mode[i][j] += counts->refmv_mode[i][j];
+
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.drl_mode0[i][j] += counts->drl_mode0[i][j];
+
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.drl_mode1[i][j] += counts->drl_mode1[i][j];
+
+#if CONFIG_EXT_INTER
+ for (j = 0; j < 2; ++j)
+ cm->counts.new2mv_mode[j] += counts->new2mv_mode[j];
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
+
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
for (j = 0; j < INTER_MODES; j++)
cm->counts.inter_mode[i][j] += counts->inter_mode[i][j];
+#if CONFIG_EXT_INTER
+ for (i = 0; i < INTER_MODE_CONTEXTS; i++)
+ for (j = 0; j < INTER_COMPOUND_MODES; j++)
+ cm->counts.inter_compound_mode[i][j] +=
+ counts->inter_compound_mode[i][j];
+#endif // CONFIG_EXT_INTER
+
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
cm->counts.intra_inter[i][j] += counts->intra_inter[i][j];
@@ -379,13 +414,14 @@
cm->counts.comp_inter[i][j] += counts->comp_inter[i][j];
for (i = 0; i < REF_CONTEXTS; i++)
- for (j = 0; j < 2; j++)
+ for (j = 0; j < (SINGLE_REFS - 1); j++)
for (k = 0; k < 2; k++)
- cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
+ cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
for (i = 0; i < REF_CONTEXTS; i++)
- for (j = 0; j < 2; j++)
- cm->counts.comp_ref[i][j] += counts->comp_ref[i][j];
+ for (j = 0; j < (COMP_REFS - 1); j++)
+ for (k = 0; k < 2; k++)
+ cm->counts.comp_ref[i][j][k] += counts->comp_ref[i][j][k];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
for (j = 0; j < TX_SIZES; j++)
@@ -401,6 +437,12 @@
for (i = 0; i < TX_SIZES; i++)
cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
+#if CONFIG_VAR_TX
+ for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.txfm_partition[i][j] += counts->txfm_partition[i][j];
+#endif // CONFIG_VAR_TX
+
for (i = 0; i < SKIP_CONTEXTS; i++)
for (j = 0; j < 2; j++)
cm->counts.skip[i][j] += counts->skip[i][j];
@@ -435,6 +477,26 @@
comps->fp[i] += comps_t->fp[i];
}
+#if CONFIG_EXT_TX
+ for (i = 0; i < EXT_TX_SIZES; i++) {
+ int s, k;
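+ // Set 0 allows only a single transform type and is never signaled, so
+ // accumulation starts at set 1.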
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (use_inter_ext_tx_for_txsize[s][i]) {
+ for (k = 0; k < TX_TYPES; k++)
+ cm->counts.inter_ext_tx[s][i][k] += counts->inter_ext_tx[s][i][k];
+ }
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (use_intra_ext_tx_for_txsize[s][i]) {
+ int j;
+ for (j = 0; j < INTRA_MODES; ++j)
+ for (k = 0; k < TX_TYPES; k++)
+ cm->counts.intra_ext_tx[s][i][j][k] +=
+ counts->intra_ext_tx[s][i][j][k];
+ }
+ }
+ }
+#else
for (i = 0; i < EXT_TX_SIZES; i++) {
int j;
for (j = 0; j < TX_TYPES; ++j)
@@ -445,8 +507,17 @@
for (k = 0; k < TX_TYPES; k++)
cm->counts.inter_ext_tx[i][k] += counts->inter_ext_tx[i][k];
}
+#endif // CONFIG_EXT_TX
-#if CONFIG_MISC_FIXES
+#if CONFIG_SUPERTX
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; i++)
+ for (j = 0; j < TX_SIZES; j++)
+ for (k = 0; k < 2; k++)
+ cm->counts.supertx[i][j][k] += counts->supertx[i][j][k];
+ for (i = 0; i < TX_SIZES; i++)
+ cm->counts.supertx_size[i] += counts->supertx_size[i];
+#endif // CONFIG_SUPERTX
+
for (i = 0; i < PREDICTION_PROBS; i++)
for (j = 0; j < 2; j++)
cm->counts.seg.pred[i][j] += counts->seg.pred[i][j];
@@ -455,5 +526,13 @@
cm->counts.seg.tree_total[i] += counts->seg.tree_total[i];
cm->counts.seg.tree_mispred[i] += counts->seg.tree_mispred[i];
}
-#endif
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < PLANE_TYPES; ++i)
+ for (j = 0; j < 2; ++j)
+ cm->counts.ext_intra[i][j] += counts->ext_intra[i][j];
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ for (j = 0; j < INTRA_FILTERS; ++j)
+ cm->counts.intra_filter[i][j] += counts->intra_filter[i][j];
+#endif // CONFIG_EXT_INTRA
}
diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c
new file mode 100644
index 0000000..e8c0c92
--- /dev/null
+++ b/vp10/common/vp10_convolve.c
@@ -0,0 +1,199 @@
+#include <assert.h>
+
+#include "vp10/common/filter.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_ports/mem.h"
+
+#define MAX_BLOCK_WIDTH (64)
+#define MAX_BLOCK_HEIGHT (64)
+#define MAX_STEP (32)
+#define MAX_FILTER_TAP (12)
+
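+// Sub-pel positions use 4 fractional bits ("q4", 1/16-pel units): the
+// integer source index is x_q4 >> SUBPEL_BITS and the low bits pick one of
+// the interpolation kernels via x_q4 & SUBPEL_MASK. A step of 16, i.e.
+// (1 << SUBPEL_BITS), per output pixel means no scaling.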
+static void convolve_horiz(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_x_q4, int x_step_q4, int avg) {
+ int x, y;
+ int filter_size = filter_params.tap;
+ src -= filter_size / 2 - 1;
+ for (y = 0; y < h; ++y) {
+ int x_q4 = subpel_x_q4;
+ for (x = 0; x < w; ++x) {
+ const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+ const int16_t *x_filter =
+ vp10_get_interp_filter_kernel(filter_params, x_q4 & SUBPEL_MASK);
+ int k, sum = 0;
+ for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
+ if (avg) {
+ dst[x] = ROUND_POWER_OF_TWO(
+ dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
+ } else {
+ dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ x_q4 += x_step_q4;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
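+// Vertical pass: the same scheme as convolve_horiz, but filtering down
+// columns and stepping y_q4 by y_step_q4 per output row.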
+static void convolve_vert(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_y_q4, int y_step_q4, int avg) {
+ int x, y;
+ int filter_size = filter_params.tap;
+ src -= src_stride * (filter_size / 2 - 1);
+
+ for (x = 0; x < w; ++x) {
+ int y_q4 = subpel_y_q4;
+ for (y = 0; y < h; ++y) {
+ const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+ const int16_t *y_filter =
+ vp10_get_interp_filter_kernel(filter_params, y_q4 & SUBPEL_MASK);
+ int k, sum = 0;
+ for (k = 0; k < filter_size; ++k)
+ sum += src_y[k * src_stride] * y_filter[k];
+ if (avg) {
+ dst[y * dst_stride] = ROUND_POWER_OF_TWO(
+ dst[y * dst_stride] +
+ clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
+ 1);
+ } else {
+ dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ y_q4 += y_step_q4;
+ }
+ ++src;
+ ++dst;
+ }
+}
+
+void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
+ int y_step_q4, int avg) {
+ int filter_size = filter_params.tap;
+
+ // temp's size is set to (maximum possible intermediate_height) *
+ // MAX_BLOCK_WIDTH
+ uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+ MAX_FILTER_TAP) *
+ MAX_BLOCK_WIDTH];
+ int temp_stride = MAX_BLOCK_WIDTH;
+
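+ // The horizontal pass must produce every row the vertical filter will
+ // read: h output rows plus the filter's vertical support, adjusted for the
+ // sub-pel start position and step.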
+ int intermediate_height =
+ (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+ assert(w <= MAX_BLOCK_WIDTH);
+ assert(h <= MAX_BLOCK_HEIGHT);
+ assert(y_step_q4 <= MAX_STEP);
+ assert(x_step_q4 <= MAX_STEP);
+ assert(filter_params.tap <= MAX_FILTER_TAP);
+
+ convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
+ temp_stride, w, intermediate_height, filter_params,
+ subpel_x_q4, x_step_q4, 0);
+ convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
+ dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
+}
+
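+// The high-bitdepth variants below mirror the 8-bit path on uint16_t
+// samples: buffers arrive as uint8_t pointers and are unwrapped with
+// CONVERT_TO_SHORTPTR, and clipping honors the bit depth bd.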
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_convolve_horiz(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_x_q4, int x_step_q4, int avg,
+ int bd) {
+ int x, y;
+ int filter_size = filter_params.tap;
+ src -= filter_size / 2 - 1;
+ for (y = 0; y < h; ++y) {
+ int x_q4 = subpel_x_q4;
+ for (x = 0; x < w; ++x) {
+ const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+ const int16_t *x_filter =
+ vp10_get_interp_filter_kernel(filter_params, x_q4 & SUBPEL_MASK);
+ int k, sum = 0;
+ for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
+ if (avg)
+ dst[x] = ROUND_POWER_OF_TWO(
+ dst[x] +
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
+ 1);
+ else
+ dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+ x_q4 += x_step_q4;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+static void highbd_convolve_vert(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_y_q4, int y_step_q4, int avg,
+ int bd) {
+ int x, y;
+ int filter_size = filter_params.tap;
+ src -= src_stride * (filter_size / 2 - 1);
+
+ for (x = 0; x < w; ++x) {
+ int y_q4 = subpel_y_q4;
+ for (y = 0; y < h; ++y) {
+ const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+ const int16_t *y_filter =
+ vp10_get_interp_filter_kernel(filter_params, y_q4 & SUBPEL_MASK);
+ int k, sum = 0;
+ for (k = 0; k < filter_size; ++k)
+ sum += src_y[k * src_stride] * y_filter[k];
+ if (avg) {
+ dst[y * dst_stride] = ROUND_POWER_OF_TWO(
+ dst[y * dst_stride] +
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
+ 1);
+ } else {
+ dst[y * dst_stride] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+ }
+ y_q4 += y_step_q4;
+ }
+ ++src;
+ ++dst;
+ }
+}
+
+void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
+ int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_x_q4, int x_step_q4,
+ const int subpel_y_q4, int y_step_q4, int avg,
+ int bd) {
+ int filter_size = filter_params.tap;
+
+ // temp's size is set to (maximum possible intermediate_height) *
+ // MAX_BLOCK_WIDTH
+ uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+ MAX_FILTER_TAP) *
+ MAX_BLOCK_WIDTH];
+ int temp_stride = MAX_BLOCK_WIDTH;
+
+ int intermediate_height =
+ (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+ assert(w <= MAX_BLOCK_WIDTH);
+ assert(h <= MAX_BLOCK_HEIGHT);
+ assert(y_step_q4 <= MAX_STEP);
+ assert(x_step_q4 <= MAX_STEP);
+ assert(filter_params.tap <= MAX_FILTER_TAP);
+
+ highbd_convolve_horiz(
+ CONVERT_TO_SHORTPTR(src8 - src_stride * (filter_size / 2 - 1)),
+ src_stride, temp, temp_stride, w, intermediate_height, filter_params,
+ subpel_x_q4, x_step_q4, 0, bd);
+ highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
+ CONVERT_TO_SHORTPTR(dst8), dst_stride, w, h,
+ filter_params, subpel_y_q4, y_step_q4, avg, bd);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/common/vp10_convolve.h b/vp10/common/vp10_convolve.h
new file mode 100644
index 0000000..a3d6c65
--- /dev/null
+++ b/vp10/common/vp10_convolve.h
@@ -0,0 +1,31 @@
+#ifndef VP10_COMMON_VP10_CONVOLVE_H_
+#define VP10_COMMON_VP10_CONVOLVE_H_
+#include "vp10/common/filter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_convolve(const uint8_t *src, int src_stride,
+                   uint8_t *dst, int dst_stride,
+                   int w, int h,
+                   const InterpFilterParams filter_params,
+                   const int subpel_x_q4, int x_step_q4,
+                   const int subpel_y_q4, int y_step_q4, int avg);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_convolve(const uint8_t *src, int src_stride,
+                          uint8_t *dst, int dst_stride,
+                          int w, int h,
+                          const InterpFilterParams filter_params,
+                          const int subpel_x_q4, int x_step_q4,
+                          const int subpel_y_q4, int y_step_q4, int avg,
+                          int bd);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_VP10_CONVOLVE_H_
diff --git a/vp10/common/vp10_fwd_txfm1d.c b/vp10/common/vp10_fwd_txfm1d.c
new file mode 100644
index 0000000..f3da5c9
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm1d.c
@@ -0,0 +1,1531 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "vp10/common/vp10_fwd_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ int i, j; \
+ for (i = 0; i < size; ++i) { \
+ int buf_bit = get_max_bit(abs(buf[i])) + 1; \
+ if (buf_bit > bit) { \
+ printf("======== %s overflow ========\n", __func__); \
+ printf("stage: %d node: %d\n", stage, i); \
+ printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+ printf("input:\n"); \
+ for (j = 0; j < size; j++) { \
+ printf("%d,", input[j]); \
+ } \
+ printf("\n"); \
+ assert(0); \
+ } \
+ } \
+ }
+#else
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ (void) stage; \
+ (void) input; \
+ (void) buf; \
+ (void) size; \
+ (void) bit; \
+ }
+#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
+
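+// Each transform below is a cascade of butterfly stages that ping-pongs
+// between the output and step buffers. half_btf(w0, in0, w1, in1, bit), from
+// vp10_txfm.h, returns (w0 * in0 + w1 * in1) rounded and right-shifted by
+// bit, and cospi_arr[b - cos_bit_min][i] holds cos(i * PI / 64) scaled by
+// 2^b. stage_range[] bounds the coefficient width after each stage for the
+// optional range_check().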
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[3];
+ bf1[1] = input[1] + input[2];
+ bf1[2] = -input[2] + input[1];
+ bf1[3] = -input[3] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[2];
+ bf1[2] = bf0[1];
+ bf1[3] = bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[7];
+ bf1[1] = input[1] + input[6];
+ bf1[2] = input[2] + input[5];
+ bf1[3] = input[3] + input[4];
+ bf1[4] = -input[4] + input[3];
+ bf1[5] = -input[5] + input[2];
+ bf1[6] = -input[6] + input[1];
+ bf1[7] = -input[7] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[4];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[6];
+ bf1[4] = bf0[1];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[3];
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[15];
+ bf1[1] = input[1] + input[14];
+ bf1[2] = input[2] + input[13];
+ bf1[3] = input[3] + input[12];
+ bf1[4] = input[4] + input[11];
+ bf1[5] = input[5] + input[10];
+ bf1[6] = input[6] + input[9];
+ bf1[7] = input[7] + input[8];
+ bf1[8] = -input[8] + input[7];
+ bf1[9] = -input[9] + input[6];
+ bf1[10] = -input[10] + input[5];
+ bf1[11] = -input[11] + input[4];
+ bf1[12] = -input[12] + input[3];
+ bf1[13] = -input[13] + input[2];
+ bf1[14] = -input[14] + input[1];
+ bf1[15] = -input[15] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[8];
+ bf1[2] = bf0[4];
+ bf1[3] = bf0[12];
+ bf1[4] = bf0[2];
+ bf1[5] = bf0[10];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[14];
+ bf1[8] = bf0[1];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[5];
+ bf1[11] = bf0[13];
+ bf1[12] = bf0[3];
+ bf1[13] = bf0[11];
+ bf1[14] = bf0[7];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[31];
+ bf1[1] = input[1] + input[30];
+ bf1[2] = input[2] + input[29];
+ bf1[3] = input[3] + input[28];
+ bf1[4] = input[4] + input[27];
+ bf1[5] = input[5] + input[26];
+ bf1[6] = input[6] + input[25];
+ bf1[7] = input[7] + input[24];
+ bf1[8] = input[8] + input[23];
+ bf1[9] = input[9] + input[22];
+ bf1[10] = input[10] + input[21];
+ bf1[11] = input[11] + input[20];
+ bf1[12] = input[12] + input[19];
+ bf1[13] = input[13] + input[18];
+ bf1[14] = input[14] + input[17];
+ bf1[15] = input[15] + input[16];
+ bf1[16] = -input[16] + input[15];
+ bf1[17] = -input[17] + input[14];
+ bf1[18] = -input[18] + input[13];
+ bf1[19] = -input[19] + input[12];
+ bf1[20] = -input[20] + input[11];
+ bf1[21] = -input[21] + input[10];
+ bf1[22] = -input[22] + input[9];
+ bf1[23] = -input[23] + input[8];
+ bf1[24] = -input[24] + input[7];
+ bf1[25] = -input[25] + input[6];
+ bf1[26] = -input[26] + input[5];
+ bf1[27] = -input[27] + input[4];
+ bf1[28] = -input[28] + input[3];
+ bf1[29] = -input[29] + input[2];
+ bf1[30] = -input[30] + input[1];
+ bf1[31] = -input[31] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = -bf0[8] + bf0[7];
+ bf1[9] = -bf0[9] + bf0[6];
+ bf1[10] = -bf0[10] + bf0[5];
+ bf1[11] = -bf0[11] + bf0[4];
+ bf1[12] = -bf0[12] + bf0[3];
+ bf1[13] = -bf0[13] + bf0[2];
+ bf1[14] = -bf0[14] + bf0[1];
+ bf1[15] = -bf0[15] + bf0[0];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = -bf0[20] + bf0[19];
+ bf1[21] = -bf0[21] + bf0[18];
+ bf1[22] = -bf0[22] + bf0[17];
+ bf1[23] = -bf0[23] + bf0[16];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[28] + bf0[27];
+ bf1[29] = bf0[29] + bf0[26];
+ bf1[30] = bf0[30] + bf0[25];
+ bf1[31] = bf0[31] + bf0[24];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = -bf0[18] + bf0[17];
+ bf1[19] = -bf0[19] + bf0[16];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[22] + bf0[21];
+ bf1[23] = bf0[23] + bf0[20];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = -bf0[26] + bf0[25];
+ bf1[27] = -bf0[27] + bf0[24];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[30] + bf0[29];
+ bf1[31] = bf0[31] + bf0[28];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = -bf0[17] + bf0[16];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[19] + bf0[18];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = -bf0[21] + bf0[20];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[23] + bf0[22];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = -bf0[25] + bf0[24];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[27] + bf0[26];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = -bf0[29] + bf0[28];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[31] + bf0[30];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[16];
+ bf1[2] = bf0[8];
+ bf1[3] = bf0[24];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[20];
+ bf1[6] = bf0[12];
+ bf1[7] = bf0[28];
+ bf1[8] = bf0[2];
+ bf1[9] = bf0[18];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[26];
+ bf1[12] = bf0[6];
+ bf1[13] = bf0[22];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[30];
+ bf1[16] = bf0[1];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[9];
+ bf1[19] = bf0[25];
+ bf1[20] = bf0[5];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[13];
+ bf1[23] = bf0[29];
+ bf1[24] = bf0[3];
+ bf1[25] = bf0[19];
+ bf1[26] = bf0[11];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[7];
+ bf1[29] = bf0[23];
+ bf1[30] = bf0[15];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
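+// The fadst*_new functions realize the ADST with the same staged butterfly
+// structure: stage 1 permutes the inputs, the middle stages apply cosine
+// butterflies, and the final stage applies the ADST's alternating output
+// sign pattern and permutation.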
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[3];
+ bf1[1] = input[0];
+ bf1[2] = input[1];
+ bf1[3] = input[2];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[2];
+ bf1[2] = bf0[3];
+ bf1[3] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[7];
+ bf1[1] = input[0];
+ bf1[2] = input[5];
+ bf1[3] = input[2];
+ bf1[4] = input[3];
+ bf1[5] = input[4];
+ bf1[6] = input[1];
+ bf1[7] = input[6];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[4];
+ bf1[2] = bf0[6];
+ bf1[3] = -bf0[2];
+ bf1[4] = bf0[3];
+ bf1[5] = -bf0[7];
+ bf1[6] = bf0[5];
+ bf1[7] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[15];
+ bf1[1] = input[0];
+ bf1[2] = input[13];
+ bf1[3] = input[2];
+ bf1[4] = input[11];
+ bf1[5] = input[4];
+ bf1[6] = input[9];
+ bf1[7] = input[6];
+ bf1[8] = input[7];
+ bf1[9] = input[8];
+ bf1[10] = input[5];
+ bf1[11] = input[10];
+ bf1[12] = input[3];
+ bf1[13] = input[12];
+ bf1[14] = input[1];
+ bf1[15] = input[14];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[2], bf0[1], cospi[62], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[10], bf0[3], cospi[54], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[18], bf0[5], cospi[46], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[26], bf0[7], cospi[38], bf0[6], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[34], bf0[9], cospi[30], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[42], bf0[11], cospi[22], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[50], bf0[13], cospi[14], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[58], bf0[15], cospi[6], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = -bf0[8] + bf0[0];
+ bf1[9] = -bf0[9] + bf0[1];
+ bf1[10] = -bf0[10] + bf0[2];
+ bf1[11] = -bf0[11] + bf0[3];
+ bf1[12] = -bf0[12] + bf0[4];
+ bf1[13] = -bf0[13] + bf0[5];
+ bf1[14] = -bf0[14] + bf0[6];
+ bf1[15] = -bf0[15] + bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = -bf0[12] + bf0[8];
+ bf1[13] = -bf0[13] + bf0[9];
+ bf1[14] = -bf0[14] + bf0[10];
+ bf1[15] = -bf0[15] + bf0[11];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = -bf0[10] + bf0[8];
+ bf1[11] = -bf0[11] + bf0[9];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = -bf0[14] + bf0[12];
+ bf1[15] = -bf0[15] + bf0[13];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[8];
+ bf1[2] = bf0[12];
+ bf1[3] = -bf0[4];
+ bf1[4] = bf0[6];
+ bf1[5] = -bf0[14];
+ bf1[6] = bf0[10];
+ bf1[7] = -bf0[2];
+ bf1[8] = bf0[3];
+ bf1[9] = -bf0[11];
+ bf1[10] = bf0[15];
+ bf1[11] = -bf0[7];
+ bf1[12] = bf0[5];
+ bf1[13] = -bf0[13];
+ bf1[14] = bf0[9];
+ bf1[15] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[31];
+ bf1[1] = input[0];
+ bf1[2] = input[29];
+ bf1[3] = input[2];
+ bf1[4] = input[27];
+ bf1[5] = input[4];
+ bf1[6] = input[25];
+ bf1[7] = input[6];
+ bf1[8] = input[23];
+ bf1[9] = input[8];
+ bf1[10] = input[21];
+ bf1[11] = input[10];
+ bf1[12] = input[19];
+ bf1[13] = input[12];
+ bf1[14] = input[17];
+ bf1[15] = input[14];
+ bf1[16] = input[15];
+ bf1[17] = input[16];
+ bf1[18] = input[13];
+ bf1[19] = input[18];
+ bf1[20] = input[11];
+ bf1[21] = input[20];
+ bf1[22] = input[9];
+ bf1[23] = input[22];
+ bf1[24] = input[7];
+ bf1[25] = input[24];
+ bf1[26] = input[5];
+ bf1[27] = input[26];
+ bf1[28] = input[3];
+ bf1[29] = input[28];
+ bf1[30] = input[1];
+ bf1[31] = input[30];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[21], bf0[11], cospi[43], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit[stage]);
+ bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[16];
+ bf1[1] = bf0[1] + bf0[17];
+ bf1[2] = bf0[2] + bf0[18];
+ bf1[3] = bf0[3] + bf0[19];
+ bf1[4] = bf0[4] + bf0[20];
+ bf1[5] = bf0[5] + bf0[21];
+ bf1[6] = bf0[6] + bf0[22];
+ bf1[7] = bf0[7] + bf0[23];
+ bf1[8] = bf0[8] + bf0[24];
+ bf1[9] = bf0[9] + bf0[25];
+ bf1[10] = bf0[10] + bf0[26];
+ bf1[11] = bf0[11] + bf0[27];
+ bf1[12] = bf0[12] + bf0[28];
+ bf1[13] = bf0[13] + bf0[29];
+ bf1[14] = bf0[14] + bf0[30];
+ bf1[15] = bf0[15] + bf0[31];
+ bf1[16] = -bf0[16] + bf0[0];
+ bf1[17] = -bf0[17] + bf0[1];
+ bf1[18] = -bf0[18] + bf0[2];
+ bf1[19] = -bf0[19] + bf0[3];
+ bf1[20] = -bf0[20] + bf0[4];
+ bf1[21] = -bf0[21] + bf0[5];
+ bf1[22] = -bf0[22] + bf0[6];
+ bf1[23] = -bf0[23] + bf0[7];
+ bf1[24] = -bf0[24] + bf0[8];
+ bf1[25] = -bf0[25] + bf0[9];
+ bf1[26] = -bf0[26] + bf0[10];
+ bf1[27] = -bf0[27] + bf0[11];
+ bf1[28] = -bf0[28] + bf0[12];
+ bf1[29] = -bf0[29] + bf0[13];
+ bf1[30] = -bf0[30] + bf0[14];
+ bf1[31] = -bf0[31] + bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit[stage]);
+ bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = -bf0[8] + bf0[0];
+ bf1[9] = -bf0[9] + bf0[1];
+ bf1[10] = -bf0[10] + bf0[2];
+ bf1[11] = -bf0[11] + bf0[3];
+ bf1[12] = -bf0[12] + bf0[4];
+ bf1[13] = -bf0[13] + bf0[5];
+ bf1[14] = -bf0[14] + bf0[6];
+ bf1[15] = -bf0[15] + bf0[7];
+ bf1[16] = bf0[16] + bf0[24];
+ bf1[17] = bf0[17] + bf0[25];
+ bf1[18] = bf0[18] + bf0[26];
+ bf1[19] = bf0[19] + bf0[27];
+ bf1[20] = bf0[20] + bf0[28];
+ bf1[21] = bf0[21] + bf0[29];
+ bf1[22] = bf0[22] + bf0[30];
+ bf1[23] = bf0[23] + bf0[31];
+ bf1[24] = -bf0[24] + bf0[16];
+ bf1[25] = -bf0[25] + bf0[17];
+ bf1[26] = -bf0[26] + bf0[18];
+ bf1[27] = -bf0[27] + bf0[19];
+ bf1[28] = -bf0[28] + bf0[20];
+ bf1[29] = -bf0[29] + bf0[21];
+ bf1[30] = -bf0[30] + bf0[22];
+ bf1[31] = -bf0[31] + bf0[23];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = -bf0[12] + bf0[8];
+ bf1[13] = -bf0[13] + bf0[9];
+ bf1[14] = -bf0[14] + bf0[10];
+ bf1[15] = -bf0[15] + bf0[11];
+ bf1[16] = bf0[16] + bf0[20];
+ bf1[17] = bf0[17] + bf0[21];
+ bf1[18] = bf0[18] + bf0[22];
+ bf1[19] = bf0[19] + bf0[23];
+ bf1[20] = -bf0[20] + bf0[16];
+ bf1[21] = -bf0[21] + bf0[17];
+ bf1[22] = -bf0[22] + bf0[18];
+ bf1[23] = -bf0[23] + bf0[19];
+ bf1[24] = bf0[24] + bf0[28];
+ bf1[25] = bf0[25] + bf0[29];
+ bf1[26] = bf0[26] + bf0[30];
+ bf1[27] = bf0[27] + bf0[31];
+ bf1[28] = -bf0[28] + bf0[24];
+ bf1[29] = -bf0[29] + bf0[25];
+ bf1[30] = -bf0[30] + bf0[26];
+ bf1[31] = -bf0[31] + bf0[27];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = -bf0[10] + bf0[8];
+ bf1[11] = -bf0[11] + bf0[9];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = -bf0[14] + bf0[12];
+ bf1[15] = -bf0[15] + bf0[13];
+ bf1[16] = bf0[16] + bf0[18];
+ bf1[17] = bf0[17] + bf0[19];
+ bf1[18] = -bf0[18] + bf0[16];
+ bf1[19] = -bf0[19] + bf0[17];
+ bf1[20] = bf0[20] + bf0[22];
+ bf1[21] = bf0[21] + bf0[23];
+ bf1[22] = -bf0[22] + bf0[20];
+ bf1[23] = -bf0[23] + bf0[21];
+ bf1[24] = bf0[24] + bf0[26];
+ bf1[25] = bf0[25] + bf0[27];
+ bf1[26] = -bf0[26] + bf0[24];
+ bf1[27] = -bf0[27] + bf0[25];
+ bf1[28] = bf0[28] + bf0[30];
+ bf1[29] = bf0[29] + bf0[31];
+ bf1[30] = -bf0[30] + bf0[28];
+ bf1[31] = -bf0[31] + bf0[29];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit[stage]);
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
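+ // (final output permutation: even outputs copy their source values,
+ // odd outputs are negated)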
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[16];
+ bf1[2] = bf0[24];
+ bf1[3] = -bf0[8];
+ bf1[4] = bf0[12];
+ bf1[5] = -bf0[28];
+ bf1[6] = bf0[20];
+ bf1[7] = -bf0[4];
+ bf1[8] = bf0[6];
+ bf1[9] = -bf0[22];
+ bf1[10] = bf0[30];
+ bf1[11] = -bf0[14];
+ bf1[12] = bf0[10];
+ bf1[13] = -bf0[26];
+ bf1[14] = bf0[18];
+ bf1[15] = -bf0[2];
+ bf1[16] = bf0[3];
+ bf1[17] = -bf0[19];
+ bf1[18] = bf0[27];
+ bf1[19] = -bf0[11];
+ bf1[20] = bf0[15];
+ bf1[21] = -bf0[31];
+ bf1[22] = bf0[23];
+ bf1[23] = -bf0[7];
+ bf1[24] = bf0[5];
+ bf1[25] = -bf0[21];
+ bf1[26] = bf0[29];
+ bf1[27] = -bf0[13];
+ bf1[28] = bf0[9];
+ bf1[29] = -bf0[25];
+ bf1[30] = bf0[17];
+ bf1[31] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/vp10/common/vp10_fwd_txfm1d.h b/vp10/common/vp10_fwd_txfm1d.h
new file mode 100644
index 0000000..d5b9f40
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm1d.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM1D_H_
+#define VP10_FWD_TXFM1D_H_
+
+#include "vp10/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP10_FWD_TXFM1D_H_
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c
new file mode 100644
index 0000000..67449ec
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_txfm.h"
+
+static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ int32_t *txfm_buf) {
+ int i, j;
+ const int txfm_size = cfg->txfm_size;
+ const int8_t *shift = cfg->shift;
+ const int8_t *stage_range_col = cfg->stage_range_col;
+ const int8_t *stage_range_row = cfg->stage_range_row;
+ const int8_t *cos_bit_col = cfg->cos_bit_col;
+ const int8_t *cos_bit_row = cfg->cos_bit_row;
+ const TxfmFunc txfm_func_col = cfg->txfm_func_col;
+ const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+
+ // txfm_buf must hold txfm_size * txfm_size + 2 * txfm_size elements;
+ // it is used for intermediate data buffering.
+ int32_t *temp_in = txfm_buf;
+ int32_t *temp_out = temp_in + txfm_size;
+ int32_t *buf = temp_out + txfm_size;
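+ // Layout (derived from the pointers above): temp_in and temp_out each
+ // hold one txfm_size-long column/row, while buf holds the full
+ // txfm_size * txfm_size result of the column pass for the row pass to
+ // read.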
+
+ // Columns
+ for (i = 0; i < txfm_size; ++i) {
+ for (j = 0; j < txfm_size; ++j)
+ temp_in[j] = input[j * stride + i];
+ round_shift_array(temp_in, txfm_size, -shift[0]);
+ txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+ round_shift_array(temp_out, txfm_size, -shift[1]);
+ for (j = 0; j < txfm_size; ++j)
+ buf[j * txfm_size + i] = temp_out[j];
+ }
+
+ // Rows
+ for (i = 0; i < txfm_size; ++i) {
+ for (j = 0; j < txfm_size; ++j)
+ temp_in[j] = buf[j + i * txfm_size];
+ txfm_func_row(temp_in, temp_out, cos_bit_row, stage_range_row);
+ round_shift_array(temp_out, txfm_size, -shift[2]);
+ for (j = 0; j < txfm_size; ++j)
+ output[j + i * txfm_size] = (int32_t)temp_out[j];
+ }
+}
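+
+// A note on the rounding shifts above -- a reading of the code,
+// assuming round_shift_array() in vp10_txfm.h treats a positive bit
+// count as a rounded right shift and a negative one as a left shift:
+// because the calls negate shift[i], a positive entry in cfg->shift
+// scales values up and a negative entry rounds them down. With
+// fwd_shift_dct_dct_4 = {4, 0, -2} from vp10_fwd_txfm2d_cfg.h, a 4x4
+// block is scaled up by 1 << 4 before the column pass and rounded down
+// by 2 bits after the row pass.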
+
+void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[4 * 4 + 4 + 4];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[8 * 8 + 8 + 8];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[16 * 16 + 16 + 16];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int32_t txfm_buf[32 * 32 + 32 + 32];
+ (void)bd;
+ fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+}
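+
+// Illustrative usage (a sketch, not part of this change): forward
+// transform one 4x4 residual block with the DCT_DCT configuration from
+// vp10_fwd_txfm2d_cfg.h; bd is accepted but unused in this C path.
+//
+//   int16_t resid[4 * 4] = { 0 };  // residual block, raster order
+//   int32_t coeff[4 * 4];          // transform coefficients (output)
+//   vp10_fwd_txfm2d_4x4(resid, coeff, 4, &fwd_txfm_2d_cfg_dct_dct_4, 8);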
diff --git a/vp10/common/vp10_fwd_txfm2d.h b/vp10/common/vp10_fwd_txfm2d.h
new file mode 100644
index 0000000..64e6f56
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm2d.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM2D_H_
+#define VP10_FWD_TXFM2D_H_
+
+#include "vp10/common/vp10_txfm.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+void vp10_fwd_txfm2d_4x4(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_fwd_txfm2d_8x8(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_fwd_txfm2d_16x16(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_fwd_txfm2d_32x32(const int16_t *input, int32_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+#ifdef __cplusplus
+}
+#endif
+#endif // VP10_FWD_TXFM2D_H_
diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h
new file mode 100644
index 0000000..5c2b4ca
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm2d_cfg.h
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM2D_CFG_H_
+#define VP10_FWD_TXFM2D_CFG_H_
+#include "vp10/common/vp10_fwd_txfm1d.h"
+// ---------------- config fwd_dct_dct_4 ----------------
+static const int8_t fwd_shift_dct_dct_4[3] = {4, 0, -2};
+static const int8_t fwd_stage_range_col_dct_dct_4[4] = {15, 16, 17, 17};
+static const int8_t fwd_stage_range_row_dct_dct_4[4] = {17, 18, 18, 18};
+static const int8_t fwd_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
+static const int8_t fwd_cos_bit_row_dct_dct_4[4] = {15, 14, 14, 14};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 4, // .stage_num_row
+ fwd_shift_dct_dct_4, // .shift
+ fwd_stage_range_col_dct_dct_4, // .stage_range_col
+ fwd_stage_range_row_dct_dct_4, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_4, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_4, // .cos_bit_row
+ vp10_fdct4_new, // .txfm_func_col
+ vp10_fdct4_new}; // .txfm_func_row;
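+// (Each stage_range/cos_bit array supplies one entry per butterfly
+// stage, so its length matches the stage_num_col / stage_num_row it is
+// paired with -- four entries for the four-stage vp10_fdct4_new above.)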
+
+// ---------------- config fwd_dct_dct_8 ----------------
+static const int8_t fwd_shift_dct_dct_8[3] = {5, -3, -1};
+static const int8_t fwd_stage_range_col_dct_dct_8[6] = {16, 17, 18, 19, 19, 19};
+static const int8_t fwd_stage_range_row_dct_dct_8[6] = {16, 17, 18, 18, 18, 18};
+static const int8_t fwd_cos_bit_col_dct_dct_8[6] = {15, 15, 14, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_dct_8[6] = {15, 15, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ fwd_shift_dct_dct_8, // .shift
+ fwd_stage_range_col_dct_dct_8, // .stage_range_col
+ fwd_stage_range_row_dct_dct_8, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_8, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_8, // .cos_bit_row
+ vp10_fdct8_new, // .txfm_func_col
+ vp10_fdct8_new}; // .txfm_func_row;
+
+// ---------------- config fwd_dct_dct_16 ----------------
+static const int8_t fwd_shift_dct_dct_16[3] = {4, -3, -1};
+static const int8_t fwd_stage_range_col_dct_dct_16[8] = {15, 16, 17, 18,
+ 19, 19, 19, 19};
+static const int8_t fwd_stage_range_row_dct_dct_16[8] = {16, 17, 18, 19,
+ 19, 19, 19, 19};
+static const int8_t fwd_cos_bit_col_dct_dct_16[8] = {15, 15, 15, 14,
+ 13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_dct_16[8] = {15, 15, 14, 13,
+ 13, 13, 13, 13};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ fwd_shift_dct_dct_16, // .shift
+ fwd_stage_range_col_dct_dct_16, // .stage_range_col
+ fwd_stage_range_row_dct_dct_16, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_16, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_16, // .cos_bit_row
+ vp10_fdct16_new, // .txfm_func_col
+ vp10_fdct16_new}; // .txfm_func_row;
+
+// ---------------- config fwd_dct_dct_32 ----------------
+static const int8_t fwd_shift_dct_dct_32[3] = {3, -3, -1};
+static const int8_t fwd_stage_range_col_dct_dct_32[10] = {14, 15, 16, 17, 18,
+ 19, 19, 19, 19, 19};
+static const int8_t fwd_stage_range_row_dct_dct_32[10] = {16, 17, 18, 19, 20,
+ 20, 20, 20, 20, 20};
+static const int8_t fwd_cos_bit_col_dct_dct_32[10] = {15, 15, 15, 15, 14,
+ 13, 13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_dct_32[10] = {15, 15, 14, 13, 12,
+ 12, 12, 12, 12, 12};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ fwd_shift_dct_dct_32, // .shift
+ fwd_stage_range_col_dct_dct_32, // .stage_range_col
+ fwd_stage_range_row_dct_dct_32, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_32, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_32, // .cos_bit_row
+ vp10_fdct32_new, // .txfm_func_col
+ vp10_fdct32_new}; // .txfm_func_row;
+
+// ---------------- config fwd_dct_adst_4 ----------------
+static const int8_t fwd_shift_dct_adst_4[3] = {5, -2, -1};
+static const int8_t fwd_stage_range_col_dct_adst_4[4] = {16, 17, 18, 18};
+static const int8_t fwd_stage_range_row_dct_adst_4[6] = {16, 16, 16,
+ 17, 17, 17};
+static const int8_t fwd_cos_bit_col_dct_adst_4[4] = {15, 15, 14, 14};
+static const int8_t fwd_cos_bit_row_dct_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 6, // .stage_num_row
+ fwd_shift_dct_adst_4, // .shift
+ fwd_stage_range_col_dct_adst_4, // .stage_range_col
+ fwd_stage_range_row_dct_adst_4, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_4, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_4, // .cos_bit_row
+ vp10_fdct4_new, // .txfm_func_col
+ vp10_fadst4_new}; // .txfm_func_row;
+
+// ---------------- config fwd_dct_adst_8 ----------------
+static const int8_t fwd_shift_dct_adst_8[3] = {7, -3, -3};
+static const int8_t fwd_stage_range_col_dct_adst_8[6] = {18, 19, 20,
+ 21, 21, 21};
+static const int8_t fwd_stage_range_row_dct_adst_8[8] = {18, 18, 18, 19,
+ 19, 20, 20, 20};
+static const int8_t fwd_cos_bit_col_dct_adst_8[6] = {14, 13, 12, 11, 11, 11};
+static const int8_t fwd_cos_bit_row_dct_adst_8[8] = {14, 14, 14, 13,
+ 13, 12, 12, 12};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 8, // .stage_num_row
+ fwd_shift_dct_adst_8, // .shift
+ fwd_stage_range_col_dct_adst_8, // .stage_range_col
+ fwd_stage_range_row_dct_adst_8, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_8, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_8, // .cos_bit_row
+ vp10_fdct8_new, // .txfm_func_col
+ vp10_fadst8_new}; // .txfm_func_row;
+
+// ---------------- config fwd_dct_adst_16 ----------------
+static const int8_t fwd_shift_dct_adst_16[3] = {4, -1, -3};
+static const int8_t fwd_stage_range_col_dct_adst_16[8] = {15, 16, 17, 18,
+ 19, 19, 19, 19};
+static const int8_t fwd_stage_range_row_dct_adst_16[10] = {18, 18, 18, 19, 19,
+ 20, 20, 21, 21, 21};
+static const int8_t fwd_cos_bit_col_dct_adst_16[8] = {15, 15, 15, 14,
+ 13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_adst_16[10] = {14, 14, 14, 13, 13,
+ 12, 12, 11, 11, 11};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 10, // .stage_num_row
+ fwd_shift_dct_adst_16, // .shift
+ fwd_stage_range_col_dct_adst_16, // .stage_range_col
+ fwd_stage_range_row_dct_adst_16, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_16, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_16, // .cos_bit_row
+ vp10_fdct16_new, // .txfm_func_col
+ vp10_fadst16_new}; // .txfm_func_row;
+
+// ---------------- config fwd_dct_adst_32 ----------------
+static const int8_t fwd_shift_dct_adst_32[3] = {3, -1, -3};
+static const int8_t fwd_stage_range_col_dct_adst_32[10] = {14, 15, 16, 17, 18,
+ 19, 19, 19, 19, 19};
+static const int8_t fwd_stage_range_row_dct_adst_32[12] = {
+ 18, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 22};
+static const int8_t fwd_cos_bit_col_dct_adst_32[10] = {15, 15, 15, 15, 14,
+ 13, 13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_adst_32[12] = {14, 14, 14, 13, 13, 12,
+ 12, 11, 11, 10, 10, 10};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 12, // .stage_num_row
+ fwd_shift_dct_adst_32, // .shift
+ fwd_stage_range_col_dct_adst_32, // .stage_range_col
+ fwd_stage_range_row_dct_adst_32, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_32, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_32, // .cos_bit_row
+ vp10_fdct32_new, // .txfm_func_col
+ vp10_fadst32_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_adst_4 ----------------
+static const int8_t fwd_shift_adst_adst_4[3] = {6, 1, -5};
+static const int8_t fwd_stage_range_col_adst_adst_4[6] = {17, 17, 18,
+ 19, 19, 19};
+static const int8_t fwd_stage_range_row_adst_adst_4[6] = {20, 20, 20,
+ 21, 21, 21};
+static const int8_t fwd_cos_bit_col_adst_adst_4[6] = {15, 15, 14, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_adst_adst_4[6] = {12, 12, 12, 11, 11, 11};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ fwd_shift_adst_adst_4, // .shift
+ fwd_stage_range_col_adst_adst_4, // .stage_range_col
+ fwd_stage_range_row_adst_adst_4, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_4, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_4, // .cos_bit_row
+ vp10_fadst4_new, // .txfm_func_col
+ vp10_fadst4_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_adst_8 ----------------
+static const int8_t fwd_shift_adst_adst_8[3] = {3, -1, -1};
+static const int8_t fwd_stage_range_col_adst_adst_8[8] = {14, 14, 15, 16,
+ 16, 17, 17, 17};
+static const int8_t fwd_stage_range_row_adst_adst_8[8] = {16, 16, 16, 17,
+ 17, 18, 18, 18};
+static const int8_t fwd_cos_bit_col_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+static const int8_t fwd_cos_bit_row_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 14, 14, 14};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ fwd_shift_adst_adst_8, // .shift
+ fwd_stage_range_col_adst_adst_8, // .stage_range_col
+ fwd_stage_range_row_adst_adst_8, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_8, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_8, // .cos_bit_row
+ vp10_fadst8_new, // .txfm_func_col
+ vp10_fadst8_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_adst_16 ----------------
+static const int8_t fwd_shift_adst_adst_16[3] = {2, 0, -2};
+static const int8_t fwd_stage_range_col_adst_adst_16[10] = {13, 13, 14, 15, 15,
+ 16, 16, 17, 17, 17};
+static const int8_t fwd_stage_range_row_adst_adst_16[10] = {17, 17, 17, 18, 18,
+ 19, 19, 20, 20, 20};
+static const int8_t fwd_cos_bit_col_adst_adst_16[10] = {15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15};
+static const int8_t fwd_cos_bit_row_adst_adst_16[10] = {15, 15, 15, 14, 14,
+ 13, 13, 12, 12, 12};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ fwd_shift_adst_adst_16, // .shift
+ fwd_stage_range_col_adst_adst_16, // .stage_range_col
+ fwd_stage_range_row_adst_adst_16, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_16, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_16, // .cos_bit_row
+ vp10_fadst16_new, // .txfm_func_col
+ vp10_fadst16_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_adst_32 ----------------
+static const int8_t fwd_shift_adst_adst_32[3] = {4, -2, -3};
+static const int8_t fwd_stage_range_col_adst_adst_32[12] = {
+ 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20};
+static const int8_t fwd_stage_range_row_adst_adst_32[12] = {
+ 18, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 22};
+static const int8_t fwd_cos_bit_col_adst_adst_32[12] = {15, 15, 15, 15, 15, 14,
+ 14, 13, 13, 12, 12, 12};
+static const int8_t fwd_cos_bit_row_adst_adst_32[12] = {14, 14, 14, 13, 13, 12,
+ 12, 11, 11, 10, 10, 10};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ fwd_shift_adst_adst_32, // .shift
+ fwd_stage_range_col_adst_adst_32, // .stage_range_col
+ fwd_stage_range_row_adst_adst_32, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_32, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_32, // .cos_bit_row
+ vp10_fadst32_new, // .txfm_func_col
+ vp10_fadst32_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_dct_4 ----------------
+static const int8_t fwd_shift_adst_dct_4[3] = {5, -4, 1};
+static const int8_t fwd_stage_range_col_adst_dct_4[6] = {16, 16, 17,
+ 18, 18, 18};
+static const int8_t fwd_stage_range_row_adst_dct_4[4] = {14, 15, 15, 15};
+static const int8_t fwd_cos_bit_col_adst_dct_4[6] = {15, 15, 15, 14, 14, 14};
+static const int8_t fwd_cos_bit_row_adst_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 4, // .stage_num_row
+ fwd_shift_adst_dct_4, // .shift
+ fwd_stage_range_col_adst_dct_4, // .stage_range_col
+ fwd_stage_range_row_adst_dct_4, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_4, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_4, // .cos_bit_row
+ vp10_fadst4_new, // .txfm_func_col
+ vp10_fdct4_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_dct_8 ----------------
+static const int8_t fwd_shift_adst_dct_8[3] = {5, 1, -5};
+static const int8_t fwd_stage_range_col_adst_dct_8[8] = {16, 16, 17, 18,
+ 18, 19, 19, 19};
+static const int8_t fwd_stage_range_row_adst_dct_8[6] = {20, 21, 22,
+ 22, 22, 22};
+static const int8_t fwd_cos_bit_col_adst_dct_8[8] = {15, 15, 15, 14,
+ 14, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_adst_dct_8[6] = {12, 11, 10, 10, 10, 10};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 6, // .stage_num_row
+ fwd_shift_adst_dct_8, // .shift
+ fwd_stage_range_col_adst_dct_8, // .stage_range_col
+ fwd_stage_range_row_adst_dct_8, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_8, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_8, // .cos_bit_row
+ vp10_fadst8_new, // .txfm_func_col
+ vp10_fdct8_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_dct_16 ----------------
+static const int8_t fwd_shift_adst_dct_16[3] = {4, -3, -1};
+static const int8_t fwd_stage_range_col_adst_dct_16[10] = {15, 15, 16, 17, 17,
+ 18, 18, 19, 19, 19};
+static const int8_t fwd_stage_range_row_adst_dct_16[8] = {16, 17, 18, 19,
+ 19, 19, 19, 19};
+static const int8_t fwd_cos_bit_col_adst_dct_16[10] = {15, 15, 15, 15, 15,
+ 14, 14, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_adst_dct_16[8] = {15, 15, 14, 13,
+ 13, 13, 13, 13};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 8, // .stage_num_row
+ fwd_shift_adst_dct_16, // .shift
+ fwd_stage_range_col_adst_dct_16, // .stage_range_col
+ fwd_stage_range_row_adst_dct_16, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_16, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_16, // .cos_bit_row
+ vp10_fadst16_new, // .txfm_func_col
+ vp10_fdct16_new}; // .txfm_func_row;
+
+// ---------------- config fwd_adst_dct_32 ----------------
+static const int8_t fwd_shift_adst_dct_32[3] = {5, -4, -2};
+static const int8_t fwd_stage_range_col_adst_dct_32[12] = {
+ 16, 16, 17, 18, 18, 19, 19, 20, 20, 21, 21, 21};
+static const int8_t fwd_stage_range_row_adst_dct_32[10] = {17, 18, 19, 20, 21,
+ 21, 21, 21, 21, 21};
+static const int8_t fwd_cos_bit_col_adst_dct_32[12] = {15, 15, 15, 14, 14, 13,
+ 13, 12, 12, 11, 11, 11};
+static const int8_t fwd_cos_bit_row_adst_dct_32[10] = {15, 14, 13, 12, 11,
+ 11, 11, 11, 11, 11};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 10, // .stage_num_row
+ fwd_shift_adst_dct_32, // .shift
+ fwd_stage_range_col_adst_dct_32, // .stage_range_col
+ fwd_stage_range_row_adst_dct_32, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_32, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_32, // .cos_bit_row
+ vp10_fadst32_new, // .txfm_func_col
+ vp10_fdct32_new}; // .txfm_func_row;
+
+#endif // VP10_FWD_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_inv_txfm1d.c b/vp10/common/vp10_inv_txfm1d.c
new file mode 100644
index 0000000..606ca55
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm1d.c
@@ -0,0 +1,1537 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "vp10/common/vp10_inv_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ int i, j; \
+ for (i = 0; i < size; ++i) { \
+ int buf_bit = get_max_bit(abs(buf[i])) + 1; \
+ if (buf_bit > bit) { \
+ printf("======== %s overflow ========\n", __func__); \
+ printf("stage: %d node: %d\n", stage, i); \
+ printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+ printf("input:\n"); \
+ for (j = 0; j < size; j++) { \
+ printf("%d,", input[j]); \
+ } \
+ printf("\n"); \
+ assert(0); \
+ } \
+ } \
+ }
+#else
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ (void) stage; \
+ (void) input; \
+ (void) buf; \
+ (void) size; \
+ (void) bit; \
+ }
+#endif
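+
+// With CONFIG_COEFFICIENT_RANGE_CHECKING enabled, range_check() prints
+// the offending stage, node and input vector and then asserts, so a
+// stage output that exceeds its declared stage_range bound fails fast
+// in debug runs; otherwise the macro compiles away to (void) casts.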
+
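+// Each *_new() routine below is a 1-D inverse transform written as
+// butterfly stages: stage 1 reorders the inputs, later stages
+// alternate between `output` and `step`, mixing add/sub butterflies
+// with half_btf() rotations (assumed, per vp10_txfm.h, to compute the
+// rounded (w0 * in0 + w1 * in1) >> cos_bit), while cos_bit[] and
+// stage_range[] give each stage's cosine precision and coefficient
+// range for range_check().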
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[2];
+ bf1[2] = input[1];
+ bf1[3] = input[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[4];
+ bf1[2] = input[2];
+ bf1[3] = input[6];
+ bf1[4] = input[1];
+ bf1[5] = input[5];
+ bf1[6] = input[3];
+ bf1[7] = input[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[8];
+ bf1[2] = input[4];
+ bf1[3] = input[12];
+ bf1[4] = input[2];
+ bf1[5] = input[10];
+ bf1[6] = input[6];
+ bf1[7] = input[14];
+ bf1[8] = input[1];
+ bf1[9] = input[9];
+ bf1[10] = input[5];
+ bf1[11] = input[13];
+ bf1[12] = input[3];
+ bf1[13] = input[11];
+ bf1[14] = input[7];
+ bf1[15] = input[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[16];
+ bf1[2] = input[8];
+ bf1[3] = input[24];
+ bf1[4] = input[4];
+ bf1[5] = input[20];
+ bf1[6] = input[12];
+ bf1[7] = input[28];
+ bf1[8] = input[2];
+ bf1[9] = input[18];
+ bf1[10] = input[10];
+ bf1[11] = input[26];
+ bf1[12] = input[6];
+ bf1[13] = input[22];
+ bf1[14] = input[14];
+ bf1[15] = input[30];
+ bf1[16] = input[1];
+ bf1[17] = input[17];
+ bf1[18] = input[9];
+ bf1[19] = input[25];
+ bf1[20] = input[5];
+ bf1[21] = input[21];
+ bf1[22] = input[13];
+ bf1[23] = input[29];
+ bf1[24] = input[3];
+ bf1[25] = input[19];
+ bf1[26] = input[11];
+ bf1[27] = input[27];
+ bf1[28] = input[7];
+ bf1[29] = input[23];
+ bf1[30] = input[15];
+ bf1[31] = input[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = bf0[16] - bf0[17];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[18] + bf0[19];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = bf0[20] - bf0[21];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[22] + bf0[23];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = bf0[24] - bf0[25];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[26] + bf0[27];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = bf0[28] - bf0[29];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[30] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = bf0[17] - bf0[18];
+ bf1[19] = bf0[16] - bf0[19];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[21] + bf0[22];
+ bf1[23] = bf0[20] + bf0[23];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = bf0[25] - bf0[26];
+ bf1[27] = bf0[24] - bf0[27];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[29] + bf0[30];
+ bf1[31] = bf0[28] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = bf0[19] - bf0[20];
+ bf1[21] = bf0[18] - bf0[21];
+ bf1[22] = bf0[17] - bf0[22];
+ bf1[23] = bf0[16] - bf0[23];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[27] + bf0[28];
+ bf1[29] = bf0[26] + bf0[29];
+ bf1[30] = bf0[25] + bf0[30];
+ bf1[31] = bf0[24] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = bf0[15] - bf0[16];
+ bf1[17] = bf0[14] - bf0[17];
+ bf1[18] = bf0[13] - bf0[18];
+ bf1[19] = bf0[12] - bf0[19];
+ bf1[20] = bf0[11] - bf0[20];
+ bf1[21] = bf0[10] - bf0[21];
+ bf1[22] = bf0[9] - bf0[22];
+ bf1[23] = bf0[8] - bf0[23];
+ bf1[24] = bf0[7] - bf0[24];
+ bf1[25] = bf0[6] - bf0[25];
+ bf1[26] = bf0[5] - bf0[26];
+ bf1[27] = bf0[4] - bf0[27];
+ bf1[28] = bf0[3] - bf0[28];
+ bf1[29] = bf0[2] - bf0[29];
+ bf1[30] = bf0[1] - bf0[30];
+ bf1[31] = bf0[0] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
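+// The iadst routines share a common shape: stage 1 applies a
+// sign-flipping input permutation, the middle stages mix half_btf()
+// rotations with add/sub butterflies, and the final stage permutes the
+// results back into natural order.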
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[3];
+ bf1[2] = -input[1];
+ bf1[3] = input[2];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[56], bf0[0], -cospi[8], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[24], bf0[2], -cospi[40], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[2];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[7];
+ bf1[2] = -input[3];
+ bf1[3] = input[4];
+ bf1[4] = -input[1];
+ bf1[5] = input[6];
+ bf1[6] = input[2];
+ bf1[7] = -input[5];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[6];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[4];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[2];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];
+
+  // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[15];
+ bf1[2] = -input[7];
+ bf1[3] = input[8];
+ bf1[4] = -input[3];
+ bf1[5] = input[12];
+ bf1[6] = input[4];
+ bf1[7] = -input[11];
+ bf1[8] = -input[1];
+ bf1[9] = input[14];
+ bf1[10] = input[6];
+ bf1[11] = -input[9];
+ bf1[12] = input[2];
+ bf1[13] = -input[13];
+ bf1[14] = -input[5];
+ bf1[15] = input[10];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = bf0[8] - bf0[10];
+ bf1[11] = bf0[9] - bf0[11];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = bf0[12] - bf0[14];
+ bf1[15] = bf0[13] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = bf0[8] - bf0[12];
+ bf1[13] = bf0[9] - bf0[13];
+ bf1[14] = bf0[10] - bf0[14];
+ bf1[15] = bf0[11] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = bf0[0] - bf0[8];
+ bf1[9] = bf0[1] - bf0[9];
+ bf1[10] = bf0[2] - bf0[10];
+ bf1[11] = bf0[3] - bf0[11];
+ bf1[12] = bf0[4] - bf0[12];
+ bf1[13] = bf0[5] - bf0[13];
+ bf1[14] = bf0[6] - bf0[14];
+ bf1[15] = bf0[7] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[14];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[12];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[10];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[8];
+ bf1[8] = bf0[9];
+ bf1[9] = bf0[6];
+ bf1[10] = bf0[11];
+ bf1[11] = bf0[4];
+ bf1[12] = bf0[13];
+ bf1[13] = bf0[2];
+ bf1[14] = bf0[15];
+ bf1[15] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+  // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[31];
+ bf1[2] = -input[15];
+ bf1[3] = input[16];
+ bf1[4] = -input[7];
+ bf1[5] = input[24];
+ bf1[6] = input[8];
+ bf1[7] = -input[23];
+ bf1[8] = -input[3];
+ bf1[9] = input[28];
+ bf1[10] = input[12];
+ bf1[11] = -input[19];
+ bf1[12] = input[4];
+ bf1[13] = -input[27];
+ bf1[14] = -input[11];
+ bf1[15] = input[20];
+ bf1[16] = -input[1];
+ bf1[17] = input[30];
+ bf1[18] = input[14];
+ bf1[19] = -input[17];
+ bf1[20] = input[6];
+ bf1[21] = -input[25];
+ bf1[22] = -input[9];
+ bf1[23] = input[22];
+ bf1[24] = input[2];
+ bf1[25] = -input[29];
+ bf1[26] = -input[13];
+ bf1[27] = input[18];
+ bf1[28] = -input[5];
+ bf1[29] = input[26];
+ bf1[30] = input[10];
+ bf1[31] = -input[21];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[32], bf0[18], -cospi[32], bf0[19], cos_bit[stage]);
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = bf0[8] - bf0[10];
+ bf1[11] = bf0[9] - bf0[11];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = bf0[12] - bf0[14];
+ bf1[15] = bf0[13] - bf0[15];
+ bf1[16] = bf0[16] + bf0[18];
+ bf1[17] = bf0[17] + bf0[19];
+ bf1[18] = bf0[16] - bf0[18];
+ bf1[19] = bf0[17] - bf0[19];
+ bf1[20] = bf0[20] + bf0[22];
+ bf1[21] = bf0[21] + bf0[23];
+ bf1[22] = bf0[20] - bf0[22];
+ bf1[23] = bf0[21] - bf0[23];
+ bf1[24] = bf0[24] + bf0[26];
+ bf1[25] = bf0[25] + bf0[27];
+ bf1[26] = bf0[24] - bf0[26];
+ bf1[27] = bf0[25] - bf0[27];
+ bf1[28] = bf0[28] + bf0[30];
+ bf1[29] = bf0[29] + bf0[31];
+ bf1[30] = bf0[28] - bf0[30];
+ bf1[31] = bf0[29] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = bf0[8] - bf0[12];
+ bf1[13] = bf0[9] - bf0[13];
+ bf1[14] = bf0[10] - bf0[14];
+ bf1[15] = bf0[11] - bf0[15];
+ bf1[16] = bf0[16] + bf0[20];
+ bf1[17] = bf0[17] + bf0[21];
+ bf1[18] = bf0[18] + bf0[22];
+ bf1[19] = bf0[19] + bf0[23];
+ bf1[20] = bf0[16] - bf0[20];
+ bf1[21] = bf0[17] - bf0[21];
+ bf1[22] = bf0[18] - bf0[22];
+ bf1[23] = bf0[19] - bf0[23];
+ bf1[24] = bf0[24] + bf0[28];
+ bf1[25] = bf0[25] + bf0[29];
+ bf1[26] = bf0[26] + bf0[30];
+ bf1[27] = bf0[27] + bf0[31];
+ bf1[28] = bf0[24] - bf0[28];
+ bf1[29] = bf0[25] - bf0[29];
+ bf1[30] = bf0[26] - bf0[30];
+ bf1[31] = bf0[27] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = bf0[0] - bf0[8];
+ bf1[9] = bf0[1] - bf0[9];
+ bf1[10] = bf0[2] - bf0[10];
+ bf1[11] = bf0[3] - bf0[11];
+ bf1[12] = bf0[4] - bf0[12];
+ bf1[13] = bf0[5] - bf0[13];
+ bf1[14] = bf0[6] - bf0[14];
+ bf1[15] = bf0[7] - bf0[15];
+ bf1[16] = bf0[16] + bf0[24];
+ bf1[17] = bf0[17] + bf0[25];
+ bf1[18] = bf0[18] + bf0[26];
+ bf1[19] = bf0[19] + bf0[27];
+ bf1[20] = bf0[20] + bf0[28];
+ bf1[21] = bf0[21] + bf0[29];
+ bf1[22] = bf0[22] + bf0[30];
+ bf1[23] = bf0[23] + bf0[31];
+ bf1[24] = bf0[16] - bf0[24];
+ bf1[25] = bf0[17] - bf0[25];
+ bf1[26] = bf0[18] - bf0[26];
+ bf1[27] = bf0[19] - bf0[27];
+ bf1[28] = bf0[20] - bf0[28];
+ bf1[29] = bf0[21] - bf0[29];
+ bf1[30] = bf0[22] - bf0[30];
+ bf1[31] = bf0[23] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit[stage]);
+ bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[16];
+ bf1[1] = bf0[1] + bf0[17];
+ bf1[2] = bf0[2] + bf0[18];
+ bf1[3] = bf0[3] + bf0[19];
+ bf1[4] = bf0[4] + bf0[20];
+ bf1[5] = bf0[5] + bf0[21];
+ bf1[6] = bf0[6] + bf0[22];
+ bf1[7] = bf0[7] + bf0[23];
+ bf1[8] = bf0[8] + bf0[24];
+ bf1[9] = bf0[9] + bf0[25];
+ bf1[10] = bf0[10] + bf0[26];
+ bf1[11] = bf0[11] + bf0[27];
+ bf1[12] = bf0[12] + bf0[28];
+ bf1[13] = bf0[13] + bf0[29];
+ bf1[14] = bf0[14] + bf0[30];
+ bf1[15] = bf0[15] + bf0[31];
+ bf1[16] = bf0[0] - bf0[16];
+ bf1[17] = bf0[1] - bf0[17];
+ bf1[18] = bf0[2] - bf0[18];
+ bf1[19] = bf0[3] - bf0[19];
+ bf1[20] = bf0[4] - bf0[20];
+ bf1[21] = bf0[5] - bf0[21];
+ bf1[22] = bf0[6] - bf0[22];
+ bf1[23] = bf0[7] - bf0[23];
+ bf1[24] = bf0[8] - bf0[24];
+ bf1[25] = bf0[9] - bf0[25];
+ bf1[26] = bf0[10] - bf0[26];
+ bf1[27] = bf0[11] - bf0[27];
+ bf1[28] = bf0[12] - bf0[28];
+ bf1[29] = bf0[13] - bf0[29];
+ bf1[30] = bf0[14] - bf0[30];
+ bf1[31] = bf0[15] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit[stage]);
+ bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[30];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[28];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[26];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[24];
+ bf1[8] = bf0[9];
+ bf1[9] = bf0[22];
+ bf1[10] = bf0[11];
+ bf1[11] = bf0[20];
+ bf1[12] = bf0[13];
+ bf1[13] = bf0[18];
+ bf1[14] = bf0[15];
+ bf1[15] = bf0[16];
+ bf1[16] = bf0[17];
+ bf1[17] = bf0[14];
+ bf1[18] = bf0[19];
+ bf1[19] = bf0[12];
+ bf1[20] = bf0[21];
+ bf1[21] = bf0[10];
+ bf1[22] = bf0[23];
+ bf1[23] = bf0[8];
+ bf1[24] = bf0[25];
+ bf1[25] = bf0[6];
+ bf1[26] = bf0[27];
+ bf1[27] = bf0[4];
+ bf1[28] = bf0[29];
+ bf1[29] = bf0[2];
+ bf1[30] = bf0[31];
+ bf1[31] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/vp10/common/vp10_inv_txfm1d.h b/vp10/common/vp10_inv_txfm1d.h
new file mode 100644
index 0000000..0609b65
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm1d.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM1D_H_
+#define VP10_INV_TXFM1D_H_
+
+#include "vp10/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP10_INV_TXFM1D_H_
diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c
new file mode 100644
index 0000000..c894a42
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm2d.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_txfm.h"
+
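+// Inverse 2-D transform computed as two separable 1-D passes: each row of
+// input is transformed and rounded by shift[0] into buf, then each column of
+// buf is transformed, rounded by shift[1], and added into output.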
+static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
+ int stride, const TXFM_2D_CFG *cfg,
+ int32_t *txfm_buf) {
+ const int txfm_size = cfg->txfm_size;
+ const int8_t *shift = cfg->shift;
+ const int8_t *stage_range_col = cfg->stage_range_col;
+ const int8_t *stage_range_row = cfg->stage_range_row;
+ const int8_t *cos_bit_col = cfg->cos_bit_col;
+ const int8_t *cos_bit_row = cfg->cos_bit_row;
+ const TxfmFunc txfm_func_col = cfg->txfm_func_col;
+ const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+
+  // txfm_buf's length is txfm_size * txfm_size + 2 * txfm_size.
+  // It is laid out as [temp_in | temp_out | buf] and used for intermediate
+  // data buffering.
+ int32_t *temp_in = txfm_buf;
+ int32_t *temp_out = temp_in + txfm_size;
+ int32_t *buf = temp_out + txfm_size;
+ int32_t *buf_ptr = buf;
+ int i, j;
+
+ // Rows
+ for (i = 0; i < txfm_size; ++i) {
+ txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
+ round_shift_array(buf_ptr, txfm_size, -shift[0]);
+ input += txfm_size;
+ buf_ptr += txfm_size;
+ }
+
+ // Columns
+ for (i = 0; i < txfm_size; ++i) {
+ for (j = 0; j < txfm_size; ++j)
+ temp_in[j] = buf[j * txfm_size + i];
+ txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+ round_shift_array(temp_out, txfm_size, -shift[1]);
+ for (j = 0; j < txfm_size; ++j)
+ output[j * stride + i] += temp_out[j];
+ }
+}
+
+void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int txfm_buf[4 * 4 + 4 + 4];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 15, the uint16_t* output buffer
+  // can safely be treated as an int16_t*.
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int txfm_buf[8 * 8 + 8 + 8];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 15, the uint16_t* output buffer
+  // can safely be treated as an int16_t*.
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int txfm_buf[16 * 16 + 16 + 16];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 15, the uint16_t* output buffer
+  // can safely be treated as an int16_t*.
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd) {
+ int txfm_buf[32 * 32 + 32 + 32];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 15, the uint16_t* output buffer
+  // can safely be treated as an int16_t*.
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
+}
diff --git a/vp10/common/vp10_inv_txfm2d.h b/vp10/common/vp10_inv_txfm2d.h
new file mode 100644
index 0000000..1b570ef
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm2d.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_C_H_
+#define VP10_INV_TXFM2D_C_H_
+
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
+void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
+ const int stride, const TXFM_2D_CFG *cfg,
+ const int bd);
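+// Usage sketch (illustrative only; coeffs, pred, and pred_stride are
+// caller-provided), adding a 4x4 DCT_DCT residual into a 10-bit prediction:
+//   vp10_inv_txfm2d_add_4x4(coeffs, pred, pred_stride,
+//                           &inv_txfm_2d_cfg_dct_dct_4, 10);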
+#ifdef __cplusplus
+}
+#endif
+#endif // VP10_INV_TXFM2D_C_H_
diff --git a/vp10/common/vp10_inv_txfm2d_cfg.h b/vp10/common/vp10_inv_txfm2d_cfg.h
new file mode 100644
index 0000000..fc552fe
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm2d_cfg.h
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_CFG_H_
+#define VP10_INV_TXFM2D_CFG_H_
+#include "vp10/common/vp10_inv_txfm1d.h"
+
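+// Each config below is named inv_<column txfm>_<row txfm>_<size>: shift[]
+// holds the two per-pass rounding shifts, stage_range_*[] the allowed bit
+// range at each 1-D stage, and cos_bit_*[] the cosine-table precision used
+// at each stage.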
+// ---------------- config inv_dct_dct_4 ----------------
+static const int8_t inv_shift_dct_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_dct_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 4, // .stage_num_row
+ inv_shift_dct_dct_4, // .shift
+ inv_stage_range_col_dct_dct_4, // .stage_range_col
+ inv_stage_range_row_dct_dct_4, // .stage_range_row
+ inv_cos_bit_col_dct_dct_4, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_4, // .cos_bit_row
+ vp10_idct4_new, // .txfm_func_col
+ vp10_idct4_new}; // .txfm_func_row;
+
+// ---------------- config inv_dct_dct_8 ----------------
+static const int8_t inv_shift_dct_dct_8[2] = {0, -5};
+static const int8_t inv_stage_range_col_dct_dct_8[6] = {17, 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_8[6] = {17, 17, 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ inv_shift_dct_dct_8, // .shift
+ inv_stage_range_col_dct_dct_8, // .stage_range_col
+ inv_stage_range_row_dct_dct_8, // .stage_range_row
+ inv_cos_bit_col_dct_dct_8, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_8, // .cos_bit_row
+ vp10_idct8_new, // .txfm_func_col
+ vp10_idct8_new}; // .txfm_func_row;
+
+// ---------------- config inv_dct_dct_16 ----------------
+static const int8_t inv_shift_dct_dct_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_dct_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_dct_16[8] = {14, 14, 14, 14,
+ 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_16[8] = {14, 14, 14, 14,
+ 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ inv_shift_dct_dct_16, // .shift
+ inv_stage_range_col_dct_dct_16, // .stage_range_col
+ inv_stage_range_row_dct_dct_16, // .stage_range_row
+ inv_cos_bit_col_dct_dct_16, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_16, // .cos_bit_row
+ vp10_idct16_new, // .txfm_func_col
+ vp10_idct16_new}; // .txfm_func_row;
+
+// ---------------- config inv_dct_dct_32 ----------------
+static const int8_t inv_shift_dct_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_dct_32[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_32[10] = {19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_dct_32[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_32[10] = {13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ inv_shift_dct_dct_32, // .shift
+ inv_stage_range_col_dct_dct_32, // .stage_range_col
+ inv_stage_range_row_dct_dct_32, // .stage_range_row
+ inv_cos_bit_col_dct_dct_32, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_32, // .cos_bit_row
+ vp10_idct32_new, // .txfm_func_col
+ vp10_idct32_new}; // .txfm_func_row;
+
+// ---------------- config inv_dct_adst_4 ----------------
+static const int8_t inv_shift_dct_adst_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_adst_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_adst_4[6] = {16, 16, 16,
+ 16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_adst_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 6, // .stage_num_row
+ inv_shift_dct_adst_4, // .shift
+ inv_stage_range_col_dct_adst_4, // .stage_range_col
+ inv_stage_range_row_dct_adst_4, // .stage_range_row
+ inv_cos_bit_col_dct_adst_4, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_4, // .cos_bit_row
+ vp10_idct4_new, // .txfm_func_col
+ vp10_iadst4_new}; // .txfm_func_row;
+
+// ---------------- config inv_dct_adst_8 ----------------
+static const int8_t inv_shift_dct_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_dct_adst_8[6] = {16, 16, 16,
+ 16, 15, 15};
+static const int8_t inv_stage_range_row_dct_adst_8[8] = {17, 17, 17, 17,
+ 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_adst_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 8, // .stage_num_row
+ inv_shift_dct_adst_8, // .shift
+ inv_stage_range_col_dct_adst_8, // .stage_range_col
+ inv_stage_range_row_dct_adst_8, // .stage_range_row
+ inv_cos_bit_col_dct_adst_8, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_8, // .cos_bit_row
+ vp10_idct8_new, // .txfm_func_col
+ vp10_iadst8_new}; // .txfm_func_row;
+
+// ---------------- config inv_dct_adst_16 ----------------
+static const int8_t inv_shift_dct_adst_16[2] = {1, -7};
+static const int8_t inv_stage_range_col_dct_adst_16[8] = {19, 19, 19, 19,
+ 19, 19, 18, 18};
+static const int8_t inv_stage_range_row_dct_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_adst_16[8] = {13, 13, 13, 13,
+ 13, 13, 13, 14};
+static const int8_t inv_cos_bit_row_dct_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 10, // .stage_num_row
+ inv_shift_dct_adst_16, // .shift
+ inv_stage_range_col_dct_adst_16, // .stage_range_col
+ inv_stage_range_row_dct_adst_16, // .stage_range_row
+ inv_cos_bit_col_dct_adst_16, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_16, // .cos_bit_row
+ vp10_idct16_new, // .txfm_func_col
+ vp10_iadst16_new}; // .txfm_func_row;
+
+// ---------------- config inv_dct_adst_32 ----------------
+static const int8_t inv_shift_dct_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_adst_32[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_adst_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_adst_32[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_adst_32[12] = {13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 12, // .stage_num_row
+ inv_shift_dct_adst_32, // .shift
+ inv_stage_range_col_dct_adst_32, // .stage_range_col
+ inv_stage_range_row_dct_adst_32, // .stage_range_row
+ inv_cos_bit_col_dct_adst_32, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_32, // .cos_bit_row
+ vp10_idct32_new, // .txfm_func_col
+ vp10_iadst32_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_adst_4 ----------------
+static const int8_t inv_shift_adst_adst_4[2] = {0, -4};
+static const int8_t inv_stage_range_col_adst_adst_4[6] = {16, 16, 16,
+ 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_4[6] = {16, 16, 16,
+ 16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ inv_shift_adst_adst_4, // .shift
+ inv_stage_range_col_adst_adst_4, // .stage_range_col
+ inv_stage_range_row_adst_adst_4, // .stage_range_row
+ inv_cos_bit_col_adst_adst_4, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_4, // .cos_bit_row
+ vp10_iadst4_new, // .txfm_func_col
+ vp10_iadst4_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_adst_8 ----------------
+static const int8_t inv_shift_adst_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_adst_8[8] = {16, 16, 16, 16,
+ 16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_8[8] = {17, 17, 17, 17,
+ 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ inv_shift_adst_adst_8, // .shift
+ inv_stage_range_col_adst_adst_8, // .stage_range_col
+ inv_stage_range_row_adst_adst_8, // .stage_range_row
+ inv_cos_bit_col_adst_adst_8, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_8, // .cos_bit_row
+ vp10_iadst8_new, // .txfm_func_col
+ vp10_iadst8_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_adst_16 ----------------
+static const int8_t inv_shift_adst_adst_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_adst_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ inv_shift_adst_adst_16, // .shift
+ inv_stage_range_col_adst_adst_16, // .stage_range_col
+ inv_stage_range_row_adst_adst_16, // .stage_range_row
+ inv_cos_bit_col_adst_adst_16, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_16, // .cos_bit_row
+ vp10_iadst16_new, // .txfm_func_col
+ vp10_iadst16_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_adst_32 ----------------
+static const int8_t inv_shift_adst_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_adst_32[12] = {
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_adst_32[12] = {14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_32[12] = {13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ inv_shift_adst_adst_32, // .shift
+ inv_stage_range_col_adst_adst_32, // .stage_range_col
+ inv_stage_range_row_adst_adst_32, // .stage_range_row
+ inv_cos_bit_col_adst_adst_32, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_32, // .cos_bit_row
+ vp10_iadst32_new, // .txfm_func_col
+ vp10_iadst32_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_dct_4 ----------------
+static const int8_t inv_shift_adst_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_adst_dct_4[6] = {17, 17, 17,
+ 17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_dct_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 4, // .stage_num_row
+ inv_shift_adst_dct_4, // .shift
+ inv_stage_range_col_adst_dct_4, // .stage_range_col
+ inv_stage_range_row_adst_dct_4, // .stage_range_row
+ inv_cos_bit_col_adst_dct_4, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_4, // .cos_bit_row
+ vp10_iadst4_new, // .txfm_func_col
+ vp10_idct4_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_dct_8 ----------------
+static const int8_t inv_shift_adst_dct_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_dct_8[8] = {16, 16, 16, 16,
+ 16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_dct_8[6] = {17, 17, 17,
+ 17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_dct_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 6, // .stage_num_row
+ inv_shift_adst_dct_8, // .shift
+ inv_stage_range_col_adst_dct_8, // .stage_range_col
+ inv_stage_range_row_adst_dct_8, // .stage_range_row
+ inv_cos_bit_col_adst_dct_8, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_8, // .cos_bit_row
+ vp10_iadst8_new, // .txfm_func_col
+ vp10_idct8_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_dct_16 ----------------
+static const int8_t inv_shift_adst_dct_16[2] = {-1, -5};
+static const int8_t inv_stage_range_col_adst_dct_16[10] = {17, 17, 17, 17, 17,
+ 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_dct_16[10] = {15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_16[8] = {14, 14, 14, 14,
+ 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 8, // .stage_num_row
+ inv_shift_adst_dct_16, // .shift
+ inv_stage_range_col_adst_dct_16, // .stage_range_col
+ inv_stage_range_row_adst_dct_16, // .stage_range_row
+ inv_cos_bit_col_adst_dct_16, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_16, // .cos_bit_row
+ vp10_iadst16_new, // .txfm_func_col
+ vp10_idct16_new}; // .txfm_func_row;
+
+// ---------------- config inv_adst_dct_32 ----------------
+static const int8_t inv_shift_adst_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_dct_32[12] = {
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_dct_32[10] = {19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_dct_32[12] = {14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_dct_32[10] = {13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 10, // .stage_num_row
+ inv_shift_adst_dct_32, // .shift
+ inv_stage_range_col_adst_dct_32, // .stage_range_col
+ inv_stage_range_row_adst_dct_32, // .stage_range_row
+ inv_cos_bit_col_adst_dct_32, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_32, // .cos_bit_row
+ vp10_iadst32_new, // .txfm_func_col
+ vp10_idct32_new}; // .txfm_func_row;
+
+#endif // VP10_INV_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h
new file mode 100644
index 0000000..b4fd753
--- /dev/null
+++ b/vp10/common/vp10_txfm.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_TXFM_H_
+#define VP10_TXFM_H_
+
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+static const int cos_bit_min = 10;
+static const int cos_bit_max = 16;
+
+// cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
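+// e.g., cospi_arr[0][32] = (int)round(cos(M_PI / 4) * 1024) = 724.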
+static const int32_t cospi_arr[7][64] =
+ {{ 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009,
+ 1004, 999, 993, 987, 980, 972, 964, 955,
+ 946, 936, 926, 915, 903, 891, 878, 865,
+ 851, 837, 822, 807, 792, 775, 759, 742,
+ 724, 706, 688, 669, 650, 630, 610, 590,
+ 569, 548, 526, 505, 483, 460, 438, 415,
+ 392, 369, 345, 321, 297, 273, 249, 224,
+ 200, 175, 150, 125, 100, 75, 50, 25},
+ { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018,
+ 2009, 1998, 1987, 1974, 1960, 1945, 1928, 1911,
+ 1892, 1872, 1851, 1829, 1806, 1782, 1757, 1730,
+ 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483,
+ 1448, 1412, 1375, 1338, 1299, 1260, 1220, 1179,
+ 1138, 1096, 1053, 1009, 965, 921, 876, 830,
+ 784, 737, 690, 642, 595, 546, 498, 449,
+ 400, 350, 301, 251, 201, 151, 100, 50},
+ { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036,
+ 4017, 3996, 3973, 3948, 3920, 3889, 3857, 3822,
+ 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
+ 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967,
+ 2896, 2824, 2751, 2675, 2598, 2520, 2440, 2359,
+ 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660,
+ 1567, 1474, 1380, 1285, 1189, 1092, 995, 897,
+ 799, 700, 601, 501, 401, 301, 201, 101},
+ { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071,
+ 8035, 7993, 7946, 7895, 7839, 7779, 7713, 7643,
+ 7568, 7489, 7405, 7317, 7225, 7128, 7027, 6921,
+ 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933,
+ 5793, 5649, 5501, 5351, 5197, 5040, 4880, 4717,
+ 4551, 4383, 4212, 4038, 3862, 3683, 3503, 3320,
+ 3135, 2948, 2760, 2570, 2378, 2185, 1990, 1795,
+ 1598, 1401, 1202, 1003, 803, 603, 402, 201},
+ { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143,
+ 16069, 15986, 15893, 15791, 15679, 15557, 15426, 15286,
+ 15137, 14978, 14811, 14635, 14449, 14256, 14053, 13842,
+ 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866,
+ 11585, 11297, 11003, 10702, 10394, 10080, 9760, 9434,
+ 9102, 8765, 8423, 8076, 7723, 7366, 7005, 6639,
+ 6270, 5897, 5520, 5139, 4756, 4370, 3981, 3590,
+ 3196, 2801, 2404, 2006, 1606, 1205, 804, 402},
+ { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286,
+ 32138, 31972, 31786, 31581, 31357, 31114, 30853, 30572,
+ 30274, 29957, 29622, 29269, 28899, 28511, 28106, 27684,
+ 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732,
+ 23170, 22595, 22006, 21403, 20788, 20160, 19520, 18868,
+ 18205, 17531, 16846, 16151, 15447, 14733, 14010, 13279,
+ 12540, 11793, 11039, 10279, 9512, 8740, 7962, 7180,
+ 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804},
+ { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571,
+ 64277, 63944, 63572, 63162, 62714, 62228, 61705, 61145,
+ 60547, 59914, 59244, 58538, 57798, 57022, 56212, 55368,
+ 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464,
+ 46341, 45190, 44011, 42806, 41576, 40320, 39040, 37736,
+ 36410, 35062, 33692, 32303, 30893, 29466, 28020, 26558,
+ 25080, 23586, 22078, 20557, 19024, 17479, 15924, 14359,
+ 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608}};
+
+static INLINE int32_t round_shift(int32_t value, int bit) {
+  // For value >= 0,
+  // there are two versions of rounding:
+  // 1) (value + (1 << (bit - 1)) - 1) >> bit
+  // 2) (value + (1 << (bit - 1))) >> bit
+  // Both methods are nearly unbiased; however, the first version has a
+  // slight advantage because it rounds numbers toward zero.
+  // For value < 0, we also choose the version that rounds numbers
+  // toward zero.
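+  // e.g., round_shift(5, 1) == 2 and round_shift(-5, 1) == -2.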
+ if (bit > 0) {
+ if (value >= 0)
+ return (value + (1 << (bit - 1)) - 1) >> bit;
+ else
+ return ((value - (1 << (bit - 1))) >> bit) + 1;
+ } else {
+ return value << (-bit);
+ }
+}
+
+static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
+ int i;
+ if (bit == 0) {
+ return;
+ } else {
+ for (i = 0; i < size; i++) {
+ arr[i] = round_shift(arr[i], bit);
+ }
+ }
+}
+
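+// One butterfly output: round_shift(w0 * in0 + w1 * in1, bit), with an
+// optional check that the weighted sum still fits in 32 bits.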
+static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
+ int bit) {
+ int32_t result_32 = w0 * in0 + w1 * in1;
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+ int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+ if (result_32 != result_64) {
+    printf(
+        "%s overflow result_32: %d result_64: %lld w0: %d in0: %d w1: %d "
+        "in1: %d\n",
+        __func__, result_32, (long long)result_64, w0, in0, w1, in1);
+ assert(0 && "half_btf overflow");
+ }
+#endif
+ return round_shift(result_32, bit);
+}
+
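+// Position of the highest set bit, i.e. floor(log2(x)) for x > 0 and -1 for
+// x == 0; e.g., get_max_bit(8) == 3.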
+static INLINE int get_max_bit(int x) {
+ int max_bit = -1;
+ while (x) {
+ x = x >> 1;
+ max_bit++;
+ }
+ return max_bit;
+}
+
+// TODO(angiebird): implement SSE
+static INLINE void clamp_block(int16_t *block, int block_size, int stride,
+ int low, int high) {
+ int i, j;
+ for (i = 0; i < block_size; ++i) {
+ for (j = 0; j < block_size; ++j) {
+ block[i * stride + j] = clamp(block[i * stride + j], low, high);
+ }
+ }
+}
+
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
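+// Static description of one 2-D transform: block size, stage counts, per-pass
+// shifts, per-stage ranges and cosine precisions, and the two 1-D kernels
+// applied to the columns and rows.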
+typedef struct TXFM_2D_CFG {
+ const int txfm_size;
+ const int stage_num_col;
+ const int stage_num_row;
+
+ const int8_t *shift;
+ const int8_t *stage_range_col;
+ const int8_t *stage_range_row;
+ const int8_t *cos_bit_col;
+ const int8_t *cos_bit_row;
+ const TxfmFunc txfm_func_col;
+ const TxfmFunc txfm_func_row;
+} TXFM_2D_CFG;
+
+#endif // VP10_TXFM_H_
diff --git a/vp10/common/x86/idct_intrin_sse2.c b/vp10/common/x86/idct_intrin_sse2.c
index a2c674b..900f091 100644
--- a/vp10/common/x86/idct_intrin_sse2.c
+++ b/vp10/common/x86/idct_intrin_sse2.c
@@ -11,6 +11,54 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
+#include "vp10/common/enums.h"
+
+#if CONFIG_EXT_TX
+// Reverse the eight 16-bit words in an __m128i.
+static INLINE __m128i mm_reverse_epi16(const __m128i x) {
+ const __m128i a = _mm_shufflelo_epi16(x, 0x1b);
+ const __m128i b = _mm_shufflehi_epi16(a, 0x1b);
+ return _mm_shuffle_epi32(b, 0x4e);
+}
+
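+// Mirror each 4-sample row of a 4x4 block left-right; in[0] and in[1] each
+// hold two rows of 16-bit samples, one row per 64-bit half.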
+static INLINE void fliplr_4x4(__m128i in[2]) {
+ in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
+ in[0] = _mm_shufflehi_epi16(in[0], 0x1b);
+ in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
+ in[1] = _mm_shufflehi_epi16(in[1], 0x1b);
+}
+
+static INLINE void fliplr_8x8(__m128i in[8]) {
+ in[0] = mm_reverse_epi16(in[0]);
+ in[1] = mm_reverse_epi16(in[1]);
+ in[2] = mm_reverse_epi16(in[2]);
+ in[3] = mm_reverse_epi16(in[3]);
+
+ in[4] = mm_reverse_epi16(in[4]);
+ in[5] = mm_reverse_epi16(in[5]);
+ in[6] = mm_reverse_epi16(in[6]);
+ in[7] = mm_reverse_epi16(in[7]);
+}
+
+static INLINE void fliplr_16x8(__m128i in[16]) {
+ fliplr_8x8(&in[0]);
+ fliplr_8x8(&in[8]);
+}
+
+#define FLIPLR_16x16(in0, in1) do { \
+ __m128i *tmp; \
+ fliplr_16x8(in0); \
+ fliplr_16x8(in1); \
+ tmp = (in0); \
+ (in0) = (in1); \
+ (in1) = tmp; \
+} while (0)
+
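+// Flip the destination vertically: point dest at its last row and negate the
+// stride so rows are written bottom-up. Together with the fliplr_* helpers
+// above, this implements the FLIPADST tx_types without extra 1-D transforms.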
+#define FLIPUD_PTR(dest, stride, size) do { \
+ (dest) = (dest) + ((size) - 1) * (stride); \
+ (stride) = - (stride); \
+} while (0)
+#endif  // CONFIG_EXT_TX
void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
@@ -22,22 +70,50 @@
in[1] = load_input_data(input + 8);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct4_sse2(in);
idct4_sse2(in);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct4_sse2(in);
iadst4_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst4_sse2(in);
idct4_sse2(in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
iadst4_sse2(in);
iadst4_sse2(in);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ break;
+ case DCT_FLIPADST:
+ iadst4_sse2(in);
+ idct4_sse2(in);
+ fliplr_4x4(in);
+ break;
+ case FLIPADST_FLIPADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ fliplr_4x4(in);
+ break;
+ case ADST_FLIPADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ fliplr_4x4(in);
+ break;
+ case FLIPADST_ADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -52,12 +128,12 @@
// Reconstruction and Store
{
- __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
+ __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
__m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
- d0 = _mm_unpacklo_epi32(d0,
- _mm_cvtsi32_si128(*(const int *)(dest + stride)));
- d2 = _mm_unpacklo_epi32(
- d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)));
+ __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
+ d0 = _mm_unpacklo_epi32(d0, d1);
+ d2 = _mm_unpacklo_epi32(d2, d3);
d0 = _mm_unpacklo_epi8(d0, zero);
d2 = _mm_unpacklo_epi8(d2, zero);
d0 = _mm_add_epi16(d0, in[0]);
@@ -94,22 +170,50 @@
in[7] = load_input_data(input + 8 * 7);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct8_sse2(in);
idct8_sse2(in);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct8_sse2(in);
iadst8_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst8_sse2(in);
idct8_sse2(in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
iadst8_sse2(in);
iadst8_sse2(in);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ break;
+ case DCT_FLIPADST:
+ iadst8_sse2(in);
+ idct8_sse2(in);
+ fliplr_8x8(in);
+ break;
+ case FLIPADST_FLIPADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ fliplr_8x8(in);
+ break;
+ case ADST_FLIPADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ fliplr_8x8(in);
+ break;
+ case FLIPADST_ADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -146,29 +250,59 @@
void vp10_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride, int tx_type) {
- __m128i in0[16], in1[16];
+ __m128i in[32];
+ __m128i *in0 = &in[0];
+ __m128i *in1 = &in[16];
load_buffer_8x16(input, in0);
input += 8;
load_buffer_8x16(input, in1);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
iadst16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ break;
+ case DCT_FLIPADST:
+ iadst16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case FLIPADST_FLIPADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case ADST_FLIPADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case FLIPADST_ADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
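One subtlety in the 16x16 case: `in0` holds the left eight columns and `in1` the right eight, so a left-right flip must both reverse the lanes inside each half and exchange the halves; reversing alone would mirror each half in place. That is why the buffers were changed above to a single `in[32]` array with swappable `in0`/`in1` pointers, which `FLIPLR_16x16` exchanges after the lane reversal. A usage sketch with the names from the function above:

    __m128i in[32];
    __m128i *in0 = &in[0];    /* columns 0-7 of the 16x16 block */
    __m128i *in1 = &in[16];   /* columns 8-15                   */
    /* ... row/column transforms ... */
    FLIPLR_16x16(in0, in1);   /* lanes reversed and halves swapped:
                                 in0 now addresses the old right half */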
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 1c3f182..ec1a5fb 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -61,13 +61,26 @@
cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
cm->comp_fixed_ref = ALTREF_FRAME;
cm->comp_var_ref[0] = LAST_FRAME;
+#if CONFIG_EXT_REFS
+ cm->comp_var_ref[1] = LAST2_FRAME;
+ cm->comp_var_ref[2] = LAST3_FRAME;
+ cm->comp_var_ref[3] = LAST4_FRAME;
+ cm->comp_var_ref[4] = GOLDEN_FRAME;
+#else
cm->comp_var_ref[1] = GOLDEN_FRAME;
+#endif // CONFIG_EXT_REFS
} else if (cm->ref_frame_sign_bias[LAST_FRAME] ==
cm->ref_frame_sign_bias[ALTREF_FRAME]) {
+#if CONFIG_EXT_REFS
+ assert(0);
+#endif // CONFIG_EXT_REFS
cm->comp_fixed_ref = GOLDEN_FRAME;
cm->comp_var_ref[0] = LAST_FRAME;
cm->comp_var_ref[1] = ALTREF_FRAME;
} else {
+#if CONFIG_EXT_REFS
+ assert(0);
+#endif // CONFIG_EXT_REFS
cm->comp_fixed_ref = LAST_FRAME;
cm->comp_var_ref[0] = GOLDEN_FRAME;
cm->comp_var_ref[1] = ALTREF_FRAME;
@@ -83,18 +96,9 @@
return data > max ? max : data;
}
-#if CONFIG_MISC_FIXES
static TX_MODE read_tx_mode(struct vpx_read_bit_buffer *rb) {
return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2);
}
-#else
-static TX_MODE read_tx_mode(vpx_reader *r) {
- TX_MODE tx_mode = vpx_read_literal(r, 2);
- if (tx_mode == ALLOW_32X32)
- tx_mode += vpx_read_bit(r);
- return tx_mode;
-}
-#endif
static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) {
int i, j;
@@ -120,13 +124,42 @@
}
static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
- int i, j;
+ int i;
+#if CONFIG_REF_MV
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->newmv_prob[i]);
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->zeromv_prob[i]);
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->refmv_prob[i]);
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->drl_prob0[i]);
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->drl_prob1[i]);
+#if CONFIG_EXT_INTER
+ vp10_diff_update_prob(r, &fc->new2mv_prob);
+#endif // CONFIG_EXT_INTER
+#else
+ int j;
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
for (j = 0; j < INTER_MODES - 1; ++j)
vp10_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
+#endif
}
-#if CONFIG_MISC_FIXES
+#if CONFIG_EXT_INTER
+static void read_inter_compound_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
+ int i, j;
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (j = 0; j < INTER_MODE_CONTEXTS; ++j) {
+ for (i = 0; i < INTER_COMPOUND_MODES - 1; ++i) {
+ vp10_diff_update_prob(r, &fc->inter_compound_mode_probs[j][i]);
+ }
+ }
+ }
+}
+#endif // CONFIG_EXT_INTER
+
static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm,
struct vpx_read_bit_buffer *rb) {
if (is_compound_reference_allowed(cm)) {
@@ -137,47 +170,36 @@
return SINGLE_REFERENCE;
}
}
-#else
-static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm,
- vpx_reader *r) {
- if (is_compound_reference_allowed(cm)) {
- return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT
- : COMPOUND_REFERENCE)
- : SINGLE_REFERENCE;
- } else {
- return SINGLE_REFERENCE;
- }
-}
-#endif
static void read_frame_reference_mode_probs(VP10_COMMON *cm, vpx_reader *r) {
FRAME_CONTEXT *const fc = cm->fc;
- int i;
+ int i, j;
if (cm->reference_mode == REFERENCE_MODE_SELECT)
for (i = 0; i < COMP_INTER_CONTEXTS; ++i)
vp10_diff_update_prob(r, &fc->comp_inter_prob[i]);
- if (cm->reference_mode != COMPOUND_REFERENCE)
+ if (cm->reference_mode != COMPOUND_REFERENCE) {
for (i = 0; i < REF_CONTEXTS; ++i) {
- vp10_diff_update_prob(r, &fc->single_ref_prob[i][0]);
- vp10_diff_update_prob(r, &fc->single_ref_prob[i][1]);
+ for (j = 0; j < (SINGLE_REFS - 1); ++j) {
+ vp10_diff_update_prob(r, &fc->single_ref_prob[i][j]);
+ }
}
+ }
- if (cm->reference_mode != SINGLE_REFERENCE)
- for (i = 0; i < REF_CONTEXTS; ++i)
- vp10_diff_update_prob(r, &fc->comp_ref_prob[i]);
+ if (cm->reference_mode != SINGLE_REFERENCE) {
+ for (i = 0; i < REF_CONTEXTS; ++i) {
+ for (j = 0; j < (COMP_REFS - 1); ++j) {
+ vp10_diff_update_prob(r, &fc->comp_ref_prob[i][j]);
+ }
+ }
+ }
}
static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) {
int i;
for (i = 0; i < n; ++i)
-#if CONFIG_MISC_FIXES
vp10_diff_update_prob(r, &p[i]);
-#else
- if (vpx_read(r, MV_UPDATE_PROB))
- p[i] = (vpx_read_literal(r, 7) << 1) | 1;
-#endif
}
static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) {
@@ -214,7 +236,7 @@
uint8_t *dst, int stride,
int eob, int block) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block);
+ TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
const int seg_id = xd->mi[0]->mbmi.segment_id;
if (eob > 0) {
tran_low_t *const dqcoeff = pd->dqcoeff;
@@ -348,7 +370,12 @@
}
static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
+#if CONFIG_ANS
+ const rans_dec_lut *const token_tab,
+ struct AnsDecoder *const r,
+#else
vpx_reader *r,
+#endif // CONFIG_ANS
MB_MODE_INFO *const mbmi,
int plane,
int row, int col,
@@ -369,25 +396,96 @@
col, row, plane);
if (!mbmi->skip) {
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx);
- const scan_order *sc = get_scan(tx_size, tx_type);
- const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size,
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 0);
+ const int eob = vp10_decode_block_tokens(xd,
+#if CONFIG_ANS
+ token_tab,
+#endif // CONFIG_ANS
+ plane, sc, col, row, tx_size,
r, mbmi->segment_id);
inverse_transform_block_intra(xd, plane, tx_type, tx_size,
dst, pd->dst.stride, eob);
}
}
-static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r,
+#if CONFIG_VAR_TX
+static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r,
+ MB_MODE_INFO *const mbmi,
+ int plane, BLOCK_SIZE plane_bsize,
+ int block, int blk_row, int blk_col,
+ TX_SIZE tx_size, int *eob_total) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_idx];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 1);
+ const int eob = vp10_decode_block_tokens(xd, plane, sc,
+ blk_col, blk_row, tx_size,
+ r, mbmi->segment_id);
+ inverse_transform_block_inter(xd, plane, tx_size,
+ &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
+ pd->dst.stride, eob, block);
+ *eob_total += eob;
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block + i * step,
+ offsetr, offsetc, tx_size - 1, eob_total);
+ }
+ }
+}
+#endif // CONFIG_VAR_TX
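`decode_reconstruct_tx` walks the variable-transform-size tree: when the node's `tx_size` matches the size coded for this block, it entropy-decodes and inverse-transforms one transform block; otherwise it recurses into four children one size down, with `step` advancing the coefficient-block index by the number of 4x4 units a child covers. A stripped-down sketch of just the quad-tree walk (stub leaf; offsets in 4x4-block units, so a TX_SIZE-t block spans 1 << t units per side):

    static void visit_tx_tree(int blk_row, int blk_col,
                              int tx_size, int coded_tx_size) {
      int i;
      if (tx_size == coded_tx_size) {
        /* Leaf: decode tokens and inverse-transform this block. */
        return;
      }
      for (i = 0; i < 4; ++i) {
        const int half = 1 << (tx_size - 1);  /* child offset */
        visit_tx_tree(blk_row + (i >> 1) * half,
                      blk_col + (i & 1) * half,
                      tx_size - 1, coded_tx_size);
      }
    }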
+
+static int reconstruct_inter_block(MACROBLOCKD *const xd,
+#if CONFIG_ANS
+ const rans_dec_lut *const token_tab,
+ struct AnsDecoder *const r,
+#else
+ vpx_reader *r,
+#endif
MB_MODE_INFO *const mbmi, int plane,
int row, int col, TX_SIZE tx_size) {
struct macroblockd_plane *const pd = &xd->plane[plane];
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
int block_idx = (row << 1) + col;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx);
- const scan_order *sc = get_scan(tx_size, tx_type);
- const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size, r,
- mbmi->segment_id);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 1);
+ const int eob = vp10_decode_block_tokens(xd,
+#if CONFIG_ANS
+ token_tab,
+#endif
+ plane, sc, col, row, tx_size, r,
+ mbmi->segment_id);
inverse_transform_block_inter(xd, plane, tx_size,
&pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
@@ -492,7 +590,7 @@
int border_offset,
uint8_t *const dst, int dst_buf_stride,
int subpel_x, int subpel_y,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
const struct scale_factors *sf,
MACROBLOCKD *xd,
int w, int h, int ref, int xs, int ys) {
@@ -511,20 +609,23 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
+ subpel_y, sf, w, h, ref, interp_filter,
+ xs, ys, xd->bd);
} else {
inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys);
+ subpel_y, sf, w, h, ref, interp_filter, xs, ys);
}
}
+
#else
+
static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
int x0, int y0, int b_w, int b_h,
int frame_width, int frame_height,
int border_offset,
uint8_t *const dst, int dst_buf_stride,
int subpel_x, int subpel_y,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
const struct scale_factors *sf,
int w, int h, int ref, int xs, int ys) {
DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
@@ -535,14 +636,14 @@
buf_ptr = mc_buf + border_offset;
inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys);
+ subpel_y, sf, w, h, ref, interp_filter, xs, ys);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
int plane, int bw, int bh, int x,
int y, int w, int h, int mi_x, int mi_y,
- const InterpKernel *kernel,
+ const INTERP_FILTER interp_filter,
const struct scale_factors *sf,
struct buf_2d *pre_buf,
struct buf_2d *dst_buf, const MV* mv,
@@ -555,6 +656,9 @@
int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
buf_stride, subpel_x, subpel_y;
uint8_t *ref_frame, *buf_ptr;
+#if CONFIG_EXT_INTERP
+ const int i_filter = IsInterpolatingFilter(xd->mi[0]->mbmi.interp_filter);
+#endif // CONFIG_EXT_INTERP
// Get reference frame pointer, width and height.
if (plane == 0) {
@@ -624,6 +728,9 @@
// Do border extension if there is motion or the
// width/height is not a multiple of 8 pixels.
if (is_scaled || scaled_mv.col || scaled_mv.row ||
+#if CONFIG_EXT_INTERP
+ !i_filter ||
+#endif
(frame_width & 0x7) || (frame_height & 0x7)) {
int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
@@ -631,15 +738,27 @@
int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
int x_pad = 0, y_pad = 0;
- if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
- x0 -= VP9_INTERP_EXTEND - 1;
- x1 += VP9_INTERP_EXTEND;
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ int filter_size = filter_params.tap;
+
+ if (subpel_x ||
+#if CONFIG_EXT_INTERP
+ !i_filter ||
+#endif
+ (sf->x_step_q4 != SUBPEL_SHIFTS)) {
+ x0 -= filter_size / 2 - 1;
+ x1 += filter_size / 2;
x_pad = 1;
}
- if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
- y0 -= VP9_INTERP_EXTEND - 1;
- y1 += VP9_INTERP_EXTEND;
+ if (subpel_y ||
+#if CONFIG_EXT_INTERP
+ !i_filter ||
+#endif
+ (sf->y_step_q4 != SUBPEL_SHIFTS)) {
+ y0 -= filter_size / 2 - 1;
+ y1 += filter_size / 2;
y_pad = 1;
}
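This padding generalizes the old fixed `VP9_INTERP_EXTEND` logic to the actual filter length: an N-tap interpolation filter reads N/2 - 1 samples before the first output position and N/2 after the last. With the standard 8-tap filters the new expressions reproduce the previous constants exactly, and longer EXT_INTERP filters simply pad more. Worked through for 8 taps:

    int filter_size = 8;                   /* filter_params.tap            */
    int pad_before = filter_size / 2 - 1;  /* 3, old VP9_INTERP_EXTEND - 1 */
    int pad_after  = filter_size / 2;      /* 4, old VP9_INTERP_EXTEND     */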
@@ -656,13 +775,14 @@
const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0;
const int b_w = x1 - x0 + 1;
const int b_h = y1 - y0 + 1;
- const int border_offset = y_pad * 3 * b_w + x_pad * 3;
+ const int border_offset = y_pad * (filter_size / 2 - 1) * b_w +
+ x_pad * (filter_size / 2 - 1);
extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h,
frame_width, frame_height, border_offset,
dst, dst_buf->stride,
subpel_x, subpel_y,
- kernel, sf,
+ interp_filter, sf,
#if CONFIG_VP9_HIGHBITDEPTH
xd,
#endif
@@ -681,14 +801,15 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
+ subpel_y, sf, w, h, ref, interp_filter,
+ xs, ys, xd->bd);
} else {
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys);
+ subpel_y, sf, w, h, ref, interp_filter, xs, ys);
}
#else
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys);
+ subpel_y, sf, w, h, ref, interp_filter, xs, ys);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
@@ -699,7 +820,7 @@
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
const MODE_INFO *mi = xd->mi[0];
- const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter];
+ const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
const int is_compound = has_second_ref(&mi->mbmi);
@@ -734,22 +855,66 @@
for (x = 0; x < num_4x4_w; ++x) {
const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
- 4 * x, 4 * y, pw, ph, mi_x, mi_y, kernel,
- sf, pre_buf, dst_buf, &mv,
- ref_frame_buf, is_scaled, ref);
+ 4 * x, 4 * y, pw, ph, mi_x, mi_y,
+ interp_filter, sf, pre_buf, dst_buf,
+ &mv, ref_frame_buf, is_scaled, ref);
}
}
} else {
const MV mv = mi->mbmi.mv[ref].as_mv;
dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
- 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel,
- sf, pre_buf, dst_buf, &mv, ref_frame_buf,
+ 0, 0, n4w_x4, n4h_x4, mi_x, mi_y,
+ interp_filter, sf, pre_buf, dst_buf,
+ &mv, ref_frame_buf,
is_scaled, ref);
}
}
}
}
+#if CONFIG_SUPERTX
+static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ int block) {
+ // Prediction function used in supertx:
+ // Use the mv at current block (which is less than 8x8)
+ int plane;
+ const int mi_x = mi_col * MI_SIZE;
+ const int mi_y = mi_row * MI_SIZE;
+ const MODE_INFO *mi = xd->mi[0];
+ const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
+ const int is_compound = has_second_ref(&mi->mbmi);
+ // For sub8x8 uv:
+ // Skip uv prediction in supertx except the first block (block = 0)
+ int max_plane = block ? 1 : MAX_MB_PLANE;
+
+ for (plane = 0; plane < max_plane; ++plane) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ struct buf_2d *const dst_buf = &pd->dst;
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+
+ const int n4w_x4 = 4 * num_4x4_w;
+ const int n4h_x4 = 4 * num_4x4_h;
+ int ref;
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ const int idx = xd->block_refs[ref]->idx;
+ BufferPool *const pool = pbi->common.buffer_pool;
+ RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
+ const int is_scaled = vp10_is_scaled(sf);
+ const MV mv = average_split_mvs(pd, mi, ref, block);
+ dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
+ 0, 0, n4w_x4, n4h_x4, mi_x, mi_y,
+ interp_filter, sf, pre_buf, dst_buf,
+ &mv, ref_frame_buf, is_scaled, ref);
+ }
+ }
+}
+#endif
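For sub8x8 blocks inside a supertx partition, luma is predicted once per sub-block while chroma is predicted only alongside sub-block 0, since the chroma plane of a sub8x8 block is covered by the single motion vector that `average_split_mvs` produces. The `max_plane` gate above implements this; in sketch form:

    /* block == 0: predict all planes; block > 0: luma only. */
    int max_plane = (block == 0) ? MAX_MB_PLANE : 1;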
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
@@ -799,6 +964,11 @@
set_skip_context(xd, mi_row, mi_col);
+
+#if CONFIG_VAR_TX
+ xd->max_tx_size = max_txsize_lookup[bsize];
+#endif
+
// Distance of Mb to the various image edges. These are specified to 8th pel
// as they are always compared to values that are in 1/8th pel units
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
@@ -807,9 +977,596 @@
return &xd->mi[0]->mbmi;
}
+#if CONFIG_SUPERTX
+static MB_MODE_INFO *set_offsets_extend(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd,
+ const TileInfo *const tile,
+ BLOCK_SIZE bsize_pred,
+ int mi_row_pred, int mi_col_pred,
+ int mi_row_ori, int mi_col_ori) {
+ // Used in supertx
+ // (mi_row_ori, mi_col_ori): location for mv
+ // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
+ const int bw = num_8x8_blocks_wide_lookup[bsize_pred];
+ const int bh = num_8x8_blocks_high_lookup[bsize_pred];
+ const int offset = mi_row_ori * cm->mi_stride + mi_col_ori;
+ const int bwl = b_width_log2_lookup[bsize_pred];
+ const int bhl = b_height_log2_lookup[bsize_pred];
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+ set_mi_row_col(xd, tile, mi_row_pred, bh, mi_col_pred, bw,
+ cm->mi_rows, cm->mi_cols);
+
+ xd->up_available = (mi_row_ori != 0);
+ xd->left_available = (mi_col_ori > tile->mi_col_start);
+
+ set_plane_n4(xd, bw, bh, bwl, bhl);
+
+ return &xd->mi[0]->mbmi;
+}
+
+static MB_MODE_INFO *set_mb_offsets(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd,
+ BLOCK_SIZE bsize,
+ int mi_row, int mi_col,
+ int bw, int bh,
+ int x_mis, int y_mis) {
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ const TileInfo *const tile = &xd->tile;
+ int x, y;
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+ xd->mi[0]->mbmi.sb_type = bsize;
+  for (y = 0; y < y_mis; ++y)
+    for (x = !y; x < x_mis; ++x)  // row 0 starts at x = 1: mi[0] set above
+      xd->mi[y * cm->mi_stride + x] = xd->mi[0];
+
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+ return &xd->mi[0]->mbmi;
+}
+
+static void set_offsets_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ const TileInfo *const tile,
+ BLOCK_SIZE bsize, int mi_row, int mi_col) {
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ const int bwl = b_width_log2_lookup[bsize];
+ const int bhl = b_height_log2_lookup[bsize];
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+
+ set_plane_n4(xd, bw, bh, bwl, bhl);
+
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+}
+
+static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int txfm,
+ int skip) {
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ int x, y;
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+
+ for (y = 0; y < y_mis; ++y)
+ for (x = 0; x < x_mis; ++x) {
+ xd->mi[y * cm->mi_stride + x]->mbmi.skip = skip;
+ xd->mi[y * cm->mi_stride + x]->mbmi.tx_type = txfm;
+ }
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
+ set_txfm_ctx(xd->left_txfm_context, xd->mi[0]->mbmi.tx_size, bh);
+ set_txfm_ctx(xd->above_txfm_context, xd->mi[0]->mbmi.tx_size, bw);
+#endif
+}
+
+static void set_ref(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ int idx, int mi_row, int mi_col) {
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME];
+ xd->block_refs[idx] = ref_buffer;
+ if (!vp10_is_valid_scale(&ref_buffer->sf))
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Invalid scale factors");
+ vp10_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col,
+ &ref_buffer->sf);
+ xd->corrupted |= ref_buffer->buf->corrupted;
+}
+
+static void dec_predict_b_extend(
+ VP10Decoder *const pbi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, int block,
+ int mi_row_ori, int mi_col_ori,
+ int mi_row_pred, int mi_col_pred,
+ int mi_row_top, int mi_col_top,
+ uint8_t * dst_buf[3], int dst_stride[3],
+ BLOCK_SIZE bsize_top,
+ BLOCK_SIZE bsize_pred,
+ int b_sub8x8, int bextend) {
+ // Used in supertx
+ // (mi_row_ori, mi_col_ori): location for mv
+ // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
+ // (mi_row_top, mi_col_top, bsize_top): region of the top partition size
+  // block: index of the sub8x8 sub-block within ori (used when b_sub8x8)
+  // b_sub8x8: 1 if ori is a sub8x8 block; 0 otherwise
+  // bextend: 1 if the predicted region is an extension of ori; 0 if it is
+  //          ori itself
+ int r = (mi_row_pred - mi_row_top) * MI_SIZE;
+ int c = (mi_col_pred - mi_col_top) * MI_SIZE;
+ const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top];
+ const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top];
+ MB_MODE_INFO *mbmi;
+ VP10_COMMON *const cm = &pbi->common;
+
+ if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top ||
+ mi_row_pred >= mi_row_top + mi_height_top ||
+ mi_col_pred >= mi_col_top + mi_width_top ||
+ mi_row_pred >= cm->mi_rows || mi_col_pred >= cm->mi_cols)
+ return;
+
+ mbmi = set_offsets_extend(cm, xd, tile, bsize_pred,
+ mi_row_pred, mi_col_pred,
+ mi_row_ori, mi_col_ori);
+ set_ref(cm, xd, 0, mi_row_pred, mi_col_pred);
+ if (has_second_ref(&xd->mi[0]->mbmi))
+ set_ref(cm, xd, 1, mi_row_pred, mi_col_pred);
+
+ if (!bextend) {
+ mbmi->tx_size = b_width_log2_lookup[bsize_top];
+ }
+
+ xd->plane[0].dst.stride = dst_stride[0];
+ xd->plane[1].dst.stride = dst_stride[1];
+ xd->plane[2].dst.stride = dst_stride[2];
+ xd->plane[0].dst.buf = dst_buf[0] +
+ (r >> xd->plane[0].subsampling_y) * dst_stride[0] +
+ (c >> xd->plane[0].subsampling_x);
+ xd->plane[1].dst.buf = dst_buf[1] +
+ (r >> xd->plane[1].subsampling_y) * dst_stride[1] +
+ (c >> xd->plane[1].subsampling_x);
+ xd->plane[2].dst.buf = dst_buf[2] +
+ (r >> xd->plane[2].subsampling_y) * dst_stride[2] +
+ (c >> xd->plane[2].subsampling_x);
+
+ if (!b_sub8x8)
+ dec_build_inter_predictors_sb(pbi, xd, mi_row_pred, mi_col_pred);
+ else
+ dec_build_inter_predictors_sb_sub8x8(pbi, xd, mi_row_pred, mi_col_pred,
+ block);
+}
+
+static void dec_extend_dir(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, int block,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ int mi_row, int mi_col,
+ int mi_row_top, int mi_col_top,
+ uint8_t * dst_buf[3], int dst_stride[3], int dir) {
+ // dir: 0-lower, 1-upper, 2-left, 3-right
+ // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ int xss = xd->plane[1].subsampling_x;
+ int yss = xd->plane[1].subsampling_y;
+ int b_sub8x8 = (bsize < BLOCK_8X8) ? 1 : 0;
+ BLOCK_SIZE extend_bsize;
+ int unit, mi_row_pred, mi_col_pred;
+
+ if (dir == 0 || dir == 1) {
+ extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss) ?
+ BLOCK_8X8 : BLOCK_16X8;
+ unit = num_8x8_blocks_wide_lookup[extend_bsize];
+ mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1);
+ mi_col_pred = mi_col;
+
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred,
+ mi_row_top, mi_col_top,
+ dst_buf, dst_stride,
+ top_bsize, extend_bsize, b_sub8x8, 1);
+
+ if (mi_width > unit) {
+ int i;
+ assert(!b_sub8x8);
+ for (i = 0; i < mi_width/unit - 1; i++) {
+ mi_col_pred += unit;
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred,
+ mi_row_top, mi_col_top,
+ dst_buf, dst_stride,
+ top_bsize, extend_bsize, b_sub8x8, 1);
+ }
+ }
+ } else if (dir == 2 || dir == 3) {
+ extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss) ?
+ BLOCK_8X8 : BLOCK_8X16;
+ unit = num_8x8_blocks_high_lookup[extend_bsize];
+ mi_row_pred = mi_row;
+ mi_col_pred = mi_col + ((dir == 3) ? mi_width : -1);
+
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred,
+ mi_row_top, mi_col_top,
+ dst_buf, dst_stride,
+ top_bsize, extend_bsize, b_sub8x8, 1);
+
+ if (mi_height > unit) {
+ int i;
+ for (i = 0; i < mi_height/unit - 1; i++) {
+ mi_row_pred += unit;
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred,
+ mi_row_top, mi_col_top,
+ dst_buf, dst_stride,
+ top_bsize, extend_bsize, b_sub8x8, 1);
+ }
+ }
+ } else {
+ extend_bsize = BLOCK_8X8;
+ mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1);
+ mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? mi_width : -1);
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred,
+ mi_row_top, mi_col_top,
+ dst_buf, dst_stride,
+ top_bsize, extend_bsize, b_sub8x8, 1);
+ }
+}
+
+static void dec_extend_all(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, int block,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ int mi_row, int mi_col,
+ int mi_row_top, int mi_col_top,
+ uint8_t * dst_buf[3], int dst_stride[3]) {
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 0);
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 1);
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 2);
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 3);
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 4);
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 5);
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 6);
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 7);
+}
+
+static void dec_predict_sb_complex(VP10Decoder *const pbi,
+ MACROBLOCKD *const xd,
+ const TileInfo *const tile,
+ int mi_row, int mi_col,
+ int mi_row_top, int mi_col_top,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ uint8_t *dst_buf[3], int dst_stride[3]) {
+ VP10_COMMON *const cm = &pbi->common;
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+ MB_MODE_INFO *mbmi;
+ int i, offset = mi_row * cm->mi_stride + mi_col;
+ uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
+
+ DECLARE_ALIGNED(16, uint8_t,
+ tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
+ DECLARE_ALIGNED(16, uint8_t,
+ tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
+ DECLARE_ALIGNED(16, uint8_t,
+ tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
+ int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
+ int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
+ int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len);
+ dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
+ dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len);
+ dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len);
+ } else {
+#endif
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN;
+ dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN;
+ dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN;
+ dst_buf3[0] = tmp_buf3;
+ dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN;
+ dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+ mbmi = &xd->mi[0]->mbmi;
+ partition = partition_lookup[bsl][mbmi->sb_type];
+ subsize = get_subsize(bsize, partition);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ }
+
+ switch (partition) {
+ case PARTITION_NONE:
+ assert(bsize < top_bsize);
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, bsize, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ break;
+ case PARTITION_HORZ:
+ if (bsize == BLOCK_8X8) {
+        // For sub8x8, predict in 8x8 units
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, BLOCK_8X8, 1, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ // Second half
+ dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, 1, 1);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ // weighted average to smooth the boundary
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[0], dst_stride[0],
+ dst_buf1[0], dst_stride1[0],
+ &xd->plane[0],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, 0);
+ } else {
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 0);
+
+ if (mi_row + hbs < cm->mi_rows) {
+ // Second half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col,
+ mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, 1);
+
+ // weighted average to smooth the boundary
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top,
+ bsize, top_bsize, PARTITION_HORZ, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_VERT:
+ if (bsize == BLOCK_8X8) {
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, BLOCK_8X8, 1, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ // Second half
+ dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, 1, 1);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ // Smooth
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[0], dst_stride[0],
+ dst_buf1[0], dst_stride1[0],
+ &xd->plane[0],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, 0);
+ } else {
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 3);
+
+ // Second half
+ if (mi_col + hbs < cm->mi_cols) {
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, 2);
+
+ // Smooth
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top,
+ bsize, top_bsize, PARTITION_VERT, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, BLOCK_8X8, 1, 0);
+ dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, 1, 1);
+ dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2,
+ top_bsize, BLOCK_8X8, 1, 1);
+ dec_predict_b_extend(pbi, xd, tile, 3, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf3, dst_stride3,
+ top_bsize, BLOCK_8X8, 1, 1);
+ if (bsize < top_bsize) {
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ dec_extend_all(pbi, xd, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+ dec_extend_all(pbi, xd, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2);
+ dec_extend_all(pbi, xd, tile, 3, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf3, dst_stride3);
+ }
+ } else {
+ dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col,
+ mi_row_top, mi_col_top, subsize, top_bsize,
+ dst_buf, dst_stride);
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, subsize, top_bsize,
+ dst_buf1, dst_stride1);
+ if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
+ dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, subsize, top_bsize,
+ dst_buf2, dst_stride2);
+ if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col + hbs,
+ mi_row_top, mi_col_top, subsize, top_bsize,
+ dst_buf3, dst_stride3);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ if (bsize == BLOCK_8X8 && i != 0)
+ continue; // Skip <4x4 chroma smoothing
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i], dst_stride[i],
+ dst_buf1[i],
+ dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ if (mi_row + hbs < cm->mi_rows) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf2[i],
+ dst_stride2[i],
+ dst_buf3[i],
+ dst_stride3[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i],
+ dst_stride[i],
+ dst_buf2[i],
+ dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i],
+ dst_stride[i],
+ dst_buf2[i],
+ dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+#endif // CONFIG_SUPERTX
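For PARTITION_SPLIT the four quadrant predictions are blended in a fixed order: the two vertical seams first (top-left|top-right, then bottom-left|bottom-right), and finally the horizontal seam between the already-blended top and bottom halves. A sketch of that order, with a hypothetical `blend()` standing in for `vp10_build_masked_inter_predictor_complex`:

    /* blend(a, b, dir): overwrite a with a masked mix of a and b. */
    blend(dst_buf,  dst_buf1, PARTITION_VERT);  /* top:    left | right */
    blend(dst_buf2, dst_buf3, PARTITION_VERT);  /* bottom: left | right */
    blend(dst_buf,  dst_buf2, PARTITION_HORZ);  /* top over bottom      */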
+
static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
int mi_row, int mi_col,
- vpx_reader *r, BLOCK_SIZE bsize,
+ vpx_reader *r,
+#if CONFIG_ANS
+ struct AnsDecoder *const tok,
+#endif // CONFIG_ANS
+ BLOCK_SIZE bsize,
int bwl, int bhl) {
VP10_COMMON *const cm = &pbi->common;
const int less8x8 = bsize < BLOCK_8X8;
@@ -818,8 +1575,22 @@
const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
+#if CONFIG_SUPERTX
+ MB_MODE_INFO *mbmi;
+ if (supertx_enabled) {
+ mbmi = set_mb_offsets(cm, xd, bsize, mi_row, mi_col,
+ bw, bh, x_mis, y_mis);
+ } else {
+ mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
+ bw, bh, x_mis, y_mis, bwl, bhl);
+ }
+ vp10_read_mode_info(pbi, xd, supertx_enabled,
+ mi_row, mi_col, r, x_mis, y_mis);
+#else
MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
bw, bh, x_mis, y_mis, bwl, bhl);
+ vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
+#endif // CONFIG_SUPERTX
if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
const BLOCK_SIZE uv_subsize =
@@ -829,70 +1600,104 @@
VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
}
- vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
-
- if (mbmi->skip) {
- dec_reset_skip_context(xd);
- }
-
- if (!is_inter_block(mbmi)) {
- int plane;
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const TX_SIZE tx_size =
- plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
- : mbmi->tx_size;
- const int num_4x4_w = pd->n4_w;
- const int num_4x4_h = pd->n4_h;
- const int step = (1 << tx_size);
- int row, col;
- const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
- 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
- const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
- 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
-
- for (row = 0; row < max_blocks_high; row += step)
- for (col = 0; col < max_blocks_wide; col += step)
- predict_and_reconstruct_intra_block(xd, r, mbmi, plane,
- row, col, tx_size);
+#if CONFIG_SUPERTX
+ if (!supertx_enabled) {
+#endif
+ if (mbmi->skip) {
+ dec_reset_skip_context(xd);
}
- } else {
- // Prediction
- dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
-
- // Reconstruction
- if (!mbmi->skip) {
- int eobtotal = 0;
+ if (!is_inter_block(mbmi)) {
int plane;
-
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size =
plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
- : mbmi->tx_size;
+ : mbmi->tx_size;
const int num_4x4_w = pd->n4_w;
const int num_4x4_h = pd->n4_h;
const int step = (1 << tx_size);
int row, col;
- const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
- 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
- const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
- 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ const int max_blocks_wide = num_4x4_w +
+ (xd->mb_to_right_edge >= 0 ?
+ 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high = num_4x4_h +
+ (xd->mb_to_bottom_edge >= 0 ?
+ 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
for (row = 0; row < max_blocks_high; row += step)
for (col = 0; col < max_blocks_wide; col += step)
- eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col,
- tx_size);
- }
-
- if (!less8x8 && eobtotal == 0)
-#if CONFIG_MISC_FIXES
- mbmi->has_no_coeffs = 1; // skip loopfilter
+ predict_and_reconstruct_intra_block(xd,
+#if CONFIG_ANS
+ pbi->token_tab, tok,
#else
- mbmi->skip = 1; // skip loopfilter
+ r,
#endif
+ mbmi, plane,
+ row, col, tx_size);
+ }
+ } else {
+ // Prediction
+ dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+
+ // Reconstruction
+ if (!mbmi->skip) {
+ int eobtotal = 0;
+ int plane;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ int row, col;
+#if CONFIG_VAR_TX
+ // TODO(jingning): This can be simplified for decoder performance.
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+ int block = 0;
+ const int step = 1 << (max_tx_size << 1);
+
+ for (row = 0; row < num_4x4_h; row += bw) {
+ for (col = 0; col < num_4x4_w; col += bw) {
+ decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize,
+ block, row, col, max_tx_size, &eobtotal);
+ block += step;
+ }
+ }
+#else
+ const TX_SIZE tx_size =
+ plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+ : mbmi->tx_size;
+ const int step = (1 << tx_size);
+ const int max_blocks_wide = num_4x4_w +
+ (xd->mb_to_right_edge >= 0 ?
+ 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high = num_4x4_h +
+ (xd->mb_to_bottom_edge >= 0 ?
+ 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+ for (row = 0; row < max_blocks_high; row += step)
+ for (col = 0; col < max_blocks_wide; col += step)
+ eobtotal += reconstruct_inter_block(xd,
+#if CONFIG_ANS
+ pbi->token_tab, tok,
+#else
+ r,
+#endif
+ mbmi, plane, row, col,
+ tx_size);
+#endif
+ }
+
+ if (!less8x8 && eobtotal == 0)
+ mbmi->has_no_coeffs = 1; // skip loopfilter
+ }
}
+#if CONFIG_SUPERTX
}
+#endif // CONFIG_SUPERTX
xd->corrupted |= vpx_reader_has_error(r);
}
@@ -946,10 +1751,29 @@
return p;
}
+#if CONFIG_SUPERTX
+static int read_skip_without_seg(VP10_COMMON *cm, const MACROBLOCKD *xd,
+ vpx_reader *r) {
+ const int ctx = vp10_get_skip_context(xd);
+ const int skip = vpx_read(r, cm->fc->skip_probs[ctx]);
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
+ ++counts->skip[ctx][skip];
+ return skip;
+}
+#endif // CONFIG_SUPERTX
+
// TODO(slavarnway): eliminate bsize and subsize in future commits
static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif
int mi_row, int mi_col,
- vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) {
+ vpx_reader* r,
+#if CONFIG_ANS
+ struct AnsDecoder *const tok,
+#endif // CONFIG_ANS
+ BLOCK_SIZE bsize, int n4x4_l2) {
VP10_COMMON *const cm = &pbi->common;
const int n8x8_l2 = n4x4_l2 - 1;
const int num_8x8_wh = 1 << n8x8_l2;
@@ -958,6 +1782,13 @@
BLOCK_SIZE subsize;
const int has_rows = (mi_row + hbs) < cm->mi_rows;
const int has_cols = (mi_col + hbs) < cm->mi_cols;
+#if CONFIG_SUPERTX
+ const int read_token = !supertx_enabled;
+ int skip = 0;
+ TX_SIZE supertx_size = b_width_log2_lookup[bsize];
+ const TileInfo *const tile = &xd->tile;
+ int txfm = DCT_DCT;
+#endif // CONFIG_SUPERTX
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -965,53 +1796,232 @@
partition = read_partition(cm, xd, mi_row, mi_col, r, has_rows, has_cols,
n8x8_l2);
subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition);
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) &&
+ partition != PARTITION_NONE &&
+ bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !supertx_enabled &&
+ !xd->lossless[0]) {
+ const int supertx_context =
+ partition_supertx_context_lookup[partition];
+ supertx_enabled = vpx_read(
+ r, cm->fc->supertx_prob[supertx_context][supertx_size]);
+ if (xd->counts)
+ xd->counts->supertx[supertx_context][supertx_size][supertx_enabled]++;
+ }
+ if (supertx_enabled && read_token) {
+ int offset = mi_row * cm->mi_stride + mi_col;
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+ set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize],
+ mi_col, num_8x8_blocks_wide_lookup[bsize],
+ cm->mi_rows, cm->mi_cols);
+ set_skip_context(xd, mi_row, mi_col);
+ // Here skip is read without using any segment level feature
+ skip = read_skip_without_seg(cm, xd, r);
+ if (skip) {
+ reset_skip_context(xd, bsize);
+ } else {
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(supertx_size, bsize, 1) > 1) {
+ int eset = get_ext_tx_set(supertx_size, bsize, 1);
+ if (eset > 0) {
+ txfm = vpx_read_tree(r, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][supertx_size]);
+ if (xd->counts)
+ ++xd->counts->inter_ext_tx[eset][supertx_size][txfm];
+ }
+ }
+#else
+ if (supertx_size < TX_32X32) {
+ txfm = vpx_read_tree(r, vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[supertx_size]);
+ if (xd->counts)
+ ++xd->counts->inter_ext_tx[supertx_size][txfm];
+ }
+#endif // CONFIG_EXT_TX
+ }
+#if CONFIG_VAR_TX
+ xd->supertx_size = supertx_size;
+#endif
+ }
+#endif // CONFIG_SUPERTX
if (!hbs) {
// calculate bmode block dimensions (log 2)
xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
- decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, 1, 1);
} else {
switch (partition) {
case PARTITION_NONE:
- decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n4x4_l2, n4x4_l2);
break;
case PARTITION_HORZ:
- decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n4x4_l2, n8x8_l2);
if (has_rows)
- decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2,
- n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + hbs, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n4x4_l2, n8x8_l2);
break;
case PARTITION_VERT:
- decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n8x8_l2, n4x4_l2);
if (has_cols)
- decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2,
- n4x4_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n8x8_l2, n4x4_l2);
break;
case PARTITION_SPLIT:
- decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2);
- decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2);
- decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2);
- decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize,
- n8x8_l2);
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n8x8_l2);
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n8x8_l2);
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + hbs, mi_col, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n8x8_l2);
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + hbs, mi_col + hbs, r,
+#if CONFIG_ANS
+ tok,
+#endif // CONFIG_ANS
+ subsize, n8x8_l2);
break;
default:
assert(0 && "Invalid partition type");
}
}
+#if CONFIG_SUPERTX
+ if (supertx_enabled && read_token) {
+ uint8_t *dst_buf[3];
+ int dst_stride[3], i;
+
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ dst_buf[i] = xd->plane[i].dst.buf;
+ dst_stride[i] = xd->plane[i].dst.stride;
+ }
+ dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col, mi_row, mi_col,
+ bsize, bsize, dst_buf, dst_stride);
+
+ if (!skip) {
+ int eobtotal = 0;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ set_offsets_topblock(cm, xd, tile, bsize, mi_row, mi_col);
+ xd->mi[0]->mbmi.tx_type = txfm;
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ const struct macroblockd_plane *const pd = &xd->plane[i];
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ int row, col;
+ const TX_SIZE tx_size =
+ i ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+ : mbmi->tx_size;
+ const int step = (1 << tx_size);
+ const int max_blocks_wide = num_4x4_w +
+ (xd->mb_to_right_edge >= 0 ?
+ 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high = num_4x4_h +
+ (xd->mb_to_bottom_edge >= 0 ?
+ 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+ for (row = 0; row < max_blocks_high; row += step)
+ for (col = 0; col < max_blocks_wide; col += step)
+ eobtotal += reconstruct_inter_block(xd,
+#if CONFIG_ANS
+ pbi->token_tab, tok,
+#else
+ r,
+#endif
+ mbmi, i, row, col,
+ tx_size);
+ }
+ if (!(subsize < BLOCK_8X8) && eobtotal == 0)
+ skip = 1;
+ }
+ set_param_topblock(cm, xd, bsize, mi_row, mi_col, txfm, skip);
+ }
+#endif // CONFIG_SUPERTX
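Taken together, the two CONFIG_SUPERTX blocks in this function give the decode flow at a partition node: read `supertx_enabled` once (gated on inter frames, partition type, block size, and lossless); if enabled, read one shared skip bit via `read_skip_without_seg` and, when not skipped, one shared transform type for the whole partition; decode the child blocks for modes and motion only; build the full-region prediction with boundary smoothing via `dec_predict_sb_complex`; reconstruct the residue at a single transform size spanning the partition; and finally propagate the shared `txfm`/`skip` into every mi unit with `set_param_topblock`.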
+
// update partition context
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
}
-static void setup_token_decoder(const uint8_t *data,
- const uint8_t *data_end,
- size_t read_size,
- struct vpx_internal_error_info *error_info,
- vpx_reader *r,
- vpx_decrypt_cb decrypt_cb,
- void *decrypt_state) {
+static void setup_bool_decoder(const uint8_t *data,
+ const uint8_t *data_end,
+ const size_t read_size,
+ struct vpx_internal_error_info *error_info,
+ vpx_reader *r,
+ vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state) {
// Validate the calculated partition length. If the buffer
// described by the partition can't be fully read, then restrict
// it to the portion that can be (for EC mode) or throw an error.
@@ -1023,6 +2033,28 @@
vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder %d", 1);
}
+#if CONFIG_ANS
+static void setup_token_decoder(const uint8_t *data,
+ const uint8_t *data_end,
+ const size_t read_size,
+ struct vpx_internal_error_info *error_info,
+ struct AnsDecoder *const ans,
+ vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state) {
+ (void) decrypt_cb;
+ (void) decrypt_state;
+ // Validate the calculated partition length. If the buffer
+ // described by the partition can't be fully read, then restrict
+ // it to the portion that can be (for EC mode) or throw an error.
+ if (!read_is_valid(data, read_size, data_end))
+ vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt tile length");
+
+ if (read_size > INT_MAX || ans_read_init(ans, data, (int)read_size))
+ vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate token decoder %d", 1);
+}
+#endif
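Under CONFIG_ANS each tile therefore carries two readers over the same buffer: the renamed `setup_bool_decoder` feeds the bit-level mode and partition syntax, while `setup_token_decoder` initializes an ANS decoder for the coefficient tokens. Per tile, as in the tile loop further down (a sketch mirrored from that loop):

    setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
                       &tile_data->bit_reader, decrypt_cb, decrypt_state);
    setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
                        &tile_data->token_ans, decrypt_cb, decrypt_state);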
static void read_coef_probs_common(vp10_coeff_probs_model *coef_probs,
vpx_reader *r) {
@@ -1048,9 +2080,6 @@
static void setup_segmentation(VP10_COMMON *const cm,
struct vpx_read_bit_buffer *rb) {
struct segmentation *const seg = &cm->seg;
-#if !CONFIG_MISC_FIXES
- struct segmentation_probs *const segp = &cm->segp;
-#endif
int i, j;
seg->update_map = 0;
@@ -1067,26 +2096,11 @@
seg->update_map = vpx_rb_read_bit(rb);
}
if (seg->update_map) {
-#if !CONFIG_MISC_FIXES
- for (i = 0; i < SEG_TREE_PROBS; i++)
- segp->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
- : MAX_PROB;
-#endif
if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
seg->temporal_update = 0;
} else {
seg->temporal_update = vpx_rb_read_bit(rb);
}
-#if !CONFIG_MISC_FIXES
- if (seg->temporal_update) {
- for (i = 0; i < PREDICTION_PROBS; i++)
- segp->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
- : MAX_PROB;
- } else {
- for (i = 0; i < PREDICTION_PROBS; i++)
- segp->pred_probs[i] = MAX_PROB;
- }
-#endif
}
// Segmentation data update
@@ -1112,8 +2126,9 @@
}
}
-static void setup_loopfilter(struct loopfilter *lf,
+static void setup_loopfilter(VP10_COMMON *cm,
struct vpx_read_bit_buffer *rb) {
+ struct loopfilter *lf = &cm->lf;
lf->filter_level = vpx_rb_read_literal(rb, 6);
lf->sharpness_level = vpx_rb_read_literal(rb, 3);
@@ -1136,11 +2151,24 @@
lf->mode_deltas[i] = vpx_rb_read_inv_signed_literal(rb, 6);
}
}
+#if CONFIG_LOOP_RESTORATION
+ lf->restoration_level = vpx_rb_read_bit(rb);
+ if (lf->restoration_level) {
+ int level = vpx_rb_read_literal(rb, vp10_restoration_level_bits(cm));
+ lf->restoration_level = level + (level >= lf->last_restoration_level);
+ } else {
+ lf->restoration_level = lf->last_restoration_level;
+ }
+ if (cm->frame_type != KEY_FRAME)
+ cm->lf.last_restoration_level = cm->lf.restoration_level;
+ else
+ cm->lf.last_restoration_level = 0;
+#endif // CONFIG_LOOP_RESTORATION
}
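The restoration-level syntax avoids wasting a code point on re-signaling the previous level: a flag distinguishes "keep the last level" from "new level", and the literal that follows indexes the remaining levels, skipping the old one by bumping values at or above it up by one. A decode-side sketch of that mapping:

    /* Mirrors the logic above: skip last_level in the literal range. */
    static int decode_restoration_level(int flag, int literal,
                                        int last_level) {
      if (!flag)
        return last_level;                       /* reuse previous   */
      return literal + (literal >= last_level);  /* skip last_level  */
    }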
static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
return vpx_rb_read_bit(rb) ?
- vpx_rb_read_inv_signed_literal(rb, CONFIG_MISC_FIXES ? 6 : 4) : 0;
+ vpx_rb_read_inv_signed_literal(rb, 6) : 0;
}
static void setup_quantization(VP10_COMMON *const cm,
@@ -1180,7 +2208,8 @@
}
static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
- return vpx_rb_read_bit(rb) ? SWITCHABLE : vpx_rb_read_literal(rb, 2);
+ return vpx_rb_read_bit(rb) ?
+ SWITCHABLE : vpx_rb_read_literal(rb, 2 + CONFIG_EXT_INTERP);
}
static void setup_render_size(VP10_COMMON *cm,
@@ -1283,10 +2312,8 @@
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
width = buf->y_crop_width;
height = buf->y_crop_height;
-#if CONFIG_MISC_FIXES
cm->render_width = buf->render_width;
cm->render_height = buf->render_height;
-#endif
found = 1;
break;
}
@@ -1294,9 +2321,7 @@
if (!found) {
vp10_read_frame_size(rb, &width, &height);
-#if CONFIG_MISC_FIXES
setup_render_size(cm, rb);
-#endif
}
if (width <= 0 || height <= 0)
@@ -1328,9 +2353,6 @@
}
resize_context_buffers(cm, width, height);
-#if !CONFIG_MISC_FIXES
- setup_render_size(cm, rb);
-#endif
lock_buffer_pool(pool);
if (vpx_realloc_frame_buffer(
@@ -1377,14 +2399,10 @@
if (cm->log2_tile_rows)
cm->log2_tile_rows += vpx_rb_read_bit(rb);
-#if CONFIG_MISC_FIXES
// tile size magnitude
if (cm->log2_tile_rows > 0 || cm->log2_tile_cols > 0) {
cm->tile_sz_mag = vpx_rb_read_literal(rb, 2);
}
-#else
- cm->tile_sz_mag = 3;
-#endif
}
typedef struct TileBuffer {
@@ -1428,9 +2446,9 @@
if (decrypt_cb) {
uint8_t be_data[4];
decrypt_cb(decrypt_state, *data, be_data, tile_sz_mag + 1);
- size = mem_get_varsize(be_data, tile_sz_mag) + CONFIG_MISC_FIXES;
+ size = mem_get_varsize(be_data, tile_sz_mag) + 1;
} else {
- size = mem_get_varsize(*data, tile_sz_mag) + CONFIG_MISC_FIXES;
+ size = mem_get_varsize(*data, tile_sz_mag) + 1;
}
*data += tile_sz_mag + 1;
@@ -1508,6 +2526,11 @@
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_cols);
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*cm->above_txfm_context) * aligned_cols);
+#endif
+
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
if (pbi->tile_data == NULL ||
@@ -1524,6 +2547,7 @@
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
const TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
+
tile_data = pbi->tile_data + tile_cols * tile_row + tile_col;
tile_data->cm = cm;
tile_data->xd = pbi->mb;
@@ -1533,9 +2557,21 @@
&cm->counts : NULL;
vp10_zero(tile_data->dqcoeff);
vp10_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col);
+#if !CONFIG_ANS
+ setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
+ &tile_data->bit_reader, pbi->decrypt_cb,
+ pbi->decrypt_state);
+#else
+ if (buf->size < 3 || !read_is_valid(buf->data, buf->size, data_end))
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt tile length");
+ setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
+ &tile_data->bit_reader, pbi->decrypt_cb,
+ pbi->decrypt_state);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
- &tile_data->bit_reader, pbi->decrypt_cb,
+ &tile_data->token_ans, pbi->decrypt_cb,
pbi->decrypt_state);
+#endif
vp10_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff);
tile_data->xd.plane[0].color_index_map = tile_data->color_index_map[0];
tile_data->xd.plane[1].color_index_map = tile_data->color_index_map[1];
@@ -1554,16 +2590,27 @@
vp10_tile_set_col(&tile, tile_data->cm, col);
vp10_zero(tile_data->xd.left_context);
vp10_zero(tile_data->xd.left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(tile_data->xd.left_txfm_context_buffer);
+#endif
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_partition(pbi, &tile_data->xd, mi_row,
- mi_col, &tile_data->bit_reader, BLOCK_64X64, 4);
+ decode_partition(pbi, &tile_data->xd,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ mi_row, mi_col, &tile_data->bit_reader,
+#if CONFIG_ANS
+ &tile_data->token_ans,
+#endif // CONFIG_ANS
+ BLOCK_64X64, 4);
}
pbi->mb.corrupted |= tile_data->xd.corrupted;
if (pbi->mb.corrupted)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Failed to decode tile data");
}
+#if !CONFIG_VAR_TX
// Loopfilter one row.
if (cm->lf.filter_level && !cm->skip_loop_filter) {
const int lf_start = mi_row - MI_BLOCK_SIZE;
@@ -1590,10 +2637,15 @@
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf,
mi_row << MI_BLOCK_SIZE_LOG2);
+#endif
}
}
// Loopfilter remaining rows in the frame.
+#if CONFIG_VAR_TX
+ vp10_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
+ cm->lf.filter_level, 0, 0);
+#else
if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
winterface->sync(&pbi->lf_worker);
@@ -1601,13 +2653,18 @@
lf_data->stop = cm->mi_rows;
winterface->execute(&pbi->lf_worker);
}
+#endif
// Get last tile data.
tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX);
+#if CONFIG_ANS
+ return data_end;
+#else
return vpx_reader_find_end(&tile_data->bit_reader);
+#endif
}
static int tile_worker_hook(TileWorkerData *const tile_data,
@@ -1627,10 +2684,19 @@
mi_row += MI_BLOCK_SIZE) {
vp10_zero(tile_data->xd.left_context);
vp10_zero(tile_data->xd.left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(tile_data->xd.left_txfm_context_buffer);
+#endif
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->pbi, &tile_data->xd,
+#if CONFIG_SUPERTX
+ 0,
+#endif
mi_row, mi_col, &tile_data->bit_reader,
+#if CONFIG_ANS
+ &tile_data->token_ans,
+#endif // CONFIG_ANS
BLOCK_64X64, 4);
}
}
@@ -1661,6 +2727,9 @@
assert(tile_cols <= (1 << 6));
assert(tile_rows == 1);
(void)tile_rows;
+#if CONFIG_ANS
+ abort(); // FIXME: Tile parsing broken
+#endif
// TODO(jzern): See if we can remove the restriction of passing in max
// threads to the decoder.
@@ -1704,7 +2773,10 @@
sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_mi_cols);
-
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*cm->above_txfm_context) * aligned_mi_cols);
+#endif
// Load tile data into tile_buffers
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -1757,9 +2829,9 @@
vp10_zero(tile_data->dqcoeff);
vp10_tile_init(tile, cm, 0, buf->col);
vp10_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
- setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
- &tile_data->bit_reader, pbi->decrypt_cb,
- pbi->decrypt_state);
+ setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
+ &tile_data->bit_reader, pbi->decrypt_cb,
+ pbi->decrypt_state);
vp10_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff);
tile_data->xd.plane[0].color_index_map = tile_data->color_index_map[0];
tile_data->xd.plane[1].color_index_map = tile_data->color_index_map[1];
@@ -1865,6 +2937,10 @@
int i, mask, ref_index = 0;
size_t sz;
+#if CONFIG_EXT_REFS
+ cm->last3_frame_type = cm->last2_frame_type;
+ cm->last2_frame_type = cm->last_frame_type;
+#endif // CONFIG_EXT_REFS
cm->last_frame_type = cm->frame_type;
cm->last_intra_only = cm->intra_only;
@@ -1930,13 +3006,14 @@
memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
pbi->need_resync = 0;
}
+ if (frame_is_intra_only(cm))
+ cm->allow_screen_content_tools = vpx_rb_read_bit(rb);
} else {
cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb);
if (cm->error_resilient_mode) {
cm->reset_frame_context = RESET_FRAME_CONTEXT_ALL;
} else {
-#if CONFIG_MISC_FIXES
if (cm->intra_only) {
cm->reset_frame_context =
vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL
@@ -1950,40 +3027,14 @@
vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL
: RESET_FRAME_CONTEXT_CURRENT;
}
-#else
- static const RESET_FRAME_CONTEXT_MODE reset_frame_context_conv_tbl[4] = {
- RESET_FRAME_CONTEXT_NONE, RESET_FRAME_CONTEXT_NONE,
- RESET_FRAME_CONTEXT_CURRENT, RESET_FRAME_CONTEXT_ALL
- };
-
- cm->reset_frame_context =
- reset_frame_context_conv_tbl[vpx_rb_read_literal(rb, 2)];
-#endif
}
if (cm->intra_only) {
if (!vp10_read_sync_code(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
-#if CONFIG_MISC_FIXES
+
read_bitdepth_colorspace_sampling(cm, rb);
-#else
- if (cm->profile > PROFILE_0) {
- read_bitdepth_colorspace_sampling(cm, rb);
- } else {
- // NOTE: The intra-only frame header does not include the specification
- // of either the color format or color sub-sampling in profile 0. VP9
- // specifies that the default color format should be YUV 4:2:0 in this
- // case (normative).
- cm->color_space = VPX_CS_BT_601;
- cm->color_range = 0;
- cm->subsampling_y = cm->subsampling_x = 1;
- cm->bit_depth = VPX_BITS_8;
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth = 0;
-#endif
- }
-#endif
pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
setup_frame_size(cm, rb);
@@ -2046,10 +3097,6 @@
cm->refresh_frame_context =
vpx_rb_read_bit(rb) ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
-#if !CONFIG_MISC_FIXES
- } else {
- vpx_rb_read_bit(rb); // parallel decoding mode flag
-#endif
}
} else {
cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_OFF;
@@ -2076,6 +3123,7 @@
for (; ref_index < REF_FRAMES; ++ref_index) {
cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
+
// Current thread holds the reference frame.
if (cm->ref_frame_map[ref_index] >= 0)
++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
@@ -2086,7 +3134,7 @@
if (frame_is_intra_only(cm) || cm->error_resilient_mode)
vp10_setup_past_independence(cm);
- setup_loopfilter(&cm->lf, rb);
+ setup_loopfilter(cm, rb);
setup_quantization(cm, rb);
#if CONFIG_VP9_HIGHBITDEPTH
xd->bd = (int)cm->bit_depth;
@@ -2097,9 +3145,8 @@
{
int i;
for (i = 0; i < MAX_SEGMENTS; ++i) {
- const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ?
- vp10_get_qindex(&cm->seg, i, cm->base_qindex) :
- cm->base_qindex;
+ const int qindex = cm->seg.enabled ?
+ vp10_get_qindex(&cm->seg, i, cm->base_qindex) : cm->base_qindex;
xd->lossless[i] = qindex == 0 &&
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
@@ -2108,11 +3155,9 @@
}
setup_segmentation_dequant(cm);
-#if CONFIG_MISC_FIXES
cm->tx_mode = (!cm->seg.enabled && xd->lossless[0]) ? ONLY_4X4
: read_tx_mode(rb);
cm->reference_mode = read_frame_reference_mode(cm, rb);
-#endif
setup_tile_info(cm, rb);
sz = vpx_rb_read_literal(rb, 16);
@@ -2124,6 +3169,34 @@
return sz;
}
+#if CONFIG_EXT_TX
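+// Reads forward updates for the extended transform-type probabilities.
+// Both loops start at set 1 (set 0 receives no updates), and each set
+// is gated by a single GROUP_DIFF_UPDATE_PROB bit.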
+static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
+ int i, j, k;
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < num_ext_tx_set_inter[s] - 1; ++j)
+ vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[s][i][j]);
+ }
+ }
+ }
+
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ for (k = 0; k < num_ext_tx_set_intra[s] - 1; ++k)
+ vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[s][i][j][k]);
+ }
+ }
+ }
+}
+
+#else
+
static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
int i, j, k;
if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
@@ -2140,11 +3213,25 @@
}
}
}
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_SUPERTX
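+// Reads forward updates for the supertx probabilities. The inner loop
+// starts at j = 1, so no probability is coded for 4x4 transforms.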
+static void read_supertx_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
+ int i, j;
+ if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+ for (j = 1; j < TX_SIZES; ++j) {
+ vp10_diff_update_prob(r, &fc->supertx_prob[i][j]);
+ }
+ }
+ }
+}
+#endif // CONFIG_SUPERTX
static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
size_t partition_size) {
VP10_COMMON *const cm = &pbi->common;
-#if !CONFIG_MISC_FIXES
+#if CONFIG_SUPERTX
MACROBLOCKD *const xd = &pbi->mb;
#endif
FRAME_CONTEXT *const fc = cm->fc;
@@ -2156,17 +3243,18 @@
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
-#if !CONFIG_MISC_FIXES
- cm->tx_mode = xd->lossless[0] ? ONLY_4X4 : read_tx_mode(&r);
-#endif
if (cm->tx_mode == TX_MODE_SELECT)
read_tx_mode_probs(&fc->tx_probs, &r);
read_coef_probs(fc, cm->tx_mode, &r);
+#if CONFIG_VAR_TX
+ for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
+ vp10_diff_update_prob(&r, &fc->txfm_partition_prob[k]);
+#endif
+
for (k = 0; k < SKIP_CONTEXTS; ++k)
vp10_diff_update_prob(&r, &fc->skip_probs[k]);
-#if CONFIG_MISC_FIXES
if (cm->seg.enabled) {
if (cm->seg.temporal_update) {
for (k = 0; k < PREDICTION_PROBS; k++)
@@ -2183,30 +3271,34 @@
for (j = 0; j < PARTITION_CONTEXTS; ++j)
for (i = 0; i < PARTITION_TYPES - 1; ++i)
vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
-#endif
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ for (j = 0; j < INTRA_FILTERS - 1; ++j)
+ vp10_diff_update_prob(&r, &fc->intra_filter_probs[i][j]);
+#endif // CONFIG_EXT_INTRA
if (frame_is_intra_only(cm)) {
vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob);
-#if CONFIG_MISC_FIXES
for (k = 0; k < INTRA_MODES; k++)
for (j = 0; j < INTRA_MODES; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
vp10_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]);
-#endif
} else {
nmv_context *const nmvc = &fc->nmvc;
read_inter_mode_probs(fc, &r);
+#if CONFIG_EXT_INTER
+ read_inter_compound_mode_probs(fc, &r);
+#endif // CONFIG_EXT_INTER
+
if (cm->interp_filter == SWITCHABLE)
read_switchable_interp_probs(fc, &r);
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp10_diff_update_prob(&r, &fc->intra_inter_prob[i]);
-#if !CONFIG_MISC_FIXES
- cm->reference_mode = read_frame_reference_mode(cm, &r);
-#endif
if (cm->reference_mode != SINGLE_REFERENCE)
setup_compound_reference_mode(cm);
read_frame_reference_mode_probs(cm, &r);
@@ -2215,14 +3307,12 @@
for (i = 0; i < INTRA_MODES - 1; ++i)
vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
-#if !CONFIG_MISC_FIXES
- for (j = 0; j < PARTITION_CONTEXTS; ++j)
- for (i = 0; i < PARTITION_TYPES - 1; ++i)
- vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
-#endif
-
read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
read_ext_tx_probs(fc, &r);
+#if CONFIG_SUPERTX
+ if (!xd->lossless[0])
+ read_supertx_probs(fc, &r);
+#endif
}
return vpx_reader_has_error(&r);
@@ -2252,6 +3342,11 @@
sizeof(cm->counts.switchable_interp)));
assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode,
sizeof(cm->counts.inter_mode)));
+#if CONFIG_EXT_INTER
+ assert(!memcmp(cm->counts.inter_compound_mode,
+ zero_counts.inter_compound_mode,
+ sizeof(cm->counts.inter_compound_mode)));
+#endif // CONFIG_EXT_INTER
assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
sizeof(cm->counts.intra_inter)));
assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
@@ -2263,10 +3358,10 @@
assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
- assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
- sizeof(cm->counts.intra_ext_tx)));
assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
sizeof(cm->counts.inter_ext_tx)));
+ assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
+ sizeof(cm->counts.intra_ext_tx)));
}
#endif // NDEBUG
@@ -2403,18 +3498,20 @@
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
}
+#if CONFIG_LOOP_RESTORATION
+ vp10_loop_restoration_init(&cm->rst_info, cm->lf.restoration_level,
+ cm->frame_type == KEY_FRAME);
+ if (cm->rst_info.restoration_used) {
+ vp10_loop_restoration_rows(new_fb, cm, 0, cm->mi_rows, 0);
+ }
+#endif // CONFIG_LOOP_RESTORATION
if (!xd->corrupted) {
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
vp10_adapt_coef_probs(cm);
-#if CONFIG_MISC_FIXES
vp10_adapt_intra_frame_probs(cm);
-#endif
if (!frame_is_intra_only(cm)) {
-#if !CONFIG_MISC_FIXES
- vp10_adapt_intra_frame_probs(cm);
-#endif
vp10_adapt_inter_frame_probs(cm);
vp10_adapt_mv_probs(cm, cm->allow_high_precision_mv);
}
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 01b796c..78ddf1a 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -24,6 +24,19 @@
#include "vpx_dsp/vpx_dsp_common.h"
+static INLINE int read_uniform(vpx_reader *r, int n) {
+ int l = get_unsigned_bits(n);
+ int m = (1 << l) - n;
+ int v;
+
+ assert(l != 0);
+ v = vpx_read_literal(r, l - 1);
+
+ if (v < m)
+ return v;
+ else
+ return (v << 1) - m + vpx_read_literal(r, 1);
+}
+
static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) {
return (PREDICTION_MODE)vpx_read_tree(r, vp10_intra_mode_tree, p);
}
@@ -50,7 +63,80 @@
}
static PREDICTION_MODE read_inter_mode(VP10_COMMON *cm, MACROBLOCKD *xd,
- vpx_reader *r, int ctx) {
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ MB_MODE_INFO *mbmi,
+#endif
+ vpx_reader *r, int16_t ctx) {
+#if CONFIG_REF_MV
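+ // With CONFIG_REF_MV the inter mode is coded as a cascade of binary
+ // decisions: NEWMV vs. the rest, then ZEROMV vs. the reference-MV
+ // modes, and finally NEARESTMV vs. NEARMV, each with its own context
+ // derived from bit fields packed into ctx.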
+ FRAME_COUNTS *counts = xd->counts;
+ int16_t mode_ctx = ctx & NEWMV_CTX_MASK;
+ vpx_prob mode_prob = cm->fc->newmv_prob[mode_ctx];
+
+ if (vpx_read(r, mode_prob) == 0) {
+ if (counts)
+ ++counts->newmv_mode[mode_ctx][0];
+
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi)) {
+#endif // CONFIG_EXT_INTER
+ return NEWMV;
+#if CONFIG_EXT_INTER
+ } else {
+ mode_prob = cm->fc->new2mv_prob;
+ if (vpx_read(r, mode_prob) == 0) {
+ if (counts)
+ ++counts->new2mv_mode[0];
+ return NEWMV;
+ } else {
+ if (counts)
+ ++counts->new2mv_mode[1];
+ return NEWFROMNEARMV;
+ }
+ }
+#endif // CONFIG_EXT_INTER
+ }
+ if (counts)
+ ++counts->newmv_mode[mode_ctx][1];
+
+ if (ctx & (1 << ALL_ZERO_FLAG_OFFSET))
+ return ZEROMV;
+
+ mode_ctx = (ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+
+ mode_prob = cm->fc->zeromv_prob[mode_ctx];
+ if (vpx_read(r, mode_prob) == 0) {
+ if (counts)
+ ++counts->zeromv_mode[mode_ctx][0];
+ return ZEROMV;
+ }
+ if (counts)
+ ++counts->zeromv_mode[mode_ctx][1];
+
+ mode_ctx = (ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
+
+ if (ctx & (1 << SKIP_NEARESTMV_OFFSET))
+ mode_ctx = 6;
+ if (ctx & (1 << SKIP_NEARMV_OFFSET))
+ mode_ctx = 7;
+ if (ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET))
+ mode_ctx = 8;
+
+ mode_prob = cm->fc->refmv_prob[mode_ctx];
+
+ if (vpx_read(r, mode_prob) == 0) {
+ if (counts)
+ ++counts->refmv_mode[mode_ctx][0];
+
+ return NEARESTMV;
+ } else {
+ if (counts)
+ ++counts->refmv_mode[mode_ctx][1];
+ return NEARMV;
+ }
+
+ // Invalid prediction mode.
+ assert(0);
+#else
const int mode = vpx_read_tree(r, vp10_inter_mode_tree,
cm->fc->inter_mode_probs[ctx]);
FRAME_COUNTS *counts = xd->counts;
@@ -58,13 +144,131 @@
++counts->inter_mode[ctx][mode];
return NEARESTMV + mode;
+#endif
}
+#if CONFIG_REF_MV
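+// Reads the dynamic reference list (DRL) index for NEARMV: when the
+// candidate stack holds more than two motion vectors, up to two extra
+// context-coded bits select among ref_mv_idx 0, 1 and 2.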
+static void read_drl_idx(const VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi,
+ vpx_reader *r) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ mbmi->ref_mv_idx = 0;
+
+ if (xd->ref_mv_count[ref_frame_type] > 2) {
+ uint8_t drl0_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 0);
+ vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
+ if (vpx_read(r, drl0_prob)) {
+ mbmi->ref_mv_idx = 1;
+ if (xd->counts)
+ ++xd->counts->drl_mode0[drl0_ctx][1];
+ if (xd->ref_mv_count[ref_frame_type] > 3) {
+ uint8_t drl1_ctx =
+ vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 1);
+ vpx_prob drl1_prob = cm->fc->drl_prob1[drl1_ctx];
+ if (vpx_read(r, drl1_prob)) {
+ mbmi->ref_mv_idx = 2;
+ if (xd->counts)
+ ++xd->counts->drl_mode1[drl1_ctx][1];
+
+ return;
+ }
+
+ if (xd->counts)
+ ++xd->counts->drl_mode1[drl1_ctx][0];
+ }
+ return;
+ }
+
+ if (xd->counts)
+ ++xd->counts->drl_mode0[drl0_ctx][0];
+ }
+}
+#endif
+
+#if CONFIG_EXT_INTER
+static PREDICTION_MODE read_inter_compound_mode(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ vpx_reader *r, int16_t ctx) {
+ const int mode = vpx_read_tree(r, vp10_inter_compound_mode_tree,
+ cm->fc->inter_compound_mode_probs[ctx]);
+ FRAME_COUNTS *counts = xd->counts;
+
+ if (counts)
+ ++counts->inter_compound_mode[ctx][mode];
+
+ assert(is_inter_compound_mode(NEAREST_NEARESTMV + mode));
+ return NEAREST_NEARESTMV + mode;
+}
+#endif // CONFIG_EXT_INTER
+
static int read_segment_id(vpx_reader *r,
const struct segmentation_probs *segp) {
return vpx_read_tree(r, vp10_segment_tree, segp->tree_probs);
}
+#if CONFIG_VAR_TX
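+// Recursively reads the transform partition tree of an inter block:
+// one context-coded bit per node decides whether the current square
+// transform splits into four quadrants, terminating at TX_4X4.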
+static void read_tx_size_inter(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, FRAME_COUNTS *counts,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ vpx_reader *r) {
+ int is_split = 0;
+ const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1),
+ tx_size);
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ is_split = vpx_read(r, cm->fc->txfm_partition_prob[ctx]);
+
+ if (is_split) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ if (counts)
+ ++counts->txfm_partition[ctx][1];
+
+ if (tx_size == TX_8X8) {
+ mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {
+ int offsetr = blk_row + ((i >> 1) << bsl);
+ int offsetc = blk_col + ((i & 0x01) << bsl);
+ read_tx_size_inter(cm, xd, mbmi, counts,
+ tx_size - 1, offsetr, offsetc, r);
+ }
+ } else {
+ int idx, idy;
+ mbmi->inter_tx_size[tx_idx] = tx_size;
+ for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
+ for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
+ mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
+ mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
+ if (counts)
+ ++counts->txfm_partition[ctx][0];
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+ }
+}
+#endif
+
static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
TX_SIZE max_tx_size, vpx_reader *r) {
FRAME_COUNTS *counts = xd->counts;
@@ -123,28 +327,18 @@
int mi_offset, int x_mis, int y_mis,
vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
FRAME_COUNTS *counts = xd->counts;
struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- struct segmentation_probs *const segp = &cm->segp;
-#endif
int segment_id;
-#if !CONFIG_MISC_FIXES
- (void) xd;
-#endif
-
if (!seg->enabled)
return 0; // Default for disabled segmentation
assert(seg->update_map && !seg->temporal_update);
segment_id = read_segment_id(r, segp);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.tree_total[segment_id];
-#endif
set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
return segment_id;
}
@@ -164,12 +358,8 @@
static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
int mi_row, int mi_col, vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
FRAME_COUNTS *counts = xd->counts;
struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- struct segmentation_probs *const segp = &cm->segp;
-#endif
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int predicted_segment_id, segment_id;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
@@ -197,25 +387,19 @@
const int ctx = vp10_get_pred_context_seg_id(xd);
const vpx_prob pred_prob = segp->pred_probs[ctx];
mbmi->seg_id_predicted = vpx_read(r, pred_prob);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.pred[ctx][mbmi->seg_id_predicted];
-#endif
if (mbmi->seg_id_predicted) {
segment_id = predicted_segment_id;
} else {
segment_id = read_segment_id(r, segp);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.tree_mispred[segment_id];
-#endif
}
} else {
segment_id = read_segment_id(r, segp);
-#if CONFIG_MISC_FIXES
if (counts)
++counts->seg.tree_total[segment_id];
-#endif
}
set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
return segment_id;
@@ -235,6 +419,71 @@
}
}
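+// Reads the luma palette signaling: a flag coded with a context equal
+// to the number of neighboring blocks that use a palette, then the
+// palette size (tree-coded, offset by 2), the base colors, and the
+// color index of the first pixel.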
+static void read_palette_mode_info(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd,
+ vpx_reader *r) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ int i, palette_ctx = 0;
+
+ if (above_mi)
+ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (left_mi)
+ palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (vpx_read(r, vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
+ [palette_ctx])) {
+ int n;
+ PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
+
+ pmi->palette_size[0] =
+ vpx_read_tree(r, vp10_palette_size_tree,
+ vp10_default_palette_y_size_prob[bsize - BLOCK_8X8]) + 2;
+ n = pmi->palette_size[0];
+
+ for (i = 0; i < n; ++i)
+ pmi->palette_colors[i] = vpx_read_literal(r, cm->bit_depth);
+
+ xd->plane[0].color_index_map[0] = read_uniform(r, n);
+ assert(xd->plane[0].color_index_map[0] < n);
+ }
+}
+
+#if CONFIG_EXT_INTRA
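+// Reads the ext-intra (filter-intra) signaling for the luma and chroma
+// planes; it is coded only when the corresponding prediction mode is
+// DC_PRED, and unless ALLOW_FILTER_INTRA_MODES is set the function
+// reduces to an early return.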
+static void read_ext_intra_mode_info(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd, vpx_reader *r) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ FRAME_COUNTS *counts = xd->counts;
+
+#if !ALLOW_FILTER_INTRA_MODES
+ return;
+#endif
+ if (mbmi->mode == DC_PRED) {
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+ vpx_read(r, cm->fc->ext_intra_probs[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+ read_uniform(r, FILTER_INTRA_MODES);
+ }
+ if (counts)
+ ++counts->ext_intra[0][mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
+ }
+ if (mbmi->uv_mode == DC_PRED) {
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ vpx_read(r, cm->fc->ext_intra_probs[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ read_uniform(r, FILTER_INTRA_MODES);
+ }
+ if (counts)
+ ++counts->ext_intra[1][mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
static void read_intra_frame_mode_info(VP10_COMMON *const cm,
MACROBLOCKD *const xd,
int mi_row, int mi_col, vpx_reader *r) {
@@ -280,10 +529,60 @@
default:
mbmi->mode = read_intra_mode(r,
get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+ int p_angle;
+ const int ctx = vp10_get_pred_context_intra_interp(xd);
+ mbmi->angle_delta[0] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+ p_angle = mode_to_angle_map[mbmi->mode] +
+ mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle)) {
+ FRAME_COUNTS *counts = xd->counts;
+ mbmi->intra_filter = vpx_read_tree(r, vp10_intra_filter_tree,
+ cm->fc->intra_filter_probs[ctx]);
+ if (counts)
+ ++counts->intra_filter[ctx][mbmi->intra_filter];
+ } else {
+ mbmi->intra_filter = INTRA_FILTER_LINEAR;
+ }
+ }
+#endif // CONFIG_EXT_INTRA
}
mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ mbmi->angle_delta[1] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
+ if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools &&
+ mbmi->mode == DC_PRED)
+ read_palette_mode_info(cm, xd, r);
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, 0) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
+ ALLOW_INTRA_EXT_TX) {
+ FRAME_COUNTS *counts = xd->counts;
+ int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, 0);
+ if (eset > 0) {
+ mbmi->tx_type = vpx_read_tree(
+ r, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]);
+ if (counts)
+ ++counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode]
+ [mbmi->tx_type];
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
+#else
if (mbmi->tx_size < TX_32X32 &&
cm->base_qindex > 0 && !mbmi->skip &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
@@ -297,6 +596,14 @@
} else {
mbmi->tx_type = DCT_DCT;
}
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ if (bsize >= BLOCK_8X8)
+ read_ext_intra_mode_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
}
static int read_mv_component(vpx_reader *r,
@@ -386,12 +693,68 @@
if (mode == COMPOUND_REFERENCE) {
const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
const int ctx = vp10_get_pred_context_comp_ref_p(cm, xd);
- const int bit = vpx_read(r, fc->comp_ref_prob[ctx]);
+ const int bit = vpx_read(r, fc->comp_ref_prob[ctx][0]);
if (counts)
- ++counts->comp_ref[ctx][bit];
+ ++counts->comp_ref[ctx][0][bit];
ref_frame[idx] = cm->comp_fixed_ref;
+
+#if CONFIG_EXT_REFS
+ if (!bit) {
+ const int ctx1 = vp10_get_pred_context_comp_ref_p1(cm, xd);
+ const int bit1 = vpx_read(r, fc->comp_ref_prob[ctx1][1]);
+ if (counts)
+ ++counts->comp_ref[ctx1][1][bit1];
+ ref_frame[!idx] = cm->comp_var_ref[bit1 ? 0 : 1];
+ } else {
+ const int ctx2 = vp10_get_pred_context_comp_ref_p2(cm, xd);
+ const int bit2 = vpx_read(r, fc->comp_ref_prob[ctx2][2]);
+ if (counts)
+ ++counts->comp_ref[ctx2][2][bit2];
+ if (!bit2) {
+ const int ctx3 = vp10_get_pred_context_comp_ref_p3(cm, xd);
+ const int bit3 = vpx_read(r, fc->comp_ref_prob[ctx3][3]);
+ if (counts)
+ ++counts->comp_ref[ctx3][3][bit3];
+ ref_frame[!idx] = cm->comp_var_ref[bit3 ? 2 : 3];
+ } else {
+ ref_frame[!idx] = cm->comp_var_ref[4];
+ }
+ }
+#else
ref_frame[!idx] = cm->comp_var_ref[bit];
+#endif // CONFIG_EXT_REFS
} else if (mode == SINGLE_REFERENCE) {
+#if CONFIG_EXT_REFS
+ const int ctx0 = vp10_get_pred_context_single_ref_p1(xd);
+ const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]);
+ if (counts)
+ ++counts->single_ref[ctx0][0][bit0];
+ if (bit0) {
+ const int ctx1 = vp10_get_pred_context_single_ref_p2(xd);
+ const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]);
+ if (counts)
+ ++counts->single_ref[ctx1][1][bit1];
+ ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
+ } else {
+ const int ctx2 = vp10_get_pred_context_single_ref_p3(xd);
+ const int bit2 = vpx_read(r, fc->single_ref_prob[ctx2][2]);
+ if (counts)
+ ++counts->single_ref[ctx2][2][bit2];
+ if (bit2) {
+ const int ctx4 = vp10_get_pred_context_single_ref_p5(xd);
+ const int bit4 = vpx_read(r, fc->single_ref_prob[ctx4][4]);
+ if (counts)
+ ++counts->single_ref[ctx4][4][bit4];
+ ref_frame[0] = bit4 ? LAST4_FRAME : LAST3_FRAME;
+ } else {
+ const int ctx3 = vp10_get_pred_context_single_ref_p4(xd);
+ const int bit3 = vpx_read(r, fc->single_ref_prob[ctx3][3]);
+ if (counts)
+ ++counts->single_ref[ctx3][3][bit3];
+ ref_frame[0] = bit3 ? LAST2_FRAME : LAST_FRAME;
+ }
+ }
+#else
const int ctx0 = vp10_get_pred_context_single_ref_p1(xd);
const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]);
if (counts)
@@ -405,6 +768,7 @@
} else {
ref_frame[0] = LAST_FRAME;
}
+#endif // CONFIG_EXT_REFS
ref_frame[1] = NONE;
} else {
@@ -418,10 +782,13 @@
VP10_COMMON *const cm, MACROBLOCKD *const xd,
vpx_reader *r) {
const int ctx = vp10_get_pred_context_switchable_interp(xd);
- const INTERP_FILTER type =
- (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree,
- cm->fc->switchable_interp_prob[ctx]);
FRAME_COUNTS *counts = xd->counts;
+ INTERP_FILTER type;
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd)) return EIGHTTAP;
+#endif
+ type = (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree,
+ cm->fc->switchable_interp_prob[ctx]);
if (counts)
++counts->switchable_interp[ctx][type];
return type;
@@ -457,9 +824,44 @@
break;
default:
mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
+#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[0] = 0;
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+ int p_angle;
+ mbmi->angle_delta[0] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+ p_angle =
+ mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle)) {
+ FRAME_COUNTS *counts = xd->counts;
+ const int ctx = vp10_get_pred_context_intra_interp(xd);
+ mbmi->intra_filter = vpx_read_tree(r, vp10_intra_filter_tree,
+ cm->fc->intra_filter_probs[ctx]);
+ if (counts)
+ ++counts->intra_filter[ctx][mbmi->intra_filter];
+ } else {
+ mbmi->intra_filter = INTRA_FILTER_LINEAR;
+ }
+ }
+#endif // CONFIG_EXT_INTRA
}
mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ mbmi->angle_delta[1] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif // CONFIG_EXT_INTRA
+
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ if (bsize >= BLOCK_8X8)
+ read_ext_intra_mode_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
}
static INLINE int is_mv_valid(const MV *mv) {
@@ -476,6 +878,9 @@
int ret = 1;
switch (mode) {
+#if CONFIG_EXT_INTER
+ case NEWFROMNEARMV:
+#endif // CONFIG_EXT_INTER
case NEWMV: {
FRAME_COUNTS *counts = xd->counts;
nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
@@ -504,6 +909,83 @@
mv[1].as_int = 0;
break;
}
+#if CONFIG_EXT_INTER
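+ // Compound modes name the MV source of each reference independently:
+ // NEW components read a coded MV delta, NEAREST/NEAR components copy
+ // from the candidate list, and ZERO_ZEROMV sets both to zero.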
+ case NEW_NEWMV: {
+ FRAME_COUNTS *counts = xd->counts;
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ assert(is_compound);
+ for (i = 0; i < 2; ++i) {
+ read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+ ret = ret && is_mv_valid(&mv[i].as_mv);
+ }
+ break;
+ }
+ case NEAREST_NEARESTMV: {
+ assert(is_compound);
+ mv[0].as_int = nearest_mv[0].as_int;
+ mv[1].as_int = nearest_mv[1].as_int;
+ break;
+ }
+ case NEAREST_NEARMV: {
+ assert(is_compound);
+ mv[0].as_int = nearest_mv[0].as_int;
+ mv[1].as_int = near_mv[1].as_int;
+ break;
+ }
+ case NEAR_NEARESTMV: {
+ assert(is_compound);
+ mv[0].as_int = near_mv[0].as_int;
+ mv[1].as_int = nearest_mv[1].as_int;
+ break;
+ }
+ case NEW_NEARESTMV: {
+ FRAME_COUNTS *counts = xd->counts;
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ assert(is_compound);
+ read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+ ret = ret && is_mv_valid(&mv[0].as_mv);
+ mv[1].as_int = nearest_mv[1].as_int;
+ break;
+ }
+ case NEAREST_NEWMV: {
+ FRAME_COUNTS *counts = xd->counts;
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ assert(is_compound);
+ mv[0].as_int = nearest_mv[0].as_int;
+ read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+ ret = ret && is_mv_valid(&mv[1].as_mv);
+ break;
+ }
+ case NEAR_NEWMV: {
+ FRAME_COUNTS *counts = xd->counts;
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ assert(is_compound);
+ mv[0].as_int = near_mv[0].as_int;
+ read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+ ret = ret && is_mv_valid(&mv[1].as_mv);
+ break;
+ }
+ case NEW_NEARMV: {
+ FRAME_COUNTS *counts = xd->counts;
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ assert(is_compound);
+ read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+ ret = ret && is_mv_valid(&mv[0].as_mv);
+ mv[1].as_int = near_mv[1].as_int;
+ break;
+ }
+ case ZERO_ZEROMV: {
+ assert(is_compound);
+ mv[0].as_int = 0;
+ mv[1].as_int = 0;
+ break;
+ }
+#endif // CONFIG_EXT_INTER
default: {
return 0;
}
@@ -540,15 +1022,23 @@
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
int_mv nearestmv[2], nearmv[2];
- int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+#if CONFIG_EXT_INTER
+ int mv_idx;
+#endif // CONFIG_EXT_INTER
int ref, is_compound;
- uint8_t inter_mode_ctx[MAX_REF_FRAMES];
+ int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int16_t compound_inter_mode_ctx[MODE_CTX_REF_FRAMES];
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ int16_t mode_ctx = 0;
+ MV_REFERENCE_FRAME ref_frame;
read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
is_compound = has_second_ref(mbmi);
for (ref = 0; ref < 1 + is_compound; ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
xd->block_refs[ref] = ref_buf;
@@ -557,10 +1047,34 @@
"Reference frame has invalid dimensions");
vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
&ref_buf->sf);
- vp10_find_mv_refs(cm, xd, mi, frame, ref_mvs[frame],
- mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
}
+ for (ref_frame = LAST_FRAME; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
+ vp10_find_mv_refs(cm, xd, mi, ref_frame,
+#if CONFIG_REF_MV
+ &xd->ref_mv_count[ref_frame],
+ xd->ref_mv_stack[ref_frame],
+#if CONFIG_EXT_INTER
+ compound_inter_mode_ctx,
+#endif // CONFIG_EXT_INTER
+#endif
+ ref_mvs[ref_frame],
+ mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
+ }
+
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mode_ctx = compound_inter_mode_ctx[mbmi->ref_frame[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(inter_mode_ctx,
+ mbmi->ref_frame, bsize, -1);
+ mbmi->ref_mv_idx = 0;
+#else
+ mode_ctx = inter_mode_ctx[mbmi->ref_frame[0]];
+#endif
+
if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
if (bsize < BLOCK_8X8) {
@@ -569,21 +1083,112 @@
return;
}
} else {
- if (bsize >= BLOCK_8X8)
- mbmi->mode = read_inter_mode(cm, xd, r,
- inter_mode_ctx[mbmi->ref_frame[0]]);
+ if (bsize >= BLOCK_8X8) {
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mbmi->mode = read_inter_compound_mode(cm, xd, r, mode_ctx);
+ else
+#endif // CONFIG_EXT_INTER
+ mbmi->mode = read_inter_mode(cm, xd,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ mbmi,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ r, mode_ctx);
+#if CONFIG_REF_MV
+ if (mbmi->mode == NEARMV)
+ read_drl_idx(cm, xd, mbmi, r);
+#endif
+ }
}
+#if CONFIG_EXT_INTER
+ if (bsize < BLOCK_8X8 ||
+ (mbmi->mode != ZEROMV && mbmi->mode != ZERO_ZEROMV)) {
+#else
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
+#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + is_compound; ++ref) {
vp10_find_best_ref_mvs(allow_hp, ref_mvs[mbmi->ref_frame[ref]],
&nearestmv[ref], &nearmv[ref]);
}
}
+#if CONFIG_REF_MV
+ if (mbmi->ref_mv_idx > 0) {
+ int_mv cur_mv =
+ xd->ref_mv_stack[mbmi->ref_frame[0]][1 + mbmi->ref_mv_idx].this_mv;
+ lower_mv_precision(&cur_mv.as_mv, cm->allow_high_precision_mv);
+ nearmv[0] = cur_mv;
+ }
+
+#if CONFIG_EXT_INTER
+ if (is_compound && bsize >= BLOCK_8X8 && mbmi->mode != ZERO_ZEROMV) {
+#else
+ if (is_compound && bsize >= BLOCK_8X8 && mbmi->mode != NEWMV &&
+ mbmi->mode != ZEROMV) {
+#endif // CONFIG_EXT_INTER
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+
+#if CONFIG_EXT_INTER
+ if (xd->ref_mv_count[ref_frame_type] > 0) {
+#else
+ if (xd->ref_mv_count[ref_frame_type] == 1 && mbmi->mode == NEARESTMV) {
+#endif // CONFIG_EXT_INTER
+ int i;
+#if CONFIG_EXT_INTER
+ if (mbmi->mode == NEAREST_NEARESTMV) {
+#endif // CONFIG_EXT_INTER
+ nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
+ nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+
+ for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
+ lower_mv_precision(&nearestmv[i].as_mv, allow_hp);
+#if CONFIG_EXT_INTER
+ } else if (mbmi->mode == NEAREST_NEWMV || mbmi->mode == NEAREST_NEARMV) {
+ nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
+ lower_mv_precision(&nearestmv[0].as_mv, allow_hp);
+ } else if (mbmi->mode == NEW_NEARESTMV || mbmi->mode == NEAR_NEARESTMV) {
+ nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+ lower_mv_precision(&nearestmv[1].as_mv, allow_hp);
+ }
+#endif // CONFIG_EXT_INTER
+ }
+
+#if CONFIG_EXT_INTER
+ if (xd->ref_mv_count[ref_frame_type] > 1) {
+ if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEAR_NEARESTMV) {
+ nearmv[0] = xd->ref_mv_stack[ref_frame_type][1].this_mv;
+ lower_mv_precision(&nearmv[0].as_mv, allow_hp);
+ }
+
+ if (mbmi->mode == NEW_NEARMV || mbmi->mode == NEAREST_NEARMV) {
+ nearmv[1] = xd->ref_mv_stack[ref_frame_type][1].comp_mv;
+ lower_mv_precision(&nearmv[1].as_mv, allow_hp);
+ }
+ }
+#else
+ if (xd->ref_mv_count[ref_frame_type] > 1) {
+ int i;
+ int ref_mv_idx = 1 + mbmi->ref_mv_idx;
+ nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
+ nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+ nearmv[0] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ nearmv[1] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+
+ for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
+ lower_mv_precision(&nearestmv[i].as_mv, allow_hp);
+ lower_mv_precision(&nearmv[i].as_mv, allow_hp);
+ }
+ }
+#endif // CONFIG_EXT_INTER
+ }
+#endif
+
+#if !CONFIG_EXT_INTERP
mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
- ? read_switchable_interp_filter(cm, xd, r)
- : cm->interp_filter;
+ ? read_switchable_interp_filter(cm, xd, r)
+ : cm->interp_filter;
+#endif // !CONFIG_EXT_INTERP
if (bsize < BLOCK_8X8) {
const int num_4x4_w = 1 << xd->bmode_blocks_wl;
@@ -591,22 +1196,68 @@
int idx, idy;
PREDICTION_MODE b_mode;
int_mv nearest_sub8x8[2], near_sub8x8[2];
+#if CONFIG_EXT_INTER
+ int_mv ref_mv[2][2];
+#endif // CONFIG_EXT_INTER
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
const int j = idy * 2 + idx;
- b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]);
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (!is_compound)
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame,
+ bsize, j);
+#endif
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ b_mode = read_inter_compound_mode(cm, xd, r, mode_ctx);
+ else
+#endif // CONFIG_EXT_INTER
+ b_mode = read_inter_mode(cm, xd,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ mbmi,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ r, mode_ctx);
+#if CONFIG_EXT_INTER
+ mv_idx = (b_mode == NEWFROMNEARMV) ? 1 : 0;
+
+ if (b_mode != ZEROMV && b_mode != ZERO_ZEROMV) {
+#else
if (b_mode == NEARESTMV || b_mode == NEARMV) {
- uint8_t dummy_mode_ctx[MAX_REF_FRAMES];
+#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + is_compound; ++ref)
+#if CONFIG_EXT_INTER
+ {
+ int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
+ vp10_update_mv_context(cm, xd, mi, mbmi->ref_frame[ref],
+ mv_ref_list, j, mi_row, mi_col, NULL);
+#endif // CONFIG_EXT_INTER
vp10_append_sub8x8_mvs_for_idx(cm, xd, j, ref, mi_row, mi_col,
+#if CONFIG_EXT_INTER
+ mv_ref_list,
+#endif // CONFIG_EXT_INTER
&nearest_sub8x8[ref],
- &near_sub8x8[ref],
- dummy_mode_ctx);
+ &near_sub8x8[ref]);
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(b_mode)) {
+ mv_ref_list[0].as_int = nearest_sub8x8[ref].as_int;
+ mv_ref_list[1].as_int = near_sub8x8[ref].as_int;
+ vp10_find_best_ref_mvs(allow_hp, mv_ref_list,
+ &ref_mv[0][ref], &ref_mv[1][ref]);
+ }
+ }
+#endif // CONFIG_EXT_INTER
}
- if (!assign_mv(cm, xd, b_mode, block, nearestmv,
+ if (!assign_mv(cm, xd, b_mode, block,
+#if CONFIG_EXT_INTER
+ ref_mv[mv_idx],
+#else
+ nearestmv,
+#endif // CONFIG_EXT_INTER
nearest_sub8x8, near_sub8x8,
is_compound, allow_hp, r)) {
xd->corrupted |= 1;
@@ -629,33 +1280,140 @@
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
} else {
- xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv,
+ xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv,
+#if CONFIG_EXT_INTER
+ mbmi->mode == NEWFROMNEARMV ?
+ nearmv : nearestmv,
+#else
+ nearestmv,
+#endif // CONFIG_EXT_INTER
nearestmv, nearmv, is_compound, allow_hp, r);
}
+#if CONFIG_EXT_INTERP
+ mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
+ ? read_switchable_interp_filter(cm, xd, r)
+ : cm->interp_filter;
+#endif // CONFIG_EXT_INTERP
}
static void read_inter_frame_mode_info(VP10Decoder *const pbi,
MACROBLOCKD *const xd,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
int mi_row, int mi_col, vpx_reader *r) {
VP10_COMMON *const cm = &pbi->common;
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
- int inter_block;
+ int inter_block = 1;
+#if CONFIG_VAR_TX
+ BLOCK_SIZE bsize = mbmi->sb_type;
+#endif // CONFIG_VAR_TX
mbmi->mv[0].as_int = 0;
mbmi->mv[1].as_int = 0;
mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
- mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
- inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
- mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+#if CONFIG_SUPERTX
+ if (!supertx_enabled) {
+#endif // CONFIG_SUPERTX
+ mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
+ inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
+
+#if CONFIG_VAR_TX
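+ // Point the above/left txfm contexts at this block, then either parse
+ // the per-unit transform tree (TX_MODE_SELECT, inter, not skipped) or
+ // read a single tx size and propagate it to all transform units.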
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+ !mbmi->skip && inter_block) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ const int bs = num_4x4_blocks_wide_lookup[txb_size];
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; idy += bs)
+ for (idx = 0; idx < width; idx += bs)
+ read_tx_size_inter(cm, xd, mbmi, xd->counts, max_tx_size,
+ idy, idx, r);
+ if (xd->counts) {
+ const int ctx = get_tx_size_context(xd);
+ ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size];
+ }
+ } else {
+ mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+ if (inter_block) {
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; ++idy)
+ for (idx = 0; idx < width; ++idx)
+ mbmi->inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = mbmi->tx_size;
+ }
+
+ set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
+ }
+#else
+ mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+#endif // CONFIG_VAR_TX
+#if CONFIG_SUPERTX
+ }
+#if CONFIG_VAR_TX
+ else if (inter_block) {
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ xd->mi[0]->mbmi.tx_size = xd->supertx_size;
+ for (idy = 0; idy < height; ++idy)
+ for (idx = 0; idx < width; ++idx)
+ xd->mi[0]->mbmi.inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] =
+ xd->supertx_size;
+ }
+#endif // CONFIG_VAR_TX
+#endif // CONFIG_SUPERTX
if (inter_block)
- read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r);
+ read_inter_block_mode_info(pbi, xd,
+ mi, mi_row, mi_col, r);
else
read_intra_block_mode_info(cm, xd, mi, r);
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, inter_block) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type,
+ inter_block);
+ FRAME_COUNTS *counts = xd->counts;
+
+ if (inter_block) {
+ if (eset > 0) {
+ mbmi->tx_type =
+ vpx_read_tree(r, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size]);
+ if (counts)
+ ++counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type];
+ }
+ } else if (ALLOW_INTRA_EXT_TX) {
+ if (eset > 0) {
+ mbmi->tx_type = vpx_read_tree(r, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset]
+ [mbmi->tx_size][mbmi->mode]);
+ if (counts)
+ ++counts->intra_ext_tx[eset][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
+#else
if (mbmi->tx_size < TX_32X32 &&
cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
FRAME_COUNTS *counts = xd->counts;
if (inter_block) {
@@ -675,11 +1433,15 @@
} else {
mbmi->tx_type = DCT_DCT;
}
+#endif // CONFIG_EXT_TX
}
void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd,
- int mi_row, int mi_col, vpx_reader *r,
- int x_mis, int y_mis) {
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
+ int mi_row, int mi_col, vpx_reader *r,
+ int x_mis, int y_mis) {
VP10_COMMON *const cm = &pbi->common;
MODE_INFO *const mi = xd->mi[0];
MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
@@ -687,9 +1449,22 @@
if (frame_is_intra_only(cm)) {
read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
+#if CONFIG_REF_MV
+ for (h = 0; h < y_mis; ++h) {
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = NONE;
+ mv->ref_frame[1] = NONE;
+ }
+ }
+#endif
} else {
- read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r);
-
+ read_inter_frame_mode_info(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r);
for (h = 0; h < y_mis; ++h) {
MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
for (w = 0; w < x_mis; ++w) {
diff --git a/vp10/decoder/decodemv.h b/vp10/decoder/decodemv.h
index 6653be5..959a001 100644
--- a/vp10/decoder/decodemv.h
+++ b/vp10/decoder/decodemv.h
@@ -20,8 +20,12 @@
#endif
void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd,
- int mi_row, int mi_col, vpx_reader *r,
- int x_mis, int y_mis);
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif
+ int mi_row, int mi_col, vpx_reader *r,
+ int x_mis, int y_mis);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index d8864d2..05a2539 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -115,6 +115,12 @@
cm->setup_mi = vp10_dec_setup_mi;
vp10_loop_filter_init(cm);
+#if CONFIG_LOOP_RESTORATION
+ vp10_loop_restoration_precal();
+#endif // CONFIG_LOOP_RESTORATION
+#if CONFIG_ANS
+ vp10_build_pareto8_dec_tab(vp10_pareto8_token_probs, pbi->token_tab);
+#endif // CONFIG_ANS
cm->error.setjmp = 0;
@@ -196,10 +202,23 @@
// later commit that adds VP9-specific controls for this functionality.
if (ref_frame_flag == VP9_LAST_FLAG) {
ref_buf = &cm->frame_refs[0];
+#if CONFIG_EXT_REFS
+ } else if (ref_frame_flag == VP9_LAST2_FLAG) {
+ ref_buf = &cm->frame_refs[1];
+ } else if (ref_frame_flag == VP9_LAST3_FLAG) {
+ ref_buf = &cm->frame_refs[2];
+ } else if (ref_frame_flag == VP9_LAST4_FLAG) {
+ ref_buf = &cm->frame_refs[3];
+ } else if (ref_frame_flag == VP9_GOLD_FLAG) {
+ ref_buf = &cm->frame_refs[4];
+ } else if (ref_frame_flag == VP9_ALT_FLAG) {
+ ref_buf = &cm->frame_refs[5];
+#else
} else if (ref_frame_flag == VP9_GOLD_FLAG) {
ref_buf = &cm->frame_refs[1];
} else if (ref_frame_flag == VP9_ALT_FLAG) {
ref_buf = &cm->frame_refs[2];
+#endif // CONFIG_EXT_REFS
} else {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Invalid reference frame");
@@ -243,10 +262,10 @@
// Current thread releases the holding of reference frame.
decrease_ref_count(old_idx, frame_bufs, pool);
- // Release the reference frame in reference map.
- if ((mask & 1) && old_idx >= 0) {
+ // Release the hold on the reference frame in the reference map before
+ // decoding the next frame.
+ if (mask & 1)
decrease_ref_count(old_idx, frame_bufs, pool);
- }
cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
++ref_index;
}
@@ -268,7 +287,7 @@
}
// Invalidate these references until the next frame starts.
- for (ref_index = 0; ref_index < 3; ref_index++)
+ for (ref_index = 0; ref_index < REFS_PER_FRAME; ref_index++)
cm->frame_refs[ref_index].idx = -1;
}
@@ -326,7 +345,6 @@
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
}
-
if (setjmp(cm->error.jmp)) {
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
int i;
@@ -350,10 +368,10 @@
// Current thread releases the holding of reference frame.
decrease_ref_count(old_idx, frame_bufs, pool);
- // Release the reference frame in reference map.
- if ((mask & 1) && old_idx >= 0) {
+ // Release the hold on the reference frame in the reference map before
+ // decoding the next frame.
+ if (mask & 1)
decrease_ref_count(old_idx, frame_bufs, pool);
- }
++ref_index;
}
@@ -459,9 +477,7 @@
// an invalid bitstream and need to return an error.
uint8_t marker;
-#if CONFIG_MISC_FIXES
size_t frame_sz_sum = 0;
-#endif
assert(data_sz);
marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
@@ -470,7 +486,7 @@
if ((marker & 0xe0) == 0xc0) {
const uint32_t frames = (marker & 0x7) + 1;
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
- const size_t index_sz = 2 + mag * (frames - CONFIG_MISC_FIXES);
+ const size_t index_sz = 2 + mag * (frames - 1);
// This chunk is marked as having a superframe index but doesn't have
// enough data for it, thus it's an invalid superframe index.
@@ -501,20 +517,16 @@
x = clear_buffer;
}
- for (i = 0; i < frames - CONFIG_MISC_FIXES; ++i) {
+ for (i = 0; i < frames - 1; ++i) {
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j)
this_sz |= (*x++) << (j * 8);
- this_sz += CONFIG_MISC_FIXES;
+ this_sz += 1;
sizes[i] = this_sz;
-#if CONFIG_MISC_FIXES
frame_sz_sum += this_sz;
-#endif
}
-#if CONFIG_MISC_FIXES
- sizes[i] = data_sz - index_sz - frame_sz_sum;
-#endif
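+ // The last frame's size is not coded in the superframe index; it is
+ // whatever remains after the index and the explicitly coded sizes.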
+ sizes[i] = (uint32_t)(data_sz - index_sz - frame_sz_sum);
*count = frames;
}
}
diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h
index 72a6310..e590d8b 100644
--- a/vp10/decoder/decoder.h
+++ b/vp10/decoder/decoder.h
@@ -18,6 +18,9 @@
#include "vpx_scale/yv12config.h"
#include "vpx_util/vpx_thread.h"
+#if CONFIG_ANS
+#include "vp10/common/ans.h"
+#endif
#include "vp10/common/thread_common.h"
#include "vp10/common/onyxc_int.h"
#include "vp10/common/ppflags.h"
@@ -31,6 +34,9 @@
typedef struct TileData {
VP10_COMMON *cm;
vpx_reader bit_reader;
+#if CONFIG_ANS
+ struct AnsDecoder token_ans;
+#endif // CONFIG_ANS
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
@@ -40,6 +46,9 @@
typedef struct TileWorkerData {
struct VP10Decoder *pbi;
vpx_reader bit_reader;
+#if CONFIG_ANS
+ struct AnsDecoder token_ans;
+#endif // CONFIG_ANS
FRAME_COUNTS counts;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
@@ -80,6 +89,9 @@
int inv_tile_order;
int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // hold the reference buffer.
+#if CONFIG_ANS
+ rans_dec_lut token_tab[COEFF_PROB_MODELS];
+#endif // CONFIG_ANS
} VP10Decoder;
int vp10_receive_compressed_data(struct VP10Decoder *pbi,
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c
index d39e3dc..c5dec87 100644
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -11,6 +11,7 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
+#include "vp10/common/ans.h"
#include "vp10/common/blockd.h"
#include "vp10/common/common.h"
#include "vp10/common/entropy.h"
@@ -38,6 +39,7 @@
++coef_counts[band][ctx][token]; \
} while (0)
+#if !CONFIG_ANS
static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) {
int i, val = 0;
for (i = 0; i < n; ++i)
@@ -164,11 +166,7 @@
val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r);
break;
case CATEGORY6_TOKEN: {
-#if CONFIG_MISC_FIXES
const int skip_bits = TX_SIZES - 1 - tx_size;
-#else
- const int skip_bits = 0;
-#endif
const uint8_t *cat6p = cat6_prob + skip_bits;
#if CONFIG_VP9_HIGHBITDEPTH
switch (xd->bd) {
@@ -211,6 +209,175 @@
return c;
}
+#else // !CONFIG_ANS
+static INLINE int read_coeff(const vpx_prob *const probs, int n,
+ struct AnsDecoder *const ans) {
+ int i, val = 0;
+ for (i = 0; i < n; ++i)
+ val = (val << 1) | uabs_read(ans, probs[i]);
+ return val;
+}
+
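+// ANS counterpart of decode_coefs(): EOB and zero decisions are read
+// as binary uABS symbols, while the token class at or above ONE_TOKEN
+// is decoded in one shot from a precomputed rANS table indexed by the
+// pivot-node probability.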
+static int decode_coefs_ans(const MACROBLOCKD *const xd,
+ const rans_dec_lut *const token_tab,
+ PLANE_TYPE type,
+ tran_low_t *dqcoeff, TX_SIZE tx_size,
+ const int16_t *dq,
+ int ctx, const int16_t *scan, const int16_t *nb,
+ struct AnsDecoder *const ans) {
+ FRAME_COUNTS *counts = xd->counts;
+ const int max_eob = 16 << (tx_size << 1);
+ const FRAME_CONTEXT *const fc = xd->fc;
+ const int ref = is_inter_block(&xd->mi[0]->mbmi);
+ int band, c = 0;
+ const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ fc->coef_probs[tx_size][type][ref];
+ const vpx_prob *prob;
+ unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
+ unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
+ uint8_t token_cache[32 * 32];
+ const uint8_t *band_translate = get_band_translate(tx_size);
+ const int dq_shift = (tx_size == TX_32X32);
+ int v, token;
+ int16_t dqv = dq[0];
+ const uint8_t *cat1_prob;
+ const uint8_t *cat2_prob;
+ const uint8_t *cat3_prob;
+ const uint8_t *cat4_prob;
+ const uint8_t *cat5_prob;
+ const uint8_t *cat6_prob;
+
+ if (counts) {
+ coef_counts = counts->coef[tx_size][type][ref];
+ eob_branch_count = counts->eob_branch[tx_size][type][ref];
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->bd > VPX_BITS_8) {
+ if (xd->bd == VPX_BITS_10) {
+ cat1_prob = vp10_cat1_prob_high10;
+ cat2_prob = vp10_cat2_prob_high10;
+ cat3_prob = vp10_cat3_prob_high10;
+ cat4_prob = vp10_cat4_prob_high10;
+ cat5_prob = vp10_cat5_prob_high10;
+ cat6_prob = vp10_cat6_prob_high10;
+ } else {
+ cat1_prob = vp10_cat1_prob_high12;
+ cat2_prob = vp10_cat2_prob_high12;
+ cat3_prob = vp10_cat3_prob_high12;
+ cat4_prob = vp10_cat4_prob_high12;
+ cat5_prob = vp10_cat5_prob_high12;
+ cat6_prob = vp10_cat6_prob_high12;
+ }
+ } else {
+ cat1_prob = vp10_cat1_prob;
+ cat2_prob = vp10_cat2_prob;
+ cat3_prob = vp10_cat3_prob;
+ cat4_prob = vp10_cat4_prob;
+ cat5_prob = vp10_cat5_prob;
+ cat6_prob = vp10_cat6_prob;
+ }
+#else
+ cat1_prob = vp10_cat1_prob;
+ cat2_prob = vp10_cat2_prob;
+ cat3_prob = vp10_cat3_prob;
+ cat4_prob = vp10_cat4_prob;
+ cat5_prob = vp10_cat5_prob;
+ cat6_prob = vp10_cat6_prob;
+#endif
+
+ while (c < max_eob) {
+ int val = -1;
+ band = *band_translate++;
+ prob = coef_probs[band][ctx];
+ if (counts)
+ ++eob_branch_count[band][ctx];
+ if (!uabs_read(ans, prob[EOB_CONTEXT_NODE])) {
+ INCREMENT_COUNT(EOB_MODEL_TOKEN);
+ break;
+ }
+
+ while (!uabs_read(ans, prob[ZERO_CONTEXT_NODE])) {
+ INCREMENT_COUNT(ZERO_TOKEN);
+ dqv = dq[1];
+ token_cache[scan[c]] = 0;
+ ++c;
+ if (c >= max_eob)
+ return c; // zero tokens at the end (no eob token)
+ ctx = get_coef_context(nb, token_cache, c);
+ band = *band_translate++;
+ prob = coef_probs[band][ctx];
+ }
+
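+    // A single rANS read resolves the whole ONE_TOKEN..CATEGORY6_TOKEN
+    // alphabet; the table is selected by the pivot-node probability, the
+    // same index used for vp10_pareto8_full on the vpx_reader side.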
+ token = ONE_TOKEN + rans_read(ans, token_tab[prob[PIVOT_NODE] - 1]);
+ INCREMENT_COUNT(ONE_TOKEN + (token > ONE_TOKEN));
+ switch (token) {
+ case ONE_TOKEN:
+ case TWO_TOKEN:
+ case THREE_TOKEN:
+ case FOUR_TOKEN:
+ val = token;
+ break;
+ case CATEGORY1_TOKEN:
+ val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, ans);
+ break;
+ case CATEGORY2_TOKEN:
+ val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, ans);
+ break;
+ case CATEGORY3_TOKEN:
+ val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, ans);
+ break;
+ case CATEGORY4_TOKEN:
+ val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, ans);
+ break;
+ case CATEGORY5_TOKEN:
+ val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, ans);
+ break;
+ case CATEGORY6_TOKEN:
+ {
+ const int skip_bits = TX_SIZES - 1 - tx_size;
+ const uint8_t *cat6p = cat6_prob + skip_bits;
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (xd->bd) {
+ case VPX_BITS_8:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans);
+ break;
+ case VPX_BITS_10:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 16 - skip_bits, ans);
+ break;
+ case VPX_BITS_12:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 18 - skip_bits, ans);
+ break;
+ default:
+ assert(0);
+ return -1;
+ }
+#else
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans);
+#endif
+ }
+ break;
+ }
+ v = (val * dqv) >> dq_shift;
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#if CONFIG_VP9_HIGHBITDEPTH
+ dqcoeff[scan[c]] = highbd_check_range((uabs_read_bit(ans) ? -v : v),
+ xd->bd);
+#else
+ dqcoeff[scan[c]] = check_range(uabs_read_bit(ans) ? -v : v);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else
+ dqcoeff[scan[c]] = uabs_read_bit(ans) ? -v : v;
+#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
+ token_cache[scan[c]] = vp10_pt_energy_class[token];
+ ++c;
+ ctx = get_coef_context(nb, token_cache, c);
+ dqv = dq[1];
+ }
+
+ return c;
+}
+#endif // !CONFIG_ANS
// TODO(slavarnway): Decode version of vp10_set_context. Modify vp10_set_context
// after testing is complete, then delete this version.
@@ -257,18 +424,59 @@
}
}
-int vp10_decode_block_tokens(MACROBLOCKD *xd,
- int plane, const scan_order *sc,
- int x, int y,
- TX_SIZE tx_size, vpx_reader *r,
- int seg_id) {
+void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
+ vpx_reader *r) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ int color_idx, color_ctx, color_order[PALETTE_MAX_SIZE];
+ int n = mbmi->palette_mode_info.palette_size[plane != 0];
+ int i, j;
+ uint8_t *color_map = xd->plane[plane].color_index_map;
+  const vpx_prob (*prob)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] =
+ plane ? vp10_default_palette_uv_color_prob :
+ vp10_default_palette_y_color_prob;
+
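+  // The top-left index is coded outside this loop (row 0 starts at j = 1);
+  // each remaining index is decoded with a context derived from its
+  // already-decoded neighbors.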
+ for (i = 0; i < rows; ++i) {
+ for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+ color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, n,
+ color_order);
+ color_idx = vpx_read_tree(r, vp10_palette_color_tree[n - 2],
+ prob[n - 2][color_ctx]);
+ assert(color_idx >= 0 && color_idx < n);
+ color_map[i * cols + j] = color_order[color_idx];
+ }
+ }
+}
+
+int vp10_decode_block_tokens(MACROBLOCKD *const xd,
+#if CONFIG_ANS
+ const rans_dec_lut *const token_tab,
+#endif // CONFIG_ANS
+ int plane, const scan_order *sc,
+ int x, int y,
+ TX_SIZE tx_size,
+#if CONFIG_ANS
+ struct AnsDecoder *const r,
+#else
+ vpx_reader *r,
+#endif // CONFIG_ANS
+ int seg_id) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int16_t *const dequant = pd->seg_dequant[seg_id];
const int ctx = get_entropy_context(tx_size, pd->above_context + x,
pd->left_context + y);
+#if !CONFIG_ANS
const int eob = decode_coefs(xd, pd->plane_type,
pd->dqcoeff, tx_size,
dequant, ctx, sc->scan, sc->neighbors, r);
+#else
+ const int eob = decode_coefs_ans(xd, token_tab, pd->plane_type,
+ pd->dqcoeff, tx_size,
+ dequant, ctx, sc->scan, sc->neighbors, r);
+#endif // !CONFIG_ANS
dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
return eob;
}
diff --git a/vp10/decoder/detokenize.h b/vp10/decoder/detokenize.h
index c3fd90a..f87c6f0 100644
--- a/vp10/decoder/detokenize.h
+++ b/vp10/decoder/detokenize.h
@@ -12,19 +12,29 @@
#ifndef VP10_DECODER_DETOKENIZE_H_
#define VP10_DECODER_DETOKENIZE_H_
-#include "vpx_dsp/bitreader.h"
#include "vp10/decoder/decoder.h"
+#include "vp10/common/ans.h"
#include "vp10/common/scan.h"
#ifdef __cplusplus
extern "C" {
#endif
-int vp10_decode_block_tokens(MACROBLOCKD *xd,
- int plane, const scan_order *sc,
- int x, int y,
- TX_SIZE tx_size, vpx_reader *r,
- int seg_id);
+void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
+ vpx_reader *r);
+int vp10_decode_block_tokens(MACROBLOCKD *const xd,
+#if CONFIG_ANS
+ const rans_dec_lut *const token_tab,
+#endif // CONFIG_ANS
+ int plane, const scan_order *sc,
+ int x, int y,
+ TX_SIZE tx_size,
+#if CONFIG_ANS
+ struct AnsDecoder *const r,
+#else
+ vpx_reader *r,
+#endif // CONFIG_ANS
+ int seg_id);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/decoder/dsubexp.c b/vp10/decoder/dsubexp.c
index 36c1917..7d2872e 100644
--- a/vp10/decoder/dsubexp.c
+++ b/vp10/decoder/dsubexp.c
@@ -23,13 +23,13 @@
static int decode_uniform(vpx_reader *r) {
const int l = 8;
- const int m = (1 << l) - 191 + CONFIG_MISC_FIXES;
+ const int m = (1 << l) - 190;
const int v = vpx_read_literal(r, l - 1);
return v < m ? v : (v << 1) - m + vpx_read_bit(r);
}
static int inv_remap_prob(int v, int m) {
- static uint8_t inv_map_table[MAX_PROB - CONFIG_MISC_FIXES] = {
+ static uint8_t inv_map_table[MAX_PROB - 1] = {
7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189,
202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27,
@@ -47,9 +47,6 @@
207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222,
223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
-#if !CONFIG_MISC_FIXES
- 253
-#endif
};
assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0])));
v = inv_map_table[v];
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 04ce61d..73111c8 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -24,6 +24,7 @@
#include "vp10/common/entropymv.h"
#include "vp10/common/mvref_common.h"
#include "vp10/common/pred_common.h"
+#include "vp10/common/reconinter.h"
#include "vp10/common/seg_common.h"
#include "vp10/common/tile_common.h"
@@ -38,17 +39,83 @@
static const struct vp10_token intra_mode_encodings[INTRA_MODES] = {
{0, 1}, {6, 3}, {28, 5}, {30, 5}, {58, 6}, {59, 6}, {126, 7}, {127, 7},
{62, 6}, {2, 2}};
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
+ {{0, 1}, {4, 3}, {3, 2}, {5, 3}};
+#else
static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
{{0, 1}, {2, 2}, {3, 2}};
+#endif // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
static const struct vp10_token partition_encodings[PARTITION_TYPES] =
{{0, 1}, {2, 2}, {6, 3}, {7, 3}};
+#if !CONFIG_REF_MV
static const struct vp10_token inter_mode_encodings[INTER_MODES] =
+#if CONFIG_EXT_INTER
+ {{2, 2}, {6, 3}, {0, 1}, {14, 4}, {15, 4}};
+#else
{{2, 2}, {6, 3}, {0, 1}, {7, 3}};
+#endif // CONFIG_EXT_INTER
+#endif
+#if CONFIG_EXT_INTER
+static const struct vp10_token inter_compound_mode_encodings
+ [INTER_COMPOUND_MODES] = {
+ {2, 2}, {24, 5}, {25, 5}, {52, 6}, {53, 6},
+ {54, 6}, {55, 6}, {0, 1}, {7, 3}
+};
+#endif // CONFIG_EXT_INTER
+static const struct vp10_token palette_size_encodings[] = {
+ {0, 1}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {63, 6},
+};
+static const struct vp10_token
+palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = {
+ {{0, 1}, {1, 1}}, // 2 colors
+ {{0, 1}, {2, 2}, {3, 2}}, // 3 colors
+ {{0, 1}, {2, 2}, {6, 3}, {7, 3}}, // 4 colors
+ {{0, 1}, {2, 2}, {6, 3}, {14, 4}, {15, 4}}, // 5 colors
+ {{0, 1}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {31, 5}}, // 6 colors
+ {{0, 1}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {63, 6}}, // 7 colors
+ {{0, 1}, {2, 2}, {6, 3}, {14, 4},
+ {30, 5}, {62, 6}, {126, 7}, {127, 7}}, // 8 colors
+};
+static INLINE void write_uniform(vpx_writer *w, int n, int v) {
+ int l = get_unsigned_bits(n);
+ int m = (1 << l) - n;
+ if (l == 0)
+ return;
+ if (v < m) {
+ vpx_write_literal(w, v, l - 1);
+ } else {
+ vpx_write_literal(w, m + ((v - m) >> 1), l - 1);
+ vpx_write_literal(w, (v - m) & 1, 1);
+ }
+}
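+
+/* For reference, an illustrative inverse of write_uniform() (a sketch, not
+ * part of this patch; it mirrors decode_uniform() in dsubexp.c above).
+ * Values in [0, m) take l - 1 bits and values in [m, n) take l bits;
+ * assumes n > 1:
+ *
+ *   static INLINE int read_uniform(vpx_reader *r, int n) {
+ *     const int l = get_unsigned_bits(n);
+ *     const int m = (1 << l) - n;
+ *     const int v = vpx_read_literal(r, l - 1);
+ *     return v < m ? v : (v << 1) - m + vpx_read_bit(r);
+ *   }
+ */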
+
+#if CONFIG_EXT_TX
+static struct vp10_token ext_tx_inter_encodings[EXT_TX_SETS_INTER][TX_TYPES];
+static struct vp10_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES];
+#else
static struct vp10_token ext_tx_encodings[TX_TYPES];
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+static struct vp10_token intra_filter_encodings[INTRA_FILTERS];
+#endif // CONFIG_EXT_INTRA
void vp10_encode_token_init() {
+#if CONFIG_EXT_TX
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ vp10_tokens_from_tree(ext_tx_inter_encodings[s], vp10_ext_tx_inter_tree[s]);
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ vp10_tokens_from_tree(ext_tx_intra_encodings[s], vp10_ext_tx_intra_tree[s]);
+ }
+#else
vp10_tokens_from_tree(ext_tx_encodings, vp10_ext_tx_tree);
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+ vp10_tokens_from_tree(intra_filter_encodings, vp10_intra_filter_tree);
+#endif // CONFIG_EXT_INTRA
}
static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode,
@@ -56,13 +123,95 @@
vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]);
}
-static void write_inter_mode(vpx_writer *w, PREDICTION_MODE mode,
- const vpx_prob *probs) {
+static void write_inter_mode(VP10_COMMON *cm,
+ vpx_writer *w, PREDICTION_MODE mode,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int is_compound,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ const int16_t mode_ctx) {
+#if CONFIG_REF_MV
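+  // With ref-mv, the mode is coded as a cascade of binary decisions
+  // (NEWMV?, then ZEROMV?, then NEARESTMV vs NEARMV), each bit using a
+  // probability selected by fields packed into mode_ctx.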
+ const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
+ const vpx_prob newmv_prob = cm->fc->newmv_prob[newmv_ctx];
+#if CONFIG_EXT_INTER
+ vpx_write(w, mode != NEWMV && mode != NEWFROMNEARMV, newmv_prob);
+
+ if (!is_compound && (mode == NEWMV || mode == NEWFROMNEARMV))
+ vpx_write(w, mode == NEWFROMNEARMV, cm->fc->new2mv_prob);
+
+ if (mode != NEWMV && mode != NEWFROMNEARMV) {
+#else
+ vpx_write(w, mode != NEWMV, newmv_prob);
+
+ if (mode != NEWMV) {
+#endif // CONFIG_EXT_INTER
+ const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+ const vpx_prob zeromv_prob = cm->fc->zeromv_prob[zeromv_ctx];
+
+ if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
+ assert(mode == ZEROMV);
+ return;
+ }
+
+ vpx_write(w, mode != ZEROMV, zeromv_prob);
+
+ if (mode != ZEROMV) {
+ int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
+ vpx_prob refmv_prob;
+
+ if (mode_ctx & (1 << SKIP_NEARESTMV_OFFSET))
+ refmv_ctx = 6;
+ if (mode_ctx & (1 << SKIP_NEARMV_OFFSET))
+ refmv_ctx = 7;
+ if (mode_ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET))
+ refmv_ctx = 8;
+
+ refmv_prob = cm->fc->refmv_prob[refmv_ctx];
+ vpx_write(w, mode != NEARESTMV, refmv_prob);
+ }
+ }
+#else
+ const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
assert(is_inter_mode(mode));
- vp10_write_token(w, vp10_inter_mode_tree, probs,
+ vp10_write_token(w, vp10_inter_mode_tree, inter_probs,
&inter_mode_encodings[INTER_OFFSET(mode)]);
+#endif
}
+#if CONFIG_REF_MV
+static void write_drl_idx(const VP10_COMMON *cm,
+ const MB_MODE_INFO *mbmi,
+ const MB_MODE_INFO_EXT *mbmi_ext,
+ vpx_writer *w) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+ uint8_t drl0_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 0);
+ vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
+ vpx_write(w, mbmi->ref_mv_idx != 0, drl0_prob);
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 3 &&
+ mbmi->ref_mv_idx > 0) {
+ uint8_t drl1_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
+ vpx_prob drl1_prob = cm->fc->drl_prob1[drl1_ctx];
+ vpx_write(w, mbmi->ref_mv_idx != 1, drl1_prob);
+ }
+ }
+}
+#endif
+
+#if CONFIG_EXT_INTER
+static void write_inter_compound_mode(VP10_COMMON *cm, vpx_writer *w,
+ PREDICTION_MODE mode,
+ const int16_t mode_ctx) {
+ const vpx_prob *const inter_compound_probs =
+ cm->fc->inter_compound_mode_probs[mode_ctx];
+
+ assert(is_inter_compound_mode(mode));
+ vp10_write_token(w, vp10_inter_compound_mode_tree, inter_compound_probs,
+ &inter_compound_mode_encodings[INTER_COMPOUND_OFFSET(mode)]);
+}
+#endif // CONFIG_EXT_INTER
+
static void encode_unsigned_max(struct vpx_write_bit_buffer *wb,
int data, int max) {
vpx_wb_write_literal(wb, data, get_unsigned_bits(max));
@@ -101,6 +250,62 @@
return savings;
}
+#if CONFIG_VAR_TX
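+// write_tx_size_inter() walks a transform quadtree: at each node one bit
+// selects "stop at tx_size" (0) or "split" (1); a split recurses into four
+// tx_size - 1 children (a TX_8X8 split implies TX_4X4 leaves, so no
+// further bits are coded below it).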
+static void write_tx_size_inter(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd,
+ const MB_MODE_INFO *mbmi,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ vpx_writer *w) {
+ const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1),
+ tx_size);
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == mbmi->inter_tx_size[tx_idx]) {
+ vpx_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+ } else {
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+ vpx_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
+
+ if (tx_size == TX_8X8) {
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {
+ int offsetr = blk_row + ((i >> 1) << bsl);
+ int offsetc = blk_col + ((i & 0x01) << bsl);
+ write_tx_size_inter(cm, xd, mbmi, tx_size - 1, offsetr, offsetc, w);
+ }
+ }
+}
+
+static void update_txfm_partition_probs(VP10_COMMON *cm, vpx_writer *w,
+ FRAME_COUNTS *counts) {
+ int k;
+ for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
+ vp10_cond_prob_diff_update(w, &cm->fc->txfm_partition_prob[k],
+ counts->txfm_partition[k]);
+}
+#endif
+
static void write_selected_tx_size(const VP10_COMMON *cm,
const MACROBLOCKD *xd, vpx_writer *w) {
TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
@@ -116,6 +321,57 @@
}
}
+#if CONFIG_REF_MV
+static void update_inter_mode_probs(VP10_COMMON *cm, vpx_writer *w,
+ FRAME_COUNTS *counts) {
+ int i;
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->newmv_prob[i],
+ counts->newmv_mode[i]);
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->zeromv_prob[i],
+ counts->zeromv_mode[i]);
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->refmv_prob[i],
+ counts->refmv_mode[i]);
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->drl_prob0[i],
+ counts->drl_mode0[i]);
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->drl_prob1[i],
+ counts->drl_mode1[i]);
+#if CONFIG_EXT_INTER
+ vp10_cond_prob_diff_update(w, &cm->fc->new2mv_prob, counts->new2mv_mode);
+#endif // CONFIG_EXT_INTER
+}
+#endif
+
+#if CONFIG_EXT_INTER
+static void update_inter_compound_mode_probs(VP10_COMMON *cm, vpx_writer *w) {
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
+ int i;
+ int savings = 0;
+ int do_update = 0;
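+  // Group update: per-context diff updates are only coded when the total
+  // estimated savings exceed the cost of the single GROUP_DIFF_UPDATE_PROB
+  // flag that gates them.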
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
+ savings += prob_diff_update_savings(vp10_inter_compound_mode_tree,
+ cm->fc->inter_compound_mode_probs[i],
+ cm->counts.inter_compound_mode[i],
+ INTER_COMPOUND_MODES);
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
+ prob_diff_update(vp10_inter_compound_mode_tree,
+ cm->fc->inter_compound_mode_probs[i],
+ cm->counts.inter_compound_mode[i],
+ INTER_COMPOUND_MODES, w);
+ }
+ }
+}
+#endif // CONFIG_EXT_INTER
+
static int write_skip(const VP10_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, const MODE_INFO *mi, vpx_writer *w) {
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
@@ -144,6 +400,62 @@
counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
}
+
+#if CONFIG_EXT_TX
+static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) {
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
+ int i, j;
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_txsize[s][i]) continue;
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i],
+ cm->counts.inter_ext_tx[s][i], num_ext_tx_set_inter[s]);
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_txsize[s][i]) continue;
+ prob_diff_update(vp10_ext_tx_inter_tree[s],
+ cm->fc->inter_ext_tx_prob[s][i],
+ cm->counts.inter_ext_tx[s][i],
+ num_ext_tx_set_inter[s], w);
+ }
+ }
+ }
+
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j],
+ cm->counts.intra_ext_tx[s][i][j], num_ext_tx_set_intra[s]);
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ prob_diff_update(vp10_ext_tx_intra_tree[s],
+ cm->fc->intra_ext_tx_prob[s][i][j],
+ cm->counts.intra_ext_tx[s][i][j],
+ num_ext_tx_set_intra[s], w);
+ }
+ }
+ }
+}
+
+#else
+
static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) {
const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
@@ -186,19 +498,63 @@
}
}
}
+#endif // CONFIG_EXT_TX
+static void pack_palette_tokens(vpx_writer *w, TOKENEXTRA **tp,
+ BLOCK_SIZE bsize, int n) {
+ int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ int i;
+ TOKENEXTRA *p = *tp;
+
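+  // The (0,0) index is signalled separately (write_uniform() in
+  // write_palette_mode_info()), so only rows * cols - 1 tokens follow.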
+  for (i = 0; i < rows * cols - 1; ++i) {
+ vp10_write_token(w, vp10_palette_color_tree[n - 2], p->context_tree,
+ &palette_color_encodings[n - 2][p->token]);
+ ++p;
+ }
+
+ *tp = p;
+}
+
+#if CONFIG_SUPERTX
+static void update_supertx_probs(VP10_COMMON *cm, vpx_writer *w) {
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
+ int i, j;
+ int savings = 0;
+ int do_update = 0;
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+ for (j = 1; j < TX_SIZES; ++j) {
+ savings += vp10_cond_prob_diff_update_savings(&cm->fc->supertx_prob[i][j],
+ cm->counts.supertx[i][j]);
+ }
+ }
+ do_update = savings > savings_thresh;
+ vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+ if (do_update) {
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+ for (j = 1; j < TX_SIZES; ++j) {
+ vp10_cond_prob_diff_update(w, &cm->fc->supertx_prob[i][j],
+ cm->counts.supertx[i][j]);
+ }
+ }
+ }
+}
+#endif // CONFIG_SUPERTX
+
+#if !CONFIG_ANS
static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth, const TX_SIZE tx) {
TOKENEXTRA *p = *tp;
-#if !CONFIG_MISC_FIXES
- (void) tx;
+#if CONFIG_VAR_TX
+ int count = 0;
+ const int seg_eob = 16 << (tx << 1);
#endif
while (p < stop && p->token != EOSB_TOKEN) {
const int t = p->token;
const struct vp10_token *const a = &vp10_coef_encodings[t];
- int i = 0;
int v = a->value;
int n = a->len;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -215,38 +571,30 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
/* skip one or two nodes */
- if (p->skip_eob_node) {
+ if (p->skip_eob_node)
n -= p->skip_eob_node;
- i = 2 * p->skip_eob_node;
- }
+ else
+ vpx_write(w, t != EOB_TOKEN, p->context_tree[0]);
- // TODO(jbb): expanding this can lead to big gains. It allows
- // much better branch prediction and would enable us to avoid numerous
- // lookups and compares.
+ if (t != EOB_TOKEN) {
+ vpx_write(w, t != ZERO_TOKEN, p->context_tree[1]);
- // If we have a token that's in the constrained set, the coefficient tree
- // is split into two treed writes. The first treed write takes care of the
- // unconstrained nodes. The second treed write takes care of the
- // constrained nodes.
- if (t >= TWO_TOKEN && t < EOB_TOKEN) {
- int len = UNCONSTRAINED_NODES - p->skip_eob_node;
- int bits = v >> (n - len);
- vp10_write_tree(w, vp10_coef_tree, p->context_tree, bits, len, i);
- vp10_write_tree(w, vp10_coef_con_tree,
- vp10_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
- v, n - len, 0);
- } else {
- vp10_write_tree(w, vp10_coef_tree, p->context_tree, v, n, i);
+ if (t != ZERO_TOKEN) {
+ vpx_write(w, t != ONE_TOKEN, p->context_tree[2]);
+
+ if (t != ONE_TOKEN) {
+ int len = UNCONSTRAINED_NODES - p->skip_eob_node;
+ vp10_write_tree(w, vp10_coef_con_tree,
+ vp10_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
+ v, n - len, 0);
+ }
+ }
}
if (b->base_val) {
const int e = p->extra, l = b->len;
-#if CONFIG_MISC_FIXES
int skip_bits =
(b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0;
-#else
- int skip_bits = 0;
-#endif
if (l) {
const unsigned char *pb = b->prob;
@@ -269,10 +617,132 @@
vpx_write_bit(w, e & 1);
}
++p;
+
+#if CONFIG_VAR_TX
+ ++count;
+ if (t == EOB_TOKEN || count == seg_eob)
+ break;
+#endif
}
*tp = p;
}
+#else
+// This function serializes the tokens backwards, both in token order across
+// the stream and in bit order within each token.
+static void pack_mb_tokens_ans(struct AnsCoder *const ans,
+ const TOKENEXTRA *const start,
+ const TOKENEXTRA *const stop,
+ vpx_bit_depth_t bit_depth) {
+ const TOKENEXTRA *p;
+ TX_SIZE tx_size = TX_SIZES;
+
+ for (p = stop - 1; p >= start; --p) {
+ const int t = p->token;
+ if (t == EOSB_TOKEN) {
+ tx_size = (TX_SIZE)p->extra;
+ } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const vp10_extra_bit *const b =
+ (bit_depth == VPX_BITS_12) ? &vp10_extra_bits_high12[t] :
+ (bit_depth == VPX_BITS_10) ? &vp10_extra_bits_high10[t] :
+ &vp10_extra_bits[t];
+#else
+ const vp10_extra_bit *const b = &vp10_extra_bits[t];
+ (void) bit_depth;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (t != EOB_TOKEN && t != ZERO_TOKEN) {
+ // Write extra bits first
+ const int e = p->extra;
+ const int l = b->len;
+        const int skip_bits =
+            (t == CATEGORY6_TOKEN) ? TX_SIZES - 1 - tx_size : 0;
+ assert(tx_size < TX_SIZES);
+ uabs_write(ans, e & 1, 128);
+ if (l) {
+ const int v = e >> 1;
+ int n;
+ for (n = 0; n < l - skip_bits; ++n) {
+ const int bb = (v >> n) & 1;
+ uabs_write(ans, bb, b->prob[l - 1 - n]);
+ }
+ for (; n < l; ++n) {
+ assert(((v >> n) & 1) == 0);
+ }
+ }
+
+ {
+ struct rans_sym s;
+ int j;
+ const vpx_prob *token_probs =
+ vp10_pareto8_token_probs[p->context_tree[PIVOT_NODE] - 1];
+ s.cum_prob = 0;
+ for (j = ONE_TOKEN; j < t; ++j) {
+ s.cum_prob += token_probs[j - ONE_TOKEN];
+ }
+ s.prob = token_probs[t - ONE_TOKEN];
+ rans_write(ans, &s);
+ }
+ }
+ if (t != EOB_TOKEN)
+ uabs_write(ans, t != ZERO_TOKEN, p->context_tree[1]);
+ if (!p->skip_eob_node)
+ uabs_write(ans, t != EOB_TOKEN, p->context_tree[0]);
+ }
+ }
+}
+#endif // !CONFIG_ANS
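+
+/* Why backwards: ANS decoding is last-in first-out, so the symbol encoded
+ * last is decoded first. A sketch of the round-trip contract assumed above
+ * (hypothetical driver; uabs_write()/uabs_read() are the binary coders
+ * used elsewhere in this patch):
+ *
+ *   for (i = n - 1; i >= 0; --i)          // encode in reverse ...
+ *     uabs_write(&enc, bits[i], prob);
+ *   // ... then, once the coders are finalized/initialized, decode forward:
+ *   for (i = 0; i < n; ++i)
+ *     assert(uabs_read(&dec, prob) == bits[i]);
+ */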
+
+#if CONFIG_VAR_TX
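+// pack_txb_tokens() walks the same transform quadtree that
+// write_tx_size_inter() signalled, emitting each leaf's tokens with
+// pack_mb_tokens().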
+static void pack_txb_tokens(vpx_writer *w,
+ TOKENEXTRA **tp, const TOKENEXTRA *const tok_end,
+ MACROBLOCKD *xd, MB_MODE_INFO *mbmi, int plane,
+ BLOCK_SIZE plane_bsize,
+ vpx_bit_depth_t bit_depth,
+ int block,
+ int blk_row, int blk_col, TX_SIZE tx_size) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
+ mbmi->inter_tx_size[tx_idx];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ pack_mb_tokens(w, tp, tok_end, bit_depth, tx_size);
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ pack_txb_tokens(w, tp, tok_end, xd, mbmi, plane,
+ plane_bsize, bit_depth, block + i * step,
+ offsetr, offsetc, tx_size - 1);
+ }
+ }
+}
+#endif
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
const struct segmentation_probs *segp,
@@ -304,31 +774,119 @@
}
if (is_compound) {
- vpx_write(w, mbmi->ref_frame[0] == GOLDEN_FRAME,
- vp10_get_pred_prob_comp_ref_p(cm, xd));
+#if CONFIG_EXT_REFS
+ const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME ||
+ mbmi->ref_frame[0] == LAST3_FRAME ||
+ mbmi->ref_frame[0] == LAST4_FRAME);
+#else
+ const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME;
+#endif // CONFIG_EXT_REFS
+ vpx_write(w, bit, vp10_get_pred_prob_comp_ref_p(cm, xd));
+
+#if CONFIG_EXT_REFS
+ if (!bit) {
+ const int bit1 = mbmi->ref_frame[0] == LAST_FRAME;
+ vpx_write(w, bit1, vp10_get_pred_prob_comp_ref_p1(cm, xd));
+ } else {
+ const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME;
+ vpx_write(w, bit2, vp10_get_pred_prob_comp_ref_p2(cm, xd));
+ if (!bit2) {
+ const int bit3 = mbmi->ref_frame[0] == LAST3_FRAME;
+ vpx_write(w, bit3, vp10_get_pred_prob_comp_ref_p3(cm, xd));
+ }
+ }
+#endif // CONFIG_EXT_REFS
} else {
+#if CONFIG_EXT_REFS
+ const int bit0 = (mbmi->ref_frame[0] == GOLDEN_FRAME ||
+ mbmi->ref_frame[0] == ALTREF_FRAME);
+ vpx_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd));
+
+ if (bit0) {
+ const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME;
+ vpx_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd));
+ } else {
+ const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME ||
+ mbmi->ref_frame[0] == LAST4_FRAME);
+ vpx_write(w, bit2, vp10_get_pred_prob_single_ref_p3(cm, xd));
+
+ if (!bit2) {
+ const int bit3 = mbmi->ref_frame[0] != LAST_FRAME;
+ vpx_write(w, bit3, vp10_get_pred_prob_single_ref_p4(cm, xd));
+ } else {
+ const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME;
+ vpx_write(w, bit4, vp10_get_pred_prob_single_ref_p5(cm, xd));
+ }
+ }
+#else
const int bit0 = mbmi->ref_frame[0] != LAST_FRAME;
vpx_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd));
if (bit0) {
const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME;
vpx_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd));
}
+#endif // CONFIG_EXT_REFS
}
}
}
+#if CONFIG_EXT_INTRA
+static void write_ext_intra_mode_info(const VP10_COMMON *const cm,
+ const MB_MODE_INFO *const mbmi,
+ vpx_writer *w) {
+#if !ALLOW_FILTER_INTRA_MODES
+ return;
+#endif
+ if (mbmi->mode == DC_PRED) {
+ vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[0],
+ cm->fc->ext_intra_probs[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[0];
+ write_uniform(w, FILTER_INTRA_MODES, mode);
+ }
+ }
+ if (mbmi->uv_mode == DC_PRED) {
+ vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[1],
+ cm->fc->ext_intra_probs[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
+ EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[1];
+ write_uniform(w, FILTER_INTRA_MODES, mode);
+ }
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
+static void write_switchable_interp_filter(VP10_COMP *cpi,
+ const MACROBLOCKD *xd,
+ vpx_writer *w) {
+ VP10_COMMON *const cm = &cpi->common;
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp10_get_pred_context_switchable_interp(xd);
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd)) {
+ assert(mbmi->interp_filter == EIGHTTAP);
+ return;
+ }
+#endif
+ vp10_write_token(w, vp10_switchable_interp_tree,
+ cm->fc->switchable_interp_prob[ctx],
+ &switchable_interp_encodings[mbmi->interp_filter]);
+ ++cpi->interp_filter_selected[0][mbmi->interp_filter];
+ }
+}
+
static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif
vpx_writer *w) {
VP10_COMMON *const cm = &cpi->common;
const nmv_context *nmvc = &cm->fc->nmvc;
- const MACROBLOCK *const x = &cpi->td.mb;
- const MACROBLOCKD *const xd = &x->e_mbd;
+ const MACROBLOCK *x = &cpi->td.mb;
+ const MACROBLOCKD *xd = &x->e_mbd;
const struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
const struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- const struct segmentation_probs *const segp = &cm->segp;
-#endif
const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const PREDICTION_MODE mode = mbmi->mode;
@@ -351,19 +909,68 @@
}
}
+#if CONFIG_SUPERTX
+ if (supertx_enabled)
+ skip = mbmi->skip;
+ else
+ skip = write_skip(cm, xd, segment_id, mi, w);
+#else
skip = write_skip(cm, xd, segment_id, mi, w);
+#endif // CONFIG_SUPERTX
- if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
- vpx_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd));
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ vpx_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
!(is_inter && skip) && !xd->lossless[segment_id]) {
- write_selected_tx_size(cm, xd, w);
+#if CONFIG_VAR_TX
+ if (is_inter) { // This implies skip flag is 0.
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const int txb_size = txsize_to_bsize[max_tx_size];
+ const int bs = num_4x4_blocks_wide_lookup[txb_size];
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; idy += bs)
+ for (idx = 0; idx < width; idx += bs)
+ write_tx_size_inter(cm, xd, mbmi, max_tx_size, idy, idx, w);
+ } else {
+ set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
+
+ write_selected_tx_size(cm, xd, w);
+ }
+ } else {
+ set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
+#else
+ write_selected_tx_size(cm, xd, w);
+#endif
}
if (!is_inter) {
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
+#if CONFIG_EXT_INTRA
+ if (mode != DC_PRED && mode != TM_PRED) {
+ int p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+ p_angle = mode_to_angle_map[mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle)) {
+ vp10_write_token(w, vp10_intra_filter_tree,
+ cm->fc->intra_filter_probs[intra_filter_ctx],
+ &intra_filter_encodings[mbmi->intra_filter]);
+ }
+ }
+#endif // CONFIG_EXT_INTRA
} else {
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -376,27 +983,53 @@
}
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+
+ if (bsize >= BLOCK_8X8)
+ write_ext_intra_mode_info(cm, mbmi, w);
+#endif // CONFIG_EXT_INTRA
} else {
- const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
- const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
+ int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
write_ref_frames(cm, xd, w);
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, -1);
+#endif
+
// If segment skip is not enabled code the mode.
if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
if (bsize >= BLOCK_8X8) {
- write_inter_mode(w, mode, inter_probs);
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(mode))
+ write_inter_compound_mode(cm, w, mode, mode_ctx);
+ else if (is_inter_singleref_mode(mode))
+#endif // CONFIG_EXT_INTER
+ write_inter_mode(cm, w, mode,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ has_second_ref(mbmi),
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ mode_ctx);
+
+#if CONFIG_REF_MV
+ if (mode == NEARMV)
+ write_drl_idx(cm, mbmi, mbmi_ext, w);
+#endif
}
}
- if (cm->interp_filter == SWITCHABLE) {
- const int ctx = vp10_get_pred_context_switchable_interp(xd);
- vp10_write_token(w, vp10_switchable_interp_tree,
- cm->fc->switchable_interp_prob[ctx],
- &switchable_interp_encodings[mbmi->interp_filter]);
- ++cpi->interp_filter_selected[0][mbmi->interp_filter];
- } else {
- assert(mbmi->interp_filter == cm->interp_filter);
- }
+#if !CONFIG_EXT_INTERP
+ write_switchable_interp_filter(cpi, xd, w);
+#endif // !CONFIG_EXT_INTERP
if (bsize < BLOCK_8X8) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -406,26 +1039,114 @@
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int j = idy * 2 + idx;
const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
- write_inter_mode(w, b_mode, inter_probs);
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (!is_compound)
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, j);
+#endif
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(b_mode))
+ write_inter_compound_mode(cm, w, b_mode, mode_ctx);
+ else if (is_inter_singleref_mode(b_mode))
+#endif // CONFIG_EXT_INTER
+ write_inter_mode(cm, w, b_mode,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ has_second_ref(mbmi),
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ mode_ctx);
+
+#if CONFIG_EXT_INTER
+ if (b_mode == NEWMV || b_mode == NEWFROMNEARMV ||
+ b_mode == NEW_NEWMV) {
+#else
if (b_mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + is_compound; ++ref)
vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
- &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
+#if CONFIG_EXT_INTER
+ &mi->bmi[j].ref_mv[ref].as_mv,
+#else
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
+#endif // CONFIG_EXT_INTER
nmvc, allow_hp);
}
+#if CONFIG_EXT_INTER
+ else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) {
+ vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[1].as_mv,
+ &mi->bmi[j].ref_mv[1].as_mv, nmvc, allow_hp);
+ } else if (b_mode == NEW_NEARESTMV || b_mode == NEW_NEARMV) {
+ vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[0].as_mv,
+ &mi->bmi[j].ref_mv[0].as_mv, nmvc, allow_hp);
+ }
+#endif // CONFIG_EXT_INTER
}
}
} else {
+#if CONFIG_EXT_INTER
+ if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
+#else
if (mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + is_compound; ++ref)
+#if CONFIG_EXT_INTER
+ {
+ if (mode == NEWFROMNEARMV)
+ vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][1].as_mv,
+ nmvc, allow_hp);
+ else
+#endif // CONFIG_EXT_INTER
vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
&mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
allow_hp);
+#if CONFIG_EXT_INTER
+ }
+ } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
+ vp10_encode_mv(cpi, w, &mbmi->mv[1].as_mv,
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv, nmvc,
+ allow_hp);
+ } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
+ vp10_encode_mv(cpi, w, &mbmi->mv[0].as_mv,
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv, nmvc,
+ allow_hp);
+#endif // CONFIG_EXT_INTER
}
}
+
+#if CONFIG_EXT_INTERP
+ write_switchable_interp_filter(cpi, xd, w);
+#endif // CONFIG_EXT_INTERP
}
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize, is_inter);
+ if (is_inter) {
+ if (eset > 0)
+ vp10_write_token(w, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size],
+ &ext_tx_inter_encodings[eset][mbmi->tx_type]);
+ } else if (ALLOW_INTRA_EXT_TX) {
+ if (eset > 0)
+ vp10_write_token(
+ w, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+ &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+ }
+ }
+#else
if (mbmi->tx_size < TX_32X32 &&
cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
if (is_inter) {
vp10_write_token(
@@ -440,19 +1161,50 @@
&ext_tx_encodings[mbmi->tx_type]);
}
} else {
- if (!mbmi->skip)
- assert(mbmi->tx_type == DCT_DCT);
+ if (!mbmi->skip) {
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+ assert(mbmi->tx_type == DCT_DCT);
+ }
+ }
+#endif // CONFIG_EXT_TX
+}
+
+static void write_palette_mode_info(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd,
+ const MODE_INFO *const mi,
+ vpx_writer *w) {
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
+ int palette_ctx = 0;
+ int n, i;
+
+ n = pmi->palette_size[0];
+ if (above_mi)
+ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (left_mi)
+ palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ vpx_write(w, n > 0,
+ vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx]);
+ if (n > 0) {
+ vp10_write_token(w, vp10_palette_size_tree,
+ vp10_default_palette_y_size_prob[bsize - BLOCK_8X8],
+ &palette_size_encodings[n - 2]);
+ for (i = 0; i < n; ++i)
+ vpx_write_literal(w, pmi->palette_colors[i],
+ cm->bit_depth);
+ write_uniform(w, n, pmi->palette_first_color_idx[0]);
}
}
static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO **mi_8x8, vpx_writer *w) {
const struct segmentation *const seg = &cm->seg;
-#if CONFIG_MISC_FIXES
const struct segmentation_probs *const segp = &cm->fc->seg;
-#else
- const struct segmentation_probs *const segp = &cm->segp;
-#endif
const MODE_INFO *const mi = mi_8x8[0];
const MODE_INFO *const above_mi = xd->above_mi;
const MODE_INFO *const left_mi = xd->left_mi;
@@ -471,6 +1223,21 @@
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mbmi->mode,
get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+ int p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+ p_angle =
+ mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle)) {
+ vp10_write_token(w, vp10_intra_filter_tree,
+ cm->fc->intra_filter_probs[intra_filter_ctx],
+ &intra_filter_encodings[mbmi->intra_filter]);
+ }
+ }
+#endif // CONFIG_EXT_INTRA
} else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -486,7 +1253,31 @@
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+#endif // CONFIG_EXT_INTRA
+ if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools &&
+ mbmi->mode == DC_PRED)
+ write_palette_mode_info(cm, xd, mi, w);
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, 0) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
+ ALLOW_INTRA_EXT_TX) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize, 0);
+ if (eset > 0)
+ vp10_write_token(
+ w, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+ &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+ }
+#else
if (mbmi->tx_size < TX_32X32 &&
cm->base_qindex > 0 && !mbmi->skip &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
@@ -496,16 +1287,30 @@
[intra_mode_to_tx_type_context[mbmi->mode]],
&ext_tx_encodings[mbmi->tx_type]);
}
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+ if (bsize >= BLOCK_8X8)
+ write_ext_intra_mode_info(cm, mbmi, w);
+#endif // CONFIG_EXT_INTRA
}
static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
vpx_writer *w, TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif
int mi_row, int mi_col) {
const VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
MODE_INFO *m;
int plane;
+#if CONFIG_ANS
+ (void) tok;
+ (void) tok_end;
+ (void) plane;
+#endif  // CONFIG_ANS
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
@@ -519,19 +1324,76 @@
if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cm, xd, xd->mi, w);
} else {
- pack_inter_mode_mvs(cpi, m, w);
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+#endif
+ pack_inter_mode_mvs(cpi, m,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ w);
}
+ if (m->mbmi.palette_mode_info.palette_size[0] > 0) {
+ assert(*tok < tok_end);
+ pack_palette_tokens(w, tok, m->mbmi.sb_type,
+ m->mbmi.palette_mode_info.palette_size[0]);
+ assert(*tok < tok_end);
+ }
+
+#if CONFIG_SUPERTX
+ if (supertx_enabled) return;
+#endif // CONFIG_SUPERTX
+
+#if !CONFIG_ANS
if (!m->mbmi.skip) {
assert(*tok < tok_end);
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ MB_MODE_INFO *mbmi = &m->mbmi;
+ BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ int row, col;
+
+ if (is_inter_block(mbmi)) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+ int block = 0;
+ const int step = 1 << (max_tx_size << 1);
+ for (row = 0; row < num_4x4_h; row += bw) {
+ for (col = 0; col < num_4x4_w; col += bw) {
+ pack_txb_tokens(w, tok, tok_end, xd, mbmi, plane, plane_bsize,
+ cm->bit_depth, block, row, col, max_tx_size);
+ block += step;
+ }
+ }
+ } else {
+ TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
+ : m->mbmi.tx_size;
+ BLOCK_SIZE txb_size = txsize_to_bsize[tx];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+
+ for (row = 0; row < num_4x4_h; row += bw)
+ for (col = 0; col < num_4x4_w; col += bw)
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+ }
+#else
TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
: m->mbmi.tx_size;
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif // CONFIG_VAR_TX
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
}
}
+#endif
}
static void write_partition(const VP10_COMMON *const cm,
@@ -559,6 +1421,9 @@
static void write_modes_sb(VP10_COMP *cpi,
const TileInfo *const tile, vpx_writer *w,
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif
int mi_row, int mi_col, BLOCK_SIZE bsize) {
const VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
@@ -567,7 +1432,12 @@
const int bs = (1 << bsl) / 4;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
- const MODE_INFO *m = NULL;
+ MODE_INFO *m = NULL;
+#if CONFIG_SUPERTX
+ const int pack_token = !supertx_enabled;
+ TX_SIZE supertx_size;
+ int plane;
+#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -577,36 +1447,135 @@
partition = partition_lookup[bsl][m->mbmi.sb_type];
write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
subsize = get_subsize(bsize, partition);
+#if CONFIG_SUPERTX
+ xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+ set_mi_row_col(xd, tile,
+ mi_row, num_8x8_blocks_high_lookup[bsize],
+ mi_col, num_8x8_blocks_wide_lookup[bsize],
+ cm->mi_rows, cm->mi_cols);
+ if (!supertx_enabled &&
+ !frame_is_intra_only(cm) &&
+ partition != PARTITION_NONE && bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0]) {
+ vpx_prob prob;
+ supertx_size = max_txsize_lookup[bsize];
+ prob = cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
+ [supertx_size];
+ supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size);
+ vpx_write(w, supertx_enabled, prob);
+ if (supertx_enabled) {
+ vpx_write(w, xd->mi[0]->mbmi.skip, vp10_get_skip_prob(cm, xd));
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(supertx_size, bsize, 1) > 1 &&
+ !xd->mi[0]->mbmi.skip) {
+ int eset = get_ext_tx_set(supertx_size, bsize, 1);
+ if (eset > 0) {
+ vp10_write_token(
+ w, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][supertx_size],
+ &ext_tx_inter_encodings[eset][xd->mi[0]->mbmi.tx_type]);
+ }
+ }
+#else
+ if (supertx_size < TX_32X32 && !xd->mi[0]->mbmi.skip) {
+ vp10_write_token(
+ w, vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[supertx_size],
+ &ext_tx_encodings[xd->mi[0]->mbmi.tx_type]);
+ }
+#endif // CONFIG_EXT_TX
+ }
+ }
+#endif // CONFIG_SUPERTX
if (subsize < BLOCK_8X8) {
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col);
} else {
switch (partition) {
case PARTITION_NONE:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col);
break;
case PARTITION_HORZ:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col);
if (mi_row + bs < cm->mi_rows)
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + bs, mi_col);
break;
case PARTITION_VERT:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col);
if (mi_col + bs < cm->mi_cols)
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs);
+ write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col + bs);
break;
case PARTITION_SPLIT:
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs,
- subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col,
- subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
- subsize);
+ write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, subsize);
+ write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col + bs, subsize);
+ write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + bs, mi_col, subsize);
+ write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + bs, mi_col + bs, subsize);
break;
default:
assert(0);
}
}
+#if CONFIG_SUPERTX
+ if (partition != PARTITION_NONE && supertx_enabled && pack_token &&
+ !m->mbmi.skip) {
+ assert(*tok < tok_end);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const int mbmi_txb_size = txsize_to_bsize[m->mbmi.tx_size];
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi_txb_size];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi_txb_size];
+ int row, col;
+ TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
+ : m->mbmi.tx_size;
+ BLOCK_SIZE txb_size = txsize_to_bsize[tx];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+
+ for (row = 0; row < num_4x4_h; row += bw)
+ for (col = 0; col < num_4x4_w; col += bw)
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+ assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
+ (*tok)++;
+ }
+ }
+#endif // CONFIG_SUPERTX
// update partition context
if (bsize >= BLOCK_8X8 &&
@@ -623,10 +1592,16 @@
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
vp10_zero(xd->left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(xd->left_txfm_context_buffer);
+#endif
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col,
- BLOCK_64X64);
+ write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ mi_row, mi_col, BLOCK_64X64);
}
}
@@ -701,7 +1676,6 @@
}
}
- // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
/* Is coef updated at all */
if (update[1] == 0 || savings < 0) {
vpx_write_bit(bc, 0);
@@ -821,9 +1795,10 @@
}
}
-static void encode_loopfilter(struct loopfilter *lf,
+static void encode_loopfilter(VP10_COMMON *cm,
struct vpx_write_bit_buffer *wb) {
int i;
+ struct loopfilter *lf = &cm->lf;
// Encode the loop filter level and type
vpx_wb_write_literal(wb, lf->filter_level, 6);
@@ -857,12 +1832,21 @@
}
}
}
+#if CONFIG_LOOP_RESTORATION
+ vpx_wb_write_bit(wb, lf->restoration_level != lf->last_restoration_level);
+ if (lf->restoration_level != lf->last_restoration_level) {
+ int level = lf->restoration_level -
+ (lf->restoration_level > lf->last_restoration_level);
+ vpx_wb_write_literal(wb, level,
+ vp10_restoration_level_bits(cm));
+ }
+#endif // CONFIG_LOOP_RESTORATION
}
static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
if (delta_q != 0) {
vpx_wb_write_bit(wb, 1);
- vpx_wb_write_inv_signed_literal(wb, delta_q, CONFIG_MISC_FIXES ? 6 : 4);
+ vpx_wb_write_inv_signed_literal(wb, delta_q, 6);
} else {
vpx_wb_write_bit(wb, 0);
}
@@ -879,11 +1863,7 @@
static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
struct vpx_write_bit_buffer *wb) {
int i, j;
-
const struct segmentation *seg = &cm->seg;
-#if !CONFIG_MISC_FIXES
- const struct segmentation_probs *segp = &cm->segp;
-#endif
vpx_wb_write_bit(wb, seg->enabled);
if (!seg->enabled)
@@ -898,16 +1878,6 @@
if (seg->update_map) {
// Select the coding strategy (temporal or spatial)
vp10_choose_segmap_coding_method(cm, xd);
-#if !CONFIG_MISC_FIXES
- // Write out probabilities used to decode unpredicted macro-block segments
- for (i = 0; i < SEG_TREE_PROBS; i++) {
- const int prob = segp->tree_probs[i];
- const int update = prob != MAX_PROB;
- vpx_wb_write_bit(wb, update);
- if (update)
- vpx_wb_write_literal(wb, prob, 8);
- }
-#endif
// Write out the chosen coding method.
if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
@@ -915,18 +1885,6 @@
} else {
assert(seg->temporal_update == 0);
}
-
-#if !CONFIG_MISC_FIXES
- if (seg->temporal_update) {
- for (i = 0; i < PREDICTION_PROBS; i++) {
- const int prob = segp->pred_probs[i];
- const int update = prob != MAX_PROB;
- vpx_wb_write_bit(wb, update);
- if (update)
- vpx_wb_write_literal(wb, prob, 8);
- }
- }
-#endif
}
// Segmentation data
@@ -954,7 +1912,6 @@
}
}
-#if CONFIG_MISC_FIXES
static void update_seg_probs(VP10_COMP *cpi, vpx_writer *w) {
VP10_COMMON *cm = &cpi->common;
@@ -981,18 +1938,10 @@
if (mode != TX_MODE_SELECT)
vpx_wb_write_literal(wb, mode, 2);
}
-#else
-static void write_txfm_mode(TX_MODE mode, struct vpx_writer *wb) {
- vpx_write_literal(wb, VPXMIN(mode, ALLOW_32X32), 2);
- if (mode >= ALLOW_32X32)
- vpx_write_bit(wb, mode == TX_MODE_SELECT);
-}
-#endif
static void update_txfm_probs(VP10_COMMON *cm, vpx_writer *w,
FRAME_COUNTS *counts) {
-
if (cm->tx_mode == TX_MODE_SELECT) {
int i, j;
unsigned int ct_8x8p[TX_SIZES - 3][2];
@@ -1026,7 +1975,7 @@
struct vpx_write_bit_buffer *wb) {
vpx_wb_write_bit(wb, filter == SWITCHABLE);
if (filter != SWITCHABLE)
- vpx_wb_write_literal(wb, filter, 2);
+ vpx_wb_write_literal(wb, filter, 2 + CONFIG_EXT_INTERP);
}
static void fix_interp_filter(VP10_COMMON *cm, FRAME_COUNTS *counts) {
@@ -1072,6 +2021,17 @@
}
static int get_refresh_mask(VP10_COMP *cpi) {
+ int refresh_mask = 0;
+#if CONFIG_EXT_REFS
+ int ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) {
+ refresh_mask |= (cpi->refresh_last_frames[ref_frame - LAST_FRAME] <<
+ cpi->lst_fb_idxes[ref_frame - LAST_FRAME]);
+ }
+#else
+ refresh_mask = cpi->refresh_last_frame << cpi->lst_fb_idx;
+#endif // CONFIG_EXT_REFS
+
if (vp10_preserve_existing_gf(cpi)) {
// We have decided to preserve the previously existing golden frame as our
// new ARF frame. However, in the short term we leave it in the GF slot and,
@@ -1083,15 +2043,14 @@
// Note: This is highly specific to the use of ARF as a forward reference,
// and this needs to be generalized as other uses are implemented
// (like RTC/temporal scalability).
- return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
- (cpi->refresh_golden_frame << cpi->alt_fb_idx);
+ return refresh_mask | (cpi->refresh_golden_frame << cpi->alt_fb_idx);
} else {
int arf_idx = cpi->alt_fb_idx;
if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
arf_idx = gf_group->arf_update_idx[gf_group->index];
}
- return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
+ return refresh_mask |
(cpi->refresh_golden_frame << cpi->gld_fb_idx) |
(cpi->refresh_alt_ref_frame << arf_idx);
}
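For intuition, a minimal standalone sketch (illustration only, not part of the
patch; slot values hypothetical) of how get_refresh_mask() accumulates one bit
per frame-buffer slot in the CONFIG_EXT_REFS case:

    /* With lst_fb_idxes = {0, 1, 2, 3} and refresh_last_frames =
     * {1, 0, 0, 1}, the loop yields mask = 0x9; OR-ing in a golden-frame
     * refresh at slot 4 gives 0x19. */
    static int sketch_refresh_mask(const int refresh_last_frames[4],
                                   const int lst_fb_idxes[4],
                                   int refresh_golden_frame, int gld_fb_idx) {
      int mask = 0, i;
      for (i = 0; i < 4; ++i)
        mask |= refresh_last_frames[i] << lst_fb_idxes[i];
      return mask | (refresh_golden_frame << gld_fb_idx);
    }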
@@ -1100,7 +2059,10 @@
static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr,
unsigned int *max_tile_sz) {
VP10_COMMON *const cm = &cpi->common;
- vpx_writer residual_bc;
+ vpx_writer mode_bc;
+#if CONFIG_ANS
+ struct AnsCoder token_ans;
+#endif
int tile_row, tile_col;
TOKENEXTRA *tok_end;
size_t total_size = 0;
@@ -1110,36 +2072,57 @@
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*cm->above_txfm_context) * mi_cols_aligned_to_sb(cm->mi_cols));
+#endif
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
int tile_idx = tile_row * tile_cols + tile_col;
+ int put_tile_size = tile_col < tile_cols - 1 || tile_row < tile_rows - 1;
+ uint8_t *const mode_data_start =
+ data_ptr + total_size + (put_tile_size ? 4 : 0);
+ int token_section_size;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
tok_end = cpi->tile_tok[tile_row][tile_col] +
cpi->tok_count[tile_row][tile_col];
- if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
- vpx_start_encode(&residual_bc, data_ptr + total_size + 4);
- else
- vpx_start_encode(&residual_bc, data_ptr + total_size);
+ vpx_start_encode(&mode_bc, mode_data_start);
+#if !CONFIG_ANS
+ (void) token_section_size;
write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
- &residual_bc, &tok, tok_end);
+ &mode_bc, &tok, tok_end);
assert(tok == tok_end);
- vpx_stop_encode(&residual_bc);
- if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
+ vpx_stop_encode(&mode_bc);
+ if (put_tile_size) {
unsigned int tile_sz;
// size of this tile
- assert(residual_bc.pos > 0);
- tile_sz = residual_bc.pos - CONFIG_MISC_FIXES;
+ assert(mode_bc.pos > 0);
+ tile_sz = mode_bc.pos - 1;
mem_put_le32(data_ptr + total_size, tile_sz);
max_tile = max_tile > tile_sz ? max_tile : tile_sz;
total_size += 4;
}
-
- total_size += residual_bc.pos;
+ total_size += mode_bc.pos;
+#else
+ write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc,
+ NULL, NULL);
+ vpx_stop_encode(&mode_bc);
+ ans_write_init(&token_ans, mode_data_start + mode_bc.pos);
+ pack_mb_tokens_ans(&token_ans, tok, tok_end, cm->bit_depth);
+ token_section_size = ans_write_end(&token_ans);
+ if (put_tile_size) {
+ // size of this tile
+ mem_put_be32(data_ptr + total_size,
+ 4 + mode_bc.pos + token_section_size);
+ total_size += 4;
+ }
+ total_size += mode_bc.pos + token_section_size;
+#endif // !CONFIG_ANS
}
}
*max_tile_sz = max_tile;
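The resulting layout in the non-ANS path, as a reader-side sketch (illustration
only; assumes mem_get_le32() from vpx_ports/mem_ops.h mirrors mem_put_le32(),
and ignores the remux_tiles() pass further below that later narrows the size
fields): every tile except the last carries a 4-byte prefix storing
(tile size - 1).

    static const uint8_t *sketch_next_tile(const uint8_t *p, int is_last,
                                           size_t remaining,
                                           const uint8_t **tile,
                                           size_t *tile_sz) {
      if (is_last) {
        *tile = p;              // last tile: no size prefix, runs to the end
        *tile_sz = remaining;
        return p + remaining;
      }
      *tile_sz = (size_t)mem_get_le32(p) + 1;  // stored value is size - 1
      *tile = p + 4;
      return p + 4 + *tile_sz;
    }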
@@ -1178,10 +2161,8 @@
if (cfg != NULL) {
found = cm->width == cfg->y_crop_width &&
cm->height == cfg->y_crop_height;
-#if CONFIG_MISC_FIXES
found &= cm->render_width == cfg->render_width &&
cm->render_height == cfg->render_height;
-#endif
}
vpx_wb_write_bit(wb, found);
if (found) {
@@ -1192,15 +2173,8 @@
if (!found) {
vpx_wb_write_literal(wb, cm->width - 1, 16);
vpx_wb_write_literal(wb, cm->height - 1, 16);
-
-#if CONFIG_MISC_FIXES
write_render_size(cm, wb);
-#endif
}
-
-#if !CONFIG_MISC_FIXES
- write_render_size(cm, wb);
-#endif
}
static void write_sync_code(struct vpx_write_bit_buffer *wb) {
@@ -1271,12 +2245,13 @@
write_sync_code(wb);
write_bitdepth_colorspace_sampling(cm, wb);
write_frame_size(cm, wb);
+ if (frame_is_intra_only(cm))
+ vpx_wb_write_bit(wb, cm->allow_screen_content_tools);
} else {
if (!cm->show_frame)
vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode) {
-#if CONFIG_MISC_FIXES
if (cm->intra_only) {
vpx_wb_write_bit(wb,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
@@ -1287,25 +2262,11 @@
vpx_wb_write_bit(wb,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
}
-#else
- static const int reset_frame_context_conv_tbl[3] = { 0, 2, 3 };
-
- vpx_wb_write_literal(wb,
- reset_frame_context_conv_tbl[cm->reset_frame_context], 2);
-#endif
}
if (cm->intra_only) {
write_sync_code(wb);
-
-#if CONFIG_MISC_FIXES
write_bitdepth_colorspace_sampling(cm, wb);
-#else
- // Note for profile 0, 420 8bpp is assumed.
- if (cm->profile > PROFILE_0) {
- write_bitdepth_colorspace_sampling(cm, wb);
- }
-#endif
vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
write_frame_size(cm, wb);
@@ -1331,19 +2292,16 @@
if (!cm->error_resilient_mode) {
vpx_wb_write_bit(wb,
cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF);
-#if CONFIG_MISC_FIXES
if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF)
-#endif
vpx_wb_write_bit(wb, cm->refresh_frame_context !=
- REFRESH_FRAME_CONTEXT_BACKWARD);
+ REFRESH_FRAME_CONTEXT_BACKWARD);
}
vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
- encode_loopfilter(&cm->lf, wb);
+ encode_loopfilter(cm, wb);
encode_quantization(cm, wb);
encode_segmentation(cm, xd, wb);
-#if CONFIG_MISC_FIXES
if (!cm->seg.enabled && xd->lossless[0])
cm->tx_mode = TX_4X4;
else
@@ -1356,36 +2314,29 @@
if (!use_hybrid_pred)
vpx_wb_write_bit(wb, use_compound_pred);
}
-#endif
write_tile_info(cm, wb);
}
static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
VP10_COMMON *const cm = &cpi->common;
+#if CONFIG_SUPERTX
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+#endif // CONFIG_SUPERTX
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = cpi->td.counts;
vpx_writer header_bc;
- int i;
-#if CONFIG_MISC_FIXES
- int j;
-#endif
+ int i, j;
vpx_start_encode(&header_bc, data);
-
-#if !CONFIG_MISC_FIXES
- if (cpi->td.mb.e_mbd.lossless[0]) {
- cm->tx_mode = TX_4X4;
- } else {
- write_txfm_mode(cm->tx_mode, &header_bc);
- update_txfm_probs(cm, &header_bc, counts);
- }
-#else
update_txfm_probs(cm, &header_bc, counts);
-#endif
update_coef_probs(cpi, &header_bc);
+
+#if CONFIG_VAR_TX
+ update_txfm_partition_probs(cm, &header_bc, counts);
+#endif
+
update_skip_probs(cm, &header_bc, counts);
-#if CONFIG_MISC_FIXES
update_seg_probs(cpi, &header_bc);
for (i = 0; i < INTRA_MODES; ++i)
@@ -1395,20 +2346,31 @@
for (i = 0; i < PARTITION_CONTEXTS; ++i)
prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
counts->partition[i], PARTITION_TYPES, &header_bc);
-#endif
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ prob_diff_update(vp10_intra_filter_tree, fc->intra_filter_probs[i],
+ counts->intra_filter[i], INTRA_FILTERS, &header_bc);
+#endif // CONFIG_EXT_INTRA
if (frame_is_intra_only(cm)) {
vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob);
-#if CONFIG_MISC_FIXES
for (i = 0; i < INTRA_MODES; ++i)
for (j = 0; j < INTRA_MODES; ++j)
prob_diff_update(vp10_intra_mode_tree, cm->kf_y_prob[i][j],
counts->kf_y_mode[i][j], INTRA_MODES, &header_bc);
-#endif
} else {
+#if CONFIG_REF_MV
+ update_inter_mode_probs(cm, &header_bc, counts);
+#else
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
prob_diff_update(vp10_inter_mode_tree, cm->fc->inter_mode_probs[i],
counts->inter_mode[i], INTER_MODES, &header_bc);
+#endif
+
+#if CONFIG_EXT_INTER
+ update_inter_compound_mode_probs(cm, &header_bc);
+#endif // CONFIG_EXT_INTER
if (cm->interp_filter == SWITCHABLE)
update_switchable_interp_probs(cm, &header_bc, counts);
@@ -1419,52 +2381,41 @@
if (cpi->allow_comp_inter_inter) {
const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
-#if !CONFIG_MISC_FIXES
- const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
-
- vpx_write_bit(&header_bc, use_compound_pred);
- if (use_compound_pred) {
- vpx_write_bit(&header_bc, use_hybrid_pred);
- if (use_hybrid_pred)
- for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
- counts->comp_inter[i]);
- }
-#else
if (use_hybrid_pred)
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
counts->comp_inter[i]);
-#endif
}
if (cm->reference_mode != COMPOUND_REFERENCE) {
for (i = 0; i < REF_CONTEXTS; i++) {
- vp10_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0],
- counts->single_ref[i][0]);
- vp10_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1],
- counts->single_ref[i][1]);
+ for (j = 0; j < (SINGLE_REFS - 1); j ++) {
+ vp10_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][j],
+ counts->single_ref[i][j]);
+ }
}
}
- if (cm->reference_mode != SINGLE_REFERENCE)
- for (i = 0; i < REF_CONTEXTS; i++)
- vp10_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i],
- counts->comp_ref[i]);
+ if (cm->reference_mode != SINGLE_REFERENCE) {
+ for (i = 0; i < REF_CONTEXTS; i++) {
+ for (j = 0; j < (COMP_REFS - 1); j ++) {
+ vp10_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i][j],
+ counts->comp_ref[i][j]);
+ }
+ }
+ }
for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
prob_diff_update(vp10_intra_mode_tree, cm->fc->y_mode_prob[i],
counts->y_mode[i], INTRA_MODES, &header_bc);
-#if !CONFIG_MISC_FIXES
- for (i = 0; i < PARTITION_CONTEXTS; ++i)
- prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
- counts->partition[i], PARTITION_TYPES, &header_bc);
-#endif
-
vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
&counts->mv);
update_ext_tx_probs(cm, &header_bc);
+#if CONFIG_SUPERTX
+ if (!xd->lossless[0])
+ update_supertx_probs(cm, &header_bc);
+#endif // CONFIG_SUPERTX
}
vpx_stop_encode(&header_bc);
@@ -1473,7 +2424,6 @@
return header_bc.pos;
}
-#if CONFIG_MISC_FIXES
static int remux_tiles(uint8_t *dest, const int sz,
const int n_tiles, const int mag) {
int rpos = 0, wpos = 0, n;
@@ -1513,7 +2463,6 @@
return wpos;
}
-#endif
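+// Context for the remux step driven by max_tile below (illustration only):
+// remux_tiles(), whose body is elided above, rewrites each 4-byte tile-size
+// field written by encode_tiles() with the smallest (mag + 1)-byte field
+// that can hold the largest tile, compacting the stream in place. E.g.
+// max_tile = 70000 (> 0xffff) needs mag = 2, i.e. 3-byte fields, while
+// max_tile = 200 fits in mag = 0, i.e. 1-byte fields.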
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size) {
uint8_t *data = dest;
@@ -1521,14 +2470,9 @@
struct vpx_write_bit_buffer wb = {data, 0};
struct vpx_write_bit_buffer saved_wb;
unsigned int max_tile;
-#if CONFIG_MISC_FIXES
VP10_COMMON *const cm = &cpi->common;
const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
const int have_tiles = n_log2_tiles > 0;
-#else
- const int have_tiles = 0; // we have tiles, but we don't want to write a
- // tile size marker in the header
-#endif
write_uncompressed_header(cpi, &wb);
saved_wb = wb;
@@ -1544,7 +2488,6 @@
data += first_part_size;
data_sz = encode_tiles(cpi, data, &max_tile);
-#if CONFIG_MISC_FIXES
if (max_tile > 0) {
int mag;
unsigned int mask;
@@ -1563,7 +2506,6 @@
} else {
assert(n_log2_tiles == 0);
}
-#endif
data += data_sz;
// TODO(jbb): Figure out what to do if first_part_size > 16 bits.
diff --git a/vp10/encoder/bitstream.h b/vp10/encoder/bitstream.h
index b1da89f..9df03da 100644
--- a/vp10/encoder/bitstream.h
+++ b/vp10/encoder/bitstream.h
@@ -21,6 +21,8 @@
void vp10_encode_token_init();
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) {
return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
cpi->rc.is_src_frame_alt_ref;
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index ab0252b..3e322de 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -13,6 +13,9 @@
#include "vp10/common/entropymv.h"
#include "vp10/common/entropy.h"
+#if CONFIG_REF_MV
+#include "vp10/common/mvref_common.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -24,7 +27,7 @@
unsigned int var;
} diff;
-struct macroblock_plane {
+typedef struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
tran_low_t *qcoeff;
tran_low_t *coeff;
@@ -40,7 +43,7 @@
int16_t *round;
int64_t quant_thred[2];
-};
+} MACROBLOCK_PLANE;
/* The [2] dimension is for whether we skip the EOB node (i.e. if previous
* coefficient in this block was zero) or not. */
@@ -48,10 +51,24 @@
[COEFF_CONTEXTS][ENTROPY_TOKENS];
typedef struct {
- int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
- uint8_t mode_context[MAX_REF_FRAMES];
+ int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int16_t mode_context[MODE_CTX_REF_FRAMES];
+#if CONFIG_REF_MV
+ uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
+ CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+#if CONFIG_EXT_INTER
+ int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
+#endif // CONFIG_EXT_INTER
+#endif
} MB_MODE_INFO_EXT;
+typedef struct {
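+  // One entry per pixel of a 64x64 block (4096 = 64 * 64).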
+ uint8_t best_palette_color_map[4096];
+ double kmeans_data_buf[4096];
+ uint8_t kmeans_indices_buf[4096];
+ uint8_t kmeans_pre_indices_buf[4096];
+} PALETTE_BUFFER;
+
typedef struct macroblock MACROBLOCK;
struct macroblock {
struct macroblock_plane plane[MAX_MB_PLANE];
@@ -94,6 +111,8 @@
int *nmvsadcost_hp[2];
int **mvsadcost;
+ PALETTE_BUFFER *palette_buffer;
+
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
int mv_col_min;
@@ -104,6 +123,9 @@
  // Notes transform blocks where no coefficients are coded.
// Set during mode selection. Read during block encoding.
uint8_t zcoeff_blk[TX_SIZES][256];
+#if CONFIG_VAR_TX
+ uint8_t blk_skip[MAX_MB_PLANE][256];
+#endif
int skip;
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c
index 6c056d2..3cd23ec 100644
--- a/vp10/encoder/context_tree.c
+++ b/vp10/encoder/context_tree.c
@@ -28,6 +28,10 @@
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VAR_TX
+ CHECK_MEM_ERROR(cm, ctx->blk_skip[i],
+ vpx_calloc(num_blk, sizeof(uint8_t)));
+#endif
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
@@ -50,6 +54,10 @@
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VAR_TX
+ vpx_free(ctx->blk_skip[i]);
+ ctx->blk_skip[i] = 0;
+#endif
for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0;
@@ -73,6 +81,11 @@
alloc_mode_context(cm, num_4x4_blk, &tree->none);
alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[0]);
+#if CONFIG_SUPERTX
+ alloc_mode_context(cm, num_4x4_blk, &tree->horizontal_supertx);
+ alloc_mode_context(cm, num_4x4_blk, &tree->vertical_supertx);
+ alloc_mode_context(cm, num_4x4_blk, &tree->split_supertx);
+#endif
if (num_4x4_blk > 4) {
alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[1]);
@@ -89,6 +102,11 @@
free_mode_context(&tree->horizontal[1]);
free_mode_context(&tree->vertical[0]);
free_mode_context(&tree->vertical[1]);
+#if CONFIG_SUPERTX
+ free_mode_context(&tree->horizontal_supertx);
+ free_mode_context(&tree->vertical_supertx);
+ free_mode_context(&tree->split_supertx);
+#endif
}
// This function sets up a tree of contexts such that at each square
diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h
index 2a0fffb..4fa5806 100644
--- a/vp10/encoder/context_tree.h
+++ b/vp10/encoder/context_tree.h
@@ -28,6 +28,9 @@
MB_MODE_INFO_EXT mbmi_ext;
uint8_t *zcoeff_blk;
uint8_t *color_index_map[2];
+#if CONFIG_VAR_TX
+ uint8_t *blk_skip[MAX_MB_PLANE];
+#endif
tran_low_t *coeff[MAX_MB_PLANE][3];
tran_low_t *qcoeff[MAX_MB_PLANE][3];
tran_low_t *dqcoeff[MAX_MB_PLANE][3];
@@ -84,6 +87,11 @@
struct PC_TREE *split[4];
PICK_MODE_CONTEXT *leaf_split[4];
};
+#if CONFIG_SUPERTX
+ PICK_MODE_CONTEXT horizontal_supertx;
+ PICK_MODE_CONTEXT vertical_supertx;
+ PICK_MODE_CONTEXT split_supertx;
+#endif
} PC_TREE;
void vp10_setup_pc_tree(struct VP10Common *cm, struct ThreadData *td);
diff --git a/vp10/encoder/cost.c b/vp10/encoder/cost.c
index aab8263..ded51d3 100644
--- a/vp10/encoder/cost.c
+++ b/vp10/encoder/cost.c
@@ -10,6 +10,7 @@
#include <assert.h>
#include "vp10/encoder/cost.h"
+#include "vp10/common/entropy.h"
const unsigned int vp10_prob_cost[256] = {
2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161,
@@ -51,6 +52,22 @@
}
}
+#if CONFIG_ANS
+void vp10_cost_tokens_ans(int *costs, const vpx_prob *tree_probs,
+ const vpx_prob *token_probs, int skip_eob) {
+ int c_tree = 0; // Cost of the "tree" nodes EOB and ZERO.
+ int i;
+ costs[EOB_TOKEN] = vp10_cost_bit(tree_probs[0], 0);
+ if (!skip_eob)
+ c_tree = vp10_cost_bit(tree_probs[0], 1);
+ costs[ZERO_TOKEN] = c_tree + vp10_cost_bit(tree_probs[1], 0);
+ c_tree += vp10_cost_bit(tree_probs[1], 1);
+ for (i = ONE_TOKEN; i <= CATEGORY6_TOKEN; ++i) {
+ costs[i] = c_tree + vp10_cost_bit(token_probs[i - ONE_TOKEN], 0);
+ }
+}
+#endif // CONFIG_ANS
+
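+// Worked illustration (not part of the patch): with uniform, made-up
+// probabilities tree_probs = { 128, 128 } and all token_probs = 128,
+// vp10_cost_tokens_ans(costs, tree_probs, token_probs, 0) gives
+// costs[EOB_TOKEN] = vp10_cost_bit(128, 0), costs[ZERO_TOKEN] one
+// tree-branch cost higher, and ONE_TOKEN..CATEGORY6_TOKEN two branch
+// costs higher, since each token pays for the EOB and ZERO branches
+// passed on the way down the tree.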
void vp10_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree) {
cost(costs, tree, probs, 0, 0);
}
diff --git a/vp10/encoder/cost.h b/vp10/encoder/cost.h
index b9619c6..551e4e5 100644
--- a/vp10/encoder/cost.h
+++ b/vp10/encoder/cost.h
@@ -48,6 +48,11 @@
void vp10_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree);
void vp10_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree);
+#if CONFIG_ANS
+void vp10_cost_tokens_ans(int *costs, const vpx_prob *tree_probs,
+ const vpx_prob *token_probs, int skip_eob);
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 132a141..5602753 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -37,6 +37,382 @@
#endif
}
+#if CONFIG_EXT_TX
+void fdst4(const tran_low_t *input, tran_low_t *output) {
+#if USE_DST2
+ tran_high_t step[4];
+ tran_high_t temp1, temp2;
+
+ step[0] = input[0] - input[3];
+ step[1] = -input[1] + input[2];
+ step[2] = -input[1] - input[2];
+ step[3] = input[0] + input[3];
+
+ temp1 = (step[0] + step[1]) * cospi_16_64;
+ temp2 = (step[0] - step[1]) * cospi_16_64;
+ output[3] = fdct_round_shift(temp1);
+ output[1] = fdct_round_shift(temp2);
+ temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+ output[2] = fdct_round_shift(temp1);
+ output[0] = fdct_round_shift(temp2);
+#else
+ // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+ static const int32_t sinvalue_lookup[] = {
+ 141124871, 228344838,
+ };
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+#endif // USE_DST2
+}
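+// For intuition (illustration, not part of the patch): with USE_DST2 the
+// forward DST is the forward DCT applied to a sign-alternated input with
+// the outputs written in reverse order, i.e.
+//   fdst4(x)[k] == fdct4(x')[3 - k]  with  x'[n] = (-1)^n * x[n],
+// and fdst8/fdst16 below follow the same pattern. Reference sketch:
+//   static void fdst4_ref(const tran_low_t *in, tran_low_t *out) {
+//     tran_low_t flipped[4], tmp[4];
+//     int k;
+//     for (k = 0; k < 4; ++k)
+//       flipped[k] = (k & 1) ? (tran_low_t)-in[k] : in[k];
+//     fdct4(flipped, tmp);
+//     for (k = 0; k < 4; ++k) out[k] = tmp[3 - k];
+//   }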
+
+void fdst8(const tran_low_t *input, tran_low_t *output) {
+#if USE_DST2
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
+ tran_high_t t0, t1, t2, t3; // needs32
+ tran_high_t x0, x1, x2, x3; // canbe16
+
+ // stage 1
+ s0 = input[0] - input[7];
+ s1 = -input[1] + input[6];
+ s2 = input[2] - input[5];
+ s3 = -input[3] + input[4];
+ s4 = -input[3] - input[4];
+ s5 = input[2] + input[5];
+ s6 = -input[1] - input[6];
+ s7 = input[0] + input[7];
+
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[7] = fdct_round_shift(t0);
+ output[5] = fdct_round_shift(t2);
+ output[3] = fdct_round_shift(t1);
+ output[1] = fdct_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[6] = fdct_round_shift(t0);
+ output[4] = fdct_round_shift(t2);
+ output[2] = fdct_round_shift(t1);
+ output[0] = fdct_round_shift(t3);
+#else
+ // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+ static const int sinvalue_lookup[] = {
+ 86559612, 162678858, 219176632, 249238470
+ };
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+ output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+#endif // USE_DST2
+}
+
+void fdst16(const tran_low_t *input, tran_low_t *output) {
+#if USE_DST2
+ tran_high_t step1[8]; // canbe16
+ tran_high_t step2[8]; // canbe16
+ tran_high_t step3[8]; // canbe16
+ tran_high_t in[8]; // canbe16
+ tran_high_t temp1, temp2; // needs32
+
+ // step 1
+ in[0] = input[0] - input[15];
+ in[1] = -input[1] + input[14];
+ in[2] = input[2] - input[13];
+ in[3] = -input[3] + input[12];
+ in[4] = input[4] - input[11];
+ in[5] = -input[5] + input[10];
+ in[6] = input[6] - input[ 9];
+ in[7] = -input[7] + input[ 8];
+
+ step1[0] = -input[7] - input[ 8];
+ step1[1] = input[6] + input[ 9];
+ step1[2] = -input[5] - input[10];
+ step1[3] = input[4] + input[11];
+ step1[4] = -input[3] - input[12];
+ step1[5] = input[2] + input[13];
+ step1[6] = -input[1] - input[14];
+ step1[7] = input[0] + input[15];
+
+ // fdct8(step, step);
+ {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
+ tran_high_t t0, t1, t2, t3; // needs32
+ tran_high_t x0, x1, x2, x3; // canbe16
+
+ // stage 1
+ s0 = in[0] + in[7];
+ s1 = in[1] + in[6];
+ s2 = in[2] + in[5];
+ s3 = in[3] + in[4];
+ s4 = in[3] - in[4];
+ s5 = in[2] - in[5];
+ s6 = in[1] - in[6];
+ s7 = in[0] - in[7];
+
+ // fdct4(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
+ t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
+ output[15] = fdct_round_shift(t0);
+ output[11] = fdct_round_shift(t2);
+ output[7] = fdct_round_shift(t1);
+ output[3] = fdct_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[13] = fdct_round_shift(t0);
+ output[9] = fdct_round_shift(t2);
+ output[5] = fdct_round_shift(t1);
+ output[1] = fdct_round_shift(t3);
+ }
+
+ // step 2
+ temp1 = (step1[5] - step1[2]) * cospi_16_64;
+ temp2 = (step1[4] - step1[3]) * cospi_16_64;
+ step2[2] = fdct_round_shift(temp1);
+ step2[3] = fdct_round_shift(temp2);
+ temp1 = (step1[4] + step1[3]) * cospi_16_64;
+ temp2 = (step1[5] + step1[2]) * cospi_16_64;
+ step2[4] = fdct_round_shift(temp1);
+ step2[5] = fdct_round_shift(temp2);
+
+ // step 3
+ step3[0] = step1[0] + step2[3];
+ step3[1] = step1[1] + step2[2];
+ step3[2] = step1[1] - step2[2];
+ step3[3] = step1[0] - step2[3];
+ step3[4] = step1[7] - step2[4];
+ step3[5] = step1[6] - step2[5];
+ step3[6] = step1[6] + step2[5];
+ step3[7] = step1[7] + step2[4];
+
+ // step 4
+ temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
+ temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
+ step2[1] = fdct_round_shift(temp1);
+ step2[2] = fdct_round_shift(temp2);
+ temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
+ temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
+ step2[5] = fdct_round_shift(temp1);
+ step2[6] = fdct_round_shift(temp2);
+
+ // step 5
+ step1[0] = step3[0] + step2[1];
+ step1[1] = step3[0] - step2[1];
+ step1[2] = step3[3] + step2[2];
+ step1[3] = step3[3] - step2[2];
+ step1[4] = step3[4] - step2[5];
+ step1[5] = step3[4] + step2[5];
+ step1[6] = step3[7] - step2[6];
+ step1[7] = step3[7] + step2[6];
+
+ // step 6
+ temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
+ temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
+ output[14] = fdct_round_shift(temp1);
+ output[6] = fdct_round_shift(temp2);
+
+ temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
+ temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
+ output[10] = fdct_round_shift(temp1);
+ output[2] = fdct_round_shift(temp2);
+
+ temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
+ temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
+ output[12] = fdct_round_shift(temp1);
+ output[4] = fdct_round_shift(temp2);
+
+ temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
+ temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
+ output[8] = fdct_round_shift(temp1);
+ output[0] = fdct_round_shift(temp2);
+#else
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+ static const int sinvalue_lookup[] = {
+ 47852167, 94074787, 137093803, 175444254,
+ 207820161, 233119001, 250479254, 259309736
+ };
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+#endif // USE_DST2
+}
+#endif // CONFIG_EXT_TX
+
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
@@ -999,29 +1375,171 @@
output[15] = (tran_low_t)-x1;
}
+#if CONFIG_EXT_TX
+static void copy_block(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ int i;
+ for (i = 0; i < l; ++i) {
+ memcpy(dest + dest_stride * i, src + src_stride * i,
+ l * sizeof(int16_t));
+ }
+}
+
+static void fliplr(int16_t *dest, int stride, int l) {
+ int i, j;
+ for (i = 0; i < l; ++i) {
+ for (j = 0; j < l / 2; ++j) {
+ const int16_t tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[i * stride + l - 1 - j];
+ dest[i * stride + l - 1 - j] = tmp;
+ }
+ }
+}
+
+static void flipud(int16_t *dest, int stride, int l) {
+ int i, j;
+ for (j = 0; j < l; ++j) {
+ for (i = 0; i < l / 2; ++i) {
+ const int16_t tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[(l - 1 - i) * stride + j];
+ dest[(l - 1 - i) * stride + j] = tmp;
+ }
+ }
+}
+
+static void fliplrud(int16_t *dest, int stride, int l) {
+ int i, j;
+ for (i = 0; i < l / 2; ++i) {
+ for (j = 0; j < l; ++j) {
+ const int16_t tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j];
+ dest[(l - 1 - i) * stride + l - 1 - j] = tmp;
+ }
+ }
+}
+
+static void copy_fliplr(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, dest, dest_stride);
+ fliplr(dest, dest_stride, l);
+}
+
+static void copy_flipud(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, dest, dest_stride);
+ flipud(dest, dest_stride, l);
+}
+
+static void copy_fliplrud(const int16_t *src, int src_stride, int l,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, dest, dest_stride);
+ fliplrud(dest, dest_stride, l);
+}
+
+static void maybe_flip_input(const int16_t **src, int *src_stride, int l,
+ int16_t *buff, int tx_type) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ break;
+ case FLIPADST_DCT:
+ case FLIPADST_ADST:
+ case FLIPADST_DST:
+ copy_flipud(*src, *src_stride, l, buff, l);
+ *src = buff;
+ *src_stride = l;
+ break;
+ case DCT_FLIPADST:
+ case ADST_FLIPADST:
+ case DST_FLIPADST:
+ copy_fliplr(*src, *src_stride, l, buff, l);
+ *src = buff;
+ *src_stride = l;
+ break;
+ case FLIPADST_FLIPADST:
+ copy_fliplrud(*src, *src_stride, l, buff, l);
+ *src = buff;
+ *src_stride = l;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+#endif // CONFIG_EXT_TX
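+// Note: the FHT_* tables below intentionally map every FLIPADST variant to
+// the plain fadst* kernels; the flip is applied once to the input residual
+// via maybe_flip_input() in vp10_fht{4x4,8x8,16x16}_c, so no dedicated
+// flipped transform kernels are needed.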
+
static const transform_2d FHT_4[] = {
- { fdct4, fdct4 }, // DCT_DCT = 0
- { fadst4, fdct4 }, // ADST_DCT = 1
- { fdct4, fadst4 }, // DCT_ADST = 2
- { fadst4, fadst4 } // ADST_ADST = 3
+ { fdct4, fdct4 }, // DCT_DCT = 0,
+ { fadst4, fdct4 }, // ADST_DCT = 1,
+ { fdct4, fadst4 }, // DCT_ADST = 2,
+ { fadst4, fadst4 }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { fadst4, fdct4 }, // FLIPADST_DCT = 4,
+ { fdct4, fadst4 }, // DCT_FLIPADST = 5,
+ { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6,
+ { fadst4, fadst4 }, // ADST_FLIPADST = 7,
+ { fadst4, fadst4 }, // FLIPADST_ADST = 8,
+ { fdst4, fdct4 }, // DST_DCT = 9,
+ { fdct4, fdst4 }, // DCT_DST = 10,
+ { fdst4, fadst4 }, // DST_ADST = 11,
+ { fadst4, fdst4 }, // ADST_DST = 12,
+ { fdst4, fadst4 }, // DST_FLIPADST = 13,
+ { fadst4, fdst4 }, // FLIPADST_DST = 14,
+ { fdst4, fdst4 }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_8[] = {
- { fdct8, fdct8 }, // DCT_DCT = 0
- { fadst8, fdct8 }, // ADST_DCT = 1
- { fdct8, fadst8 }, // DCT_ADST = 2
- { fadst8, fadst8 } // ADST_ADST = 3
+ { fdct8, fdct8 }, // DCT_DCT = 0,
+ { fadst8, fdct8 }, // ADST_DCT = 1,
+ { fdct8, fadst8 }, // DCT_ADST = 2,
+ { fadst8, fadst8 }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { fadst8, fdct8 }, // FLIPADST_DCT = 4,
+ { fdct8, fadst8 }, // DCT_FLIPADST = 5,
+ { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6,
+ { fadst8, fadst8 }, // ADST_FLIPADST = 7,
+ { fadst8, fadst8 }, // FLIPADST_ADST = 8,
+ { fdst8, fdct8 }, // DST_DCT = 9,
+ { fdct8, fdst8 }, // DCT_DST = 10,
+ { fdst8, fadst8 }, // DST_ADST = 11,
+ { fadst8, fdst8 }, // ADST_DST = 12,
+ { fdst8, fadst8 }, // DST_FLIPADST = 13,
+ { fadst8, fdst8 }, // FLIPADST_DST = 14,
+ { fdst8, fdst8 }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_16[] = {
- { fdct16, fdct16 }, // DCT_DCT = 0
- { fadst16, fdct16 }, // ADST_DCT = 1
- { fdct16, fadst16 }, // DCT_ADST = 2
- { fadst16, fadst16 } // ADST_ADST = 3
+ { fdct16, fdct16 }, // DCT_DCT = 0,
+ { fadst16, fdct16 }, // ADST_DCT = 1,
+ { fdct16, fadst16 }, // DCT_ADST = 2,
+ { fadst16, fadst16 }, // ADST_ADST = 3,
+#if CONFIG_EXT_TX
+ { fadst16, fdct16 }, // FLIPADST_DCT = 4,
+ { fdct16, fadst16 }, // DCT_FLIPADST = 5,
+ { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6,
+ { fadst16, fadst16 }, // ADST_FLIPADST = 7,
+ { fadst16, fadst16 }, // FLIPADST_ADST = 8,
+ { fdst16, fdct16 }, // DST_DCT = 9,
+ { fdct16, fdst16 }, // DCT_DST = 10,
+ { fdst16, fadst16 }, // DST_ADST = 11,
+ { fadst16, fdst16 }, // ADST_DST = 12,
+ { fdst16, fadst16 }, // DST_FLIPADST = 13,
+ { fadst16, fdst16 }, // FLIPADST_DST = 14,
+ { fdst16, fdst16 }, // DST_DST = 15
+#endif // CONFIG_EXT_TX
};
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct4x4_c(input, output, stride);
} else {
@@ -1030,6 +1548,11 @@
tran_low_t temp_in[4], temp_out[4];
const transform_2d ht = FHT_4[tx_type];
+#if CONFIG_EXT_TX
+ int16_t flipped_input[4 * 4];
+ maybe_flip_input(&input, &stride, 4, flipped_input, tx_type);
+#endif
+
// Columns
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
@@ -1053,15 +1576,15 @@
}
void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
- tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
int eob = -1;
int i, j;
@@ -1165,7 +1688,7 @@
}
void vp10_fht8x8_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct8x8_c(input, output, stride);
} else {
@@ -1174,6 +1697,11 @@
tran_low_t temp_in[8], temp_out[8];
const transform_2d ht = FHT_8[tx_type];
+#if CONFIG_EXT_TX
+ int16_t flipped_input[8 * 8];
+ maybe_flip_input(&input, &stride, 8, flipped_input, tx_type);
+#endif
+
// Columns
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
@@ -1251,7 +1779,7 @@
}
void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct16x16_c(input, output, stride);
} else {
@@ -1260,6 +1788,11 @@
tran_low_t temp_in[16], temp_out[16];
const transform_2d ht = FHT_16[tx_type];
+#if CONFIG_EXT_TX
+ int16_t flipped_input[16 * 16];
+ maybe_flip_input(&input, &stride, 16, flipped_input, tx_type);
+#endif
+
// Columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
diff --git a/vp10/encoder/denoiser.c b/vp10/encoder/denoiser.c
index e5d8157..e876676 100644
--- a/vp10/encoder/denoiser.c
+++ b/vp10/encoder/denoiser.c
@@ -230,9 +230,19 @@
frame = ctx->best_zeromv_reference_frame;
mbmi->ref_frame[0] = ctx->best_zeromv_reference_frame;
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi))
+ mbmi->mode = ZERO_ZEROMV;
+ else
+#endif // CONFIG_EXT_INTER
mbmi->mode = ZEROMV;
mbmi->mv[0].as_int = 0;
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi))
+ ctx->best_sse_inter_mode = ZERO_ZEROMV;
+ else
+#endif // CONFIG_EXT_INTER
ctx->best_sse_inter_mode = ZEROMV;
ctx->best_sse_mv.as_int = 0;
ctx->newmv_sse = ctx->zeromv_sse;
@@ -377,9 +387,17 @@
void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser,
YV12_BUFFER_CONFIG src,
FRAME_TYPE frame_type,
+#if CONFIG_EXT_REFS
+ int refresh_last_frames[LAST_REF_FRAMES],
+#else
+ int refresh_last_frame,
+#endif // CONFIG_EXT_REFS
int refresh_alt_ref_frame,
- int refresh_golden_frame,
- int refresh_last_frame) {
+ int refresh_golden_frame) {
+#if CONFIG_EXT_REFS
+ int ref_frame;
+#endif // CONFIG_EXT_REFS
+
if (frame_type == KEY_FRAME) {
int i;
// Start at 1 so as not to overwrite the INTRA_FRAME
@@ -397,10 +415,19 @@
swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME],
&denoiser->running_avg_y[INTRA_FRAME]);
}
+#if CONFIG_EXT_REFS
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) {
+ if (refresh_last_frames[ref_frame - LAST_FRAME]) {
+ swap_frame_buffer(&denoiser->running_avg_y[ref_frame],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
+ }
+#else
if (refresh_last_frame) {
swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
&denoiser->running_avg_y[INTRA_FRAME]);
}
+#endif // CONFIG_EXT_REFS
}
void vp10_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
diff --git a/vp10/encoder/denoiser.h b/vp10/encoder/denoiser.h
index e543fb0..f48cbb0 100644
--- a/vp10/encoder/denoiser.h
+++ b/vp10/encoder/denoiser.h
@@ -35,9 +35,13 @@
void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser,
YV12_BUFFER_CONFIG src,
FRAME_TYPE frame_type,
+#if CONFIG_EXT_REFS
+ int refresh_last_frames[LAST_REF_FRAMES],
+#else
+ int refresh_last_frame,
+#endif // CONFIG_EXT_REFS
int refresh_alt_ref_frame,
- int refresh_golden_frame,
- int refresh_last_frame);
+ int refresh_golden_frame);
void vp10_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 26ce5a1..898b18f 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -36,6 +36,9 @@
#include "vp10/encoder/aq_complexity.h"
#include "vp10/encoder/aq_cyclicrefresh.h"
#include "vp10/encoder/aq_variance.h"
+#if CONFIG_SUPERTX
+#include "vp10/encoder/cost.h"
+#endif
#include "vp10/encoder/encodeframe.h"
#include "vp10/encoder/encodemb.h"
#include "vp10/encoder/encodemv.h"
@@ -51,6 +54,38 @@
int mi_row, int mi_col, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
+#if CONFIG_SUPERTX
+static int check_intra_b(PICK_MODE_CONTEXT *ctx);
+
+static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ PC_TREE *pc_tree);
+static void predict_superblock(VP10_COMP *cpi, ThreadData *td,
+ int mi_row_pred, int mi_col_pred,
+ BLOCK_SIZE bsize_pred, int b_sub8x8, int block);
+static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
+ PC_TREE *pc_tree);
+static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row, int mi_col,
+ int mi_row_ori, int mi_col_ori,
+ int output_enabled, BLOCK_SIZE bsize,
+ BLOCK_SIZE top_bsize,
+ uint8_t *dst_buf[3], int dst_stride[3],
+ PC_TREE *pc_tree);
+static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize,
+ int output_enabled, PC_TREE *pc_tree);
+static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int *tmp_rate, int64_t *tmp_dist,
+ TX_TYPE *best_tx,
+ PC_TREE *pc_tree);
+#endif // CONFIG_SUPERTX
+
// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
@@ -170,11 +205,11 @@
// Lighter version of set_offsets that only sets the mode info
// pointers.
-static INLINE void set_mode_info_offsets(VP10_COMP *const cpi,
- MACROBLOCK *const x,
- MACROBLOCKD *const xd,
- int mi_row,
- int mi_col) {
+static void set_mode_info_offsets(VP10_COMP *const cpi,
+ MACROBLOCK *const x,
+ MACROBLOCKD *const xd,
+ int mi_row,
+ int mi_col) {
VP10_COMMON *const cm = &cpi->common;
const int idx_str = xd->mi_stride * mi_row + mi_col;
xd->mi = cm->mi_grid_visible + idx_str;
@@ -196,6 +231,12 @@
set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ xd->max_tx_size = max_txsize_lookup[bsize];
+#endif
+
mbmi = &xd->mi[0]->mbmi;
// Set up destination pointers.
@@ -239,6 +280,80 @@
xd->tile = *tile;
}
+#if CONFIG_SUPERTX
+static void set_offsets_supertx(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ MACROBLOCK *const x = &td->mb;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+
+ // Set up distance of MB to edge of frame in 1/8th pel units.
+ assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
+ set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
+ cm->mi_rows, cm->mi_cols);
+}
+
+static void set_offsets_extend(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row_pred, int mi_col_pred,
+ int mi_row_ori, int mi_col_ori,
+ BLOCK_SIZE bsize_pred, BLOCK_SIZE bsize_ori) {
+ // Used in supertx
+ // (mi_row_ori, mi_col_ori, bsize_ori): region for mv
+ // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
+ MACROBLOCK *const x = &td->mb;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize_pred];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize_pred];
+ const struct segmentation *const seg = &cm->seg;
+
+ set_mode_info_offsets(cpi, x, xd, mi_row_ori, mi_col_ori);
+
+ mbmi = &xd->mi[0]->mbmi;
+
+ // Set up limit values for MV components.
+  // MVs beyond this range do not produce a new/different prediction block.
+ x->mv_row_min = -(((mi_row_pred + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
+ x->mv_col_min = -(((mi_col_pred + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
+ x->mv_row_max = (cm->mi_rows - mi_row_pred) * MI_SIZE + VP9_INTERP_EXTEND;
+ x->mv_col_max = (cm->mi_cols - mi_col_pred) * MI_SIZE + VP9_INTERP_EXTEND;
+
+ // Set up distance of MB to edge of frame in 1/8th pel units.
+ assert(!(mi_col_pred & (mi_width - 1)) && !(mi_row_pred & (mi_height - 1)));
+ set_mi_row_col(xd, tile, mi_row_pred, mi_height, mi_col_pred, mi_width,
+ cm->mi_rows, cm->mi_cols);
+ xd->up_available = (mi_row_ori != 0);
+ xd->left_available = (mi_col_ori > tile->mi_col_start);
+
+ // R/D setup.
+ x->rddiv = cpi->rd.RDDIV;
+ x->rdmult = cpi->rd.RDMULT;
+
+ // Setup segment ID.
+ if (seg->enabled) {
+ if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
+ const uint8_t *const map = seg->update_map ? cpi->segmentation_map
+ : cm->last_frame_seg_map;
+ mbmi->segment_id = get_segment_id(cm, map, bsize_ori,
+ mi_row_ori, mi_col_ori);
+ }
+ vp10_init_plane_quantizers(cpi, x);
+
+ x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
+ } else {
+ mbmi->segment_id = 0;
+ x->encode_breakout = cpi->encode_breakout;
+ }
+}
+#endif // CONFIG_SUPERTX
+
static void set_block_size(VP10_COMP * const cpi,
MACROBLOCK *const x,
MACROBLOCKD *const xd,
@@ -967,7 +1082,9 @@
const int mi_height = num_8x8_blocks_high_lookup[bsize];
int max_plane;
+#if !CONFIG_SUPERTX
assert(mi->mbmi.sb_type == bsize);
+#endif
*mi_addr = *mi;
*x->mbmi_ext = ctx->mbmi_ext;
@@ -985,8 +1102,8 @@
// and then update the quantizer.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
- mi_col, bsize, ctx->rate, ctx->dist,
- x->skip);
+ mi_col, bsize, ctx->rate, ctx->dist,
+ x->skip);
}
}
@@ -1026,6 +1143,12 @@
}
x->skip = ctx->skip;
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < 1; ++i)
+ memcpy(x->blk_skip[i], ctx->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
@@ -1055,8 +1178,11 @@
if (!frame_is_intra_only(cm)) {
if (is_inter_block(mbmi)) {
vp10_update_mv_count(td);
-
- if (cm->interp_filter == SWITCHABLE) {
+ if (cm->interp_filter == SWITCHABLE
+#if CONFIG_EXT_INTERP
+ && vp10_is_interp_needed(xd)
+#endif
+ ) {
const int ctx = vp10_get_pred_context_switchable_interp(xd);
++td->counts->switchable_interp[ctx][mbmi->interp_filter];
}
@@ -1082,6 +1208,294 @@
}
}
+#if CONFIG_SUPERTX
+static void update_state_supertx(VP10_COMP *cpi, ThreadData *td,
+ PICK_MODE_CONTEXT *ctx,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int output_enabled) {
+ int i, y, x_idx;
+ VP10_COMMON *const cm = &cpi->common;
+ RD_COUNTS *const rdc = &td->rd_counts;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *mi = &ctx->mic;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi_addr = xd->mi[0];
+ const struct segmentation *const seg = &cm->seg;
+ const int mis = cm->mi_stride;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int x_mis = VPXMIN(mi_width, cm->mi_cols - mi_col);
+ const int y_mis = VPXMIN(mi_height, cm->mi_rows - mi_row);
+ MV_REF *const frame_mvs =
+ cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
+ int w, h;
+
+ *mi_addr = *mi;
+ *x->mbmi_ext = ctx->mbmi_ext;
+ assert(is_inter_block(mbmi));
+ assert(mbmi->tx_size == ctx->mic.mbmi.tx_size);
+
+  // If segmentation is in use
+  if (seg->enabled && output_enabled) {
+    // For in-frame complexity AQ, copy the segment id from the segment map.
+ if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+ const uint8_t *const map = seg->update_map ? cpi->segmentation_map
+ : cm->last_frame_seg_map;
+ mi_addr->mbmi.segment_id =
+ get_segment_id(cm, map, bsize, mi_row, mi_col);
+ } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+      // Otherwise, for cyclic refresh mode, update the segment map, set the
+      // segment id, and then update the quantizer.
+ vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi,
+ mi_row, mi_col, bsize,
+ ctx->rate, ctx->dist, 1);
+ vp10_init_plane_quantizers(cpi, x);
+ }
+ }
+
+  // Restore the coding context of the MB to the state that was in place
+  // when the mode was picked for it.
+ for (y = 0; y < mi_height; y++)
+ for (x_idx = 0; x_idx < mi_width; x_idx++)
+ if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx
+ && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
+ xd->mi[x_idx + y * mis] = mi_addr;
+ }
+
+ if (cpi->oxcf.aq_mode)
+ vp10_init_plane_quantizers(cpi, x);
+
+ if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
+ mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
+ mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
+ }
+
+ x->skip = ctx->skip;
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < 1; ++i)
+ memcpy(x->blk_skip[i], ctx->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif // CONFIG_VAR_TX
+ memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+
+#if CONFIG_VAR_TX
+ {
+ const TX_SIZE mtx = mbmi->tx_size;
+ int idy, idx;
+ for (idy = 0; idy < (1 << mtx) / 2; ++idy)
+ for (idx = 0; idx < (1 << mtx) / 2; ++idx)
+ mbmi->inter_tx_size[(idy << 3) + idx] = mbmi->tx_size;
+ }
+#endif // CONFIG_VAR_TX
+
+ if (!output_enabled)
+ return;
+
+ if (!frame_is_intra_only(cm)) {
+ vp10_update_mv_count(td);
+
+ if (cm->interp_filter == SWITCHABLE
+#if CONFIG_EXT_INTERP
+ && vp10_is_interp_needed(xd)
+#endif
+ ) {
+ const int ctx = vp10_get_pred_context_switchable_interp(xd);
+ ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
+ }
+
+ rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
+ rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
+ rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
+
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+ rdc->filter_diff[i] += ctx->best_filter_diff[i];
+ }
+
+ for (h = 0; h < y_mis; ++h) {
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ }
+ }
+}
+
+static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize,
+ int output_enabled, PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ PARTITION_TYPE partition = pc_tree->partitioning;
+ BLOCK_SIZE subsize = get_subsize(bsize, partition);
+ int i;
+ PICK_MODE_CONTEXT *pmc = NULL;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ switch (partition) {
+ case PARTITION_NONE:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->none, mi_row, mi_col,
+ subsize, output_enabled);
+ break;
+ case PARTITION_VERT:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->vertical[0], mi_row, mi_col,
+ subsize, output_enabled);
+ if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize);
+ update_state_supertx(cpi, td, &pc_tree->vertical[1],
+ mi_row, mi_col + hbs, subsize, output_enabled);
+ }
+ pmc = &pc_tree->vertical_supertx;
+ break;
+ case PARTITION_HORZ:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontal[0], mi_row, mi_col,
+ subsize, output_enabled);
+ if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontal[1], mi_row + hbs,
+ mi_col, subsize, output_enabled);
+ }
+ pmc = &pc_tree->horizontal_supertx;
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, pc_tree->leaf_split[0], mi_row, mi_col,
+ subsize, output_enabled);
+ } else {
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, subsize,
+ output_enabled, pc_tree->split[0]);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize,
+ output_enabled, pc_tree->split[1]);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize,
+ output_enabled, pc_tree->split[2]);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs,
+ subsize, output_enabled, pc_tree->split[3]);
+ }
+ pmc = &pc_tree->split_supertx;
+ break;
+ default:
+ assert(0);
+ }
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ if (pmc != NULL) {
+ p[i].coeff = pmc->coeff_pbuf[i][1];
+ p[i].qcoeff = pmc->qcoeff_pbuf[i][1];
+ pd[i].dqcoeff = pmc->dqcoeff_pbuf[i][1];
+ p[i].eobs = pmc->eobs_pbuf[i][1];
+ } else {
+ // These should never be used
+ p[i].coeff = NULL;
+ p[i].qcoeff = NULL;
+ pd[i].dqcoeff = NULL;
+ p[i].eobs = NULL;
+ }
+ }
+}
+
+static void update_supertx_param(ThreadData *td,
+ PICK_MODE_CONTEXT *ctx,
+ int best_tx,
+ TX_SIZE supertx_size) {
+ MACROBLOCK *const x = &td->mb;
+#if CONFIG_VAR_TX
+ int i;
+
+ for (i = 0; i < 1; ++i)
+ memcpy(ctx->blk_skip[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif // CONFIG_VAR_TX
+ memcpy(ctx->zcoeff_blk, x->zcoeff_blk[supertx_size],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+ ctx->mic.mbmi.tx_size = supertx_size;
+ ctx->skip = x->skip;
+ ctx->mic.mbmi.tx_type = best_tx;
+}
+
+static void update_supertx_param_sb(VP10_COMP *cpi, ThreadData *td,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize,
+ int best_tx,
+ TX_SIZE supertx_size, PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+ int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ PARTITION_TYPE partition = pc_tree->partitioning;
+ BLOCK_SIZE subsize = get_subsize(bsize, partition);
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ switch (partition) {
+ case PARTITION_NONE:
+ update_supertx_param(td, &pc_tree->none,
+ best_tx,
+ supertx_size);
+ break;
+ case PARTITION_VERT:
+ update_supertx_param(td, &pc_tree->vertical[0],
+ best_tx,
+ supertx_size);
+ if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8)
+ update_supertx_param(td, &pc_tree->vertical[1],
+ best_tx,
+ supertx_size);
+ break;
+ case PARTITION_HORZ:
+ update_supertx_param(td, &pc_tree->horizontal[0],
+ best_tx,
+ supertx_size);
+ if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8)
+ update_supertx_param(td, &pc_tree->horizontal[1],
+ best_tx,
+ supertx_size);
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {
+ update_supertx_param(td, pc_tree->leaf_split[0],
+ best_tx,
+ supertx_size);
+ } else {
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, subsize,
+ best_tx,
+ supertx_size, pc_tree->split[0]);
+ update_supertx_param_sb(cpi, td, mi_row, mi_col + hbs, subsize,
+ best_tx,
+ supertx_size, pc_tree->split[1]);
+ update_supertx_param_sb(cpi, td, mi_row + hbs, mi_col, subsize,
+ best_tx,
+ supertx_size, pc_tree->split[2]);
+ update_supertx_param_sb(cpi, td, mi_row + hbs, mi_col + hbs, subsize,
+ best_tx,
+ supertx_size, pc_tree->split[3]);
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+#endif // CONFIG_SUPERTX
+
void vp10_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col) {
uint8_t *const buffers[3] = {src->y_buffer, src->u_buffer, src->v_buffer };
@@ -1113,6 +1527,9 @@
TileDataEnc *tile_data,
MACROBLOCK *const x,
int mi_row, int mi_col, RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *totalrate_nocoef,
+#endif
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd) {
VP10_COMMON *const cm = &cpi->common;
@@ -1132,6 +1549,14 @@
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0]->mbmi;
mbmi->sb_type = bsize;
+#if CONFIG_SUPERTX
+ // We set tx_size here as skip blocks would otherwise not set it.
+ // tx_size needs to be set at this point as supertx_enable in
+ // write_modes_sb is computed based on this, and if the garbage in memory
+ // just happens to be the supertx_size, then the packer will code this
+ // block as a supertx block, even if rdopt did not pick it as such.
+ mbmi->tx_size = max_txsize_lookup[bsize];
+#endif
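+  // Editorial sketch of the packer-side inference the comment above
+  // guards against (the real check lives in write_modes_sb):
+  //   supertx_enabled = (mbmi->tx_size == max_txsize_lookup[bsize]);
+  // Stale memory that happened to equal the lookup value would make
+  // this inference fire spuriously for a skip block.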
for (i = 0; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][0];
@@ -1140,6 +1565,15 @@
p[i].eobs = ctx->eobs_pbuf[i][0];
}
+ if (cm->current_video_frame == 0 && cm->allow_screen_content_tools) {
+ for (i = 0; i < 2; ++i) {
+ if (ctx->color_index_map[i] == 0) {
+ CHECK_MEM_ERROR(cm, ctx->color_index_map[i],
+ vpx_memalign(16, (ctx->num_4x4_blk << 4) *
+ sizeof(*ctx->color_index_map[i])));
+ }
+ }
+ }
for (i = 0; i < 2; ++i)
pd[i].color_index_map = ctx->color_index_map[i];
@@ -1196,17 +1630,30 @@
// as a predictor for MBs that follow in the SB
if (frame_is_intra_only(cm)) {
vp10_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ *totalrate_nocoef = 0;
+#endif // CONFIG_SUPERTX
} else {
if (bsize >= BLOCK_8X8) {
- if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
vp10_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
ctx, best_rd);
- else
- vp10_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col,
- rd_cost, bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ *totalrate_nocoef = rd_cost->rate;
+#endif // CONFIG_SUPERTX
+ } else {
+ vp10_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
+#if CONFIG_SUPERTX
+ totalrate_nocoef,
+#endif // CONFIG_SUPERTX
+ bsize, ctx, best_rd);
+ }
} else {
- vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col,
- rd_cost, bsize, ctx, best_rd);
+ vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
+#if CONFIG_SUPERTX
+ totalrate_nocoef,
+#endif // CONFIG_SUPERTX
+ bsize, ctx, best_rd);
}
}
@@ -1231,7 +1678,56 @@
ctx->dist = rd_cost->dist;
}
-static void update_stats(VP10_COMMON *cm, ThreadData *td) {
+#if CONFIG_REF_MV
+static void update_inter_mode_stats(FRAME_COUNTS *counts,
+ PREDICTION_MODE mode,
+#if CONFIG_EXT_INTER
+ int is_compound,
+#endif // CONFIG_EXT_INTER
+ int16_t mode_context) {
+ int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
+#if CONFIG_EXT_INTER
+ if (mode == NEWMV || mode == NEWFROMNEARMV) {
+ if (!is_compound)
+ ++counts->new2mv_mode[mode == NEWFROMNEARMV];
+#else
+ if (mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
+ ++counts->newmv_mode[mode_ctx][0];
+ return;
+ } else {
+ ++counts->newmv_mode[mode_ctx][1];
+
+ if (mode_context & (1 << ALL_ZERO_FLAG_OFFSET)) {
+ return;
+ }
+
+ mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+ if (mode == ZEROMV) {
+ ++counts->zeromv_mode[mode_ctx][0];
+ return;
+ } else {
+ ++counts->zeromv_mode[mode_ctx][1];
+ mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
+
+ if (mode_context & (1 << SKIP_NEARESTMV_OFFSET))
+ mode_ctx = 6;
+ if (mode_context & (1 << SKIP_NEARMV_OFFSET))
+ mode_ctx = 7;
+ if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET))
+ mode_ctx = 8;
+
+ ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];
+ }
+ }
+}
+#endif
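For reference, a worked sketch of the mode_context bit layout that update_inter_mode_stats() consumes level by level; the mask/offset names are the ones referenced above (their values live in the ref_mv headers), and the local names here are illustrative only:

    /* Sketch: unpacking one 16-bit mode_context word. */
    int16_t mc = mode_context;                                 /* as passed in */
    int newmv_ctx  = mc & NEWMV_CTX_MASK;                      /* NEWMV level  */
    int all_zero   = !!(mc & (1 << ALL_ZERO_FLAG_OFFSET));     /* early out    */
    int zeromv_ctx = (mc >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;  /* ZEROMV level */
    int refmv_ctx  = (mc >> REFMV_OFFSET) & REFMV_CTX_MASK;    /* REFMV level  */
    /* The function counts newmv_mode[newmv_ctx][...] first, stops early if
     * all_zero is set, then zeromv_mode[zeromv_ctx][...], and finally
     * refmv_mode[...] after the SKIP_* context overrides. */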
+
+static void update_stats(VP10_COMMON *cm, ThreadData *td
+#if CONFIG_SUPERTX
+ , int supertx_enabled
+#endif
+ ) {
const MACROBLOCK *x = &td->mb;
const MACROBLOCKD *const xd = &x->e_mbd;
const MODE_INFO *const mi = xd->mi[0];
@@ -1245,6 +1741,9 @@
const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_REF_FRAME);
if (!seg_ref_active) {
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif
counts->intra_inter[vp10_get_intra_inter_context(xd)][inter_block]++;
// If the segment reference feature is enabled we have only a single
// reference frame allowed for the segment so exclude it from
@@ -1256,23 +1755,106 @@
[has_second_ref(mbmi)]++;
if (has_second_ref(mbmi)) {
- counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)]
+#if CONFIG_EXT_REFS
+ const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME ||
+ ref0 == LAST4_FRAME);
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)][0][bit]++;
+ if (!bit) {
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p1(cm, xd)][1]
+ [ref0 == LAST_FRAME]++;
+ } else {
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p2(cm, xd)][2]
+ [ref0 == GOLDEN_FRAME]++;
+ if (ref0 != GOLDEN_FRAME) {
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p3(cm, xd)][3]
+ [ref0 == LAST3_FRAME]++;
+ }
+ }
+#else
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)][0]
[ref0 == GOLDEN_FRAME]++;
+#endif // CONFIG_EXT_REFS
} else {
+#if CONFIG_EXT_REFS
+ const int bit = (ref0 == ALTREF_FRAME || ref0 == GOLDEN_FRAME);
+ counts->single_ref[vp10_get_pred_context_single_ref_p1(xd)][0][bit]++;
+ if (bit) {
+ counts->single_ref[vp10_get_pred_context_single_ref_p2(xd)][1]
+ [ref0 != GOLDEN_FRAME]++;
+ } else {
+ const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
+ counts->single_ref[vp10_get_pred_context_single_ref_p3(xd)][2]
+ [bit1]++;
+ if (!bit1) {
+ counts->single_ref[vp10_get_pred_context_single_ref_p4(xd)][3]
+ [ref0 != LAST_FRAME]++;
+ } else {
+ counts->single_ref[vp10_get_pred_context_single_ref_p5(xd)][4]
+ [ref0 != LAST3_FRAME]++;
+ }
+ }
+#else
counts->single_ref[vp10_get_pred_context_single_ref_p1(xd)][0]
[ref0 != LAST_FRAME]++;
if (ref0 != LAST_FRAME)
counts->single_ref[vp10_get_pred_context_single_ref_p2(xd)][1]
[ref0 != GOLDEN_FRAME]++;
+#endif // CONFIG_EXT_REFS
}
}
}
if (inter_block &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
+ int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
if (bsize >= BLOCK_8X8) {
const PREDICTION_MODE mode = mbmi->mode;
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi)) {
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+ } else {
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, -1);
+ update_inter_mode_stats(counts, mode,
+#if CONFIG_EXT_INTER
+ has_second_ref(mbmi),
+#endif // CONFIG_EXT_INTER
+ mode_ctx);
+
+ if (mode == NEARMV) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+ uint8_t drl0_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 0);
+ if (mbmi->ref_mv_idx == 0)
+ ++counts->drl_mode0[drl0_ctx][0];
+ else
+ ++counts->drl_mode0[drl0_ctx][1];
+
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 3 &&
+ mbmi->ref_mv_idx > 0) {
+ uint8_t drl1_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
+ if (mbmi->ref_mv_idx == 1)
+ ++counts->drl_mode1[drl1_ctx][0];
+ else
+ ++counts->drl_mode1[drl1_ctx][1];
+ }
+ }
+ }
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#else
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(mode))
+ ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+ else
+#endif // CONFIG_EXT_INTER
++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
+#endif
} else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -1281,7 +1863,33 @@
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int j = idy * 2 + idx;
const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi)) {
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ ++counts->inter_compound_mode[mode_ctx]
+ [INTER_COMPOUND_OFFSET(b_mode)];
+ } else {
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, j);
+ update_inter_mode_stats(counts, b_mode,
+#if CONFIG_EXT_INTER
+ has_second_ref(mbmi),
+#endif // CONFIG_EXT_INTER
+ mode_ctx);
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#else
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(b_mode))
+ ++counts->inter_compound_mode[mode_ctx]
+ [INTER_COMPOUND_OFFSET(b_mode)];
+ else
+#endif // CONFIG_EXT_INTER
++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
+#endif
}
}
}
@@ -1293,6 +1901,9 @@
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
+#endif
BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
int p;
@@ -1317,12 +1928,21 @@
sizeof(*xd->above_seg_context) * mi_width);
memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
sizeof(xd->left_seg_context[0]) * mi_height);
+#if CONFIG_VAR_TX
+ memcpy(xd->above_txfm_context, ta,
+ sizeof(*xd->above_txfm_context) * mi_width);
+ memcpy(xd->left_txfm_context, tl,
+ sizeof(*xd->left_txfm_context) * mi_height);
+#endif
}
static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
+#endif
BLOCK_SIZE bsize) {
const MACROBLOCKD *const xd = &x->e_mbd;
int p;
@@ -1349,6 +1969,12 @@
sizeof(*xd->above_seg_context) * mi_width);
memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
sizeof(xd->left_seg_context[0]) * mi_height);
+#if CONFIG_VAR_TX
+ memcpy(ta, xd->above_txfm_context,
+ sizeof(*xd->above_txfm_context) * mi_width);
+ memcpy(tl, xd->left_txfm_context,
+ sizeof(*xd->left_txfm_context) * mi_height);
+#endif
}
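Editorial note: the save/restore pair above brackets every speculative trial in the partition search below. A minimal usage sketch (non-VAR_TX signature; local names illustrative):

    ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], l[16 * MAX_MB_PLANE];
    PARTITION_CONTEXT sa[8], sl[8];
    save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
    /* ... trial-encode one candidate partitioning; contexts mutate ... */
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
    /* Entropy and partition contexts are back to their pre-trial state,
     * so the next candidate is evaluated from identical conditions. */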
static void encode_b(VP10_COMP *cpi, const TileInfo *const tile,
@@ -1362,7 +1988,11 @@
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
if (output_enabled) {
+#if CONFIG_SUPERTX
+ update_stats(&cpi->common, td, 0);
+#else
update_stats(&cpi->common, td);
+#endif
}
}
@@ -1395,6 +2025,92 @@
if (output_enabled && bsize != BLOCK_4X4)
td->counts->partition[ctx][partition]++;
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) &&
+ bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ partition != PARTITION_NONE &&
+ !xd->lossless[0]) {
+ int supertx_enabled;
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ supertx_enabled = check_supertx_sb(bsize, supertx_size, pc_tree);
+ if (supertx_enabled) {
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ int x_idx, y_idx, i;
+ uint8_t *dst_buf[3];
+ int dst_stride[3];
+ set_skip_context(xd, mi_row, mi_col);
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize,
+ output_enabled, pc_tree);
+
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
+ mi_row, mi_col);
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ dst_buf[i] = xd->plane[i].dst.buf;
+ dst_stride[i] = xd->plane[i].dst.stride;
+ }
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col,
+ output_enabled, bsize, bsize,
+ dst_buf, dst_stride, pc_tree);
+
+ set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+ if (!x->skip) {
+ // TODO(geza.lore): Investigate if this can be relaxed
+ x->skip_recode = 0;
+ vp10_encode_sb_supertx(x, bsize);
+ vp10_tokenize_sb_supertx(cpi, td, tp, !output_enabled, bsize);
+ } else {
+ xd->mi[0]->mbmi.skip = 1;
+ if (output_enabled)
+ td->counts->skip[vp10_get_skip_context(xd)][1]++;
+ reset_skip_context(xd, bsize);
+ }
+ if (output_enabled) {
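+        // Propagate the superblock's skip flag to every mi unit it covers,
+        // clipped to the visible frame area (the mb_to_*_edge values are in
+        // 1/8-pel units, hence the 3 + MI_SIZE_LOG2 shift down to mi units).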
+ for (y_idx = 0; y_idx < mi_height; y_idx++)
+ for (x_idx = 0; x_idx < mi_width; x_idx++) {
+ if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx
+ && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height
+ > y_idx) {
+ xd->mi[x_idx + y_idx * cm->mi_stride]->mbmi.skip =
+ xd->mi[0]->mbmi.skip;
+ }
+ }
+ td->counts->supertx
+ [partition_supertx_context_lookup[partition]][supertx_size][1]++;
+ td->counts->supertx_size[supertx_size]++;
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(supertx_size, bsize, 1) > 1 &&
+ !xd->mi[0]->mbmi.skip) {
+ int eset = get_ext_tx_set(supertx_size, bsize, 1);
+ if (eset > 0) {
+ ++td->counts->inter_ext_tx[eset][supertx_size]
+ [xd->mi[0]->mbmi.tx_type];
+ }
+ }
+#else
+ if (supertx_size < TX_32X32 &&
+ !xd->mi[0]->mbmi.skip) {
+ ++td->counts->inter_ext_tx[supertx_size][xd->mi[0]->mbmi.tx_type];
+ }
+#endif // CONFIG_EXT_TX
+ }
+ if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#if CONFIG_VAR_TX
+ set_txfm_ctx(xd->left_txfm_context, supertx_size, xd->n8_h);
+      set_txfm_ctx(xd->above_txfm_context, supertx_size, mi_width);
+#endif // CONFIG_VAR_TX
+ return;
+ } else {
+ if (output_enabled) {
+ td->counts->supertx
+ [partition_supertx_context_lookup[partition]][supertx_size][0]++;
+ }
+ }
+ }
+#endif // CONFIG_SUPERTX
+
switch (partition) {
case PARTITION_NONE:
encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
@@ -1519,6 +2235,9 @@
int mi_row, int mi_col,
BLOCK_SIZE bsize,
int *rate, int64_t *dist,
+#if CONFIG_SUPERTX
+ int *rate_nocoef,
+#endif
int do_recon, PC_TREE *pc_tree) {
VP10_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
@@ -1533,12 +2252,20 @@
BLOCK_SIZE subsize;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT tl[8], ta[8];
+#endif
RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
int do_partition_search = 1;
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+#if CONFIG_SUPERTX
+ int last_part_rate_nocoef = INT_MAX;
+ int none_rate_nocoef = INT_MAX;
+ int chosen_rate_nocoef = INT_MAX;
+#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -1553,8 +2280,16 @@
partition = partition_lookup[bsl][bs_type];
subsize = get_subsize(bsize, partition);
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+#endif
pc_tree->partitioning = partition;
- save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ save_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -1583,8 +2318,11 @@
mi_row + (mi_step >> 1) < cm->mi_rows &&
mi_col + (mi_step >> 1) < cm->mi_cols) {
pc_tree->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize,
- ctx, INT64_MAX);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
+#if CONFIG_SUPERTX
+ &none_rate_nocoef,
+#endif
+ bsize, ctx, INT64_MAX);
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -1592,9 +2330,16 @@
none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate,
none_rdc.dist);
+#if CONFIG_SUPERTX
+ none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
+#endif
}
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
mi_8x8[0]->mbmi.sb_type = bs_type;
pc_tree->partitioning = partition;
}
@@ -1603,68 +2348,110 @@
switch (partition) {
case PARTITION_NONE:
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
bsize, ctx, INT64_MAX);
break;
case PARTITION_HORZ:
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
subsize, &pc_tree->horizontal[0],
INT64_MAX);
if (last_part_rdc.rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef = 0;
+#endif
PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
vp10_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
rd_pick_sb_modes(cpi, tile_data, x,
mi_row + (mi_step >> 1), mi_col, &tmp_rdc,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
subsize, &pc_tree->horizontal[1], INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp10_rd_cost_reset(&last_part_rdc);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = INT_MAX;
+#endif
break;
}
last_part_rdc.rate += tmp_rdc.rate;
last_part_rdc.dist += tmp_rdc.dist;
last_part_rdc.rdcost += tmp_rdc.rdcost;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += rt_nocoef;
+#endif
}
break;
case PARTITION_VERT:
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
subsize, &pc_tree->vertical[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef = 0;
+#endif
PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
vp10_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
rd_pick_sb_modes(cpi, tile_data, x,
mi_row, mi_col + (mi_step >> 1), &tmp_rdc,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp10_rd_cost_reset(&last_part_rdc);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = INT_MAX;
+#endif
break;
}
last_part_rdc.rate += tmp_rdc.rate;
last_part_rdc.dist += tmp_rdc.dist;
last_part_rdc.rdcost += tmp_rdc.rdcost;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += rt_nocoef;
+#endif
}
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_8X8) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
subsize, pc_tree->leaf_split[0], INT64_MAX);
break;
}
last_part_rdc.rate = 0;
last_part_rdc.dist = 0;
last_part_rdc.rdcost = 0;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = 0;
+#endif
for (i = 0; i < 4; i++) {
int x_idx = (i & 1) * (mi_step >> 1);
int y_idx = (i >> 1) * (mi_step >> 1);
int jj = i >> 1, ii = i & 0x01;
RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef;
+#endif
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
@@ -1673,13 +2460,22 @@
mi_8x8 + jj * bss * mis + ii * bss, tp,
mi_row + y_idx, mi_col + x_idx, subsize,
&tmp_rdc.rate, &tmp_rdc.dist,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
i != 3, pc_tree->split[i]);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp10_rd_cost_reset(&last_part_rdc);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = INT_MAX;
+#endif
break;
}
last_part_rdc.rate += tmp_rdc.rate;
last_part_rdc.dist += tmp_rdc.dist;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += rt_nocoef;
+#endif
}
break;
default:
@@ -1692,6 +2488,9 @@
last_part_rdc.rate += cpi->partition_cost[pl][partition];
last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
last_part_rdc.rate, last_part_rdc.dist);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += cpi->partition_cost[pl][partition];
+#endif
}
if (do_partition_search
@@ -1705,7 +2504,14 @@
BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
chosen_rdc.rate = 0;
chosen_rdc.dist = 0;
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = 0;
+#endif
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
pc_tree->partitioning = PARTITION_SPLIT;
// Split partition.
@@ -1713,27 +2519,50 @@
int x_idx = (i & 1) * (mi_step >> 1);
int y_idx = (i >> 1) * (mi_step >> 1);
RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef = 0;
+#endif
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT tl[8], ta[8];
+#endif
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
- save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ save_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
pc_tree->split[i]->partitioning = PARTITION_NONE;
rd_pick_sb_modes(cpi, tile_data, x,
mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
split_subsize, &pc_tree->split[i]->none, INT64_MAX);
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp10_rd_cost_reset(&chosen_rdc);
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = INT_MAX;
+#endif
break;
}
chosen_rdc.rate += tmp_rdc.rate;
chosen_rdc.dist += tmp_rdc.dist;
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef += rt_nocoef;
+#endif
if (i != 3)
encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
@@ -1742,12 +2571,18 @@
pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
split_subsize);
chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT];
+#endif
}
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
if (chosen_rdc.rate < INT_MAX) {
chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
chosen_rdc.rate, chosen_rdc.dist);
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
+#endif
}
}
@@ -1757,15 +2592,29 @@
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = partition;
chosen_rdc = last_part_rdc;
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = last_part_rate_nocoef;
+#endif
}
// If none was better set the partitioning to that.
if (none_rdc.rdcost < chosen_rdc.rdcost) {
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
chosen_rdc = none_rdc;
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = none_rate_nocoef;
+#endif
}
- restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+#endif
+ restore_context(x, mi_row, mi_col, a, l, sa, sl,
+#if CONFIG_VAR_TX
+ ta, tl,
+#endif
+ bsize);
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
@@ -1780,6 +2629,9 @@
*rate = chosen_rdc.rate;
*dist = chosen_rdc.dist;
+#if CONFIG_SUPERTX
+ *rate_nocoef = chosen_rate_nocoef;
+#endif
}
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
@@ -2029,6 +2881,9 @@
TileDataEnc *tile_data,
TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *rate_nocoef,
+#endif
int64_t best_rd, PC_TREE *pc_tree) {
VP10_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
@@ -2037,11 +2892,22 @@
const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT tl[8], ta[8];
+#endif
TOKENEXTRA *tp_orig = *tp;
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
- int i, pl;
+ int i;
+ const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
BLOCK_SIZE subsize;
RD_COST this_rdc, sum_rdc, best_rdc;
+#if CONFIG_SUPERTX
+ int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX;
+ int tmp_rate;
+ int abort_flag;
+ int64_t tmp_dist, tmp_rd;
+ PARTITION_TYPE best_partition;
+#endif // CONFIG_SUPERTX
int do_split = bsize >= BLOCK_8X8;
int do_rect = 1;
@@ -2102,7 +2968,13 @@
partition_vert_allowed &= force_vert_split;
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -2165,14 +3037,19 @@
// PARTITION_NONE
if (partition_none_allowed) {
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col,
- &this_rdc, bsize, ctx, best_rdc.rdcost);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
+#if CONFIG_SUPERTX
+ &this_rate_nocoef,
+#endif
+ bsize, ctx, best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
+#if CONFIG_SUPERTX
+ this_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
+#endif
}
if (this_rdc.rdcost < best_rdc.rdcost) {
@@ -2180,6 +3057,10 @@
int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
best_rdc = this_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = this_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
@@ -2248,7 +3129,13 @@
#endif
}
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif // CONFIG_VAR_TX
}
// store estimated motion vector
@@ -2265,14 +3152,75 @@
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter =
ctx->mic.mbmi.interp_filter;
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+ &sum_rate_nocoef, subsize, pc_tree->leaf_split[0],
+ INT64_MAX);
+#else
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
pc_tree->leaf_split[0], best_rdc.rdcost);
- if (sum_rdc.rate == INT_MAX)
+#endif // CONFIG_SUPERTX
+ if (sum_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif
+ }
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) && sum_rdc.rdcost < INT64_MAX &&
+ !xd->lossless[0]) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ best_partition = pc_tree->partitioning;
+ pc_tree->partitioning = PARTITION_SPLIT;
+
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size],
+ 0);
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) {
+ TX_TYPE best_tx = DCT_DCT;
+ tmp_rate = sum_rate_nocoef;
+ tmp_dist = 0;
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif // CONFIG_VAR_TX
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
+ &tmp_rate, &tmp_dist,
+ &best_tx,
+ pc_tree);
+
+ tmp_rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size],
+ 1);
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist);
+ if (tmp_rd < sum_rdc.rdcost) {
+ sum_rdc.rdcost = tmp_rd;
+ sum_rdc.rate = tmp_rate;
+ sum_rdc.dist = tmp_dist;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize,
+ best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
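+      // In RD terms the trial above compares (names illustrative):
+      //   rd_normal  = RDCOST(rdmult, rddiv,
+      //                       rate_subblocks + bit(flag = 0), dist_subblocks)
+      //   rd_supertx = RDCOST(rdmult, rddiv,
+      //                       rate_nocoef + bit(flag = 1) + joint_coef_rate,
+      //                       joint_dist)
+      // and keeps the supertx parameters only when rd_supertx is smaller.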
} else {
+#if CONFIG_SUPERTX
+ for (i = 0; i < 4 && sum_rdc.rdcost < INT64_MAX; ++i) {
+#else
for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
- const int x_idx = (i & 1) * mi_step;
- const int y_idx = (i >> 1) * mi_step;
+#endif // CONFIG_SUPERTX
+ const int x_idx = (i & 1) * mi_step;
+ const int y_idx = (i >> 1) * mi_step;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
@@ -2281,30 +3229,100 @@
load_pred_mv(x, ctx);
pc_tree->split[i]->index = i;
+#if CONFIG_SUPERTX
+ rd_pick_partition(cpi, td, tile_data, tp,
+ mi_row + y_idx, mi_col + x_idx,
+ subsize, &this_rdc, &this_rate_nocoef,
+ INT64_MAX - sum_rdc.rdcost, pc_tree->split[i]);
+#else
rd_pick_partition(cpi, td, tile_data, tp,
mi_row + y_idx, mi_col + x_idx,
subsize, &this_rdc,
best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
+#endif // CONFIG_SUPERTX
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
break;
} else {
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif // CONFIG_SUPERTX
}
}
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) &&
+ sum_rdc.rdcost < INT64_MAX &&
+ i == 4 && bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0]) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ best_partition = pc_tree->partitioning;
+ pc_tree->partitioning = PARTITION_SPLIT;
+
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size],
+ 0);
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+
+ tmp_rate = sum_rate_nocoef;
+ tmp_dist = 0;
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif // CONFIG_VAR_TX
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
+ &tmp_rate, &tmp_dist,
+ &best_tx,
+ pc_tree);
+
+ tmp_rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size],
+ 1);
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist);
+ if (tmp_rd < sum_rdc.rdcost) {
+ sum_rdc.rdcost = tmp_rd;
+ sum_rdc.rate = tmp_rate;
+ sum_rdc.dist = tmp_dist;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize,
+ best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
}
if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
sum_rdc.rate, sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT];
+#endif // CONFIG_SUPERTX
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = sum_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
pc_tree->partitioning = PARTITION_SPLIT;
}
} else {
@@ -2313,23 +3331,39 @@
if (cpi->sf.less_rectangular_check)
do_rect &= !partition_none_allowed;
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
- }
+#endif
+ } // if (do_split)
// PARTITION_HORZ
if (partition_horz_allowed &&
(do_rect || vp10_active_h_edge(cpi, mi_row, mi_step))) {
- subsize = get_subsize(bsize, PARTITION_HORZ);
+ subsize = get_subsize(bsize, PARTITION_HORZ);
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, ctx);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[0].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
- &pc_tree->horizontal[0], best_rdc.rdcost);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+#if CONFIG_SUPERTX
+ &sum_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ subsize, &pc_tree->horizontal[0], best_rdc.rdcost);
- if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
+#if CONFIG_SUPERTX
+ abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) ||
+ (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8);
+ if (sum_rdc.rdcost < INT64_MAX &&
+#else
+ if (sum_rdc.rdcost < best_rdc.rdcost &&
+#endif // CONFIG_SUPERTX
+ mi_row + mi_step < cm->mi_rows &&
bsize > BLOCK_8X8) {
PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
@@ -2341,33 +3375,105 @@
partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col,
+ &this_rdc, &this_rate_nocoef,
+ subsize, &pc_tree->horizontal[1],
+ INT64_MAX);
+#else
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col,
&this_rdc, subsize, &pc_tree->horizontal[1],
best_rdc.rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
} else {
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif // CONFIG_SUPERTX
}
}
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) && !abort_flag &&
+ sum_rdc.rdcost < INT64_MAX && bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0]) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ best_partition = pc_tree->partitioning;
+ pc_tree->partitioning = PARTITION_HORZ;
+
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]]
+ [supertx_size], 0);
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+ tmp_rate = sum_rate_nocoef;
+ tmp_dist = 0;
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+          xd->left_txfm_context =
+              xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif // CONFIG_VAR_TX
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
+ &tmp_rate, &tmp_dist,
+ &best_tx,
+ pc_tree);
+
+ tmp_rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[PARTITION_HORZ]][supertx_size],
+ 1);
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist);
+ if (tmp_rd < sum_rdc.rdcost) {
+ sum_rdc.rdcost = tmp_rd;
+ sum_rdc.rate = tmp_rate;
+ sum_rdc.dist = tmp_dist;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize,
+ best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
+
if (sum_rdc.rdcost < best_rdc.rdcost) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_HORZ];
+#endif // CONFIG_SUPERTX
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = sum_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
pc_tree->partitioning = PARTITION_HORZ;
}
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
}
// PARTITION_VERT
if (partition_vert_allowed &&
(do_rect || vp10_active_v_edge(cpi, mi_col, mi_step))) {
- subsize = get_subsize(bsize, PARTITION_VERT);
+ subsize = get_subsize(bsize, PARTITION_VERT);
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, ctx);
@@ -2375,9 +3481,19 @@
partition_none_allowed)
pc_tree->vertical[0].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
- &pc_tree->vertical[0], best_rdc.rdcost);
- if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+#if CONFIG_SUPERTX
+ &sum_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ subsize, &pc_tree->vertical[0], best_rdc.rdcost);
+#if CONFIG_SUPERTX
+ abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) ||
+ (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8);
+ if (sum_rdc.rdcost < INT64_MAX &&
+#else
+ if (sum_rdc.rdcost < best_rdc.rdcost &&
+#endif // CONFIG_SUPERTX
+ mi_col + mi_step < cm->mi_cols &&
bsize > BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
@@ -2389,29 +3505,99 @@
partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
+ &this_rate_nocoef, subsize, &pc_tree->vertical[1],
+ INT64_MAX - sum_rdc.rdcost);
+#else
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step,
&this_rdc, subsize,
&pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
} else {
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif // CONFIG_SUPERTX
}
}
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) && !abort_flag &&
+ sum_rdc.rdcost < INT64_MAX && bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0]) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ best_partition = pc_tree->partitioning;
+ pc_tree->partitioning = PARTITION_VERT;
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]]
+ [supertx_size], 0);
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+
+ tmp_rate = sum_rate_nocoef;
+ tmp_dist = 0;
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+          xd->left_txfm_context =
+              xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif // CONFIG_VAR_TX
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
+ &tmp_rate, &tmp_dist,
+ &best_tx,
+ pc_tree);
+
+ tmp_rate += vp10_cost_bit(
+ cm->fc->supertx_prob
+ [partition_supertx_context_lookup[PARTITION_VERT]][supertx_size],
+ 1);
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist);
+ if (tmp_rd < sum_rdc.rdcost) {
+ sum_rdc.rdcost = tmp_rd;
+ sum_rdc.rate = tmp_rate;
+ sum_rdc.dist = tmp_dist;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize,
+ best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
if (sum_rdc.rdcost < best_rdc.rdcost) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
sum_rdc.rate, sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_VERT];
+#endif // CONFIG_SUPERTX
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = sum_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
pc_tree->partitioning = PARTITION_VERT;
}
}
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+ restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
+#else
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+#endif
}
// TODO(jbb): This code added so that we avoid static analysis
@@ -2420,7 +3606,9 @@
// checks occur in some sub function and thus are used...
(void) best_rd;
*rd_cost = best_rdc;
-
+#if CONFIG_SUPERTX
+ *rate_nocoef = best_rate_nocoef;
+#endif // CONFIG_SUPERTX
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
@@ -2453,7 +3641,10 @@
// Initialize the left context for the new SB row
memset(&xd->left_context, 0, sizeof(xd->left_context));
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
-
+#if CONFIG_VAR_TX
+ memset(xd->left_txfm_context_buffer, 0,
+ sizeof(xd->left_txfm_context_buffer));
+#endif
// Code each SB in the row
for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
@@ -2461,6 +3652,9 @@
int dummy_rate;
int64_t dummy_dist;
RD_COST dummy_rdc;
+#if CONFIG_SUPERTX
+ int dummy_rate_nocoef;
+#endif // CONFIG_SUPERTX
int i;
int seg_skip = 0;
@@ -2496,19 +3690,31 @@
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
- BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root);
+ BLOCK_64X64, &dummy_rate, &dummy_dist,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ 1, td->pc_root);
} else if (cpi->partition_search_skippable_frame) {
BLOCK_SIZE bsize;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
- BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root);
+ BLOCK_64X64, &dummy_rate, &dummy_dist,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ 1, td->pc_root);
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
cm->frame_type != KEY_FRAME) {
choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
- BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root);
+ BLOCK_64X64, &dummy_rate, &dummy_dist,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ 1, td->pc_root);
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
@@ -2518,7 +3724,11 @@
&x->max_partition_size);
}
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rdc, INT64_MAX, td->pc_root);
+ &dummy_rdc,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ INT64_MAX, td->pc_root);
}
}
}
@@ -2541,6 +3751,10 @@
2 * aligned_mi_cols * MAX_MB_PLANE);
memset(xd->above_seg_context, 0,
sizeof(*xd->above_seg_context) * aligned_mi_cols);
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, 0,
+ sizeof(*xd->above_txfm_context) * aligned_mi_cols);
+#endif
}
static int check_dual_ref_flags(VP10_COMP *cpi) {
@@ -2549,11 +3763,18 @@
if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
return 0;
} else {
- return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
- + !!(ref_flags & VP9_ALT_FLAG)) >= 2;
+ return (!!(ref_flags & VP9_GOLD_FLAG) +
+ !!(ref_flags & VP9_LAST_FLAG) +
+#if CONFIG_EXT_REFS
+ !!(ref_flags & VP9_LAST2_FLAG) +
+ !!(ref_flags & VP9_LAST3_FLAG) +
+ !!(ref_flags & VP9_LAST4_FLAG) +
+#endif // CONFIG_EXT_REFS
+ !!(ref_flags & VP9_ALT_FLAG)) >= 2;
}
}
+#if !CONFIG_VAR_TX
static void reset_skip_tx_size(VP10_COMMON *cm, TX_SIZE max_tx_size) {
int mi_row, mi_col;
const int mis = cm->mi_stride;
@@ -2566,6 +3787,7 @@
}
}
}
+#endif
static MV_REFERENCE_FRAME get_frame_type(const VP10_COMP *cpi) {
if (frame_is_intra_only(&cpi->common))
@@ -2575,6 +3797,8 @@
else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
return GOLDEN_FRAME;
else
+    // TODO(zoeliu): To investigate whether a frame_type other than
+    // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
return LAST_FRAME;
}
@@ -2703,9 +3927,8 @@
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
for (i = 0; i < MAX_SEGMENTS; ++i) {
- const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ?
- vp10_get_qindex(&cm->seg, i, cm->base_qindex) :
- cm->base_qindex;
+ const int qindex = cm->seg.enabled ?
+ vp10_get_qindex(&cm->seg, i, cm->base_qindex) : cm->base_qindex;
xd->lossless[i] = qindex == 0 &&
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
@@ -2734,6 +3957,9 @@
x->quant_fp = cpi->sf.use_quant_fp;
vp10_zero(x->skip_txfm);
+#if CONFIG_VAR_TX
+ vp10_zero(x->blk_skip);
+#endif
{
struct vpx_usec_timer emr_timer;
@@ -2764,6 +3990,15 @@
static INTERP_FILTER get_interp_filter(
const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
+#if CONFIG_EXT_INTERP
+ if (!is_alt_ref &&
+ threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_SMOOTH] &&
+ threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP] &&
+ threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_SHARP] &&
+ threshes[EIGHTTAP_SMOOTH2] > threshes[SWITCHABLE - 1]) {
+ return EIGHTTAP_SMOOTH2;
+ }
+#endif // CONFIG_EXT_INTERP
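+  // The cascade here (and presumably in the remaining branches, which
+  // mirror this one) amounts to: return a filter only when its accumulated
+  // threshold strictly beats every alternative and the SWITCHABLE cost.
+  // Loop-form sketch:
+  //   for each candidate f in priority order:
+  //     if (threshes[f] > threshes[g] for every other candidate g &&
+  //         threshes[f] > threshes[SWITCHABLE - 1]) return f;
+  //   return SWITCHABLE;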
if (!is_alt_ref &&
threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
@@ -2798,7 +4033,14 @@
cpi->allow_comp_inter_inter = 1;
cm->comp_fixed_ref = ALTREF_FRAME;
cm->comp_var_ref[0] = LAST_FRAME;
+#if CONFIG_EXT_REFS
+ cm->comp_var_ref[1] = LAST2_FRAME;
+ cm->comp_var_ref[2] = LAST3_FRAME;
+ cm->comp_var_ref[3] = LAST4_FRAME;
+ cm->comp_var_ref[4] = GOLDEN_FRAME;
+#else
cm->comp_var_ref[1] = GOLDEN_FRAME;
+#endif // CONFIG_EXT_REFS
}
} else {
cpi->allow_comp_inter_inter = 0;
@@ -2814,9 +4056,12 @@
// either compound, single or hybrid prediction as per whatever has
// worked best for that type of frame in the past.
// It also predicts whether another coding mode would have worked
- // better that this coding mode. If that is the case, it remembers
+ // better than this coding mode. If that is the case, it remembers
// that for subsequent frames.
// It does the same analysis for transform size selection also.
+ //
+  // TODO(zoeliu): To investigate whether a frame_type other than
+  // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
@@ -2836,8 +4081,9 @@
else
cm->reference_mode = REFERENCE_MODE_SELECT;
- if (cm->interp_filter == SWITCHABLE)
+ if (cm->interp_filter == SWITCHABLE) {
cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);
+ }
encode_frame_internal(cpi);
@@ -2865,6 +4111,7 @@
}
}
+#if !CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT) {
int count4x4 = 0;
int count8x8_lp = 0, count8x8_8x8p = 0;
@@ -2885,20 +4132,36 @@
count32x32 += counts->tx.p32x32[i][TX_32X32];
}
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
+#if CONFIG_SUPERTX
+ cm->counts.supertx_size[TX_16X16] == 0 &&
+ cm->counts.supertx_size[TX_32X32] == 0 &&
+#endif // CONFIG_SUPERTX
count32x32 == 0) {
cm->tx_mode = ALLOW_8X8;
reset_skip_tx_size(cm, TX_8X8);
} else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
- count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
+ count8x8_lp == 0 && count16x16_lp == 0 &&
+#if CONFIG_SUPERTX
+ cm->counts.supertx_size[TX_8X8] == 0 &&
+ cm->counts.supertx_size[TX_16X16] == 0 &&
+ cm->counts.supertx_size[TX_32X32] == 0 &&
+#endif // CONFIG_SUPERTX
+ count32x32 == 0) {
cm->tx_mode = ONLY_4X4;
reset_skip_tx_size(cm, TX_4X4);
- } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
+ } else if (count8x8_lp == 0 && count16x16_lp == 0 &&
+ count4x4 == 0) {
cm->tx_mode = ALLOW_32X32;
- } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
+ } else if (count32x32 == 0 && count8x8_lp == 0 &&
+#if CONFIG_SUPERTX
+ cm->counts.supertx_size[TX_32X32] == 0 &&
+#endif // CONFIG_SUPERTX
+ count4x4 == 0) {
cm->tx_mode = ALLOW_16X16;
reset_skip_tx_size(cm, TX_16X16);
}
}
+#endif
} else {
cm->reference_mode = SINGLE_REFERENCE;
encode_frame_internal(cpi);
@@ -2941,6 +4204,140 @@
++counts->uv_mode[y_mode][uv_mode];
}
+#if CONFIG_VAR_TX
+static void update_txfm_count(MACROBLOCKD *xd,
+ FRAME_COUNTS *counts,
+ TX_SIZE tx_size, int blk_row, int blk_col) {
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1),
+ tx_size);
+ TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ ++counts->txfm_partition[ctx][0];
+ mbmi->tx_size = tx_size;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+ } else {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bh = num_4x4_blocks_high_lookup[bsize];
+ int i;
+ ++counts->txfm_partition[ctx][1];
+
+ if (tx_size == TX_8X8) {
+ mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->tx_size = TX_4X4;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ for (i = 0; i < 4; ++i) {
+ int offsetr = (i >> 1) * bh / 2;
+ int offsetc = (i & 0x01) * bh / 2;
+ update_txfm_count(xd, counts, tx_size - 1,
+ blk_row + offsetr, blk_col + offsetc);
+ }
+ }
+}
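A worked example of the recursion above (editorial sketch; assumes one uniform transform size across the block):

    /* bsize = BLOCK_32X32 and every inter_tx_size[] entry = TX_8X8:
     *   depth 0: TX_32X32 != TX_8X8 -> ++txfm_partition[ctx][1], recurse x4
     *   depth 1: TX_16X16 != TX_8X8 -> ++txfm_partition[ctx][1], recurse x4
     *   depth 2: TX_8X8   == TX_8X8 -> ++txfm_partition[ctx][0], stop
     * Total: 1 + 4 + 16 = 21 recorded split/no-split decisions. */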
+
+static void tx_partition_count_update(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ BLOCK_SIZE plane_bsize,
+ int mi_row, int mi_col,
+ FRAME_COUNTS *td_counts) {
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+
+ for (idy = 0; idy < mi_height; idy += bh)
+ for (idx = 0; idx < mi_width; idx += bh)
+ update_txfm_count(xd, td_counts, max_tx_size, idy, idx);
+}
+
+static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size,
+ int blk_row, int blk_col) {
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ mbmi->tx_size = tx_size;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), tx_size);
+
+ } else {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ if (tx_size == TX_8X8) {
+ mbmi->inter_tx_size[tx_idx] = TX_4X4;
+ mbmi->tx_size = TX_4X4;
+ txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
+ xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+ return;
+ }
+
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ set_txfm_context(xd, tx_size - 1,
+ blk_row + offsetr, blk_col + offsetc);
+ }
+ }
+}
+
+static void tx_partition_set_contexts(VP10_COMMON *cm,
+ MACROBLOCKD *xd,
+ BLOCK_SIZE plane_bsize,
+ int mi_row, int mi_col) {
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
+
+ for (idy = 0; idy < mi_height; idy += bh)
+ for (idx = 0; idx < mi_width; idx += bh)
+ set_txfm_context(xd, max_tx_size, idy, idx);
+}
+#endif
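Usage note (editorial sketch): the two walkers form a pair; encode_superblock() below counts during real output passes and only sets contexts otherwise, roughly:

    if (output_enabled)
      tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
    else
      tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);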
+
static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
TOKENEXTRA **t, int output_enabled,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -2977,6 +4374,34 @@
if (output_enabled)
sum_intra_stats(td->counts, mi, xd->above_mi, xd->left_mi,
frame_is_intra_only(cm));
+
+#if CONFIG_EXT_INTRA
+ if (output_enabled && bsize >= BLOCK_8X8) {
+ FRAME_COUNTS *counts = td->counts;
+ if (mbmi->mode == DC_PRED)
+ ++counts->ext_intra[0][mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
+ if (mbmi->uv_mode == DC_PRED)
+ ++counts->ext_intra[1][mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+ int p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ p_angle = mode_to_angle_map[mbmi->mode] +
+ mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle))
+ ++counts->intra_filter[intra_filter_ctx][mbmi->intra_filter];
+ }
+ }
+#endif // CONFIG_EXT_INTRA
+
+ if (bsize >= BLOCK_8X8 && output_enabled) {
+ if (mbmi->palette_mode_info.palette_size[0] > 0) {
+ mbmi->palette_mode_info.palette_first_color_idx[0] =
+ xd->plane[0].color_index_map[0];
+      // TODO(huisu): this increases the use of the token buffer. Needs a
+      // stress test to verify.
+ vp10_tokenize_palette_sb(td, bsize, 0, t);
+ }
+ }
vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
} else {
int ref;
@@ -2997,25 +4422,33 @@
VPXMAX(bsize, BLOCK_8X8));
vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_VAR_TX
+ vp10_tokenize_sb_inter(cpi, td, t, !output_enabled,
+ mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8));
+#else
vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+#endif
}
if (output_enabled) {
if (cm->tx_mode == TX_MODE_SELECT &&
mbmi->sb_type >= BLOCK_8X8 &&
!(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
+#if CONFIG_VAR_TX
+ if (is_inter_block(mbmi))
+ tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
+#endif
++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
&td->counts->tx)[mbmi->tx_size];
} else {
int x, y;
TX_SIZE tx_size;
// The new intra coding scheme requires no change of transform size
- if (is_inter_block(&mi->mbmi)) {
+ if (is_inter_block(&mi->mbmi))
tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
max_txsize_lookup[bsize]);
- } else {
+ else
tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
- }
for (y = 0; y < mi_height; y++)
for (x = 0; x < mi_width; x++)
@@ -3024,6 +4457,22 @@
}
++td->counts->tx.tx_totals[mbmi->tx_size];
++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize,
+ is_inter_block(mbmi));
+ if (eset > 0) {
+ if (is_inter_block(mbmi)) {
+ ++td->counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type];
+ } else {
+ ++td->counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode]
+ [mbmi->tx_type];
+ }
+ }
+ }
+#else
if (mbmi->tx_size < TX_32X32 &&
cm->base_qindex > 0 && !mbmi->skip &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
@@ -3035,5 +4484,841 @@
[mbmi->tx_type];
}
}
+#endif // CONFIG_EXT_TX
+ }
+
+#if CONFIG_VAR_TX
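+  // Keep the left/above transform-size contexts in sync: inter blocks with
+  // selected transform partitioning refresh them recursively; every other
+  // block sets a uniform transform size across the rows and columns it
+  // covers.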
+ if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 &&
+ is_inter_block(mbmi) && !(mbmi->skip || seg_skip)) {
+ if (!output_enabled)
+ tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
+ } else {
+ TX_SIZE tx_size;
+ // The new intra coding scheme requires no change of transform size
+ if (is_inter_block(mbmi))
+ tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
+ max_txsize_lookup[bsize]);
+ else
+ tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
+ mbmi->tx_size = tx_size;
+ set_txfm_ctx(xd->left_txfm_context, tx_size, xd->n8_h);
+ set_txfm_ctx(xd->above_txfm_context, tx_size, xd->n8_w);
+ }
+#endif
+}
+
+#if CONFIG_SUPERTX
+static int check_intra_b(PICK_MODE_CONTEXT *ctx) {
+ return !is_inter_mode((&ctx->mic)->mbmi.mode);
+}
+
+static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize = bsize;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return 1;
+
+ if (bsize >= BLOCK_8X8)
+ subsize = get_subsize(bsize, pc_tree->partitioning);
+ else
+ subsize = BLOCK_4X4;
+
+ partition = partition_lookup[bsl][subsize];
+
+ switch (partition) {
+ case PARTITION_NONE:
+      return check_intra_b(&pc_tree->none);
+ case PARTITION_VERT:
+ if (check_intra_b(&pc_tree->vertical[0]))
+ return 1;
+ if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
+ if (check_intra_b(&pc_tree->vertical[1]))
+ return 1;
+ }
+ break;
+ case PARTITION_HORZ:
+ if (check_intra_b(&pc_tree->horizontal[0]))
+ return 1;
+ if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
+ if (check_intra_b(&pc_tree->horizontal[1]))
+ return 1;
+ }
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {
+ if (check_intra_b(pc_tree->leaf_split[0]))
+ return 1;
+ } else {
+ if (check_intra_sb(cpi, tile, mi_row, mi_col, subsize,
+ pc_tree->split[0]))
+ return 1;
+ if (check_intra_sb(cpi, tile, mi_row, mi_col + hbs, subsize,
+ pc_tree->split[1]))
+ return 1;
+ if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col, subsize,
+ pc_tree->split[2]))
+ return 1;
+ if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col + hbs, subsize,
+ pc_tree->split[3]))
+ return 1;
+ }
+ break;
+ default:
+ assert(0);
+ }
+ return 0;
+}
+
+static int check_supertx_b(TX_SIZE supertx_size, PICK_MODE_CONTEXT *ctx) {
+ return ctx->mic.mbmi.tx_size == supertx_size;
+}
+
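+// Returns 1 if the top-left block of the partition tree is coded with the
+// given supertx transform size; supertx assigns one transform size to the
+// whole superblock, so checking the first block along that path suffices.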
+static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
+ PC_TREE *pc_tree) {
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+
+ partition = pc_tree->partitioning;
+ subsize = get_subsize(bsize, partition);
+ switch (partition) {
+ case PARTITION_NONE:
+ return check_supertx_b(supertx_size, &pc_tree->none);
+ case PARTITION_VERT:
+ return check_supertx_b(supertx_size, &pc_tree->vertical[0]);
+ case PARTITION_HORZ:
+ return check_supertx_b(supertx_size, &pc_tree->horizontal[0]);
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8)
+ return check_supertx_b(supertx_size, pc_tree->leaf_split[0]);
+ else
+ return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]);
+ default:
+ assert(0);
+ return 0;
}
}
+
+static void predict_superblock(VP10_COMP *cpi, ThreadData *td,
+ int mi_row_pred, int mi_col_pred,
+ BLOCK_SIZE bsize_pred, int b_sub8x8, int block) {
+ // Used in supertx
+  // (mi_row_pred, mi_col_pred, bsize_pred): region to predict; the motion
+  // vectors come from the mode info already set up by the caller.
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *mi_8x8 = xd->mi[0];
+ MODE_INFO *mi = mi_8x8;
+ MB_MODE_INFO *mbmi = &mi->mbmi;
+ int ref;
+ const int is_compound = has_second_ref(mbmi);
+
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
+ mbmi->ref_frame[ref]);
+ vp10_setup_pre_planes(xd, ref, cfg, mi_row_pred, mi_col_pred,
+ &xd->block_refs[ref]->sf);
+ }
+
+ if (!b_sub8x8)
+ vp10_build_inter_predictors_sb(xd, mi_row_pred, mi_col_pred, bsize_pred);
+ else
+ vp10_build_inter_predictors_sb_sub8x8(xd, mi_row_pred, mi_col_pred,
+ bsize_pred, block);
+}
+
+static void predict_b_extend(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int block,
+ int mi_row_ori, int mi_col_ori,
+ int mi_row_pred, int mi_col_pred,
+ int mi_row_top, int mi_col_top,
+ uint8_t * dst_buf[3], int dst_stride[3],
+ BLOCK_SIZE bsize_ori, BLOCK_SIZE bsize_top,
+ BLOCK_SIZE bsize_pred, int output_enabled,
+ int b_sub8x8, int bextend) {
+ // Used in supertx
+ // (mi_row_ori, mi_col_ori): location for mv
+ // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
+ // (mi_row_top, mi_col_top, bsize_top): region of the top partition size
+ // block: sub location of sub8x8 blocks
+ // b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8
+ // bextend: 1: region to predict is an extension of ori; 0: not
+
+ MACROBLOCK *const x = &td->mb;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int r = (mi_row_pred - mi_row_top) * MI_SIZE;
+ int c = (mi_col_pred - mi_col_top) * MI_SIZE;
+ const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top];
+ const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top];
+
+ if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top ||
+ mi_row_pred >= mi_row_top + mi_height_top ||
+ mi_col_pred >= mi_col_top + mi_width_top ||
+ mi_row_pred >= cm->mi_rows || mi_col_pred >= cm->mi_cols)
+ return;
+
+ set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred,
+ mi_row_ori, mi_col_ori, bsize_pred, bsize_ori);
+ xd->plane[0].dst.stride = dst_stride[0];
+ xd->plane[1].dst.stride = dst_stride[1];
+ xd->plane[2].dst.stride = dst_stride[2];
+ xd->plane[0].dst.buf = dst_buf[0] +
+ (r >> xd->plane[0].subsampling_y) * dst_stride[0] +
+ (c >> xd->plane[0].subsampling_x);
+ xd->plane[1].dst.buf = dst_buf[1] +
+ (r >> xd->plane[1].subsampling_y) * dst_stride[1] +
+ (c >> xd->plane[1].subsampling_x);
+ xd->plane[2].dst.buf = dst_buf[2] +
+ (r >> xd->plane[2].subsampling_y) * dst_stride[2] +
+ (c >> xd->plane[2].subsampling_x);
+
+ predict_superblock(cpi, td,
+ mi_row_pred, mi_col_pred, bsize_pred,
+ b_sub8x8, block);
+
+ if (output_enabled && !bextend)
+ update_stats(&cpi->common, td, 1);
+}
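+// Example: extending one mi row above the block (extend_dir with dir == 1)
+// calls this with mi_row_pred = mi_row - 1 and bextend = 1, so the extra row
+// is predicted into dst_buf but excluded from the stats update above.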
+
+static void extend_dir(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int block, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ int mi_row, int mi_col,
+ int mi_row_top, int mi_col_top,
+ int output_enabled,
+ uint8_t * dst_buf[3], int dst_stride[3], int dir) {
+ // dir: 0-lower, 1-upper, 2-left, 3-right
+ // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright
+ MACROBLOCKD *xd = &td->mb.e_mbd;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ int xss = xd->plane[1].subsampling_x;
+ int yss = xd->plane[1].subsampling_y;
+ int b_sub8x8 = (bsize < BLOCK_8X8) ? 1 : 0;
+
+ BLOCK_SIZE extend_bsize;
+ int unit, mi_row_pred, mi_col_pred;
+
+ if (dir == 0 || dir == 1) { // lower and upper
+ extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss) ?
+ BLOCK_8X8 : BLOCK_16X8;
+ unit = num_8x8_blocks_wide_lookup[extend_bsize];
+ mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1);
+ mi_col_pred = mi_col;
+
+ predict_b_extend(cpi, td, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ bsize, top_bsize, extend_bsize,
+ output_enabled, b_sub8x8, 1);
+
+ if (mi_width > unit) {
+ int i;
+      for (i = 0; i < mi_width / unit - 1; i++) {
+ mi_col_pred += unit;
+ predict_b_extend(cpi, td, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred, mi_row_top, mi_col_top,
+ dst_buf, dst_stride, bsize, top_bsize, extend_bsize,
+ output_enabled, b_sub8x8, 1);
+ }
+ }
+ } else if (dir == 2 || dir == 3) { // left and right
+ extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss) ?
+ BLOCK_8X8 : BLOCK_8X16;
+ unit = num_8x8_blocks_high_lookup[extend_bsize];
+ mi_row_pred = mi_row;
+ mi_col_pred = mi_col + ((dir == 3) ? mi_width : -1);
+
+ predict_b_extend(cpi, td, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred, mi_row_top, mi_col_top,
+ dst_buf, dst_stride, bsize, top_bsize, extend_bsize,
+ output_enabled, b_sub8x8, 1);
+
+ if (mi_height > unit) {
+ int i;
+      for (i = 0; i < mi_height / unit - 1; i++) {
+ mi_row_pred += unit;
+ predict_b_extend(cpi, td, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred, mi_row_top, mi_col_top,
+ dst_buf, dst_stride, bsize, top_bsize, extend_bsize,
+ output_enabled, b_sub8x8, 1);
+ }
+ }
+ } else {
+ extend_bsize = BLOCK_8X8;
+ mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1);
+ mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? mi_width : -1);
+
+ predict_b_extend(cpi, td, tile, block, mi_row, mi_col,
+ mi_row_pred, mi_col_pred, mi_row_top, mi_col_top,
+ dst_buf, dst_stride, bsize, top_bsize, extend_bsize,
+ output_enabled, b_sub8x8, 1);
+ }
+}
+
+static void extend_all(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int block,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ int mi_row, int mi_col,
+ int mi_row_top, int mi_col_top,
+ int output_enabled,
+ uint8_t * dst_buf[3], int dst_stride[3]) {
+ assert(block >= 0 && block < 4);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 0);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 1);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 2);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 3);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 4);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 5);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 6);
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 7);
+}
+
+// This function generates the prediction for multiple blocks, reducing the
+// discontinuity around their boundaries with smoothing masks. The basic
+// smoothing mask is a soft step function along the horizontal or vertical
+// direction. In the more complicated case where a block is split into four
+// subblocks, the basic mask is first applied to the two pairs of neighboring
+// subblocks in the horizontal direction, and then to the two masked
+// predictions in the vertical direction. If the block is split over more
+// than one level, the masked prediction at every stage is stored in the
+// dst_buf[] passed down from the higher level.
+static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row, int mi_col,
+ int mi_row_top, int mi_col_top,
+ int output_enabled, BLOCK_SIZE bsize,
+ BLOCK_SIZE top_bsize,
+ uint8_t *dst_buf[3], int dst_stride[3],
+ PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+
+ int i, ctx;
+ uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
+ DECLARE_ALIGNED(16, uint8_t,
+ tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
+ DECLARE_ALIGNED(16, uint8_t,
+ tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
+ DECLARE_ALIGNED(16, uint8_t,
+ tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
+ int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
+ int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
+ int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len);
+ dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
+ dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len);
+ dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN;
+ dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN;
+ dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN;
+ dst_buf3[0] = tmp_buf3;
+ dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN;
+ dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ if (bsize >= BLOCK_8X8) {
+ ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
+ subsize = get_subsize(bsize, pc_tree->partitioning);
+ } else {
+ ctx = 0;
+ subsize = BLOCK_4X4;
+ }
+ partition = partition_lookup[bsl][subsize];
+ if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize)
+ cm->counts.partition[ctx][partition]++;
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ }
+
+ switch (partition) {
+ case PARTITION_NONE:
+ assert(bsize < top_bsize);
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ bsize, top_bsize, bsize, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+ break;
+ case PARTITION_HORZ:
+ if (bsize == BLOCK_8X8) {
+        // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride);
+
+ // Second half
+ predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1);
+
+ // Smooth
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[0], dst_stride[0],
+ dst_buf1[0], dst_stride1[0],
+ &xd->plane[0],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, 0);
+ } else {
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ subsize, top_bsize, subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride, 0);
+
+ if (mi_row + hbs < cm->mi_rows) {
+ // Second half
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, subsize, top_bsize, subsize,
+ output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1, 1);
+
+ // Smooth
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top,
+ bsize, top_bsize, PARTITION_HORZ, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_VERT:
+ if (bsize == BLOCK_8X8) {
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride);
+
+ // Second half
+ predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1);
+
+ // Smooth
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[0], dst_stride[0],
+ dst_buf1[0], dst_stride1[0],
+ &xd->plane[0],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, 0);
+ } else {
+        // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ subsize, top_bsize, subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride, 3);
+
+ if (mi_col + hbs < cm->mi_cols) {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs,
+ mi_row, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, subsize, top_bsize, subsize,
+ output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1, 2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top,
+ bsize, top_bsize, PARTITION_VERT, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0);
+ predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf3, dst_stride3,
+ subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+
+ if (bsize < top_bsize) {
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf, dst_stride);
+ extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1);
+ extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf2, dst_stride2);
+ extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled,
+ dst_buf3, dst_stride3);
+ }
+ } else {
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, subsize,
+ top_bsize, dst_buf, dst_stride,
+ pc_tree->split[0]);
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, subsize,
+ top_bsize, dst_buf1, dst_stride1,
+ pc_tree->split[1]);
+ if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
+ predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled, subsize,
+ top_bsize, dst_buf2, dst_stride2,
+ pc_tree->split[2]);
+ if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, subsize,
+ top_bsize, dst_buf3, dst_stride3,
+ pc_tree->split[3]);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ if (bsize == BLOCK_8X8 && i != 0)
+ continue; // Skip <4x4 chroma smoothing
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i],
+ dst_stride[i],
+ dst_buf1[i],
+ dst_stride1[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ if (mi_row + hbs < cm->mi_rows) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf2[i],
+ dst_stride2[i],
+ dst_buf3[i],
+ dst_stride3[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_VERT, i);
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i],
+ dst_stride[i],
+ dst_buf2[i],
+ dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) {
+ vp10_build_masked_inter_predictor_complex(xd,
+ dst_buf[i],
+ dst_stride[i],
+ dst_buf2[i],
+ dst_stride2[i],
+ &xd->plane[i],
+ mi_row, mi_col,
+ mi_row_top, mi_col_top,
+ bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8))
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+}
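+
+// Illustrative sketch, not part of this change, of the per-pixel blend that
+// the masked predictor performs along one row. The linear ramp width and the
+// 6-bit weights are assumptions for illustration, not the exact mask used by
+// vp10_build_masked_inter_predictor_complex.
+#if 0
+static void soft_step_blend(const uint8_t *pred0, const uint8_t *pred1,
+                            uint8_t *dst, int len, int boundary) {
+  int i;
+  for (i = 0; i < len; ++i) {
+    // Weight ramps from 0 to 64 in a small window around the partition
+    // boundary; outside the window one prediction wins outright.
+    int w;
+    if (i < boundary - 4)
+      w = 0;
+    else if (i >= boundary + 4)
+      w = 64;
+    else
+      w = (i - (boundary - 4) + 1) * 8;
+    dst[i] = (uint8_t)(((64 - w) * pred0[i] + w * pred1[i] + 32) >> 6);
+  }
+}
+#endif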
+
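+// Computes the rate-distortion cost of coding this superblock with a single
+// supertx transform. The chroma planes are coded once; the luma plane is
+// searched over all allowed transform types, and a candidate replaces the
+// best so far only if its RD cost is at least 1% lower (DCT_DCT, tried
+// first, seeds the search).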
+static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int *tmp_rate, int64_t *tmp_dist,
+ TX_TYPE *best_tx,
+ PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int plane, pnskip, skippable, skippable_uv, rate_uv, this_rate,
+ base_rate = *tmp_rate;
+ int64_t sse, pnsse, sse_uv, this_dist, dist_uv;
+ uint8_t *dst_buf[3];
+ int dst_stride[3];
+ TX_SIZE tx_size;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ TX_TYPE tx_type, best_tx_nostx = xd->mi[0]->mbmi.tx_type;
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+ int tmp_rate_tx = 0, skip_tx = 0;
+ int64_t tmp_dist_tx = 0, rd_tx, bestrd_tx = INT64_MAX;
+ uint8_t tmp_zcoeff_blk = 0;
+
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize, 0, pc_tree);
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
+ mi_row, mi_col);
+ for (plane = 0; plane < MAX_MB_PLANE; plane++) {
+ dst_buf[plane] = xd->plane[plane].dst.buf;
+ dst_stride[plane] = xd->plane[plane].dst.stride;
+ }
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col,
+ 0, bsize, bsize, dst_buf, dst_stride, pc_tree);
+
+ // These skip_txfm flags are previously set by the non-supertx RD search.
+ // vp10_txfm_rd_in_plane_supertx calls block_rd_txfm, which checks these
+ // to reuse distortion values from the RD estimation, so we reset these
+ // flags here before evaluating RD for supertx coding.
+  for (plane = 0; plane < MAX_MB_PLANE; plane++)
+ x->skip_txfm[plane << 2] = SKIP_TXFM_NONE;
+
+ set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+ *best_tx = DCT_DCT;
+
+ // chroma
+ skippable_uv = 1;
+ rate_uv = 0;
+ dist_uv = 0;
+ sse_uv = 0;
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ ENTROPY_CONTEXT ctxa[16], ctxl[16];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ int coeff_ctx = 1;
+
+ this_rate = 0;
+ this_dist = 0;
+ pnsse = 0;
+ pnskip = 1;
+
+ tx_size = max_txsize_lookup[bsize];
+ tx_size = get_uv_tx_size_impl(tx_size, bsize,
+ cm->subsampling_x, cm->subsampling_y);
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ctxa, ctxl);
+ coeff_ctx = combine_entropy_contexts(ctxa[0], ctxl[0]);
+
+ vp10_subtract_plane(x, bsize, plane);
+ vp10_tx_block_rd_b(cpi, x, tx_size,
+ 0, 0, plane, 0,
+ get_plane_block_size(bsize, pd), coeff_ctx,
+ &this_rate, &this_dist, &pnsse, &pnskip);
+#else
+ tx_size = max_txsize_lookup[bsize];
+ tx_size = get_uv_tx_size_impl(tx_size, bsize,
+ cm->subsampling_x, cm->subsampling_y);
+ vp10_subtract_plane(x, bsize, plane);
+ vp10_txfm_rd_in_plane_supertx(x, &this_rate, &this_dist, &pnskip, &pnsse,
+ INT64_MAX, plane, bsize, tx_size, 0);
+#endif // CONFIG_VAR_TX
+
+ rate_uv += this_rate;
+ dist_uv += this_dist;
+ sse_uv += pnsse;
+ skippable_uv &= pnskip;
+ }
+
+ // luma
+ tx_size = max_txsize_lookup[bsize];
+ vp10_subtract_plane(x, bsize, 0);
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(tx_size, bsize, 1);
+#endif // CONFIG_EXT_TX
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+#if CONFIG_VAR_TX
+ ENTROPY_CONTEXT ctxa[16], ctxl[16];
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ int coeff_ctx = 1;
+#endif // CONFIG_VAR_TX
+#if CONFIG_EXT_TX
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX && *best_tx == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
+#else
+ if (tx_size >= TX_32X32 && tx_type != DCT_DCT)
+ continue;
+#endif // CONFIG_EXT_TX
+ mbmi->tx_type = tx_type;
+
+#if CONFIG_VAR_TX
+ this_rate = 0;
+ this_dist = 0;
+ pnsse = 0;
+ pnskip = 1;
+
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ctxa, ctxl);
+ coeff_ctx = combine_entropy_contexts(ctxa[0], ctxl[0]);
+ vp10_tx_block_rd_b(cpi, x, tx_size,
+ 0, 0, 0, 0,
+ bsize, coeff_ctx,
+ &this_rate, &this_dist, &pnsse, &pnskip);
+#else
+ vp10_txfm_rd_in_plane_supertx(x, &this_rate, &this_dist, &pnskip,
+ &pnsse, INT64_MAX, 0, bsize, tx_size, 0);
+#endif // CONFIG_VAR_TX
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(tx_size, bsize, 1) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ this_rate != INT_MAX) {
+ if (ext_tx_set > 0)
+ this_rate += cpi->inter_tx_type_costs[ext_tx_set]
+ [mbmi->tx_size][mbmi->tx_type];
+ }
+#else
+ if (tx_size < TX_32X32 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ this_rate != INT_MAX) {
+ this_rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ }
+#endif // CONFIG_EXT_TX
+ *tmp_rate = rate_uv + this_rate;
+ *tmp_dist = dist_uv + this_dist;
+ sse = sse_uv + pnsse;
+ skippable = skippable_uv && pnskip;
+ if (skippable) {
+ *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ x->skip = 1;
+ } else {
+ if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist)
+ < RDCOST(x->rdmult, x->rddiv, 0, sse)) {
+ *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ x->skip = 0;
+ } else {
+ *tmp_dist = sse;
+ *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ x->skip = 1;
+ }
+ }
+ *tmp_rate += base_rate;
+ rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist);
+ if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) {
+ *best_tx = tx_type;
+ bestrd_tx = rd_tx;
+ tmp_rate_tx = *tmp_rate;
+ tmp_dist_tx = *tmp_dist;
+ skip_tx = x->skip;
+ tmp_zcoeff_blk = x->zcoeff_blk[tx_size][0];
+ }
+ }
+ x->zcoeff_blk[tx_size][0] = tmp_zcoeff_blk;
+ *tmp_rate = tmp_rate_tx;
+ *tmp_dist = tmp_dist_tx;
+ x->skip = skip_tx;
+#if CONFIG_VAR_TX
+ for (plane = 0; plane < 1; ++plane)
+ memset(x->blk_skip[plane], x->skip,
+ sizeof(uint8_t) * pc_tree->none.num_4x4_blk);
+#endif // CONFIG_VAR_TX
+ xd->mi[0]->mbmi.tx_type = best_tx_nostx;
+}
+#endif // CONFIG_SUPERTX
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 92ba4dd..e359b93 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -23,6 +23,8 @@
#include "vp10/common/scan.h"
#include "vp10/encoder/encodemb.h"
+#include "vp10/encoder/hybrid_fwd_txfm.h"
+#include "vp10/encoder/quantize.h"
#include "vp10/encoder/rd.h"
#include "vp10/encoder/tokenize.h"
@@ -104,8 +106,9 @@
const int mul = 1 + (tx_size == TX_32X32);
const int16_t *dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
- TX_TYPE tx_type = get_tx_type(type, xd, block);
- const scan_order *const so = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const scan_order *const so =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
int next = eob, sz = 0;
@@ -301,451 +304,107 @@
final_eob++;
mb->plane[plane].eobs[block] = final_eob;
+ assert(final_eob <= default_eob);
return final_eob;
}
-static INLINE void fdct32x32(int rd_transform,
- const int16_t *src, tran_low_t *dst,
- int src_stride) {
- if (rd_transform)
- vpx_fdct32x32_rd(src, dst, src_stride);
- else
- vpx_fdct32x32(src, dst, src_stride);
-}
-
#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
- tran_low_t *dst, int src_stride) {
- if (rd_transform)
- vpx_highbd_fdct32x32_rd(src, dst, src_stride);
- else
- vpx_highbd_fdct32x32(src, dst, src_stride);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
+typedef enum QUANT_FUNC {
+ QUANT_FUNC_LOWBD = 0,
+ QUANT_FUNC_LOWBD_32 = 1,
+ QUANT_FUNC_HIGHBD = 2,
+ QUANT_FUNC_HIGHBD_32 = 3,
+ QUANT_FUNC_LAST = 4
+} QUANT_FUNC;
-void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless) {
- if (lossless) {
- vp10_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vpx_fdct4x4(src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
- }
-}
+static VP10_QUANT_FACADE
+ quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
+ {vp10_quantize_fp_facade, vp10_quantize_fp_32x32_facade,
+ vp10_highbd_quantize_fp_facade, vp10_highbd_quantize_fp_32x32_facade},
+ {vp10_quantize_b_facade, vp10_quantize_b_32x32_facade,
+ vp10_highbd_quantize_b_facade, vp10_highbd_quantize_b_32x32_facade},
+ {vp10_quantize_dc_facade, vp10_quantize_dc_32x32_facade,
+ vp10_highbd_quantize_dc_facade, vp10_highbd_quantize_dc_32x32_facade},
+ {NULL, NULL, NULL, NULL}};
-static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type) {
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
-}
+#else
+typedef enum QUANT_FUNC {
+ QUANT_FUNC_LOWBD = 0,
+ QUANT_FUNC_LOWBD_32 = 1,
+ QUANT_FUNC_LAST = 2
+} QUANT_FUNC;
-static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type) {
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
-}
+static VP10_QUANT_FACADE
+ quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
+ {vp10_quantize_fp_facade, vp10_quantize_fp_32x32_facade},
+ {vp10_quantize_b_facade, vp10_quantize_b_32x32_facade},
+ {vp10_quantize_dc_facade, vp10_quantize_dc_32x32_facade},
+ {NULL, NULL}};
+#endif
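+
+/* Dispatch example (matching the call sites in vp10_xform_quant below): the
+ * first index selects the quantizer flavor and the second selects bit depth
+ * and 32x32 handling, so a low-bitdepth 32x32 block quantized with the full
+ * B quantizer resolves to
+ *   quant_func_list[VP10_XFORM_QUANT_B][QUANT_FUNC_LOWBD_32](
+ *       coeff, 1024, p, qcoeff, pd, dqcoeff, eob, scan_order);
+ * i.e. vp10_quantize_b_32x32_facade. */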
-static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
- tran_low_t *coeff, int diff_stride,
- TX_TYPE tx_type) {
- switch (tx_type) {
- case DCT_DCT:
- fdct32x32(rd_transform, src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- assert(0);
- break;
- default:
- assert(0);
- break;
- }
-}
+static FWD_TXFM_OPT fwd_txfm_opt_list[VP10_XFORM_QUANT_LAST] = {
+ FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC,
+ FWD_TXFM_OPT_NORMAL};
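+// Indexed by VP10_XFORM_QUANT_{FP, B, DC, SKIP_QUANT}: only the DC-only
+// quantization path can use the cheaper DC-only forward transform.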
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless) {
- if (lossless) {
- assert(tx_type == DCT_DCT);
- vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- switch (tx_type) {
- case DCT_DCT:
- vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
- }
-}
-
-static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type) {
- switch (tx_type) {
- case DCT_DCT:
- vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
-}
-
-static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type) {
- switch (tx_type) {
- case DCT_DCT:
- vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
- break;
- default:
- assert(0);
- break;
- }
-}
-
-static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
- tran_low_t *coeff, int diff_stride,
- TX_TYPE tx_type) {
- switch (tx_type) {
- case DCT_DCT:
- highbd_fdct32x32(rd_transform, src_diff, coeff, diff_stride);
- break;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- assert(0);
- break;
- default:
- assert(0);
- break;
- }
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ VP10_XFORM_QUANT xform_quant_idx) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
- const scan_order *const scan_order = get_scan(tx_size, tx_type);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
+ const int tx1d_size = get_tx1d_size(tx_size);
+ const int tx2d_size = tx1d_size * tx1d_size;
+
+ FWD_TXFM_PARAM fwd_txfm_param;
+ fwd_txfm_param.tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[xform_quant_idx];
+ fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+ fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (tx_size) {
- case TX_32X32:
- highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff, dqcoeff, pd->dequant,
- eob, scan_order->scan,
- scan_order->iscan);
- break;
- case TX_16X16:
- vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
- vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_8X8:
- vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
- vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
- }
- vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- default:
- assert(0);
- }
- return;
- }
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- switch (tx_size) {
- case TX_32X32:
- fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan,
- scan_order->iscan);
- break;
- case TX_16X16:
- vpx_fdct16x16(src_diff, coeff, diff_stride);
- vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_8X8:
- vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
- x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_fwht4x4(src_diff, coeff, diff_stride);
+ highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
+ if (x->skip_block) {
+ vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
} else {
- vpx_fdct4x4(src_diff, coeff, diff_stride);
+ if (tx_size == TX_32X32)
+ quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD_32](
+ coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order);
+ else
+ quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
+ coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order);
}
- vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- default:
- assert(0);
- break;
- }
-}
-
-void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
- tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint16_t *const eob = &p->eobs[block];
- const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
- const int16_t *src_diff;
- src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
-
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (tx_size) {
- case TX_32X32:
- vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
- vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- case TX_16X16:
- vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
- vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- case TX_8X8:
- vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
- vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
- }
- vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- default:
- assert(0);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- switch (tx_size) {
- case TX_32X32:
- vpx_fdct32x32_1(src_diff, coeff, diff_stride);
- vpx_quantize_dc_32x32(coeff, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- case TX_16X16:
- vpx_fdct16x16_1(src_diff, coeff, diff_stride);
- vpx_quantize_dc(coeff, 256, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- case TX_8X8:
- vpx_fdct8x8_1(src_diff, coeff, diff_stride);
- vpx_quantize_dc(coeff, 64, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- vp10_fwht4x4(src_diff, coeff, diff_stride);
- } else {
- vpx_fdct4x4(src_diff, coeff, diff_stride);
- }
- vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
- p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
- break;
- default:
- assert(0);
- break;
- }
-}
-
-
-
-void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
- const scan_order *const scan_order = get_scan(tx_size, tx_type);
- tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
- tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint16_t *const eob = &p->eobs[block];
- const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
- const int16_t *src_diff;
- src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
-
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (tx_size) {
- case TX_32X32:
- highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride,
- tx_type);
- vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
- p->round, p->quant, p->quant_shift, qcoeff,
- dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_16X16:
- highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
- vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_8X8:
- highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
- vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_4X4:
- vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[xd->mi[0]->mbmi.segment_id]);
- vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- default:
- assert(0);
+ fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
+ if (x->skip_block) {
+ vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
+ } else {
+ if (tx_size == TX_32X32)
+ quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD_32](
+ coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order);
+ else
+ quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
+ coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order);
}
- return;
- }
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- switch (tx_size) {
- case TX_32X32:
- fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, tx_type);
- vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan,
- scan_order->iscan);
- break;
- case TX_16X16:
- fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
- vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_8X8:
- fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
- vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- case TX_4X4:
- vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[xd->mi[0]->mbmi.segment_id]);
- vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
- default:
- assert(0);
- break;
}
}
@@ -761,20 +420,31 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
- TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block);
+ INV_TXFM_PARAM inv_txfm_param;
+#if CONFIG_VAR_TX
+ int i;
+ const int bwl = b_width_log2_lookup[plane_bsize];
+#endif
dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
a = &ctx->ta[plane][blk_col];
l = &ctx->tl[plane][blk_row];
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
- if (x->zcoeff_blk[tx_size][block] && plane == 0) {
- p->eobs[block] = 0;
- *a = *l = 0;
- return;
- }
+ // Turn this back on when the rate-distortion loop is synchronized with
+ // the recursive transform block coding.
+// if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+// p->eobs[block] = 0;
+// *a = *l = 0;
+// return;
+// }
+#if CONFIG_VAR_TX
+ if (!x->skip_recode &&
+ x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) {
+#else
if (!x->skip_recode) {
+#endif
if (x->quant_fp) {
// Encoding process for rtc mode
if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
@@ -783,102 +453,151 @@
*a = *l = 0;
return;
} else {
- vp10_xform_quant_fp(x, plane, block, blk_row, blk_col,
- plane_bsize, tx_size);
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, VP10_XFORM_QUANT_FP);
}
} else {
if (max_txsize_lookup[plane_bsize] == tx_size) {
int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
// full forward transform and quantization
- vp10_xform_quant(x, plane, block, blk_row, blk_col,
- plane_bsize, tx_size);
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, VP10_XFORM_QUANT_B);
} else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
// fast path forward transform and quantization
- vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
- plane_bsize, tx_size);
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, VP10_XFORM_QUANT_DC);
} else {
// skip forward transform
p->eobs[block] = 0;
*a = *l = 0;
+#if !CONFIG_VAR_TX
return;
+#endif
}
} else {
- vp10_xform_quant(x, plane, block, blk_row, blk_col,
- plane_bsize, tx_size);
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, VP10_XFORM_QUANT_B);
}
}
}
+#if CONFIG_VAR_TX
+ else {
+ if (!x->skip_recode)
+ p->eobs[block] = 0;
+ }
+#endif
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
- const int ctx = combine_entropy_contexts(*a, *l);
+ int ctx;
+#if CONFIG_VAR_TX
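+    // Collapse the per-4x4 above/left contexts covered by this transform
+    // block into one flag each: reading 2/4/8 bytes at once and applying !!
+    // ORs the neighboring contexts together before combining.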
+ switch (tx_size) {
+ case TX_4X4:
+ break;
+ case TX_8X8:
+ a[0] = !!*(const uint16_t *)&a[0];
+ l[0] = !!*(const uint16_t *)&l[0];
+ break;
+ case TX_16X16:
+ a[0] = !!*(const uint32_t *)&a[0];
+ l[0] = !!*(const uint32_t *)&l[0];
+ break;
+ case TX_32X32:
+ a[0] = !!*(const uint64_t *)&a[0];
+ l[0] = !!*(const uint64_t *)&l[0];
+ break;
+ default:
+ assert(0 && "Invalid transform size.");
+ break;
+ }
+#endif
+ ctx = combine_entropy_contexts(*a, *l);
*a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
} else {
*a = *l = p->eobs[block] > 0;
}
+#if CONFIG_VAR_TX
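+    // Broadcast the combined flag back to every 4x4 unit this transform
+    // block spans.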
+ for (i = 0; i < (1 << tx_size); ++i) {
+ a[i] = a[0];
+ l[i] = l[0];
+ }
+#endif
+
if (p->eobs[block])
*(args->skip) = 0;
if (p->eobs[block] == 0)
return;
+
+ // inverse transform parameters
+ inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = p->eobs[block];
+ inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (tx_size) {
- case TX_32X32:
- vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, pd->dst.stride,
- p->eobs[block], xd->bd, tx_type);
- break;
- case TX_16X16:
- vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, pd->dst.stride,
- p->eobs[block], xd->bd, tx_type);
- break;
- case TX_8X8:
- vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, pd->dst.stride,
- p->eobs[block], xd->bd, tx_type);
- break;
- case TX_4X4:
- // this is like vp10_short_idct4x4 but has a special case around eob<=1
- // which is significant (not just an optimization) for the lossless
- // case.
- vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride,
- p->eobs[block], xd->bd, tx_type,
- xd->lossless[xd->mi[0]->mbmi.segment_id]);
- break;
- default:
- assert(0 && "Invalid transform size");
- break;
- }
-
+ inv_txfm_param.bd = xd->bd;
+ highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+ inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param);
+}
- switch (tx_size) {
- case TX_32X32:
- vp10_inv_txfm_add_32x32(dqcoeff, dst, pd->dst.stride, p->eobs[block],
- tx_type);
- break;
- case TX_16X16:
- vp10_inv_txfm_add_16x16(dqcoeff, dst, pd->dst.stride, p->eobs[block],
- tx_type);
- break;
- case TX_8X8:
- vp10_inv_txfm_add_8x8(dqcoeff, dst, pd->dst.stride, p->eobs[block],
- tx_type);
- break;
- case TX_4X4:
- // this is like vp10_short_idct4x4 but has a special case around eob<=1
- // which is significant (not just an optimization) for the lossless
- // case.
- vp10_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block],
- tx_type, xd->lossless[xd->mi[0]->mbmi.segment_id]);
- break;
- default:
- assert(0 && "Invalid transform size");
- break;
+#if CONFIG_VAR_TX
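+// Recursive transform-block encoder for inter blocks under variable
+// transform sizes: once the recursion reaches the transform size actually
+// selected at this position (inter_tx_size), the block is coded; otherwise
+// it is split into four children one transform size smaller.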
+static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg) {
+ struct encode_b_args *const args = arg;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize,
+ 0, 0) :
+ mbmi->inter_tx_size[blk_idx];
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ encode_block(plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, arg);
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ encode_block_inter(plane, block + i * step, offsetr, offsetc,
+ plane_bsize, tx_size - 1, arg);
+ }
}
}
+#endif
static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
@@ -891,12 +610,13 @@
uint8_t *dst;
dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
- vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, VP10_XFORM_QUANT_B);
if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (xd->lossless[0]) {
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
} else {
@@ -906,7 +626,7 @@
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (xd->lossless[0]) {
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
} else {
vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
@@ -917,7 +637,7 @@
void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
vp10_subtract_plane(x, bsize, 0);
vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
- encode_block_pass1, x);
+ encode_block_pass1, x);
}
void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
@@ -933,20 +653,72 @@
return;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ // TODO(jingning): Clean this up.
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ const int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_tx_size * 2);
+#endif
if (!x->skip_recode)
vp10_subtract_plane(x, bsize, plane);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
+#if CONFIG_VAR_TX
+ vp10_get_entropy_contexts(bsize, TX_4X4, pd,
+ ctx.ta[plane], ctx.tl[plane]);
+#else
const struct macroblockd_plane* const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
vp10_get_entropy_contexts(bsize, tx_size, pd,
- ctx.ta[plane], ctx.tl[plane]);
+ ctx.ta[plane], ctx.tl[plane]);
+#endif
}
+#if CONFIG_VAR_TX
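+    // Walk the plane in max-transform-size steps; encode_block_inter then
+    // recurses down to the transform sizes actually selected per block.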
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ encode_block_inter(plane, block, idy, idx, plane_bsize,
+ max_tx_size, &arg);
+ block += step;
+ }
+ }
+#else
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+ &arg);
+#endif
+ }
+}
+
+#if CONFIG_SUPERTX
+void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ struct encode_b_args arg = {x, &ctx, &mbmi->skip};
+ int plane;
+
+ mbmi->skip = 1;
+ if (x->skip)
+ return;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane* const pd = &xd->plane[plane];
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+ vp10_subtract_plane(x, bsize, plane);
+ vp10_get_entropy_contexts(bsize, tx_size, pd,
+ ctx.ta[plane], ctx.tl[plane]);
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
&arg);
}
}
+#endif // CONFIG_SUPERTX
void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
@@ -957,12 +729,9 @@
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
- tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
- const scan_order *const scan_order = get_scan(tx_size, tx_type);
+ const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
PREDICTION_MODE mode;
const int bwl = b_width_log2_lookup[plane_bsize];
const int bhl = b_height_log2_lookup[plane_bsize];
@@ -972,156 +741,62 @@
uint16_t *eob = &p->eobs[block];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
+
+ const int tx1d_size = get_tx1d_size(tx_size);
+
+ INV_TXFM_PARAM inv_txfm_param;
+
dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
- vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride,
- dst, dst_stride, blk_col, blk_row, plane);
-
+ vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, dst,
+ dst_stride, blk_col, blk_row, plane);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (tx_size) {
- case TX_32X32:
- if (!x->skip_recode) {
- vpx_highbd_subtract_block(32, 32, src_diff, diff_stride,
- src, src_stride, dst, dst_stride, xd->bd);
- highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff,
- diff_stride, tx_type);
- vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
- p->round, p->quant, p->quant_shift,
- qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- }
- if (*eob)
- vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, xd->bd,
- tx_type);
- break;
- case TX_16X16:
- if (!x->skip_recode) {
- vpx_highbd_subtract_block(16, 16, src_diff, diff_stride,
- src, src_stride, dst, dst_stride, xd->bd);
- highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
- vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- }
- if (*eob)
- vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, xd->bd,
- tx_type);
- break;
- case TX_8X8:
- if (!x->skip_recode) {
- vpx_highbd_subtract_block(8, 8, src_diff, diff_stride,
- src, src_stride, dst, dst_stride, xd->bd);
- highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
- vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- }
- if (*eob)
- vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, xd->bd,
- tx_type);
- break;
- case TX_4X4:
- if (!x->skip_recode) {
- vpx_highbd_subtract_block(4, 4, src_diff, diff_stride,
- src, src_stride, dst, dst_stride, xd->bd);
- vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[mbmi->segment_id]);
- vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- }
-
- if (*eob)
- // this is like vp10_short_idct4x4 but has a special case around
- // eob<=1 which is significant (not just an optimization) for the
- // lossless case.
- vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, xd->bd,
- tx_type, xd->lossless[mbmi->segment_id]);
- break;
- default:
- assert(0);
- return;
- }
- if (*eob)
- *(args->skip) = 0;
- return;
+ vpx_highbd_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src,
+ src_stride, dst, dst_stride, xd->bd);
+ } else {
+ vpx_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src,
+ src_stride, dst, dst_stride);
}
+#else
+ vpx_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src,
+ src_stride, dst, dst_stride);
#endif // CONFIG_VP9_HIGHBITDEPTH
- switch (tx_size) {
- case TX_32X32:
- if (!x->skip_recode) {
- vpx_subtract_block(32, 32, src_diff, diff_stride,
- src, src_stride, dst, dst_stride);
- fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride,
- tx_type);
- vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan,
- scan_order->iscan);
- }
- if (*eob)
- vp10_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type);
- break;
- case TX_16X16:
- if (!x->skip_recode) {
- vpx_subtract_block(16, 16, src_diff, diff_stride,
- src, src_stride, dst, dst_stride);
- fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
- vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan,
- scan_order->iscan);
- }
- if (*eob)
- vp10_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type);
- break;
- case TX_8X8:
- if (!x->skip_recode) {
- vpx_subtract_block(8, 8, src_diff, diff_stride,
- src, src_stride, dst, dst_stride);
- fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
- vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan,
- scan_order->iscan);
- }
- if (*eob)
- vp10_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type);
- break;
- case TX_4X4:
- if (!x->skip_recode) {
- vpx_subtract_block(4, 4, src_diff, diff_stride,
- src, src_stride, dst, dst_stride);
- vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[mbmi->segment_id]);
- vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan,
- scan_order->iscan);
- }
+#if CONFIG_EXT_INTRA
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_B);
+#else
+ if (!x->skip_recode)
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_B);
+ else
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_SKIP_QUANT);
+#endif // CONFIG_EXT_INTRA
- if (*eob) {
- // this is like vp10_short_idct4x4 but has a special case around eob<=1
- // which is significant (not just an optimization) for the lossless
- // case.
- vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type,
- xd->lossless[mbmi->segment_id]);
- }
- break;
- default:
- assert(0);
- break;
- }
- if (*eob)
+ if (*eob) {
+ // inverse transform
+ inv_txfm_param.tx_type = tx_type;
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = *eob;
+ inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+#if CONFIG_VP9_HIGHBITDEPTH
+ inv_txfm_param.bd = xd->bd;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+ } else {
+ inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+ }
+#else
+ inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
*(args->skip) = 0;
+ }
}
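The rewrite above replaces the old per-size switch with a single
INV_TXFM_PARAM handed to one dispatch routine. A minimal standalone sketch
of that pattern, using stand-in types (the Sk* names are illustrative, not
the codec's definitions):

#include <assert.h>
#include <stdio.h>

typedef enum { SK_TX_4X4, SK_TX_8X8, SK_TX_16X16, SK_TX_32X32 } SkTxSize;

typedef struct {
  int tx_type;
  SkTxSize tx_size;
  int eob;       /* end-of-block: 0 means nothing to add back */
  int lossless;  /* routes 4x4 to the Walsh-Hadamard path */
} SkInvTxfmParam;

static void sk_inv_txfm_add(const SkInvTxfmParam *p) {
  if (p->eob == 0) return;  /* mirrors the *eob check at the call site */
  switch (p->tx_size) {     /* one switch instead of one per call site */
    case SK_TX_4X4:   printf(p->lossless ? "iwht4x4\n" : "idct4x4\n"); break;
    case SK_TX_8X8:   printf("idct8x8\n");   break;
    case SK_TX_16X16: printf("idct16x16\n"); break;
    case SK_TX_32X32: printf("idct32x32\n"); break;
    default: assert(0);
  }
}

int main(void) {
  SkInvTxfmParam p = { 0 /* DCT_DCT */, SK_TX_4X4, 5, 1 };
  sk_inv_txfm_add(&p);  /* prints "iwht4x4" */
  return 0;
}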
void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h
index 2e6516e..e208c88 100644
--- a/vp10/encoder/encodemb.h
+++ b/vp10/encoder/encodemb.h
@@ -23,17 +23,24 @@
struct optimize_ctx *ctx;
int8_t *skip;
};
+
+typedef enum VP10_XFORM_QUANT {
+ VP10_XFORM_QUANT_FP = 0,
+ VP10_XFORM_QUANT_B = 1,
+ VP10_XFORM_QUANT_DC = 2,
+ VP10_XFORM_QUANT_SKIP_QUANT = 3,
+ VP10_XFORM_QUANT_LAST = 4
+} VP10_XFORM_QUANT;
+
void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
+#if CONFIG_SUPERTX
+void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize);
+#endif // CONFIG_SUPERTX
void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
-void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ VP10_XFORM_QUANT xform_quant_idx);
void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
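For reference, each of the removed entry points maps onto a mode of the
unified vp10_xform_quant(). A hypothetical compatibility wrapper (not part
of the patch, shown only to illustrate the new signature):

static INLINE void xform_quant_fp_compat(MACROBLOCK *x, int plane, int block,
                                         int blk_row, int blk_col,
                                         BLOCK_SIZE plane_bsize,
                                         TX_SIZE tx_size) {
  vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                   VP10_XFORM_QUANT_FP);
}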
@@ -43,14 +50,6 @@
void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless);
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c
index 0736c65..4124c4a 100644
--- a/vp10/encoder/encodemv.c
+++ b/vp10/encoder/encodemv.c
@@ -137,19 +137,8 @@
static void update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
vpx_prob upd_p) {
-#if CONFIG_MISC_FIXES
(void) upd_p;
vp10_cond_prob_diff_update(w, cur_p, ct);
-#else
- const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
- const int update = cost_branch256(ct, *cur_p) + vp10_cost_zero(upd_p) >
- cost_branch256(ct, new_p) + vp10_cost_one(upd_p) + 7 * 256;
- vpx_write(w, update, upd_p);
- if (update) {
- *cur_p = new_p;
- vpx_write_literal(w, new_p >> 1, 7);
- }
-#endif
}
static void write_mv_update(const vpx_tree_index *tree,
@@ -235,6 +224,61 @@
build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
+#if CONFIG_EXT_INTER
+static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
+ const int_mv mvs[2],
+ nmv_context_counts *counts) {
+ int i;
+ PREDICTION_MODE mode = mbmi->mode;
+ int mv_idx = (mode == NEWFROMNEARMV);
+
+ if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][mv_idx].as_mv;
+ const MV diff = {mvs[i].as_mv.row - ref->row,
+ mvs[i].as_mv.col - ref->col};
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+ } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
+ const MV diff = {mvs[1].as_mv.row - ref->row,
+ mvs[1].as_mv.col - ref->col};
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
+ const MV diff = {mvs[0].as_mv.row - ref->row,
+ mvs[0].as_mv.col - ref->col};
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+}
+
+static void inc_mvs_sub8x8(const MODE_INFO *mi,
+ int block,
+ const int_mv mvs[2],
+ nmv_context_counts *counts) {
+ int i;
+ PREDICTION_MODE mode = mi->bmi[block].as_mode;
+
+ if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
+ for (i = 0; i < 1 + has_second_ref(&mi->mbmi); ++i) {
+ const MV *ref = &mi->bmi[block].ref_mv[i].as_mv;
+ const MV diff = {mvs[i].as_mv.row - ref->row,
+ mvs[i].as_mv.col - ref->col};
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+ } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
+ const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
+ const MV diff = {mvs[1].as_mv.row - ref->row,
+ mvs[1].as_mv.col - ref->col};
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
+ const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
+ const MV diff = {mvs[0].as_mv.row - ref->row,
+ mvs[0].as_mv.col - ref->col};
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+}
+#else
static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
const int_mv mvs[2],
nmv_context_counts *counts) {
@@ -247,6 +291,7 @@
vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
}
}
+#endif // CONFIG_EXT_INTER
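Both inc_mvs() variants accumulate statistics on the motion-vector
residual, i.e. the difference between the coded MV and its predictor. A
standalone sketch of that quantity, with a plain struct standing in for the
codec's MV type:

#include <stdio.h>

typedef struct { int row, col; } SkMv;

static SkMv sk_mv_diff(SkMv mv, SkMv ref) {
  SkMv d = { mv.row - ref.row, mv.col - ref.col };
  return d;  /* this is what vp10_inc_mv() gets to count */
}

int main(void) {
  SkMv mv = { 13, -7 }, ref = { 8, -4 };
  SkMv d = sk_mv_diff(mv, ref);
  printf("coded diff: (%d, %d)\n", d.row, d.col);  /* (5, -3) */
  return 0;
}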
void vp10_update_mv_count(ThreadData *td) {
const MACROBLOCKD *xd = &td->mb.e_mbd;
@@ -262,12 +307,22 @@
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int i = idy * 2 + idx;
+
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
+ inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, &td->counts->mv);
+#else
if (mi->bmi[i].as_mode == NEWMV)
inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv);
+#endif // CONFIG_EXT_INTER
}
}
} else {
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(mbmi->mode))
+#else
if (mbmi->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
inc_mvs(mbmi, mbmi_ext, mbmi->mv, &td->counts->mv);
}
}
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 9e3bec4..80bf47c 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -377,10 +377,16 @@
vp10_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
vp10_free_postproc_buffers(cm);
-#endif
+#endif // CONFIG_VP9_POSTPROC
+#if CONFIG_LOOP_RESTORATION
+ vp10_free_restoration_buffers(cm);
+#endif // CONFIG_LOOP_RESTORATION
vp10_free_context_buffers(cm);
vpx_free_frame_buffer(&cpi->last_frame_uf);
+#if CONFIG_LOOP_RESTORATION
+ vpx_free_frame_buffer(&cpi->last_frame_db);
+#endif // CONFIG_LOOP_RESTORATION
vpx_free_frame_buffer(&cpi->scaled_source);
vpx_free_frame_buffer(&cpi->scaled_last_source);
vpx_free_frame_buffer(&cpi->alt_ref_buffer);
@@ -391,6 +397,9 @@
vp10_free_pc_tree(&cpi->td);
+ if (cpi->common.allow_screen_content_tools)
+ vpx_free(cpi->td.mb.palette_buffer);
+
if (cpi->source_diff_var != NULL) {
vpx_free(cpi->source_diff_var);
cpi->source_diff_var = NULL;
@@ -416,10 +425,6 @@
memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
-#if !CONFIG_MISC_FIXES
- vp10_copy(cc->segment_pred_probs, cm->segp.pred_probs);
-#endif
-
memcpy(cpi->coding_context.last_frame_seg_map_copy,
cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols));
@@ -444,10 +449,6 @@
memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
-#if !CONFIG_MISC_FIXES
- vp10_copy(cm->segp.pred_probs, cc->segment_pred_probs);
-#endif
-
memcpy(cm->last_frame_seg_map,
cpi->coding_context.last_frame_seg_map_copy,
(cm->mi_rows * cm->mi_cols));
@@ -639,6 +640,19 @@
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
+#if CONFIG_LOOP_RESTORATION
+ if (vpx_realloc_frame_buffer(&cpi->last_frame_db,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate last frame deblocked buffer");
+#endif // CONFIG_LOOP_RESTORATION
+
if (vpx_realloc_frame_buffer(&cpi->scaled_source,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
@@ -722,9 +736,17 @@
}
static void init_buffer_indices(VP10_COMP *cpi) {
+#if CONFIG_EXT_REFS
+ int fb_idx;
+ for (fb_idx = 0; fb_idx < LAST_REF_FRAMES; ++fb_idx)
+ cpi->lst_fb_idxes[fb_idx] = fb_idx;
+ cpi->gld_fb_idx = LAST_REF_FRAMES;
+ cpi->alt_fb_idx = cpi->gld_fb_idx + 1;
+#else
cpi->lst_fb_idx = 0;
cpi->gld_fb_idx = 1;
cpi->alt_fb_idx = 2;
+#endif // CONFIG_EXT_REFS
}
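Under CONFIG_EXT_REFS the frame-buffer indices are laid out so the four
LAST buffers occupy slots 0..3, GOLDEN the next slot, and ALTREF the one
after. A standalone check of that layout (SK_LAST_REF_FRAMES = 4 is assumed
from the loop bound above):

#include <assert.h>

#define SK_LAST_REF_FRAMES 4

int main(void) {
  int lst_fb_idxes[SK_LAST_REF_FRAMES];
  int fb_idx, gld_fb_idx, alt_fb_idx;
  for (fb_idx = 0; fb_idx < SK_LAST_REF_FRAMES; ++fb_idx)
    lst_fb_idxes[fb_idx] = fb_idx;
  gld_fb_idx = SK_LAST_REF_FRAMES;  /* 4 */
  alt_fb_idx = gld_fb_idx + 1;      /* 5 */
  assert(lst_fb_idxes[3] == 3 && gld_fb_idx == 4 && alt_fb_idx == 5);
  return 0;
}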
static void init_config(struct VP10_COMP *cpi, VP10EncoderConfig *oxcf) {
@@ -749,6 +771,10 @@
cpi->td.counts = &cm->counts;
// change includes all joint functionality
+#if CONFIG_EXT_REFS
+ cpi->last_ref_to_refresh = LAST_FRAME;
+#endif // CONFIG_EXT_REFS
+
vp10_change_config(cpi, oxcf);
cpi->static_mb_pct = 0;
@@ -1406,6 +1432,9 @@
void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
VP10_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+#if CONFIG_EXT_REFS
+ int ref_frame;
+#endif // CONFIG_EXT_REFS
if (cm->profile != oxcf->profile)
cm->profile = oxcf->profile;
@@ -1430,13 +1459,33 @@
}
cpi->refresh_golden_frame = 0;
+
+#if CONFIG_EXT_REFS
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) {
+ if (ref_frame == cpi->last_ref_to_refresh)
+ cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 1;
+ else
+ cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 0;
+ }
+#else
cpi->refresh_last_frame = 1;
+#endif // CONFIG_EXT_REFS
+
cm->refresh_frame_context =
oxcf->error_resilient_mode ? REFRESH_FRAME_CONTEXT_OFF :
oxcf->frame_parallel_decoding_mode ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
+ cm->allow_screen_content_tools = (cpi->oxcf.content == VP9E_CONTENT_SCREEN);
+ if (cm->allow_screen_content_tools) {
+ MACROBLOCK *x = &cpi->td.mb;
+    if (x->palette_buffer == NULL) {
+ CHECK_MEM_ERROR(cm, x->palette_buffer,
+ vpx_memalign(16, sizeof(*x->palette_buffer)));
+ }
+ }
+
vp10_reset_segment_features(cm);
vp10_set_high_precision_mv(cpi, 0);
@@ -1628,7 +1677,6 @@
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
- cpi->b_calculate_ssimg = 0;
cpi->b_calculate_blockiness = 1;
cpi->b_calculate_consistency = 1;
cpi->total_inconsistency = 0;
@@ -1641,22 +1689,12 @@
if (cpi->b_calculate_psnr) {
cpi->total_sq_error = 0;
cpi->total_samples = 0;
-
- cpi->totalp_sq_error = 0;
- cpi->totalp_samples = 0;
-
cpi->tot_recode_hits = 0;
cpi->summed_quality = 0;
cpi->summed_weights = 0;
- cpi->summedp_quality = 0;
- cpi->summedp_weights = 0;
}
- if (cpi->b_calculate_ssimg) {
- cpi->ssimg.worst= 100.0;
- }
cpi->fastssim.worst = 100.0;
-
cpi->psnrhvs.worst = 100.0;
if (cpi->b_calculate_blockiness) {
@@ -1669,7 +1707,6 @@
4 * cpi->common.mi_rows * cpi->common.mi_cols);
cpi->worst_consistency = 100.0;
}
-
#endif
cpi->first_time_stamp_ever = INT64_MAX;
@@ -1822,6 +1859,9 @@
vp10_init_quantizer(cpi);
vp10_loop_filter_init(cm);
+#if CONFIG_LOOP_RESTORATION
+ vp10_loop_restoration_precal();
+#endif // CONFIG_LOOP_RESTORATION
cm->error.setjmp = 0;
@@ -1862,14 +1902,8 @@
const double total_psnr =
vpx_sse_to_psnr((double)cpi->total_samples, peak,
(double)cpi->total_sq_error);
- const double totalp_psnr =
- vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
- (double)cpi->totalp_sq_error);
const double total_ssim = 100 * pow(cpi->summed_quality /
cpi->summed_weights, 8.0);
- const double totalp_ssim = 100 * pow(cpi->summedp_quality /
- cpi->summedp_weights, 8.0);
-
snprintf(headings, sizeof(headings),
"Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
"VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
@@ -1879,8 +1913,8 @@
"%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
"%7.3f\t%7.3f\t%7.3f\t%7.3f",
dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
- cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr,
- total_ssim, totalp_ssim,
+ cpi->psnr.stat[ALL] / cpi->count, total_psnr,
+ total_ssim, total_ssim,
cpi->fastssim.stat[ALL] / cpi->count,
cpi->psnrhvs.stat[ALL] / cpi->count,
cpi->psnr.worst, cpi->worst_ssim, cpi->fastssim.worst,
@@ -1894,7 +1928,7 @@
if (cpi->b_calculate_consistency) {
double consistency =
- vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
+ vpx_sse_to_psnr((double)cpi->total_samples, peak,
(double)cpi->total_inconsistency);
SNPRINT(headings, "\tConsist\tWstCons");
@@ -1902,12 +1936,6 @@
SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
}
- if (cpi->b_calculate_ssimg) {
- SNPRINT(headings, "\t SSIMG\tWtSSIMG");
- SNPRINT2(results, "\t%7.3f", cpi->ssimg.stat[ALL] / cpi->count);
- SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
- }
-
fprintf(f, "%s\t Time\n", headings);
fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
}
@@ -1942,6 +1970,8 @@
// Deallocate allocated thread data.
if (t < cpi->num_workers - 1) {
+ if (cpi->common.allow_screen_content_tools)
+ vpx_free(thread_data->td->mb.palette_buffer);
vpx_free(thread_data->td->counts);
vp10_free_pc_tree(thread_data->td);
vpx_free(thread_data->td);
@@ -2277,7 +2307,7 @@
}
int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags) {
- if (ref_frame_flags > 7)
+ if (ref_frame_flags > ((1 << REFS_PER_FRAME) - 1))
return -1;
cpi->ref_frame_flags = ref_frame_flags;
@@ -2287,7 +2317,14 @@
void vp10_update_reference(VP10_COMP *cpi, int ref_frame_flags) {
cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
+#if CONFIG_EXT_REFS
+ cpi->ext_refresh_last_frames[0] = (ref_frame_flags & VP9_LAST_FLAG) != 0;
+ cpi->ext_refresh_last_frames[1] = (ref_frame_flags & VP9_LAST2_FLAG) != 0;
+ cpi->ext_refresh_last_frames[2] = (ref_frame_flags & VP9_LAST3_FLAG) != 0;
+ cpi->ext_refresh_last_frames[3] = (ref_frame_flags & VP9_LAST4_FLAG) != 0;
+#else
cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
+#endif // CONFIG_EXT_REFS
cpi->ext_refresh_frame_flags_pending = 1;
}
@@ -2296,6 +2333,14 @@
MV_REFERENCE_FRAME ref_frame = NONE;
if (ref_frame_flag == VP9_LAST_FLAG)
ref_frame = LAST_FRAME;
+#if CONFIG_EXT_REFS
+ else if (ref_frame_flag == VP9_LAST2_FLAG)
+ ref_frame = LAST2_FRAME;
+ else if (ref_frame_flag == VP9_LAST3_FLAG)
+ ref_frame = LAST3_FRAME;
+ else if (ref_frame_flag == VP9_LAST4_FLAG)
+ ref_frame = LAST4_FRAME;
+#endif // CONFIG_EXT_REFS
else if (ref_frame_flag == VP9_GOLD_FLAG)
ref_frame = GOLDEN_FRAME;
else if (ref_frame_flag == VP9_ALT_FLAG)
@@ -2577,6 +2622,9 @@
void vp10_update_reference_frames(VP10_COMP *cpi) {
VP10_COMMON * const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
+#if CONFIG_EXT_REFS
+ int ref_frame;
+#endif // CONFIG_EXT_REFS
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
@@ -2631,22 +2679,49 @@
}
}
+#if CONFIG_EXT_REFS
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) {
+ const int ref_idx = ref_frame - LAST_FRAME;
+ if (cpi->refresh_last_frames[ref_idx]) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idxes[ref_idx]],
+ cm->new_fb_idx);
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ memcpy(cpi->interp_filter_selected[ref_frame],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+ }
+ }
+  // NOTE: The refresh order for the 4 last reference frames is:
+  // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME -> LAST4_FRAME -> LAST_FRAME
+ cpi->last_ref_to_refresh += 1;
+ if (cpi->last_ref_to_refresh == LAST4_FRAME)
+ cpi->last_ref_to_refresh = LAST_FRAME;
+#else
if (cpi->refresh_last_frame) {
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
- if (!cpi->rc.is_src_frame_alt_ref)
+ if (!cpi->rc.is_src_frame_alt_ref) {
memcpy(cpi->interp_filter_selected[LAST_FRAME],
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
+ }
}
+#endif // CONFIG_EXT_REFS
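The rotation above advances a single "which LAST slot gets the new frame"
pointer once per frame. A standalone sketch of the wrap arithmetic exactly
as written in the patch (enum values are illustrative; note the pointer
resets when the incremented value reaches the last slot):

#include <stdio.h>

enum { SK_LAST = 1, SK_LAST2, SK_LAST3, SK_LAST4 };

int main(void) {
  int ref = SK_LAST, frame;
  for (frame = 0; frame < 6; ++frame) {
    printf("frame %d refreshes LAST slot %d\n", frame, ref);
    ref += 1;
    if (ref == SK_LAST4) ref = SK_LAST;  /* wrap condition from the patch */
  }
  return 0;
}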
+
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
vp10_denoiser_update_frame_info(&cpi->denoiser,
*cpi->Source,
cpi->common.frame_type,
+#if CONFIG_EXT_REFS
+ cpi->refresh_last_frames,
+#else
+ cpi->refresh_last_frame,
+#endif // CONFIG_EXT_REFS
cpi->refresh_alt_ref_frame,
- cpi->refresh_golden_frame,
- cpi->refresh_last_frame);
+ cpi->refresh_golden_frame);
}
#endif
}
@@ -2670,6 +2745,9 @@
}
if (lf->filter_level > 0) {
+#if CONFIG_VAR_TX
+ vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#else
if (cpi->num_workers > 1)
vp10_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
lf->filter_level, 0, 0,
@@ -2677,7 +2755,14 @@
&cpi->lf_row_sync);
else
vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#endif
}
+#if CONFIG_LOOP_RESTORATION
+ vp10_loop_restoration_init(&cm->rst_info, cm->lf.restoration_level,
+ cm->frame_type == KEY_FRAME);
+ if (cm->rst_info.restoration_used)
+ vp10_loop_restoration_rows(cm->frame_to_show, cm, 0, cm->mi_rows, 0);
+#endif // CONFIG_LOOP_RESTORATION
vpx_extend_frame_inner_borders(cm->frame_to_show);
}
@@ -2700,7 +2785,16 @@
void vp10_scale_references(VP10_COMP *cpi) {
VP10_COMMON *cm = &cpi->common;
MV_REFERENCE_FRAME ref_frame;
- const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG};
+ const VP9_REFFRAME ref_mask[REFS_PER_FRAME] = {
+ VP9_LAST_FLAG,
+#if CONFIG_EXT_REFS
+ VP9_LAST2_FLAG,
+ VP9_LAST3_FLAG,
+ VP9_LAST4_FLAG,
+#endif // CONFIG_EXT_REFS
+ VP9_GOLD_FLAG,
+ VP9_ALT_FLAG
+ };
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
// Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
@@ -2785,10 +2879,18 @@
if (cpi->oxcf.pass == 0) {
// Only release scaled references under certain conditions:
// if reference will be updated, or if scaled reference has same resolution.
- int refresh[3];
+ int refresh[REFS_PER_FRAME];
+#if CONFIG_EXT_REFS
+ for (i = LAST_FRAME; i <= LAST4_FRAME; ++i)
+ refresh[i - LAST_FRAME] =
+ (cpi->refresh_last_frames[i - LAST_FRAME]) ? 1 : 0;
+ refresh[4] = (cpi->refresh_golden_frame) ? 1 : 0;
+ refresh[5] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
+#else
refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
+#endif // CONFIG_EXT_REFS
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
const int idx = cpi->scaled_ref_idx[i - 1];
RefCntBuffer *const buf = idx != INVALID_IDX ?
@@ -3060,7 +3162,7 @@
init_motion_estimation(cpi);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
+ RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME];
const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
ref_buf->idx = buf_idx;
@@ -3249,7 +3351,6 @@
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
save_coding_context(cpi);
vp10_pack_bitstream(cpi, dest, size);
-
rc->projected_frame_size = (int)(*size) << 3;
restore_coding_context(cpi);
@@ -3422,12 +3523,45 @@
static int get_ref_frame_flags(const VP10_COMP *cpi) {
const int *const map = cpi->common.ref_frame_map;
- const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
- const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
- const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
- int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
- if (gold_is_last)
+#if CONFIG_EXT_REFS
+ const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[0]];
+ const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]];
+
+ const int last2_is_last =
+ map[cpi->lst_fb_idxes[1]] == map[cpi->lst_fb_idxes[0]];
+ const int gld_is_last2 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[1]];
+ const int alt_is_last2 = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[1]];
+
+ const int last3_is_last =
+ map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[0]];
+ const int last3_is_last2 =
+ map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]];
+ const int gld_is_last3 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[2]];
+ const int alt_is_last3 = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[2]];
+
+ const int last4_is_last =
+ map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[0]];
+ const int last4_is_last2 =
+ map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[1]];
+ const int last4_is_last3 =
+ map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[2]];
+ const int gld_is_last4 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[3]];
+ const int alt_is_last4 = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[3]];
+#else
+ const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
+ const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
+#endif // CONFIG_EXT_REFS
+ const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
+
+ int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
+#if CONFIG_EXT_REFS
+ flags |= VP9_LAST2_FLAG;
+ flags |= VP9_LAST3_FLAG;
+ flags |= VP9_LAST4_FLAG;
+#endif // CONFIG_EXT_REFS
+
+ if (gld_is_last)
flags &= ~VP9_GOLD_FLAG;
if (cpi->rc.frames_till_gf_update_due == INT_MAX)
@@ -3436,9 +3570,26 @@
if (alt_is_last)
flags &= ~VP9_ALT_FLAG;
- if (gold_is_alt)
+ if (gld_is_alt)
flags &= ~VP9_ALT_FLAG;
+#if CONFIG_EXT_REFS
+ if (last4_is_last || last4_is_last2 || last4_is_last3)
+ flags &= ~VP9_LAST4_FLAG;
+
+ if (last3_is_last || last3_is_last2)
+ flags &= ~VP9_LAST3_FLAG;
+
+ if (last2_is_last)
+ flags &= ~VP9_LAST2_FLAG;
+
+ if (gld_is_last4 || gld_is_last3 || gld_is_last2)
+ flags &= ~VP9_GOLD_FLAG;
+
+ if (alt_is_last4 || alt_is_last3 || alt_is_last2)
+ flags &= ~VP9_ALT_FLAG;
+#endif // CONFIG_EXT_REFS
+
return flags;
}
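get_ref_frame_flags() starts with every reference enabled and clears a flag
whenever its buffer aliases a higher-priority reference, so duplicated
buffers are searched only once. A standalone sketch of that masking (flag
values are illustrative, not the VP9_*_FLAG constants):

#include <stdio.h>

#define SK_LAST_FLAG 1
#define SK_GOLD_FLAG 2
#define SK_ALT_FLAG  4

int main(void) {
  const int map[3] = { 7, 7, 9 };  /* LAST and GOLDEN share buffer 7 */
  int flags = SK_LAST_FLAG | SK_GOLD_FLAG | SK_ALT_FLAG;
  if (map[1] == map[0]) flags &= ~SK_GOLD_FLAG;  /* gld_is_last */
  if (map[2] == map[0]) flags &= ~SK_ALT_FLAG;   /* alt_is_last */
  if (map[1] == map[2]) flags &= ~SK_ALT_FLAG;   /* gld_is_alt  */
  printf("usable refs mask: 0x%x\n", flags);     /* 0x5 = LAST | ALT */
  return 0;
}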
@@ -3452,7 +3603,15 @@
cpi->ext_refresh_frame_context_pending = 0;
}
if (cpi->ext_refresh_frame_flags_pending) {
+#if CONFIG_EXT_REFS
+ int ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame) {
+ cpi->refresh_last_frames[ref_frame - LAST_FRAME] =
+ cpi->ext_refresh_last_frames[ref_frame - LAST_FRAME];
+ }
+#else
cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
cpi->ext_refresh_frame_flags_pending = 0;
@@ -3515,12 +3674,23 @@
cpi->refresh_alt_ref_frame)
return mask;
for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
- for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
+ for (ifilter = EIGHTTAP; ifilter < SWITCHABLE_FILTERS; ++ifilter)
ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
- for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
+ for (ifilter = EIGHTTAP; ifilter < SWITCHABLE_FILTERS; ++ifilter) {
if ((ref_total[LAST_FRAME] &&
cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
+#if CONFIG_EXT_REFS
+ (ref_total[LAST2_FRAME] == 0 ||
+ cpi->interp_filter_selected[LAST2_FRAME][ifilter] * 50
+ < ref_total[LAST2_FRAME]) &&
+ (ref_total[LAST3_FRAME] == 0 ||
+ cpi->interp_filter_selected[LAST3_FRAME][ifilter] * 50
+ < ref_total[LAST3_FRAME]) &&
+ (ref_total[LAST4_FRAME] == 0 ||
+ cpi->interp_filter_selected[LAST4_FRAME][ifilter] * 50
+ < ref_total[LAST4_FRAME]) &&
+#endif // CONFIG_EXT_REFS
(ref_total[GOLDEN_FRAME] == 0 ||
cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50
< ref_total[GOLDEN_FRAME]) &&
@@ -3637,8 +3807,16 @@
}
// If the encoder forced a KEY_FRAME decision
- if (cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME) {
+#if CONFIG_EXT_REFS
+ int ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame)
+ cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 1;
+ cpi->last_ref_to_refresh = LAST_FRAME;
+#else
cpi->refresh_last_frame = 1;
+#endif // CONFIG_EXT_REFS
+ }
cm->frame_to_show = get_frame_new_buffer(cm);
cm->frame_to_show->color_space = cm->color_space;
@@ -3666,12 +3844,7 @@
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
vp10_adapt_coef_probs(cm);
-#if CONFIG_MISC_FIXES
vp10_adapt_intra_frame_probs(cm);
-#else
- if (!frame_is_intra_only(cm))
- vp10_adapt_intra_frame_probs(cm);
-#endif
}
if (!frame_is_intra_only(cm)) {
@@ -3693,7 +3866,17 @@
cpi->ref_frame_flags = get_ref_frame_flags(cpi);
+#if CONFIG_EXT_REFS
+ cm->last3_frame_type = cm->last2_frame_type;
+ cm->last2_frame_type = cm->last_frame_type;
+#endif // CONFIG_EXT_REFS
cm->last_frame_type = cm->frame_type;
+#if CONFIG_LOOP_RESTORATION
+ if (cm->frame_type != KEY_FRAME)
+ cm->lf.last_restoration_level = cm->lf.restoration_level;
+ else
+ cm->lf.last_restoration_level = 0;
+#endif // CONFIG_LOOP_RESTORATION
vp10_rc_postencode_update(cpi, *size);
@@ -3814,7 +3997,7 @@
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
- const int use_highbitdepth = sd->flags & YV12_FLAG_HIGHBITDEPTH;
+ const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#else
check_initial_width(cpi, subsampling_x, subsampling_y);
@@ -3855,7 +4038,14 @@
const VP10_COMMON *cm = &cpi->common;
return cm->frame_type == KEY_FRAME ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] ||
+ cpi->refresh_last_frames[LAST2_FRAME - LAST_FRAME] ||
+ cpi->refresh_last_frames[LAST3_FRAME - LAST_FRAME] ||
+ cpi->refresh_last_frames[LAST4_FRAME - LAST_FRAME] ||
+#else
cpi->refresh_last_frame ||
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame ||
cpi->refresh_alt_ref_frame ||
cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF ||
@@ -3935,12 +4125,21 @@
}
if (rc->is_src_frame_alt_ref) {
+#if CONFIG_EXT_REFS
+ int ref_frame;
+#endif // CONFIG_EXT_REFS
+
// Current frame is an ARF overlay frame.
cpi->alt_ref_source = NULL;
// Don't refresh the last buffer for an ARF overlay frame. It will
// become the GF so preserve last as an alternative prediction option.
+#if CONFIG_EXT_REFS
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST4_FRAME; ++ref_frame)
+ cpi->refresh_last_frames[ref_frame - LAST_FRAME] = 0;
+#else
cpi->refresh_last_frame = 0;
+#endif // CONFIG_EXT_REFS
}
}
@@ -3957,6 +4156,103 @@
s->stat[ALL] += all;
s->worst = VPXMIN(s->worst, all);
}
+
+static void compute_internal_stats(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ double samples = 0.0;
+ uint32_t in_bit_depth = 8;
+ uint32_t bit_depth = 8;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ in_bit_depth = cpi->oxcf.input_bit_depth;
+ bit_depth = cm->bit_depth;
+ }
+#endif
+ if (cm->show_frame) {
+ const YV12_BUFFER_CONFIG *orig = cpi->Source;
+ const YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
+ double y, u, v, frame_all;
+
+ cpi->count++;
+ if (cpi->b_calculate_psnr) {
+ PSNR_STATS psnr;
+ double frame_ssim2 = 0.0, weight = 0.0;
+ vpx_clear_system_state();
+ // TODO(yaowu): unify these two versions into one.
+#if CONFIG_VP9_HIGHBITDEPTH
+ calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd, in_bit_depth);
+#else
+ calc_psnr(orig, recon, &psnr);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
+ psnr.psnr[0], &cpi->psnr);
+ cpi->total_sq_error += psnr.sse[0];
+ cpi->total_samples += psnr.samples[0];
+ samples = psnr.samples[0];
+ // TODO(yaowu): unify these two versions into one.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth);
+ else
+ frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
+#else
+ frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+      cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
+ cpi->summed_quality += frame_ssim2 * weight;
+ cpi->summed_weights += weight;
+
+#if 0
+ {
+ FILE *f = fopen("q_used.stt", "a");
+        fprintf(f, "%5d : Y%7.3f:U%7.3f:V%7.3f:F%7.3f:S%7.3f\n",
+ cpi->common.current_video_frame, y2, u2, v2,
+ frame_psnr2, frame_ssim2);
+ fclose(f);
+ }
+#endif
+ }
+ if (cpi->b_calculate_blockiness) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
+ {
+ const double frame_blockiness = vp10_get_blockiness(
+ orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride,
+ orig->y_width, orig->y_height);
+ cpi->worst_blockiness = VPXMAX(cpi->worst_blockiness, frame_blockiness);
+ cpi->total_blockiness += frame_blockiness;
+      }
+    }
+
+    if (cpi->b_calculate_consistency) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
+ {
+ const double this_inconsistency = vpx_get_ssim_metrics(
+ orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride,
+ orig->y_width, orig->y_height, cpi->ssim_vars, &cpi->metrics, 1);
+
+ const double peak = (double)((1 << in_bit_depth) - 1);
+ const double consistency = vpx_sse_to_psnr(
+ samples, peak, cpi->total_inconsistency);
+ if (consistency > 0.0)
+ cpi->worst_consistency =
+ VPXMIN(cpi->worst_consistency, consistency);
+ cpi->total_inconsistency += this_inconsistency;
+ }
+ }
+
+ frame_all = vpx_calc_fastssim(orig, recon, &y, &u, &v, bit_depth);
+ adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
+ frame_all = vpx_psnrhvs(orig, recon, &y, &u, &v, bit_depth);
+ adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
+ }
+}
#endif // CONFIG_INTERNAL_STATS
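compute_internal_stats() folds per-frame SSE and sample counts into running
totals and converts them to a global PSNR at the end of the run. A
standalone sketch of that conversion, assuming (from its use here) that
vpx_sse_to_psnr() computes 10 * log10(samples * peak^2 / sse) with a cap
for a zero-error match:

#include <math.h>
#include <stdio.h>

static double sk_sse_to_psnr(double samples, double peak, double sse) {
  if (sse == 0.0) return 100.0;  /* conventional cap for a perfect match */
  return 10.0 * log10(samples * peak * peak / sse);
}

int main(void) {
  const double peak = 255.0;  /* 8-bit: (1 << 8) - 1 */
  /* One 1080p frame with an accumulated SSE of 1e6 -> ~51.3 dB. */
  printf("%.3f dB\n", sk_sse_to_psnr(1920.0 * 1080.0, peak, 1.0e6));
  return 0;
}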
int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
@@ -3991,7 +4287,16 @@
oxcf->frame_parallel_decoding_mode ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
+#if CONFIG_EXT_REFS
+ for (i = LAST_FRAME; i <= LAST4_FRAME; ++i) {
+ if (i == cpi->last_ref_to_refresh)
+ cpi->refresh_last_frames[i - LAST_FRAME] = 1;
+ else
+ cpi->refresh_last_frames[i - LAST_FRAME] = 0;
+ }
+#else
cpi->refresh_last_frame = 1;
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 0;
@@ -4015,12 +4320,15 @@
cm->intra_only = 0;
cpi->refresh_alt_ref_frame = 1;
cpi->refresh_golden_frame = 0;
+#if CONFIG_EXT_REFS
+ for (i = LAST_FRAME; i <= LAST4_FRAME; ++i)
+ cpi->refresh_last_frames[i - LAST_FRAME] = 0;
+#else
cpi->refresh_last_frame = 0;
+#endif // CONFIG_EXT_REFS
rc->is_src_frame_alt_ref = 0;
- rc->source_alt_ref_pending = 0;
- } else {
- rc->source_alt_ref_pending = 0;
}
+ rc->source_alt_ref_pending = 0;
}
if (!source) {
@@ -4140,177 +4448,11 @@
generate_psnr_packet(cpi);
#if CONFIG_INTERNAL_STATS
-
if (oxcf->pass != 1) {
- double samples = 0.0;
+ compute_internal_stats(cpi);
cpi->bytes += (int)(*size);
-
- if (cm->show_frame) {
- cpi->count++;
-
- if (cpi->b_calculate_psnr) {
- YV12_BUFFER_CONFIG *orig = cpi->Source;
- YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
- YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
- PSNR_STATS psnr;
-#if CONFIG_VP9_HIGHBITDEPTH
- calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
- cpi->oxcf.input_bit_depth);
-#else
- calc_psnr(orig, recon, &psnr);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
- psnr.psnr[0], &cpi->psnr);
- cpi->total_sq_error += psnr.sse[0];
- cpi->total_samples += psnr.samples[0];
- samples = psnr.samples[0];
-
- {
- PSNR_STATS psnr2;
- double frame_ssim2 = 0, weight = 0;
-#if CONFIG_VP9_POSTPROC
- if (vpx_alloc_frame_buffer(&cm->post_proc_buffer,
- recon->y_crop_width, recon->y_crop_height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS,
- cm->byte_alignment) < 0) {
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate post processing buffer");
- }
-
- vp10_deblock(cm->frame_to_show, &cm->post_proc_buffer,
- cm->lf.filter_level * 10 / 6);
-#endif
- vpx_clear_system_state();
-
-#if CONFIG_VP9_HIGHBITDEPTH
- calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
- cpi->oxcf.input_bit_depth);
-#else
- calc_psnr(orig, pp, &psnr2);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- cpi->totalp_sq_error += psnr2.sse[0];
- cpi->totalp_samples += psnr2.samples[0];
- adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
- psnr2.psnr[0], &cpi->psnrp);
-
-#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight,
- (int)cm->bit_depth);
- } else {
- frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
- }
-#else
- frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- cpi->worst_ssim= VPXMIN(cpi->worst_ssim, frame_ssim2);
- cpi->summed_quality += frame_ssim2 * weight;
- cpi->summed_weights += weight;
-
-#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- frame_ssim2 = vpx_highbd_calc_ssim(
- orig, &cm->post_proc_buffer, &weight, (int)cm->bit_depth);
- } else {
- frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);
- }
-#else
- frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- cpi->summedp_quality += frame_ssim2 * weight;
- cpi->summedp_weights += weight;
-#if 0
- {
- FILE *f = fopen("q_used.stt", "a");
- fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
- cpi->common.current_video_frame, y2, u2, v2,
- frame_psnr2, frame_ssim2);
- fclose(f);
- }
-#endif
- }
- }
- if (cpi->b_calculate_blockiness) {
-#if CONFIG_VP9_HIGHBITDEPTH
- if (!cm->use_highbitdepth)
-#endif
- {
- double frame_blockiness = vp10_get_blockiness(
- cpi->Source->y_buffer, cpi->Source->y_stride,
- cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
- cpi->Source->y_width, cpi->Source->y_height);
- cpi->worst_blockiness =
- VPXMAX(cpi->worst_blockiness, frame_blockiness);
- cpi->total_blockiness += frame_blockiness;
- }
- }
-
- if (cpi->b_calculate_consistency) {
-#if CONFIG_VP9_HIGHBITDEPTH
- if (!cm->use_highbitdepth)
-#endif
- {
- double this_inconsistency = vpx_get_ssim_metrics(
- cpi->Source->y_buffer, cpi->Source->y_stride,
- cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
- cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
- &cpi->metrics, 1);
-
- const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
- double consistency = vpx_sse_to_psnr(samples, peak,
- (double)cpi->total_inconsistency);
- if (consistency > 0.0)
- cpi->worst_consistency =
- VPXMIN(cpi->worst_consistency, consistency);
- cpi->total_inconsistency += this_inconsistency;
- }
- }
-
- if (cpi->b_calculate_ssimg) {
- double y, u, v, frame_all;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- frame_all = vpx_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
- &u, &v, (int)cm->bit_depth);
- } else {
- frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
- &v);
- }
-#else
- frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
-#endif // CONFIG_VP9_HIGHBITDEPTH
- adjust_image_stat(y, u, v, frame_all, &cpi->ssimg);
- }
-#if CONFIG_VP9_HIGHBITDEPTH
- if (!cm->use_highbitdepth)
-#endif
- {
- double y, u, v, frame_all;
- frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
- &v);
- adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
- /* TODO(JBB): add 10/12 bit support */
- }
-#if CONFIG_VP9_HIGHBITDEPTH
- if (!cm->use_highbitdepth)
-#endif
- {
- double y, u, v, frame_all;
- frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
- adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
- }
- }
}
#endif
-
vpx_clear_system_state();
return 0;
}
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index bd6a009..8faf4ed 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -55,10 +55,6 @@
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
-#if !CONFIG_MISC_FIXES
- vpx_prob segment_pred_probs[PREDICTION_PROBS];
-#endif
-
unsigned char *last_frame_seg_map_copy;
// 0 = Intra, Last, GF, ARF
@@ -308,17 +304,33 @@
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
+#if CONFIG_EXT_REFS
+ int last_ref_to_refresh;
+#endif // CONFIG_EXT_REFS
+
int scaled_ref_idx[MAX_REF_FRAMES];
+#if CONFIG_EXT_REFS
+ int lst_fb_idxes[LAST_REF_FRAMES];
+#else
int lst_fb_idx;
+#endif // CONFIG_EXT_REFS
int gld_fb_idx;
int alt_fb_idx;
+#if CONFIG_EXT_REFS
+ int refresh_last_frames[LAST_REF_FRAMES];
+#else
int refresh_last_frame;
+#endif // CONFIG_EXT_REFS
int refresh_golden_frame;
int refresh_alt_ref_frame;
int ext_refresh_frame_flags_pending;
+#if CONFIG_EXT_REFS
+ int ext_refresh_last_frames[LAST_REF_FRAMES];
+#else
int ext_refresh_last_frame;
+#endif // CONFIG_EXT_REFS
int ext_refresh_golden_frame;
int ext_refresh_alt_ref_frame;
@@ -326,6 +338,9 @@
int ext_refresh_frame_context;
YV12_BUFFER_CONFIG last_frame_uf;
+#if CONFIG_LOOP_RESTORATION
+ YV12_BUFFER_CONFIG last_frame_db;
+#endif // CONFIG_LOOP_RESTORATION
TOKENEXTRA *tile_tok[4][1 << 6];
unsigned int tok_count[4][1 << 6];
@@ -406,28 +421,19 @@
uint64_t total_samples;
ImageStat psnr;
- uint64_t totalp_sq_error;
- uint64_t totalp_samples;
- ImageStat psnrp;
-
double total_blockiness;
double worst_blockiness;
int bytes;
double summed_quality;
double summed_weights;
- double summedp_quality;
- double summedp_weights;
unsigned int tot_recode_hits;
double worst_ssim;
- ImageStat ssimg;
ImageStat fastssim;
ImageStat psnrhvs;
- int b_calculate_ssimg;
int b_calculate_blockiness;
-
int b_calculate_consistency;
double total_inconsistency;
@@ -456,19 +462,48 @@
search_site_config ss_cfg;
- int mbmode_cost[INTRA_MODES];
+ int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
+#if CONFIG_REF_MV
+ int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
+ int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2];
+ int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
+ int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
+ int drl_mode_cost1[DRL_MODE_CONTEXTS][2];
+#if CONFIG_EXT_INTER
+ int new2mv_mode_cost[2];
+#endif // CONFIG_EXT_INTER
+#endif
+
unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
+#if CONFIG_EXT_INTER
+ unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS]
+ [INTER_COMPOUND_MODES];
+#endif // CONFIG_EXT_INTER
int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
+ int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
+ int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
+ int palette_y_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
+ [PALETTE_COLORS];
+ int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
+ [PALETTE_COLORS];
+#if CONFIG_EXT_TX
+ int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+ int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES];
+#else
+ int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+ int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+ int intra_filter_cost[INTRA_FILTERS + 1][INTRA_FILTERS];
+#endif // CONFIG_EXT_INTRA
int multi_arf_allowed;
int multi_arf_enabled;
int multi_arf_last_grp_enabled;
-
- int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
- int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_DENOISER denoiser;
#endif
@@ -549,13 +584,17 @@
static INLINE int get_ref_frame_map_idx(const VP10_COMP *cpi,
MV_REFERENCE_FRAME ref_frame) {
- if (ref_frame == LAST_FRAME) {
+#if CONFIG_EXT_REFS
+ if (ref_frame >= LAST_FRAME && ref_frame <= LAST4_FRAME)
+    return cpi->lst_fb_idxes[ref_frame - LAST_FRAME];
+#else
+ if (ref_frame == LAST_FRAME)
return cpi->lst_fb_idx;
- } else if (ref_frame == GOLDEN_FRAME) {
+#endif // CONFIG_EXT_REFS
+ else if (ref_frame == GOLDEN_FRAME)
return cpi->gld_fb_idx;
- } else {
+ else
return cpi->alt_fb_idx;
- }
}
static INLINE int get_ref_frame_buf_idx(const VP10_COMP *const cpi,
diff --git a/vp10/encoder/ethread.c b/vp10/encoder/ethread.c
index ad47ccf..6cb9494 100644
--- a/vp10/encoder/ethread.c
+++ b/vp10/encoder/ethread.c
@@ -133,6 +133,13 @@
memcpy(thread_data->td->counts, &cpi->common.counts,
sizeof(cpi->common.counts));
}
+
+ // Allocate buffers used by palette coding mode.
+ if (cpi->common.allow_screen_content_tools && i < num_workers - 1) {
+ MACROBLOCK *x = &thread_data->td->mb;
+ CHECK_MEM_ERROR(cm, x->palette_buffer,
+ vpx_memalign(16, sizeof(*x->palette_buffer)));
+ }
}
// Encode a frame
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c
index bc1ce00..0404e27 100644
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -1044,8 +1044,13 @@
((twopass->this_frame_stats.intra_error /
DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
if (gld_yv12 != NULL) {
+#if CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]]);
+#else
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
cm->ref_frame_map[cpi->lst_fb_idx]);
+#endif // CONFIG_EXT_REFS
}
twopass->sr_update_lag = 1;
} else {
@@ -1055,14 +1060,25 @@
vpx_extend_frame_borders(new_yv12);
// The frame we just compressed now becomes the last frame.
+#if CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]],
+ cm->new_fb_idx);
+#else
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
cm->new_fb_idx);
+#endif // CONFIG_EXT_REFS
// Special case for the first frame. Copy into the GF buffer as a second
// reference.
if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) {
+#if CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]]);
+#else
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
cm->ref_frame_map[cpi->lst_fb_idx]);
+#endif // CONFIG_EXT_REFS
}
// Use this to see what the first pass reconstruction looks like.
@@ -2384,28 +2400,48 @@
cpi->rc.is_src_frame_alt_ref = 0;
switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
case KF_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 1;
+#else
cpi->refresh_last_frame = 1;
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
break;
case LF_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 1;
+#else
cpi->refresh_last_frame = 1;
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 0;
break;
case GF_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 1;
+#else
cpi->refresh_last_frame = 1;
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 0;
break;
case OVERLAY_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 0;
+#else
cpi->refresh_last_frame = 0;
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 0;
cpi->rc.is_src_frame_alt_ref = 1;
break;
case ARF_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_last_frames[LAST_FRAME - LAST_FRAME] = 0;
+#else
cpi->refresh_last_frame = 0;
+#endif // CONFIG_EXT_REFS
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 1;
break;
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
new file mode 100644
index 0000000..0f59259
--- /dev/null
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vp10/common/idct.h"
+#include "vp10/encoder/hybrid_fwd_txfm.h"
+
+static INLINE void fdct32x32(int rd_transform, const int16_t *src,
+ tran_low_t *dst, int src_stride) {
+ if (rd_transform)
+ vpx_fdct32x32_rd(src, dst, src_stride);
+ else
+ vpx_fdct32x32(src, dst, src_stride);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
+ tran_low_t *dst, int src_stride) {
+ if (rd_transform)
+ vpx_highbd_fdct32x32_rd(src, dst, src_stride);
+ else
+ vpx_highbd_fdct32x32(src, dst, src_stride);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_TX
+// Forward identity transform.
+static void fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
+ int bs) {
+ int r, c;
+ const int shift = bs < 32 ? 3 : 2;
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
+ src_diff += stride;
+ coeff += bs;
+ }
+}
+#endif // CONFIG_EXT_TX
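fwd_idtx_c() above implements the identity transform as a pure scale: the
residuals are shifted left (by 3 below 32x32, by 2 at 32x32) so their
magnitude matches the output range of the real transforms at the same size.
A standalone sketch (multiplication replaces the shift here only to keep
negative inputs well-defined in portable C):

#include <stdio.h>

static void sk_fwd_idtx(const short *src, int *coeff, int stride, int bs) {
  const int shift = bs < 32 ? 3 : 2;
  int r, c;
  for (r = 0; r < bs; ++r) {
    for (c = 0; c < bs; ++c) coeff[c] = src[c] * (1 << shift);
    src += stride;
    coeff += bs;
  }
}

int main(void) {
  const short src[2 * 2] = { 1, -2, 3, -4 };  /* 2x2 demo; real sizes 4..32 */
  int coeff[2 * 2], i;
  sk_fwd_idtx(src, coeff, 2, 2);
  for (i = 0; i < 4; ++i) printf("%d ", coeff[i]);  /* 8 -16 24 -32 */
  printf("\n");
  return 0;
}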
+
+void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type, int lossless) {
+ if (lossless) {
+ assert(tx_type == DCT_DCT);
+ vp10_fwht4x4(src_diff, coeff, diff_stride);
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ else // FWD_TXFM_OPT_DC
+ vpx_fdct8x8_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ else // FWD_TXFM_OPT_DC
+ vpx_fdct16x16_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
+ tran_low_t *coeff, int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ switch (tx_type) {
+ case DCT_DCT:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ fdct32x32(rd_transform, src_diff, coeff, diff_stride);
+ else // FWD_TXFM_OPT_DC
+ vpx_fdct32x32_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ assert(0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type, int lossless) {
+ if (lossless) {
+ assert(tx_type == DCT_DCT);
+ vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ else // FWD_TXFM_OPT_DC
+ vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ else // FWD_TXFM_OPT_DC
+ vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ case DST_FLIPADST:
+ case FLIPADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
+ tran_low_t *coeff, int diff_stride,
+ TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt) {
+ switch (tx_type) {
+ case DCT_DCT:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ highbd_fdct32x32(rd_transform, src_diff, coeff, diff_stride);
+ else // FWD_TXFM_OPT_DC
+ vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case IDTX:
+ fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ assert(0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
+ FWD_TXFM_PARAM *fwd_txfm_param) {
+ const int fwd_txfm_opt = fwd_txfm_param->fwd_txfm_opt;
+ const TX_TYPE tx_type = fwd_txfm_param->tx_type;
+ const TX_SIZE tx_size = fwd_txfm_param->tx_size;
+ const int rd_transform = fwd_txfm_param->rd_transform;
+ const int lossless = fwd_txfm_param->lossless;
+ switch (tx_size) {
+ case TX_32X32:
+ fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
+ fwd_txfm_opt);
+ break;
+ case TX_16X16:
+ fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+ case TX_8X8:
+ fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+ case TX_4X4:
+ vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) {
+ const int fwd_txfm_opt = fwd_txfm_param->fwd_txfm_opt;
+ const TX_TYPE tx_type = fwd_txfm_param->tx_type;
+ const TX_SIZE tx_size = fwd_txfm_param->tx_size;
+ const int rd_transform = fwd_txfm_param->rd_transform;
+ const int lossless = fwd_txfm_param->lossless;
+ switch (tx_size) {
+ case TX_32X32:
+ highbd_fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
+ fwd_txfm_opt);
+ break;
+ case TX_16X16:
+ highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type,
+ fwd_txfm_opt);
+ break;
+ case TX_8X8:
+ highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+ case TX_4X4:
+ vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/encoder/hybrid_fwd_txfm.h b/vp10/encoder/hybrid_fwd_txfm.h
new file mode 100644
index 0000000..62b8d5a
--- /dev/null
+++ b/vp10/encoder/hybrid_fwd_txfm.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_HYBRID_FWD_TXFM_H_
+#define VP10_ENCODER_HYBRID_FWD_TXFM_H_
+
+#include "./vpx_config.h"
+
+typedef enum FWD_TXFM_OPT { FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC } FWD_TXFM_OPT;
+
+typedef struct FWD_TXFM_PARAM {
+ TX_TYPE tx_type;
+ TX_SIZE tx_size;
+ FWD_TXFM_OPT fwd_txfm_opt;
+ int rd_transform;
+ int lossless;
+} FWD_TXFM_PARAM;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
+ FWD_TXFM_PARAM *fwd_txfm_param);
+void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type, int lossless);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param);
+void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type, int lossless);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE int get_tx1d_size(TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_32X32:
+ return 32;
+ case TX_16X16:
+ return 16;
+ case TX_8X8:
+ return 8;
+ case TX_4X4:
+ return 4;
+ default:
+ assert(0);
+ return -1;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_HYBRID_FWD_TXFM_H_
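A minimal usage sketch of the dispatcher declared above (the buffers src_diff, coeff and diff_stride are assumed to be in scope; nothing here is taken from the patch itself):

    FWD_TXFM_PARAM param;
    param.tx_type = DCT_DCT;
    param.tx_size = TX_8X8;
    param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;  /* full transform, not DC-only */
    param.rd_transform = 0;  /* only consulted for TX_32X32 */
    param.lossless = 0;      /* only consulted for TX_4X4 */
    fwd_txfm(src_diff, coeff, diff_stride, &param);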
diff --git a/vp10/encoder/mbgraph.c b/vp10/encoder/mbgraph.c
index ed0f539..ab1e60f 100644
--- a/vp10/encoder/mbgraph.c
+++ b/vp10/encoder/mbgraph.c
@@ -67,6 +67,11 @@
&distortion, &sse, NULL, 0, 0);
}
+#if CONFIG_EXT_INTER
+ if (has_second_ref(&xd->mi[0]->mbmi))
+ xd->mi[0]->mbmi.mode = NEW_NEWMV;
+ else
+#endif // CONFIG_EXT_INTER
xd->mi[0]->mbmi.mode = NEWMV;
xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv;
diff --git a/vp10/encoder/palette.c b/vp10/encoder/palette.c
new file mode 100644
index 0000000..522e185
--- /dev/null
+++ b/vp10/encoder/palette.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include "vp10/encoder/palette.h"
+
+static double calc_dist(const double *p1, const double *p2, int dim) {
+ double dist = 0;
+ int i = 0;
+
+ for (i = 0; i < dim; ++i) {
+ dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i]));
+ }
+ return dist;
+}
+
+void vp10_calc_indices(const double *data, const double *centroids,
+ uint8_t *indices, int n, int k, int dim) {
+ int i, j;
+ double min_dist, this_dist;
+
+ for (i = 0; i < n; ++i) {
+ min_dist = calc_dist(data + i * dim, centroids, dim);
+ indices[i] = 0;
+ for (j = 1; j < k; ++j) {
+ this_dist = calc_dist(data + i * dim, centroids + j * dim, dim);
+ if (this_dist < min_dist) {
+ min_dist = this_dist;
+ indices[i] = j;
+ }
+ }
+ }
+}
+
+// Generate a random number in the range [0, 32768).
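+// The multiplier/increment pair (1103515245, 12345) is the classic ANSI C
+// reference rand() LCG; dividing by 65536 keeps bits 16..30 and discards
+// the weaker low-order bits.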
+static unsigned int lcg_rand16(unsigned int *state) {
+ *state = *state * 1103515245 + 12345;
+ return *state / 65536 % 32768;
+}
+
+static void calc_centroids(const double *data, double *centroids,
+ const uint8_t *indices, int n, int k, int dim) {
+ int i, j, index;
+ int count[PALETTE_MAX_SIZE];
+ unsigned int rand_state = (unsigned int)data[0];
+
+ assert(n <= 32768);
+
+ memset(count, 0, sizeof(count[0]) * k);
+ memset(centroids, 0, sizeof(centroids[0]) * k * dim);
+
+ for (i = 0; i < n; ++i) {
+ index = indices[i];
+ assert(index < k);
+ ++count[index];
+ for (j = 0; j < dim; ++j) {
+ centroids[index * dim + j] += data[i * dim + j];
+ }
+ }
+
+ for (i = 0; i < k; ++i) {
+ if (count[i] == 0) {
+ memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
+ sizeof(centroids[0]) * dim);
+ } else {
+ const double norm = 1.0 / count[i];
+ for (j = 0; j < dim; ++j)
+ centroids[i * dim + j] *= norm;
+ }
+ }
+}
+
+static double calc_total_dist(const double *data, const double *centroids,
+ const uint8_t *indices, int n, int k, int dim) {
+ double dist = 0;
+ int i;
+ (void) k;
+
+ for (i = 0; i < n; ++i)
+ dist += calc_dist(data + i * dim, centroids + indices[i] * dim, dim);
+
+ return dist;
+}
+
+int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
+ uint8_t *pre_indices, int n, int k, int dim, int max_itr) {
+ int i = 0;
+ double pre_dist, this_dist;
+  // k * dim doubles are copied below and dim can exceed 1 (e.g. 2 for UV),
+  // so size the scratch buffer accordingly.
+  double pre_centroids[2 * PALETTE_MAX_SIZE];
+
+ vp10_calc_indices(data, centroids, indices, n, k, dim);
+ pre_dist = calc_total_dist(data, centroids, indices, n, k, dim);
+ memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
+ memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
+ while (i < max_itr) {
+ calc_centroids(data, centroids, indices, n, k, dim);
+ vp10_calc_indices(data, centroids, indices, n, k, dim);
+ this_dist = calc_total_dist(data, centroids, indices, n, k, dim);
+
+ if (this_dist > pre_dist) {
+ memcpy(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim);
+ memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n);
+ break;
+ }
+ if (!memcmp(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim))
+ break;
+
+ memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
+ memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
+ pre_dist = this_dist;
+ ++i;
+ }
+
+ return i;
+}
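A hedged usage sketch (buffer names and sizes below are illustrative, not from this patch): clustering 64 luma samples into a 4-color palette with dim == 1.

    double data[64], centroids[PALETTE_MAX_SIZE];
    uint8_t indices[64], pre_indices[64];
    int i, iterations;
    for (i = 0; i < 64; ++i) data[i] = (double)luma_samples[i];  /* assumed input */
    for (i = 0; i < 4; ++i) centroids[i] = 32.0 + 64.0 * i;      /* rough seeds */
    iterations = vp10_k_means(data, centroids, indices, pre_indices,
                              64, 4, 1, 50);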
+
+void vp10_insertion_sort(double *data, int n) {
+ int i, j, k;
+ double val;
+
+ if (n <= 1)
+ return;
+
+ for (i = 1; i < n; ++i) {
+ val = data[i];
+ j = 0;
+    while (j < i && val > data[j])
+ ++j;
+
+ if (j == i)
+ continue;
+
+ for (k = i; k > j; --k)
+ data[k] = data[k - 1];
+ data[j] = val;
+ }
+}
+
+int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols) {
+ int n = 0, r, c, i, val_count[256];
+ uint8_t val;
+ memset(val_count, 0, sizeof(val_count));
+
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+ val = src[r * stride + c];
+ ++val_count[val];
+ }
+ }
+
+ for (i = 0; i < 256; ++i) {
+ if (val_count[i]) {
+ ++n;
+ }
+ }
+
+ return n;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows,
+ int cols, int bit_depth) {
+ int n = 0, r, c, i;
+ uint16_t val;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ int val_count[1 << 12];
+
+ assert(bit_depth <= 12);
+ memset(val_count, 0, (1 << 12) * sizeof(val_count[0]));
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+ val = src[r * stride + c];
+ ++val_count[val];
+ }
+ }
+
+ for (i = 0; i < (1 << bit_depth); ++i) {
+ if (val_count[i]) {
+ ++n;
+ }
+ }
+
+ return n;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/encoder/palette.h b/vp10/encoder/palette.h
new file mode 100644
index 0000000..124cf74
--- /dev/null
+++ b/vp10/encoder/palette.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_PALETTE_H_
+#define VP10_ENCODER_PALETTE_H_
+
+#include "vp10/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_insertion_sort(double *data, int n);
+void vp10_calc_indices(const double *data, const double *centroids,
+ uint8_t *indices, int n, int k, int dim);
+int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
+ uint8_t *pre_indices, int n, int k, int dim, int max_itr);
+int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols);
+#if CONFIG_VP9_HIGHBITDEPTH
+int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows,
+ int cols, int bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* VP10_ENCODER_PALETTE_H_ */
diff --git a/vp10/encoder/picklpf.c b/vp10/encoder/picklpf.c
index 045e03d..85735a4 100644
--- a/vp10/encoder/picklpf.c
+++ b/vp10/encoder/picklpf.c
@@ -34,20 +34,27 @@
}
}
-
+#if !CONFIG_LOOP_RESTORATION
+#ifndef JOINT_FILTER_RESTORATION_SEARCH
static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
VP10_COMP *const cpi,
int filt_level, int partial_frame) {
VP10_COMMON *const cm = &cpi->common;
int64_t filt_err;
+#if CONFIG_VAR_TX
+ vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
+ 1, partial_frame);
+#else
if (cpi->num_workers > 1)
vp10_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
- filt_level, 1, partial_frame,
- cpi->workers, cpi->num_workers, &cpi->lf_row_sync);
+ filt_level, 1, partial_frame,
+ cpi->workers, cpi->num_workers,
+ &cpi->lf_row_sync);
else
vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
- 1, partial_frame);
+ 1, partial_frame);
+#endif  // CONFIG_VAR_TX
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
@@ -64,7 +71,182 @@
return filt_err;
}
+#endif  // JOINT_FILTER_RESTORATION_SEARCH
+#endif  // !CONFIG_LOOP_RESTORATION
+#if CONFIG_LOOP_RESTORATION
+#define JOINT_FILTER_RESTORATION_SEARCH
+#define USE_RD_LOOP_POSTFILTER_SEARCH
+static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *sd,
+                                     VP10_COMP *const cpi,
+                                     int restoration_level,
+                                     int partial_frame) {
+ VP10_COMMON *const cm = &cpi->common;
+  // vp10_get_y_sse() returns int64_t; keep full precision here.
+  int64_t filt_err;
+ vp10_loop_restoration_frame(cm->frame_to_show, cm,
+ restoration_level, 1, partial_frame);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ filt_err = vp10_highbd_get_y_sse(sd, cm->frame_to_show);
+ } else {
+ filt_err = vp10_get_y_sse(sd, cm->frame_to_show);
+ }
+#else
+ filt_err = vp10_get_y_sse(sd, cm->frame_to_show);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Re-instate the unfiltered frame
+ vpx_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show);
+ return filt_err;
+}
+
+static int search_restoration_level(const YV12_BUFFER_CONFIG *sd,
+ VP10_COMP *cpi,
+ int filter_level, int partial_frame,
+ double *best_cost_ret) {
+ VP10_COMMON *const cm = &cpi->common;
+  int i, restoration_best;
+  int64_t err;
+ double best_cost;
+ double cost;
+ const int restoration_level_bits = vp10_restoration_level_bits(&cpi->common);
+ const int restoration_levels = 1 << restoration_level_bits;
+#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
+ MACROBLOCK *x = &cpi->td.mb;
+ int bits;
+#endif
+
+ // Make a copy of the unfiltered / processed recon buffer
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+ vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level,
+ 1, partial_frame);
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
+
+ restoration_best = 0;
+ err = try_restoration_frame(sd, cpi, 0, partial_frame);
+#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
+ bits = cm->lf.last_restoration_level == 0 ? 0 : restoration_level_bits;
+ cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err);
+#else
+ cost = (double)err;
+#endif // USE_RD_LOOP_POSTFILTER_SEARCH
+ best_cost = cost;
+ for (i = 1; i <= restoration_levels; ++i) {
+ err = try_restoration_frame(sd, cpi, i, partial_frame);
+#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
+    // When RDCOST is used, rate is normally bits * 256 and distortion is
+    // sum of squared error * 64. Below, both are scaled in the correct
+    // ratio to each other, though not by exactly those values.
+ bits = cm->lf.last_restoration_level == i ? 0 : restoration_level_bits;
+ cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err);
+#else
+ cost = (double)err;
+#endif // USE_RD_LOOP_POSTFILTER_SEARCH
+ if (cost < best_cost) {
+ restoration_best = i;
+ best_cost = cost;
+ }
+ }
+ if (best_cost_ret) *best_cost_ret = best_cost;
+ vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+ return restoration_best;
+}
+
+#ifdef JOINT_FILTER_RESTORATION_SEARCH
+static int search_filter_restoration_level(const YV12_BUFFER_CONFIG *sd,
+ VP10_COMP *cpi,
+ int partial_frame,
+ int *restoration_level) {
+ const VP10_COMMON *const cm = &cpi->common;
+ const struct loopfilter *const lf = &cm->lf;
+ const int min_filter_level = 0;
+ const int max_filter_level = get_max_filter_level(cpi);
+ int filt_direction = 0;
+ int filt_best, restoration_best;
+ double best_err;
+ int i;
+
+ // Start the search at the previous frame filter level unless it is now out of
+ // range.
+ int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
+ int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
+ double ss_err[MAX_LOOP_FILTER + 1];
+ int bilateral;
+
+ // Set each entry to -1
+ for (i = 0; i <= MAX_LOOP_FILTER; ++i)
+ ss_err[i] = -1.0;
+
+ bilateral = search_restoration_level(sd, cpi, filt_mid,
+ partial_frame, &best_err);
+ filt_best = filt_mid;
+ restoration_best = bilateral;
+ ss_err[filt_mid] = best_err;
+
+ while (filter_step > 0) {
+ const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level);
+ const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level);
+
+ // Bias against raising loop filter in favor of lowering it.
+ double bias = (best_err / (1 << (15 - (filt_mid / 8)))) * filter_step;
+
+ if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
+ bias = (bias * cpi->twopass.section_intra_rating) / 20;
+
+    // Bias less for large block sizes.
+ if (cm->tx_mode != ONLY_4X4)
+ bias /= 2;
+
+ if (filt_direction <= 0 && filt_low != filt_mid) {
+ // Get Low filter error score
+ if (ss_err[filt_low] < 0) {
+ bilateral = search_restoration_level(sd, cpi, filt_low,
+ partial_frame,
+ &ss_err[filt_low]);
+ }
+ // If value is close to the best so far then bias towards a lower loop
+ // filter value.
+ if ((ss_err[filt_low] - bias) < best_err) {
+ // Was it actually better than the previous best?
+ if (ss_err[filt_low] < best_err) {
+ best_err = ss_err[filt_low];
+ }
+
+ filt_best = filt_low;
+ restoration_best = bilateral;
+ }
+ }
+
+ // Now look at filt_high
+ if (filt_direction >= 0 && filt_high != filt_mid) {
+ if (ss_err[filt_high] < 0) {
+ bilateral = search_restoration_level(sd, cpi, filt_high, partial_frame,
+ &ss_err[filt_high]);
+ }
+ // Was it better than the previous best?
+ if (ss_err[filt_high] < (best_err - bias)) {
+ best_err = ss_err[filt_high];
+ filt_best = filt_high;
+ restoration_best = bilateral;
+ }
+ }
+
+    // Halve the step distance if the best filter value was the same as last
+    // time.
+ if (filt_best == filt_mid) {
+ filter_step /= 2;
+ filt_direction = 0;
+ } else {
+ filt_direction = (filt_best < filt_mid) ? -1 : 1;
+ filt_mid = filt_best;
+ }
+ }
+ *restoration_level = restoration_best;
+ return filt_best;
+}
+#endif // JOINT_FILTER_RESTORATION_SEARCH
+#endif // CONFIG_LOOP_RESTORATION
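The joint search above is a coarse-to-fine line search: probe one filter step to each side of the current best, move to whichever side improves, and halve the step once neither does. A distilled skeleton of just that control flow, omitting the bias and direction tracking used above (f, start, min_level and max_level are placeholders, not names from this patch):

    int mid = start, step = (start < 16) ? 4 : start / 4, best_lvl = start;
    double best = f(mid);
    while (step > 0) {
      const int lo = VPXMAX(mid - step, min_level);
      const int hi = VPXMIN(mid + step, max_level);
      const double e_lo = f(lo), e_hi = f(hi);
      if (e_lo < best) { best = e_lo; best_lvl = lo; }
      if (e_hi < best) { best = e_hi; best_lvl = hi; }
      if (best_lvl == mid) step /= 2;  /* no improvement: refine */
      else mid = best_lvl;             /* improvement: recenter */
    }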
+
+#if !CONFIG_LOOP_RESTORATION
+#ifndef JOINT_FILTER_RESTORATION_SEARCH
static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
int partial_frame) {
const VP10_COMMON *const cm = &cpi->common;
@@ -146,6 +328,8 @@
return filt_best;
}
+#endif  // JOINT_FILTER_RESTORATION_SEARCH
+#endif  // !CONFIG_LOOP_RESTORATION
void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
LPF_PICK_METHOD method) {
@@ -186,8 +370,24 @@
if (cm->frame_type == KEY_FRAME)
filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
+#if CONFIG_LOOP_RESTORATION
+ lf->restoration_level = search_restoration_level(
+ sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, NULL);
+#endif // CONFIG_LOOP_RESTORATION
} else {
- lf->filter_level = search_filter_level(sd, cpi,
- method == LPF_PICK_FROM_SUBIMAGE);
+#if CONFIG_LOOP_RESTORATION
+#ifdef JOINT_FILTER_RESTORATION_SEARCH
+ lf->filter_level = search_filter_restoration_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &lf->restoration_level);
+#else
+ lf->filter_level = search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
+ lf->restoration_level = search_restoration_level(
+ sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, NULL);
+#endif // JOINT_FILTER_RESTORATION_SEARCH
+#else
+ lf->filter_level = search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
+#endif // CONFIG_LOOP_RESTORATION
}
}
diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c
index 86b324f..66db396 100644
--- a/vp10/encoder/quantize.c
+++ b/vp10/encoder/quantize.c
@@ -10,16 +10,193 @@
#include <math.h>
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vp10/common/quant_common.h"
+#include "vp10/common/scan.h"
#include "vp10/common/seg_common.h"
#include "vp10/encoder/encoder.h"
#include "vp10/encoder/quantize.h"
#include "vp10/encoder/rd.h"
+void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+ *eob_ptr = 0;
+}
+
+void vp10_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vp10_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+}
+
+void vp10_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vpx_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan);
+}
+
+void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+ (void)sc;
+ vpx_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
+ p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
+ eob_ptr);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_quantize_fp_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vp10_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+}
+
+void vp10_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vpx_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+}
+
+void vp10_highbd_quantize_dc_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ (void)sc;
+
+ vpx_highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
+ p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant[0], eob_ptr);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_quantize_fp_32x32_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vp10_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+}
+
+void vp10_quantize_b_32x32_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vpx_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+}
+
+void vp10_quantize_dc_32x32_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ (void)sc;
+ (void)n_coeffs;
+
+ vpx_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0],
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_quantize_fp_32x32_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vp10_highbd_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan);
+}
+
+void vp10_highbd_quantize_b_32x32_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ vpx_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift, qcoeff_ptr,
+ dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan);
+}
+
+void vp10_highbd_quantize_dc_32x32_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc) {
+ // obsolete skip_block
+ const int skip_block = 0;
+
+ (void)sc;
+ (void)n_coeffs;
+
+ vpx_highbd_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0],
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
+ eob_ptr);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
diff --git a/vp10/encoder/quantize.h b/vp10/encoder/quantize.h
index b44088e..9c0ab3f 100644
--- a/vp10/encoder/quantize.h
+++ b/vp10/encoder/quantize.h
@@ -12,12 +12,20 @@
#define VP10_ENCODER_QUANTIZE_H_
#include "./vpx_config.h"
+#include "vp10/common/scan.h"
#include "vp10/encoder/block.h"
#ifdef __cplusplus
extern "C" {
#endif
+typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+
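The typedef above lets encoder call sites select a quantizer through a function pointer. A hedged sketch of such a dispatch table (the table itself is illustrative; only the facade functions are part of this patch):

    static const VP10_QUANT_FACADE quant_facade_table[] = {
      vp10_quantize_fp_facade,  /* RD-optimized fast-path quantizer */
      vp10_quantize_b_facade,   /* baseline exact quantizer */
      vp10_quantize_dc_facade,  /* DC-only, when AC coefficients are skipped */
    };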
typedef struct {
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -38,7 +46,7 @@
} QUANTS;
void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
- const int16_t *scan, const int16_t *iscan);
+ const int16_t *scan, const int16_t *iscan);
struct VP10_COMP;
struct VP10Common;
@@ -55,6 +63,81 @@
int vp10_qindex_to_quantizer(int qindex);
+void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
+
+void vp10_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+
+void vp10_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+
+void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_quantize_fp_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc);
+
+void vp10_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+
+void vp10_highbd_quantize_dc_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_quantize_fp_32x32_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+
+void vp10_quantize_b_32x32_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+
+void vp10_quantize_dc_32x32_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc);
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_quantize_fp_32x32_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc);
+
+void vp10_highbd_quantize_b_32x32_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc);
+
+void vp10_highbd_quantize_dc_32x32_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc);
+#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index f4fdb24..8498ce9 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -75,7 +75,10 @@
vp10_cost_tokens(cpi->y_mode_costs[i][j], vp10_kf_y_mode_prob[i][j],
vp10_intra_mode_tree);
- vp10_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp10_intra_mode_tree);
+ for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
+ vp10_cost_tokens(cpi->mbmode_cost[i], fc->y_mode_prob[i],
+ vp10_intra_mode_tree);
+
for (i = 0; i < INTRA_MODES; ++i)
vp10_cost_tokens(cpi->intra_uv_mode_cost[i],
fc->uv_mode_prob[i], vp10_intra_mode_tree);
@@ -84,6 +87,44 @@
vp10_cost_tokens(cpi->switchable_interp_costs[i],
fc->switchable_interp_prob[i], vp10_switchable_interp_tree);
+ for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) {
+ vp10_cost_tokens(cpi->palette_y_size_cost[i],
+ vp10_default_palette_y_size_prob[i],
+ vp10_palette_size_tree);
+ vp10_cost_tokens(cpi->palette_uv_size_cost[i],
+ vp10_default_palette_uv_size_prob[i],
+ vp10_palette_size_tree);
+ }
+
+ for (i = 0; i < PALETTE_MAX_SIZE - 1; ++i)
+ for (j = 0; j < PALETTE_COLOR_CONTEXTS; ++j) {
+ vp10_cost_tokens(cpi->palette_y_color_cost[i][j],
+ vp10_default_palette_y_color_prob[i][j],
+ vp10_palette_color_tree[i]);
+ vp10_cost_tokens(cpi->palette_uv_color_cost[i][j],
+ vp10_default_palette_uv_color_prob[i][j],
+ vp10_palette_color_tree[i]);
+ }
+#if CONFIG_EXT_TX
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (use_inter_ext_tx_for_txsize[s][i]) {
+ vp10_cost_tokens(cpi->inter_tx_type_costs[s][i],
+ fc->inter_ext_tx_prob[s][i],
+ vp10_ext_tx_inter_tree[s]);
+ }
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (use_intra_ext_tx_for_txsize[s][i]) {
+ for (j = 0; j < INTRA_MODES; ++j)
+ vp10_cost_tokens(cpi->intra_tx_type_costs[s][i][j],
+ fc->intra_ext_tx_prob[s][i][j],
+ vp10_ext_tx_intra_tree[s]);
+ }
+ }
+ }
+#else
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
for (j = 0; j < TX_TYPES; ++j)
vp10_cost_tokens(cpi->intra_tx_type_costs[i][j],
@@ -95,6 +136,12 @@
fc->inter_ext_tx_prob[i],
vp10_ext_tx_tree);
}
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ vp10_cost_tokens(cpi->intra_filter_cost[i], fc->intra_filter_probs[i],
+ vp10_intra_filter_tree);
+#endif // CONFIG_EXT_INTRA
}
static void fill_token_costs(vp10_coeff_cost *c,
@@ -106,12 +153,21 @@
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+#if CONFIG_ANS
+ const vpx_prob *const tree_probs = p[t][i][j][k][l];
+ vpx_prob pivot = tree_probs[PIVOT_NODE];
+ vp10_cost_tokens_ans((int *)c[t][i][j][k][0][l], tree_probs,
+ vp10_pareto8_token_probs[pivot - 1], 0);
+ vp10_cost_tokens_ans((int *)c[t][i][j][k][1][l], tree_probs,
+ vp10_pareto8_token_probs[pivot - 1], 1);
+#else
vpx_prob probs[ENTROPY_NODES];
vp10_model_to_full_probs(p[t][i][j][k][l], probs);
vp10_cost_tokens((int *)c[t][i][j][k][0][l], probs,
vp10_coef_tree);
vp10_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
vp10_coef_tree);
+#endif // CONFIG_ANS
assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
c[t][i][j][k][1][l][EOB_TOKEN]);
}
@@ -311,10 +367,46 @@
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
&cm->fc->nmvc, cm->allow_high_precision_mv);
+#if CONFIG_REF_MV
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
+ cpi->newmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->newmv_prob[i], 0);
+ cpi->newmv_mode_cost[i][1] = vp10_cost_bit(cm->fc->newmv_prob[i], 1);
+ }
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) {
+ cpi->zeromv_mode_cost[i][0] = vp10_cost_bit(cm->fc->zeromv_prob[i], 0);
+ cpi->zeromv_mode_cost[i][1] = vp10_cost_bit(cm->fc->zeromv_prob[i], 1);
+ }
+
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
+ cpi->refmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->refmv_prob[i], 0);
+ cpi->refmv_mode_cost[i][1] = vp10_cost_bit(cm->fc->refmv_prob[i], 1);
+ }
+
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
+ cpi->drl_mode_cost0[i][0] = vp10_cost_bit(cm->fc->drl_prob0[i], 0);
+ cpi->drl_mode_cost0[i][1] = vp10_cost_bit(cm->fc->drl_prob0[i], 1);
+ }
+
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
+ cpi->drl_mode_cost1[i][0] = vp10_cost_bit(cm->fc->drl_prob1[i], 0);
+ cpi->drl_mode_cost1[i][1] = vp10_cost_bit(cm->fc->drl_prob1[i], 1);
+ }
+#if CONFIG_EXT_INTER
+ cpi->new2mv_mode_cost[0] = vp10_cost_bit(cm->fc->new2mv_prob, 0);
+ cpi->new2mv_mode_cost[1] = vp10_cost_bit(cm->fc->new2mv_prob, 1);
+#endif // CONFIG_EXT_INTER
+#else
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp10_cost_tokens((int *)cpi->inter_mode_cost[i],
cm->fc->inter_mode_probs[i], vp10_inter_mode_tree);
+#endif  // CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ vp10_cost_tokens((int *)cpi->inter_compound_mode_cost[i],
+ cm->fc->inter_compound_mode_probs[i],
+ vp10_inter_compound_mode_tree);
+#endif // CONFIG_EXT_INTER
}
}
@@ -563,8 +655,11 @@
const MACROBLOCKD *const xd) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = vp10_get_pred_context_switchable_interp(xd);
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd)) return 0;
+#endif // CONFIG_EXT_INTERP
return SWITCHABLE_INTERP_RATE_FACTOR *
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+ cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
}
void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) {
@@ -578,10 +673,20 @@
if (sf->adaptive_rd_thresh) {
rd->thresh_mult[THR_NEARESTMV] = 300;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARESTL2] = 300;
+ rd->thresh_mult[THR_NEARESTL3] = 300;
+ rd->thresh_mult[THR_NEARESTL4] = 300;
+#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARESTG] = 300;
rd->thresh_mult[THR_NEARESTA] = 300;
} else {
rd->thresh_mult[THR_NEARESTMV] = 0;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARESTL2] = 0;
+ rd->thresh_mult[THR_NEARESTL3] = 0;
+ rd->thresh_mult[THR_NEARESTL4] = 0;
+#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARESTG] = 0;
rd->thresh_mult[THR_NEARESTA] = 0;
}
@@ -589,27 +694,127 @@
rd->thresh_mult[THR_DC] += 1000;
rd->thresh_mult[THR_NEWMV] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEWL2] += 1000;
+ rd->thresh_mult[THR_NEWL3] += 1000;
+ rd->thresh_mult[THR_NEWL4] += 1000;
+#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEWA] += 1000;
rd->thresh_mult[THR_NEWG] += 1000;
rd->thresh_mult[THR_NEARMV] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARL2] += 1000;
+ rd->thresh_mult[THR_NEARL3] += 1000;
+ rd->thresh_mult[THR_NEARL4] += 1000;
+#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARA] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
+ rd->thresh_mult[THR_NEARG] += 1000;
+
+#if CONFIG_EXT_INTER
+ rd->thresh_mult[THR_NEWFROMNEARMV] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEWFROMNEARL2] += 1000;
+ rd->thresh_mult[THR_NEWFROMNEARL3] += 1000;
+ rd->thresh_mult[THR_NEWFROMNEARL4] += 1000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEWFROMNEARG] += 1000;
+ rd->thresh_mult[THR_NEWFROMNEARA] += 1000;
+#endif // CONFIG_EXT_INTER
+
+ rd->thresh_mult[THR_ZEROMV] += 2000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_ZEROL2] += 2000;
+ rd->thresh_mult[THR_ZEROL3] += 2000;
+ rd->thresh_mult[THR_ZEROL4] += 2000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_ZEROG] += 2000;
+ rd->thresh_mult[THR_ZEROA] += 2000;
rd->thresh_mult[THR_TM] += 1000;
+#if CONFIG_EXT_INTER
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARLA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARGA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTLA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTGA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1700;
+ rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARGA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2000;
+ rd->thresh_mult[THR_COMP_NEW_NEWGA] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLA] += 2500;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROGA] += 2500;
+
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARL2A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTL2A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL2A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL2A] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARL3A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTL3A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL3A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL3A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL3A] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL3A] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL4A] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARL4A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTL4A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL4A] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL4A] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL4A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL4A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL4A] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL4A] += 2500;
+
+#endif // CONFIG_EXT_REFS
+#else
+ rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARESTL2A] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTL3A] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTL4A] += 1000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
+
rd->thresh_mult[THR_COMP_NEARLA] += 1500;
rd->thresh_mult[THR_COMP_NEWLA] += 2000;
- rd->thresh_mult[THR_NEARG] += 1000;
rd->thresh_mult[THR_COMP_NEARGA] += 1500;
rd->thresh_mult[THR_COMP_NEWGA] += 2000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARL2A] += 1500;
+ rd->thresh_mult[THR_COMP_NEWL2A] += 2000;
+ rd->thresh_mult[THR_COMP_NEARL3A] += 1500;
+ rd->thresh_mult[THR_COMP_NEWL3A] += 2000;
+ rd->thresh_mult[THR_COMP_NEARL4A] += 1500;
+ rd->thresh_mult[THR_COMP_NEWL4A] += 2000;
+#endif // CONFIG_EXT_REFS
- rd->thresh_mult[THR_ZEROMV] += 2000;
- rd->thresh_mult[THR_ZEROG] += 2000;
- rd->thresh_mult[THR_ZEROA] += 2000;
rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_ZEROL2A] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROL3A] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROL4A] += 2500;
+#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
+#endif // CONFIG_EXT_INTER
rd->thresh_mult[THR_H_PRED] += 2000;
rd->thresh_mult[THR_V_PRED] += 2000;
@@ -622,9 +827,15 @@
}
void vp10_set_rd_speed_thresholds_sub8x8(VP10_COMP *cpi) {
- static const int thresh_mult[2][MAX_REFS] =
- {{2500, 2500, 2500, 4500, 4500, 2500},
- {2000, 2000, 2000, 4000, 4000, 2000}};
+ static const int thresh_mult[2][MAX_REFS] = {
+#if CONFIG_EXT_REFS
+ {2500, 2500, 2500, 2500, 2500, 2500, 4500, 4500, 4500, 4500, 4500, 2500},
+ {2000, 2000, 2000, 2000, 2000, 2000, 4000, 4000, 4000, 4000, 4000, 2000}
+#else
+ {2500, 2500, 2500, 4500, 4500, 2500},
+ {2000, 2000, 2000, 4000, 4000, 2000}
+#endif // CONFIG_EXT_REFS
+ };
RD_OPT *const rd = &cpi->rd;
const int idx = cpi->oxcf.mode == BEST;
memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h
index cd58bf8..2303c20 100644
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@@ -26,6 +26,10 @@
#define RDCOST(RM, DM, R, D) \
(((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
+
+#define RDCOST_DBL(RM, DM, R, D) \
+ (((((double)(R)) * (RM)) / 256.0) + ((double)(D) * (1 << (DM))))
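+// Example: with RM = 256, DM = 0, R = 100 and D = 5000,
+// RDCOST_DBL = 100 * 256 / 256.0 + 5000 * (1 << 0) = 5100.0.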
+
#define QIDX_SKIP_THRESH 115
#define MV_COST_WEIGHT 108
@@ -33,8 +37,25 @@
#define INVALID_MV 0x80008000
+#if CONFIG_EXT_REFS
+#if CONFIG_EXT_INTER
+#define MAX_MODES 85
+#else
+#define MAX_MODES 54
+#endif // CONFIG_EXT_INTER
+#else
+#if CONFIG_EXT_INTER
+#define MAX_MODES 43
+#else
#define MAX_MODES 30
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+#define MAX_REFS 12
+#else
#define MAX_REFS 6
+#endif // CONFIG_EXT_REFS
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
@@ -43,35 +64,142 @@
// const MODE_DEFINITION vp10_mode_order[MAX_MODES] used in the rd code.
typedef enum {
THR_NEARESTMV,
+#if CONFIG_EXT_REFS
+ THR_NEARESTL2,
+ THR_NEARESTL3,
+ THR_NEARESTL4,
+#endif // CONFIG_EXT_REFS
THR_NEARESTA,
THR_NEARESTG,
THR_DC,
THR_NEWMV,
+#if CONFIG_EXT_REFS
+ THR_NEWL2,
+ THR_NEWL3,
+ THR_NEWL4,
+#endif // CONFIG_EXT_REFS
THR_NEWA,
THR_NEWG,
THR_NEARMV,
+#if CONFIG_EXT_REFS
+ THR_NEARL2,
+ THR_NEARL3,
+ THR_NEARL4,
+#endif // CONFIG_EXT_REFS
THR_NEARA,
THR_NEARG,
+#if CONFIG_EXT_INTER
+ THR_NEWFROMNEARMV,
+#if CONFIG_EXT_REFS
+ THR_NEWFROMNEARL2,
+ THR_NEWFROMNEARL3,
+ THR_NEWFROMNEARL4,
+#endif // CONFIG_EXT_REFS
+ THR_NEWFROMNEARA,
+ THR_NEWFROMNEARG,
+#endif // CONFIG_EXT_INTER
+
THR_ZEROMV,
+#if CONFIG_EXT_REFS
+ THR_ZEROL2,
+ THR_ZEROL3,
+ THR_ZEROL4,
+#endif // CONFIG_EXT_REFS
THR_ZEROG,
THR_ZEROA,
+#if CONFIG_EXT_INTER
+ THR_COMP_NEAREST_NEARESTLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEAREST_NEARESTL2A,
+ THR_COMP_NEAREST_NEARESTL3A,
+ THR_COMP_NEAREST_NEARESTL4A,
+#endif // CONFIG_EXT_REFS
+ THR_COMP_NEAREST_NEARESTGA,
+#else // CONFIG_EXT_INTER
THR_COMP_NEARESTLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEARESTL2A,
+ THR_COMP_NEARESTL3A,
+ THR_COMP_NEARESTL4A,
+#endif // CONFIG_EXT_REFS
THR_COMP_NEARESTGA,
+#endif // CONFIG_EXT_INTER
THR_TM,
+#if CONFIG_EXT_INTER
+ THR_COMP_NEAR_NEARESTLA,
+ THR_COMP_NEAR_NEARESTGA,
+ THR_COMP_NEAREST_NEARLA,
+ THR_COMP_NEAREST_NEARGA,
+ THR_COMP_NEW_NEARESTLA,
+ THR_COMP_NEW_NEARESTGA,
+ THR_COMP_NEAREST_NEWLA,
+ THR_COMP_NEAREST_NEWGA,
+ THR_COMP_NEW_NEARLA,
+ THR_COMP_NEW_NEARGA,
+ THR_COMP_NEAR_NEWLA,
+ THR_COMP_NEAR_NEWGA,
+ THR_COMP_NEW_NEWLA,
+ THR_COMP_NEW_NEWGA,
+ THR_COMP_ZERO_ZEROLA,
+ THR_COMP_ZERO_ZEROGA,
+
+#if CONFIG_EXT_REFS
+ THR_COMP_NEAR_NEARESTL2A,
+ THR_COMP_NEAREST_NEARL2A,
+ THR_COMP_NEW_NEARESTL2A,
+ THR_COMP_NEAREST_NEWL2A,
+ THR_COMP_NEW_NEARL2A,
+ THR_COMP_NEAR_NEWL2A,
+ THR_COMP_NEW_NEWL2A,
+ THR_COMP_ZERO_ZEROL2A,
+
+ THR_COMP_NEAR_NEARESTL3A,
+ THR_COMP_NEAREST_NEARL3A,
+ THR_COMP_NEW_NEARESTL3A,
+ THR_COMP_NEAREST_NEWL3A,
+ THR_COMP_NEW_NEARL3A,
+ THR_COMP_NEAR_NEWL3A,
+ THR_COMP_NEW_NEWL3A,
+ THR_COMP_ZERO_ZEROL3A,
+
+ THR_COMP_NEAR_NEARESTL4A,
+ THR_COMP_NEAREST_NEARL4A,
+ THR_COMP_NEW_NEARESTL4A,
+ THR_COMP_NEAREST_NEWL4A,
+ THR_COMP_NEW_NEARL4A,
+ THR_COMP_NEAR_NEWL4A,
+ THR_COMP_NEW_NEWL4A,
+ THR_COMP_ZERO_ZEROL4A,
+#endif // CONFIG_EXT_REFS
+#else
THR_COMP_NEARLA,
THR_COMP_NEWLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEARL2A,
+ THR_COMP_NEWL2A,
+ THR_COMP_NEARL3A,
+ THR_COMP_NEWL3A,
+ THR_COMP_NEARL4A,
+ THR_COMP_NEWL4A,
+#endif // CONFIG_EXT_REFS
THR_COMP_NEARGA,
THR_COMP_NEWGA,
THR_COMP_ZEROLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_ZEROL2A,
+ THR_COMP_ZEROL3A,
+ THR_COMP_ZEROL4A,
+#endif // CONFIG_EXT_REFS
THR_COMP_ZEROGA,
+#endif // CONFIG_EXT_INTER
THR_H_PRED,
THR_V_PRED,
@@ -85,9 +213,19 @@
typedef enum {
THR_LAST,
+#if CONFIG_EXT_REFS
+ THR_LAST2,
+ THR_LAST3,
+ THR_LAST4,
+#endif // CONFIG_EXT_REFS
THR_GOLD,
THR_ALTR,
THR_COMP_LA,
+#if CONFIG_EXT_REFS
+ THR_COMP_L2A,
+ THR_COMP_L3A,
+ THR_COMP_L4A,
+#endif // CONFIG_EXT_REFS
THR_COMP_GA,
THR_INTRA,
} THR_MODES_SUB8X8;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index c62da96..78aa590 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -35,13 +35,38 @@
#include "vp10/encoder/encodemb.h"
#include "vp10/encoder/encodemv.h"
#include "vp10/encoder/encoder.h"
+#include "vp10/encoder/hybrid_fwd_txfm.h"
#include "vp10/encoder/mcomp.h"
+#include "vp10/encoder/palette.h"
#include "vp10/encoder/quantize.h"
#include "vp10/encoder/ratectrl.h"
#include "vp10/encoder/rd.h"
#include "vp10/encoder/rdopt.h"
#include "vp10/encoder/aq_variance.h"
+#if CONFIG_EXT_REFS
+
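+// Each *_MODE_MASK below lists the reference frames to be masked out when
+// the search is restricted to the frame the macro is named after, i.e.
+// every frame except that one.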
+#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
+ (1 << LAST2_FRAME) | (1 << INTRA_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << LAST4_FRAME))
+#define LAST2_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
+ (1 << LAST_FRAME) | (1 << INTRA_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << LAST4_FRAME))
+#define LAST3_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
+ (1 << LAST_FRAME) | (1 << INTRA_FRAME) | \
+ (1 << LAST2_FRAME) | (1 << LAST4_FRAME))
+#define LAST4_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
+ (1 << LAST_FRAME) | (1 << INTRA_FRAME) | \
+ (1 << LAST2_FRAME) | (1 << LAST3_FRAME))
+#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
+ (1 << LAST2_FRAME) | (1 << INTRA_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << LAST4_FRAME))
+#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
+ (1 << LAST2_FRAME) | (1 << INTRA_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << LAST4_FRAME))
+
+#else
+
#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
(1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
@@ -49,12 +74,19 @@
#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
(1 << INTRA_FRAME))
+#endif // CONFIG_EXT_REFS
+
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
+#if CONFIG_EXT_TX
+const double ext_tx_th = 0.98;
+#else
const double ext_tx_th = 0.99;
+#endif
+
typedef struct {
PREDICTION_MODE mode;
@@ -66,6 +98,9 @@
} REF_DEFINITION;
struct rdcost_block_args {
+#if CONFIG_VAR_TX
+ const VP10_COMP *cpi;
+#endif
MACROBLOCK *x;
ENTROPY_CONTEXT t_above[16];
ENTROPY_CONTEXT t_left[16];
@@ -83,35 +118,141 @@
#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
{NEARESTMV, {LAST_FRAME, NONE}},
+#if CONFIG_EXT_REFS
+ {NEARESTMV, {LAST2_FRAME, NONE}},
+ {NEARESTMV, {LAST3_FRAME, NONE}},
+ {NEARESTMV, {LAST4_FRAME, NONE}},
+#endif // CONFIG_EXT_REFS
{NEARESTMV, {ALTREF_FRAME, NONE}},
{NEARESTMV, {GOLDEN_FRAME, NONE}},
{DC_PRED, {INTRA_FRAME, NONE}},
{NEWMV, {LAST_FRAME, NONE}},
+#if CONFIG_EXT_REFS
+ {NEWMV, {LAST2_FRAME, NONE}},
+ {NEWMV, {LAST3_FRAME, NONE}},
+ {NEWMV, {LAST4_FRAME, NONE}},
+#endif // CONFIG_EXT_REFS
{NEWMV, {ALTREF_FRAME, NONE}},
{NEWMV, {GOLDEN_FRAME, NONE}},
{NEARMV, {LAST_FRAME, NONE}},
+#if CONFIG_EXT_REFS
+ {NEARMV, {LAST2_FRAME, NONE}},
+ {NEARMV, {LAST3_FRAME, NONE}},
+ {NEARMV, {LAST4_FRAME, NONE}},
+#endif // CONFIG_EXT_REFS
{NEARMV, {ALTREF_FRAME, NONE}},
{NEARMV, {GOLDEN_FRAME, NONE}},
+#if CONFIG_EXT_INTER
+ {NEWFROMNEARMV, {LAST_FRAME, NONE}},
+#if CONFIG_EXT_REFS
+ {NEWFROMNEARMV, {LAST2_FRAME, NONE}},
+ {NEWFROMNEARMV, {LAST3_FRAME, NONE}},
+ {NEWFROMNEARMV, {LAST4_FRAME, NONE}},
+#endif // CONFIG_EXT_REFS
+ {NEWFROMNEARMV, {ALTREF_FRAME, NONE}},
+ {NEWFROMNEARMV, {GOLDEN_FRAME, NONE}},
+#endif // CONFIG_EXT_INTER
+
{ZEROMV, {LAST_FRAME, NONE}},
+#if CONFIG_EXT_REFS
+ {ZEROMV, {LAST2_FRAME, NONE}},
+ {ZEROMV, {LAST3_FRAME, NONE}},
+ {ZEROMV, {LAST4_FRAME, NONE}},
+#endif // CONFIG_EXT_REFS
{ZEROMV, {GOLDEN_FRAME, NONE}},
{ZEROMV, {ALTREF_FRAME, NONE}},
+#if CONFIG_EXT_INTER
+ {NEAREST_NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
+#if CONFIG_EXT_REFS
+ {NEAREST_NEARESTMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEARESTMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEARESTMV, {LAST4_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_REFS
+ {NEAREST_NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+#else // CONFIG_EXT_INTER
{NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
+#if CONFIG_EXT_REFS
+ {NEARESTMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEARESTMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEARESTMV, {LAST4_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_REFS
{NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_INTER
{TM_PRED, {INTRA_FRAME, NONE}},
+#if CONFIG_EXT_INTER
+ {NEAR_NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEAR_NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEARMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEW_NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEW_NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEW_NEARMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEW_NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEAR_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEW_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEW_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {ZERO_ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
+ {ZERO_ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+#if CONFIG_EXT_REFS
+ {NEAR_NEARESTMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEARMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEW_NEARESTMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEWMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEW_NEARMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEAR_NEWMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEW_NEWMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {ZERO_ZEROMV, {LAST2_FRAME, ALTREF_FRAME}},
+
+ {NEAR_NEARESTMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEARMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEW_NEARESTMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEWMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEW_NEARMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEAR_NEWMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEW_NEWMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {ZERO_ZEROMV, {LAST3_FRAME, ALTREF_FRAME}},
+
+ {NEAR_NEARESTMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEARMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {NEW_NEARESTMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEWMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {NEW_NEARMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {NEAR_NEWMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {NEW_NEWMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {ZERO_ZEROMV, {LAST4_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_REFS
+#else
{NEARMV, {LAST_FRAME, ALTREF_FRAME}},
{NEWMV, {LAST_FRAME, ALTREF_FRAME}},
+#if CONFIG_EXT_REFS
+ {NEARMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEWMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {NEARMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEWMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {NEARMV, {LAST4_FRAME, ALTREF_FRAME}},
+ {NEWMV, {LAST4_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_REFS
{NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
+#if CONFIG_EXT_REFS
+ {ZEROMV, {LAST3_FRAME, ALTREF_FRAME}},
+ {ZEROMV, {LAST2_FRAME, ALTREF_FRAME}},
+ {ZEROMV, {LAST4_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_REFS
{ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_INTER
{H_PRED, {INTRA_FRAME, NONE}},
{V_PRED, {INTRA_FRAME, NONE}},
@@ -125,13 +266,33 @@
static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
{{LAST_FRAME, NONE}},
+#if CONFIG_EXT_REFS
+ {{LAST2_FRAME, NONE}},
+ {{LAST3_FRAME, NONE}},
+ {{LAST4_FRAME, NONE}},
+#endif // CONFIG_EXT_REFS
{{GOLDEN_FRAME, NONE}},
{{ALTREF_FRAME, NONE}},
{{LAST_FRAME, ALTREF_FRAME}},
+#if CONFIG_EXT_REFS
+ {{LAST2_FRAME, ALTREF_FRAME}},
+ {{LAST3_FRAME, ALTREF_FRAME}},
+ {{LAST4_FRAME, ALTREF_FRAME}},
+#endif // CONFIG_EXT_REFS
{{GOLDEN_FRAME, ALTREF_FRAME}},
{{INTRA_FRAME, NONE}},
};
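+// Cost, in vp10_cost_bit units, of signaling a value v in [0, n) with a
+// near-uniform code: with l = get_unsigned_bits(n) and m = (1 << l) - n,
+// the first m values take l - 1 bits and the rest take l bits. For example,
+// n = 6 gives l = 3, m = 2: v in {0, 1} costs 2 bits, v in {2..5} costs 3.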
+static INLINE int write_uniform_cost(int n, int v) {
+ int l = get_unsigned_bits(n), m = (1 << l) - n;
+ if (l == 0)
+ return 0;
+ if (v < m)
+ return (l - 1) * vp10_cost_bit(128, 0);
+ else
+ return l * vp10_cost_bit(128, 0);
+}
+
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int m, int n, int min_plane, int max_plane) {
int i;
@@ -336,7 +497,11 @@
};
static int cost_coeffs(MACROBLOCK *x,
int plane, int block,
+#if CONFIG_VAR_TX
+ int coeff_ctx,
+#else
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
+#endif
TX_SIZE tx_size,
const int16_t *scan, const int16_t *nb,
int use_fast_coef_costing) {
@@ -351,7 +516,11 @@
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size][type][is_inter_block(mbmi)];
uint8_t token_cache[32 * 32];
+#if CONFIG_VAR_TX
+ int pt = coeff_ctx;
+#else
int pt = combine_entropy_contexts(*A, *L);
+#endif
int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
@@ -359,9 +528,11 @@
const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
+#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
// Check for consistency of tx_size with mode info
assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
: get_uv_tx_size(mbmi, pd) == tx_size);
+#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
if (eob == 0) {
// single eob token
@@ -415,8 +586,10 @@
}
}
+#if !CONFIG_VAR_TX
  // Record in the contexts whether the block has any nonzero coefficients.
*A = *L = (c > 0);
+#endif
return cost;
}
@@ -444,10 +617,23 @@
static int rate_block(int plane, int block, int blk_row, int blk_col,
TX_SIZE tx_size, struct rdcost_block_args* args) {
- return cost_coeffs(args->x, plane, block, args->t_above + blk_col,
- args->t_left + blk_row, tx_size,
- args->so->scan, args->so->neighbors,
+#if CONFIG_VAR_TX
+ int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
+ *(args->t_left + blk_row));
+ int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx,
+ tx_size, args->so->scan, args->so->neighbors,
+ args->use_fast_coef_costing);
+ const struct macroblock_plane *p = &args->x->plane[plane];
+  *(args->t_above + blk_col) = (p->eobs[block] != 0);
+  *(args->t_left + blk_row) = (p->eobs[block] != 0);
+ return coeff_cost;
+#else
+ return cost_coeffs(args->x, plane, block,
+ args->t_above + blk_col,
+ args->t_left + blk_row,
+ tx_size, args->so->scan, args->so->neighbors,
args->use_fast_coef_costing);
+#endif  // CONFIG_VAR_TX
}
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
@@ -467,23 +653,47 @@
if (!is_inter_block(mbmi)) {
struct encode_b_args arg = {x, NULL, &mbmi->skip};
+#if CONFIG_VAR_TX
+ uint8_t *dst, *src;
+ int src_stride = x->plane[plane].src.stride;
+ int dst_stride = xd->plane[plane].dst.stride;
+ unsigned int tmp_sse;
+ PREDICTION_MODE mode = (plane == 0) ?
+ get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
+
+ src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)];
+ dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)];
+ vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
+ b_height_log2_lookup[plane_bsize],
+ tx_size, mode, dst, dst_stride,
+ dst, dst_stride, blk_col, blk_row, plane);
+ args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
+ dst, dst_stride, &tmp_sse);
+ sse = (int64_t)tmp_sse * 16;
+ vp10_encode_block_intra(plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, &arg);
+ args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
+ dst, dst_stride, &tmp_sse);
+ dist = (int64_t)tmp_sse * 16;
+#else
vp10_encode_block_intra(plane, block, blk_row, blk_col,
plane_bsize, tx_size, &arg);
dist_block(x, plane, block, tx_size, &dist, &sse);
+#endif
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
SKIP_TXFM_NONE) {
// full forward transform and quantization
vp10_xform_quant(x, plane, block, blk_row, blk_col,
- plane_bsize, tx_size);
+ plane_bsize, tx_size, VP10_XFORM_QUANT_B);
dist_block(x, plane, block, tx_size, &dist, &sse);
} else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
SKIP_TXFM_AC_ONLY) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
- vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
- plane_bsize, tx_size);
+ vp10_xform_quant(x, plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, VP10_XFORM_QUANT_DC);
sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
dist = sse;
if (x->plane[plane].eobs[block]) {
@@ -507,7 +717,8 @@
}
} else {
// full forward transform and quantization
- vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_B);
dist_block(x, plane, block, tx_size, &dist, &sse);
}
@@ -541,6 +752,9 @@
}
static void txfm_rd_in_plane(MACROBLOCK *x,
+#if CONFIG_VAR_TX
+ const VP10_COMP *cpi,
+#endif
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane,
@@ -552,6 +766,9 @@
struct rdcost_block_args args;
vp10_zero(args);
args.x = x;
+#if CONFIG_VAR_TX
+ args.cpi = cpi;
+#endif
args.best_rd = ref_best_rd;
args.use_fast_coef_costing = use_fast_coef_casting;
args.skippable = 1;
@@ -561,11 +778,11 @@
vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
- tx_type = get_tx_type(pd->plane_type, xd, 0);
- args.so = get_scan(tx_size, tx_type);
+ tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
+ args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
- block_rd_txfm, &args);
+ block_rd_txfm, &args);
if (args.exit_early) {
*rate = INT_MAX;
*distortion = INT64_MAX;
@@ -579,6 +796,48 @@
}
}
+#if CONFIG_SUPERTX
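+// Rate-distortion for a supertx block: the whole prediction block is
+// coded as a single transform block (block index 0) of the given
+// tx_size, so the per-block loop of txfm_rd_in_plane reduces to a
+// single block_rd_txfm call.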
+void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x,
+ int *rate, int64_t *distortion,
+ int *skippable, int64_t *sse,
+ int64_t ref_best_rd, int plane,
+ BLOCK_SIZE bsize, TX_SIZE tx_size,
+ int use_fast_coef_casting) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ struct rdcost_block_args args;
+ TX_TYPE tx_type;
+
+ vp10_zero(args);
+ args.x = x;
+ args.best_rd = ref_best_rd;
+ args.use_fast_coef_costing = use_fast_coef_casting;
+
+ if (plane == 0)
+ xd->mi[0]->mbmi.tx_size = tx_size;
+
+ vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
+
+ tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
+ args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+
+ block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd),
+ tx_size, &args);
+
+ if (args.exit_early) {
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ } else {
+ *distortion = args.this_dist;
+ *rate = args.this_rate;
+ *sse = args.this_sse;
+ *skippable = !x->plane[plane].eobs[0];
+ }
+}
+#endif // CONFIG_SUPERTX
+
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skip, int64_t *sse,
@@ -589,21 +848,91 @@
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int r, s;
int64_t d, psse, this_rd, best_rd = INT64_MAX;
vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
int s0 = vp10_cost_bit(skip_prob, 0);
int s1 = vp10_cost_bit(skip_prob, 1);
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
const int is_inter = is_inter_block(mbmi);
mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
+
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
+
+ if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[mbmi->segment_id]) {
+ for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ } else {
+ if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
+ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
+ continue;
+ }
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+
+ mbmi->tx_type = tx_type;
+ if (ext_tx_set == 1 &&
+ mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
+
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
+ &psse, ref_best_rd, 0, bs, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+
+ if (r == INT_MAX)
+ continue;
+ if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ r += cpi->inter_tx_type_costs[ext_tx_set]
+ [mbmi->tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+
+ if (s)
+ this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
+ else
+ this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
+ if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
+ this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));
+
+ if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
+ best_rd = this_rd;
+ best_tx_type = mbmi->tx_type;
+ }
+ }
+ }
+
+#else // CONFIG_EXT_TX
if (mbmi->tx_size < TX_32X32 &&
!xd->lossless[mbmi->segment_id]) {
for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
mbmi->tx_type = tx_type;
- txfm_rd_in_plane(x, &r, &d, &s,
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
&psse, ref_best_rd, 0, bs, mbmi->tx_size,
cpi->sf.use_fast_coef_costing);
if (r == INT_MAX)
@@ -627,10 +956,33 @@
}
}
}
+#endif // CONFIG_EXT_TX
mbmi->tx_type = best_tx_type;
- txfm_rd_in_plane(x, rate, distortion, skip,
+
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ rate, distortion, skip,
sse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) {
+ int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ *rate += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ *rate +=
+ cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+#else
if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id] &&
*rate != INT_MAX) {
if (is_inter)
@@ -640,6 +992,7 @@
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
+#endif // CONFIG_EXT_TX
}
static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
@@ -651,12 +1004,32 @@
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
mbmi->tx_size = TX_4X4;
+ mbmi->tx_type = DCT_DCT;
- txfm_rd_in_plane(x, rate, distortion, skip,
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ rate, distortion, skip,
sse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}
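+// Cost of signaling tx_size with the tx-size probability tree: one
+// "one" bit for each smaller size that is passed over, plus a
+// terminating "zero" bit unless tx_size is already the largest allowed
+// size, in which case the final bit is implicit.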
+static INLINE int vp10_cost_tx_size(TX_SIZE tx_size, TX_SIZE max_tx_size,
+ const vpx_prob *tx_probs) {
+ int m;
+ int r_tx_size = 0;
+
+ for (m = 0; m <= tx_size - (tx_size == max_tx_size); ++m) {
+ if (m == tx_size)
+ r_tx_size += vp10_cost_zero(tx_probs[m]);
+ else
+ r_tx_size += vp10_cost_one(tx_probs[m]);
+ }
+
+ return r_tx_size;
+}
+
static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
int *rate,
int64_t *distortion,
@@ -672,7 +1045,7 @@
int r, s;
int64_t d, sse;
int64_t rd = INT64_MAX;
- int n, m;
+ int n;
int s0, s1;
int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
TX_SIZE best_tx = max_tx_size;
@@ -680,6 +1053,9 @@
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
TX_TYPE tx_type, best_tx_type = DCT_DCT;
const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
assert(skip_prob > 0);
@@ -704,19 +1080,58 @@
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
last_rd = INT64_MAX;
for (n = start_tx; n >= end_tx; --n) {
- int r_tx_size = 0;
- for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
- if (m == n)
- r_tx_size += vp10_cost_zero(tx_probs[m]);
- else
- r_tx_size += vp10_cost_one(tx_probs[m]);
- }
+ const int r_tx_size = vp10_cost_tx_size(n, max_tx_size, tx_probs);
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(n, bs, is_inter);
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ } else {
+ if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
+ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
+ continue;
+ }
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+ mbmi->tx_type = tx_type;
+ if (ext_tx_set == 1 &&
+ mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
+ &sse, ref_best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
+ if (get_ext_tx_types(n, bs, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ r != INT_MAX) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ r += cpi->inter_tx_type_costs[ext_tx_set]
+ [mbmi->tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+#else // CONFIG_EXT_TX
if (n >= TX_32X32 && tx_type != DCT_DCT) {
continue;
}
mbmi->tx_type = tx_type;
- txfm_rd_in_plane(x, &r, &d, &s,
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
&sse, ref_best_rd, 0, bs, n,
cpi->sf.use_fast_coef_costing);
if (n < TX_32X32 &&
@@ -729,6 +1144,7 @@
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
+#endif // CONFIG_EXT_TX
if (r == INT_MAX)
continue;
@@ -775,7 +1191,11 @@
mbmi->tx_type = best_tx_type;
if (mbmi->tx_size >= TX_32X32)
assert(mbmi->tx_type == DCT_DCT);
- txfm_rd_in_plane(x, &r, &d, &s,
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &r, &d, &s,
&sse, ref_best_rd, 0, bs, best_tx,
cpi->sf.use_fast_coef_costing);
}
@@ -790,11 +1210,10 @@
assert(bs == xd->mi[0]->mbmi.sb_type);
- if (CONFIG_MISC_FIXES && xd->lossless[0]) {
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
ref_best_rd, bs);
- } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
- xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
bs);
} else {
@@ -824,6 +1243,163 @@
return 0;
}
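+// Palette search for the luma plane. The source block is scanned for the
+// number of distinct colors; if it lies in (1, 64] and screen content
+// tools are enabled, k-means over the pixel intensities is run for each
+// palette size from min(colors, PALETTE_MAX_SIZE) down to 2. Centroids
+// are rounded, de-duplicated and clipped to the valid pixel range, the
+// block is mapped to palette indices, and the full RD cost, including
+// the palette size, color values and per-pixel index signaling, is
+// compared against the current best intra mode.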
+void rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ int palette_ctx, int dc_mode_cost,
+ PALETTE_MODE_INFO *palette_mode_info,
+ uint8_t *best_palette_color_map,
+ TX_SIZE *best_tx, PREDICTION_MODE *mode_selected,
+ int64_t *best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ int this_rate, this_rate_tokenonly, s;
+ int64_t this_distortion, this_rd;
+ int colors, n;
+ int src_stride = x->plane[0].src.stride;
+ uint8_t *src = x->plane[0].src.buf;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth)
+ colors = vp10_count_colors_highbd(src, src_stride, rows, cols,
+ cpi->common.bit_depth);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ colors = vp10_count_colors(src, src_stride, rows, cols);
+ palette_mode_info->palette_size[0] = 0;
+
+ if (colors > 1 && colors <= 64 && cpi->common.allow_screen_content_tools) {
+ int r, c, i, j, k;
+ int max_itr = 50;
+ int color_ctx, color_idx = 0;
+ int color_order[PALETTE_MAX_SIZE];
+ double *data = x->palette_buffer->kmeans_data_buf;
+ uint8_t *indices = x->palette_buffer->kmeans_indices_buf;
+ uint8_t *pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
+ double centroids[PALETTE_MAX_SIZE];
+ uint8_t *color_map;
+ double lb, ub, val;
+ PALETTE_MODE_INFO *pmi = &mic->mbmi.palette_mode_info;
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+ if (cpi->common.use_highbitdepth)
+ lb = ub = src16[0];
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ lb = ub = src[0];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth) {
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+ val = src16[r * src_stride + c];
+ data[r * cols + c] = val;
+ if (val < lb)
+ lb = val;
+ else if (val > ub)
+ ub = val;
+ }
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+ val = src[r * src_stride + c];
+ data[r * cols + c] = val;
+ if (val < lb)
+ lb = val;
+ else if (val > ub)
+ ub = val;
+ }
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ mic->mbmi.mode = DC_PRED;
+
+ for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors;
+ n >= 2; --n) {
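+ // Seed the k-means centroids at the midpoints of n equal-width
+ // sub-intervals of the observed intensity range [lb, ub].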
+ for (i = 0; i < n; ++i)
+ centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
+ vp10_k_means(data, centroids, indices, pre_indices, rows * cols,
+ n, 1, max_itr);
+ vp10_insertion_sort(centroids, n);
+ for (i = 0; i < n; ++i)
+ centroids[i] = round(centroids[i]);
+ // remove duplicates
+ i = 1;
+ k = n;
+ while (i < k) {
+ if (centroids[i] == centroids[i - 1]) {
+ j = i;
+ while (j < k - 1) {
+ centroids[j] = centroids[j + 1];
+ ++j;
+ }
+ --k;
+ } else {
+ ++i;
+ }
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth)
+ for (i = 0; i < k; ++i)
+ pmi->palette_colors[i] = clip_pixel_highbd((int)round(centroids[i]),
+ cpi->common.bit_depth);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (i = 0; i < k; ++i)
+ pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
+ pmi->palette_size[0] = k;
+
+ vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
+ for (r = 0; r < rows; ++r)
+ for (c = 0; c < cols; ++c)
+ xd->plane[0].color_index_map[r * cols + c] = indices[r * cols + c];
+
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
+
+ this_rate = this_rate_tokenonly + dc_mode_cost +
+ cpi->common.bit_depth * k * vp10_cost_bit(128, 0) +
+ cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2];
+ this_rate +=
+ vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
+ [palette_ctx], 1);
+ color_map = xd->plane[0].color_index_map;
+ this_rate += write_uniform_cost(k, xd->plane[0].color_index_map[0]);
+ for (i = 0; i < rows; ++i) {
+ for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+ color_ctx = vp10_get_palette_color_context(color_map, cols, i, j,
+ k, color_order);
+ for (r = 0; r < k; ++r)
+ if (color_map[i * cols + j] == color_order[r]) {
+ color_idx = r;
+ break;
+ }
+ assert(color_idx < k);
+ this_rate +=
+ cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
+ }
+ }
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *palette_mode_info = mic->mbmi.palette_mode_info;
+ memcpy(best_palette_color_map, xd->plane[0].color_index_map,
+ rows * cols * sizeof(xd->plane[0].color_index_map[0]));
+ *mode_selected = DC_PRED;
+ *best_tx = mic->mbmi.tx_size;
+ }
+ }
+ }
+}
+
static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
int row, int col,
PREDICTION_MODE *best_mode,
@@ -854,6 +1430,7 @@
memcpy(ta, a, sizeof(ta));
memcpy(tl, l, sizeof(tl));
xd->mi[0]->mbmi.tx_size = TX_4X4;
+ xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -870,7 +1447,7 @@
// one of the neighboring directional modes
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
if (conditional_skipintra(mode, *best_mode))
- continue;
+ continue;
}
memcpy(tempa, ta, sizeof(ta));
@@ -882,8 +1459,8 @@
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8,
- block,
- p->src_diff);
+ block,
+ p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0]->bmi[block].as_mode = mode;
vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
@@ -892,13 +1469,25 @@
vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
dst, dst_stride, xd->bd);
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif // CONFIG_VAR_TX
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
+ TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
+#endif // CONFIG_VAR_TX
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next_highbd;
vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
@@ -906,13 +1495,24 @@
xd->bd, DCT_DCT, 1);
} else {
int64_t unused;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif // CONFIG_VAR_TX
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
- so->scan, so->neighbors,
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
+ TX_4X4, so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
+#endif // CONFIG_VAR_TX
distortion += vp10_highbd_block_error(
coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &unused, xd->bd) >> 2;
@@ -942,9 +1542,10 @@
num_4x4_blocks_wide * 4 * sizeof(uint16_t));
}
}
- next_highbd:
+next_highbd:
{}
}
+
if (best_rd >= rd_thresh)
return best_rd;
@@ -971,7 +1572,7 @@
// one of the neighboring directional modes
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
if (conditional_skipintra(mode, *best_mode))
- continue;
+ continue;
}
memcpy(tempa, ta, sizeof(ta));
@@ -991,26 +1592,49 @@
vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif
vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
+ TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
+#endif
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
dst, dst_stride, p->eobs[block], DCT_DCT, 1);
} else {
int64_t unused;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX
+ const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
+ *(templ + idy));
+#endif
vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
- ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
- so->scan, so->neighbors,
- cpi->sf.use_fast_coef_costing);
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
+ TX_4X4, so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif
distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &unused) >> 2;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -1068,17 +1692,21 @@
int tot_rate_y = 0;
int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4];
- const int *bmode_costs = cpi->mbmode_cost;
+ const int *bmode_costs = cpi->mbmode_cost[0];
memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
+#if CONFIG_EXT_INTRA
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.intra_filter = INTRA_FILTER_LINEAR;
+#endif // CONFIG_EXT_INTRA
+
// TODO(any): Add search of the tx_type to improve rd performance at the
// expense of speed.
mic->mbmi.tx_type = DCT_DCT;
+ mic->mbmi.tx_size = TX_4X4;
- // Later we can add search of the tx_type to improve results.
- // For now just set it to DCT_DCT
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1114,15 +1742,369 @@
return INT64_MAX;
}
}
+ mic->mbmi.mode = mic->bmi[3].as_mode;
+
+ // Add in the cost of the transform type
+ if (!xd->lossless[mic->mbmi.segment_id]) {
+ int rate_tx_type = 0;
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(TX_4X4, bsize, 0) > 1) {
+ const int eset = get_ext_tx_set(TX_4X4, bsize, 0);
+ rate_tx_type =
+ cpi->intra_tx_type_costs[eset][TX_4X4]
+ [mic->mbmi.mode][mic->mbmi.tx_type];
+ }
+#else
+ rate_tx_type =
+ cpi->intra_tx_type_costs[TX_4X4]
+ [intra_mode_to_tx_type_context[mic->mbmi.mode]]
+ [mic->mbmi.tx_type];
+#endif
+ assert(mic->mbmi.tx_size == TX_4X4);
+ cost += rate_tx_type;
+ tot_rate_y += rate_tx_type;
+ }
*rate = cost;
*rate_y = tot_rate_y;
*distortion = total_distortion;
- mic->mbmi.mode = mic->bmi[3].as_mode;
return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
+#if CONFIG_EXT_INTRA
+// Return 1 if an ext intra mode is selected; return 0 otherwise.
+static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int mode_cost,
+ int64_t *best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ MB_MODE_INFO *mbmi = &mic->mbmi;
+ int this_rate, this_rate_tokenonly, s;
+ int ext_intra_selected_flag = 0;
+ int64_t this_distortion, this_rd;
+ EXT_INTRA_MODE mode;
+ TX_SIZE best_tx_size = TX_4X4;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ TX_TYPE best_tx_type = DCT_DCT;
+
+ vp10_zero(ext_intra_mode_info);
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
+ mbmi->mode = DC_PRED;
+
+ for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
+
+ this_rate = this_rate_tokenonly +
+ vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+ write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ best_tx_size = mic->mbmi.tx_size;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ best_tx_type = mic->mbmi.tx_type;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ ext_intra_selected_flag = 1;
+ }
+ }
+
+ if (ext_intra_selected_flag) {
+ mbmi->mode = DC_PRED;
+ mbmi->tx_size = best_tx_size;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+ ext_intra_mode_info.use_ext_intra_mode[0];
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+ ext_intra_mode_info.ext_intra_mode[0];
+ mbmi->tx_type = best_tx_type;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static void pick_intra_angle_routine_sby(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ int *best_angle_delta,
+ TX_SIZE *best_tx_size,
+ TX_TYPE *best_tx_type,
+ INTRA_FILTER *best_filter,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t *best_rd) {
+ int this_rate, this_rate_tokenonly, s;
+ int64_t this_distortion, this_rd;
+ MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ return;
+
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *best_angle_delta = mbmi->angle_delta[0];
+ *best_tx_size = mbmi->tx_size;
+ *best_filter = mbmi->intra_filter;
+ *best_tx_type = mbmi->tx_type;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+}
+
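+// Search over angle deltas (and intra interpolation filters) for a
+// directional intra mode. With ANGLE_FAST_SEARCH, a coarse pass over
+// deltas {0, -2, 2} is refined around the best coarse delta using the
+// second-level table; otherwise every delta in
+// [-MAX_ANGLE_DELTAS, MAX_ANGLE_DELTAS] is tried. The first evaluation
+// (zero delta, linear filter) also acts as an early-out gate: if it is
+// already worse than rd_adjust * best_rd, the search is abandoned.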
+static int64_t rd_pick_intra_angle_sby(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ MB_MODE_INFO *mbmi = &mic->mbmi;
+ int this_rate, this_rate_tokenonly, s;
+ int angle_delta, best_angle_delta = 0, p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ INTRA_FILTER filter, best_filter = INTRA_FILTER_LINEAR;
+ const double rd_adjust = 1.2;
+ int64_t this_distortion, this_rd, sse_dummy;
+ TX_SIZE best_tx_size = mic->mbmi.tx_size;
+ TX_TYPE best_tx_type = mbmi->tx_type;
+
+ if (ANGLE_FAST_SEARCH) {
+ int deltas_level1[3] = {0, -2, 2};
+ int deltas_level2[3][2] = {
+ {-1, 1}, {-3, -1}, {1, 3},
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;
+
+ for (i = 0; i < level1; ++i) {
+ mic->mbmi.angle_delta[0] = deltas_level1[i];
+ p_angle = mode_to_angle_map[mbmi->mode] +
+ mbmi->angle_delta[0] * ANGLE_STEP;
+ for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+ if ((FILTER_FAST_SEARCH || !pick_intra_filter(p_angle)) &&
+ filter != INTRA_FILTER_LINEAR)
+ continue;
+ mic->mbmi.intra_filter = filter;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize,
+ (i == 0 && filter == INTRA_FILTER_LINEAR &&
+ best_rd < INT64_MAX) ? best_rd * rd_adjust : best_rd);
+ if (this_rate_tokenonly == INT_MAX) {
+ if (i == 0 && filter == INTRA_FILTER_LINEAR)
+ return best_rd;
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead +
+ cpi->intra_filter_cost[intra_filter_ctx][filter];
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && filter == INTRA_FILTER_LINEAR &&
+ best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
+ return best_rd;
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+ best_filter = mbmi->intra_filter;
+ best_tx_type = mbmi->tx_type;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+
+ if (best_i >= 0) {
+ for (j = 0; j < level2; ++j) {
+ mic->mbmi.angle_delta[0] = deltas_level2[best_i][j];
+ p_angle = mode_to_angle_map[mbmi->mode] +
+ mbmi->angle_delta[0] * ANGLE_STEP;
+ for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+ mic->mbmi.intra_filter = filter;
+ if ((FILTER_FAST_SEARCH || !pick_intra_filter(p_angle)) &&
+ filter != INTRA_FILTER_LINEAR)
+ continue;
+ pick_intra_angle_routine_sby(cpi, x, rate, rate_tokenonly,
+ distortion, skippable,
+ &best_angle_delta, &best_tx_size,
+ &best_tx_type, &best_filter, bsize,
+ rate_overhead +
+ cpi->intra_filter_cost
+ [intra_filter_ctx][filter],
+ &best_rd);
+ }
+ }
+ }
+ } else {
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {
+ mbmi->angle_delta[0] = angle_delta;
+ p_angle = mode_to_angle_map[mbmi->mode] +
+ mbmi->angle_delta[0] * ANGLE_STEP;
+ for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+ mic->mbmi.intra_filter = filter;
+ if ((FILTER_FAST_SEARCH || !pick_intra_filter(p_angle)) &&
+ filter != INTRA_FILTER_LINEAR)
+ continue;
+ pick_intra_angle_routine_sby(cpi, x, rate, rate_tokenonly,
+ distortion, skippable,
+ &best_angle_delta, &best_tx_size,
+ &best_tx_type, &best_filter, bsize,
+ rate_overhead +
+ cpi->intra_filter_cost
+ [intra_filter_ctx][filter],
+ &best_rd);
+ }
+ }
+ }
+
+ if (FILTER_FAST_SEARCH && *rate_tokenonly < INT_MAX) {
+ mbmi->angle_delta[0] = best_angle_delta;
+ p_angle = mode_to_angle_map[mbmi->mode] +
+ mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle)) {
+ for (filter = INTRA_FILTER_LINEAR + 1; filter < INTRA_FILTERS; ++filter) {
+ mic->mbmi.intra_filter = filter;
+ pick_intra_angle_routine_sby(cpi, x, rate, rate_tokenonly,
+ distortion, skippable,
+ &best_angle_delta, &best_tx_size,
+ &best_tx_type, &best_filter, bsize,
+ rate_overhead + cpi->intra_filter_cost
+ [intra_filter_ctx][filter], &best_rd);
+ }
+ }
+ }
+
+ mbmi->tx_size = best_tx_size;
+ mbmi->angle_delta[0] = best_angle_delta;
+ mic->mbmi.intra_filter = best_filter;
+ mbmi->tx_type = best_tx_type;
+
+ if (*rate_tokenonly < INT_MAX) {
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &this_rate_tokenonly, &this_distortion, &s,
+ &sse_dummy, INT64_MAX, 0, bsize, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+ }
+
+ return best_rd;
+}
+
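+// Map an angle in degrees to a histogram bin; bins are 22.5 degrees
+// wide starting at 45 degrees, matching the granularity of the
+// directional prediction modes.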
+static INLINE int get_angle_index(double angle) {
+ const double step = 22.5, base = 45;
+ return (int)round((angle - base) / step);
+}
+
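+// Build a gradient-direction histogram for the source block: for every
+// interior pixel, the angle of the (dx, dy) gradient is binned and
+// weighted by the squared gradient magnitude, then the histogram is
+// normalized. The result is used to prune unlikely directional intra
+// modes before the RD search.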
+static void angle_estimation(const uint8_t *src, int src_stride,
+ int rows, int cols, double *hist) {
+ int r, c, i, index;
+ const double pi = 3.1415;
+ double angle, dx, dy;
+ double temp, divisor = 0;
+
+ for (i = 0; i < DIRECTIONAL_MODES; ++i)
+ hist[i] = 0;
+
+ src += src_stride;
+ for (r = 1; r < rows; ++r) {
+ for (c = 1; c < cols; ++c) {
+ dx = src[c] - src[c - 1];
+ dy = src[c] - src[c - src_stride];
+ temp = dx * dx + dy * dy;
+ if (dy == 0)
+ angle = 90;
+ else
+ angle = (atan((double)dx / (double)dy)) * 180 / pi;
+ assert(angle >= -90 && angle <= 90);
+ index = get_angle_index(angle + 180);
+ if (index < DIRECTIONAL_MODES) {
+ hist[index] += temp;
+ divisor += temp;
+ }
+ if (angle > 0) {
+ index = get_angle_index(angle);
+ if (index >= 0) {
+ hist[index] += temp;
+ divisor += temp;
+ }
+ }
+ }
+ src += src_stride;
+ }
+
+ if (divisor < 1)
+ divisor = 1;
+ for (i = 0; i < DIRECTIONAL_MODES; ++i)
+ hist[i] /= divisor;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
+ int rows, int cols, double *hist) {
+ int r, c, i, index;
+ const double pi = 3.1415;
+ double angle, dx, dy;
+ double temp, divisor = 0;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+
+ for (i = 0; i < DIRECTIONAL_MODES; ++i)
+ hist[i] = 0;
+
+ src += src_stride;
+ for (r = 1; r < rows; ++r) {
+ for (c = 1; c < cols; ++c) {
+ dx = src[c] - src[c - 1];
+ dy = src[c] - src[c - src_stride];
+ temp = dx * dx + dy * dy;
+ if (dy == 0)
+ angle = 90;
+ else
+ angle = (atan((double)dx / (double)dy)) * 180 / pi;
+ assert(angle >= -90 && angle <= 90);
+ index = get_angle_index(angle + 180);
+ if (index < DIRECTIONAL_MODES) {
+ hist[index] += temp;
+ divisor += temp;
+ }
+ if (angle > 0) {
+ index = get_angle_index(angle);
+ if (index >= 0) {
+ hist[index] += temp;
+ divisor += temp;
+ }
+ }
+ }
+ src += src_stride;
+ }
+
+ if (divisor < 1)
+ divisor = 1;
+ for (i = 0; i < DIRECTIONAL_MODES; ++i)
+ hist[i] /= divisor;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTRA
+
// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
@@ -1136,33 +2118,139 @@
int this_rate, this_rate_tokenonly, s;
int64_t this_distortion, this_rd;
TX_SIZE best_tx = TX_4X4;
+#if CONFIG_EXT_INTRA
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
+ INTRA_FILTER best_filter = INTRA_FILTER_LINEAR;
+ uint8_t directional_mode_skip_mask[INTRA_MODES];
+ const int src_stride = x->plane[0].src.stride;
+ const uint8_t *src = x->plane[0].src.buf;
+ double hist[DIRECTIONAL_MODES];
+#endif // CONFIG_EXT_INTRA
TX_TYPE best_tx_type = DCT_DCT;
int *bmode_costs;
+ PALETTE_MODE_INFO palette_mode_info;
+ uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
+ x->palette_buffer->best_palette_color_map : NULL;
+ const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ int palette_ctx = 0;
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd,
+ &cpi->common.fc->tx_probs);
bmode_costs = cpi->y_mode_costs[A][L];
+#if CONFIG_EXT_INTRA
+ ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.angle_delta[0] = 0;
+ memset(directional_mode_skip_mask, 0,
+ sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ highbd_angle_estimation(src, src_stride, rows, cols, hist);
+ else
+#endif
+ angle_estimation(src, src_stride, rows, cols, hist);
+
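+ // Smooth the gradient histogram over neighboring bins and skip any
+ // directional mode whose locally averaged gradient energy falls below
+ // ANGLE_SKIP_THRESH.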
+ for (mode = 0; mode < INTRA_MODES; ++mode) {
+ if (mode != DC_PRED && mode != TM_PRED) {
+ int index = get_angle_index((double)mode_to_angle_map[mode]);
+ double score, weight = 1.0;
+ score = hist[index];
+ if (index > 0) {
+ score += hist[index - 1] * 0.5;
+ weight += 0.5;
+ }
+ if (index < DIRECTIONAL_MODES - 1) {
+ score += hist[index + 1] * 0.5;
+ weight += 0.5;
+ }
+ score /= weight;
+ if (score < ANGLE_SKIP_THRESH)
+ directional_mode_skip_mask[mode] = 1;
+ }
+ }
+#endif // CONFIG_EXT_INTRA
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
+ palette_mode_info.palette_size[0] = 0;
+ mic->mbmi.palette_mode_info.palette_size[0] = 0;
+ if (above_mi)
+ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (left_mi)
+ palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
/* Y Search for intra prediction mode */
- for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
mic->mbmi.mode = mode;
-
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
+ if (is_directional_mode && directional_mode_skip_mask[mode])
+ continue;
+ if (is_directional_mode) {
+ rate_overhead = bmode_costs[mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ this_rate_tokenonly = INT_MAX;
+ this_rd =
+ rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rate_tokenonly,
+ &this_distortion, &s, bsize, rate_overhead,
+ best_rd);
+ } else {
+ mic->mbmi.angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, best_rd);
+ }
+#else
    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
-                    &s, NULL, bsize, best_rd);
+                    &s, NULL, bsize, best_rd);
+#endif // CONFIG_EXT_INTRA
if (this_rate_tokenonly == INT_MAX)
continue;
this_rate = this_rate_tokenonly + bmode_costs[mode];
+
+ if (!xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ // super_block_yrd above includes the cost of the tx_size in the
+ // tokenonly rate, but for intra blocks, tx_size is always coded
+ // (prediction granularity), so we account for it in the full rate,
+ // not the tokenonly rate.
+ this_rate_tokenonly -= vp10_cost_tx_size(mic->mbmi.tx_size, max_tx_size,
+ tx_probs);
+ }
+ if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
+ this_rate +=
+ vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
+ [palette_ctx], 0);
+#if CONFIG_EXT_INTRA
+ if (mode == DC_PRED && ALLOW_FILTER_INTRA_MODES)
+ this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
+ if (is_directional_mode) {
+ int p_angle;
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mic->mbmi.angle_delta[0]);
+ p_angle = mode_to_angle_map[mic->mbmi.mode] +
+ mic->mbmi.angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle))
+ this_rate +=
+ cpi->intra_filter_cost[intra_filter_ctx][mic->mbmi.intra_filter];
+ }
+#endif // CONFIG_EXT_INTRA
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
mode_selected = mode;
best_rd = this_rd;
best_tx = mic->mbmi.tx_size;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mic->mbmi.angle_delta[0];
+ best_filter = mic->mbmi.intra_filter;
+#endif // CONFIG_EXT_INTRA
best_tx_type = mic->mbmi.tx_type;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
@@ -1171,13 +2259,719 @@
}
}
+ if (cpi->common.allow_screen_content_tools)
+ rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
+ &palette_mode_info, best_palette_color_map,
+ &best_tx, &mode_selected, &best_rd);
+
+#if CONFIG_EXT_INTRA
+ if (palette_mode_info.palette_size[0] == 0 && ALLOW_FILTER_INTRA_MODES) {
+ if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
+ skippable, bsize, bmode_costs[DC_PRED],
+ &best_rd)) {
+ mode_selected = mic->mbmi.mode;
+ best_tx = mic->mbmi.tx_size;
+ ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
+ best_tx_type = mic->mbmi.tx_type;
+ }
+ }
+
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] =
+ ext_intra_mode_info.use_ext_intra_mode[0];
+ if (ext_intra_mode_info.use_ext_intra_mode[0]) {
+ mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
+ ext_intra_mode_info.ext_intra_mode[0];
+ }
+#endif // CONFIG_EXT_INTRA
+
mic->mbmi.mode = mode_selected;
mic->mbmi.tx_size = best_tx;
+#if CONFIG_EXT_INTRA
+ mic->mbmi.angle_delta[0] = best_angle_delta;
+ mic->mbmi.intra_filter = best_filter;
+#endif // CONFIG_EXT_INTRA
mic->mbmi.tx_type = best_tx_type;
+ mic->mbmi.palette_mode_info.palette_size[0] =
+ palette_mode_info.palette_size[0];
+ if (palette_mode_info.palette_size[0] > 0) {
+ memcpy(mic->mbmi.palette_mode_info.palette_colors,
+ palette_mode_info.palette_colors,
+ PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
+ memcpy(xd->plane[0].color_index_map, best_palette_color_map,
+ rows * cols * sizeof(best_palette_color_map[0]));
+ }
return best_rd;
}
+#if CONFIG_VAR_TX
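+// Rate/distortion for a single transform block under VAR_TX: forward
+// transform and quantize, reconstruct into a 32x32 scratch buffer, and
+// measure SSE against the source, falling back to an 8x8 walk for
+// blocks that extend past the frame boundary. The coefficient rate is
+// added via cost_coeffs, and *skip is cleared if the block produces any
+// non-zero coefficients.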
+void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
+ int blk_row, int blk_col, int plane, int block,
+ int plane_bsize, int coeff_ctx,
+ int *rate, int64_t *dist, int64_t *bsse, int *skip) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ unsigned int tmp_sse = 0;
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+
+ BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
+ int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize];
+ int src_stride = p->src.stride;
+ uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
+ uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]);
+ uint8_t *rec_buffer;
+#else
+ DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);
+#endif
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_B);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer_alloc_16);
+ vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
+ NULL, 0, NULL, 0, bh, bh, xd->bd);
+ } else {
+ rec_buffer = (uint8_t *)rec_buffer_alloc_16;
+ vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
+ NULL, 0, NULL, 0, bh, bh);
+ }
+#else
+ vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
+ NULL, 0, NULL, 0, bh, bh);
+#endif
+
+ if (blk_row + (bh >> 2) > max_blocks_high ||
+ blk_col + (bh >> 2) > max_blocks_wide) {
+ int idx, idy;
+ unsigned int this_sse;
+ int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
+ int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
+ for (idy = 0; idy < blocks_height; idy += 2) {
+ for (idx = 0; idx < blocks_width; idx += 2) {
+ cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
+ src_stride,
+ rec_buffer + 4 * idy * 32 + 4 * idx,
+ 32, &this_sse);
+ tmp_sse += this_sse;
+ }
+ }
+ } else {
+ cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
+ }
+
+ *bsse += (int64_t)tmp_sse * 16;
+
+ if (p->eobs[block] > 0) {
+ const int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ const int bd = xd->bd;
+ switch (tx_size) {
+ case TX_32X32:
+ vp10_highbd_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32,
+ p->eobs[block], bd, tx_type);
+ break;
+ case TX_16X16:
+ vp10_highbd_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32,
+ p->eobs[block], bd, tx_type);
+ break;
+ case TX_8X8:
+ vp10_highbd_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32,
+ p->eobs[block], bd, tx_type);
+ break;
+ case TX_4X4:
+ vp10_highbd_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32,
+ p->eobs[block], bd, tx_type, lossless);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ break;
+ }
+ } else {
+#else
+ {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ switch (tx_size) {
+ case TX_32X32:
+ vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type);
+ break;
+ case TX_16X16:
+ vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type);
+ break;
+ case TX_8X8:
+ vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type);
+ break;
+ case TX_4X4:
+ vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
+ tx_type, lossless);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ break;
+ }
+ }
+
+ if ((bh >> 2) + blk_col > max_blocks_wide ||
+ (bh >> 2) + blk_row > max_blocks_high) {
+ int idx, idy;
+ unsigned int this_sse;
+ int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
+ int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
+ tmp_sse = 0;
+ for (idy = 0; idy < blocks_height; idy += 2) {
+ for (idx = 0; idx < blocks_width; idx += 2) {
+ cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
+ src_stride,
+ rec_buffer + 4 * idy * 32 + 4 * idx,
+ 32, &this_sse);
+ tmp_sse += this_sse;
+ }
+ }
+ } else {
+ cpi->fn_ptr[txm_bsize].vf(src, src_stride,
+ rec_buffer, 32, &tmp_sse);
+ }
+ }
+ *dist += (int64_t)tmp_sse * 16;
+
+ *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
+ scan_order->scan, scan_order->neighbors, 0);
+ *skip &= (p->eobs[block] == 0);
+}
+
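+// Recursive RD search over the transform partition tree. The block is
+// first costed at the current tx_size (including the cost of signaling
+// "no split"), then the search recurses into the four quadrants at
+// tx_size - 1; the cheaper of the two options is kept, and the entropy
+// and transform-partition contexts are updated to match the decision.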
+static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
+ int blk_row, int blk_col, int plane, int block,
+ TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+ ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
+ TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
+ int *rate, int64_t *dist,
+ int64_t *bsse, int *skip,
+ int64_t ref_best_rd, int *is_cost_valid) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+ int64_t this_rd = INT64_MAX;
+ ENTROPY_CONTEXT *pta = ta + blk_col;
+ ENTROPY_CONTEXT *ptl = tl + blk_row;
+ ENTROPY_CONTEXT stxa = 0, stxl = 0;
+ int coeff_ctx, i;
+ int ctx = txfm_partition_context(tx_above + (blk_col >> 1),
+ tx_left + (blk_row >> 1), tx_size);
+
+ int64_t sum_dist = 0, sum_bsse = 0;
+ int64_t sum_rd = INT64_MAX;
+ int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
+ int all_skip = 1;
+ int tmp_eob = 0;
+ int zero_blk_rate;
+
+ if (ref_best_rd < 0) {
+ *is_cost_valid = 0;
+ return;
+ }
+
+ switch (tx_size) {
+ case TX_4X4:
+ stxa = pta[0];
+ stxl = ptl[0];
+ break;
+ case TX_8X8:
+ stxa = !!*(const uint16_t *)&pta[0];
+ stxl = !!*(const uint16_t *)&ptl[0];
+ break;
+ case TX_16X16:
+ stxa = !!*(const uint32_t *)&pta[0];
+ stxl = !!*(const uint32_t *)&ptl[0];
+ break;
+ case TX_32X32:
+ stxa = !!*(const uint64_t *)&pta[0];
+ stxl = !!*(const uint64_t *)&ptl[0];
+ break;
+ default:
+ assert(0 && "Invalid transform size.");
+ break;
+ }
+ coeff_ctx = combine_entropy_contexts(stxa, stxl);
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ *rate = 0;
+ *dist = 0;
+ *bsse = 0;
+ *skip = 1;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ zero_blk_rate =
+ x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];
+
+ if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
+ mbmi->inter_tx_size[tx_idx] = tx_size;
+ vp10_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+
+ if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
+ RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) || *skip == 1) &&
+ !xd->lossless[mbmi->segment_id]) {
+ *rate = zero_blk_rate;
+ *dist = *bsse;
+ *skip = 1;
+ x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1;
+ p->eobs[block] = 0;
+ } else {
+ x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0;
+ *skip = 0;
+ }
+
+ if (tx_size > TX_4X4)
+ *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
+ this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
+ tmp_eob = p->eobs[block];
+ }
+
+ if (tx_size > TX_4X4) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_height_log2_lookup[bsize];
+ int sub_step = 1 << (2 * (tx_size - 1));
+ int i;
+ int this_rate;
+ int64_t this_dist;
+ int64_t this_bsse;
+ int this_skip;
+ int this_cost_valid = 1;
+ int64_t tmp_rd = 0;
+
+ --bsl;
+ for (i = 0; i < 4 && this_cost_valid; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
+ plane, block + i * sub_step, tx_size - 1,
+ plane_bsize, ta, tl, tx_above, tx_left,
+ &this_rate, &this_dist,
+ &this_bsse, &this_skip,
+ ref_best_rd - tmp_rd, &this_cost_valid);
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_bsse += this_bsse;
+ all_skip &= this_skip;
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (this_rd < tmp_rd)
+ break;
+ }
+ if (this_cost_valid)
+ sum_rd = tmp_rd;
+ }
+
+ if (this_rd < sum_rd) {
+ int idx, idy;
+ for (i = 0; i < (1 << tx_size); ++i)
+ pta[i] = ptl[i] = !(tmp_eob == 0);
+ txfm_partition_update(tx_above + (blk_col >> 1),
+ tx_left + (blk_row >> 1), tx_size);
+ mbmi->inter_tx_size[tx_idx] = tx_size;
+
+ for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
+ for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
+ mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
+ mbmi->tx_size = tx_size;
+ if (this_rd == INT64_MAX)
+ *is_cost_valid = 0;
+ x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip;
+ } else {
+ *rate = sum_rate;
+ *dist = sum_dist;
+ *bsse = sum_bsse;
+ *skip = all_skip;
+ if (sum_rd == INT64_MAX)
+ *is_cost_valid = 0;
+ }
+}
+
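+// Luma RD cost with recursive transform-size selection: walk the plane
+// in max-transform-size steps, run select_tx_block on each unit, and
+// accumulate rate/distortion/sse, invalidating the result once the
+// running RD cost exceeds ref_best_rd.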
+static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int is_cost_valid = 1;
+ int64_t this_rd = 0;
+
+ if (ref_best_rd < 0)
+ is_cost_valid = 0;
+
+ *rate = 0;
+ *distortion = 0;
+ *sse = 0;
+ *skippable = 1;
+
+ if (is_cost_valid) {
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
+ ENTROPY_CONTEXT ctxa[16], ctxl[16];
+ TXFM_CONTEXT tx_above[8], tx_left[8];
+
+ int pnrate = 0, pnskip = 1;
+ int64_t pndist = 0, pnsse = 0;
+
+ vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
+ memcpy(tx_above, xd->above_txfm_context,
+ sizeof(TXFM_CONTEXT) * (mi_width >> 1));
+ memcpy(tx_left, xd->left_txfm_context,
+ sizeof(TXFM_CONTEXT) * (mi_height >> 1));
+
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ select_tx_block(cpi, x, idy, idx, 0, block,
+ max_txsize_lookup[plane_bsize], plane_bsize,
+ ctxa, ctxl, tx_above, tx_left,
+ &pnrate, &pndist, &pnsse, &pnskip,
+ ref_best_rd - this_rd, &is_cost_valid);
+ *rate += pnrate;
+ *distortion += pndist;
+ *sse += pnsse;
+ *skippable &= pnskip;
+ this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
+ RDCOST(x->rdmult, x->rddiv, 0, pnsse));
+ block += step;
+ }
+ }
+ }
+
+ this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
+ RDCOST(x->rdmult, x->rddiv, 0, *sse));
+ if (this_rd > ref_best_rd)
+ is_cost_valid = 0;
+
+ if (!is_cost_valid) {
+ // reset cost value
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ }
+}
+
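+// Joint search over transform type and the recursive transform-size
+// partition for an inter luma block: for each allowed tx_type, run
+// inter_block_yrd, add the tx_type signaling cost, and keep the best
+// candidate (with an ext_tx_th bias in favor of staying at DCT_DCT),
+// restoring the winning per-block transform sizes and skip flags at
+// the end.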
+static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int64_t rd = INT64_MAX;
+ int64_t best_rd = INT64_MAX;
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ const int is_inter = is_inter_block(mbmi);
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0 = vp10_cost_bit(skip_prob, 0);
+ int s1 = vp10_cost_bit(skip_prob, 1);
+ TX_SIZE best_tx_size[64];
+ TX_SIZE best_tx = TX_SIZES;
+ uint8_t best_blk_skip[256];
+ const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
+ int idx, idy;
+#if CONFIG_EXT_TX
+ int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
+#endif
+
+ *distortion = INT64_MAX;
+ *rate = INT_MAX;
+ *skippable = 0;
+ *sse = INT64_MAX;
+
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+ int this_rate = 0;
+ int this_skip = 1;
+ int64_t this_dist = 0;
+ int64_t this_sse = 0;
+#if CONFIG_EXT_TX
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ } else {
+ if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
+ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
+ continue;
+ }
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+
+ mbmi->tx_type = tx_type;
+
+ if (ext_tx_set == 1 &&
+ mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
+
+ inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
+ bsize, ref_best_rd);
+
+ if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ this_rate != INT_MAX) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ this_rate += cpi->inter_tx_type_costs[ext_tx_set]
+ [max_tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+#else // CONFIG_EXT_TX
+ if (max_tx_size >= TX_32X32 && tx_type != DCT_DCT)
+ continue;
+
+ mbmi->tx_type = tx_type;
+
+ inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
+ bsize, ref_best_rd);
+
+ if (max_tx_size < TX_32X32 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ this_rate != INT_MAX) {
+ if (is_inter)
+ this_rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
+ else
+ this_rate += cpi->intra_tx_type_costs[max_tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
+#endif // CONFIG_EXT_TX
+
+ if (this_rate == INT_MAX)
+ continue;
+
+ if (this_skip)
+ rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse);
+ else
+ rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist);
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip)
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse));
+
+ if (rd < (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * best_rd) {
+ best_rd = rd;
+ *distortion = this_dist;
+ *rate = this_rate;
+ *skippable = this_skip;
+ *sse = this_sse;
+ best_tx_type = mbmi->tx_type;
+ best_tx = mbmi->tx_size;
+ memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx];
+ }
+ }
+
+ mbmi->tx_type = best_tx_type;
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx];
+ mbmi->tx_size = best_tx;
+ memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
+}
+
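+// Fixed-partition counterpart of select_tx_block, used once the
+// transform sizes have been decided (e.g. for the chroma planes):
+// recurse down the partition tree until the coded inter_tx_size is
+// reached, then cost that block with vp10_tx_block_rd_b.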
+static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int blk_row, int blk_col, int plane, int block,
+ TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+ ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
+ int *rate, int64_t *dist, int64_t *bsse, int *skip) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
+ 0, 0) :
+ mbmi->inter_tx_size[tx_idx];
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ int coeff_ctx, i;
+ ENTROPY_CONTEXT *ta = above_ctx + blk_col;
+ ENTROPY_CONTEXT *tl = left_ctx + blk_row;
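+ // Width trick (assuming ENTROPY_CONTEXT is one byte and the context rows
+ // are contiguous): the casts below read the 2/4/8 per-4x4 contexts spanned
+ // by an 8x8/16x16/32x32 transform as one wider integer and collapse them
+ // to 0/1 with !!, i.e. "any nonzero context in the span".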
+ switch (tx_size) {
+ case TX_4X4:
+ break;
+ case TX_8X8:
+ ta[0] = !!*(const uint16_t *)&ta[0];
+ tl[0] = !!*(const uint16_t *)&tl[0];
+ break;
+ case TX_16X16:
+ ta[0] = !!*(const uint32_t *)&ta[0];
+ tl[0] = !!*(const uint32_t *)&tl[0];
+ break;
+ case TX_32X32:
+ ta[0] = !!*(const uint64_t *)&ta[0];
+ tl[0] = !!*(const uint64_t *)&tl[0];
+ break;
+ default:
+ assert(0 && "Invalid transform size.");
+ break;
+ }
+ coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
+ vp10_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+ for (i = 0; i < (1 << tx_size); ++i) {
+ ta[i] = !(p->eobs[block] == 0);
+ tl[i] = !(p->eobs[block] == 0);
+ }
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int step = 1 << (2 * (tx_size - 1));
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
+ block + i * step, tx_size - 1, plane_bsize,
+ above_ctx, left_ctx, rate, dist, bsse, skip);
+ }
+ }
+}
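+
+// Sketch of the recursion above (illustrative): the coded transform tree is
+// walked top-down. Where the signalled size (plane_tx_size) matches the
+// node, the block is costed directly; otherwise the node splits into four
+// children of size tx_size - 1, the coefficient index advancing by
+// step = 1 << (2 * (tx_size - 1)) 4x4 units per child.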
+
+// Return value 0: early termination triggered, no valid rd cost available;
+// 1: rd cost values are valid.
+static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int plane;
+ int is_cost_valid = 1;
+ int64_t this_rd;
+
+ if (ref_best_rd < 0)
+ is_cost_valid = 0;
+
+ if (is_inter_block(mbmi) && is_cost_valid) {
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane)
+ vp10_subtract_plane(x, bsize, plane);
+ }
+
+ *rate = 0;
+ *distortion = 0;
+ *sse = 0;
+ *skippable = 1;
+
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
+ int pnrate = 0, pnskip = 1;
+ int64_t pndist = 0, pnsse = 0;
+ ENTROPY_CONTEXT ta[16], tl[16];
+
+ vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
+
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ tx_block_rd(cpi, x, idy, idx, plane, block,
+ max_txsize_lookup[plane_bsize], plane_bsize, ta, tl,
+ &pnrate, &pndist, &pnsse, &pnskip);
+ block += step;
+ }
+ }
+
+ if (pnrate == INT_MAX) {
+ is_cost_valid = 0;
+ break;
+ }
+
+ *rate += pnrate;
+ *distortion += pndist;
+ *sse += pnsse;
+ *skippable &= pnskip;
+
+ this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
+ RDCOST(x->rdmult, x->rddiv, 0, *sse));
+
+ if (this_rd > ref_best_rd) {
+ is_cost_valid = 0;
+ break;
+ }
+ }
+
+ if (!is_cost_valid) {
+ // reset cost value
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ }
+
+ return is_cost_valid;
+}
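+
+// Note on the early-out above (illustrative): after each plane the running
+// totals are lower-bounded by the cheaper of coding the residual,
+// RDCOST(*rate, *distortion), and skipping it, RDCOST(0, *sse); once even
+// that optimistic bound exceeds ref_best_rd, the remaining planes cannot
+// recover, so the costs are invalidated and the search stops.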
+#endif // CONFIG_VAR_TX
+
// Return value 0: early termination triggered, no valid rd cost available;
// 1: rd cost values are valid.
static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
@@ -1207,7 +3001,11 @@
*skippable = 1;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
+ txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &pnrate, &pndist, &pnskip, &pnsse,
ref_best_rd, plane, bsize, uv_tx_size,
cpi->sf.use_fast_coef_costing);
if (pnrate == INT_MAX) {
@@ -1231,34 +3029,229 @@
return is_cost_valid;
}
+#if CONFIG_EXT_INTRA
+// Return 1 if an ext intra mode is selected; return 0 otherwise.
+static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int64_t *best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int ext_intra_selected_flag = 0;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+ EXT_INTRA_MODE mode;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+
+ vp10_zero(ext_intra_mode_info);
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
+ mbmi->uv_mode = DC_PRED;
+
+ for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, *best_rd))
+ continue;
+
+ this_rate = this_rate_tokenonly +
+ vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+ cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
+ write_uniform_cost(FILTER_INTRA_MODES, mode);
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ ext_intra_selected_flag = 1;
+ if (!x->select_tx_size)
+ swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+ }
+ }
+
+ if (ext_intra_selected_flag) {
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info.use_ext_intra_mode[1];
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info.ext_intra_mode[1];
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static void pick_intra_angle_routine_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ int *best_angle_delta,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t *best_rd) {
+ MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, *best_rd))
+ return;
+
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+}
+
+static int rd_pick_intra_angle_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+ int angle_delta, best_angle_delta = 0;
+ const double rd_adjust = 1.2;
+
+ (void)ctx;
+ *rate_tokenonly = INT_MAX;
+ if (ANGLE_FAST_SEARCH) {
+ int deltas_level1[3] = {0, -2, 2};
+ int deltas_level2[3][2] = {
+ {-1, 1}, {-3, -1}, {1, 3},
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;
+
+ for (i = 0; i < level1; ++i) {
+ mbmi->angle_delta[1] = deltas_level1[i];
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize,
+ (i == 0 && best_rd < INT64_MAX) ?
+ best_rd * rd_adjust : best_rd)) {
+ if (i == 0)
+ break;
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
+ break;
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+
+ if (best_i >= 0) {
+ for (j = 0; j < level2; ++j) {
+ mbmi->angle_delta[1] = deltas_level2[best_i][j];
+ pick_intra_angle_routine_sbuv(cpi, x, rate, rate_tokenonly,
+ distortion, skippable,
+ &best_angle_delta, bsize,
+ rate_overhead, &best_rd);
+ }
+ }
+ } else {
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {
+ mbmi->angle_delta[1] = angle_delta;
+ pick_intra_angle_routine_sbuv(cpi, x, rate, rate_tokenonly,
+ distortion, skippable,
+ &best_angle_delta, bsize,
+ rate_overhead, &best_rd);
+ }
+ }
+
+ mbmi->angle_delta[1] = best_angle_delta;
+ if (*rate_tokenonly != INT_MAX)
+ super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, INT64_MAX);
+ return *rate_tokenonly != INT_MAX;
+}
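+
+// The ANGLE_FAST_SEARCH path above is a two-level coarse-to-fine search
+// (illustrative summary): level 1 tries angle deltas {0, -2, +2}; level 2
+// then refines around the level-1 winner with the paired offsets from
+// deltas_level2. The first candidate is granted rd_adjust (1.2x) slack
+// against the incoming best_rd before the search is abandoned outright.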
+#endif // CONFIG_EXT_INTRA
+
static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
PREDICTION_MODE mode;
PREDICTION_MODE mode_selected = DC_PRED;
int64_t best_rd = INT64_MAX, this_rd;
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
+#if CONFIG_EXT_INTRA
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
+ xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
- xd->mi[0]->mbmi.uv_mode = mode;
-
+ mbmi->uv_mode = mode;
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
+ rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ mbmi->angle_delta[1] = 0;
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode) {
+ if (!rd_pick_intra_angle_sbuv(cpi, x, ctx, &this_rate,
+ &this_rate_tokenonly, &this_distortion, &s,
+ bsize, rate_overhead, best_rd))
+ continue;
+ } else {
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
+ continue;
+ }
+ this_rate = this_rate_tokenonly +
+ cpi->intra_uv_mode_cost[mbmi->mode][mode];
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[1]);
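+ // Note: the "&& 0" below makes this branch dead code, so the ext-intra
+ // cost term is never added here; kept verbatim from the merged change.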
+ if (mode == DC_PRED && 0)
+ this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
+#else
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd))
continue;
this_rate = this_rate_tokenonly +
cpi->intra_uv_mode_cost[xd->mi[0]->mbmi.mode][mode];
+#endif // CONFIG_EXT_INTRA
+
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
mode_selected = mode;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
best_rd = this_rd;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
@@ -1269,7 +3262,23 @@
}
}
- xd->mi[0]->mbmi.uv_mode = mode_selected;
+#if CONFIG_EXT_INTRA
+ if (mbmi->sb_type >= BLOCK_8X8 && ALLOW_FILTER_INTRA_MODES) {
+ if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion,
+ skippable, bsize, &best_rd)) {
+ mode_selected = mbmi->uv_mode;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ }
+ }
+
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info.use_ext_intra_mode[1];
+ if (ext_intra_mode_info.use_ext_intra_mode[1])
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info.ext_intra_mode[1];
+ mbmi->angle_delta[1] = best_angle_delta;
+#endif // CONFIG_EXT_INTRA
+ mbmi->uv_mode = mode_selected;
return best_rd;
}
@@ -1310,9 +3319,78 @@
}
static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode,
- int mode_context) {
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int is_compound,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ int16_t mode_context) {
+#if CONFIG_REF_MV
+ int mode_cost = 0;
+#if CONFIG_EXT_INTER
+ int16_t mode_ctx = is_compound ? mode_context :
+ (mode_context & NEWMV_CTX_MASK);
+#else
+ int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
+#endif // CONFIG_EXT_INTER
+ int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
+
assert(is_inter_mode(mode));
+
+#if CONFIG_EXT_INTER
+ if (is_compound) {
+ return cpi->inter_compound_mode_cost[mode_context]
+ [INTER_COMPOUND_OFFSET(mode)];
+ } else {
+ if (mode == NEWMV || mode == NEWFROMNEARMV) {
+#else
+ if (mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
+ mode_cost = cpi->newmv_mode_cost[mode_ctx][0];
+#if CONFIG_EXT_INTER
+ if (!is_compound)
+ mode_cost += cpi->new2mv_mode_cost[mode == NEWFROMNEARMV];
+#endif // CONFIG_EXT_INTER
+ return mode_cost;
+ } else {
+ mode_cost = cpi->newmv_mode_cost[mode_ctx][1];
+ mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+
+ if (is_all_zero_mv)
+ return mode_cost;
+
+ if (mode == ZEROMV) {
+ mode_cost += cpi->zeromv_mode_cost[mode_ctx][0];
+ return mode_cost;
+ } else {
+ mode_cost += cpi->zeromv_mode_cost[mode_ctx][1];
+ mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
+
+ if (mode_context & (1 << SKIP_NEARESTMV_OFFSET))
+ mode_ctx = 6;
+ if (mode_context & (1 << SKIP_NEARMV_OFFSET))
+ mode_ctx = 7;
+ if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET))
+ mode_ctx = 8;
+
+ mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
+ return mode_cost;
+ }
+ }
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#else
+ assert(is_inter_mode(mode));
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(mode)) {
+ return cpi->inter_compound_mode_cost[mode_context]
+ [INTER_COMPOUND_OFFSET(mode)];
+ } else {
+#endif // CONFIG_EXT_INTER
return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
}
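+
+// Illustrative note: under CONFIG_REF_MV the int16_t mode_context is a
+// packed bit-field, and the cost accumulates down a small decision tree:
+// NEWMV vs. the rest (NEWMV_CTX_MASK bits), then ZEROMV vs. the rest (bits
+// at ZEROMV_OFFSET), then NEARESTMV vs. NEARMV (bits at REFMV_OFFSET), with
+// the SKIP_* flags overriding the refmv context to the fixed values 6-8.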
static int set_and_cost_bmi_mvs(VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
@@ -1320,6 +3398,9 @@
PREDICTION_MODE mode, int_mv this_mv[2],
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
int_mv seg_mvs[MAX_REF_FRAMES],
+#if CONFIG_EXT_INTER
+ int_mv compound_seg_newmvs[2],
+#endif // CONFIG_EXT_INTER
int_mv *best_ref_mv[2], const int *mvjcost,
int *mvcost[2]) {
MODE_INFO *const mic = xd->mi[0];
@@ -1330,17 +3411,28 @@
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
const int is_compound = has_second_ref(mbmi);
+ int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
switch (mode) {
case NEWMV:
+#if CONFIG_EXT_INTER
+ case NEWFROMNEARMV:
+#endif // CONFIG_EXT_INTER
this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+#if CONFIG_EXT_INTER
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[0]->as_mv))
+ lower_mv_precision(&this_mv[0].as_mv, 0);
+#endif // CONFIG_EXT_INTER
thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+#if !CONFIG_EXT_INTER
if (is_compound) {
this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
}
+#endif // !CONFIG_EXT_INTER
break;
case NEARMV:
case NEARESTMV:
@@ -1353,6 +3445,60 @@
if (is_compound)
this_mv[1].as_int = 0;
break;
+#if CONFIG_EXT_INTER
+ case NEW_NEWMV:
+ if (compound_seg_newmvs[0].as_int == INVALID_MV ||
+ compound_seg_newmvs[1].as_int == INVALID_MV) {
+ this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+ this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+ } else {
+ this_mv[0].as_int = compound_seg_newmvs[0].as_int;
+ this_mv[1].as_int = compound_seg_newmvs[1].as_int;
+ }
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[0]->as_mv))
+ lower_mv_precision(&this_mv[0].as_mv, 0);
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[1]->as_mv))
+ lower_mv_precision(&this_mv[1].as_mv, 0);
+ thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv,
+ &best_ref_mv[0]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv,
+ &best_ref_mv[1]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ break;
+ case NEW_NEARMV:
+ case NEW_NEARESTMV:
+ this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[0]->as_mv))
+ lower_mv_precision(&this_mv[0].as_mv, 0);
+ thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
+ break;
+ case NEAR_NEWMV:
+ case NEAREST_NEWMV:
+ this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
+ this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[1]->as_mv))
+ lower_mv_precision(&this_mv[1].as_mv, 0);
+ thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ break;
+ case NEAREST_NEARMV:
+ case NEAR_NEARESTMV:
+ case NEAREST_NEARESTMV:
+ this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
+ this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
+ break;
+ case ZERO_ZEROMV:
+ this_mv[0].as_int = 0;
+ this_mv[1].as_int = 0;
+ break;
+#endif // CONFIG_EXT_INTER
default:
break;
}
@@ -1367,8 +3513,20 @@
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
- return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
- thismvcost;
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, mbmi->sb_type, i);
+#endif // CONFIG_REF_MV
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ return cost_mv_ref(cpi, mode, is_compound, mode_ctx) + thismvcost;
+#else
+ return cost_mv_ref(cpi, mode, mode_ctx) + thismvcost;
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
}
static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
@@ -1398,8 +3556,8 @@
pd->dst.stride)];
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i);
- const scan_order *so = get_scan(TX_4X4, tx_type);
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 1);
vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
@@ -1435,8 +3593,14 @@
for (idx = 0; idx < width / 4; ++idx) {
int64_t ssz, rd, rd1, rd2;
tran_low_t* coeff;
-
+#if CONFIG_VAR_TX
+ int coeff_ctx;
+#endif
k += (idy * 2 + idx);
+#if CONFIG_VAR_TX
+ coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)),
+ *(tl + (k >> 1)));
+#endif
coeff = BLOCK_OFFSET(p->coeff, k);
fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
@@ -1455,9 +3619,19 @@
16, &ssz);
#endif // CONFIG_VP9_HIGHBITDEPTH
thissse += ssz;
- thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
+#if CONFIG_VAR_TX
+ thisrate += cost_coeffs(x, 0, k, coeff_ctx,
+ TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
+ *(ta + (k & 1)) = !(p->eobs[k] == 0);
+ *(tl + (k >> 1)) = !(p->eobs[k] == 0);
+#else
+ thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1),
+ TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif
rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
rd = VPXMIN(rd1, rd2);
@@ -1481,6 +3655,9 @@
int64_t bsse;
int64_t brdcost;
int_mv mvs[2];
+#if CONFIG_EXT_INTER
+ int_mv ref_mv[2];
+#endif // CONFIG_EXT_INTER
ENTROPY_CONTEXT ta[2];
ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;
@@ -1495,7 +3672,11 @@
int64_t sse;
int segment_yrate;
PREDICTION_MODE modes[4];
+#if CONFIG_EXT_INTER
+ SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
+#else
SEG_RDSTAT rdstat[4][INTER_MODES];
+#endif // CONFIG_EXT_INTER
int mvthresh;
} BEST_SEG_INFO;
@@ -1530,24 +3711,40 @@
x->e_mbd.plane[0].pre[1] = orig_pre[1];
}
-static INLINE int mv_has_subpel(const MV *mv) {
- return (mv->row & 0x0F) || (mv->col & 0x0F);
-}
-
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive, then clean up or remove.
static int check_best_zero_mv(
- const VP10_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
+ const VP10_COMP *cpi, const int16_t mode_context[MAX_REF_FRAMES],
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ const int16_t compound_mode_context[MAX_REF_FRAMES],
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
- const MV_REFERENCE_FRAME ref_frames[2]) {
+ const MV_REFERENCE_FRAME ref_frames[2],
+ const BLOCK_SIZE bsize, int block) {
if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
(ref_frames[1] == NONE ||
frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
- int rfc = mode_context[ref_frames[0]];
+#if CONFIG_REF_MV
+ int16_t rfc = vp10_mode_context_analyzer(mode_context,
+ ref_frames, bsize, block);
+#else
+ int16_t rfc = mode_context[ref_frames[0]];
+#endif
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int c1 = cost_mv_ref(cpi, NEARMV, ref_frames[1] > INTRA_FRAME, rfc);
+ int c2 = cost_mv_ref(cpi, NEARESTMV, ref_frames[1] > INTRA_FRAME, rfc);
+ int c3 = cost_mv_ref(cpi, ZEROMV, ref_frames[1] > INTRA_FRAME, rfc);
+#else
int c1 = cost_mv_ref(cpi, NEARMV, rfc);
int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+
+#if !CONFIG_REF_MV
+ (void)bsize;
+ (void)block;
+#endif
if (this_mode == NEARMV) {
if (c1 > c3) return 0;
@@ -1568,6 +3765,56 @@
}
}
}
+#if CONFIG_EXT_INTER
+ else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAREST_NEARMV ||
+ this_mode == NEAR_NEARESTMV || this_mode == ZERO_ZEROMV) &&
+ frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
+ frame_mv[this_mode][ref_frames[1]].as_int == 0) {
+#if CONFIG_REF_MV
+ int16_t rfc = compound_mode_context[ref_frames[0]];
+ int c1 = cost_mv_ref(cpi, NEAREST_NEARMV, 1, rfc);
+ int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, 1, rfc);
+ int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, 1, rfc);
+ int c4 = cost_mv_ref(cpi, NEAR_NEARESTMV, 1, rfc);
+#else
+ int16_t rfc = mode_context[ref_frames[0]];
+ int c1 = cost_mv_ref(cpi, NEAREST_NEARMV, rfc);
+ int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
+ int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
+ int c4 = cost_mv_ref(cpi, NEAR_NEARESTMV, rfc);
+#endif
+
+ if (this_mode == NEAREST_NEARMV) {
+ if (c1 > c3) return 0;
+ } else if (this_mode == NEAREST_NEARESTMV) {
+ if (c2 > c3) return 0;
+ } else if (this_mode == NEAR_NEARESTMV) {
+ if (c4 > c3) return 0;
+ } else {
+ assert(this_mode == ZERO_ZEROMV);
+ if (ref_frames[1] == NONE) {
+ if ((c3 >= c2 &&
+ frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0) ||
+ (c3 >= c1 &&
+ frame_mv[NEAREST_NEARMV][ref_frames[0]].as_int == 0) ||
+ (c3 >= c4 &&
+ frame_mv[NEAR_NEARESTMV][ref_frames[0]].as_int == 0))
+ return 0;
+ } else {
+ if ((c3 >= c2 &&
+ frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
+ (c3 >= c1 &&
+ frame_mv[NEAREST_NEARMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEAREST_NEARMV][ref_frames[1]].as_int == 0) ||
+ (c3 >= c4 &&
+ frame_mv[NEAR_NEARESTMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEAR_NEARESTMV][ref_frames[1]].as_int == 0))
+ return 0;
+ }
+ }
+ }
+#endif // CONFIG_EXT_INTER
return 1;
}
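+
+// Example of the rule above (illustrative): when NEARMV also yields a zero
+// motion vector but costs more mode bits than ZEROMV (c1 > c3), searching
+// NEARMV cannot beat ZEROMV on this block, so it is rejected before any
+// rate-distortion work is spent on it.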
@@ -1575,6 +3822,9 @@
BLOCK_SIZE bsize,
int_mv *frame_mv,
int mi_row, int mi_col,
+#if CONFIG_EXT_INTER
+ int_mv* ref_mv_sub8x8[2],
+#endif
int_mv single_newmv[MAX_REF_FRAMES],
int *rate_mv) {
const VP10_COMMON *const cm = &cpi->common;
@@ -1586,7 +3836,7 @@
mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
int_mv ref_mv[2];
int ite, ref;
- const InterpKernel *kernel = vp10_filter_kernels[mbmi->interp_filter];
+ const INTERP_FILTER interp_filter = mbmi->interp_filter;
struct scale_factors sf;
// Do joint motion search in compound mode to get more accurate mv.
@@ -1606,6 +3856,11 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
for (ref = 0; ref < 2; ++ref) {
+#if CONFIG_EXT_INTER
+ if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
+ ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
+ else
+#endif // CONFIG_EXT_INTER
ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
if (scaled_ref_frame[ref]) {
@@ -1626,11 +3881,11 @@
// frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
- cm->width, cm->height,
- cm->use_highbitdepth);
+ cm->width, cm->height,
+ cm->use_highbitdepth);
#else
vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
- cm->width, cm->height);
+ cm->width, cm->height);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Allow joint search multiple times iteratively for each reference frame
@@ -1663,7 +3918,7 @@
second_pred, pw,
&frame_mv[refs[!id]].as_mv,
&sf, pw, ph, 0,
- kernel, MV_PRECISION_Q3,
+ interp_filter, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE,
xd->bd);
} else {
@@ -1673,7 +3928,7 @@
second_pred, pw,
&frame_mv[refs[!id]].as_mv,
&sf, pw, ph, 0,
- kernel, MV_PRECISION_Q3,
+ interp_filter, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE);
}
#else
@@ -1682,7 +3937,7 @@
second_pred, pw,
&frame_mv[refs[!id]].as_mv,
&sf, pw, ph, 0,
- kernel, MV_PRECISION_Q3,
+ interp_filter, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1749,9 +4004,18 @@
xd->plane[i].pre[ref] = backup_yv12[ref][i];
}
+#if CONFIG_EXT_INTER
+ if (bsize >= BLOCK_8X8)
+#endif // CONFIG_EXT_INTER
*rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
&x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+#if CONFIG_EXT_INTER
+ else
+ *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &ref_mv_sub8x8[ref]->as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+#endif // CONFIG_EXT_INTER
}
}
@@ -1763,7 +4027,12 @@
int64_t *returndistortion,
int *skippable, int64_t *psse,
int mvthresh,
+#if CONFIG_EXT_INTER
+ int_mv seg_mvs[4][2][MAX_REF_FRAMES],
+ int_mv compound_seg_newmvs[4][2],
+#else
int_mv seg_mvs[4][MAX_REF_FRAMES],
+#endif // CONFIG_EXT_INTER
BEST_SEG_INFO *bsi_buf, int filter_idx,
int mi_row, int mi_col) {
int i;
@@ -1822,28 +4091,87 @@
int64_t best_rd = INT64_MAX;
const int i = idy * 2 + idx;
int ref;
+#if CONFIG_EXT_INTER
+ int mv_idx;
+ int_mv ref_mvs_sub8x8[2][2];
+#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+#if CONFIG_EXT_INTER
+ int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
+ vp10_update_mv_context(cm, xd, mi, frame, mv_ref_list, i,
+ mi_row, mi_col, NULL);
+#endif // CONFIG_EXT_INTER
frame_mv[ZEROMV][frame].as_int = 0;
vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
+#if CONFIG_EXT_INTER
+ mv_ref_list,
+#endif // CONFIG_EXT_INTER
&frame_mv[NEARESTMV][frame],
- &frame_mv[NEARMV][frame],
- mbmi_ext->mode_context);
+ &frame_mv[NEARMV][frame]);
+#if CONFIG_EXT_INTER
+ mv_ref_list[0].as_int = frame_mv[NEARESTMV][frame].as_int;
+ mv_ref_list[1].as_int = frame_mv[NEARMV][frame].as_int;
+ vp10_find_best_ref_mvs(cm->allow_high_precision_mv, mv_ref_list,
+ &ref_mvs_sub8x8[0][ref], &ref_mvs_sub8x8[1][ref]);
+
+ if (has_second_rf) {
+ frame_mv[ZERO_ZEROMV][frame].as_int = 0;
+ frame_mv[NEAREST_NEARESTMV][frame].as_int =
+ frame_mv[NEARESTMV][frame].as_int;
+
+ if (ref == 0) {
+ frame_mv[NEAREST_NEARMV][frame].as_int =
+ frame_mv[NEARESTMV][frame].as_int;
+ frame_mv[NEAR_NEARESTMV][frame].as_int =
+ frame_mv[NEARMV][frame].as_int;
+ frame_mv[NEAREST_NEWMV][frame].as_int =
+ frame_mv[NEARESTMV][frame].as_int;
+ frame_mv[NEAR_NEWMV][frame].as_int =
+ frame_mv[NEARMV][frame].as_int;
+ } else if (ref == 1) {
+ frame_mv[NEAREST_NEARMV][frame].as_int =
+ frame_mv[NEARMV][frame].as_int;
+ frame_mv[NEAR_NEARESTMV][frame].as_int =
+ frame_mv[NEARESTMV][frame].as_int;
+ frame_mv[NEW_NEARESTMV][frame].as_int =
+ frame_mv[NEARESTMV][frame].as_int;
+ frame_mv[NEW_NEARMV][frame].as_int =
+ frame_mv[NEARMV][frame].as_int;
+ }
+ }
+#endif // CONFIG_EXT_INTER
}
// search for the best motion vector on this segment
+#if CONFIG_EXT_INTER
+ for (this_mode = (has_second_rf ? NEAREST_NEARESTMV : NEARESTMV);
+ this_mode <= (has_second_rf ? NEW_NEWMV : NEWFROMNEARMV);
+ ++this_mode) {
+#else
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
+#endif // CONFIG_EXT_INTER
const struct buf_2d orig_src = x->plane[0].src;
struct buf_2d orig_pre[2];
mode_idx = INTER_OFFSET(this_mode);
+#if CONFIG_EXT_INTER
+ mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0;
+
+ for (ref = 0; ref < 1 + has_second_rf; ++ref)
+ bsi->ref_mv[ref]->as_int = ref_mvs_sub8x8[mv_idx][ref].as_int;
+#endif // CONFIG_EXT_INTER
bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
if (!(inter_mode_mask & (1 << this_mode)))
continue;
- if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
- this_mode, mbmi->ref_frame))
+ if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ mbmi_ext->compound_mode_context,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ frame_mv,
+ this_mode, mbmi->ref_frame, bsize, i))
continue;
memcpy(orig_pre, pd->pre, sizeof(orig_pre));
@@ -1853,9 +4181,20 @@
sizeof(bsi->rdstat[i][mode_idx].tl));
// motion search for newmv (single predictor case only)
- if (!has_second_rf && this_mode == NEWMV &&
- seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
+ if (!has_second_rf &&
+#if CONFIG_EXT_INTER
+ have_newmv_in_inter_mode(this_mode) &&
+ seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV
+#else
+ this_mode == NEWMV &&
+ seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV
+#endif // CONFIG_EXT_INTER
+ ) {
+#if CONFIG_EXT_INTER
+ MV *const new_mv = &mode_mv[this_mode][0].as_mv;
+#else
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
+#endif // CONFIG_EXT_INTER
int step_param = 0;
int bestsme = INT_MAX;
int sadpb = x->sadperbit4;
@@ -1869,12 +4208,16 @@
break;
if (cpi->oxcf.mode != BEST) {
+#if CONFIG_EXT_INTER
+ bsi->mvp.as_int = bsi->ref_mv[0]->as_int;
+#else
// use previous block's result as next block's MV predictor.
if (i > 0) {
bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
if (i == 2)
bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
}
+#endif // CONFIG_EXT_INTER
}
if (i == 0)
max_mv = x->max_mv_context[mbmi->ref_frame[0]];
@@ -1929,7 +4272,11 @@
NULL, 0, 0);
// save motion search result for use in compound prediction
+#if CONFIG_EXT_INTER
+ seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_mv = *new_mv;
+#else
seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
+#endif // CONFIG_EXT_INTER
}
if (cpi->sf.adaptive_motion_search)
@@ -1940,24 +4287,47 @@
}
if (has_second_rf) {
+#if CONFIG_EXT_INTER
+ if (seg_mvs[i][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+ seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
+#else
if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
+#endif // CONFIG_EXT_INTER
continue;
}
- if (has_second_rf && this_mode == NEWMV &&
+ if (has_second_rf &&
+#if CONFIG_EXT_INTER
+ this_mode == NEW_NEWMV &&
+#else
+ this_mode == NEWMV &&
+#endif // CONFIG_EXT_INTER
mbmi->interp_filter == EIGHTTAP) {
// adjust src pointers
mi_buf_shift(x, i);
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
int rate_mv;
joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
- mi_row, mi_col, seg_mvs[i],
+ mi_row, mi_col,
+#if CONFIG_EXT_INTER
+ bsi->ref_mv,
+ seg_mvs[i][mv_idx],
+#else
+ seg_mvs[i],
+#endif // CONFIG_EXT_INTER
&rate_mv);
+#if CONFIG_EXT_INTER
+ compound_seg_newmvs[i][0].as_int =
+ frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
+ compound_seg_newmvs[i][1].as_int =
+ frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
+#else
seg_mvs[i][mbmi->ref_frame[0]].as_int =
frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
seg_mvs[i][mbmi->ref_frame[1]].as_int =
frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
+#endif // CONFIG_EXT_INTER
}
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
@@ -1965,7 +4335,14 @@
bsi->rdstat[i][mode_idx].brate =
set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode],
- frame_mv, seg_mvs[i], bsi->ref_mv,
+ frame_mv,
+#if CONFIG_EXT_INTER
+ seg_mvs[i][mv_idx],
+ compound_seg_newmvs[i],
+#else
+ seg_mvs[i],
+#endif // CONFIG_EXT_INTER
+ bsi->ref_mv,
x->nmvjointcost, x->mvcost);
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
@@ -1977,6 +4354,16 @@
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
mode_mv[this_mode][ref].as_int;
+#if CONFIG_EXT_INTER
+ bsi->rdstat[i][mode_idx].ref_mv[ref].as_int =
+ bsi->ref_mv[ref]->as_int;
+ if (num_4x4_blocks_wide > 1)
+ bsi->rdstat[i + 1][mode_idx].ref_mv[ref].as_int =
+ bsi->ref_mv[ref]->as_int;
+ if (num_4x4_blocks_high > 1)
+ bsi->rdstat[i + 2][mode_idx].ref_mv[ref].as_int =
+ bsi->ref_mv[ref]->as_int;
+#endif // CONFIG_EXT_INTER
}
// Trap vectors that reach beyond the UMV borders
@@ -1992,6 +4379,15 @@
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(this_mode))
+ have_ref &= (
+ (mode_mv[this_mode][ref].as_int ==
+ ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
+ (bsi->ref_mv[ref]->as_int ==
+ ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+ else
+#endif // CONFIG_EXT_INTER
have_ref &= mode_mv[this_mode][ref].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
}
@@ -2000,6 +4396,15 @@
ref_bsi = bsi_buf + 1;
have_ref = 1;
for (ref = 0; ref < 1 + has_second_rf; ++ref)
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(this_mode))
+ have_ref &= (
+ (mode_mv[this_mode][ref].as_int ==
+ ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
+ (bsi->ref_mv[ref]->as_int ==
+ ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+ else
+#endif // CONFIG_EXT_INTER
have_ref &= mode_mv[this_mode][ref].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
}
@@ -2053,7 +4458,11 @@
if (best_rd == INT64_MAX) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
+#if CONFIG_EXT_INTER
+ for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
+#else
for (midx = 0; midx < INTER_MODES; ++midx)
+#endif // CONFIG_EXT_INTER
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
return INT64_MAX;
@@ -2063,9 +4472,21 @@
memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
+#if CONFIG_EXT_INTER
+ mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
+ bsi->ref_mv[0]->as_int = bsi->rdstat[i][mode_idx].ref_mv[0].as_int;
+ if (has_second_rf)
+ bsi->ref_mv[1]->as_int = bsi->rdstat[i][mode_idx].ref_mv[1].as_int;
+#endif // CONFIG_EXT_INTER
set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
- frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
- x->mvcost);
+ frame_mv,
+#if CONFIG_EXT_INTER
+ seg_mvs[i][mv_idx],
+ compound_seg_newmvs[i],
+#else
+ seg_mvs[i],
+#endif // CONFIG_EXT_INTER
+ bsi->ref_mv, x->nmvjointcost, x->mvcost);
br += bsi->rdstat[i][mode_idx].brate;
bd += bsi->rdstat[i][mode_idx].bdist;
@@ -2076,7 +4497,11 @@
if (this_segment_rd > bsi->segment_rd) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
+#if CONFIG_EXT_INTER
+ for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
+#else
for (midx = 0; midx < INTER_MODES; ++midx)
+#endif // CONFIG_EXT_INTER
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
return INT64_MAX;
@@ -2102,6 +4527,11 @@
mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
if (has_second_ref(mbmi))
mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
+#if CONFIG_EXT_INTER
+ mi->bmi[i].ref_mv[0].as_int = bsi->rdstat[i][mode_idx].ref_mv[0].as_int;
+ if (has_second_rf)
+ mi->bmi[i].ref_mv[1].as_int = bsi->rdstat[i][mode_idx].ref_mv[1].as_int;
+#endif // CONFIG_EXT_INTER
x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
mi->bmi[i].as_mode = bsi->modes[i];
}
@@ -2147,34 +4577,108 @@
if (cm->reference_mode != COMPOUND_REFERENCE) {
vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
+#if CONFIG_EXT_REFS
+ vpx_prob ref_single_p3 = vp10_get_pred_prob_single_ref_p3(cm, xd);
+ vpx_prob ref_single_p4 = vp10_get_pred_prob_single_ref_p4(cm, xd);
+ vpx_prob ref_single_p5 = vp10_get_pred_prob_single_ref_p5(cm, xd);
+#endif // CONFIG_EXT_REFS
unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
if (cm->reference_mode == REFERENCE_MODE_SELECT)
base_cost += vp10_cost_bit(comp_inter_p, 0);
- ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
+ ref_costs_single[LAST_FRAME] =
+#if CONFIG_EXT_REFS
+ ref_costs_single[LAST2_FRAME] =
+ ref_costs_single[LAST3_FRAME] =
+ ref_costs_single[LAST4_FRAME] =
+#endif // CONFIG_EXT_REFS
+ ref_costs_single[GOLDEN_FRAME] =
ref_costs_single[ALTREF_FRAME] = base_cost;
+
+#if CONFIG_EXT_REFS
+ ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
+ ref_costs_single[LAST2_FRAME] += vp10_cost_bit(ref_single_p1, 0);
+ ref_costs_single[LAST3_FRAME] += vp10_cost_bit(ref_single_p1, 0);
+ ref_costs_single[LAST4_FRAME] += vp10_cost_bit(ref_single_p1, 0);
+ ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
+ ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
+
+ ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p3, 0);
+ ref_costs_single[LAST2_FRAME] += vp10_cost_bit(ref_single_p3, 0);
+ ref_costs_single[LAST3_FRAME] += vp10_cost_bit(ref_single_p3, 1);
+ ref_costs_single[LAST4_FRAME] += vp10_cost_bit(ref_single_p3, 1);
+ ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
+ ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
+
+ ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p4, 0);
+ ref_costs_single[LAST2_FRAME] += vp10_cost_bit(ref_single_p4, 1);
+ ref_costs_single[LAST3_FRAME] += vp10_cost_bit(ref_single_p5, 0);
+ ref_costs_single[LAST4_FRAME] += vp10_cost_bit(ref_single_p5, 1);
+#else
ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
+#endif // CONFIG_EXT_REFS
} else {
ref_costs_single[LAST_FRAME] = 512;
+#if CONFIG_EXT_REFS
+ ref_costs_single[LAST2_FRAME] = 512;
+ ref_costs_single[LAST3_FRAME] = 512;
+ ref_costs_single[LAST4_FRAME] = 512;
+#endif // CONFIG_EXT_REFS
ref_costs_single[GOLDEN_FRAME] = 512;
ref_costs_single[ALTREF_FRAME] = 512;
}
+
if (cm->reference_mode != SINGLE_REFERENCE) {
vpx_prob ref_comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd);
+#if CONFIG_EXT_REFS
+ vpx_prob ref_comp_p1 = vp10_get_pred_prob_comp_ref_p1(cm, xd);
+ vpx_prob ref_comp_p2 = vp10_get_pred_prob_comp_ref_p2(cm, xd);
+ vpx_prob ref_comp_p3 = vp10_get_pred_prob_comp_ref_p3(cm, xd);
+#endif // CONFIG_EXT_REFS
unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
if (cm->reference_mode == REFERENCE_MODE_SELECT)
base_cost += vp10_cost_bit(comp_inter_p, 1);
- ref_costs_comp[LAST_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 0);
- ref_costs_comp[GOLDEN_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 1);
+ ref_costs_comp[LAST_FRAME] =
+#if CONFIG_EXT_REFS
+ ref_costs_comp[LAST2_FRAME] =
+ ref_costs_comp[LAST3_FRAME] =
+ ref_costs_comp[LAST4_FRAME] =
+#endif // CONFIG_EXT_REFS
+ ref_costs_comp[GOLDEN_FRAME] = base_cost;
+
+#if CONFIG_EXT_REFS
+ ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p, 0);
+ ref_costs_comp[LAST2_FRAME] += vp10_cost_bit(ref_comp_p, 0);
+ ref_costs_comp[LAST3_FRAME] += vp10_cost_bit(ref_comp_p, 1);
+ ref_costs_comp[LAST4_FRAME] += vp10_cost_bit(ref_comp_p, 1);
+ ref_costs_comp[GOLDEN_FRAME] += vp10_cost_bit(ref_comp_p, 1);
+
+ ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p1, 1);
+ ref_costs_comp[LAST2_FRAME] += vp10_cost_bit(ref_comp_p1, 0);
+ ref_costs_comp[LAST3_FRAME] += vp10_cost_bit(ref_comp_p2, 0);
+ ref_costs_comp[LAST4_FRAME] += vp10_cost_bit(ref_comp_p2, 0);
+ ref_costs_comp[GOLDEN_FRAME] += vp10_cost_bit(ref_comp_p2, 1);
+
+ ref_costs_comp[LAST3_FRAME] += vp10_cost_bit(ref_comp_p3, 1);
+ ref_costs_comp[LAST4_FRAME] += vp10_cost_bit(ref_comp_p3, 0);
+#else
+ ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p, 0);
+ ref_costs_comp[GOLDEN_FRAME] += vp10_cost_bit(ref_comp_p, 1);
+#endif // CONFIG_EXT_REFS
} else {
ref_costs_comp[LAST_FRAME] = 512;
+#if CONFIG_EXT_REFS
+ ref_costs_comp[LAST2_FRAME] = 512;
+ ref_costs_comp[LAST3_FRAME] = 512;
+ ref_costs_comp[LAST4_FRAME] = 512;
+#endif // CONFIG_EXT_REFS
ref_costs_comp[GOLDEN_FRAME] = 512;
}
}
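+
+// Illustrative reading of the tables above: with CONFIG_EXT_REFS a single
+// reference is signalled down a binary tree, so each frame's cost is the
+// base cost plus the bits along its path, e.g.
+//   cost(LAST_FRAME)   = base + bit(p1, 0) + bit(p3, 0) + bit(p4, 0)
+//   cost(ALTREF_FRAME) = base + bit(p1, 1) + bit(p2, 1)
+// The constant 512 used when the reference mode is fixed corresponds to one
+// bit at probability one-half in libvpx's 512-units-per-bit cost scale.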
@@ -2202,13 +4706,14 @@
sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}
-static void setup_buffer_inter(VP10_COMP *cpi, MACROBLOCK *x,
- MV_REFERENCE_FRAME ref_frame,
- BLOCK_SIZE block_size,
- int mi_row, int mi_col,
- int_mv frame_nearest_mv[MAX_REF_FRAMES],
- int_mv frame_near_mv[MAX_REF_FRAMES],
- struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
+static void setup_buffer_inter(
+ VP10_COMP *cpi, MACROBLOCK *x,
+ MV_REFERENCE_FRAME ref_frame,
+ BLOCK_SIZE block_size,
+ int mi_row, int mi_col,
+ int_mv frame_nearest_mv[MAX_REF_FRAMES],
+ int_mv frame_near_mv[MAX_REF_FRAMES],
+ struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE]) {
const VP10_COMMON *cm = &cpi->common;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2224,8 +4729,16 @@
vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
// Gets an initial list of candidate vectors from neighbours and orders them
- vp10_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
- NULL, NULL, mbmi_ext->mode_context);
+ vp10_find_mv_refs(cm, xd, mi, ref_frame,
+#if CONFIG_REF_MV
+ &mbmi_ext->ref_mv_count[ref_frame],
+ mbmi_ext->ref_mv_stack[ref_frame],
+#if CONFIG_EXT_INTER
+ mbmi_ext->compound_mode_context,
+#endif // CONFIG_EXT_INTER
+#endif
+ candidates, mi_row, mi_col,
+ NULL, NULL, mbmi_ext->mode_context);
// Candidate refinement carried out at encoder and decoder
vp10_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
@@ -2243,6 +4756,10 @@
static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
int mi_row, int mi_col,
+#if CONFIG_EXT_INTER
+ int ref_idx,
+ int mv_idx,
+#endif // CONFIG_EXT_INTER
int_mv *tmp_mv, int *rate_mv) {
MACROBLOCKD *xd = &x->e_mbd;
const VP10_COMMON *cm = &cpi->common;
@@ -2252,8 +4769,13 @@
int step_param;
int sadpb = x->sadperbit16;
MV mvp_full;
+#if CONFIG_EXT_INTER
+ int ref = mbmi->ref_frame[ref_idx];
+ MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
+#else
int ref = mbmi->ref_frame[0];
MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+#endif // CONFIG_EXT_INTER
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
@@ -2368,8 +4890,6 @@
}
}
-
-
static INLINE void restore_dst_buf(MACROBLOCKD *xd,
uint8_t *orig_dst[MAX_MB_PLANE],
int orig_dst_stride[MAX_MB_PLANE]) {
@@ -2413,6 +4933,118 @@
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
+static INTERP_FILTER predict_interp_filter(
+ const VP10_COMP *cpi, const MACROBLOCK *x,
+ const BLOCK_SIZE bsize, const int mi_row, const int mi_col,
+ INTERP_FILTER (*single_filter)[MAX_REF_FRAMES]) {
+ INTERP_FILTER best_filter = SWITCHABLE;
+
+ const VP10_COMMON *cm = &cpi->common;
+ const MACROBLOCKD *xd = &x->e_mbd;
+ int bsl = mi_width_log2_lookup[bsize];
+ int pred_filter_search = cpi->sf.cb_pred_filter_search ?
+ (((mi_row + mi_col) >> bsl) +
+ get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ const int is_comp_pred = has_second_ref(mbmi);
+ const int this_mode = mbmi->mode;
+ int refs[2] = { mbmi->ref_frame[0],
+ (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
+
+ if (pred_filter_search) {
+ INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
+ if (xd->up_available)
+ af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
+ if (xd->left_available)
+ lf = xd->mi[-1]->mbmi.interp_filter;
+
+#if CONFIG_EXT_INTER
+ if ((this_mode != NEWMV && this_mode != NEWFROMNEARMV &&
+ this_mode != NEW_NEWMV) || (af == lf))
+#else
+ if ((this_mode != NEWMV) || (af == lf))
+#endif // CONFIG_EXT_INTER
+ best_filter = af;
+ }
+ if (is_comp_pred) {
+ if (cpi->sf.adaptive_mode_search) {
+#if CONFIG_EXT_INTER
+ switch (this_mode) {
+ case NEAREST_NEARESTMV:
+ if (single_filter[NEARESTMV][refs[0]] ==
+ single_filter[NEARESTMV][refs[1]])
+ best_filter = single_filter[NEARESTMV][refs[0]];
+ break;
+ case NEAREST_NEARMV:
+ if (single_filter[NEARESTMV][refs[0]] ==
+ single_filter[NEARMV][refs[1]])
+ best_filter = single_filter[NEARESTMV][refs[0]];
+ break;
+ case NEAR_NEARESTMV:
+ if (single_filter[NEARMV][refs[0]] ==
+ single_filter[NEARESTMV][refs[1]])
+ best_filter = single_filter[NEARMV][refs[0]];
+ break;
+ case ZERO_ZEROMV:
+ if (single_filter[ZEROMV][refs[0]] ==
+ single_filter[ZEROMV][refs[1]])
+ best_filter = single_filter[ZEROMV][refs[0]];
+ break;
+ case NEW_NEWMV:
+ if (single_filter[NEWMV][refs[0]] ==
+ single_filter[NEWMV][refs[1]])
+ best_filter = single_filter[NEWMV][refs[0]];
+ break;
+ case NEAREST_NEWMV:
+ if (single_filter[NEARESTMV][refs[0]] ==
+ single_filter[NEWMV][refs[1]])
+ best_filter = single_filter[NEARESTMV][refs[0]];
+ break;
+ case NEAR_NEWMV:
+ if (single_filter[NEARMV][refs[0]] ==
+ single_filter[NEWMV][refs[1]])
+ best_filter = single_filter[NEARMV][refs[0]];
+ break;
+ case NEW_NEARESTMV:
+ if (single_filter[NEWMV][refs[0]] ==
+ single_filter[NEARESTMV][refs[1]])
+ best_filter = single_filter[NEWMV][refs[0]];
+ break;
+ case NEW_NEARMV:
+ if (single_filter[NEWMV][refs[0]] ==
+ single_filter[NEARMV][refs[1]])
+ best_filter = single_filter[NEWMV][refs[0]];
+ break;
+ default:
+ if (single_filter[this_mode][refs[0]] ==
+ single_filter[this_mode][refs[1]])
+ best_filter = single_filter[this_mode][refs[0]];
+ break;
+ }
+#else
+ if (single_filter[this_mode][refs[0]] ==
+ single_filter[this_mode][refs[1]])
+ best_filter = single_filter[this_mode][refs[0]];
+#endif // CONFIG_EXT_INTER
+ }
+ }
+ if (cm->interp_filter != BILINEAR) {
+ if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
+ best_filter = EIGHTTAP;
+ }
+#if CONFIG_EXT_INTERP
+ else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
+ best_filter = EIGHTTAP;
+ }
+#endif
+ }
+ return best_filter;
+}
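+
+// Summary of the heuristic above (illustrative): the filter is predicted
+// from spatial neighbours on a checkerboard of blocks (cb_pred_filter_search),
+// reused from the single-reference passes when both references of a compound
+// mode agreed on a filter, and forced to EIGHTTAP when the source variance is
+// below the search threshold (or, with CONFIG_EXT_INTERP, when no
+// interpolation is actually needed).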
+
static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
int *rate2, int64_t *distortion,
@@ -2421,7 +5053,11 @@
int *disable_skip,
int_mv (*mode_mv)[MAX_REF_FRAMES],
int mi_row, int mi_col,
+#if CONFIG_EXT_INTER
+ int_mv single_newmvs[2][MAX_REF_FRAMES],
+#else
int_mv single_newmv[MAX_REF_FRAMES],
+#endif // CONFIG_EXT_INTER
INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
int (*single_skippable)[MAX_REF_FRAMES],
int64_t *psse,
@@ -2439,6 +5075,13 @@
int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
int_mv cur_mv[2];
+#if CONFIG_EXT_INTER
+ int mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0;
+ int_mv single_newmv[MAX_REF_FRAMES];
+#if CONFIG_REF_MV
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+#endif
+#endif // CONFIG_EXT_INTER
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
uint8_t *tmp_buf;
@@ -2456,14 +5099,20 @@
uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
int64_t bsse[MAX_MB_PLANE << 2] = {0};
- int bsl = mi_width_log2_lookup[bsize];
- int pred_filter_search = cpi->sf.cb_pred_filter_search ?
- (((mi_row + mi_col) >> bsl) +
- get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
-
int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX;
int64_t distortion_y = 0, distortion_uv = 0;
+ int16_t mode_ctx = mbmi_ext->mode_context[refs[0]];
+
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_comp_pred)
+ mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, -1);
+#endif // CONFIG_REF_MV
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -2473,39 +5122,62 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (pred_filter_search) {
- INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
- if (xd->up_available)
- af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
- if (xd->left_available)
- lf = xd->mi[-1]->mbmi.interp_filter;
-
- if ((this_mode != NEWMV) || (af == lf))
- best_filter = af;
- }
-
if (is_comp_pred) {
if (frame_mv[refs[0]].as_int == INVALID_MV ||
frame_mv[refs[1]].as_int == INVALID_MV)
return INT64_MAX;
-
- if (cpi->sf.adaptive_mode_search) {
- if (single_filter[this_mode][refs[0]] ==
- single_filter[this_mode][refs[1]])
- best_filter = single_filter[this_mode][refs[0]];
- }
}
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(this_mode)) {
+#else
if (this_mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
int rate_mv;
if (is_comp_pred) {
+#if CONFIG_EXT_INTER
+ for (i = 0; i < 2; ++i) {
+ single_newmv[refs[i]].as_int =
+ single_newmvs[mv_idx][refs[i]].as_int;
+ }
+
+ if (this_mode == NEW_NEWMV) {
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
+
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ joint_motion_search(cpi, x, bsize, frame_mv,
+ mi_row, mi_col, NULL, single_newmv, &rate_mv);
+ } else {
+ rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost,
+ MV_COST_WEIGHT);
+ rate_mv += vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
+ x->nmvjointcost, x->mvcost,
+ MV_COST_WEIGHT);
+ }
+ } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
+ rate_mv = vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ } else {
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
+#else
// Initialize mv using single prediction mode result.
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
joint_motion_search(cpi, x, bsize, frame_mv,
- mi_row, mi_col, single_newmv, &rate_mv);
+ mi_row, mi_col,
+ single_newmv, &rate_mv);
} else {
rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
&x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
@@ -2514,17 +5186,25 @@
&x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
+#endif // CONFIG_EXT_INTER
*rate2 += rate_mv;
} else {
int_mv tmp_mv;
single_motion_search(cpi, x, bsize, mi_row, mi_col,
+#if CONFIG_EXT_INTER
+ 0, mv_idx,
+#endif // CONFIG_EXT_INTER
&tmp_mv, &rate_mv);
if (tmp_mv.as_int == INVALID_MV)
return INT64_MAX;
frame_mv[refs[0]].as_int =
xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
+#if CONFIG_EXT_INTER
+ single_newmvs[mv_idx][refs[0]].as_int = tmp_mv.as_int;
+#else
single_newmv[refs[0]].as_int = tmp_mv.as_int;
+#endif // CONFIG_EXT_INTER
// Estimate the rate implications of a new mv but discount this
// under certain circumstances where we want to help initiate a weak
@@ -2541,7 +5221,11 @@
for (i = 0; i < is_comp_pred + 1; ++i) {
cur_mv[i] = frame_mv[refs[i]];
// Clip "next_nearest" so that it does not extend to far out of image
+#if CONFIG_EXT_INTER
+ if (this_mode != NEWMV && this_mode != NEWFROMNEARMV)
+#else
if (this_mode != NEWMV)
+#endif // CONFIG_EXT_INTER
clamp_mv2(&cur_mv[i].as_mv, xd);
if (mv_check_bounds(x, &cur_mv[i].as_mv))
@@ -2549,6 +5233,91 @@
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (this_mode == NEAREST_NEARESTMV) {
+#else
+ if (this_mode == NEARESTMV && is_comp_pred) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+#endif // CONFIG_EXT_INTER
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+
+ for (i = 0; i < 2; ++i) {
+ lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[i].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[i].as_mv))
+ return INT64_MAX;
+ mbmi->mv[i].as_int = cur_mv[i].as_int;
+ }
+ }
+ }
+
+#if CONFIG_EXT_INTER
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
+ if (this_mode == NEAREST_NEWMV || this_mode == NEAREST_NEARMV) {
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
+
+ lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[0].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[0].as_mv))
+ return INT64_MAX;
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ }
+
+ if (this_mode == NEW_NEARESTMV || this_mode == NEAR_NEARESTMV) {
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+
+ lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[1].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[1].as_mv))
+ return INT64_MAX;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ }
+ }
+
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARESTMV) {
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][1].this_mv;
+
+ lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[0].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[0].as_mv))
+ return INT64_MAX;
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ }
+
+ if (this_mode == NEW_NEARMV || this_mode == NEAREST_NEARMV) {
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][1].comp_mv;
+
+ lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[1].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[1].as_mv))
+ return INT64_MAX;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ }
+ }
+#else
+ if (this_mode == NEARMV && is_comp_pred) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ int ref_mv_idx = mbmi->ref_mv_idx + 1;
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+
+ for (i = 0; i < 2; ++i) {
+ lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[i].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[i].as_mv))
+ return INT64_MAX;
+ mbmi->mv[i].as_int = cur_mv[i].as_int;
+ }
+ }
+ }
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
+
// do first prediction into the destination buffer. Do the next
// prediction into a temporary buffer. Then keep track of which one
// of these currently holds the best predictor, and use the other
@@ -2568,16 +5337,27 @@
// initiation of a motion field.
if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
mode_mv, refs[0])) {
- *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
- mbmi_ext->mode_context[refs[0]]),
- cost_mv_ref(cpi, NEARESTMV,
- mbmi_ext->mode_context[refs[0]]));
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, is_comp_pred, mode_ctx),
+ cost_mv_ref(cpi, NEARESTMV, is_comp_pred, mode_ctx));
+#else
+ *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
+ cost_mv_ref(cpi, NEARESTMV, mode_ctx));
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
} else {
- *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ *rate2 += cost_mv_ref(cpi, this_mode, is_comp_pred, mode_ctx);
+#else
+ *rate2 += cost_mv_ref(cpi, this_mode, mode_ctx);
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
}
if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
+#if CONFIG_EXT_INTER
+ mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV)
+#else
mbmi->mode != NEARESTMV)
+#endif // CONFIG_EXT_INTER
return INT64_MAX;
pred_exists = 0;
@@ -2591,102 +5371,102 @@
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
- if (cm->interp_filter != BILINEAR) {
- if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
- best_filter = EIGHTTAP;
- } else if (best_filter == SWITCHABLE) {
- int newbest;
- int tmp_rate_sum = 0;
- int64_t tmp_dist_sum = 0;
+ best_filter = predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
+ single_filter);
+ if (cm->interp_filter != BILINEAR && best_filter == SWITCHABLE) {
+ int newbest;
+ int tmp_rate_sum = 0;
+ int64_t tmp_dist_sum = 0;
- for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
- int j;
- int64_t rs_rd;
- int tmp_skip_sb = 0;
- int64_t tmp_skip_sse = INT64_MAX;
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+ int j;
+ int64_t rs_rd;
+ int tmp_skip_sb = 0;
+ int64_t tmp_skip_sse = INT64_MAX;
- mbmi->interp_filter = i;
- rs = vp10_get_switchable_rate(cpi, xd);
- rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+ mbmi->interp_filter = i;
+ rs = vp10_get_switchable_rate(cpi, xd);
+ rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- if (i > 0 && intpel_mv) {
- rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
- filter_cache[i] = rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
- if (cm->interp_filter == SWITCHABLE)
- rd += rs_rd;
- *mask_filter = VPXMAX(*mask_filter, rd);
- } else {
- int rate_sum = 0;
- int64_t dist_sum = 0;
- if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
- (cpi->sf.interp_filter_search_mask & (1 << i))) {
- rate_sum = INT_MAX;
- dist_sum = INT64_MAX;
- continue;
- }
-
- if ((cm->interp_filter == SWITCHABLE &&
- (!i || best_needs_copy)) ||
- (cm->interp_filter != SWITCHABLE &&
- (cm->interp_filter == mbmi->interp_filter ||
- (i == 0 && intpel_mv)))) {
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
- } else {
- for (j = 0; j < MAX_MB_PLANE; j++) {
- xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
- xd->plane[j].dst.stride = 64;
- }
- }
- vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
- &tmp_skip_sb, &tmp_skip_sse);
-
- rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
- filter_cache[i] = rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
- if (cm->interp_filter == SWITCHABLE)
- rd += rs_rd;
- *mask_filter = VPXMAX(*mask_filter, rd);
-
- if (i == 0 && intpel_mv) {
- tmp_rate_sum = rate_sum;
- tmp_dist_sum = dist_sum;
- }
+ if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
+ rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
+ filter_cache[i] = rd;
+ filter_cache[SWITCHABLE_FILTERS] =
+ VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+ if (cm->interp_filter == SWITCHABLE)
+ rd += rs_rd;
+ *mask_filter = VPXMAX(*mask_filter, rd);
+ } else {
+ int rate_sum = 0;
+ int64_t dist_sum = 0;
+ if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
+ (cpi->sf.interp_filter_search_mask & (1 << i))) {
+ rate_sum = INT_MAX;
+ dist_sum = INT64_MAX;
+ continue;
}
- if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
- if (rd / 2 > ref_best_rd) {
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
- return INT64_MAX;
- }
- }
- newbest = i == 0 || rd < best_rd;
-
- if (newbest) {
- best_rd = rd;
- best_filter = mbmi->interp_filter;
- if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
- best_needs_copy = !best_needs_copy;
- }
-
- if ((cm->interp_filter == SWITCHABLE && newbest) ||
+ if ((cm->interp_filter == SWITCHABLE &&
+ (!i || best_needs_copy)) ||
(cm->interp_filter != SWITCHABLE &&
- cm->interp_filter == mbmi->interp_filter)) {
- pred_exists = 1;
- tmp_rd = best_rd;
+ (cm->interp_filter == mbmi->interp_filter ||
+ (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ } else {
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
+ xd->plane[j].dst.stride = 64;
+ }
+ }
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+ &tmp_skip_sb, &tmp_skip_sse);
- skip_txfm_sb = tmp_skip_sb;
- skip_sse_sb = tmp_skip_sse;
- memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
- memcpy(bsse, x->bsse, sizeof(bsse));
+ rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
+ filter_cache[i] = rd;
+ filter_cache[SWITCHABLE_FILTERS] =
+ VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+ if (cm->interp_filter == SWITCHABLE)
+ rd += rs_rd;
+ *mask_filter = VPXMAX(*mask_filter, rd);
+
+ if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
+ tmp_rate_sum = rate_sum;
+ tmp_dist_sum = dist_sum;
}
}
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
+
+ if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+ if (rd / 2 > ref_best_rd) {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
+ }
+ }
+ newbest = i == 0 || rd < best_rd;
+
+ if (newbest) {
+ best_rd = rd;
+ best_filter = mbmi->interp_filter;
+ if (cm->interp_filter == SWITCHABLE && i &&
+ !(intpel_mv && IsInterpolatingFilter(i)))
+ best_needs_copy = !best_needs_copy;
+ }
+
+ if ((cm->interp_filter == SWITCHABLE && newbest) ||
+ (cm->interp_filter != SWITCHABLE &&
+ cm->interp_filter == mbmi->interp_filter)) {
+ pred_exists = 1;
+ tmp_rd = best_rd;
+
+ skip_txfm_sb = tmp_skip_sb;
+ skip_sse_sb = tmp_skip_sse;
+ memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
+ memcpy(bsse, x->bsse, sizeof(bsse));
+ }
}
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
}
+
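+  // The filter search now seeds best_filter from predict_interp_filter()
+  // and only sweeps SWITCHABLE_FILTERS when that prediction is
+  // inconclusive. For integer-pel MVs every interpolating filter yields
+  // the same prediction, so the rate/distortion sums measured for filter 0
+  // (tmp_rate_sum / tmp_dist_sum) are reused instead of rebuilding the
+  // predictor; IsInterpolatingFilter() gates that shortcut.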
// Set the appropriate filter
mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
cm->interp_filter : best_filter;
@@ -2720,9 +5500,64 @@
if (cpi->sf.adaptive_mode_search)
if (is_comp_pred)
+#if CONFIG_EXT_INTER
+ switch (this_mode) {
+ case NEAREST_NEARESTMV:
+ if (single_skippable[NEARESTMV][refs[0]] &&
+ single_skippable[NEARESTMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case ZERO_ZEROMV:
+ if (single_skippable[ZEROMV][refs[0]] &&
+ single_skippable[ZEROMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case NEW_NEWMV:
+ if (single_skippable[NEWMV][refs[0]] &&
+ single_skippable[NEWMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case NEAREST_NEWMV:
+ if (single_skippable[NEARESTMV][refs[0]] &&
+ single_skippable[NEWMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case NEAR_NEWMV:
+ if (single_skippable[NEARMV][refs[0]] &&
+ single_skippable[NEWMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case NEW_NEARESTMV:
+ if (single_skippable[NEWMV][refs[0]] &&
+ single_skippable[NEARESTMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case NEW_NEARMV:
+ if (single_skippable[NEWMV][refs[0]] &&
+ single_skippable[NEARMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case NEAREST_NEARMV:
+ if (single_skippable[NEARESTMV][refs[0]] &&
+ single_skippable[NEARMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ case NEAR_NEARESTMV:
+ if (single_skippable[NEARMV][refs[0]] &&
+ single_skippable[NEARESTMV][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ default:
+ if (single_skippable[this_mode][refs[0]] &&
+ single_skippable[this_mode][refs[1]])
+ memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+ break;
+ }
+#else
if (single_skippable[this_mode][refs[0]] &&
single_skippable[this_mode][refs[1]])
memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
+#endif // CONFIG_EXT_INTER
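+      // Each compound case above simply requires both constituent
+      // single-reference modes to have been skippable, e.g.
+      // NEAREST_NEARMV inherits SKIP_TXFM_AC_DC only when NEARESTMV was
+      // skippable on refs[0] and NEARMV on refs[1].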
if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
// If the current pred_error modeled rd is substantially more than the best
@@ -2746,8 +5581,22 @@
// Y cost and distortion
vp10_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+ bsize, ref_best_rd);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+ bsize, ref_best_rd);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy * 8 + idx] = mbmi->tx_size;
+ }
+#else
super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
bsize, ref_best_rd);
+#endif // CONFIG_VAR_TX
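+    // With variable transform sizes, select_tx_type_yrd() runs the
+    // recursive tx-size/type search; otherwise the uniform mbmi->tx_size
+    // is replicated across the 8x8-unit inter_tx_size grid (row stride 8)
+    // so later stages see a per-unit transform size either way.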
if (*rate_y == INT_MAX) {
*rate2 = INT_MAX;
@@ -2762,8 +5611,13 @@
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
+#if CONFIG_VAR_TX
+ if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
+ &sseuv, bsize, ref_best_rd - rdcosty)) {
+#else
if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
&sseuv, bsize, ref_best_rd - rdcosty)) {
+#endif // CONFIG_VAR_TX
*rate2 = INT_MAX;
*distortion = INT64_MAX;
restore_dst_buf(xd, orig_dst, orig_dst_stride);
@@ -2976,7 +5830,11 @@
TileDataEnc *tile_data,
MACROBLOCK *x,
int mi_row, int mi_col,
- RD_COST *rd_cost, BLOCK_SIZE bsize,
+ RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
VP10_COMMON *const cm = &cpi->common;
@@ -2991,18 +5849,34 @@
unsigned char segment_id = mbmi->segment_id;
int comp_pred, i, k;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
- struct buf_2d yv12_mb[4][MAX_MB_PLANE];
+ struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE];
+#if CONFIG_EXT_INTER
+ int_mv single_newmvs[2][MAX_REF_FRAMES] = { { { 0 } }, { { 0 } } };
+#else
int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
+#endif // CONFIG_EXT_INTER
INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
- static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
- VP9_ALT_FLAG };
+ static const int flag_list[REFS_PER_FRAME + 1] = {
+ 0,
+ VP9_LAST_FLAG,
+#if CONFIG_EXT_REFS
+ VP9_LAST2_FLAG,
+ VP9_LAST3_FLAG,
+ VP9_LAST4_FLAG,
+#endif // CONFIG_EXT_REFS
+ VP9_GOLD_FLAG,
+ VP9_ALT_FLAG
+ };
int64_t best_rd = best_rd_so_far;
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode;
+#if CONFIG_REF_MV
+ uint8_t best_ref_mv_idx[MODE_CTX_REF_FRAMES] = { 0 };
+#endif
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
@@ -3014,11 +5888,24 @@
int64_t dist_uv[TX_SIZES];
int skip_uv[TX_SIZES];
PREDICTION_MODE mode_uv[TX_SIZES];
+#if CONFIG_EXT_INTRA
+ EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
+ int8_t uv_angle_delta[TX_SIZES];
+ int is_directional_mode, angle_stats_ready = 0;
+ int rate_overhead, rate_dummy;
+ uint8_t directional_mode_skip_mask[INTRA_MODES];
+#endif // CONFIG_EXT_INTRA
const int intra_cost_penalty = vp10_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+  const int *const intra_mode_cost =
+      cpi->mbmode_cost[size_group_lookup[bsize]];
int best_skip2 = 0;
uint8_t ref_frame_skip_mask[2] = { 0 };
+#if CONFIG_EXT_INTER
+ uint32_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
+#else
uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
+#endif // CONFIG_EXT_INTER
int mode_skip_start = sf->mode_skip_start + 1;
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
@@ -3027,9 +5914,16 @@
const int mode_search_skip_flags = sf->mode_search_skip_flags;
int64_t mask_filter = 0;
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
vp10_zero(best_mbmode);
+#if CONFIG_EXT_INTRA
+ memset(directional_mode_skip_mask, 0,
+ sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
+#endif // CONFIG_EXT_INTRA
+
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
@@ -3052,9 +5946,16 @@
}
rd_cost->rate = INT_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ x->mbmi_ext->compound_mode_context[ref_frame] = 0;
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
@@ -3062,8 +5963,31 @@
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
+#if CONFIG_EXT_INTER
+ frame_mv[NEWFROMNEARMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
+#endif // CONFIG_EXT_INTER
}
+#if CONFIG_REF_MV
+ for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
+ MODE_INFO *const mi = xd->mi[0];
+ int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+ vp10_find_mv_refs(cm, xd, mi, ref_frame,
+#if CONFIG_REF_MV
+ &mbmi_ext->ref_mv_count[ref_frame],
+ mbmi_ext->ref_mv_stack[ref_frame],
+#if CONFIG_EXT_INTER
+ mbmi_ext->compound_mode_context,
+#endif // CONFIG_EXT_INTER
+#endif
+ candidates, mi_row, mi_col,
+ NULL, NULL, mbmi_ext->mode_context);
+ }
+#endif
+
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
// Skip checking missing references in both single and compound reference
@@ -3098,13 +6022,28 @@
// an unfiltered alternative. We allow near/nearest as well
// because they may result in zero-zero MVs but be cheaper.
if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
- ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
+ ref_frame_skip_mask[0] =
+ (1 << LAST_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) |
+ (1 << LAST3_FRAME) |
+ (1 << LAST4_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << GOLDEN_FRAME);
ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
+#if CONFIG_EXT_INTER
+ if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
+ if (frame_mv[NEAREST_NEARMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARMV);
+ if (frame_mv[NEAR_NEARESTMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARESTMV);
+#endif // CONFIG_EXT_INTER
}
}
@@ -3155,6 +6094,8 @@
midx = end_pos;
}
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
for (midx = 0; midx < MAX_MODES; ++midx) {
int mode_index = mode_map[midx];
int mode_excluded = 0;
@@ -3167,11 +6108,58 @@
int this_skip2 = 0;
int64_t total_sse = INT64_MAX;
int early_term = 0;
+#if CONFIG_REF_MV
+ uint8_t ref_frame_type;
+#endif
this_mode = vp10_mode_order[mode_index].mode;
ref_frame = vp10_mode_order[mode_index].ref_frame[0];
second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
+#if CONFIG_EXT_INTER
+ if (this_mode == NEAREST_NEARESTMV) {
+ frame_mv[NEAREST_NEARESTMV][ref_frame].as_int =
+ frame_mv[NEARESTMV][ref_frame].as_int;
+ frame_mv[NEAREST_NEARESTMV][second_ref_frame].as_int =
+ frame_mv[NEARESTMV][second_ref_frame].as_int;
+ } else if (this_mode == NEAREST_NEARMV) {
+ frame_mv[NEAREST_NEARMV][ref_frame].as_int =
+ frame_mv[NEARESTMV][ref_frame].as_int;
+ frame_mv[NEAREST_NEARMV][second_ref_frame].as_int =
+ frame_mv[NEARMV][second_ref_frame].as_int;
+ } else if (this_mode == NEAR_NEARESTMV) {
+ frame_mv[NEAR_NEARESTMV][ref_frame].as_int =
+ frame_mv[NEARMV][ref_frame].as_int;
+ frame_mv[NEAR_NEARESTMV][second_ref_frame].as_int =
+ frame_mv[NEARESTMV][second_ref_frame].as_int;
+ } else if (this_mode == NEAREST_NEWMV) {
+ frame_mv[NEAREST_NEWMV][ref_frame].as_int =
+ frame_mv[NEARESTMV][ref_frame].as_int;
+ frame_mv[NEAREST_NEWMV][second_ref_frame].as_int =
+ frame_mv[NEWMV][second_ref_frame].as_int;
+ } else if (this_mode == NEW_NEARESTMV) {
+ frame_mv[NEW_NEARESTMV][ref_frame].as_int =
+ frame_mv[NEWMV][ref_frame].as_int;
+ frame_mv[NEW_NEARESTMV][second_ref_frame].as_int =
+ frame_mv[NEARESTMV][second_ref_frame].as_int;
+ } else if (this_mode == NEAR_NEWMV) {
+ frame_mv[NEAR_NEWMV][ref_frame].as_int =
+ frame_mv[NEARMV][ref_frame].as_int;
+ frame_mv[NEAR_NEWMV][second_ref_frame].as_int =
+ frame_mv[NEWMV][second_ref_frame].as_int;
+ } else if (this_mode == NEW_NEARMV) {
+ frame_mv[NEW_NEARMV][ref_frame].as_int =
+ frame_mv[NEWMV][ref_frame].as_int;
+ frame_mv[NEW_NEARMV][second_ref_frame].as_int =
+ frame_mv[NEARMV][second_ref_frame].as_int;
+ } else if (this_mode == NEW_NEWMV) {
+ frame_mv[NEW_NEWMV][ref_frame].as_int =
+ frame_mv[NEWMV][ref_frame].as_int;
+ frame_mv[NEW_NEWMV][second_ref_frame].as_int =
+ frame_mv[NEWMV][second_ref_frame].as_int;
+ }
+#endif // CONFIG_EXT_INTER
+
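+    // The chain above simply splits each compound mode into its two
+    // single-reference components and copies their MVs, e.g.
+    //   NEAREST_NEARMV -> (NEARESTMV on ref_frame,
+    //                      NEARMV on second_ref_frame),
+    //   NEW_NEARESTMV  -> (NEWMV on ref_frame,
+    //                      NEARESTMV on second_ref_frame).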
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
if (midx == mode_skip_start && best_mode_index >= 0) {
@@ -3182,6 +6170,20 @@
ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
break;
+#if CONFIG_EXT_REFS
+ case LAST2_FRAME:
+ ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+ case LAST3_FRAME:
+ ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+ case LAST4_FRAME:
+ ref_frame_skip_mask[0] |= LAST4_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#endif // CONFIG_EXT_REFS
case GOLDEN_FRAME:
ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
@@ -3262,8 +6264,12 @@
}
} else {
const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
- if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
- this_mode, ref_frames))
+ if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ mbmi_ext->compound_mode_context,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ frame_mv,
+ this_mode, ref_frames, bsize, -1))
continue;
}
@@ -3271,6 +6277,10 @@
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = ref_frame;
mbmi->ref_frame[1] = second_ref_frame;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
@@ -3291,37 +6301,282 @@
TX_SIZE uv_tx;
struct macroblockd_plane *const pd = &xd->plane[1];
memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
+
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED);
+ if (is_directional_mode) {
+ if (!angle_stats_ready) {
+ const int src_stride = x->plane[0].src.stride;
+ const uint8_t *src = x->plane[0].src.buf;
+ const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ double hist[DIRECTIONAL_MODES];
+ PREDICTION_MODE mode;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ highbd_angle_estimation(src, src_stride, rows, cols, hist);
+ else
+#endif
+ angle_estimation(src, src_stride, rows, cols, hist);
+ for (mode = 0; mode < INTRA_MODES; ++mode) {
+ if (mode != DC_PRED && mode != TM_PRED) {
+ int index = get_angle_index((double)mode_to_angle_map[mode]);
+ double score, weight = 1.0;
+ score = hist[index];
+ if (index > 0) {
+ score += hist[index - 1] * 0.5;
+ weight += 0.5;
+ }
+ if (index < DIRECTIONAL_MODES - 1) {
+ score += hist[index + 1] * 0.5;
+ weight += 0.5;
+ }
+ score /= weight;
+ if (score < ANGLE_SKIP_THRESH)
+ directional_mode_skip_mask[mode] = 1;
+ }
+ }
+ angle_stats_ready = 1;
+ }
+ if (directional_mode_skip_mask[mbmi->mode])
+ continue;
+ rate_overhead = write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0) +
+ intra_mode_cost[mbmi->mode];
+ rate_y = INT_MAX;
+ this_rd =
+ rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize, rate_overhead, best_rd);
+ } else {
+ mbmi->angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
+ NULL, bsize, best_rd);
+ }
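+        // Each directional mode is scored against the gradient-angle
+        // histogram as a weighted average of its bin and its immediate
+        // neighbours:
+        //   score = (hist[i] + 0.5 * hist[i-1] + 0.5 * hist[i+1]) / weight,
+        // where weight is 1 plus 0.5 per neighbour present; modes scoring
+        // below ANGLE_SKIP_THRESH stay masked for the rest of the loop.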
+
+ // TODO(huisu): ext-intra is turned off in lossless mode for now to
+ // avoid a unit test failure
+ if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] &&
+ ALLOW_FILTER_INTRA_MODES) {
+ MB_MODE_INFO mbmi_copy = *mbmi;
+
+ if (rate_y != INT_MAX) {
+ int this_rate = rate_y + intra_mode_cost[mbmi->mode] +
+ vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
+ } else {
+ this_rd = best_rd;
+ }
+
+ if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize,
+ intra_mode_cost[mbmi->mode], &this_rd))
+ *mbmi = mbmi_copy;
+ }
+#else
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
NULL, bsize, best_rd);
+#endif // CONFIG_EXT_INTRA
+
if (rate_y == INT_MAX)
continue;
-
uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
pd->subsampling_y);
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
&rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
&dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
+#if CONFIG_EXT_INTRA
+ ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+ uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
}
rate_uv = rate_uv_tokenonly[uv_tx];
distortion_uv = dist_uv[uv_tx];
skippable = skippable && skip_uv[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
+#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+ if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+ }
+#endif // CONFIG_EXT_INTRA
- rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+ rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+
+ if (!xd->lossless[mbmi->segment_id]) {
+ // super_block_yrd above includes the cost of the tx_size in the
+ // tokenonly rate, but for intra blocks, tx_size is always coded
+ // (prediction granularity), so we account for it in the full rate,
+ // not the tokenonly rate.
+ rate_y -= vp10_cost_tx_size(mbmi->tx_size, max_tx_size, tx_probs);
+ }
+#if CONFIG_EXT_INTRA
+ if (is_directional_mode) {
+ int p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[0]);
+ p_angle = mode_to_angle_map[mbmi->mode] +
+ mbmi->angle_delta[0] * ANGLE_STEP;
+ if (pick_intra_filter(p_angle))
+ rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
+ }
+
+ if (mbmi->mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) {
+ rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ EXT_INTRA_MODE ext_intra_mode =
+ mbmi->ext_intra_mode_info.ext_intra_mode[0];
+ rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
+ }
+ }
+#endif // CONFIG_EXT_INTRA
if (this_mode != DC_PRED && this_mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
} else {
+#if CONFIG_REF_MV
+ mbmi->ref_mv_idx = 0;
+ ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+#endif
this_rd = handle_inter_mode(cpi, x, bsize,
&rate2, &distortion2, &skippable,
&rate_y, &rate_uv,
&disable_skip, frame_mv,
mi_row, mi_col,
- single_newmv, single_inter_filter,
+#if CONFIG_EXT_INTER
+ single_newmvs,
+#else
+ single_newmv,
+#endif // CONFIG_EXT_INTER
+ single_inter_filter,
single_skippable, &total_sse, best_rd,
&mask_filter, filter_cache);
+
+#if CONFIG_REF_MV
+ // TODO(jingning): This needs some refactoring to improve code quality
+ // and reduce redundant steps.
+ if (mbmi->mode == NEARMV &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+ int_mv backup_mv = frame_mv[NEARMV][ref_frame];
+ int_mv cur_mv = mbmi_ext->ref_mv_stack[ref_frame][2].this_mv;
+ MB_MODE_INFO backup_mbmi = *mbmi;
+
+ int64_t tmp_ref_rd = this_rd;
+ int ref_idx;
+ int ref_set = VPXMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 2);
+
+ uint8_t drl0_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 0);
+ rate2 += cpi->drl_mode_cost0[drl0_ctx][0];
+
+ if (this_rd < INT64_MAX) {
+ if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, 0, total_sse))
+ tmp_ref_rd = RDCOST(x->rdmult, x->rddiv,
+ rate2 + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0),
+ distortion2);
+ else
+ tmp_ref_rd = RDCOST(x->rdmult, x->rddiv,
+ rate2 + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1) -
+ rate_y - rate_uv,
+ total_sse);
+ }
+
+ for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
+ int64_t tmp_alt_rd = INT64_MAX;
+ int tmp_rate = 0, tmp_rate_y = 0, tmp_rate_uv = 0;
+ int tmp_skip = 1;
+ int64_t tmp_dist = 0, tmp_sse = 0;
+
+ cur_mv = mbmi_ext->ref_mv_stack[ref_frame][2 + ref_idx].this_mv;
+ lower_mv_precision(&cur_mv.as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv.as_mv, xd);
+
+ if (!mv_check_bounds(x, &cur_mv.as_mv)) {
+ int64_t dummy_filter_cache[SWITCHABLE_FILTER_CONTEXTS];
+ INTERP_FILTER dummy_single_inter_filter[MB_MODE_COUNT]
+ [MAX_REF_FRAMES];
+ int dummy_single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
+ int dummy_disable_skip = 0;
+ int64_t dummy_mask_filter = 0;
+#if CONFIG_EXT_INTER
+ int_mv dummy_single_newmvs[2][MAX_REF_FRAMES] =
+ { { { 0 } }, { { 0 } } };
+#else
+ int_mv dummy_single_newmv[MAX_REF_FRAMES] = { { 0 } };
+#endif
+
+ mbmi->ref_mv_idx = 1 + ref_idx;
+
+ frame_mv[NEARMV][ref_frame] = cur_mv;
+ tmp_alt_rd = handle_inter_mode(cpi, x, bsize,
+ &tmp_rate, &tmp_dist, &tmp_skip,
+ &tmp_rate_y, &tmp_rate_uv,
+ &dummy_disable_skip, frame_mv,
+ mi_row, mi_col,
+#if CONFIG_EXT_INTER
+ dummy_single_newmvs,
+#else
+ dummy_single_newmv,
+#endif
+ dummy_single_inter_filter,
+ dummy_single_skippable,
+ &tmp_sse, best_rd,
+ &dummy_mask_filter,
+ dummy_filter_cache);
+ }
+
+ tmp_rate += cpi->drl_mode_cost0[drl0_ctx][1];
+
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 3) {
+ uint8_t drl1_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
+ tmp_rate += cpi->drl_mode_cost1[drl1_ctx][ref_idx];
+ }
+
+ if (tmp_alt_rd < INT64_MAX) {
+ if (RDCOST(x->rdmult, x->rddiv,
+ tmp_rate_y + tmp_rate_uv, tmp_dist) <
+ RDCOST(x->rdmult, x->rddiv, 0, tmp_sse))
+ tmp_alt_rd = RDCOST(x->rdmult, x->rddiv,
+ tmp_rate + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0),
+ tmp_dist);
+ else
+ tmp_alt_rd = RDCOST(x->rdmult, x->rddiv,
+ tmp_rate + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1) -
+ tmp_rate_y - tmp_rate_uv,
+ tmp_sse);
+ }
+
+ if (tmp_ref_rd > tmp_alt_rd) {
+ rate2 = tmp_rate;
+ distortion2 = tmp_dist;
+ skippable = tmp_skip;
+ rate_y = tmp_rate_y;
+ rate_uv = tmp_rate_uv;
+ total_sse = tmp_sse;
+ this_rd = tmp_alt_rd;
+ // Indicator of the effective nearmv reference motion vector.
+ best_ref_mv_idx[ref_frame_type] = 1 + ref_idx;
+ tmp_ref_rd = tmp_alt_rd;
+ backup_mbmi = *mbmi;
+ } else {
+ *mbmi = backup_mbmi;
+ }
+ }
+
+ frame_mv[NEARMV][ref_frame] = backup_mv;
+ }
+#endif
+
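+      // The loop above re-runs handle_inter_mode() for up to two further
+      // reference-MV-stack entries (ref_mv_idx 1 and 2). Rate accounting:
+      // the base NEARMV pass pays drl_mode_cost0[ctx][0]; each alternative
+      // pays drl_mode_cost0[ctx][1] plus, when more than three candidates
+      // exist, drl_mode_cost1[ctx][ref_idx]. The candidate that wins the
+      // skip-aware RD comparison is kept via backup_mbmi.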
if (this_rd == INT64_MAX)
continue;
@@ -3343,9 +6598,11 @@
if (skippable) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
-
+ rate_y = 0;
+ rate_uv = 0;
// Cost the skip mb case
rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+
} else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
@@ -3358,6 +6615,8 @@
assert(total_sse >= 0);
rate2 -= (rate_y + rate_uv);
this_skip2 = 1;
+ rate_y = 0;
+ rate_uv = 0;
}
} else {
// Add in the cost of the no skip flag.
@@ -3404,6 +6663,15 @@
}
rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ if (!disable_skip) {
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
+ skippable || this_skip2);
+ }
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
+ mbmi->ref_frame[0] != INTRA_FRAME);
+#endif // CONFIG_SUPERTX
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
best_rd = this_rd;
@@ -3413,8 +6681,15 @@
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(ctx->blk_skip[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
// TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history
@@ -3500,11 +6775,83 @@
// Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
// ZEROMV. Here, checks are added for those cases, and the mode decisions
// are corrected.
- if (best_mbmode.mode == NEWMV) {
+ if (best_mbmode.mode == NEWMV
+#if CONFIG_EXT_INTER
+ || best_mbmode.mode == NEWFROMNEARMV
+ || best_mbmode.mode == NEW_NEWMV
+#endif // CONFIG_EXT_INTER
+ ) {
const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
best_mbmode.ref_frame[1]};
int comp_pred_mode = refs[1] > INTRA_FRAME;
+#if CONFIG_REF_MV
+ if (!comp_pred_mode) {
+ if (best_ref_mv_idx[best_mbmode.ref_frame[0]] > 0 &&
+ best_mbmode.ref_frame[1] == NONE) {
+ int idx = best_ref_mv_idx[best_mbmode.ref_frame[0]] + 1;
+ int_mv cur_mv =
+ mbmi_ext->ref_mv_stack[best_mbmode.ref_frame[0]][idx].this_mv;
+ lower_mv_precision(&cur_mv.as_mv, cm->allow_high_precision_mv);
+ frame_mv[NEARMV][refs[0]] = cur_mv;
+ }
+ if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
+ best_mbmode.mode = NEARESTMV;
+ else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
+ best_mbmode.mode = NEARMV;
+ else if (best_mbmode.mv[0].as_int == 0)
+ best_mbmode.mode = ZEROMV;
+ } else {
+ uint8_t rf_type = vp10_ref_frame_type(best_mbmode.ref_frame);
+ int i;
+ const int allow_hp = cm->allow_high_precision_mv;
+ int_mv nearestmv[2] = { frame_mv[NEARESTMV][refs[0]],
+ frame_mv[NEARESTMV][refs[1]] };
+
+ int_mv nearmv[2] = { frame_mv[NEARMV][refs[0]],
+ frame_mv[NEARMV][refs[1]] };
+
+ if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
+ nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
+ nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
+ }
+
+ if (mbmi_ext->ref_mv_count[rf_type] > 1) {
+ int ref_mv_idx = best_ref_mv_idx[rf_type] + 1;
+ nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][ref_mv_idx].this_mv;
+ nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][ref_mv_idx].comp_mv;
+ }
+
+ for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
+ lower_mv_precision(&nearestmv[i].as_mv, allow_hp);
+ lower_mv_precision(&nearmv[i].as_mv, allow_hp);
+ }
+
+ if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearestmv[1].as_int == best_mbmode.mv[1].as_int)
+#if CONFIG_EXT_INTER
+ best_mbmode.mode = NEAREST_NEARESTMV;
+ else if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearmv[1].as_int == best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAREST_NEARMV;
+ else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearestmv[1].as_int == best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAR_NEARESTMV;
+ else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
+ best_mbmode.mode = ZERO_ZEROMV;
+#else
+ best_mbmode.mode = NEARESTMV;
+ else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearmv[1].as_int == best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEARMV;
+ else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
+ best_mbmode.mode = ZEROMV;
+#endif // CONFIG_EXT_INTER
+ }
+#else
+#if CONFIG_EXT_INTER
+ if (!comp_pred_mode) {
+#endif // CONFIG_EXT_INTER
if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
best_mbmode.mv[1].as_int) || !comp_pred_mode))
@@ -3516,8 +6863,57 @@
else if (best_mbmode.mv[0].as_int == 0 &&
((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
best_mbmode.mode = ZEROMV;
+#if CONFIG_EXT_INTER
+ } else {
+ const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
+ best_mbmode.ref_frame[1]};
+
+ if (frame_mv[NEAREST_NEARESTMV][refs[0]].as_int ==
+ best_mbmode.mv[0].as_int &&
+ frame_mv[NEAREST_NEARESTMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAREST_NEARESTMV;
+ else if (frame_mv[NEAREST_NEARMV][refs[0]].as_int ==
+ best_mbmode.mv[0].as_int &&
+ frame_mv[NEAREST_NEARMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAREST_NEARMV;
+ else if (frame_mv[NEAR_NEARESTMV][refs[0]].as_int ==
+ best_mbmode.mv[0].as_int &&
+ frame_mv[NEAR_NEARESTMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAR_NEARESTMV;
+ else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
+ best_mbmode.mode = ZERO_ZEROMV;
+ }
+#endif // CONFIG_EXT_INTER
+#endif
}
+#if CONFIG_REF_MV
+ if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
+ best_mbmode.mv[0].as_int == 0 &&
+#if CONFIG_EXT_INTER
+ best_mbmode.ref_frame[1] == NONE) {
+#else
+ (best_mbmode.ref_frame[1] == NONE || best_mbmode.mv[1].as_int == 0)) {
+#endif // CONFIG_EXT_INTER
+ int16_t mode_ctx = mbmi_ext->mode_context[best_mbmode.ref_frame[0]];
+#if !CONFIG_EXT_INTER
+ if (best_mbmode.ref_frame[1] > NONE)
+ mode_ctx &= (mbmi_ext->mode_context[best_mbmode.ref_frame[1]] | 0x00ff);
+#endif // !CONFIG_EXT_INTER
+
+ if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET))
+ best_mbmode.mode = ZEROMV;
+ }
+
+ if (best_mbmode.mode == NEARMV) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(best_mbmode.ref_frame);
+ best_mbmode.ref_mv_idx = best_ref_mv_idx[ref_frame_type];
+ }
+#endif
+
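+  // When the mode context sets its ALL_ZERO_FLAG_OFFSET bit (the candidate
+  // list amounts to zero motion), the winner is relabelled ZEROMV so the
+  // cheapest signalling is used for the same motion, and a winning NEARMV
+  // records which stack entry (ref_mv_idx) supplied its vector.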
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
@@ -3634,6 +7030,12 @@
assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
mbmi->mode = ZEROMV;
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = LAST_FRAME;
@@ -3644,6 +7046,9 @@
if (cm->interp_filter != BILINEAR) {
best_filter = EIGHTTAP;
if (cm->interp_filter == SWITCHABLE &&
+#if CONFIG_EXT_INTERP
+ vp10_is_interp_needed(xd) &&
+#endif // CONFIG_EXT_INTERP
x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
int rs;
int best_rs = INT_MAX;
@@ -3698,14 +7103,17 @@
best_pred_diff, best_filter_diff, 0);
}
-void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
- TileDataEnc *tile_data,
- MACROBLOCK *x,
- int mi_row, int mi_col,
- RD_COST *rd_cost,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far) {
+void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
+ TileDataEnc *tile_data,
+ struct macroblock *x,
+ int mi_row, int mi_col,
+ struct RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
VP10_COMMON *const cm = &cpi->common;
RD_OPT *const rd_opt = &cpi->rd;
SPEED_FEATURES *const sf = &cpi->sf;
@@ -3716,9 +7124,18 @@
unsigned char segment_id = mbmi->segment_id;
int comp_pred, i;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
- struct buf_2d yv12_mb[4][MAX_MB_PLANE];
- static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
- VP9_ALT_FLAG };
+ struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE];
+ static const int flag_list[REFS_PER_FRAME + 1] = {
+ 0,
+ VP9_LAST_FLAG,
+#if CONFIG_EXT_REFS
+ VP9_LAST2_FLAG,
+ VP9_LAST3_FLAG,
+ VP9_LAST4_FLAG,
+#endif // CONFIG_EXT_REFS
+ VP9_GOLD_FLAG,
+ VP9_ALT_FLAG
+ };
int64_t best_rd = best_rd_so_far;
int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
int64_t best_pred_diff[REFERENCE_MODES];
@@ -3736,7 +7153,11 @@
PREDICTION_MODE mode_uv = DC_PRED;
const int intra_cost_penalty = vp10_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+#if CONFIG_EXT_INTER
+ int_mv seg_mvs[4][2][MAX_REF_FRAMES];
+#else
int_mv seg_mvs[4][MAX_REF_FRAMES];
+#endif // CONFIG_EXT_INTER
b_mode_info best_bmodes[4];
int best_skip2 = 0;
int ref_frame_skip_mask[2] = { 0 };
@@ -3745,16 +7166,34 @@
int internal_active_edge =
vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
+#if CONFIG_SUPERTX
+ best_rd_so_far = INT64_MAX;
+ best_rd = best_rd_so_far;
+ best_yrd = best_rd_so_far;
+#endif // CONFIG_SUPERTX
memset(x->zcoeff_blk[TX_4X4], 0, 4);
vp10_zero(best_mbmode);
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
+
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
for (i = 0; i < 4; i++) {
int j;
+#if CONFIG_EXT_INTER
+ int k;
+
+ for (k = 0; k < 2; k++)
+ for (j = 0; j < MAX_REF_FRAMES; j++)
+ seg_mvs[i][k][j].as_int = INVALID_MV;
+#else
for (j = 0; j < MAX_REF_FRAMES; j++)
seg_mvs[i][j].as_int = INVALID_MV;
+#endif // CONFIG_EXT_INTER
}
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
@@ -3767,8 +7206,15 @@
rate_uv_intra = INT_MAX;
rd_cost->rate = INT_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ x->mbmi_ext->compound_mode_context[ref_frame] = 0;
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
@@ -3778,9 +7224,15 @@
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
+#if CONFIG_EXT_INTER
+ frame_mv[NEWFROMNEARMV][ref_frame].as_int = INVALID_MV;
+#endif // CONFIG_EXT_INTER
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
+
for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
@@ -3805,15 +7257,59 @@
case INTRA_FRAME:
break;
case LAST_FRAME:
- ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) |
+ (1 << LAST3_FRAME) |
+ (1 << LAST4_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << ALTREF_FRAME);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
break;
+#if CONFIG_EXT_REFS
+ case LAST2_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
+ (1 << LAST3_FRAME) |
+ (1 << LAST4_FRAME) |
+ (1 << GOLDEN_FRAME) |
+ (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+ case LAST3_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
+ (1 << LAST2_FRAME) |
+ (1 << LAST4_FRAME) |
+ (1 << GOLDEN_FRAME) |
+ (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+ case LAST4_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
+ (1 << LAST2_FRAME) |
+ (1 << LAST3_FRAME) |
+ (1 << GOLDEN_FRAME) |
+ (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#endif // CONFIG_EXT_REFS
case GOLDEN_FRAME:
- ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) |
+ (1 << LAST3_FRAME) |
+ (1 << LAST4_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << ALTREF_FRAME);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
break;
case ALTREF_FRAME:
- ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
+ ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) |
+ (1 << LAST3_FRAME) |
+ (1 << LAST4_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << LAST_FRAME);
break;
case NONE:
case MAX_REF_FRAMES:
@@ -3900,6 +7396,10 @@
xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
+#if CONFIG_VAR_TX
+ mbmi->inter_tx_size[0] = mbmi->tx_size;
+#endif
+
if (ref_frame == INTRA_FRAME) {
int rate;
if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
@@ -3937,12 +7437,29 @@
BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
int pred_exists = 0;
int uv_skippable;
+#if CONFIG_EXT_INTER
+ int_mv compound_seg_newmvs[4][2];
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ compound_seg_newmvs[i][0].as_int = INVALID_MV;
+ compound_seg_newmvs[i][1].as_int = INVALID_MV;
+ }
+#endif // CONFIG_EXT_INTER
this_rd_thresh = (ref_frame == LAST_FRAME) ?
rd_opt->threshes[segment_id][bsize][THR_LAST] :
rd_opt->threshes[segment_id][bsize][THR_ALTR];
+#if CONFIG_EXT_REFS
+ this_rd_thresh = (ref_frame == LAST2_FRAME) ?
+ rd_opt->threshes[segment_id][bsize][THR_LAST2] : this_rd_thresh;
+ this_rd_thresh = (ref_frame == LAST3_FRAME) ?
+ rd_opt->threshes[segment_id][bsize][THR_LAST3] : this_rd_thresh;
+ this_rd_thresh = (ref_frame == LAST4_FRAME) ?
+ rd_opt->threshes[segment_id][bsize][THR_LAST4] : this_rd_thresh;
+#endif // CONFIG_EXT_REFS
this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
- rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
+ rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
@@ -3974,9 +7491,16 @@
&rate_y, &distortion,
&skippable, &total_sse,
(int) this_rd_thresh, seg_mvs,
+#if CONFIG_EXT_INTER
+ compound_seg_newmvs,
+#endif // CONFIG_EXT_INTER
bsi, switchable_filter_index,
mi_row, mi_col);
-
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ mbmi->interp_filter != EIGHTTAP) // invalid configuration
+ continue;
+#endif // CONFIG_EXT_INTERP
if (tmp_rd == INT64_MAX)
continue;
rs = vp10_get_switchable_rate(cpi, xd);
@@ -4030,15 +7554,36 @@
mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
tmp_best_filter : cm->interp_filter);
+
if (!pred_exists) {
// Handles the special case when a filter that is not in the
- // switchable list (bilinear, 6-tap) is indicated at the frame level
+ // switchable list (bilinear) is indicated at the frame level
tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
&x->mbmi_ext->ref_mvs[ref_frame][0],
second_ref, best_yrd, &rate, &rate_y,
&distortion, &skippable, &total_sse,
- (int) this_rd_thresh, seg_mvs, bsi, 0,
+ (int) this_rd_thresh, seg_mvs,
+#if CONFIG_EXT_INTER
+ compound_seg_newmvs,
+#endif // CONFIG_EXT_INTER
+ bsi, 0,
mi_row, mi_col);
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ mbmi->interp_filter != EIGHTTAP) {
+ mbmi->interp_filter = EIGHTTAP;
+ tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
+ &x->mbmi_ext->ref_mvs[ref_frame][0],
+ second_ref, best_yrd, &rate, &rate_y,
+ &distortion, &skippable, &total_sse,
+ (int) this_rd_thresh, seg_mvs,
+#if CONFIG_EXT_INTER
+ compound_seg_newmvs,
+#endif // CONFIG_EXT_INTER
+ bsi, 0,
+ mi_row, mi_col);
+ }
+#endif // CONFIG_EXT_INTERP
if (tmp_rd == INT64_MAX)
continue;
} else {
@@ -4051,6 +7596,23 @@
for (i = 0; i < 4; i++)
xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
}
+ // Add in the cost of the transform type
+ if (!xd->lossless[mbmi->segment_id]) {
+ int rate_tx_type = 0;
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, 1) > 1) {
+ const int eset = get_ext_tx_set(mbmi->tx_size, bsize, 1);
+ rate_tx_type =
+ cpi->inter_tx_type_costs[eset][mbmi->tx_size][mbmi->tx_type];
+ }
+#else
+ if (mbmi->tx_size < TX_32X32) {
+ rate_tx_type = cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ }
+#endif
+ rate += rate_tx_type;
+ rate_y += rate_tx_type;
+ }
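+      // The transform-type rate is context dependent: under CONFIG_EXT_TX
+      // it is looked up per extended-transform set (eset) and charged only
+      // when the set offers more than one type; otherwise only sizes below
+      // TX_32X32 carry a per-type cost. It is added to both rate and
+      // rate_y so the later skip back-out stays consistent.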
rate2 += rate;
distortion2 += distortion;
@@ -4074,10 +7636,15 @@
vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_8X8);
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
+#if CONFIG_VAR_TX
+ if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ &uv_sse, BLOCK_8X8, tmp_best_rdu))
+ continue;
+#else
if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
&uv_sse, BLOCK_8X8, tmp_best_rdu))
continue;
-
+#endif
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -4145,6 +7712,15 @@
}
rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ if (!disable_skip)
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
+ this_skip2);
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
+ mbmi->ref_frame[0] != INTRA_FRAME);
+ assert(*returnrate_nocoef > 0);
+#endif // CONFIG_SUPERTX
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
best_rd = this_rd;
@@ -4154,8 +7730,14 @@
best_skip2 = this_skip2;
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
for (i = 0; i < 4; i++)
best_bmodes[i] = xd->mi[0]->bmi[i];
@@ -4241,6 +7823,9 @@
if (best_rd >= best_rd_so_far) {
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
return;
}
@@ -4261,6 +7846,9 @@
rd_cost->rate = INT_MAX;
rd_cost->dist = INT64_MAX;
rd_cost->rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
return;
}
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index b1a8036..7c7e9eb 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -43,6 +43,9 @@
struct macroblock *x,
int mi_row, int mi_col,
struct RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
@@ -60,12 +63,31 @@
int vp10_active_edge_sb(struct VP10_COMP *cpi, int mi_row, int mi_col);
void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
- struct TileDataEnc *tile_data,
- struct macroblock *x,
- int mi_row, int mi_col,
- struct RD_COST *rd_cost,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far);
+ struct TileDataEnc *tile_data,
+ struct macroblock *x,
+ int mi_row, int mi_col,
+ struct RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far);
+
+#if CONFIG_SUPERTX
+#if CONFIG_VAR_TX
+void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
+ int blk_row, int blk_col, int plane, int block,
+ int plane_bsize, int coeff_ctx,
+ int *rate, int64_t *dist, int64_t *bsse, int *skip);
+#endif
+
+void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x,
+ int *rate, int64_t *distortion,
+ int *skippable, int64_t *sse,
+ int64_t ref_best_rd, int plane,
+ BLOCK_SIZE bsize, TX_SIZE tx_size,
+ int use_fast_coef_casting);
+#endif // CONFIG_SUPERTX
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c
index 677910f..969b87f 100644
--- a/vp10/encoder/segmentation.c
+++ b/vp10/encoder/segmentation.c
@@ -58,9 +58,7 @@
segcounts[4] + segcounts[5], segcounts[6] + segcounts[7]
};
const unsigned ccc[2] = { cc[0] + cc[1], cc[2] + cc[3] };
-#if CONFIG_MISC_FIXES
int i;
-#endif
segment_tree_probs[0] = get_binary_prob(ccc[0], ccc[1]);
segment_tree_probs[1] = get_binary_prob(cc[0], cc[1]);
@@ -70,16 +68,12 @@
segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]);
segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]);
-#if CONFIG_MISC_FIXES
for (i = 0; i < 7; i++) {
const unsigned *ct = i == 0 ? ccc : i < 3 ? cc + (i & 2)
: segcounts + (i - 3) * 2;
vp10_prob_diff_update_savings_search(ct,
cur_tree_probs[i], &segment_tree_probs[i], DIFF_UPDATE_PROB);
}
-#else
- (void) cur_tree_probs;
-#endif
}
// Based on set of segment counts and probabilities calculate a cost estimate
@@ -214,39 +208,22 @@
void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
struct segmentation *seg = &cm->seg;
-#if CONFIG_MISC_FIXES
struct segmentation_probs *segp = &cm->fc->seg;
-#else
- struct segmentation_probs *segp = &cm->segp;
-#endif
int no_pred_cost;
int t_pred_cost = INT_MAX;
int i, tile_col, mi_row, mi_col;
-#if CONFIG_MISC_FIXES
unsigned (*temporal_predictor_count)[2] = cm->counts.seg.pred;
unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
unsigned *t_unpred_seg_counts = cm->counts.seg.tree_mispred;
-#else
- unsigned temporal_predictor_count[PREDICTION_PROBS][2] = { { 0 } };
- unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 };
- unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
-#endif
vpx_prob no_pred_tree[SEG_TREE_PROBS];
vpx_prob t_pred_tree[SEG_TREE_PROBS];
vpx_prob t_nopred_prob[PREDICTION_PROBS];
-#if CONFIG_MISC_FIXES
(void) xd;
-#else
- // Set default state for the segment tree probabilities and the
- // temporal coding probabilities
- memset(segp->tree_probs, 255, sizeof(segp->tree_probs));
- memset(segp->pred_probs, 255, sizeof(segp->pred_probs));
-#endif
// First of all generate stats regarding how well the last segment map
// predicts this one
@@ -284,13 +261,9 @@
const int count0 = temporal_predictor_count[i][0];
const int count1 = temporal_predictor_count[i][1];
-#if CONFIG_MISC_FIXES
vp10_prob_diff_update_savings_search(temporal_predictor_count[i],
segp->pred_probs[i],
&t_nopred_prob[i], DIFF_UPDATE_PROB);
-#else
- t_nopred_prob[i] = get_binary_prob(count0, count1);
-#endif
// Add in the predictor signaling cost
t_pred_cost += count0 * vp10_cost_zero(t_nopred_prob[i]) +
@@ -302,30 +275,17 @@
if (t_pred_cost < no_pred_cost) {
assert(!cm->error_resilient_mode);
seg->temporal_update = 1;
-#if !CONFIG_MISC_FIXES
- memcpy(segp->tree_probs, t_pred_tree, sizeof(t_pred_tree));
- memcpy(segp->pred_probs, t_nopred_prob, sizeof(t_nopred_prob));
-#endif
} else {
seg->temporal_update = 0;
-#if !CONFIG_MISC_FIXES
- memcpy(segp->tree_probs, no_pred_tree, sizeof(no_pred_tree));
-#endif
}
}
void vp10_reset_segment_features(VP10_COMMON *cm) {
struct segmentation *seg = &cm->seg;
-#if !CONFIG_MISC_FIXES
- struct segmentation_probs *segp = &cm->segp;
-#endif
// Set up default state for MB feature flags
seg->enabled = 0;
seg->update_map = 0;
seg->update_data = 0;
-#if !CONFIG_MISC_FIXES
- memset(segp->tree_probs, 255, sizeof(segp->tree_probs));
-#endif
vp10_clearall_segfeatures(seg);
}
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index 3b91999..170e537 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -31,6 +31,46 @@
(1 << H_PRED)
};
+#if CONFIG_EXT_INTER
+enum {
+ INTER_ALL =
+ (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
+ (1 << NEWMV) | (1 << NEWFROMNEARMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEAREST_NEARMV) |
+ (1 << NEAR_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV),
+ INTER_NEAREST = (1 << NEARESTMV) | (1 << NEAREST_NEARESTMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
+ (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV),
+ INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) |
+ (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEARMV) |
+ (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
+ INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
+ (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV),
+ INTER_NEAREST_NEW_ZERO =
+ (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEW_NEWMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
+ (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
+ INTER_NEAREST_NEAR_NEW =
+ (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
+ (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
+ INTER_NEAREST_NEAR_ZERO =
+ (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
+ (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV)
+};
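+// Each mask above also carries the compound counterparts of its
+// single-reference modes, so existing speed-feature checks of the form
+//   if (!(inter_mode_mask & (1 << this_mode))) continue;
+// keep working unchanged with the CONFIG_EXT_INTER mode list.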
+#else
enum {
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
INTER_NEAREST = (1 << NEARESTMV),
@@ -40,6 +80,7 @@
INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),
};
+#endif // CONFIG_EXT_INTER
enum {
DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) |
diff --git a/vp10/encoder/subexp.c b/vp10/encoder/subexp.c
index eccee8e..8e60f40 100644
--- a/vp10/encoder/subexp.c
+++ b/vp10/encoder/subexp.c
@@ -25,8 +25,7 @@
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 11 - CONFIG_MISC_FIXES,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
@@ -86,7 +85,7 @@
static void encode_uniform(vpx_writer *w, int v) {
const int l = 8;
- const int m = (1 << l) - 191 + CONFIG_MISC_FIXES;
+ const int m = (1 << l) - 190;
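+  // Truncated-binary (quasi-uniform) code over 190 symbols: with l = 8
+  // and m = 66, values v < m are written in l - 1 = 7 bits and the
+  // remaining values take one extra bit, keeping the code near-uniform
+  // while saving a bit on the smallest values.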
if (v < m) {
vpx_write_literal(w, v, l - 1);
} else {
diff --git a/vp10/encoder/subexp.h b/vp10/encoder/subexp.h
index 091334f..64eb275 100644
--- a/vp10/encoder/subexp.h
+++ b/vp10/encoder/subexp.h
@@ -36,7 +36,6 @@
vpx_prob *bestp,
vpx_prob upd,
int stepsize);
-
int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
const unsigned int ct[2]);
#ifdef __cplusplus
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c
index 5278d3b..035b66a 100644
--- a/vp10/encoder/temporal_filter.c
+++ b/vp10/encoder/temporal_filter.c
@@ -45,8 +45,7 @@
int x, int y) {
const int which_mv = 0;
const MV mv = { mv_row, mv_col };
- const InterpKernel *const kernel =
- vp10_filter_kernels[xd->mi[0]->mbmi.interp_filter];
+ const INTERP_FILTER interp_filter = xd->mi[0]->mbmi.interp_filter;
enum mv_precision mv_precision_uv;
int uv_stride;
@@ -66,7 +65,8 @@
scale,
16, 16,
which_mv,
- kernel, MV_PRECISION_Q3, x, y, xd->bd);
+ interp_filter,
+ MV_PRECISION_Q3, x, y, xd->bd);
vp10_highbd_build_inter_predictor(u_mb_ptr, uv_stride,
&pred[256], uv_block_width,
@@ -74,7 +74,8 @@
scale,
uv_block_width, uv_block_height,
which_mv,
- kernel, mv_precision_uv, x, y, xd->bd);
+ interp_filter,
+ mv_precision_uv, x, y, xd->bd);
vp10_highbd_build_inter_predictor(v_mb_ptr, uv_stride,
&pred[512], uv_block_width,
@@ -82,7 +83,8 @@
scale,
uv_block_width, uv_block_height,
which_mv,
- kernel, mv_precision_uv, x, y, xd->bd);
+ interp_filter,
+ mv_precision_uv, x, y, xd->bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -92,7 +94,7 @@
scale,
16, 16,
which_mv,
- kernel, MV_PRECISION_Q3, x, y);
+ interp_filter, MV_PRECISION_Q3, x, y);
vp10_build_inter_predictor(u_mb_ptr, uv_stride,
&pred[256], uv_block_width,
@@ -100,7 +102,7 @@
scale,
uv_block_width, uv_block_height,
which_mv,
- kernel, mv_precision_uv, x, y);
+ interp_filter, mv_precision_uv, x, y);
vp10_build_inter_predictor(v_mb_ptr, uv_stride,
&pred[512], uv_block_width,
@@ -108,7 +110,7 @@
scale,
uv_block_width, uv_block_height,
which_mv,
- kernel, mv_precision_uv, x, y);
+ interp_filter, mv_precision_uv, x, y);
}
void vp10_temporal_filter_init(void) {
@@ -135,15 +137,38 @@
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
+ int pixel_value = *frame2;
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
- modifier *= 3;
+      // Non-local-means approach: weight by the 3x3 patch difference.
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
+ modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
+
modifier += rounding;
modifier >>= strength;
@@ -182,15 +207,38 @@
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
+ int pixel_value = *frame2;
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
+ // non-local mean approach: weight by the mean SSE over the 3x3 neighborhood
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
+
modifier += rounding;
modifier >>= strength;
@@ -382,50 +430,50 @@
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
vp10_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset,
- f->y_stride,
- predictor, 16, 16, adj_strength,
- filter_weight,
- accumulator, count);
+ f->y_stride,
+ predictor, 16, 16, adj_strength,
+ filter_weight,
+ accumulator, count);
vp10_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset,
- f->uv_stride, predictor + 256,
- mb_uv_width, mb_uv_height,
- adj_strength,
- filter_weight, accumulator + 256,
- count + 256);
+ f->uv_stride, predictor + 256,
+ mb_uv_width, mb_uv_height,
+ adj_strength,
+ filter_weight, accumulator + 256,
+ count + 256);
vp10_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset,
- f->uv_stride, predictor + 512,
- mb_uv_width, mb_uv_height,
- adj_strength, filter_weight,
- accumulator + 512, count + 512);
+ f->uv_stride, predictor + 512,
+ mb_uv_width, mb_uv_height,
+ adj_strength, filter_weight,
+ accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
- vp10_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16,
- strength, filter_weight,
- accumulator, count);
- vp10_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 256,
- count + 256);
- vp10_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 512,
- count + 512);
+ vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
+ predictor, 16, 16,
+ strength, filter_weight,
+ accumulator, count);
+ vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 256,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 256,
+ count + 256);
+ vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 512,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 512,
+ count + 512);
}
#else
// Apply the filter (YUV)
- vp10_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
+ vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
- vp10_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
+ vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
- vp10_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
+ vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
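The two temporal_filter.c hunks above make the same change in the low- and high-bit-depth paths: instead of squaring a single pixel difference, the modifier is now the SSE averaged over the valid 3x3 neighborhood (edge pixels see fewer than nine taps, hence the division by index), still scaled by 3 as before. Pulled out of the loop, the per-pixel weight is roughly the following sketch (8-bit case; names are illustrative, and the clamp to 16 plus the final 16 - modifier weighting are the unchanged code paths surrounding the hunks):

    #include <stdint.h>

    /* Sketch: non-local-mean modifier for the pixel at (i, j). frame2 points
     * at the co-located predictor pixel; byte is the pixel's offset in
     * frame1. */
    static int nlm_modifier(const uint8_t *frame1, int stride,
                            const uint8_t *frame2, int block_width,
                            int block_height, int i, int j, int byte,
                            int strength, int rounding) {
      int sum = 0, count = 0, idx, idy, modifier;
      for (idy = -1; idy <= 1; ++idy) {
        for (idx = -1; idx <= 1; ++idx) {
          const int row = i + idy;
          const int col = j + idx;
          if (row >= 0 && row < block_height && col >= 0 && col < block_width) {
            const int diff = frame1[byte + idy * stride + idx] -
                             frame2[idy * block_width + idx];
            sum += diff * diff;  /* accumulate SSE over the neighborhood */
            ++count;
          }
        }
      }
      modifier = (sum * 3) / count;  /* mean SSE, kept on the old 3x scale */
      modifier = (modifier + rounding) >> strength;
      return modifier > 16 ? 16 : modifier;  /* caller uses 16 - modifier */
    }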
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index a665a3c..66bb990 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -428,11 +428,12 @@
};
#endif
+#if !CONFIG_ANS
const struct vp10_token vp10_coef_encodings[ENTROPY_TOKENS] = {
{2, 2}, {6, 3}, {28, 5}, {58, 6}, {59, 6}, {60, 6}, {61, 6}, {124, 7},
{125, 7}, {126, 7}, {127, 7}, {0, 1}
};
-
+#endif // !CONFIG_ANS
struct tokenize_b_args {
VP10_COMP *cpi;
@@ -484,6 +485,39 @@
return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
+void vp10_tokenize_palette_sb(struct ThreadData *const td,
+ BLOCK_SIZE bsize, int plane,
+ TOKENEXTRA **t) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ uint8_t *color_map = xd->plane[0].color_index_map;
+ PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
+ int n = pmi->palette_size[plane != 0];
+ int i, j, k;
+ int color_new_idx = -1, color_ctx, color_order[PALETTE_MAX_SIZE];
+ int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+
+ for (i = 0; i < rows; ++i) {
+ for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+ color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, n,
+ color_order);
+ for (k = 0; k < n; ++k)
+ if (color_map[i * cols + j] == color_order[k]) {
+ color_new_idx = k;
+ break;
+ }
+ assert(color_new_idx >= 0 && color_new_idx < n);
+
+ (*t)->token = color_new_idx;
+ (*t)->context_tree = vp10_default_palette_y_color_prob[n - 2][color_ctx];
+ (*t)->skip_eob_node = 0;
+ ++(*t);
+ }
+ }
+}
+
static void tokenize_b(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
@@ -505,8 +539,8 @@
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
- const TX_TYPE tx_type = get_tx_type(type, xd, block);
- const scan_order *const so = get_scan(tx_size, tx_type);
+ const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
const int ref = is_inter_block(mbmi);
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[tx_size][type][ref];
@@ -540,6 +574,7 @@
pt = get_coef_context(nb, token_cache, c);
v = qcoeff[scan[c]];
}
+ assert(c < eob);
vp10_get_token_extra(v, &token, &extra);
@@ -609,6 +644,118 @@
return result;
}
+#if CONFIG_VAR_TX
+void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
+ int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+ int blk_row, int blk_col, int block, int plane,
+ void *arg) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
+ (blk_col >> (1 - pd->subsampling_x));
+ TX_SIZE plane_tx_size = plane ?
+ get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, 0, 0) :
+ mbmi->inter_tx_size[blk_idx];
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+ return;
+
+ if (tx_size == plane_tx_size) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+ if (!dry_run)
+ tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+ else
+ set_entropy_context_b(plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, arg);
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = 1 << (2 * (tx_size - 1));
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+ continue;
+
+ tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize,
+ offsetr, offsetc, block + i * step, plane, arg);
+ }
+ }
+}
+
+void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+ int dry_run, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ TOKENEXTRA *t_backup = *t;
+ const int ctx = vp10_get_skip_context(xd);
+ const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
+ struct tokenize_b_args arg = {cpi, td, t};
+ int plane;
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ if (mbmi->skip) {
+ if (!dry_run)
+ td->counts->skip[ctx][1] += skip_inc;
+ reset_skip_context(xd, bsize);
+ if (dry_run)
+ *t = t_backup;
+ return;
+ }
+
+ if (!dry_run)
+ td->counts->skip[ctx][0] += skip_inc;
+ else
+ *t = t_backup;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = 1 << (max_tx_size * 2);
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
+ block, plane, &arg);
+ block += step;
+ }
+ }
+
+ if (!dry_run) {
+ (*t)->token = EOSB_TOKEN;
+ (*t)++;
+ }
+ }
+}
+#endif
+
void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, BLOCK_SIZE bsize) {
VP10_COMMON *const cm = &cpi->common;
@@ -635,9 +782,54 @@
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
&arg);
(*t)->token = EOSB_TOKEN;
+#if CONFIG_ANS
+ // TODO(aconverse): clip the number of bits in tokenize_b
+ // Smuggle TX_SIZE in the unused extrabits field so the ANS encoder
+ // knows the maximum number of extrabits to write at the end of the block
+ // (where it starts, since ANS encodes the block in reverse).
+ (*t)->extra = (EXTRABIT)(plane ? get_uv_tx_size(mbmi, &xd->plane[plane])
+ : mbmi->tx_size);
+#endif // CONFIG_ANS
(*t)++;
}
} else {
vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
}
}
+
+#if CONFIG_SUPERTX
+void vp10_tokenize_sb_supertx(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+ int dry_run, BLOCK_SIZE bsize) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &td->mb.e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ TOKENEXTRA *t_backup = *t;
+ const int ctx = vp10_get_skip_context(xd);
+ const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
+ struct tokenize_b_args arg = {cpi, td, t};
+ if (mbmi->skip) {
+ if (!dry_run)
+ td->counts->skip[ctx][1] += skip_inc;
+ reset_skip_context(xd, bsize);
+ if (dry_run)
+ *t = t_backup;
+ return;
+ }
+
+ if (!dry_run) {
+ int plane;
+ td->counts->skip[ctx][0] += skip_inc;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
+ &arg);
+ (*t)->token = EOSB_TOKEN;
+ (*t)++;
+ }
+ } else {
+ vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
+ *t = t_backup;
+ }
+}
+#endif // CONFIG_SUPERTX
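On the palette path added above, vp10_tokenize_palette_sb does not emit raw palette indices: for every pixel after the top-left one, vp10_get_palette_color_context returns a coding context plus color_order, the palette indices reordered by how the already-coded neighbors used them, and the stored token is the pixel's position within that ordering (so with color_order = {2, 0, 1} and a raw index of 0, the emitted token is 1). The decoder rebuilds the same ordering from its causal neighbors and inverts the mapping; a sketch of that inverse (illustrative name, not part of this patch):

    /* Sketch: decoder-side inverse of the re-indexing loop above. */
    static int palette_token_to_index(const int *color_order, int token) {
      return color_order[token];
    }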
diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h
index 5bad415..c03ec02 100644
--- a/vp10/encoder/tokenize.h
+++ b/vp10/encoder/tokenize.h
@@ -43,7 +43,9 @@
extern const vpx_tree_index vp10_coef_tree[];
extern const vpx_tree_index vp10_coef_con_tree[];
+#if !CONFIG_ANS
extern const struct vp10_token vp10_coef_encodings[];
+#endif // !CONFIG_ANS
int vp10_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
@@ -51,8 +53,21 @@
struct VP10_COMP;
struct ThreadData;
+#if CONFIG_VAR_TX
+void vp10_tokenize_sb_inter(struct VP10_COMP *cpi, struct ThreadData *td,
+ TOKENEXTRA **t, int dry_run, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+#endif
+
+void vp10_tokenize_palette_sb(struct ThreadData *const td,
+ BLOCK_SIZE bsize, int plane,
+ TOKENEXTRA **t);
void vp10_tokenize_sb(struct VP10_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+#if CONFIG_SUPERTX
+void vp10_tokenize_sb_supertx(struct VP10_COMP *cpi, struct ThreadData *td,
+ TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+#endif
extern const int16_t *vp10_dct_value_cost_ptr;
/* TODO: The Token field should be broken out into a separate char array to
diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c
index e111157..976fe45 100644
--- a/vp10/encoder/x86/dct_sse2.c
+++ b/vp10/encoder/x86/dct_sse2.c
@@ -18,16 +18,37 @@
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
+// Reverse the order of the eight 16-bit words in an __m128i
+static INLINE __m128i mm_reverse_epi16(const __m128i x) {
+ const __m128i a = _mm_shufflelo_epi16(x, 0x1b);
+ const __m128i b = _mm_shufflehi_epi16(a, 0x1b);
+ return _mm_shuffle_epi32(b, 0x4e);
+}
+
static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
- int stride) {
+ int stride, int flipud, int fliplr) {
const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
__m128i mask;
- in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
- in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
- in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
- in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ if (!flipud) {
+ in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
+ in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ } else {
+ in[0] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ in[1] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
+ in[2] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in[3] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ }
+
+ if (fliplr) {
+ in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
+ in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
+ in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
+ in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
+ }
in[0] = _mm_slli_epi16(in[0], 4);
in[1] = _mm_slli_epi16(in[1], 4);
@@ -160,23 +181,55 @@
vpx_fdct4x4_sse2(input, output, stride);
break;
case ADST_DCT:
- load_buffer_4x4(input, in, stride);
+ load_buffer_4x4(input, in, stride, 0, 0);
fadst4_sse2(in);
fdct4_sse2(in);
write_buffer_4x4(output, in);
break;
case DCT_ADST:
- load_buffer_4x4(input, in, stride);
+ load_buffer_4x4(input, in, stride, 0, 0);
fdct4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
case ADST_ADST:
- load_buffer_4x4(input, in, stride);
+ load_buffer_4x4(input, in, stride, 0, 0);
fadst4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ load_buffer_4x4(input, in, stride, 1, 0);
+ fadst4_sse2(in);
+ fdct4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_4x4(input, in, stride, 0, 1);
+ fdct4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_4x4(input, in, stride, 1, 1);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_4x4(input, in, stride, 0, 1);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_4x4(input, in, stride, 1, 0);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -627,15 +680,37 @@
// load 8x8 array
static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
- int stride) {
- in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
- in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
- in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
- in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
- in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
- in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
- in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
- in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ int stride, int flipud, int fliplr) {
+ if (!flipud) {
+ in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ } else {
+ in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ }
+
+ if (fliplr) {
+ in[0] = mm_reverse_epi16(in[0]);
+ in[1] = mm_reverse_epi16(in[1]);
+ in[2] = mm_reverse_epi16(in[2]);
+ in[3] = mm_reverse_epi16(in[3]);
+ in[4] = mm_reverse_epi16(in[4]);
+ in[5] = mm_reverse_epi16(in[5]);
+ in[6] = mm_reverse_epi16(in[6]);
+ in[7] = mm_reverse_epi16(in[7]);
+ }
in[0] = _mm_slli_epi16(in[0], 2);
in[1] = _mm_slli_epi16(in[1], 2);
@@ -1144,26 +1219,63 @@
vpx_fdct8x8_sse2(input, output, stride);
break;
case ADST_DCT:
- load_buffer_8x8(input, in, stride);
+ load_buffer_8x8(input, in, stride, 0, 0);
fadst8_sse2(in);
fdct8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
case DCT_ADST:
- load_buffer_8x8(input, in, stride);
+ load_buffer_8x8(input, in, stride, 0, 0);
fdct8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
case ADST_ADST:
- load_buffer_8x8(input, in, stride);
+ load_buffer_8x8(input, in, stride, 0, 0);
fadst8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ load_buffer_8x8(input, in, stride, 1, 0);
+ fadst8_sse2(in);
+ fdct8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_8x8(input, in, stride, 0, 1);
+ fdct8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_8x8(input, in, stride, 1, 1);
+ fadst8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_8x8(input, in, stride, 0, 1);
+ fadst8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_8x8(input, in, stride, 1, 0);
+ fadst8_sse2(in);
+ fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
@@ -1171,15 +1283,37 @@
}
static INLINE void load_buffer_16x16(const int16_t* input, __m128i *in0,
- __m128i *in1, int stride) {
- // load first 8 columns
- load_buffer_8x8(input, in0, stride);
- load_buffer_8x8(input + 8 * stride, in0 + 8, stride);
+ __m128i *in1, int stride,
+ int flipud, int fliplr) {
+ // Load 4 8x8 blocks
+ const int16_t *topL = input;
+ const int16_t *topR = input + 8;
+ const int16_t *botL = input + 8 * stride;
+ const int16_t *botR = input + 8 * stride + 8;
- input += 8;
+ const int16_t *tmp;
+
+ if (flipud) {
+ // Swap left columns
+ tmp = topL; topL = botL; botL = tmp;
+ // Swap right columns
+ tmp = topR; topR = botR; botR = tmp;
+ }
+
+ if (fliplr) {
+ // Swap top rows
+ tmp = topL; topL = topR; topR = tmp;
+ // Swap bottom rows
+ tmp = botL; botL = botR; botR = tmp;
+ }
+
+ // load first 8 columns
+ load_buffer_8x8(topL, in0, stride, flipud, fliplr);
+ load_buffer_8x8(botL, in0 + 8, stride, flipud, fliplr);
+
// load second 8 columns
- load_buffer_8x8(input, in1, stride);
- load_buffer_8x8(input + 8 * stride, in1 + 8, stride);
+ load_buffer_8x8(topR, in1, stride, flipud, fliplr);
+ load_buffer_8x8(botR, in1 + 8, stride, flipud, fliplr);
}
static INLINE void write_buffer_16x16(tran_low_t *output, __m128i *in0,
@@ -2031,26 +2165,63 @@
vpx_fdct16x16_sse2(input, output, stride);
break;
case ADST_DCT:
- load_buffer_16x16(input, in0, in1, stride);
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fdct16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
case DCT_ADST:
- load_buffer_16x16(input, in0, in1, stride);
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
case ADST_ADST:
- load_buffer_16x16(input, in0, in1, stride);
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ load_buffer_16x16(input, in0, in1, stride, 1, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fdct16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 1);
+ fdct16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 1, 1);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 1);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_16x16(input, in0, in1, stride, 1, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
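Each FLIPADST case above reuses the existing ADST butterflies on mirrored input, so only the load step changes: flipud reverses the row load order, fliplr reverses the 16-bit words within each row. The three shuffles in mm_reverse_epi16 compose as: 0x1b reverses words 0-3, then words 4-7, and 0x4e swaps the two 64-bit halves. A standalone check of that identity (illustrative test program, not part of the patch):

    #include <emmintrin.h>
    #include <stdio.h>

    /* Same shuffle sequence as mm_reverse_epi16 in the hunk above. */
    static __m128i reverse_epi16(__m128i x) {
      const __m128i a = _mm_shufflelo_epi16(x, 0x1b);  /* reverse words 0..3 */
      const __m128i b = _mm_shufflehi_epi16(a, 0x1b);  /* reverse words 4..7 */
      return _mm_shuffle_epi32(b, 0x4e);               /* swap 64-bit halves */
    }

    int main(void) {
      short out[8];
      int i;
      const __m128i v = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
      _mm_storeu_si128((__m128i *)out, reverse_epi16(v));
      for (i = 0; i < 8; i++) printf("%d ", out[i]);  /* 7 6 5 4 3 2 1 0 */
      printf("\n");
      return 0;
    }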
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 2eb3488..4e89e5e 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -63,9 +63,27 @@
VP10_COMMON_SRCS-yes += common/scan.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c
+VP10_COMMON_SRCS-yes += common/vp10_txfm.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.c
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.c
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.c
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
+VP10_COMMON_SRCS-yes += common/vp10_convolve.c
+VP10_COMMON_SRCS-yes += common/vp10_convolve.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
+VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.h
+VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.c
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.c
ifeq ($(CONFIG_VP9_POSTPROC),yes)
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index 63d3adc..65a216e 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -91,9 +91,6 @@
size_t pending_cx_data_sz;
int pending_frame_count;
size_t pending_frame_sizes[8];
-#if !CONFIG_MISC_FIXES
- size_t pending_frame_magnitude;
-#endif
vpx_image_t preview_img;
vpx_enc_frame_flags_t next_frame_flags;
vp8_postproc_cfg_t preview_ppcfg;
@@ -783,39 +780,30 @@
uint8_t marker = 0xc0;
unsigned int mask;
int mag, index_sz;
-#if CONFIG_MISC_FIXES
int i;
size_t max_frame_sz = 0;
-#endif
assert(ctx->pending_frame_count);
assert(ctx->pending_frame_count <= 8);
// Add the number of frames to the marker byte
marker |= ctx->pending_frame_count - 1;
-#if CONFIG_MISC_FIXES
for (i = 0; i < ctx->pending_frame_count - 1; i++) {
const size_t frame_sz = (unsigned int) ctx->pending_frame_sizes[i] - 1;
max_frame_sz = frame_sz > max_frame_sz ? frame_sz : max_frame_sz;
}
-#endif
// Choose the magnitude
for (mag = 0, mask = 0xff; mag < 4; mag++) {
-#if CONFIG_MISC_FIXES
if (max_frame_sz <= mask)
break;
-#else
- if (ctx->pending_frame_magnitude < mask)
- break;
-#endif
mask <<= 8;
mask |= 0xff;
}
marker |= mag << 3;
// Write the index
- index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - CONFIG_MISC_FIXES);
+ index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - 1);
if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) {
uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz;
int i, j;
@@ -835,11 +823,11 @@
#endif
*x++ = marker;
- for (i = 0; i < ctx->pending_frame_count - CONFIG_MISC_FIXES; i++) {
+ for (i = 0; i < ctx->pending_frame_count - 1; i++) {
unsigned int this_sz;
assert(ctx->pending_frame_sizes[i] > 0);
- this_sz = (unsigned int)ctx->pending_frame_sizes[i] - CONFIG_MISC_FIXES;
+ this_sz = (unsigned int)ctx->pending_frame_sizes[i] - 1;
for (j = 0; j <= mag; j++) {
*x++ = this_sz & 0xff;
this_sz >>= 8;
@@ -993,9 +981,6 @@
ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude |= size;
-#endif
cx_data += size;
cx_data_sz -= size;
@@ -1012,9 +997,6 @@
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
ctx->pending_frame_count = 0;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude = 0;
-#endif
ctx->output_cx_pkt_cb.output_cx_pkt(
&pkt, ctx->output_cx_pkt_cb.user_priv);
}
@@ -1031,9 +1013,6 @@
if (ctx->pending_cx_data) {
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude |= size;
-#endif
ctx->pending_cx_data_sz += size;
// Write the superframe index only when no output packet callback is set.
if (!ctx->output_cx_pkt_cb.output_cx_pkt)
@@ -1043,9 +1022,6 @@
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
ctx->pending_frame_count = 0;
-#if !CONFIG_MISC_FIXES
- ctx->pending_frame_magnitude = 0;
-#endif
} else {
pkt.data.frame.buf = cx_data;
pkt.data.frame.sz = size;
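With the CONFIG_MISC_FIXES branches removed above, the superframe index always stores each frame size minus one and leaves the last frame implicit: mag is the smallest byte width that holds the largest stored size, and the index costs 2 marker bytes plus (mag + 1) bytes per indexed frame. A sketch of just the sizing arithmetic (hypothetical helper mirroring the loop above):

    #include <stddef.h>

    /* Sketch: byte cost of the superframe index; sizes are stored minus one
     * and the last frame is implicit. */
    static int superframe_index_sz(const size_t *sizes, int frames) {
      size_t max_sz = 0;
      unsigned int mask = 0xff;
      int mag, i;
      for (i = 0; i < frames - 1; i++)
        if (sizes[i] - 1 > max_sz) max_sz = sizes[i] - 1;
      for (mag = 0; mag < 4; mag++) {
        if (max_sz <= mask) break;
        mask = (mask << 8) | 0xff;
      }
      return 2 + (mag + 1) * (frames - 1);
    }
    /* e.g. frames of 200, 70000, 512 bytes: max_sz = 69999, mag = 2,
     * so the index is 2 + 3 * 2 = 8 bytes. */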
diff --git a/vp10/vp10_dx_iface.c b/vp10/vp10_dx_iface.c
index 33337a4..a0a58e8 100644
--- a/vp10/vp10_dx_iface.c
+++ b/vp10/vp10_dx_iface.c
@@ -122,6 +122,9 @@
#if CONFIG_VP9_POSTPROC
vp10_free_postproc_buffers(&frame_worker_data->pbi->common);
#endif
+#if CONFIG_LOOP_RESTORATION
+ vp10_free_restoration_buffers(&frame_worker_data->pbi->common);
+#endif // CONFIG_LOOP_RESTORATION
vp10_decoder_remove(frame_worker_data->pbi);
vpx_free(frame_worker_data->scratch_buffer);
#if CONFIG_MULTITHREAD
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk
index dc3b271..7ae2fb2 100644
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@ -23,6 +23,8 @@
VP10_CX_SRCS-yes += encoder/cost.h
VP10_CX_SRCS-yes += encoder/cost.c
VP10_CX_SRCS-yes += encoder/dct.c
+VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c
+VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h
VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.c
VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.h
VP10_CX_SRCS-yes += encoder/encodeframe.c
@@ -51,6 +53,8 @@
VP10_CX_SRCS-yes += encoder/treewriter.h
VP10_CX_SRCS-yes += encoder/mcomp.c
VP10_CX_SRCS-yes += encoder/encoder.c
+VP10_CX_SRCS-yes += encoder/palette.h
+VP10_CX_SRCS-yes += encoder/palette.c
VP10_CX_SRCS-yes += encoder/picklpf.c
VP10_CX_SRCS-yes += encoder/picklpf.h
VP10_CX_SRCS-yes += encoder/quantize.c
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 5a4b37d..b1c2e11 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2007,8 +2007,6 @@
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
- cpi->b_calculate_ssimg = 0;
-
cpi->count = 0;
cpi->bytes = 0;
@@ -2029,14 +2027,6 @@
cpi->summed_weights = 0;
}
- if (cpi->b_calculate_ssimg)
- {
- cpi->total_ssimg_y = 0;
- cpi->total_ssimg_u = 0;
- cpi->total_ssimg_v = 0;
- cpi->total_ssimg_all = 0;
- }
-
#endif
cpi->first_time_stamp_ever = 0x7FFFFFFF;
@@ -2315,45 +2305,6 @@
total_ssim, total_encode_time);
}
}
-
- if (cpi->b_calculate_ssimg)
- {
- if (cpi->oxcf.number_of_layers > 1)
- {
- int i;
-
- fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"
- "Time(us)\n");
- for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
- {
- double dr = (double)cpi->bytes_in_layer[i] *
- 8.0 / 1000.0 / time_encoded;
- fprintf(f, "%5d\t%7.3f\t%6.4f\t"
- "%6.4f\t%6.4f\t%6.4f\t%8.0f\n",
- i, dr,
- cpi->total_ssimg_y_in_layer[i] /
- cpi->frames_in_layer[i],
- cpi->total_ssimg_u_in_layer[i] /
- cpi->frames_in_layer[i],
- cpi->total_ssimg_v_in_layer[i] /
- cpi->frames_in_layer[i],
- cpi->total_ssimg_all_in_layer[i] /
- cpi->frames_in_layer[i],
- total_encode_time);
- }
- }
- else
- {
- fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"
- "Time(us)\n");
- fprintf(f, "%7.3f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr,
- cpi->total_ssimg_y / cpi->count,
- cpi->total_ssimg_u / cpi->count,
- cpi->total_ssimg_v / cpi->count,
- cpi->total_ssimg_all / cpi->count, total_encode_time);
- }
- }
-
fclose(f);
#if 0
f = fopen("qskip.stt", "a");
@@ -5761,38 +5712,6 @@
}
#endif
}
-
- if (cpi->b_calculate_ssimg)
- {
- double y, u, v, frame_all;
- frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show,
- &y, &u, &v);
-
- if (cpi->oxcf.number_of_layers > 1)
- {
- unsigned int i;
-
- for (i=cpi->current_layer;
- i<cpi->oxcf.number_of_layers; i++)
- {
- if (!cpi->b_calculate_psnr)
- cpi->frames_in_layer[i]++;
-
- cpi->total_ssimg_y_in_layer[i] += y;
- cpi->total_ssimg_u_in_layer[i] += u;
- cpi->total_ssimg_v_in_layer[i] += v;
- cpi->total_ssimg_all_in_layer[i] += frame_all;
- }
- }
- else
- {
- cpi->total_ssimg_y += y;
- cpi->total_ssimg_u += u;
- cpi->total_ssimg_v += v;
- cpi->total_ssimg_all += frame_all;
- }
- }
-
}
}
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 2b2f7a0..b436548 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -637,13 +637,6 @@
double summed_weights;
unsigned int tot_recode_hits;
-
- double total_ssimg_y;
- double total_ssimg_u;
- double total_ssimg_v;
- double total_ssimg_all;
-
- int b_calculate_ssimg;
#endif
int b_calculate_psnr;
@@ -689,11 +682,6 @@
double sum_ssim[VPX_TS_MAX_LAYERS];
double sum_weights[VPX_TS_MAX_LAYERS];
- double total_ssimg_y_in_layer[VPX_TS_MAX_LAYERS];
- double total_ssimg_u_in_layer[VPX_TS_MAX_LAYERS];
- double total_ssimg_v_in_layer[VPX_TS_MAX_LAYERS];
- double total_ssimg_all_in_layer[VPX_TS_MAX_LAYERS];
-
#if CONFIG_MULTI_RES_ENCODING
/* Number of MBs per row at lower-resolution level */
int mr_low_res_mb_cols;
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index c201890..0a36dba 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -192,6 +192,7 @@
const MODE_INFO *const left_mi = xd->left_mi;
const int has_above = xd->up_available;
const int has_left = xd->left_available;
+
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c
index db78d6b..033326d 100644
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -379,11 +379,11 @@
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
- accum->single_ref[i][j][k] += counts->single_ref[i][j][k];
+ accum->single_ref[i][j][k] += counts->single_ref[i][j][k];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- accum->comp_ref[i][j] += counts->comp_ref[i][j];
+ accum->comp_ref[i][j] += counts->comp_ref[i][j];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
for (j = 0; j < TX_SIZES; j++)
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index f5da07e..ed49a69 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -237,15 +237,16 @@
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
lock_buffer_pool(pool);
+
for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
const int old_idx = cm->ref_frame_map[ref_index];
// Current thread releases the holding of reference frame.
decrease_ref_count(old_idx, frame_bufs, pool);
// Release the reference frame in reference map.
- if (mask & 1) {
+ if (mask & 1)
decrease_ref_count(old_idx, frame_bufs, pool);
- }
+
cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
++ref_index;
}
@@ -267,7 +268,7 @@
}
// Invalidate these references until the next frame starts.
- for (ref_index = 0; ref_index < 3; ref_index++)
+ for (ref_index = 0; ref_index < REFS_PER_FRAME; ref_index++)
cm->frame_refs[ref_index].idx = -1;
}
@@ -325,7 +326,6 @@
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
}
-
if (setjmp(cm->error.jmp)) {
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
int i;
@@ -350,9 +350,8 @@
decrease_ref_count(old_idx, frame_bufs, pool);
// Release the reference frame in reference map.
- if (mask & 1) {
+ if (mask & 1)
decrease_ref_count(old_idx, frame_bufs, pool);
- }
++ref_index;
}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index b7cfdf6..ebc850d 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -20,7 +20,6 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
-
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
size_t consec_zero_mv_size;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 4159fe5..a2445b0 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4132,7 +4132,7 @@
// either compound, single or hybrid prediction as per whatever has
// worked best for that type of frame in the past.
// It also predicts whether another coding mode would have worked
- // better that this coding mode. If that is the case, it remembers
+ // better than this coding mode. If that is the case, it remembers
// that for subsequent frames.
// It does the same analysis for transform size selection also.
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index ff176fb..f3147e9 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1730,7 +1730,6 @@
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
- cpi->b_calculate_ssimg = 0;
cpi->b_calculate_blockiness = 1;
cpi->b_calculate_consistency = 1;
cpi->total_inconsistency = 0;
@@ -1754,11 +1753,7 @@
cpi->summedp_weights = 0;
}
- if (cpi->b_calculate_ssimg) {
- cpi->ssimg.worst= 100.0;
- }
cpi->fastssim.worst = 100.0;
-
cpi->psnrhvs.worst = 100.0;
if (cpi->b_calculate_blockiness) {
@@ -2049,13 +2044,6 @@
SNPRINT2(results, "\t%7.3f", consistency);
SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
}
-
- if (cpi->b_calculate_ssimg) {
- SNPRINT(headings, "\t SSIMG\tWtSSIMG");
- SNPRINT2(results, "\t%7.3f", cpi->ssimg.stat[ALL] / cpi->count);
- SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
- }
-
fprintf(f, "%s\t Time\n", headings);
fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
}
@@ -4567,7 +4555,17 @@
cpi->bytes += (int)(*size);
if (cm->show_frame) {
+ uint32_t bit_depth = 8;
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint32_t in_bit_depth = 8;
+#endif
cpi->count++;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ in_bit_depth = cpi->oxcf.input_bit_depth;
+ bit_depth = cm->bit_depth;
+ }
+#endif
if (cpi->b_calculate_psnr) {
YV12_BUFFER_CONFIG *orig = cpi->Source;
@@ -4576,7 +4574,7 @@
PSNR_STATS psnr;
#if CONFIG_VP9_HIGHBITDEPTH
calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
- cpi->oxcf.input_bit_depth);
+ in_bit_depth);
#else
calc_psnr(orig, recon, &psnr);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -4695,37 +4693,16 @@
}
}
- if (cpi->b_calculate_ssimg) {
- double y, u, v, frame_all;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- frame_all = vpx_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
- &u, &v, (int)cm->bit_depth);
- } else {
- frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
- &v);
- }
-#else
- frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
-#endif // CONFIG_VP9_HIGHBITDEPTH
- adjust_image_stat(y, u, v, frame_all, &cpi->ssimg);
- }
-#if CONFIG_VP9_HIGHBITDEPTH
- if (!cm->use_highbitdepth)
-#endif
{
double y, u, v, frame_all;
frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
- &v);
+ &v, bit_depth);
adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
- /* TODO(JBB): add 10/12 bit support */
}
-#if CONFIG_VP9_HIGHBITDEPTH
- if (!cm->use_highbitdepth)
-#endif
{
double y, u, v, frame_all;
- frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
+ frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
+ bit_depth);
adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
}
}
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index c486ac2..8759cbe 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -432,13 +432,10 @@
unsigned int tot_recode_hits;
double worst_ssim;
- ImageStat ssimg;
ImageStat fastssim;
ImageStat psnrhvs;
- int b_calculate_ssimg;
int b_calculate_blockiness;
-
int b_calculate_consistency;
double total_inconsistency;
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index fc32d19..44b6ae7 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -588,6 +588,12 @@
rd->thresh_mult[THR_NEARMV] += 1000;
rd->thresh_mult[THR_NEARA] += 1000;
+ rd->thresh_mult[THR_NEARG] += 1000;
+
+ rd->thresh_mult[THR_ZEROMV] += 2000;
+ rd->thresh_mult[THR_ZEROG] += 2000;
+ rd->thresh_mult[THR_ZEROA] += 2000;
+
rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
@@ -595,13 +601,9 @@
rd->thresh_mult[THR_COMP_NEARLA] += 1500;
rd->thresh_mult[THR_COMP_NEWLA] += 2000;
- rd->thresh_mult[THR_NEARG] += 1000;
rd->thresh_mult[THR_COMP_NEARGA] += 1500;
rd->thresh_mult[THR_COMP_NEWGA] += 2000;
- rd->thresh_mult[THR_ZEROMV] += 2000;
- rd->thresh_mult[THR_ZEROG] += 2000;
- rd->thresh_mult[THR_ZEROA] += 2000;
rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
@@ -616,9 +618,10 @@
}
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
- static const int thresh_mult[2][MAX_REFS] =
- {{2500, 2500, 2500, 4500, 4500, 2500},
- {2000, 2000, 2000, 4000, 4000, 2000}};
+ static const int thresh_mult[2][MAX_REFS] = {
+ {2500, 2500, 2500, 4500, 4500, 2500},
+ {2000, 2000, 2000, 4000, 4000, 2000}
+ };
RD_OPT *const rd = &cpi->rd;
const int idx = cpi->oxcf.mode == BEST;
memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
diff --git a/vpx/internal/vpx_psnr.h b/vpx/internal/vpx_psnr.h
index 07d81bb..0e90085 100644
--- a/vpx/internal/vpx_psnr.h
+++ b/vpx/internal/vpx_psnr.h
@@ -11,6 +11,8 @@
#ifndef VPX_INTERNAL_VPX_PSNR_H_
#define VPX_INTERNAL_VPX_PSNR_H_
+#define MAX_PSNR 100.0
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/vpx/src/vpx_psnr.c b/vpx/src/vpx_psnr.c
index 05843ac..27a6180 100644
--- a/vpx/src/vpx_psnr.c
+++ b/vpx/src/vpx_psnr.c
@@ -12,7 +12,6 @@
#include "vpx/internal/vpx_psnr.h"
-#define MAX_PSNR 100.0
double vpx_sse_to_psnr(double samples, double peak, double sse) {
if (sse > 0.0) {
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index bd99c6d..7504c0e 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -806,9 +806,12 @@
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *)
#define VPX_CTRL_VP9E_SET_SVC_REF_FRAME_CONFIG
-VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
+/*!\brief
+ *
+ * TODO(rbultje): add support for this control in ffmpeg
+ */
#define VPX_CTRL_VP9E_SET_RENDER_SIZE
-
+VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
/*!\endcond */
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
diff --git a/vpx_dsp/bitreader_buffer.c b/vpx_dsp/bitreader_buffer.c
index d7b55cf..595b9bb 100644
--- a/vpx_dsp/bitreader_buffer.c
+++ b/vpx_dsp/bitreader_buffer.c
@@ -43,11 +43,7 @@
int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb,
int bits) {
-#if CONFIG_MISC_FIXES
const int nbits = sizeof(unsigned) * 8 - bits - 1;
const unsigned value = (unsigned)vpx_rb_read_literal(rb, bits + 1) << nbits;
return ((int) value) >> nbits;
-#else
- return vpx_rb_read_signed_literal(rb, bits);
-#endif
}
diff --git a/vpx_dsp/bitwriter_buffer.c b/vpx_dsp/bitwriter_buffer.c
index 6182a72..8633372 100644
--- a/vpx_dsp/bitwriter_buffer.c
+++ b/vpx_dsp/bitwriter_buffer.c
@@ -39,10 +39,5 @@
void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb,
int data, int bits) {
-#if CONFIG_MISC_FIXES
vpx_wb_write_literal(wb, data, bits + 1);
-#else
- vpx_wb_write_literal(wb, abs(data), bits);
- vpx_wb_write_bit(wb, data < 0);
-#endif
}
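The now-unconditional writer/reader pair above is a plain two's-complement round trip: the writer emits data in bits + 1 bits, and the reader sign-extends by shifting the (bits + 1)-bit value to the top of the word and arithmetic-shifting it back down. Stripped of the bit-buffer plumbing, the pair behaves like this sketch:

    /* Sketch: the writer/reader pair above, with plain integers standing in
     * for the bit buffers. */
    static unsigned write_inv_signed(int data, int bits) {
      /* vpx_wb_write_literal emits the low (bits + 1) bits of data */
      return (unsigned)data & ((1u << (bits + 1)) - 1u);
    }

    static int read_inv_signed(unsigned value, int bits) {
      const int nbits = (int)sizeof(unsigned) * 8 - bits - 1;
      return (int)(value << nbits) >> nbits;  /* sign-extend bit `bits` */
    }
    /* read_inv_signed(write_inv_signed(-3, 4), 4) == -3 */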
diff --git a/vpx_dsp/fastssim.c b/vpx_dsp/fastssim.c
index 1405a30..e3746f0 100644
--- a/vpx_dsp/fastssim.c
+++ b/vpx_dsp/fastssim.c
@@ -10,6 +10,7 @@
* This code was originally written by: Nathan E. Egge, at the Daala
* project.
*/
+#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
@@ -17,19 +18,24 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ssim.h"
#include "vpx_ports/system_state.h"
-/* TODO(jbb): High bit depth version of this code needed */
+
typedef struct fs_level fs_level;
typedef struct fs_ctx fs_ctx;
#define SSIM_C1 (255 * 255 * 0.01 * 0.01)
#define SSIM_C2 (255 * 255 * 0.03 * 0.03)
-
+#if CONFIG_VP9_HIGHBITDEPTH
+#define SSIM_C1_10 (1023 * 1023 * 0.01 * 0.01)
+#define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01)
+#define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03)
+#define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03)
+#endif
#define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))
#define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))
struct fs_level {
- uint16_t *im1;
- uint16_t *im2;
+ uint32_t *im1;
+ uint32_t *im2;
double *ssim;
int w;
int h;
@@ -80,7 +86,7 @@
level_size += sizeof(*_ctx->level[l].ssim) - 1;
level_size /= sizeof(*_ctx->level[l].ssim);
level_size *= sizeof(*_ctx->level[l].ssim);
- _ctx->level[l].im1 = (uint16_t *) data;
+ _ctx->level[l].im1 = (uint32_t *)data;
_ctx->level[l].im2 = _ctx->level[l].im1 + im_size;
data += level_size;
_ctx->level[l].ssim = (double *) data;
@@ -96,10 +102,10 @@
}
static void fs_downsample_level(fs_ctx *_ctx, int _l) {
- const uint16_t *src1;
- const uint16_t *src2;
- uint16_t *dst1;
- uint16_t *dst2;
+ const uint32_t *src1;
+ const uint32_t *src2;
+ uint32_t *dst1;
+ uint32_t *dst2;
int w2;
int h2;
int w;
@@ -132,11 +138,12 @@
}
}
-static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1,
- int _s1ystride, const unsigned char *_src2,
- int _s2ystride, int _w, int _h) {
- uint16_t *dst1;
- uint16_t *dst2;
+static void fs_downsample_level0(fs_ctx *_ctx, const uint8_t *_src1,
+ int _s1ystride, const uint8_t *_src2,
+ int _s2ystride, int _w, int _h,
+ uint32_t bit_depth) {
+ uint32_t *dst1;
+ uint32_t *dst2;
int w;
int h;
int i;
@@ -155,21 +162,32 @@
int i1;
i0 = 2 * i;
i1 = FS_MINI(i0 + 1, _w);
- dst1[j * w + i] = _src1[j0 * _s1ystride + i0]
- + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0]
- + _src1[j1 * _s1ystride + i1];
- dst2[j * w + i] = _src2[j0 * _s2ystride + i0]
- + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0]
- + _src2[j1 * _s2ystride + i1];
+ if (bit_depth == 8) {
+ dst1[j * w + i] = _src1[j0 * _s1ystride + i0]
+ + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0]
+ + _src1[j1 * _s1ystride + i1];
+ dst2[j * w + i] = _src2[j0 * _s2ystride + i0]
+ + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0]
+ + _src2[j1 * _s2ystride + i1];
+ } else {
+ uint16_t *src1s = CONVERT_TO_SHORTPTR(_src1);
+ uint16_t *src2s = CONVERT_TO_SHORTPTR(_src2);
+ dst1[j * w + i] = src1s[j0 * _s1ystride + i0]
+ + src1s[j0 * _s1ystride + i1] + src1s[j1 * _s1ystride + i0]
+ + src1s[j1 * _s1ystride + i1];
+ dst2[j * w + i] = src2s[j0 * _s2ystride + i0]
+ + src2s[j0 * _s2ystride + i1] + src2s[j1 * _s2ystride + i0]
+ + src2s[j1 * _s2ystride + i1];
+ }
}
}
}
-static void fs_apply_luminance(fs_ctx *_ctx, int _l) {
+static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) {
unsigned *col_sums_x;
unsigned *col_sums_y;
- uint16_t *im1;
- uint16_t *im2;
+ uint32_t *im1;
+ uint32_t *im2;
double *ssim;
double c1;
int w;
@@ -178,6 +196,15 @@
int j1offs;
int i;
int j;
+ double ssim_c1 = SSIM_C1;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (bit_depth == 10)
+ ssim_c1 = SSIM_C1_10;
+ if (bit_depth == 12)
+ ssim_c1 = SSIM_C1_12;
+#else
+ assert(bit_depth == 8);
+#endif
w = _ctx->level[_l].w;
h = _ctx->level[_l].h;
col_sums_x = _ctx->col_buf;
@@ -196,7 +223,7 @@
col_sums_y[i] += im2[j1offs + i];
}
ssim = _ctx->level[_l].ssim;
- c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l));
+ c1 = (double) (ssim_c1 * 4096 * (1 << 4 * _l));
for (j = 0; j < h; j++) {
unsigned mux;
unsigned muy;
@@ -294,9 +321,9 @@
} \
while (0)
-static void fs_calc_structure(fs_ctx *_ctx, int _l) {
- uint16_t *im1;
- uint16_t *im2;
+static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) {
+ uint32_t *im1;
+ uint32_t *im2;
unsigned *gx_buf;
unsigned *gy_buf;
double *ssim;
@@ -309,6 +336,16 @@
int h;
int i;
int j;
+ double ssim_c2 = SSIM_C2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (bit_depth == 10)
+ ssim_c2 = SSIM_C2_10;
+ if (bit_depth == 12)
+ ssim_c2 = SSIM_C2_12;
+#else
+ assert(bit_depth == 8);
+#endif
+
w = _ctx->level[_l].w;
h = _ctx->level[_l].h;
im1 = _ctx->level[_l].im1;
@@ -318,7 +355,7 @@
stride = w + 8;
gy_buf = gx_buf + 8 * stride;
memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf));
- c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104;
+ c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104;
for (j = 0; j < h + 4; j++) {
if (j < h - 1) {
for (i = 0; i < w - 1; i++) {
@@ -421,48 +458,51 @@
return pow(ret / (w * h), FS_WEIGHTS[_l]);
}
-static double calc_ssim(const unsigned char *_src, int _systride,
- const unsigned char *_dst, int _dystride, int _w, int _h) {
+static double convert_ssim_db(double _ssim, double _weight) {
+ assert(_weight >= _ssim);
+ if ((_weight - _ssim) < 1e-10)
+ return MAX_SSIM_DB;
+ return 10 * (log10(_weight) - log10(_weight - _ssim));
+}
+
+static double calc_ssim(const uint8_t *_src, int _systride,
+ const uint8_t *_dst, int _dystride,
+ int _w, int _h, uint32_t bit_depth) {
fs_ctx ctx;
double ret;
int l;
ret = 1;
fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);
- fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h);
+ fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride,
+ _w, _h, bit_depth);
for (l = 0; l < FS_NLEVELS - 1; l++) {
- fs_calc_structure(&ctx, l);
+ fs_calc_structure(&ctx, l, bit_depth);
ret *= fs_average(&ctx, l);
fs_downsample_level(&ctx, l + 1);
}
- fs_calc_structure(&ctx, l);
- fs_apply_luminance(&ctx, l);
+ fs_calc_structure(&ctx, l, bit_depth);
+ fs_apply_luminance(&ctx, l, bit_depth);
ret *= fs_average(&ctx, l);
fs_ctx_clear(&ctx);
return ret;
}
-static double convert_ssim_db(double _ssim, double _weight) {
- return 10 * (log10(_weight) - log10(_weight - _ssim));
-}
-
double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest,
- double *ssim_y, double *ssim_u, double *ssim_v) {
+ double *ssim_y, double *ssim_u, double *ssim_v,
+ uint32_t bit_depth) {
double ssimv;
vpx_clear_system_state();
-
*ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
dest->y_stride, source->y_crop_width,
- source->y_crop_height);
-
+ source->y_crop_height, bit_depth);
*ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,
dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height);
-
+ source->uv_crop_height, bit_depth);
*ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,
dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height);
- ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));
+ source->uv_crop_height, bit_depth);
+ ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));
return convert_ssim_db(ssimv, 1.0);
}
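The new constants above are the standard SSIM stabilizers c1 = (k1 * L)^2 and c2 = (k2 * L)^2 with k1 = 0.01, k2 = 0.03 and L the peak sample value (255, 1023, or 4095); fs_apply_luminance and fs_calc_structure then rescale them per pyramid level exactly as before. They could equally be derived from the bit depth, as in this sketch (hypothetical helper, not in the patch):

    /* Sketch: derive the SSIM stabilizing constants from bit depth instead
     * of the hard-coded SSIM_C1_10 / SSIM_C2_12 tables. */
    static void ssim_constants(int bit_depth, double *c1, double *c2) {
      const double peak = (double)((1 << bit_depth) - 1);  /* 255/1023/4095 */
      *c1 = (0.01 * peak) * (0.01 * peak);
      *c2 = (0.03 * peak) * (0.03 * peak);
    }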
diff --git a/vpx_dsp/intrapred.c b/vpx_dsp/intrapred.c
index 7c42f2a..dcc9b30 100644
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -44,7 +44,6 @@
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
}
-#if CONFIG_MISC_FIXES
static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -59,7 +58,6 @@
dst += stride;
}
}
-#endif // CONFIG_MISC_FIXES
static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -78,7 +76,6 @@
}
}
-#if CONFIG_MISC_FIXES
static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -92,7 +89,6 @@
dst += stride;
}
}
-#endif // CONFIG_MISC_FIXES
static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -113,7 +109,6 @@
}
}
-#if CONFIG_MISC_FIXES
static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -126,7 +121,6 @@
dst += stride;
}
}
-#endif // CONFIG_MISC_FIXES
static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -543,7 +537,6 @@
}
}
-#if CONFIG_MISC_FIXES
static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -560,7 +553,6 @@
dst += stride;
}
}
-#endif // CONFIG_MISC_FIXES
static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
@@ -596,7 +588,6 @@
}
}
-#if CONFIG_MISC_FIXES
static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -611,7 +602,6 @@
dst += stride;
}
}
-#endif // CONFIG_MISC_FIXES
static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
@@ -851,11 +841,9 @@
intra_pred_no_4x4(d207)
intra_pred_no_4x4(d63)
intra_pred_no_4x4(d45)
-#if CONFIG_MISC_FIXES
intra_pred_allsizes(d207e)
intra_pred_allsizes(d63e)
intra_pred_no_4x4(d45e)
-#endif
intra_pred_no_4x4(d117)
intra_pred_no_4x4(d135)
intra_pred_no_4x4(d153)
diff --git a/vpx_dsp/psnrhvs.c b/vpx_dsp/psnrhvs.c
index 3001705..4d3d6ee 100644
--- a/vpx_dsp/psnrhvs.c
+++ b/vpx_dsp/psnrhvs.c
@@ -10,6 +10,7 @@
* This code was originally written by: Gregory Maxwell, at the Daala
* project.
*/
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
@@ -18,6 +19,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ssim.h"
#include "vpx_ports/system_state.h"
+#include "vpx/internal/vpx_psnr.h"
#if !defined(M_PI)
# define M_PI (3.141592653589793238462643)
@@ -26,14 +28,29 @@
static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
int xstride) {
+ int i, j;
(void) xstride;
vpx_fdct8x8(x, y, ystride);
+ for (i = 0; i < 8; i++)
+ for (j = 0; j < 8; j++)
+ *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
+ int xstride) {
+ int i, j;
+ (void) xstride;
+ vpx_highbd_fdct8x8(x, y, ystride);
+ for (i = 0; i < 8; i++)
+ for (j = 0; j < 8; j++)
+ *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
+}
+#endif
/* Normalized inverse quantization matrix for 8x8 DCT at the point of
* transparency. This is not the JPEG based matrix from the paper,
this one gives a slightly higher MOS agreement.*/
-static const float csf_y[8][8] = {
+static const double csf_y[8][8] = {
{1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,
0.678296995242, 0.466224900598, 0.3265091542},
{2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,
@@ -50,7 +67,7 @@
0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001},
{0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,
0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276}};
-static const float csf_cb420[8][8] = {
+static const double csf_cb420[8][8] = {
{1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,
0.898018824055, 0.74725392039, 0.615105596242},
{2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,
@@ -67,7 +84,7 @@
0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733},
{0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,
0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237}};
-static const float csf_cr420[8][8] = {
+static const double csf_cr420[8][8] = {
{2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,
0.867069376285, 0.721500455585, 0.593906509971},
{2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,
@@ -85,23 +102,37 @@
{0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,
0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658}};
-static double convert_score_db(double _score, double _weight) {
- return 10 * (log10(255 * 255) - log10(_weight * _score));
+static double convert_score_db(double _score, double _weight, int bit_depth) {
+ int16_t pix_max = 255;
+ assert(_score * _weight >= 0.0);
+ if (bit_depth == 10)
+ pix_max = 1023;
+ else if (bit_depth == 12)
+ pix_max = 4095;
+
+ if (_weight * _score < pix_max * pix_max * 1e-10)
+ return MAX_PSNR;
+ return 10 * (log10(pix_max * pix_max) - log10(_weight * _score));
}
-static double calc_psnrhvs(const unsigned char *_src, int _systride,
- const unsigned char *_dst, int _dystride,
- double _par, int _w, int _h, int _step,
- const float _csf[8][8]) {
- float ret;
+static double calc_psnrhvs(const unsigned char *src, int _systride,
+ const unsigned char *dst, int _dystride,
+ double _par, int _w, int _h, int _step,
+ const double _csf[8][8], uint32_t bit_depth) {
+ double ret;
+ const uint8_t *_src8 = src;
+ const uint8_t *_dst8 = dst;
+ const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src);
+ const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst);
int16_t dct_s[8 * 8], dct_d[8 * 8];
tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8];
- float mask[8][8];
+ double mask[8][8];
int pixels;
int x;
int y;
(void) _par;
ret = pixels = 0;
+
/*In the PSNR-HVS-M paper[1] the authors describe the construction of
their masking table as "we have used the quantization table for the
color component Y of JPEG [6] that has been also obtained on the
@@ -126,23 +157,28 @@
for (x = 0; x < _w - 7; x += _step) {
int i;
int j;
- float s_means[4];
- float d_means[4];
- float s_vars[4];
- float d_vars[4];
- float s_gmean = 0;
- float d_gmean = 0;
- float s_gvar = 0;
- float d_gvar = 0;
- float s_mask = 0;
- float d_mask = 0;
+ double s_means[4];
+ double d_means[4];
+ double s_vars[4];
+ double d_vars[4];
+ double s_gmean = 0;
+ double d_gmean = 0;
+ double s_gvar = 0;
+ double d_gvar = 0;
+ double s_mask = 0;
+ double d_mask = 0;
for (i = 0; i < 4; i++)
s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0;
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
- dct_s[i * 8 + j] = _src[(y + i) * _systride + (j + x)];
- dct_d[i * 8 + j] = _dst[(y + i) * _dystride + (j + x)];
+ if (bit_depth == 8) {
+ dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)];
+ dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)];
+ } else if (bit_depth == 10 || bit_depth == 12) {
+ dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)];
+ dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)];
+ }
s_gmean += dct_s[i * 8 + j];
d_gmean += dct_d[i * 8 + j];
s_means[sub] += dct_s[i * 8 + j];
@@ -176,8 +212,16 @@
s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar;
if (d_gvar > 0)
d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar;
- od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
- od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (bit_depth == 10 || bit_depth == 12) {
+ hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
+ hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
+ }
+#endif
+ if (bit_depth == 8) {
+ od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
+ od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
+ }
for (i = 0; i < 8; i++)
for (j = (i == 0); j < 8; j++)
s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];
@@ -190,8 +234,8 @@
s_mask = d_mask;
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
- float err;
- err = fabs((float)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
+ double err;
+ err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
if (i != 0 || j != 0)
err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];
ret += (err * _csf[i][j]) * (err * _csf[i][j]);
@@ -203,25 +247,28 @@
ret /= pixels;
return ret;
}
+
double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs,
- double *u_psnrhvs, double *v_psnrhvs) {
+ double *u_psnrhvs, double *v_psnrhvs, uint32_t bit_depth) {
double psnrhvs;
const double par = 1.0;
const int step = 7;
vpx_clear_system_state();
+
+ assert(bit_depth == 8 || bit_depth == 10 || bit_depth == 12);
+
*y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer,
dest->y_stride, par, source->y_crop_width,
- source->y_crop_height, step, csf_y);
-
+ source->y_crop_height, step, csf_y, bit_depth);
*u_psnrhvs = calc_psnrhvs(source->u_buffer, source->uv_stride, dest->u_buffer,
dest->uv_stride, par, source->uv_crop_width,
- source->uv_crop_height, step, csf_cb420);
+ source->uv_crop_height, step, csf_cb420, bit_depth);
*v_psnrhvs = calc_psnrhvs(source->v_buffer, source->uv_stride, dest->v_buffer,
dest->uv_stride, par, source->uv_crop_width,
- source->uv_crop_height, step, csf_cr420);
+ source->uv_crop_height, step, csf_cr420, bit_depth);
psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));
-
- return convert_score_db(psnrhvs, 1.0);
+ return convert_score_db(psnrhvs, 1.0, bit_depth);
}
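For context on the conversion above: convert_score_db() now scales the peak to the bit depth (255, 1023, or 4095) and clamps vanishingly small weighted scores to MAX_PSNR instead of overflowing the logarithm, and vpx_psnrhvs() weights the planes 0.8/0.1/0.1 before converting. A minimal standalone sketch of that math, assuming MAX_PSNR is the 100.0 cap used elsewhere in libvpx:

#include <assert.h>
#include <math.h>
#include <stdio.h>

#define MAX_PSNR 100.0 /* assumption: mirrors libvpx's PSNR cap */

/* Same arithmetic as convert_score_db() above. */
static double score_to_db(double score, double weight, int bit_depth) {
  int pix_max = 255;
  assert(bit_depth == 8 || bit_depth == 10 || bit_depth == 12);
  if (bit_depth == 10) pix_max = 1023;
  else if (bit_depth == 12) pix_max = 4095;
  if (weight * score < (double)pix_max * pix_max * 1e-10) return MAX_PSNR;
  return 10 * (log10((double)pix_max * pix_max) - log10(weight * score));
}

int main(void) {
  /* The same raw HVS error maps to a larger dB value at higher bit
   * depths because the peak pix_max^2 grows. */
  printf("%.2f\n", score_to_db(25.0, 1.0, 8));  /* ~34.15 */
  printf("%.2f\n", score_to_db(25.0, 1.0, 10)); /* ~46.22 */
  printf("%.2f\n", score_to_db(25.0, 1.0, 12)); /* ~58.27 */
  return 0;
}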
diff --git a/vpx_dsp/ssim.c b/vpx_dsp/ssim.c
index cfe5bb3..fd93243 100644
--- a/vpx_dsp/ssim.c
+++ b/vpx_dsp/ssim.c
@@ -182,31 +182,6 @@
return ssimv;
}
-double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest,
- double *ssim_y, double *ssim_u, double *ssim_v) {
- double ssim_all = 0;
- double a, b, c;
-
- a = vpx_ssim2(source->y_buffer, dest->y_buffer,
- source->y_stride, dest->y_stride,
- source->y_crop_width, source->y_crop_height);
-
- b = vpx_ssim2(source->u_buffer, dest->u_buffer,
- source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height);
-
- c = vpx_ssim2(source->v_buffer, dest->v_buffer,
- source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height);
- *ssim_y = a;
- *ssim_u = b;
- *ssim_v = c;
- ssim_all = (a * 4 + b + c) / 6;
-
- return ssim_all;
-}
-
// traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity
//
// Re working out the math ->
@@ -478,28 +453,4 @@
return ssimv;
}
-double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *ssim_y,
- double *ssim_u, double *ssim_v, unsigned int bd) {
- double ssim_all = 0;
- double a, b, c;
-
- a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer,
- source->y_stride, dest->y_stride,
- source->y_crop_width, source->y_crop_height, bd);
-
- b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer,
- source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height, bd);
-
- c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer,
- source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height, bd);
- *ssim_y = a;
- *ssim_u = b;
- *ssim_v = c;
- ssim_all = (a * 4 + b + c) / 6;
-
- return ssim_all;
-}
#endif // CONFIG_VP9_HIGHBITDEPTH
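Both ssimg entry points are removed outright, with no in-tree replacement (the ssim.h hunk below drops the prototypes to match). An out-of-tree caller that still wants the old (4*Y + U + V)/6 aggregate can rebuild it from vpx_ssim2(), which remains declared in ssim.h; a compatibility sketch:

#include "vpx_dsp/ssim.h"
#include "vpx_scale/yv12config.h"

/* Sketch: reconstructs the removed vpx_calc_ssimg() result from the
 * still-exported per-plane helper. */
static double ssimg_compat(const YV12_BUFFER_CONFIG *source,
                           const YV12_BUFFER_CONFIG *dest) {
  const double y = vpx_ssim2(source->y_buffer, dest->y_buffer,
                             source->y_stride, dest->y_stride,
                             source->y_crop_width, source->y_crop_height);
  const double u = vpx_ssim2(source->u_buffer, dest->u_buffer,
                             source->uv_stride, dest->uv_stride,
                             source->uv_crop_width, source->uv_crop_height);
  const double v = vpx_ssim2(source->v_buffer, dest->v_buffer,
                             source->uv_stride, dest->uv_stride,
                             source->uv_crop_width, source->uv_crop_height);
  return (4 * y + u + v) / 6; /* luma weighted 4x, as the old helper did */
}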
diff --git a/vpx_dsp/ssim.h b/vpx_dsp/ssim.h
index 132f7f9..6c59540 100644
--- a/vpx_dsp/ssim.h
+++ b/vpx_dsp/ssim.h
@@ -11,6 +11,8 @@
#ifndef VPX_DSP_SSIM_H_
#define VPX_DSP_SSIM_H_
+#define MAX_SSIM_DB 100.0
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -68,30 +70,21 @@
const YV12_BUFFER_CONFIG *dest,
double *weight);
-double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest,
- double *ssim_y, double *ssim_u, double *ssim_v);
-
double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest,
- double *ssim_y, double *ssim_u, double *ssim_v);
+ double *ssim_y, double *ssim_u,
+ double *ssim_v, uint32_t bit_depth);
double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest,
- double *ssim_y, double *ssim_u, double *ssim_v);
+ double *phvs_y, double *phvs_u,
+ double *phvs_v, uint32_t bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest,
double *weight,
- unsigned int bd);
-
-double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest,
- double *ssim_y,
- double *ssim_u,
- double *ssim_v,
- unsigned int bd);
+ uint32_t bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
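Taken together, the header now exposes a dB cap plus bit-depth-aware prototypes. A hypothetical caller, sketched under the assumption that src/dst were prepared elsewhere (e.g. with vpx_alloc_frame_buffer() as in the new test); only the prototypes and MAX_SSIM_DB come from this header:

#include <stdint.h>
#include <stdio.h>

#include "vpx_dsp/ssim.h"
#include "vpx_scale/yv12config.h"

/* Hypothetical driver exercising the widened signatures. */
static void report_metrics(const YV12_BUFFER_CONFIG *src,
                           const YV12_BUFFER_CONFIG *dst,
                           uint32_t bit_depth) {
  double y, u, v;
  double fastssim = vpx_calc_fastssim(src, dst, &y, &u, &v, bit_depth);
  const double psnrhvs = vpx_psnrhvs(src, dst, &y, &u, &v, bit_depth);
  if (fastssim > MAX_SSIM_DB) fastssim = MAX_SSIM_DB; /* defensive cap */
  printf("fastssim %.2f dB, psnrhvs %.2f dB\n", fastssim, psnrhvs);
}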
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 7502f90..ae4aec8 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -45,9 +45,9 @@
#define ALIGN_POWER_OF_TWO(value, n) \
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
+#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1))
#if CONFIG_VP9_HIGHBITDEPTH
-#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1))
-#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1))
+#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)(x)) >> 1))
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // VPX_PORTS_MEM_H_
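Parenthesizing the macro argument hardens both conversions against expression arguments, and hoisting CONVERT_TO_SHORTPTR above the CONFIG_VP9_HIGHBITDEPTH guard lets psnrhvs.c build its widened pointer aliases unconditionally. The round trip relies on the address of a uint16_t buffer being even, which its alignment guarantees; a standalone sketch:

#include <stdint.h>
#include <stdio.h>

#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)(x)) >> 1))

int main(void) {
  static uint16_t pixels[4] = { 0, 256, 512, 1023 };
  /* High-bit-depth frames travel through uint8_t * fields with the
   * address halved; the tagged pointer must never be dereferenced. */
  uint8_t *handle = CONVERT_TO_BYTEPTR(pixels);
  uint16_t *restored = CONVERT_TO_SHORTPTR(handle);
  printf("%d\n", restored[3]); /* prints 1023: the round trip is exact */
  return 0;
}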