Merge "Clear X87 register state before using double." into nextgenv2
diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc
index 9c93dd8..20aedba 100644
--- a/test/vp10_ans_test.cc
+++ b/test/vp10_ans_test.cc
@@ -148,23 +148,25 @@
return okay;
}
+// TODO(aconverse): replace this with a more representative distribution from
+// the codec.
const rans_sym rans_sym_tab[] = {
- {16, 0}, {100, 16}, {70, 116}, {70, 186},
+ {16 * 4, 0 * 4}, {100 * 4, 16 * 4}, {70 * 4, 116 *4}, {70 * 4, 186 *4},
};
const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]);
std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
std::vector<int> p_to_sym;
int i = 0;
- while (p_to_sym.size() < 256) {
+ while (p_to_sym.size() < rans_precision) {
p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
++i;
}
- assert(p_to_sym.size() == 256);
+ assert(p_to_sym.size() == rans_precision);
std::vector<int> ret;
libvpx_test::ACMRandom gen(18543637);
for (int i = 0; i < iters; ++i) {
- int sym = p_to_sym[gen.Rand8()];
+ int sym = p_to_sym[gen.Rand8() * 4];
ret.push_back(sym);
}
return ret;
@@ -173,7 +175,7 @@
void rans_build_dec_tab(const struct rans_sym sym_tab[],
rans_dec_lut dec_tab) {
dec_tab[0] = 0;
- for (int i = 1; dec_tab[i - 1] < ans_p8_precision; ++i) {
+ for (int i = 1; dec_tab[i - 1] < rans_precision; ++i) {
dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob;
}
}
@@ -229,10 +231,10 @@
* -sym2 -sym3
*/
void tab2tree(const rans_sym *tab, int tab_size, vpx_prob *treep) {
- const unsigned basep = 256;
+ const unsigned basep = rans_precision;
unsigned pleft = basep;
for (int i = 0; i < tab_size - 1; ++i) {
- unsigned prob = (tab[i].prob * basep + (basep / 2)) / pleft;
+ unsigned prob = (tab[i].prob * basep + basep * 2) / (pleft * 4);
assert(prob > 0 && prob < 256);
treep[i] = prob;
pleft -= tab[i].prob;
diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
index 63d9ec7..9daf063 100644
--- a/test/vp10_fht4x4_test.cc
+++ b/test/vp10_fht4x4_test.cc
@@ -25,9 +25,9 @@
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
int tx_type);
-
+using std::tr1::tuple;
using libvpx_test::FhtFunc;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t, int> Ht4x4Param;
+typedef tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t, int> Ht4x4Param;
void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
int tx_type) {
@@ -37,13 +37,14 @@
#if CONFIG_VP9_HIGHBITDEPTH
typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
int tx_type, int bd);
+typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
+ int tx_type, int bd);
+// Target optimized function, tx_type, bit depth
+typedef tuple<HBDFhtFunc, int, int> HighbdHt4x4Param;
-typedef std::tr1::tuple<FhtFunc, IhighbdHtFunc, int, vpx_bit_depth_t, int>
-HighbdHt4x4Param;
-
-void highbe_fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
- int tx_type) {
- vp10_highbd_fht4x4_c(in, out, stride, tx_type);
+void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride,
+ int tx_type, int bd) {
+ vp10_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -83,98 +84,76 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-class VP10HighbdTrans4x4HT
- : public libvpx_test::TransformTestBase,
- public ::testing::TestWithParam<HighbdHt4x4Param> {
+class VP10HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> {
public:
virtual ~VP10HighbdTrans4x4HT() {}
virtual void SetUp() {
fwd_txfm_ = GET_PARAM(0);
- inv_txfm_ = GET_PARAM(1);
- tx_type_ = GET_PARAM(2);
- pitch_ = 4;
- fwd_txfm_ref = highbe_fht4x4_ref;
- bit_depth_ = GET_PARAM(3);
+ fwd_txfm_ref_ = highbe_fht4x4_ref;
+ tx_type_ = GET_PARAM(1);
+ bit_depth_ = GET_PARAM(2);
mask_ = (1 << bit_depth_) - 1;
- num_coeffs_ = GET_PARAM(4);
+ num_coeffs_ = 16;
+
+ input_ = reinterpret_cast<int16_t *>
+ (vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
+ output_ = reinterpret_cast<int32_t *>
+ (vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
+ output_ref_ = reinterpret_cast<int32_t *>
+ (vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
}
- virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ virtual void TearDown() {
+ vpx_free(input_);
+ vpx_free(output_);
+ vpx_free(output_ref_);
+ libvpx_test::ClearSystemState();
+ }
protected:
- void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
- fwd_txfm_(in, out, stride, tx_type_);
- }
+ void RunBitexactCheck();
- void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
- inv_txfm_(out, dst, stride, tx_type_, bit_depth_);
- }
-
- FhtFunc fwd_txfm_;
- IhighbdHtFunc inv_txfm_;
+ private:
+ HBDFhtFunc fwd_txfm_;
+ HBDFhtFunc fwd_txfm_ref_;
+ int tx_type_;
+ int bit_depth_;
+ int mask_;
+ int num_coeffs_;
+ int16_t *input_;
+ int32_t *output_;
+ int32_t *output_ref_;
};
+void VP10HighbdTrans4x4HT::RunBitexactCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int i, j;
+ const int stride = 4;
+ const int num_tests = 200000;
+ const int num_coeffs = 16;
+
+ for (i = 0; i < num_tests; ++i) {
+ for (j = 0; j < num_coeffs; ++j) {
+ input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+ }
+
+ fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
+ fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_);
+
+ for (j = 0; j < num_coeffs; ++j) {
+ EXPECT_EQ(output_[j], output_ref_[j])
+ << "Not bit-exact result at index: " << j
+ << " at test block: " << i;
+ }
+ }
+}
+
TEST_P(VP10HighbdTrans4x4HT, HighbdCoeffCheck) {
- RunCoeffCheck();
+ RunBitexactCheck();
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-#define SPEED_TEST (0)
-#if SPEED_TEST
-#if CONFIG_EXT_TX
-TEST(VP10Trans4x4HTSpeedTest, C_version) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 200000;
- int bit_depth = 8;
- int mask = (1 << bit_depth) - 1;
- const int num_coeffs = 16;
- int16_t *input = new int16_t[num_coeffs];
- tran_low_t *output = new tran_low_t[num_coeffs];
- const int stride = 4;
- int tx_type;
-
- for (int j = 0; j < num_coeffs; ++j) {
- input[j] = (rnd.Rand8() & mask) - (rnd.Rand8() & mask);
- }
- for (int i = 0; i < count_test_block; ++i) {
- for (tx_type = V_DCT; tx_type <= H_FLIPADST; ++tx_type) {
- vp10_fht4x4_c(input, output, stride, tx_type);
- }
- }
-
- delete[] input;
- delete[] output;
-}
-#endif // CONFIG_EXT_TX
-
-#if HAVE_SSE2 && CONFIG_EXT_TX
-TEST(VP10Trans4x4HTSpeedTest, SSE2_version) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 200000;
- int bit_depth = 8;
- int mask = (1 << bit_depth) - 1;
- const int num_coeffs = 16;
- int16_t *input = new int16_t[num_coeffs];
- tran_low_t *output = reinterpret_cast<tran_low_t *>(
- vpx_memalign(16, num_coeffs * sizeof(tran_low_t)));
- const int stride = 4;
- int tx_type;
-
- for (int j = 0; j < num_coeffs; ++j) {
- input[j] = (rnd.Rand8() & mask) - (rnd.Rand8() & mask);
- }
- for (int i = 0; i < count_test_block; ++i) {
- for (tx_type = V_DCT; tx_type <= H_FLIPADST; ++tx_type) {
- vp10_fht4x4_sse2(input, output, stride, tx_type);
- }
- }
-
- delete[] input;
- vpx_free(output);
-}
-#endif // HAVE_SSE2 && CONFIG_EXT_TX
-#endif // SPEED_TEST
-
using std::tr1::make_tuple;
#if HAVE_SSE2
@@ -229,83 +208,23 @@
SSE4_1, VP10HighbdTrans4x4HT,
::testing::Values(
#if !CONFIG_EXT_TX
- // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
- // VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
- VPX_BITS_10, 16),
- // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
- // VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
- VPX_BITS_12, 16)));
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 12)));
#else
- // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
- // VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
- VPX_BITS_10, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
- VPX_BITS_10, 16),
- // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
- // VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
- VPX_BITS_12, 16),
- make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
- VPX_BITS_12, 16)));
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 12)));
#endif // !CONFIG_EXT_TX
#endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/common/ans.h b/vp10/common/ans.h
index 32babee..24d7c09 100644
--- a/vp10/common/ans.h
+++ b/vp10/common/ans.h
@@ -58,7 +58,12 @@
typedef uint8_t AnsP8;
#define ans_p8_precision 256u
#define ans_p8_shift 8
-#define l_base (ans_p8_precision * 4) // l_base % precision must be 0
+typedef uint16_t AnsP10;
+#define ans_p10_precision 1024u
+
+#define rans_precision ans_p10_precision
+
+#define l_base (ans_p10_precision * 4) // l_base % precision must be 0
#define io_base 256
// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
@@ -75,14 +80,17 @@
assert(ans->state < l_base * io_base);
state = ans->state - l_base;
if (state < (1 << 6)) {
- ans->buf[ans->buf_offset] = (0 << 6) + state;
+ ans->buf[ans->buf_offset] = (0x00 << 6) + state;
return ans->buf_offset + 1;
} else if (state < (1 << 14)) {
- mem_put_le16(ans->buf + ans->buf_offset, (1 << 14) + state);
+ mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
return ans->buf_offset + 2;
- } else {
- mem_put_le24(ans->buf + ans->buf_offset, (1 << 23) + state);
+ } else if (state < (1 << 22)) {
+ mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
return ans->buf_offset + 3;
+ } else {
+ assert(0 && "State is too large to be serialized");
+ return ans->buf_offset;
}
}
@@ -189,7 +197,7 @@
static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
AnsP8 p = ans_p8_precision - p0;
const unsigned l_s = val ? p : p0;
- if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+ while (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
ans->buf[ans->buf_offset++] = ans->state % io_base;
ans->state /= io_base;
}
@@ -205,7 +213,7 @@
// unsigned int xp1;
unsigned xp, sp;
unsigned state = ans->state;
- if (state < l_base && ans->buf_offset > 0) {
+ while (state < l_base && ans->buf_offset > 0) {
state = state * io_base + ans->buf[--ans->buf_offset];
}
sp = state * p;
@@ -223,7 +231,7 @@
static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
int s;
unsigned state = ans->state;
- if (state < l_base && ans->buf_offset > 0) {
+ while (state < l_base && ans->buf_offset > 0) {
state = state * io_base + ans->buf[--ans->buf_offset];
}
s = (int)(state & 1);
@@ -256,31 +264,31 @@
}
struct rans_sym {
- AnsP8 prob;
- AnsP8 cum_prob; // not-inclusive
+ AnsP10 prob;
+ AnsP10 cum_prob; // not-inclusive
};
struct rans_dec_sym {
uint8_t val;
- AnsP8 prob;
- AnsP8 cum_prob; // not-inclusive
+ AnsP10 prob;
+ AnsP10 cum_prob; // not-inclusive
};
// This is now just a boring cdf. It starts with an explicit zero.
// TODO(aconverse): Remove starting zero.
typedef uint16_t rans_dec_lut[16];
-static INLINE void rans_build_cdf_from_pdf(const AnsP8 token_probs[],
+static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
rans_dec_lut cdf_tab) {
int i;
cdf_tab[0] = 0;
- for (i = 1; cdf_tab[i - 1] < ans_p8_precision; ++i) {
+ for (i = 1; cdf_tab[i - 1] < rans_precision; ++i) {
cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
}
- assert(cdf_tab[i - 1] == ans_p8_precision);
+ assert(cdf_tab[i - 1] == rans_precision);
}
-static INLINE int ans_find_largest(const AnsP8 *const pdf_tab,
+static INLINE int ans_find_largest(const AnsP10 *const pdf_tab,
int num_syms) {
int largest_idx = -1;
int largest_p = -1;
@@ -295,22 +303,22 @@
return largest_idx;
}
-static INLINE void rans_merge_prob_pdf(AnsP8 *const out_pdf,
- const AnsP8 node_prob,
- const AnsP8 *const src_pdf,
- int in_syms) {
+static INLINE void rans_merge_prob8_pdf(AnsP10 *const out_pdf,
+ const AnsP8 node_prob,
+ const AnsP10 *const src_pdf,
+ int in_syms) {
int i;
- int adjustment = ans_p8_precision;
+ int adjustment = rans_precision;
const int round_fact = ans_p8_precision >> 1;
const AnsP8 p1 = ans_p8_precision - node_prob;
const int out_syms = in_syms + 1;
assert(src_pdf != out_pdf);
- out_pdf[0] = node_prob;
- adjustment -= node_prob;
+ out_pdf[0] = node_prob << (10 - 8);
+ adjustment -= out_pdf[0];
for (i = 0; i < in_syms; ++i) {
int p = (p1 * src_pdf[i] + round_fact) >> ans_p8_shift;
- p = VPXMIN(p, (int)ans_p8_precision - in_syms);
+ p = VPXMIN(p, (int)rans_precision - in_syms);
p = VPXMAX(p, 1);
out_pdf[i + 1] = p;
adjustment -= p;
@@ -332,20 +340,20 @@
// rANS with normalization
// sym->prob takes the place of l_s from the paper
-// ans_p8_precision is m
+// ans_p10_precision is m
static INLINE void rans_write(struct AnsCoder *ans,
const struct rans_sym *const sym) {
- const AnsP8 p = sym->prob;
- if (ans->state >= l_base / ans_p8_precision * io_base * p) {
+ const AnsP10 p = sym->prob;
+ while (ans->state >= l_base / rans_precision * io_base * p) {
ans->buf[ans->buf_offset++] = ans->state % io_base;
ans->state /= io_base;
}
ans->state =
- (ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob;
+ (ans->state / p) * rans_precision + ans->state % p + sym->cum_prob;
}
static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
- AnsP8 rem) {
+ AnsP10 rem) {
int i = 0;
// TODO(skal): if critical, could be a binary search.
// Or, better, an O(1) alias-table.
@@ -353,8 +361,8 @@
++i;
}
out->val = i - 1;
- out->prob = (AnsP8)(cdf[i] - cdf[i - 1]);
- out->cum_prob = (AnsP8)cdf[i - 1];
+ out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
+ out->cum_prob = (AnsP10)cdf[i - 1];
}
static INLINE int rans_read(struct AnsDecoder *ans,
@@ -362,11 +370,11 @@
unsigned rem;
unsigned quo;
struct rans_dec_sym sym;
- if (ans->state < l_base && ans->buf_offset > 0) {
+ while (ans->state < l_base && ans->buf_offset > 0) {
ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
}
- quo = ans->state / ans_p8_precision;
- rem = ans->state % ans_p8_precision;
+ quo = ans->state / rans_precision;
+ rem = ans->state % rans_precision;
fetch_sym(&sym, tab, rem);
ans->state = quo * sym.prob + rem - sym.cum_prob;
return sym.val;
diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c
index d17b854..20594df 100644
--- a/vp10/common/entropy.c
+++ b/vp10/common/entropy.c
@@ -417,263 +417,263 @@
// beta = 8
// Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here.
// ZERO_TOKEN and EOB_TOKEN are coded as flags outside this coder.
-const vpx_prob vp10_pareto8_token_probs[COEFF_PROB_MODELS]
- [ENTROPY_TOKENS - 2] = {
- {1, 1, 1, 1, 2, 4, 8, 14, 26, 198},
- {2, 2, 2, 2, 4, 7, 14, 26, 42, 155},
- {3, 3, 3, 3, 6, 11, 20, 34, 51, 122},
- {4, 4, 4, 4, 7, 14, 25, 41, 56, 97},
- {5, 5, 5, 5, 9, 17, 30, 46, 58, 76},
- {6, 6, 6, 5, 11, 20, 34, 50, 57, 61},
- {7, 7, 7, 6, 12, 22, 37, 53, 56, 49},
- {8, 8, 7, 7, 14, 25, 40, 54, 53, 40},
- {9, 9, 8, 8, 15, 27, 43, 55, 50, 32},
- {10, 10, 9, 9, 16, 29, 45, 55, 47, 26},
- {11, 10, 10, 10, 18, 31, 47, 55, 43, 21},
- {12, 11, 11, 10, 19, 32, 48, 55, 40, 18},
- {13, 12, 12, 11, 20, 34, 49, 54, 37, 14},
- {14, 13, 12, 12, 21, 35, 50, 53, 34, 12},
- {15, 14, 13, 12, 22, 37, 51, 51, 31, 10},
- {16, 15, 14, 13, 23, 38, 51, 50, 28, 8},
- {17, 16, 15, 13, 24, 39, 51, 48, 26, 7},
- {18, 17, 15, 14, 25, 40, 52, 46, 23, 6},
- {19, 17, 16, 15, 26, 41, 51, 45, 21, 5},
- {20, 18, 17, 15, 27, 42, 51, 43, 19, 4},
- {21, 19, 17, 16, 28, 42, 51, 41, 18, 3},
- {22, 20, 18, 16, 28, 43, 51, 39, 16, 3},
- {23, 21, 19, 17, 29, 43, 50, 37, 14, 3},
- {24, 22, 19, 17, 30, 44, 49, 36, 13, 2},
- {25, 22, 20, 18, 30, 44, 49, 34, 12, 2},
- {26, 23, 20, 18, 31, 44, 48, 33, 11, 2},
- {27, 24, 21, 19, 31, 45, 47, 31, 10, 1},
- {28, 25, 22, 19, 32, 45, 46, 29, 9, 1},
- {29, 25, 22, 20, 32, 45, 46, 28, 8, 1},
- {30, 26, 23, 20, 33, 45, 45, 26, 7, 1},
- {31, 27, 23, 20, 33, 45, 44, 25, 7, 1},
- {32, 27, 24, 21, 33, 45, 43, 24, 6, 1},
- {33, 28, 24, 21, 34, 44, 42, 23, 6, 1},
- {34, 29, 25, 21, 34, 44, 41, 22, 5, 1},
- {35, 30, 25, 22, 34, 44, 40, 20, 5, 1},
- {36, 30, 26, 22, 35, 44, 39, 19, 4, 1},
- {37, 31, 26, 22, 35, 44, 38, 18, 4, 1},
- {38, 32, 27, 22, 35, 43, 37, 17, 4, 1},
- {39, 33, 27, 23, 35, 43, 36, 16, 3, 1},
- {40, 33, 27, 23, 35, 43, 35, 16, 3, 1},
- {41, 34, 28, 23, 35, 42, 34, 15, 3, 1},
- {42, 35, 28, 23, 36, 42, 33, 14, 2, 1},
- {43, 35, 29, 24, 35, 42, 32, 13, 2, 1},
- {44, 36, 29, 24, 36, 41, 31, 12, 2, 1},
- {45, 36, 29, 24, 36, 41, 30, 12, 2, 1},
- {46, 37, 30, 24, 35, 40, 30, 11, 2, 1},
- {47, 37, 30, 24, 36, 40, 29, 10, 2, 1},
- {48, 38, 30, 24, 36, 40, 28, 10, 1, 1},
- {49, 39, 31, 24, 36, 39, 27, 9, 1, 1},
- {50, 39, 31, 25, 35, 39, 26, 9, 1, 1},
- {51, 40, 31, 25, 36, 38, 25, 8, 1, 1},
- {52, 40, 31, 25, 35, 38, 25, 8, 1, 1},
- {53, 41, 32, 25, 35, 37, 24, 7, 1, 1},
- {54, 41, 32, 25, 35, 37, 23, 7, 1, 1},
- {55, 42, 32, 25, 35, 36, 22, 7, 1, 1},
- {56, 42, 33, 25, 35, 35, 22, 6, 1, 1},
- {57, 43, 33, 25, 34, 35, 21, 6, 1, 1},
- {58, 43, 33, 25, 35, 34, 20, 6, 1, 1},
- {59, 44, 33, 25, 34, 34, 20, 5, 1, 1},
- {60, 45, 33, 25, 34, 33, 19, 5, 1, 1},
- {61, 45, 33, 25, 34, 33, 18, 5, 1, 1},
- {62, 45, 34, 25, 34, 32, 18, 4, 1, 1},
- {63, 46, 34, 25, 33, 32, 17, 4, 1, 1},
- {64, 46, 34, 25, 33, 31, 17, 4, 1, 1},
- {65, 47, 34, 25, 33, 30, 16, 4, 1, 1},
- {66, 47, 34, 25, 33, 30, 15, 4, 1, 1},
- {67, 48, 34, 25, 33, 29, 15, 3, 1, 1},
- {68, 48, 35, 25, 32, 29, 14, 3, 1, 1},
- {69, 48, 35, 25, 32, 28, 14, 3, 1, 1},
- {70, 49, 35, 25, 32, 27, 13, 3, 1, 1},
- {71, 49, 35, 25, 31, 27, 13, 3, 1, 1},
- {72, 49, 35, 25, 31, 27, 12, 3, 1, 1},
- {73, 50, 35, 25, 31, 26, 12, 2, 1, 1},
- {74, 50, 35, 25, 31, 25, 12, 2, 1, 1},
- {75, 51, 35, 25, 30, 25, 11, 2, 1, 1},
- {76, 51, 35, 25, 30, 24, 11, 2, 1, 1},
- {77, 51, 35, 25, 30, 24, 10, 2, 1, 1},
- {78, 52, 35, 24, 29, 24, 10, 2, 1, 1},
- {79, 52, 35, 24, 29, 23, 10, 2, 1, 1},
- {80, 52, 35, 24, 29, 23, 9, 2, 1, 1},
- {81, 53, 35, 24, 28, 22, 9, 2, 1, 1},
- {82, 53, 35, 24, 28, 22, 9, 1, 1, 1},
- {83, 54, 35, 24, 28, 21, 8, 1, 1, 1},
- {84, 54, 35, 24, 27, 21, 8, 1, 1, 1},
- {85, 54, 35, 24, 27, 20, 8, 1, 1, 1},
- {86, 54, 35, 24, 27, 20, 7, 1, 1, 1},
- {87, 55, 35, 23, 27, 19, 7, 1, 1, 1},
- {88, 55, 35, 23, 26, 19, 7, 1, 1, 1},
- {89, 55, 35, 23, 26, 18, 7, 1, 1, 1},
- {90, 55, 35, 23, 26, 18, 6, 1, 1, 1},
- {91, 56, 35, 23, 25, 17, 6, 1, 1, 1},
- {92, 56, 35, 22, 25, 17, 6, 1, 1, 1},
- {93, 56, 35, 22, 24, 17, 6, 1, 1, 1},
- {94, 57, 35, 22, 24, 16, 5, 1, 1, 1},
- {95, 56, 35, 22, 24, 16, 5, 1, 1, 1},
- {96, 57, 35, 22, 23, 15, 5, 1, 1, 1},
- {97, 56, 35, 22, 23, 15, 5, 1, 1, 1},
- {98, 57, 34, 21, 23, 15, 5, 1, 1, 1},
- {99, 57, 35, 21, 23, 14, 4, 1, 1, 1},
- {100, 58, 34, 21, 22, 14, 4, 1, 1, 1},
- {101, 57, 34, 21, 22, 14, 4, 1, 1, 1},
- {102, 58, 34, 21, 21, 13, 4, 1, 1, 1},
- {103, 57, 34, 21, 21, 13, 4, 1, 1, 1},
- {104, 57, 34, 20, 21, 13, 4, 1, 1, 1},
- {105, 58, 34, 20, 20, 12, 4, 1, 1, 1},
- {106, 58, 34, 20, 20, 12, 3, 1, 1, 1},
- {107, 58, 33, 20, 20, 12, 3, 1, 1, 1},
- {108, 59, 33, 20, 19, 11, 3, 1, 1, 1},
- {109, 59, 33, 19, 19, 11, 3, 1, 1, 1},
- {110, 58, 33, 19, 19, 11, 3, 1, 1, 1},
- {111, 59, 33, 19, 18, 10, 3, 1, 1, 1},
- {112, 58, 33, 19, 18, 10, 3, 1, 1, 1},
- {113, 58, 32, 19, 18, 10, 3, 1, 1, 1},
- {114, 59, 32, 18, 18, 10, 2, 1, 1, 1},
- {115, 60, 32, 18, 17, 9, 2, 1, 1, 1},
- {116, 59, 32, 18, 17, 9, 2, 1, 1, 1},
- {117, 59, 32, 18, 16, 9, 2, 1, 1, 1},
- {118, 59, 31, 18, 16, 9, 2, 1, 1, 1},
- {119, 59, 32, 17, 16, 8, 2, 1, 1, 1},
- {120, 59, 31, 17, 16, 8, 2, 1, 1, 1},
- {121, 59, 31, 17, 15, 8, 2, 1, 1, 1},
- {122, 59, 30, 17, 15, 8, 2, 1, 1, 1},
- {123, 59, 30, 17, 15, 7, 2, 1, 1, 1},
- {124, 59, 30, 16, 15, 7, 2, 1, 1, 1},
- {125, 59, 30, 16, 14, 7, 2, 1, 1, 1},
- {126, 59, 30, 16, 14, 7, 1, 1, 1, 1},
- {127, 59, 30, 16, 14, 6, 1, 1, 1, 1},
- {128, 59, 30, 16, 13, 6, 1, 1, 1, 1},
- {129, 59, 30, 15, 13, 6, 1, 1, 1, 1},
- {130, 59, 29, 15, 13, 6, 1, 1, 1, 1},
- {131, 59, 29, 15, 12, 6, 1, 1, 1, 1},
- {132, 59, 28, 15, 12, 6, 1, 1, 1, 1},
- {133, 59, 28, 15, 12, 5, 1, 1, 1, 1},
- {134, 59, 28, 14, 12, 5, 1, 1, 1, 1},
- {135, 59, 28, 14, 11, 5, 1, 1, 1, 1},
- {136, 58, 28, 14, 11, 5, 1, 1, 1, 1},
- {137, 58, 27, 14, 11, 5, 1, 1, 1, 1},
- {138, 58, 27, 13, 11, 5, 1, 1, 1, 1},
- {139, 58, 27, 13, 11, 4, 1, 1, 1, 1},
- {140, 58, 27, 13, 10, 4, 1, 1, 1, 1},
- {141, 58, 26, 13, 10, 4, 1, 1, 1, 1},
- {142, 57, 26, 13, 10, 4, 1, 1, 1, 1},
- {143, 57, 26, 12, 10, 4, 1, 1, 1, 1},
- {144, 57, 26, 12, 9, 4, 1, 1, 1, 1},
- {145, 57, 25, 12, 9, 4, 1, 1, 1, 1},
- {146, 57, 25, 12, 9, 3, 1, 1, 1, 1},
- {147, 57, 25, 11, 9, 3, 1, 1, 1, 1},
- {148, 57, 25, 11, 8, 3, 1, 1, 1, 1},
- {149, 57, 24, 11, 8, 3, 1, 1, 1, 1},
- {150, 56, 24, 11, 8, 3, 1, 1, 1, 1},
- {151, 56, 23, 11, 8, 3, 1, 1, 1, 1},
- {152, 56, 23, 10, 8, 3, 1, 1, 1, 1},
- {153, 56, 23, 10, 7, 3, 1, 1, 1, 1},
- {154, 55, 23, 10, 7, 3, 1, 1, 1, 1},
- {155, 55, 22, 10, 7, 3, 1, 1, 1, 1},
- {156, 55, 22, 10, 7, 2, 1, 1, 1, 1},
- {157, 54, 22, 10, 7, 2, 1, 1, 1, 1},
- {158, 54, 22, 9, 7, 2, 1, 1, 1, 1},
- {159, 55, 21, 9, 6, 2, 1, 1, 1, 1},
- {160, 54, 21, 9, 6, 2, 1, 1, 1, 1},
- {161, 53, 21, 9, 6, 2, 1, 1, 1, 1},
- {162, 53, 20, 9, 6, 2, 1, 1, 1, 1},
- {163, 53, 20, 8, 6, 2, 1, 1, 1, 1},
- {164, 53, 20, 8, 5, 2, 1, 1, 1, 1},
- {165, 52, 20, 8, 5, 2, 1, 1, 1, 1},
- {166, 52, 19, 8, 5, 2, 1, 1, 1, 1},
- {167, 51, 19, 8, 5, 2, 1, 1, 1, 1},
- {168, 51, 19, 7, 5, 2, 1, 1, 1, 1},
- {169, 51, 19, 7, 5, 1, 1, 1, 1, 1},
- {170, 51, 18, 7, 5, 1, 1, 1, 1, 1},
- {171, 51, 18, 7, 4, 1, 1, 1, 1, 1},
- {172, 50, 18, 7, 4, 1, 1, 1, 1, 1},
- {173, 50, 17, 7, 4, 1, 1, 1, 1, 1},
- {174, 49, 17, 7, 4, 1, 1, 1, 1, 1},
- {175, 49, 17, 6, 4, 1, 1, 1, 1, 1},
- {176, 49, 16, 6, 4, 1, 1, 1, 1, 1},
- {177, 48, 16, 6, 4, 1, 1, 1, 1, 1},
- {178, 47, 16, 6, 4, 1, 1, 1, 1, 1},
- {179, 47, 16, 6, 3, 1, 1, 1, 1, 1},
- {180, 47, 15, 6, 3, 1, 1, 1, 1, 1},
- {181, 47, 15, 5, 3, 1, 1, 1, 1, 1},
- {182, 46, 15, 5, 3, 1, 1, 1, 1, 1},
- {183, 46, 14, 5, 3, 1, 1, 1, 1, 1},
- {184, 45, 14, 5, 3, 1, 1, 1, 1, 1},
- {185, 44, 14, 5, 3, 1, 1, 1, 1, 1},
- {186, 44, 13, 5, 3, 1, 1, 1, 1, 1},
- {187, 43, 13, 5, 3, 1, 1, 1, 1, 1},
- {188, 44, 13, 4, 2, 1, 1, 1, 1, 1},
- {189, 43, 13, 4, 2, 1, 1, 1, 1, 1},
- {190, 43, 12, 4, 2, 1, 1, 1, 1, 1},
- {191, 42, 12, 4, 2, 1, 1, 1, 1, 1},
- {192, 41, 12, 4, 2, 1, 1, 1, 1, 1},
- {193, 41, 11, 4, 2, 1, 1, 1, 1, 1},
- {194, 40, 11, 4, 2, 1, 1, 1, 1, 1},
- {195, 39, 11, 4, 2, 1, 1, 1, 1, 1},
- {196, 39, 11, 3, 2, 1, 1, 1, 1, 1},
- {197, 39, 10, 3, 2, 1, 1, 1, 1, 1},
- {198, 38, 10, 3, 2, 1, 1, 1, 1, 1},
- {199, 37, 10, 3, 2, 1, 1, 1, 1, 1},
- {200, 37, 10, 3, 1, 1, 1, 1, 1, 1},
- {201, 37, 9, 3, 1, 1, 1, 1, 1, 1},
- {202, 36, 9, 3, 1, 1, 1, 1, 1, 1},
- {203, 35, 9, 3, 1, 1, 1, 1, 1, 1},
- {204, 35, 8, 3, 1, 1, 1, 1, 1, 1},
- {205, 35, 8, 2, 1, 1, 1, 1, 1, 1},
- {206, 34, 8, 2, 1, 1, 1, 1, 1, 1},
- {207, 33, 8, 2, 1, 1, 1, 1, 1, 1},
- {208, 32, 8, 2, 1, 1, 1, 1, 1, 1},
- {209, 32, 7, 2, 1, 1, 1, 1, 1, 1},
- {210, 31, 7, 2, 1, 1, 1, 1, 1, 1},
- {211, 30, 7, 2, 1, 1, 1, 1, 1, 1},
- {212, 30, 6, 2, 1, 1, 1, 1, 1, 1},
- {213, 29, 6, 2, 1, 1, 1, 1, 1, 1},
- {214, 28, 6, 2, 1, 1, 1, 1, 1, 1},
- {215, 27, 6, 2, 1, 1, 1, 1, 1, 1},
- {216, 27, 6, 1, 1, 1, 1, 1, 1, 1},
- {217, 27, 5, 1, 1, 1, 1, 1, 1, 1},
- {218, 26, 5, 1, 1, 1, 1, 1, 1, 1},
- {219, 25, 5, 1, 1, 1, 1, 1, 1, 1},
- {220, 24, 5, 1, 1, 1, 1, 1, 1, 1},
- {221, 24, 4, 1, 1, 1, 1, 1, 1, 1},
- {222, 23, 4, 1, 1, 1, 1, 1, 1, 1},
- {223, 22, 4, 1, 1, 1, 1, 1, 1, 1},
- {224, 21, 4, 1, 1, 1, 1, 1, 1, 1},
- {225, 20, 4, 1, 1, 1, 1, 1, 1, 1},
- {226, 20, 3, 1, 1, 1, 1, 1, 1, 1},
- {227, 19, 3, 1, 1, 1, 1, 1, 1, 1},
- {228, 18, 3, 1, 1, 1, 1, 1, 1, 1},
- {229, 17, 3, 1, 1, 1, 1, 1, 1, 1},
- {230, 16, 3, 1, 1, 1, 1, 1, 1, 1},
- {231, 16, 2, 1, 1, 1, 1, 1, 1, 1},
- {232, 15, 2, 1, 1, 1, 1, 1, 1, 1},
- {233, 14, 2, 1, 1, 1, 1, 1, 1, 1},
- {234, 13, 2, 1, 1, 1, 1, 1, 1, 1},
- {235, 12, 2, 1, 1, 1, 1, 1, 1, 1},
- {236, 11, 2, 1, 1, 1, 1, 1, 1, 1},
- {237, 11, 1, 1, 1, 1, 1, 1, 1, 1},
- {238, 10, 1, 1, 1, 1, 1, 1, 1, 1},
- {239, 9, 1, 1, 1, 1, 1, 1, 1, 1},
- {240, 8, 1, 1, 1, 1, 1, 1, 1, 1},
- {241, 7, 1, 1, 1, 1, 1, 1, 1, 1},
- {242, 6, 1, 1, 1, 1, 1, 1, 1, 1},
- {243, 5, 1, 1, 1, 1, 1, 1, 1, 1},
- {244, 4, 1, 1, 1, 1, 1, 1, 1, 1},
- {245, 3, 1, 1, 1, 1, 1, 1, 1, 1},
- {246, 2, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+const AnsP10 vp10_pareto8_token_probs[COEFF_PROB_MODELS]
+ [ENTROPY_TOKENS - 2] = {
+{ 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 },
+{ 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 },
+{ 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 },
+{ 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 },
+{ 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 },
+{ 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 },
+{ 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 },
+{ 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 },
+{ 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 },
+{ 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 },
+{ 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 },
+{ 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 },
+{ 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 },
+{ 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 },
+{ 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 },
+{ 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 },
+{ 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 },
+{ 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 },
+{ 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 },
+{ 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 },
+{ 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 },
+{ 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 },
+{ 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 },
+{ 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 },
+{ 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 },
+{ 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 },
+{ 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 },
+{ 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 },
+{ 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 },
+{ 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 },
+{ 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 },
+{ 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 },
+{ 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 },
+{ 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 },
+{ 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 },
+{ 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 },
+{ 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 },
+{ 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 },
+{ 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 },
+{ 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 },
+{ 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 },
+{ 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 },
+{ 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 },
+{ 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 },
+{ 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 },
+{ 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 },
+{ 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 },
+{ 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 },
+{ 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 },
+{ 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 },
+{ 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 },
+{ 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 },
+{ 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 },
+{ 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 },
+{ 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 },
+{ 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 },
+{ 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 },
+{ 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 },
+{ 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 },
+{ 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 },
+{ 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 },
+{ 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 },
+{ 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 },
+{ 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 },
+{ 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 },
+{ 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 },
+{ 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 },
+{ 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 },
+{ 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 },
+{ 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 },
+{ 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 },
+{ 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 },
+{ 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 },
+{ 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 },
+{ 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 },
+{ 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 },
+{ 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 },
+{ 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 },
+{ 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 },
+{ 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 },
+{ 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 },
+{ 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 },
+{ 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 },
+{ 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 },
+{ 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 },
+{ 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 },
+{ 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 },
+{ 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 },
+{ 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 },
+{ 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 },
+{ 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 },
+{ 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 },
+{ 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 },
+{ 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 },
+{ 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 },
+{ 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 },
+{ 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 },
+{ 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 },
+{ 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 },
+{ 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 },
+{ 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 },
+{ 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 },
+{ 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 },
+{ 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 },
+{ 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 },
+{ 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 },
+{ 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 },
+{ 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 },
+{ 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 },
+{ 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 },
+{ 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 },
+{ 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 },
+{ 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 },
+{ 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 },
+{ 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 },
+{ 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 },
+{ 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 },
+{ 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 },
+{ 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 },
+{ 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 },
+{ 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 },
+{ 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 },
+{ 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 },
+{ 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 },
+{ 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 },
+{ 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 },
+{ 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 },
+{ 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 },
+{ 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 },
+{ 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 },
+{ 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 },
+{ 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 },
+{ 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 },
+{ 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 },
+{ 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 },
+{ 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 },
+{ 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 },
+{ 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 },
+{ 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 },
+{ 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 },
+{ 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 },
+{ 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 },
+{ 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 },
+{ 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 },
+{ 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 },
+{ 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 },
+{ 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 },
+{ 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 },
+{ 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 },
+{ 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 },
+{ 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 },
+{ 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 },
+{ 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 },
+{ 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 },
+{ 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 },
+{ 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 },
+{ 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 },
+{ 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 },
+{ 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 },
+{ 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 },
+{ 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 },
+{ 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 },
+{ 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 },
+{ 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 },
+{ 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 },
+{ 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 },
+{ 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 },
+{ 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 },
+{ 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 },
+{ 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 },
+{ 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 },
+{ 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 },
+{ 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 },
+{ 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 },
+{ 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 },
+{ 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 },
+{ 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 },
+{ 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 },
+{ 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 },
+{ 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 },
+{ 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 },
+{ 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 },
+{ 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 },
+{ 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 },
+{ 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 },
+{ 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 },
+{ 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 },
+{ 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 },
+{ 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 },
+{ 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 },
+{ 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 },
+{ 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 },
+{ 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 },
+{ 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 },
+{ 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 },
+{ 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 },
+{ 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 },
+{ 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 },
+{ 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 },
+{ 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 },
+{ 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 },
+{ 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 },
+{ 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 },
+{ 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 },
+{ 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 },
+{ 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 },
+{ 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 },
+{ 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 },
+{ 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 },
+{ 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 },
+{ 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 },
+{ 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 },
+{ 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 },
+{ 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 },
+{ 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 },
+{ 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 },
+{ 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 },
+{ 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 },
+{ 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 },
+{ 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 },
+{ 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 },
+{ 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 },
+{ 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 },
+{ 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 },
+{ 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 },
+{ 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 },
+{ 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 },
+{ 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 },
+{ 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 },
+{ 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 },
+{ 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 },
+{ 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 },
+{ 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 },
+{ 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 },
+{ 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 },
+{ 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 },
+{ 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 },
+{ 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 },
+{ 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 },
+{ 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 },
+{ 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 },
+{ 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 },
+{ 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 },
+{ 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 },
+{ 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 },
+{ 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 },
+{ 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 },
+{ 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 },
+{ 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 },
+{ 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
};
#endif // CONFIG_ANS
@@ -2800,11 +2800,13 @@
#if CONFIG_ANS
void vp10_build_token_cdfs(const vpx_prob *pdf_model, rans_dec_lut cdf) {
- AnsP8 pdf_tab[ENTROPY_TOKENS - 1];
+ AnsP10 pdf_tab[ENTROPY_TOKENS - 1];
assert(pdf_model[2] != 0);
- rans_merge_prob_pdf(pdf_tab, pdf_model[1],
- vp10_pareto8_token_probs[pdf_model[2] - 1],
- ENTROPY_TOKENS - 2);
+ // TODO(aconverse): Investigate making the precision of the zero and EOB tree
+ // nodes 10 bits.
+ rans_merge_prob8_pdf(pdf_tab, pdf_model[1],
+ vp10_pareto8_token_probs[pdf_model[2] - 1],
+ ENTROPY_TOKENS - 2);
rans_build_cdf_from_pdf(pdf_tab, cdf);
}
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index 1e47883..34da525 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -176,7 +176,7 @@
extern const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
#if CONFIG_ANS
-extern const vpx_prob
+extern const AnsP10
vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2];
typedef rans_dec_lut coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index 23c131d..b0f100e 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -731,9 +731,11 @@
} else {
const int w = num_8x8_blocks_wide_lookup[block_size];
const int h = num_8x8_blocks_high_lookup[block_size];
- for (i = 0; i < h; i++) {
- memset(&lfm->lfl_y[i][shift_y], filter_level, w);
- }
+ const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+ const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+
+ for (i = 0; i < h; i++)
+ memset(&lfm->lfl_y[row + i][col], filter_level, w);
}
// These set 1 in the current block size for the block size edges.
@@ -811,9 +813,11 @@
} else {
const int w = num_8x8_blocks_wide_lookup[block_size];
const int h = num_8x8_blocks_high_lookup[block_size];
- for (i = 0; i < h; i++) {
- memset(&lfm->lfl_y[i][shift_y], filter_level, w);
- }
+ const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+ const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+
+ for (i = 0; i < h; i++)
+ memset(&lfm->lfl_y[row + i][col], filter_level, w);
}
*above_y |= above_prediction_mask[block_size] << shift_y;
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c
index 00f8834..ccb820f 100644
--- a/vp10/common/vp10_fwd_txfm2d.c
+++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
diff --git a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
index d884571..1b11087 100644
--- a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
+++ b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
@@ -87,15 +87,6 @@
transpose_32(txfm_size, buf_128, out_128);
}
-void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output,
- const int stride, int tx_type,
- const int bd) {
- int32_t txfm_buf[16];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_4x4_cfg(tx_type);
- (void)bd;
- fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
-}
-
void vp10_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output,
const int stride, int tx_type,
const int bd) {
diff --git a/vp10/encoder/buf_ans.h b/vp10/encoder/buf_ans.h
index c2d315a..11055d9 100644
--- a/vp10/encoder/buf_ans.h
+++ b/vp10/encoder/buf_ans.h
@@ -29,8 +29,8 @@
struct buffered_ans_symbol {
uint8_t method; // one of ANS_METHOD_UABS or ANS_METHOD_RANS
// TODO(aconverse): Should be possible to write this interms of start for ABS
- AnsP8 val_start; // Boolean value for ABS, start in symbol cycle for Rans
- AnsP8 prob; // Probability of this symbol
+ AnsP10 val_start; // Boolean value for ABS, start in symbol cycle for Rans
+ AnsP10 prob; // Probability of this symbol
};
struct BufAnsCoder {
diff --git a/vp10/encoder/cost.c b/vp10/encoder/cost.c
index 6318ad3..234e0b6 100644
--- a/vp10/encoder/cost.c
+++ b/vp10/encoder/cost.c
@@ -41,6 +41,97 @@
48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15,
12, 9, 6, 3};
+#if CONFIG_ANS
+// round(-log2(i/1024.) * (1 << VP9_PROB_COST_SHIFT)); entry 0 = entry 1
+static const uint16_t vp10_prob_cost10[1024] = {
+ 5120, 5120, 4608, 4308, 4096, 3931, 3796, 3683, 3584, 3497, 3419, 3349,
+ 3284, 3225, 3171, 3120, 3072, 3027, 2985, 2945, 2907, 2871, 2837, 2804,
+ 2772, 2742, 2713, 2685, 2659, 2633, 2608, 2583, 2560, 2537, 2515, 2494,
+ 2473, 2453, 2433, 2414, 2395, 2377, 2359, 2342, 2325, 2308, 2292, 2276,
+ 2260, 2245, 2230, 2216, 2201, 2187, 2173, 2160, 2147, 2134, 2121, 2108,
+ 2096, 2083, 2071, 2060, 2048, 2037, 2025, 2014, 2003, 1992, 1982, 1971,
+ 1961, 1951, 1941, 1931, 1921, 1911, 1902, 1892, 1883, 1874, 1865, 1856,
+ 1847, 1838, 1830, 1821, 1813, 1804, 1796, 1788, 1780, 1772, 1764, 1756,
+ 1748, 1741, 1733, 1726, 1718, 1711, 1704, 1697, 1689, 1682, 1675, 1668,
+ 1661, 1655, 1648, 1641, 1635, 1628, 1622, 1615, 1609, 1602, 1596, 1590,
+ 1584, 1578, 1571, 1565, 1559, 1554, 1548, 1542, 1536, 1530, 1525, 1519,
+ 1513, 1508, 1502, 1497, 1491, 1486, 1480, 1475, 1470, 1465, 1459, 1454,
+ 1449, 1444, 1439, 1434, 1429, 1424, 1419, 1414, 1409, 1404, 1399, 1395,
+ 1390, 1385, 1380, 1376, 1371, 1367, 1362, 1357, 1353, 1348, 1344, 1340,
+ 1335, 1331, 1326, 1322, 1318, 1313, 1309, 1305, 1301, 1297, 1292, 1288,
+ 1284, 1280, 1276, 1272, 1268, 1264, 1260, 1256, 1252, 1248, 1244, 1240,
+ 1236, 1233, 1229, 1225, 1221, 1218, 1214, 1210, 1206, 1203, 1199, 1195,
+ 1192, 1188, 1185, 1181, 1177, 1174, 1170, 1167, 1163, 1160, 1156, 1153,
+ 1149, 1146, 1143, 1139, 1136, 1133, 1129, 1126, 1123, 1119, 1116, 1113,
+ 1110, 1106, 1103, 1100, 1097, 1094, 1090, 1087, 1084, 1081, 1078, 1075,
+ 1072, 1069, 1066, 1062, 1059, 1056, 1053, 1050, 1047, 1044, 1042, 1039,
+ 1036, 1033, 1030, 1027, 1024, 1021, 1018, 1015, 1013, 1010, 1007, 1004,
+ 1001, 998, 996, 993, 990, 987, 985, 982, 979, 977, 974, 971,
+ 968, 966, 963, 960, 958, 955, 953, 950, 947, 945, 942, 940,
+ 937, 934, 932, 929, 927, 924, 922, 919, 917, 914, 912, 909,
+ 907, 904, 902, 899, 897, 895, 892, 890, 887, 885, 883, 880,
+ 878, 876, 873, 871, 868, 866, 864, 861, 859, 857, 855, 852,
+ 850, 848, 845, 843, 841, 839, 836, 834, 832, 830, 828, 825,
+ 823, 821, 819, 817, 814, 812, 810, 808, 806, 804, 801, 799,
+ 797, 795, 793, 791, 789, 787, 785, 783, 780, 778, 776, 774,
+ 772, 770, 768, 766, 764, 762, 760, 758, 756, 754, 752, 750,
+ 748, 746, 744, 742, 740, 738, 736, 734, 732, 730, 728, 726,
+ 724, 723, 721, 719, 717, 715, 713, 711, 709, 707, 706, 704,
+ 702, 700, 698, 696, 694, 693, 691, 689, 687, 685, 683, 682,
+ 680, 678, 676, 674, 673, 671, 669, 667, 665, 664, 662, 660,
+ 658, 657, 655, 653, 651, 650, 648, 646, 644, 643, 641, 639,
+ 637, 636, 634, 632, 631, 629, 627, 626, 624, 622, 621, 619,
+ 617, 616, 614, 612, 611, 609, 607, 606, 604, 602, 601, 599,
+ 598, 596, 594, 593, 591, 590, 588, 586, 585, 583, 582, 580,
+ 578, 577, 575, 574, 572, 571, 569, 567, 566, 564, 563, 561,
+ 560, 558, 557, 555, 554, 552, 550, 549, 547, 546, 544, 543,
+ 541, 540, 538, 537, 535, 534, 532, 531, 530, 528, 527, 525,
+ 524, 522, 521, 519, 518, 516, 515, 513, 512, 511, 509, 508,
+ 506, 505, 503, 502, 501, 499, 498, 496, 495, 493, 492, 491,
+ 489, 488, 486, 485, 484, 482, 481, 480, 478, 477, 475, 474,
+ 473, 471, 470, 469, 467, 466, 465, 463, 462, 460, 459, 458,
+ 456, 455, 454, 452, 451, 450, 448, 447, 446, 444, 443, 442,
+ 441, 439, 438, 437, 435, 434, 433, 431, 430, 429, 428, 426,
+ 425, 424, 422, 421, 420, 419, 417, 416, 415, 414, 412, 411,
+ 410, 409, 407, 406, 405, 404, 402, 401, 400, 399, 397, 396,
+ 395, 394, 392, 391, 390, 389, 387, 386, 385, 384, 383, 381,
+ 380, 379, 378, 377, 375, 374, 373, 372, 371, 369, 368, 367,
+ 366, 365, 364, 362, 361, 360, 359, 358, 356, 355, 354, 353,
+ 352, 351, 349, 348, 347, 346, 345, 344, 343, 341, 340, 339,
+ 338, 337, 336, 335, 333, 332, 331, 330, 329, 328, 327, 326,
+ 324, 323, 322, 321, 320, 319, 318, 317, 316, 314, 313, 312,
+ 311, 310, 309, 308, 307, 306, 305, 303, 302, 301, 300, 299,
+ 298, 297, 296, 295, 294, 293, 292, 291, 289, 288, 287, 286,
+ 285, 284, 283, 282, 281, 280, 279, 278, 277, 276, 275, 274,
+ 273, 272, 271, 269, 268, 267, 266, 265, 264, 263, 262, 261,
+ 260, 259, 258, 257, 256, 255, 254, 253, 252, 251, 250, 249,
+ 248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237,
+ 236, 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, 225,
+ 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213,
+ 212, 212, 211, 210, 209, 208, 207, 206, 205, 204, 203, 202,
+ 201, 200, 199, 198, 197, 196, 195, 194, 194, 193, 192, 191,
+ 190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 181, 180,
+ 179, 178, 177, 176, 175, 174, 173, 172, 171, 170, 170, 169,
+ 168, 167, 166, 165, 164, 163, 162, 161, 161, 160, 159, 158,
+ 157, 156, 155, 154, 153, 152, 152, 151, 150, 149, 148, 147,
+ 146, 145, 145, 144, 143, 142, 141, 140, 139, 138, 138, 137,
+ 136, 135, 134, 133, 132, 132, 131, 130, 129, 128, 127, 126,
+ 125, 125, 124, 123, 122, 121, 120, 120, 119, 118, 117, 116,
+ 115, 114, 114, 113, 112, 111, 110, 109, 109, 108, 107, 106,
+ 105, 104, 104, 103, 102, 101, 100, 99, 99, 98, 97, 96,
+ 95, 95, 94, 93, 92, 91, 90, 90, 89, 88, 87, 86,
+ 86, 85, 84, 83, 82, 82, 81, 80, 79, 78, 78, 77,
+ 76, 75, 74, 74, 73, 72, 71, 70, 70, 69, 68, 67,
+ 66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59, 58,
+ 57, 56, 55, 55, 54, 53, 52, 52, 51, 50, 49, 48,
+ 48, 47, 46, 45, 45, 44, 43, 42, 42, 41, 40, 39,
+ 38, 38, 37, 36, 35, 35, 34, 33, 32, 32, 31, 30,
+ 29, 29, 28, 27, 26, 26, 25, 24, 23, 23, 22, 21,
+ 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 13, 12,
+ 12, 11, 10, 9, 9, 8, 7, 7, 6, 5, 4, 4,
+ 3, 2, 1, 1};
+#endif // CONFIG_ANS
+
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
int i, int c) {
const vpx_prob prob = probs[i / 2];
@@ -68,7 +159,7 @@
c_tree = vp10_cost_bit(tree_probs[0], 1);
for (i = ZERO_TOKEN; i <= CATEGORY6_TOKEN; ++i) {
const int p = (*token_cdf)[i + 1] - (*token_cdf)[i];
- costs[i] = c_tree + vp10_cost_bit(p, 0);
+ costs[i] = c_tree + vp10_prob_cost10[p];
}
}
#endif // CONFIG_ANS
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 054161d..823095e 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -171,15 +171,6 @@
* could reduce the area.
*/
-/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
- * from the same math as in mv_err_cost(). */
-#define MVC(r, c) \
- (mvcost ? \
- ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
- mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
- error_per_bit + 8192) >> 14 : 0)
-
-
// convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) {
return x & 7;
@@ -192,13 +183,16 @@
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = {r, c}; \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
if (second_pred == NULL) \
thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
src_stride, &sse); \
else \
thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
z, src_stride, &sse, second_pred); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
+ v += thismse; \
+ if (v < besterr) { \
besterr = v; \
br = r; \
bc = c; \
@@ -219,10 +213,13 @@
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER1(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = {r, c}; \
thismse = upsampled_pref_error(xd, vfp, z, src_stride, \
upre(y, y_stride, r, c), y_stride, \
second_pred, w, h, &sse); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ v += thismse; \
+ if (v < besterr) { \
besterr = v; \
br = r; \
bc = c; \
@@ -334,8 +331,6 @@
const int offset = bestmv->row * y_stride + bestmv->col; \
const uint8_t *const y = xd->plane[0].pre[0].buf; \
\
- int rr = ref_mv->row; \
- int rc = ref_mv->col; \
int br = bestmv->row * 8; \
int bc = bestmv->col * 8; \
int hstep = 4; \
@@ -762,8 +757,6 @@
const int offset = bestmv->row * y_stride + bestmv->col;
const uint8_t *const y = xd->plane[0].pre[0].buf;
- int rr = ref_mv->row;
- int rc = ref_mv->col;
int br = bestmv->row * 8;
int bc = bestmv->col * 8;
int hstep = 4;
@@ -920,7 +913,6 @@
return besterr;
}
-#undef MVC
#undef PRE
#undef CHECK_BETTER
diff --git a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
index 16323b3..3cda783 100644
--- a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -111,52 +111,136 @@
in[3] = _mm_unpackhi_epi64(v1, v3);
}
-static INLINE void write_buffer_4x4(tran_low_t *output, __m128i *res) {
+static INLINE void write_buffer_4x4(__m128i *res, tran_low_t *output) {
_mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
_mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
_mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
_mm_store_si128((__m128i *)(output + 3 * 4), res[3]);
}
+// Note:
+// vp10_fwd_txfm2d_4x4_sse4_1() below is the real implementation. This stub is
+// kept only because vp10_highbd_fht4x4_c() has not been removed yet.
void vp10_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
+ (void)input;
+ (void)output;
+ (void)stride;
+ (void)tx_type;
+ assert(0);
+}
+
+static void fadst4x4_sse4_1(__m128i *in, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const __m128i kZero = _mm_setzero_si128();
+ __m128i s0, s1, s2, s3;
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3;
+
+ // stage 0
+ // stage 1
+ // stage 2
+ u0 = _mm_mullo_epi32(in[3], cospi8);
+ u1 = _mm_mullo_epi32(in[0], cospi56);
+ u2 = _mm_add_epi32(u0, u1);
+ s0 = _mm_add_epi32(u2, rnding);
+ s0 = _mm_srai_epi32(s0, bit);
+
+ v0 = _mm_mullo_epi32(in[3], cospi56);
+ v1 = _mm_mullo_epi32(in[0], cospi8);
+ v2 = _mm_sub_epi32(v0, v1);
+ s1 = _mm_add_epi32(v2, rnding);
+ s1 = _mm_srai_epi32(s1, bit);
+
+ u0 = _mm_mullo_epi32(in[1], cospi40);
+ u1 = _mm_mullo_epi32(in[2], cospi24);
+ u2 = _mm_add_epi32(u0, u1);
+ s2 = _mm_add_epi32(u2, rnding);
+ s2 = _mm_srai_epi32(s2, bit);
+
+ v0 = _mm_mullo_epi32(in[1], cospi24);
+ v1 = _mm_mullo_epi32(in[2], cospi40);
+ v2 = _mm_sub_epi32(v0, v1);
+ s3 = _mm_add_epi32(v2, rnding);
+ s3 = _mm_srai_epi32(s3, bit);
+
+ // stage 3
+ u0 = _mm_add_epi32(s0, s2);
+ u2 = _mm_sub_epi32(s0, s2);
+ u1 = _mm_add_epi32(s1, s3);
+ u3 = _mm_sub_epi32(s1, s3);
+
+ // stage 4
+ v0 = _mm_mullo_epi32(u2, cospi32);
+ v1 = _mm_mullo_epi32(u3, cospi32);
+ v2 = _mm_add_epi32(v0, v1);
+ s2 = _mm_add_epi32(v2, rnding);
+ u2 = _mm_srai_epi32(s2, bit);
+
+ v2 = _mm_sub_epi32(v0, v1);
+ s3 = _mm_add_epi32(v2, rnding);
+ u3 = _mm_srai_epi32(s3, bit);
+
+ // u0, u1, u2, u3
+ u2 = _mm_sub_epi32(kZero, u2);
+ u1 = _mm_sub_epi32(kZero, u1);
+
+ // u0, u2, u3, u1
+ // Transpose 4x4 32-bit
+ v0 = _mm_unpacklo_epi32(u0, u2);
+ v1 = _mm_unpackhi_epi32(u0, u2);
+ v2 = _mm_unpacklo_epi32(u3, u1);
+ v3 = _mm_unpackhi_epi32(u3, u1);
+
+ in[0] = _mm_unpacklo_epi64(v0, v2);
+ in[1] = _mm_unpackhi_epi64(v0, v2);
+ in[2] = _mm_unpacklo_epi64(v1, v3);
+ in[3] = _mm_unpackhi_epi64(v1, v3);
+}
+
+void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, tran_low_t *coeff,
+ int input_stride, int tx_type,
+ const int bd) {
__m128i in[4];
- const TXFM_2D_CFG *cfg;
- int bit;
+ const TXFM_2D_CFG *cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &fwd_txfm_2d_cfg_dct_dct_4;
- load_buffer_4x4(input, in, stride, 0, 0, cfg->shift[0]);
- bit = cfg->cos_bit_col[2];
- fdct4x4_sse4_1(in, bit);
- bit = cfg->cos_bit_row[2];
- fdct4x4_sse4_1(in, bit);
- write_buffer_4x4(output, in);
+ load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+ fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
break;
case ADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_4;
+ load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
+ break;
case DCT_ADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_4;
+ load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+ fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
+ break;
case ADST_ADST:
- vp10_highbd_fht4x4_c(input, output, stride, tx_type);
+ cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- vp10_highbd_fht4x4_c(input, output, stride, tx_type);
- break;
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- vp10_highbd_fht4x4_c(input, output, stride, tx_type);
- break;
-#endif // CONFIG_EXT_TX
default:
assert(0);
}
+ (void)bd;
}