Merge "Add SSE4.1 code for deringing functions." into nextgenv2
diff --git a/aom_dsp/ans.c b/aom_dsp/ans.c
index 18f6d48..30f115c 100644
--- a/aom_dsp/ans.c
+++ b/aom_dsp/ans.c
@@ -15,16 +15,7 @@
#include "aom_dsp/ans.h"
#include "aom_dsp/prob.h"
-void aom_rans_build_cdf_from_pdf(const AnsP10 token_probs[], rans_lut cdf_tab) {
- int i;
- cdf_tab[0] = 0;
- for (i = 1; cdf_tab[i - 1] < RANS_PRECISION; ++i) {
- cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
- }
- assert(cdf_tab[i - 1] == RANS_PRECISION);
-}
-
-static int find_largest(const AnsP10 *const pdf_tab, int num_syms) {
+static int find_largest(const aom_cdf_prob *const pdf_tab, int num_syms) {
int largest_idx = -1;
int largest_p = -1;
int i;
@@ -38,8 +29,9 @@
return largest_idx;
}
-void aom_rans_merge_prob8_pdf(AnsP10 *const out_pdf, const AnsP8 node_prob,
- const AnsP10 *const src_pdf, int in_syms) {
+void aom_rans_merge_prob8_pdf(aom_cdf_prob *const out_pdf,
+ const AnsP8 node_prob,
+ const aom_cdf_prob *const src_pdf, int in_syms) {
int i;
int adjustment = RANS_PRECISION;
const int round_fact = ANS_P8_PRECISION >> 1;
diff --git a/aom_dsp/ans.h b/aom_dsp/ans.h
index ea99f8b..5927e58 100644
--- a/aom_dsp/ans.h
+++ b/aom_dsp/ans.h
@@ -26,24 +26,16 @@
typedef uint8_t AnsP8;
#define ANS_P8_PRECISION 256u
#define ANS_P8_SHIFT 8
-typedef uint16_t AnsP10;
-#define ANS_P10_PRECISION 1024u
+#define RANS_PRECISION 1024u
#define RANS_PROB_BITS 10
-#define RANS_PRECISION ANS_P10_PRECISION
-
-#define L_BASE (ANS_P10_PRECISION * 4) // L_BASE % precision must be 0
+#define L_BASE (RANS_PRECISION * 4) // L_BASE % precision must be 0
#define IO_BASE 256
// Range I = { L_BASE, L_BASE + 1, ..., L_BASE * IO_BASE - 1 }
-// This is now just a boring cdf. It starts with an explicit zero.
-// TODO(aconverse): Remove starting zero.
-typedef uint16_t rans_lut[16];
-
-void aom_rans_build_cdf_from_pdf(const AnsP10 token_probs[], rans_lut cdf_tab);
-
-void aom_rans_merge_prob8_pdf(AnsP10 *const out_pdf, const AnsP8 node_prob,
- const AnsP10 *const src_pdf, int in_syms);
+void aom_rans_merge_prob8_pdf(aom_cdf_prob *const out_pdf,
+ const AnsP8 node_prob,
+ const aom_cdf_prob *const src_pdf, int in_syms);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/aom_dsp/ansreader.h b/aom_dsp/ansreader.h
index 11619b0..1f66531 100644
--- a/aom_dsp/ansreader.h
+++ b/aom_dsp/ansreader.h
@@ -62,24 +62,25 @@
struct rans_dec_sym {
uint8_t val;
- AnsP10 prob;
- AnsP10 cum_prob; // not-inclusive
+ aom_cdf_prob prob;
+ aom_cdf_prob cum_prob; // not-inclusive
};
-static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_lut cdf,
- AnsP10 rem) {
- int i = 0;
+static INLINE void fetch_sym(struct rans_dec_sym *out, const aom_cdf_prob *cdf,
+ aom_cdf_prob rem) {
+ int i;
+ aom_cdf_prob cum_prob = 0, top_prob;
// TODO(skal): if critical, could be a binary search.
// Or, better, an O(1) alias-table.
- while (rem >= cdf[i]) {
- ++i;
+ for (i = 0; rem >= (top_prob = cdf[i]); ++i) {
+ cum_prob = top_prob;
}
- out->val = i - 1;
- out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
- out->cum_prob = (AnsP10)cdf[i - 1];
+ out->val = i;
+ out->prob = top_prob - cum_prob;
+ out->cum_prob = cum_prob;
}
-static INLINE int rans_read(struct AnsDecoder *ans, const rans_lut tab) {
+static INLINE int rans_read(struct AnsDecoder *ans, const aom_cdf_prob *tab) {
unsigned rem;
unsigned quo;
struct rans_dec_sym sym;
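For reference, a minimal standalone sketch of the zero-less CDF walk that the new fetch_sym() performs; the helper name and plain uint16_t types here are illustrative, not part of this patch.

    #include <stdint.h>
    /* Pick the symbol i with cdf[i - 1] <= rem < cdf[i], treating cdf[-1] as 0.
     * The CDF has no leading zero and its last entry equals RANS_PRECISION. */
    static int lookup_symbol(const uint16_t *cdf, uint16_t rem,
                             uint16_t *prob, uint16_t *cum_prob) {
      uint16_t cum = 0, top;
      int i;
      for (i = 0; rem >= (top = cdf[i]); ++i) cum = top;
      *prob = (uint16_t)(top - cum); /* probability mass of symbol i */
      *cum_prob = cum;               /* cumulative mass below symbol i */
      return i;
    }
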
diff --git a/aom_dsp/answriter.h b/aom_dsp/answriter.h
index 5a82d35..0ac1bda 100644
--- a/aom_dsp/answriter.h
+++ b/aom_dsp/answriter.h
@@ -75,8 +75,8 @@
}
struct rans_sym {
- AnsP10 prob;
- AnsP10 cum_prob; // not-inclusive
+ aom_cdf_prob prob;
+ aom_cdf_prob cum_prob; // not-inclusive
};
// rANS with normalization
@@ -84,7 +84,7 @@
// ANS_P10_PRECISION is m
static INLINE void rans_write(struct AnsCoder *ans,
const struct rans_sym *const sym) {
- const AnsP10 p = sym->prob;
+ const aom_cdf_prob p = sym->prob;
while (ans->state >= L_BASE / RANS_PRECISION * IO_BASE * p) {
ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
ans->state /= IO_BASE;
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 6397d01..779ed00 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -44,6 +44,27 @@
# Intra prediction
#
+add_proto qw/void aom_dc_predictor_2x2/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/aom_dc_predictor_2x2/;
+
+add_proto qw/void aom_dc_top_predictor_2x2/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/aom_dc_top_predictor_2x2/;
+
+add_proto qw/void aom_dc_left_predictor_2x2/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/aom_dc_left_predictor_2x2/;
+
+add_proto qw/void aom_dc_128_predictor_2x2/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/aom_dc_128_predictor_2x2/;
+
+add_proto qw/void aom_v_predictor_2x2/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/aom_v_predictor_2x2/;
+
+add_proto qw/void aom_h_predictor_2x2/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/aom_h_predictor_2x2/;
+
+add_proto qw/void aom_tm_predictor_2x2/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/aom_tm_predictor_2x2/;
+
add_proto qw/void aom_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_d207_predictor_4x4 sse2/;
@@ -649,58 +670,31 @@
# Forward transform
#
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct4x4 sse2/;
+ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct4x4 sse2/;
- add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct4x4_1 sse2/;
+ add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct8x8 sse2/;
- add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct8x8 sse2/;
+ add_proto qw/void aom_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct8x8_1/;
- add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct8x8_1 sse2/;
+ add_proto qw/void aom_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct16x16 sse2/;
- add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct16x16 sse2/;
+ add_proto qw/void aom_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct16x16_1/;
- add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct16x16_1 sse2/;
+ add_proto qw/void aom_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct32x32 sse2/;
- add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct32x32 sse2/;
+ add_proto qw/void aom_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct32x32_rd sse2/;
- add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct32x32_rd sse2/;
-
- add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct32x32_1 sse2/;
-
- add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct4x4 sse2/;
-
- add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct8x8 sse2/;
-
- add_proto qw/void aom_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct8x8_1/;
-
- add_proto qw/void aom_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct16x16 sse2/;
-
- add_proto qw/void aom_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct16x16_1/;
-
- add_proto qw/void aom_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct32x32 sse2/;
-
- add_proto qw/void aom_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct32x32_rd sse2/;
-
- add_proto qw/void aom_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct32x32_1/;
-} else {
+ add_proto qw/void aom_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_highbd_fdct32x32_1/;
+ } # CONFIG_AOM_HIGHBITDEPTH
add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/aom_fdct4x4 sse2 msa/;
@@ -726,8 +720,7 @@
specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct32x32_1 sse2 msa/;
-} # CONFIG_AOM_HIGHBITDEPTH
+ specialize qw/aom_fdct32x32_1 sse2 avx2 msa/;
} # CONFIG_AV1_ENCODER
#
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index d062e07..52e4dc8 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -104,6 +104,20 @@
return aom_read_tree_bits(r, tree, probs);
}
+static INLINE int aom_read_symbol(aom_reader *r, const aom_cdf_prob *cdf,
+ int nsymbs) {
+#if CONFIG_ANS
+ (void)nsymbs;
+ return rans_read(r, cdf);
+#else
+ (void)r;
+ (void)cdf;
+ (void)nsymbs;
+ assert(0 && "Unsupported bitreader operation");
+ return -1;
+#endif
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
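A hedged usage sketch for the new aom_read_symbol(), mirroring the caller added in av1/decoder/detokenize.c later in this patch; the wrapper name read_coeff_token is hypothetical.

    /* CONFIG_ANS build: decode one coefficient token from a per-context CDF
     * that covers ONE_TOKEN..CATEGORY6_TOKEN and has no leading zero. */
    static INLINE int read_coeff_token(aom_reader *r,
                                       const aom_cdf_prob cdf[ENTROPY_TOKENS]) {
      return ONE_TOKEN + aom_read_symbol(r, cdf, CATEGORY6_TOKEN - ONE_TOKEN + 1);
    }
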
diff --git a/aom_dsp/bitwriter.h b/aom_dsp/bitwriter.h
index 5e34fd6..d6937aa 100644
--- a/aom_dsp/bitwriter.h
+++ b/aom_dsp/bitwriter.h
@@ -86,6 +86,24 @@
aom_write_tree_bits(w, tree, probs, bits, len, i);
}
+static INLINE void aom_write_symbol(aom_writer *w, int symb,
+ const aom_cdf_prob *cdf, int nsymbs) {
+#if CONFIG_ANS
+ struct rans_sym s;
+ (void)nsymbs;
+ assert(cdf);
+ s.cum_prob = symb > 0 ? cdf[symb - 1] : 0;
+ s.prob = cdf[symb] - s.cum_prob;
+ buf_rans_write(w, &s);
+#else
+ (void)w;
+ (void)symb;
+ (void)cdf;
+ (void)nsymbs;
+ assert(0 && "Unsupported bitwriter operation");
+#endif
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
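And the matching encoder-side arithmetic: a small illustration of what aom_write_symbol() derives from a zero-less CDF. The numbers and the helper name are made up for illustration only.

    /* With cdf = {100, 300, 1024} and symb = 1, this yields cum_prob = 100 and
     * prob = 200, i.e. roughly -log2(200/1024) ~= 2.36 bits for the symbol. */
    static void split_symbol(const aom_cdf_prob *cdf, int symb,
                             aom_cdf_prob *prob, aom_cdf_prob *cum_prob) {
      *cum_prob = symb > 0 ? cdf[symb - 1] : 0;
      *prob = (aom_cdf_prob)(cdf[symb] - *cum_prob);
    }
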
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index 1e40e68..c3af1f4 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -837,6 +837,7 @@
/* clang-format off */
#define intra_pred_allsizes(type) \
+ intra_pred_sized(type, 2) \
intra_pred_sized(type, 4) \
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
@@ -846,7 +847,7 @@
intra_pred_highbd_sized(type, 16) \
intra_pred_highbd_sized(type, 32)
-#define intra_pred_no_4x4(type) \
+#define intra_pred_above_4x4(type) \
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32) \
@@ -857,26 +858,27 @@
#else
#define intra_pred_allsizes(type) \
+ intra_pred_sized(type, 2) \
intra_pred_sized(type, 4) \
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32)
-#define intra_pred_no_4x4(type) \
+#define intra_pred_above_4x4(type) \
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32)
#endif // CONFIG_AOM_HIGHBITDEPTH
-intra_pred_no_4x4(d207)
-intra_pred_no_4x4(d63)
-intra_pred_no_4x4(d45)
+intra_pred_above_4x4(d207)
+intra_pred_above_4x4(d63)
+intra_pred_above_4x4(d45)
intra_pred_allsizes(d207e)
intra_pred_allsizes(d63e)
-intra_pred_no_4x4(d45e)
-intra_pred_no_4x4(d117)
-intra_pred_no_4x4(d135)
-intra_pred_no_4x4(d153)
+intra_pred_above_4x4(d45e)
+intra_pred_above_4x4(d117)
+intra_pred_above_4x4(d135)
+intra_pred_above_4x4(d153)
intra_pred_allsizes(v)
intra_pred_allsizes(h)
#if CONFIG_ALT_INTRA
diff --git a/aom_dsp/prob.h b/aom_dsp/prob.h
index 4f25b30..cd133e2 100644
--- a/aom_dsp/prob.h
+++ b/aom_dsp/prob.h
@@ -23,6 +23,9 @@
typedef uint8_t aom_prob;
+// TODO(negge): Rename this aom_prob once we remove vpxbool.
+typedef uint16_t aom_cdf_prob;
+
#define MAX_PROB 255
#define aom_prob_half ((aom_prob)128)
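For orientation, an illustrative (not normative) example of the kind of table the new aom_cdf_prob typedef describes.

    /* A 4-symbol CDF at RANS_PROB_BITS precision: entries are cumulative,
     * there is no leading zero, and the last entry is RANS_PRECISION (1024). */
    static const aom_cdf_prob uniform4_cdf[4] = { 256, 512, 768, 1024 };
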
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 8d6fabb..55aee8c 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -391,9 +391,6 @@
add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/av1_fht16x16 sse2 avx2/;
-add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-specialize qw/av1_fht32x32/;
-
if (aom_config("CONFIG_EXT_TX") eq "yes") {
add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/av1_fht4x8 sse2/;
@@ -412,6 +409,9 @@
add_proto qw/void av1_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/av1_fht32x16/;
+
+ add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/av1_fht32x32 avx2/;
}
if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index d44cc99..1defc53 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -418,263 +418,264 @@
// beta = 8
// Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here.
// ZERO_TOKEN and EOB_TOKEN are coded as flags outside this coder.
-const AnsP10 av1_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2] = {
- { 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 },
- { 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 },
- { 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 },
- { 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 },
- { 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 },
- { 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 },
- { 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 },
- { 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 },
- { 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 },
- { 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 },
- { 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 },
- { 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 },
- { 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 },
- { 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 },
- { 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 },
- { 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 },
- { 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 },
- { 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 },
- { 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 },
- { 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 },
- { 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 },
- { 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 },
- { 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 },
- { 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 },
- { 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 },
- { 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 },
- { 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 },
- { 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 },
- { 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 },
- { 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 },
- { 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 },
- { 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 },
- { 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 },
- { 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 },
- { 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 },
- { 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 },
- { 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 },
- { 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 },
- { 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 },
- { 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 },
- { 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 },
- { 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 },
- { 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 },
- { 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 },
- { 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 },
- { 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 },
- { 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 },
- { 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 },
- { 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 },
- { 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 },
- { 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 },
- { 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 },
- { 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 },
- { 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 },
- { 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 },
- { 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 },
- { 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 },
- { 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 },
- { 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 },
- { 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 },
- { 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 },
- { 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 },
- { 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 },
- { 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 },
- { 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 },
- { 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 },
- { 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 },
- { 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 },
- { 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 },
- { 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 },
- { 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 },
- { 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 },
- { 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 },
- { 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 },
- { 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 },
- { 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 },
- { 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 },
- { 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 },
- { 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 },
- { 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 },
- { 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 },
- { 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 },
- { 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 },
- { 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 },
- { 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 },
- { 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 },
- { 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 },
- { 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 },
- { 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 },
- { 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 },
- { 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 },
- { 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 },
- { 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 },
- { 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 },
- { 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 },
- { 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 },
- { 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 },
- { 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 },
- { 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 },
- { 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 },
- { 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 },
- { 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 },
- { 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 },
- { 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 },
- { 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 },
- { 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 },
- { 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 },
- { 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 },
- { 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 },
- { 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 },
- { 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 },
- { 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 },
- { 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 },
- { 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 },
- { 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 },
- { 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 },
- { 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 },
- { 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 },
- { 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 },
- { 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 },
- { 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 },
- { 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 },
- { 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 },
- { 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 },
- { 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 },
- { 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 },
- { 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 },
- { 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 },
- { 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 },
- { 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 },
- { 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 },
- { 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 },
- { 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 },
- { 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 },
- { 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 },
- { 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 },
- { 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 },
- { 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 },
- { 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 },
- { 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 },
- { 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 },
- { 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 },
- { 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 },
- { 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 },
- { 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 },
- { 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 },
- { 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 },
- { 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 },
- { 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 },
- { 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 },
- { 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 },
- { 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 },
- { 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 },
- { 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 },
- { 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 },
- { 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 },
- { 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 },
- { 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 },
- { 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 },
- { 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 },
- { 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 },
- { 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 },
- { 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 },
- { 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 },
- { 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 },
- { 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 },
- { 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 },
- { 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 },
- { 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 },
- { 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 },
- { 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 },
- { 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 },
- { 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 },
- { 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 },
- { 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 },
- { 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 },
- { 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 },
- { 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 },
- { 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 },
- { 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 },
- { 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 },
- { 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 },
- { 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 },
- { 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 },
- { 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 },
- { 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 },
- { 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 },
- { 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 },
- { 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 },
- { 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 },
- { 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 },
- { 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 },
- { 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 },
- { 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 },
- { 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 },
- { 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 },
- { 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 },
- { 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 },
- { 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 },
- { 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 },
- { 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 },
- { 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 },
- { 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 },
- { 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 },
- { 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 },
- { 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 },
- { 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 },
- { 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 },
- { 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 },
- { 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 },
- { 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 },
- { 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 },
- { 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 },
- { 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 },
- { 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 },
- { 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 },
- { 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 },
- { 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 },
- { 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 },
- { 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 },
- { 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 },
- { 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 },
- { 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 },
- { 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 },
- { 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 },
- { 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 },
- { 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 },
- { 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 },
- { 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 },
- { 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 },
- { 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 },
- { 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 },
- { 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 },
- { 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 },
- { 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 },
- { 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 },
- { 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 },
- { 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 },
- { 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 },
- { 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 },
- { 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 },
- { 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 },
- { 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 },
- { 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 },
- { 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 },
- { 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 },
- { 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 },
- { 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 },
- { 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 },
- { 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-};
+const aom_cdf_prob
+ av1_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2] = {
+ { 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 },
+ { 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 },
+ { 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 },
+ { 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 },
+ { 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 },
+ { 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 },
+ { 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 },
+ { 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 },
+ { 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 },
+ { 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 },
+ { 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 },
+ { 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 },
+ { 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 },
+ { 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 },
+ { 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 },
+ { 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 },
+ { 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 },
+ { 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 },
+ { 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 },
+ { 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 },
+ { 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 },
+ { 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 },
+ { 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 },
+ { 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 },
+ { 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 },
+ { 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 },
+ { 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 },
+ { 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 },
+ { 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 },
+ { 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 },
+ { 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 },
+ { 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 },
+ { 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 },
+ { 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 },
+ { 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 },
+ { 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 },
+ { 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 },
+ { 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 },
+ { 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 },
+ { 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 },
+ { 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 },
+ { 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 },
+ { 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 },
+ { 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 },
+ { 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 },
+ { 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 },
+ { 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 },
+ { 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 },
+ { 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 },
+ { 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 },
+ { 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 },
+ { 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 },
+ { 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 },
+ { 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 },
+ { 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 },
+ { 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 },
+ { 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 },
+ { 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 },
+ { 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 },
+ { 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 },
+ { 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 },
+ { 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 },
+ { 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 },
+ { 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 },
+ { 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 },
+ { 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 },
+ { 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 },
+ { 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 },
+ { 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 },
+ { 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 },
+ { 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 },
+ { 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 },
+ { 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 },
+ { 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 },
+ { 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 },
+ { 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 },
+ { 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 },
+ { 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 },
+ { 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 },
+ { 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 },
+ { 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 },
+ { 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 },
+ { 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 },
+ { 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 },
+ { 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 },
+ { 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 },
+ { 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 },
+ { 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 },
+ { 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 },
+ { 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 },
+ { 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 },
+ { 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 },
+ { 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 },
+ { 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 },
+ { 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 },
+ { 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 },
+ { 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 },
+ { 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 },
+ { 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 },
+ { 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 },
+ { 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 },
+ { 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 },
+ { 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 },
+ { 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 },
+ { 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 },
+ { 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 },
+ { 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 },
+ { 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 },
+ { 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 },
+ { 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 },
+ { 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 },
+ { 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 },
+ { 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 },
+ { 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 },
+ { 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 },
+ { 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 },
+ { 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 },
+ { 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 },
+ { 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 },
+ { 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 },
+ { 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 },
+ { 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 },
+ { 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 },
+ { 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 },
+ { 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 },
+ { 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 },
+ { 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 },
+ { 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 },
+ { 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 },
+ { 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 },
+ { 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 },
+ { 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 },
+ { 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 },
+ { 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 },
+ { 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 },
+ { 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 },
+ { 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 },
+ { 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 },
+ { 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 },
+ { 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 },
+ { 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 },
+ { 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 },
+ { 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 },
+ { 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 },
+ { 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 },
+ { 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 },
+ { 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 },
+ { 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 },
+ { 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 },
+ { 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 },
+ { 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 },
+ { 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 },
+ { 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 },
+ { 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 },
+ { 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 },
+ { 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 },
+ { 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 },
+ { 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 },
+ { 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 },
+ { 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 },
+ { 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 },
+ { 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 },
+ { 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 },
+ { 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 },
+ { 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 },
+ { 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 },
+ { 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 },
+ { 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 },
+ { 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 },
+ { 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 },
+ { 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 },
+ { 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 },
+ { 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 },
+ { 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 },
+ { 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 },
+ { 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 },
+ { 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 },
+ { 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 },
+ { 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 },
+ { 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 },
+ { 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 },
+ { 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 },
+ { 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 },
+ { 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 },
+ { 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 },
+ { 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 },
+ { 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 },
+ { 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 },
+ { 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 },
+ { 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 },
+ { 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 },
+ { 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 },
+ { 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 },
+ { 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 },
+ { 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 },
+ { 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 },
+ { 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 },
+ { 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 },
+ { 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 },
+ { 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 },
+ { 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 },
+ { 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 },
+ { 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 },
+ { 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 },
+ { 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 },
+ { 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 },
+ { 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 },
+ { 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 },
+ { 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 },
+ { 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 },
+ { 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 },
+ { 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 },
+ { 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 },
+ { 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 },
+ { 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 },
+ { 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 },
+ { 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 },
+ { 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 },
+ { 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 },
+ { 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 },
+ { 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 },
+ { 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 },
+ { 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 },
+ { 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 },
+ { 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 },
+ { 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 },
+ { 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 },
+ { 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 },
+ { 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 },
+ { 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 },
+ { 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 },
+ { 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 },
+ { 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 },
+ { 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 },
+ { 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 },
+ { 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 },
+ { 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 },
+ { 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 },
+ { 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 },
+ { 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 },
+ { 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 },
+ { 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 },
+ { 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 },
+ { 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 },
+ { 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 },
+ { 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 },
+ { 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 },
+ { 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 },
+ { 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 },
+ { 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ };
#endif // CONFIG_ANS
/* clang-format off */
@@ -2801,6 +2802,15 @@
}
#if CONFIG_ANS
+static void build_token_cdfs(const aom_prob *pdf_model,
+ aom_cdf_prob cdf[ENTROPY_TOKENS]) {
+ int i, sum = 0;
+ assert(pdf_model[2] != 0);
+ for (i = 0; i < ENTROPY_TOKENS - 2; ++i) {
+ cdf[i] = sum += av1_pareto8_token_probs[pdf_model[2] - 1][i];
+ }
+}
+
void av1_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
TX_SIZE t;
int i, j, k, l;
@@ -2809,11 +2819,8 @@
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- const aom_prob *const tree_probs = fc->coef_probs[t][i][j][k][l];
- aom_prob pivot = tree_probs[PIVOT_NODE];
- assert(pivot != 0);
- aom_rans_build_cdf_from_pdf(av1_pareto8_token_probs[pivot - 1],
- fc->coef_cdfs[t][i][j][k][l]);
+ build_token_cdfs(fc->coef_probs[t][i][j][k][l],
+ fc->coef_cdfs[t][i][j][k][l]);
}
}
#endif // CONFIG_ANS
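A quick sanity check of build_token_cdfs(), worked on the first row of av1_pareto8_token_probs above; the helper below is illustrative only.

    /* The running sum of {4, 4, 4, 4, 8, 15, 30, 57, 103, 795} is the CDF
     * {4, 8, 12, 16, 24, 39, 69, 126, 229, 1024}; the last entry equals
     * RANS_PRECISION, as the rANS coder requires. */
    static void check_pareto_row0(void) {
      aom_cdf_prob cdf[ENTROPY_TOKENS];
      int i, sum = 0;
      for (i = 0; i < ENTROPY_TOKENS - 2; ++i)
        cdf[i] = (aom_cdf_prob)(sum += av1_pareto8_token_probs[0][i]);
      assert(cdf[ENTROPY_TOKENS - 3] == RANS_PRECISION);
    }
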
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index f0727c0..fd68e82 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -191,10 +191,10 @@
extern const aom_tree_index av1_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
extern const aom_prob av1_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
#if CONFIG_ANS
-extern const AnsP10 av1_pareto8_token_probs[COEFF_PROB_MODELS]
- [ENTROPY_TOKENS - 2];
-
-typedef rans_lut coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
+typedef aom_cdf_prob coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
+ [ENTROPY_TOKENS];
+extern const aom_cdf_prob av1_pareto8_token_probs[COEFF_PROB_MODELS]
+ [ENTROPY_TOKENS - 2];
#endif // CONFIG_ANS
typedef aom_prob av1_coeff_probs_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 328f360..eedbc79 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -33,6 +33,9 @@
return txsize_sqr_up_map[tx_size] == TX_32X32;
}
+// NOTE: The implementations of all inverses need to be aware of the fact
+// that input and output could be the same buffer.
+
#if CONFIG_EXT_TX
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -56,17 +59,17 @@
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
-// For use in lieu of DST
+// For use in lieu of ADST
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
int i;
tran_low_t inputhalf[16];
- for (i = 0; i < 16; ++i) {
- output[i] = input[16 + i] * 4;
- }
// Multiply input by sqrt(2)
for (i = 0; i < 16; ++i) {
inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
}
+ for (i = 0; i < 16; ++i) {
+ output[i] = input[16 + i] * 4;
+ }
idct16_c(inputhalf, output + 16);
// Note overall scaling factor is 4 times orthogonal
}
@@ -106,14 +109,14 @@
int bd) {
int i;
tran_low_t inputhalf[16];
- for (i = 0; i < 16; ++i) {
- output[i] = input[16 + i] * 4;
- }
// Multiply input by sqrt(2)
for (i = 0; i < 16; ++i) {
inputhalf[i] =
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
}
+ for (i = 0; i < 16; ++i) {
+ output[i] = input[16 + i] * 4;
+ }
aom_highbd_idct16_c(inputhalf, output + 16, bd);
// Note overall scaling factor is 4 times orthogonal
}
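The loop reordering in both ihalfright32 variants above matters when the output buffer aliases the input; a sketch of the hazard it avoids, assuming an in-place call.

    /* With output == input:
     *   old order: output[0..15] = input[16..31] * 4;       // clobbers input[0..15]
     *              inputhalf[i]  = round(input[i] * Sqrt2);  // reads clobbered data
     *   new order: inputhalf[i]  = round(input[i] * Sqrt2);  // read low half first
     *              output[0..15] = input[16..31] * 4;        // high half untouched here
     *              idct16(inputhalf, output + 16);           // low half already saved
     */
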
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 6a0769c..9a40f69 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -75,9 +75,9 @@
fc->coef_probs[tx_size_ctx][type][ref];
const aom_prob *prob;
#if CONFIG_ANS
- const rans_lut(*coef_cdfs)[COEFF_CONTEXTS] =
+ const aom_cdf_prob(*const coef_cdfs)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
fc->coef_cdfs[tx_size_ctx][type][ref];
- const rans_lut *cdf;
+ const aom_cdf_prob(*cdf)[ENTROPY_TOKENS];
#endif // CONFIG_ANS
unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
unsigned int(*eob_branch_count)[COEFF_CONTEXTS];
@@ -166,7 +166,8 @@
}
#if CONFIG_ANS
cdf = &coef_cdfs[band][ctx];
- token = ONE_TOKEN + rans_read(r, *cdf);
+ token =
+ ONE_TOKEN + aom_read_symbol(r, *cdf, CATEGORY6_TOKEN - ONE_TOKEN + 1);
INCREMENT_COUNT(ONE_TOKEN + (token > ONE_TOKEN));
switch (token) {
case ONE_TOKEN:
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 91eeeaa..aaffebb 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -657,11 +657,8 @@
aom_write(w, t != ZERO_TOKEN, p->context_tree[1]);
if (t != ZERO_TOKEN) {
- struct rans_sym s;
- const rans_lut *token_cdf = p->token_cdf;
- s.cum_prob = (*token_cdf)[t - ONE_TOKEN];
- s.prob = (*token_cdf)[t - ONE_TOKEN + 1] - s.cum_prob;
- buf_rans_write(w, &s);
+ aom_write_symbol(w, t - ONE_TOKEN, *p->token_cdf,
+ CATEGORY6_TOKEN - ONE_TOKEN + 1);
}
}
#else
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 983f8cc..90b0416 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3086,15 +3086,15 @@
cpi->lst_fb_idxes[ref_frame] = cpi->lst_fb_idxes[ref_frame - 1];
// [0] is allocated to the current coded frame. The statistics for the
- // reference frames start at [1].
+ // reference frames start at [LAST_FRAME], i.e. [1].
if (!cpi->rc.is_src_frame_alt_ref) {
- memcpy(cpi->interp_filter_selected[ref_frame + 1],
- cpi->interp_filter_selected[ref_frame],
- sizeof(cpi->interp_filter_selected[ref_frame]));
+ memcpy(cpi->interp_filter_selected[ref_frame + LAST_FRAME],
+ cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME],
+ sizeof(cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME]));
}
}
}
-#endif
+#endif // CONFIG_EXT_REFS
void av1_update_reference_frames(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
@@ -3181,14 +3181,12 @@
int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
shift_last_ref_frames(cpi);
-
cpi->lst_fb_idxes[0] = cpi->bwd_fb_idx;
- if (!cpi->rc.is_src_frame_alt_ref) {
- memcpy(cpi->interp_filter_selected[0],
- cpi->interp_filter_selected[BWDREF_FRAME],
- sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
- }
cpi->bwd_fb_idx = tmp;
+
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[BWDREF_FRAME],
+ sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
} else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) {
// Deal with the special case for showing existing internal ALTREF_FRAME
// Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME
@@ -3198,15 +3196,15 @@
int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
shift_last_ref_frames(cpi);
-
cpi->lst_fb_idxes[0] = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = tmp;
+
+ // We need to modify the mapping accordingly
+ cpi->arf_map[which_arf] = cpi->alt_fb_idx;
+
memcpy(cpi->interp_filter_selected[LAST_FRAME],
cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
sizeof(cpi->interp_filter_selected[ALTREF_FRAME + which_arf]));
-
- cpi->alt_fb_idx = tmp;
- // We need to modify the mapping accordingly
- cpi->arf_map[which_arf] = cpi->alt_fb_idx;
#endif // CONFIG_EXT_REFS
} else { /* For non key/golden frames */
if (cpi->refresh_alt_ref_frame) {
@@ -3241,22 +3239,12 @@
uref_cnt_fb(cpi->upsampled_ref_bufs,
&cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
- if (!cpi->rc.is_src_frame_alt_ref) {
+#if !CONFIG_EXT_REFS
+ if (!cpi->rc.is_src_frame_alt_ref)
+#endif // !CONFIG_EXT_REFS
memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
- } else {
- int which_arf = 0;
-#if CONFIG_EXT_REFS
- if (cpi->oxcf.pass == 2) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- which_arf = gf_group->arf_update_idx[gf_group->index];
- }
-#endif // CONFIG_EXT_REFS
- memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
- cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
- sizeof(cpi->interp_filter_selected[ALTREF_FRAME + which_arf]));
- }
}
#if CONFIG_EXT_REFS
@@ -3271,6 +3259,7 @@
cpi->alt_fb_idx = cpi->bwd_fb_idx;
cpi->bwd_fb_idx = tmp;
}
+
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx],
cm->new_fb_idx);
if (use_upsampled_ref)
@@ -3354,20 +3343,14 @@
tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
shift_last_ref_frames(cpi);
-
cpi->lst_fb_idxes[0] = tmp;
- if (!cpi->rc.is_src_frame_alt_ref) {
- if (cm->show_existing_frame) {
- memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[BWDREF_FRAME],
- sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
- } else {
- memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[0],
- sizeof(cpi->interp_filter_selected[0]));
- }
- }
+ assert(cm->show_existing_frame == 0);
+ // NOTE: Currently only LF_UPDATE and INTNL_OVERLAY_UPDATE frames are to
+ // refresh the LAST_FRAME.
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
}
#else
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 5821d3f..9fdf540 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -2585,10 +2585,11 @@
if (cpi->num_extra_arfs) {
int tmp = cpi->bwd_fb_idx;
- cpi->rc.is_bwd_ref_frame = 1;
cpi->bwd_fb_idx = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->arf_map[0];
cpi->arf_map[0] = tmp;
+
+ cpi->rc.is_bwd_ref_frame = 1;
} else {
cpi->rc.is_bwd_ref_frame = 0;
}
@@ -2639,11 +2640,13 @@
// NOTE: The indices will be swapped back after this frame is encoded
// (in av1_update_reference_frames()).
int tmp = cpi->bwd_fb_idx;
+
cpi->bwd_fb_idx = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->arf_map[0];
cpi->arf_map[0] = tmp;
}
break;
+
case LAST_BIPRED_UPDATE:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 9589a48..1103c4b 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -180,16 +180,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- av1_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ av1_fht32x32(src_diff, coeff, diff_stride, tx_type);
break;
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
case V_FLIPADST:
- case H_FLIPADST:
- av1_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
- break;
+ case H_FLIPADST: av1_fht32x32(src_diff, coeff, diff_stride, tx_type); break;
case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type); break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index d659607..8095681 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -387,7 +387,7 @@
static INLINE void add_token(TOKENEXTRA **t, const aom_prob *context_tree,
#if CONFIG_ANS
- const rans_lut *token_cdf,
+ const aom_cdf_prob (*token_cdf)[ENTROPY_TOKENS],
#endif // CONFIG_ANS
int32_t extra, uint8_t token,
uint8_t skip_eob_node, unsigned int *counts) {
@@ -402,17 +402,6 @@
++counts[token];
}
-static INLINE void add_token_no_extra(TOKENEXTRA **t,
- const aom_prob *context_tree,
- uint8_t token, uint8_t skip_eob_node,
- unsigned int *counts) {
- (*t)->token = token;
- (*t)->context_tree = context_tree;
- (*t)->skip_eob_node = skip_eob_node;
- (*t)++;
- ++counts[token];
-}
-
static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
TX_SIZE tx_size) {
const int eob_max = num_4x4_blocks_txsize_lookup[tx_size] << 4;
@@ -498,8 +487,8 @@
cpi->common.fc->coef_probs[txsize_sqr_map[tx_size]][type][ref];
#endif // CONFIG_ENTROPY
#if CONFIG_ANS
- rans_lut(*const coef_cdfs)[COEFF_CONTEXTS] =
- cpi->common.fc->coef_cdfs[txsize_sqr_map[tx_size]][type][ref];
+ aom_cdf_prob(*const coef_cdfs)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ cpi->common.fc->coef_cdfs[tx_size][type][ref];
#endif // CONFIG_ANS
unsigned int(*const eob_branch)[COEFF_CONTEXTS] =
td->counts->eob_branch[txsize_sqr_map[tx_size]][type][ref];
@@ -522,7 +511,7 @@
add_token(&t, coef_probs[band[c]][pt],
#if CONFIG_ANS
- (const rans_lut *)&coef_cdfs[band[c]][pt],
+ (const aom_cdf_prob(*)[ENTROPY_TOKENS]) & coef_cdfs[band[c]][pt],
#endif // CONFIG_ANS
extra, (uint8_t)token, (uint8_t)skip_eob, counts[band[c]][pt]);
@@ -532,8 +521,11 @@
skip_eob = (token == ZERO_TOKEN);
}
if (c < seg_eob) {
- add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
- counts[band[c]][pt]);
+ add_token(&t, coef_probs[band[c]][pt],
+#if CONFIG_ANS
+ NULL,
+#endif
+ 0, EOB_TOKEN, 0, counts[band[c]][pt]);
++eob_branch[band[c]][pt];
}
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index 520e1b6..f20848a 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -37,7 +37,7 @@
typedef struct {
const aom_prob *context_tree;
#if CONFIG_ANS
- const rans_lut *token_cdf;
+ const aom_cdf_prob (*token_cdf)[ENTROPY_TOKENS];
#endif // CONFIG_ANS
EXTRABIT extra;
uint8_t token;
diff --git a/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
index b23d39d..69bf89a 100644
--- a/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+++ b/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
@@ -25,8 +25,7 @@
*u = _mm256_permute2x128_si256(v, v, 1);
}
-void aom_fdct16x16_1_avx2(const int16_t *input, tran_low_t *output,
- int stride) {
+static int32_t get_16x16_sum(const int16_t *input, int stride) {
__m256i r0, r1, r2, r3, u0, u1;
__m256i zero = _mm256_setzero_si256();
__m256i sum = _mm256_setzero_si256();
@@ -61,8 +60,14 @@
_mm256_castsi256_si128(u1));
v1 = _mm_srli_si128(v0, 4);
v0 = _mm_add_epi32(v0, v1);
- v0 = _mm_srai_epi32(v0, 1);
- output[0] = (tran_low_t)_mm_extract_epi32(v0, 0);
+ return (int32_t)_mm_extract_epi32(v0, 0);
+}
+
+void aom_fdct16x16_1_avx2(const int16_t *input, tran_low_t *output,
+ int stride) {
+ int32_t dc = get_16x16_sum(input, stride);
+ output[0] = (tran_low_t)(dc >> 1);
+ _mm256_zeroupper();
}
static void mm256_transpose_16x16(__m256i *in) {
@@ -559,8 +564,6 @@
x1 = _mm256_unpackhi_epi16(u3, u4);
in[13] = butter_fly(x0, x1, cospi_p06_p26);
in[3] = butter_fly(x0, x1, cospi_m26_p06);
-
- mm256_transpose_16x16(in);
}
void fadst16_avx2(__m256i *in) {
@@ -1105,8 +1108,6 @@
in[3] = _mm256_sub_epi16(zero, x4);
in[13] = _mm256_sub_epi16(zero, x13);
in[15] = _mm256_sub_epi16(zero, x1);
-
- mm256_transpose_16x16(in);
}
#if CONFIG_EXT_TX
@@ -1134,7 +1135,6 @@
in[i] = _mm256_packs_epi32(u0, u1);
i++;
}
- mm256_transpose_16x16(in);
}
#endif
@@ -1146,24 +1146,28 @@
case DCT_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fdct16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case ADST_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case DCT_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fdct16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case ADST_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
@@ -1171,71 +1175,698 @@
case FLIPADST_DCT:
load_buffer_16x16(input, stride, 1, 0, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case DCT_FLIPADST:
load_buffer_16x16(input, stride, 0, 1, in);
fdct16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case FLIPADST_FLIPADST:
load_buffer_16x16(input, stride, 1, 1, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case ADST_FLIPADST:
load_buffer_16x16(input, stride, 0, 1, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case FLIPADST_ADST:
load_buffer_16x16(input, stride, 1, 0, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case V_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fdct16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case H_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fidtx16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case V_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case H_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fidtx16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case V_FLIPADST:
load_buffer_16x16(input, stride, 1, 0, in);
fadst16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case H_FLIPADST:
load_buffer_16x16(input, stride, 0, 1, in);
fidtx16_avx2(in);
+ mm256_transpose_16x16(in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
}
+ mm256_transpose_16x16(in);
write_buffer_16x16(in, 16, output);
+ _mm256_zeroupper();
+}
+
+void aom_fdct32x32_1_avx2(const int16_t *input, tran_low_t *output,
+ int stride) {
+ // left and upper corner
+ int32_t sum = get_16x16_sum(input, stride);
+ // right and upper corner
+ sum += get_16x16_sum(input + 16, stride);
+ // left and lower corner
+ sum += get_16x16_sum(input + (stride << 4), stride);
+ // right and lower corner
+ sum += get_16x16_sum(input + (stride << 4) + 16, stride);
+
+ sum >>= 3;
+ output[0] = (tran_low_t)sum;
+ _mm256_zeroupper();
+}
+
+#if CONFIG_EXT_TX
+static void mm256_vectors_swap(__m256i *a0, __m256i *a1, const int size) {
+ int i = 0;
+ __m256i temp;
+ while (i < size) {
+ temp = a0[i];
+ a0[i] = a1[i];
+ a1[i] = temp;
+ i++;
+ }
+}
+
+static void mm256_transpose_32x32(__m256i *in0, __m256i *in1) {
+ mm256_transpose_16x16(in0);
+ mm256_transpose_16x16(&in0[16]);
+ mm256_transpose_16x16(in1);
+ mm256_transpose_16x16(&in1[16]);
+ mm256_vectors_swap(&in0[16], in1, 16);
+}
+
+static void prepare_16x16_even(const __m256i *in, __m256i *even) {
+ even[0] = _mm256_add_epi16(in[0], in[31]);
+ even[1] = _mm256_add_epi16(in[1], in[30]);
+ even[2] = _mm256_add_epi16(in[2], in[29]);
+ even[3] = _mm256_add_epi16(in[3], in[28]);
+ even[4] = _mm256_add_epi16(in[4], in[27]);
+ even[5] = _mm256_add_epi16(in[5], in[26]);
+ even[6] = _mm256_add_epi16(in[6], in[25]);
+ even[7] = _mm256_add_epi16(in[7], in[24]);
+ even[8] = _mm256_add_epi16(in[8], in[23]);
+ even[9] = _mm256_add_epi16(in[9], in[22]);
+ even[10] = _mm256_add_epi16(in[10], in[21]);
+ even[11] = _mm256_add_epi16(in[11], in[20]);
+ even[12] = _mm256_add_epi16(in[12], in[19]);
+ even[13] = _mm256_add_epi16(in[13], in[18]);
+ even[14] = _mm256_add_epi16(in[14], in[17]);
+ even[15] = _mm256_add_epi16(in[15], in[16]);
+}
+
+static void prepare_16x16_odd(const __m256i *in, __m256i *odd) {
+ odd[0] = _mm256_sub_epi16(in[15], in[16]);
+ odd[1] = _mm256_sub_epi16(in[14], in[17]);
+ odd[2] = _mm256_sub_epi16(in[13], in[18]);
+ odd[3] = _mm256_sub_epi16(in[12], in[19]);
+ odd[4] = _mm256_sub_epi16(in[11], in[20]);
+ odd[5] = _mm256_sub_epi16(in[10], in[21]);
+ odd[6] = _mm256_sub_epi16(in[9], in[22]);
+ odd[7] = _mm256_sub_epi16(in[8], in[23]);
+ odd[8] = _mm256_sub_epi16(in[7], in[24]);
+ odd[9] = _mm256_sub_epi16(in[6], in[25]);
+ odd[10] = _mm256_sub_epi16(in[5], in[26]);
+ odd[11] = _mm256_sub_epi16(in[4], in[27]);
+ odd[12] = _mm256_sub_epi16(in[3], in[28]);
+ odd[13] = _mm256_sub_epi16(in[2], in[29]);
+ odd[14] = _mm256_sub_epi16(in[1], in[30]);
+ odd[15] = _mm256_sub_epi16(in[0], in[31]);
+}
+
+static void collect_16col(const __m256i *even, const __m256i *odd,
+ __m256i *out) {
+ // fdct16_avx2() already maps the output
+ out[0] = even[0];
+ out[2] = even[1];
+ out[4] = even[2];
+ out[6] = even[3];
+ out[8] = even[4];
+ out[10] = even[5];
+ out[12] = even[6];
+ out[14] = even[7];
+ out[16] = even[8];
+ out[18] = even[9];
+ out[20] = even[10];
+ out[22] = even[11];
+ out[24] = even[12];
+ out[26] = even[13];
+ out[28] = even[14];
+ out[30] = even[15];
+
+ out[1] = odd[0];
+ out[17] = odd[1];
+ out[9] = odd[2];
+ out[25] = odd[3];
+ out[5] = odd[4];
+ out[21] = odd[5];
+ out[13] = odd[6];
+ out[29] = odd[7];
+ out[3] = odd[8];
+ out[19] = odd[9];
+ out[11] = odd[10];
+ out[27] = odd[11];
+ out[7] = odd[12];
+ out[23] = odd[13];
+ out[15] = odd[14];
+ out[31] = odd[15];
+}
+
+static void collect_coeffs(const __m256i *first_16col_even,
+ const __m256i *first_16col_odd,
+ const __m256i *second_16col_even,
+ const __m256i *second_16col_odd, __m256i *in0,
+ __m256i *in1) {
+ collect_16col(first_16col_even, first_16col_odd, in0);
+ collect_16col(second_16col_even, second_16col_odd, in1);
+}
+
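+// Compute the odd-indexed coefficients of the 32-point DCT from the stage-1
+// differences produced by prepare_16x16_odd(); the stages below mirror the
+// scalar 32-point forward DCT.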
+static void fdct16_odd_avx2(__m256i *in) {
+  // Constant naming: cospi_L_H packs the coefficient pair (L, H), L first.
+ const __m256i cospi_p16_p16 = pair256_set_epi16(cospi_16_64, cospi_16_64);
+ const __m256i cospi_m16_p16 = pair256_set_epi16(-cospi_16_64, cospi_16_64);
+ const __m256i cospi_m08_p24 = pair256_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m256i cospi_p24_p08 = pair256_set_epi16(cospi_24_64, cospi_8_64);
+ const __m256i cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m256i cospi_m04_p28 = pair256_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m256i cospi_p28_p04 = pair256_set_epi16(cospi_28_64, cospi_4_64);
+ const __m256i cospi_m28_m04 = pair256_set_epi16(-cospi_28_64, -cospi_4_64);
+ const __m256i cospi_m20_p12 = pair256_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m256i cospi_p12_p20 = pair256_set_epi16(cospi_12_64, cospi_20_64);
+ const __m256i cospi_m12_m20 = pair256_set_epi16(-cospi_12_64, -cospi_20_64);
+
+ const __m256i cospi_p31_p01 = pair256_set_epi16(cospi_31_64, cospi_1_64);
+ const __m256i cospi_m01_p31 = pair256_set_epi16(-cospi_1_64, cospi_31_64);
+ const __m256i cospi_p15_p17 = pair256_set_epi16(cospi_15_64, cospi_17_64);
+ const __m256i cospi_m17_p15 = pair256_set_epi16(-cospi_17_64, cospi_15_64);
+ const __m256i cospi_p23_p09 = pair256_set_epi16(cospi_23_64, cospi_9_64);
+ const __m256i cospi_m09_p23 = pair256_set_epi16(-cospi_9_64, cospi_23_64);
+ const __m256i cospi_p07_p25 = pair256_set_epi16(cospi_7_64, cospi_25_64);
+ const __m256i cospi_m25_p07 = pair256_set_epi16(-cospi_25_64, cospi_7_64);
+ const __m256i cospi_p27_p05 = pair256_set_epi16(cospi_27_64, cospi_5_64);
+ const __m256i cospi_m05_p27 = pair256_set_epi16(-cospi_5_64, cospi_27_64);
+ const __m256i cospi_p11_p21 = pair256_set_epi16(cospi_11_64, cospi_21_64);
+ const __m256i cospi_m21_p11 = pair256_set_epi16(-cospi_21_64, cospi_11_64);
+ const __m256i cospi_p19_p13 = pair256_set_epi16(cospi_19_64, cospi_13_64);
+ const __m256i cospi_m13_p19 = pair256_set_epi16(-cospi_13_64, cospi_19_64);
+ const __m256i cospi_p03_p29 = pair256_set_epi16(cospi_3_64, cospi_29_64);
+ const __m256i cospi_m29_p03 = pair256_set_epi16(-cospi_29_64, cospi_3_64);
+
+ __m256i x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ __m256i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15;
+ __m256i u0, u1;
+
+ // stage 1 is in prepare_16x16_odd()
+
+ // stage 2
+ y0 = in[0];
+ y1 = in[1];
+ y2 = in[2];
+ y3 = in[3];
+
+ u0 = _mm256_unpacklo_epi16(in[4], in[11]);
+ u1 = _mm256_unpackhi_epi16(in[4], in[11]);
+ y4 = butter_fly(u0, u1, cospi_m16_p16);
+ y11 = butter_fly(u0, u1, cospi_p16_p16);
+
+ u0 = _mm256_unpacklo_epi16(in[5], in[10]);
+ u1 = _mm256_unpackhi_epi16(in[5], in[10]);
+ y5 = butter_fly(u0, u1, cospi_m16_p16);
+ y10 = butter_fly(u0, u1, cospi_p16_p16);
+
+ u0 = _mm256_unpacklo_epi16(in[6], in[9]);
+ u1 = _mm256_unpackhi_epi16(in[6], in[9]);
+ y6 = butter_fly(u0, u1, cospi_m16_p16);
+ y9 = butter_fly(u0, u1, cospi_p16_p16);
+
+ u0 = _mm256_unpacklo_epi16(in[7], in[8]);
+ u1 = _mm256_unpackhi_epi16(in[7], in[8]);
+ y7 = butter_fly(u0, u1, cospi_m16_p16);
+ y8 = butter_fly(u0, u1, cospi_p16_p16);
+
+ y12 = in[12];
+ y13 = in[13];
+ y14 = in[14];
+ y15 = in[15];
+
+ // stage 3
+ x0 = _mm256_add_epi16(y0, y7);
+ x1 = _mm256_add_epi16(y1, y6);
+ x2 = _mm256_add_epi16(y2, y5);
+ x3 = _mm256_add_epi16(y3, y4);
+ x4 = _mm256_sub_epi16(y3, y4);
+ x5 = _mm256_sub_epi16(y2, y5);
+ x6 = _mm256_sub_epi16(y1, y6);
+ x7 = _mm256_sub_epi16(y0, y7);
+ x8 = _mm256_sub_epi16(y15, y8);
+ x9 = _mm256_sub_epi16(y14, y9);
+ x10 = _mm256_sub_epi16(y13, y10);
+ x11 = _mm256_sub_epi16(y12, y11);
+ x12 = _mm256_add_epi16(y12, y11);
+ x13 = _mm256_add_epi16(y13, y10);
+ x14 = _mm256_add_epi16(y14, y9);
+ x15 = _mm256_add_epi16(y15, y8);
+
+ // stage 4
+ y0 = x0;
+ y1 = x1;
+ y6 = x6;
+ y7 = x7;
+ y8 = x8;
+ y9 = x9;
+ y14 = x14;
+ y15 = x15;
+
+ u0 = _mm256_unpacklo_epi16(x2, x13);
+ u1 = _mm256_unpackhi_epi16(x2, x13);
+ y2 = butter_fly(u0, u1, cospi_m08_p24);
+ y13 = butter_fly(u0, u1, cospi_p24_p08);
+
+ u0 = _mm256_unpacklo_epi16(x3, x12);
+ u1 = _mm256_unpackhi_epi16(x3, x12);
+ y3 = butter_fly(u0, u1, cospi_m08_p24);
+ y12 = butter_fly(u0, u1, cospi_p24_p08);
+
+ u0 = _mm256_unpacklo_epi16(x4, x11);
+ u1 = _mm256_unpackhi_epi16(x4, x11);
+ y4 = butter_fly(u0, u1, cospi_m24_m08);
+ y11 = butter_fly(u0, u1, cospi_m08_p24);
+
+ u0 = _mm256_unpacklo_epi16(x5, x10);
+ u1 = _mm256_unpackhi_epi16(x5, x10);
+ y5 = butter_fly(u0, u1, cospi_m24_m08);
+ y10 = butter_fly(u0, u1, cospi_m08_p24);
+
+ // stage 5
+ x0 = _mm256_add_epi16(y0, y3);
+ x1 = _mm256_add_epi16(y1, y2);
+ x2 = _mm256_sub_epi16(y1, y2);
+ x3 = _mm256_sub_epi16(y0, y3);
+ x4 = _mm256_sub_epi16(y7, y4);
+ x5 = _mm256_sub_epi16(y6, y5);
+ x6 = _mm256_add_epi16(y6, y5);
+ x7 = _mm256_add_epi16(y7, y4);
+
+ x8 = _mm256_add_epi16(y8, y11);
+ x9 = _mm256_add_epi16(y9, y10);
+ x10 = _mm256_sub_epi16(y9, y10);
+ x11 = _mm256_sub_epi16(y8, y11);
+ x12 = _mm256_sub_epi16(y15, y12);
+ x13 = _mm256_sub_epi16(y14, y13);
+ x14 = _mm256_add_epi16(y14, y13);
+ x15 = _mm256_add_epi16(y15, y12);
+
+ // stage 6
+ y0 = x0;
+ y3 = x3;
+ y4 = x4;
+ y7 = x7;
+ y8 = x8;
+ y11 = x11;
+ y12 = x12;
+ y15 = x15;
+
+ u0 = _mm256_unpacklo_epi16(x1, x14);
+ u1 = _mm256_unpackhi_epi16(x1, x14);
+ y1 = butter_fly(u0, u1, cospi_m04_p28);
+ y14 = butter_fly(u0, u1, cospi_p28_p04);
+
+ u0 = _mm256_unpacklo_epi16(x2, x13);
+ u1 = _mm256_unpackhi_epi16(x2, x13);
+ y2 = butter_fly(u0, u1, cospi_m28_m04);
+ y13 = butter_fly(u0, u1, cospi_m04_p28);
+
+ u0 = _mm256_unpacklo_epi16(x5, x10);
+ u1 = _mm256_unpackhi_epi16(x5, x10);
+ y5 = butter_fly(u0, u1, cospi_m20_p12);
+ y10 = butter_fly(u0, u1, cospi_p12_p20);
+
+ u0 = _mm256_unpacklo_epi16(x6, x9);
+ u1 = _mm256_unpackhi_epi16(x6, x9);
+ y6 = butter_fly(u0, u1, cospi_m12_m20);
+ y9 = butter_fly(u0, u1, cospi_m20_p12);
+
+ // stage 7
+ x0 = _mm256_add_epi16(y0, y1);
+ x1 = _mm256_sub_epi16(y0, y1);
+ x2 = _mm256_sub_epi16(y3, y2);
+ x3 = _mm256_add_epi16(y3, y2);
+ x4 = _mm256_add_epi16(y4, y5);
+ x5 = _mm256_sub_epi16(y4, y5);
+ x6 = _mm256_sub_epi16(y7, y6);
+ x7 = _mm256_add_epi16(y7, y6);
+
+ x8 = _mm256_add_epi16(y8, y9);
+ x9 = _mm256_sub_epi16(y8, y9);
+ x10 = _mm256_sub_epi16(y11, y10);
+ x11 = _mm256_add_epi16(y11, y10);
+ x12 = _mm256_add_epi16(y12, y13);
+ x13 = _mm256_sub_epi16(y12, y13);
+ x14 = _mm256_sub_epi16(y15, y14);
+ x15 = _mm256_add_epi16(y15, y14);
+
+ // stage 8
+ u0 = _mm256_unpacklo_epi16(x0, x15);
+ u1 = _mm256_unpackhi_epi16(x0, x15);
+ in[0] = butter_fly(u0, u1, cospi_p31_p01);
+ in[15] = butter_fly(u0, u1, cospi_m01_p31);
+
+ u0 = _mm256_unpacklo_epi16(x1, x14);
+ u1 = _mm256_unpackhi_epi16(x1, x14);
+ in[1] = butter_fly(u0, u1, cospi_p15_p17);
+ in[14] = butter_fly(u0, u1, cospi_m17_p15);
+
+ u0 = _mm256_unpacklo_epi16(x2, x13);
+ u1 = _mm256_unpackhi_epi16(x2, x13);
+ in[2] = butter_fly(u0, u1, cospi_p23_p09);
+ in[13] = butter_fly(u0, u1, cospi_m09_p23);
+
+ u0 = _mm256_unpacklo_epi16(x3, x12);
+ u1 = _mm256_unpackhi_epi16(x3, x12);
+ in[3] = butter_fly(u0, u1, cospi_p07_p25);
+ in[12] = butter_fly(u0, u1, cospi_m25_p07);
+
+ u0 = _mm256_unpacklo_epi16(x4, x11);
+ u1 = _mm256_unpackhi_epi16(x4, x11);
+ in[4] = butter_fly(u0, u1, cospi_p27_p05);
+ in[11] = butter_fly(u0, u1, cospi_m05_p27);
+
+ u0 = _mm256_unpacklo_epi16(x5, x10);
+ u1 = _mm256_unpackhi_epi16(x5, x10);
+ in[5] = butter_fly(u0, u1, cospi_p11_p21);
+ in[10] = butter_fly(u0, u1, cospi_m21_p11);
+
+ u0 = _mm256_unpacklo_epi16(x6, x9);
+ u1 = _mm256_unpackhi_epi16(x6, x9);
+ in[6] = butter_fly(u0, u1, cospi_p19_p13);
+ in[9] = butter_fly(u0, u1, cospi_m13_p19);
+
+ u0 = _mm256_unpacklo_epi16(x7, x8);
+ u1 = _mm256_unpackhi_epi16(x7, x8);
+ in[7] = butter_fly(u0, u1, cospi_p03_p29);
+ in[8] = butter_fly(u0, u1, cospi_m29_p03);
+}
+
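+// Column DCT of a 32x32 tile: split each 16-column half into even/odd
+// stage-1 terms, run the 16-point even and odd transforms, interleave the
+// coefficients, then transpose so the next pass operates on rows.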
+static void fdct32_avx2(__m256i *in0, __m256i *in1) {
+ __m256i even0[16], even1[16], odd0[16], odd1[16];
+ prepare_16x16_even(in0, even0);
+ fdct16_avx2(even0);
+
+ prepare_16x16_odd(in0, odd0);
+ fdct16_odd_avx2(odd0);
+
+ prepare_16x16_even(in1, even1);
+ fdct16_avx2(even1);
+
+ prepare_16x16_odd(in1, odd1);
+ fdct16_odd_avx2(odd1);
+
+ collect_coeffs(even0, odd0, even1, odd1, in0, in1);
+
+ mm256_transpose_32x32(in0, in1);
+}
+#endif // CONFIG_EXT_TX
+
+static INLINE void write_buffer_32x32(const __m256i *in0, const __m256i *in1,
+ int stride, tran_low_t *output) {
+ int i = 0;
+ tran_low_t *coeff = output;
+ while (i < 32) {
+ _mm256_storeu_si256((__m256i *)coeff, in0[i]);
+ _mm256_storeu_si256((__m256i *)(coeff + 16), in1[i]);
+ coeff += stride;
+ i += 1;
+ }
+}
+
+#if CONFIG_EXT_TX
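+// Half-right ADST for one 16-column half: rows 0..15 pass through with a *4
+// scale, rows 16..31 are scaled by Sqrt2 (with rounding) and sent through a
+// 16-point DCT; fhalfright32_avx2() then swaps the two halves.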
+static void fhalfright32_16col_avx2(__m256i *in) {
+ int i = 0;
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i sqrt2 = _mm256_set1_epi16(Sqrt2);
+ const __m256i dct_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING);
+ __m256i x0, x1;
+
+ while (i < 16) {
+ in[i] = _mm256_slli_epi16(in[i], 2);
+ x0 = _mm256_unpacklo_epi16(in[i + 16], zero);
+ x1 = _mm256_unpackhi_epi16(in[i + 16], zero);
+ x0 = _mm256_madd_epi16(x0, sqrt2);
+ x1 = _mm256_madd_epi16(x1, sqrt2);
+ x0 = _mm256_add_epi32(x0, dct_rounding);
+ x1 = _mm256_add_epi32(x1, dct_rounding);
+ x0 = _mm256_srai_epi32(x0, DCT_CONST_BITS);
+ x1 = _mm256_srai_epi32(x1, DCT_CONST_BITS);
+ in[i + 16] = _mm256_packs_epi32(x0, x1);
+ i += 1;
+ }
+ fdct16_avx2(&in[16]);
+}
+
+static void fhalfright32_avx2(__m256i *in0, __m256i *in1) {
+ fhalfright32_16col_avx2(in0);
+ fhalfright32_16col_avx2(in1);
+ mm256_vectors_swap(in0, &in0[16], 16);
+ mm256_vectors_swap(in1, &in1[16], 16);
+ mm256_transpose_32x32(in0, in1);
+}
+
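+// Gather a 32x32 block as four 16x16 quadrants; for flipped transforms the
+// quadrant pointers are exchanged here while load_buffer_16x16() flips the
+// samples inside each quadrant.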
+static void load_buffer_32x32(const int16_t *input, int stride, int flipud,
+ int fliplr, __m256i *in0, __m256i *in1) {
+ // Load 4 16x16 blocks
+ const int16_t *topL = input;
+ const int16_t *topR = input + 16;
+ const int16_t *botL = input + 16 * stride;
+ const int16_t *botR = input + 16 * stride + 16;
+
+ const int16_t *tmp;
+
+ if (flipud) {
+    // Swap the top and bottom blocks of the left column
+ tmp = topL;
+ topL = botL;
+ botL = tmp;
+    // Swap the top and bottom blocks of the right column
+ tmp = topR;
+ topR = botR;
+ botR = tmp;
+ }
+
+ if (fliplr) {
+    // Swap the left and right blocks of the top row
+ tmp = topL;
+ topL = topR;
+ topR = tmp;
+    // Swap the left and right blocks of the bottom row
+ tmp = botL;
+ botL = botR;
+ botR = tmp;
+ }
+
+ // load first 16 columns
+ load_buffer_16x16(topL, stride, flipud, fliplr, in0);
+ load_buffer_16x16(botL, stride, flipud, fliplr, in0 + 16);
+
+ // load second 16 columns
+ load_buffer_16x16(topR, stride, flipud, fliplr, in1);
+ load_buffer_16x16(botR, stride, flipud, fliplr, in1 + 16);
+}
+#endif // CONFIG_EXT_TX
+
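+// Rounding right shift by two with the extra bias applied to negative inputs:
+// computes (x + 1 + (x < 0)) >> 2 for each element.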
+static void nr_right_shift_32x32_16col(__m256i *in) {
+ int i = 0;
+ const __m256i one = _mm256_set1_epi16(1);
+ __m256i sign;
+ while (i < 32) {
+ sign = _mm256_srai_epi16(in[i], 15);
+ in[i] = _mm256_add_epi16(in[i], one);
+ in[i] = _mm256_sub_epi16(in[i], sign);
+ in[i] = _mm256_srai_epi16(in[i], 2);
+ i += 1;
+ }
+}
+
+// Negative rounding
+static void nr_right_shift_32x32(__m256i *in0, __m256i *in1) {
+ nr_right_shift_32x32_16col(in0);
+ nr_right_shift_32x32_16col(in1);
+}
+
+#if CONFIG_EXT_TX
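+// Rounding right shift by two with the extra bias applied to positive inputs:
+// computes (x + 1 + (x > 0)) >> 2 for each element.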
+static void pr_right_shift_32x32_16col(__m256i *in) {
+ int i = 0;
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i one = _mm256_set1_epi16(1);
+ __m256i sign;
+ while (i < 32) {
+ sign = _mm256_cmpgt_epi16(in[i], zero);
+ in[i] = _mm256_add_epi16(in[i], one);
+ in[i] = _mm256_sub_epi16(in[i], sign);
+ in[i] = _mm256_srai_epi16(in[i], 2);
+ i += 1;
+ }
+}
+
+// Positive rounding
+static void pr_right_shift_32x32(__m256i *in0, __m256i *in1) {
+ pr_right_shift_32x32_16col(in0);
+ pr_right_shift_32x32_16col(in1);
+}
+
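+// 32x32 identity transform: scale every sample by 4, then transpose so the
+// second pass sees the other orientation.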
+static void fidtx32_avx2(__m256i *in0, __m256i *in1) {
+ int i = 0;
+ while (i < 32) {
+ in0[i] = _mm256_slli_epi16(in0[i], 2);
+ in1[i] = _mm256_slli_epi16(in1[i], 2);
+ i += 1;
+ }
+ mm256_transpose_32x32(in0, in1);
+}
+#endif
+
+void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+  __m256i in0[32];  // 32 rows, left 16 columns
+  __m256i in1[32];  // 32 rows, right 16 columns
+ (void)input;
+ (void)stride;
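+  // input and stride are consumed only by the CONFIG_EXT_TX cases while the
+  // DCT_DCT path below stays disabled; the casts silence unused-parameter
+  // warnings.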
+
+ switch (tx_type) {
+// TODO(luoyi): For DCT_DCT, fwd_txfm_32x32() currently uses the aom_dsp
+// forward transform set. This function is faster, but enabling it here
+// requires a matching change to the corresponding inverse transform.
+// case DCT_DCT:
+// load_buffer_32x32(input, stride, 0, 0, in0, in1);
+// fdct32_avx2(in0, in1);
+// pr_right_shift_32x32(in0, in1);
+// fdct32_avx2(in0, in1);
+// break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ load_buffer_32x32(input, stride, 0, 0, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fdct32_avx2(in0, in1);
+ break;
+ case DCT_ADST:
+ load_buffer_32x32(input, stride, 0, 0, in0, in1);
+ fdct32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+ case ADST_ADST:
+ load_buffer_32x32(input, stride, 0, 0, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+ case FLIPADST_DCT:
+ load_buffer_32x32(input, stride, 1, 0, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fdct32_avx2(in0, in1);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_32x32(input, stride, 0, 1, in0, in1);
+ fdct32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_32x32(input, stride, 1, 1, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_32x32(input, stride, 0, 1, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_32x32(input, stride, 1, 0, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+ case V_DCT:
+ load_buffer_32x32(input, stride, 0, 0, in0, in1);
+ fdct32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fidtx32_avx2(in0, in1);
+ break;
+ case H_DCT:
+ load_buffer_32x32(input, stride, 0, 0, in0, in1);
+ fidtx32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fdct32_avx2(in0, in1);
+ break;
+ case V_ADST:
+ load_buffer_32x32(input, stride, 0, 0, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fidtx32_avx2(in0, in1);
+ break;
+ case H_ADST:
+ load_buffer_32x32(input, stride, 0, 0, in0, in1);
+ fidtx32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+ case V_FLIPADST:
+ load_buffer_32x32(input, stride, 1, 0, in0, in1);
+ fhalfright32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fidtx32_avx2(in0, in1);
+ break;
+ case H_FLIPADST:
+ load_buffer_32x32(input, stride, 0, 1, in0, in1);
+ fidtx32_avx2(in0, in1);
+ pr_right_shift_32x32(in0, in1);
+ fhalfright32_avx2(in0, in1);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+ nr_right_shift_32x32(in0, in1);
+ write_buffer_32x32(in0, in1, 32, output);
+ _mm256_zeroupper();
}
diff --git a/test/ans_test.cc b/test/ans_test.cc
index ca38de2..ba8e3c7 100644
--- a/test/ans_test.cc
+++ b/test/ans_test.cc
@@ -74,18 +74,21 @@
return ans_read_end(&d);
}
-// TODO(aconverse@google.com): replace this with a more representative
-// distribution from the codec.
-const rans_sym rans_sym_tab[] = {
- { 67, 0 }, { 99, 67 }, { 575, 166 }, { 283, 741 },
-};
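+// A codec-representative symbol pdf; the ten entries sum to RANS_PRECISION
+// (1024).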
+const aom_cdf_prob spareto65[] = { 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 };
-std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
+const int kRansSymbols =
+ static_cast<int>(sizeof(spareto65) / sizeof(spareto65[0]));
+
+std::vector<int> ans_encode_build_vals(rans_sym *const tab, int iters) {
+ aom_cdf_prob sum = 0;
+ for (int i = 0; i < kRansSymbols; ++i) {
+ tab[i].cum_prob = sum;
+ tab[i].prob = spareto65[i];
+ sum += spareto65[i];
+ }
std::vector<int> p_to_sym;
- int i = 0;
- while (p_to_sym.size() < RANS_PRECISION) {
+ for (int i = 0; i < kRansSymbols; ++i) {
p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
- ++i;
}
assert(p_to_sym.size() == RANS_PRECISION);
std::vector<int> ret;
@@ -97,10 +100,11 @@
return ret;
}
-void rans_build_dec_tab(const struct rans_sym sym_tab[], rans_lut dec_tab) {
- dec_tab[0] = 0;
- for (int i = 1; dec_tab[i - 1] < RANS_PRECISION; ++i) {
- dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob;
+void rans_build_dec_tab(const struct rans_sym sym_tab[],
+ aom_cdf_prob *dec_tab) {
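+  // dec_tab[i] holds the cumulative probability through symbol i, so symbol
+  // i owns the range [dec_tab[i - 1], dec_tab[i]); the last entry written
+  // equals RANS_PRECISION.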
+ unsigned int sum = 0;
+ for (int i = 0; sum < RANS_PRECISION; ++i) {
+ dec_tab[i] = sum += sym_tab[i].prob;
}
}
@@ -108,7 +112,7 @@
uint8_t *buf) {
AnsCoder a;
ans_write_init(&a, buf);
- rans_lut dec_tab;
+ aom_cdf_prob dec_tab[kRansSymbols];
rans_build_dec_tab(tab, dec_tab);
std::clock_t start = std::clock();
@@ -149,16 +153,20 @@
class AnsTest : public ::testing::Test {
protected:
static void SetUpTestCase() {
- sym_vec_ = ans_encode_build_vals(rans_sym_tab, kNumSyms);
+ sym_vec_ = ans_encode_build_vals(rans_sym_tab_, kNumSyms);
}
virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; }
virtual void TearDown() { delete[] buf_; }
static const int kNumSyms = 25000000;
static std::vector<int> sym_vec_;
+ static rans_sym rans_sym_tab_[kRansSymbols];
uint8_t *buf_;
};
std::vector<int> AnsTest::sym_vec_;
+rans_sym AnsTest::rans_sym_tab_[kRansSymbols];
TEST_F(AbsTest, Uabs) { EXPECT_TRUE(check_uabs(pv_vec_, buf_)); }
-TEST_F(AnsTest, Rans) { EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab, buf_)); }
+TEST_F(AnsTest, Rans) {
+ EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab_, buf_));
+}
} // namespace
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index 9a661f9..e4179ef 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -402,6 +402,12 @@
AOM_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_AVX2 && !CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans32x32Test,
+ ::testing::Values(make_tuple(&aom_fdct32x32_1_avx2,
+ AOM_BITS_8)));
+#endif // HAVE_AVX2 && !CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
#if HAVE_SSE2 && CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans32x32Test,
diff --git a/test/fht32x32_test.cc b/test/fht32x32_test.cc
new file mode 100644
index 0000000..a949ebf
--- /dev/null
+++ b/test/fht32x32_test.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+ int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht32x32Param;
+
+void fht32x32_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+ av1_fht32x32_c(in, out, stride, tx_type);
+}
+
+#if CONFIG_AOM_HIGHBITDEPTH
+typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+ int tx_type, int bd);
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
+ int tx_type, int bd);
+
+// Target optimized function, tx_type, bit depth
+typedef tuple<HbdHtFunc, int, int> HighbdHt32x32Param;
+
+void highbd_fht32x32_ref(const int16_t *in, int32_t *out, int stride,
+ int tx_type, int bd) {
+ av1_fwd_txfm2d_32x32_c(in, out, stride, tx_type, bd);
+}
+#endif // CONFIG_AOM_HIGHBITDEPTH
+
+#if HAVE_AVX2
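+// Only the forward transform is checked against the C reference (CoeffCheck
+// below), so the inverse-transform slot in the test tuple is a no-op.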
+void dummy_inv_txfm(const tran_low_t *in, uint8_t *out, int stride,
+ int tx_type) {
+ (void)in;
+ (void)out;
+ (void)stride;
+ (void)tx_type;
+}
+#endif
+
+class AV1Trans32x32HT : public libaom_test::TransformTestBase,
+ public ::testing::TestWithParam<Ht32x32Param> {
+ public:
+ virtual ~AV1Trans32x32HT() {}
+
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ tx_type_ = GET_PARAM(2);
+ pitch_ = 32;
+ fwd_txfm_ref = fht32x32_ref;
+ bit_depth_ = GET_PARAM(3);
+ mask_ = (1 << bit_depth_) - 1;
+ num_coeffs_ = GET_PARAM(4);
+ }
+ virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+ void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+ fwd_txfm_(in, out, stride, tx_type_);
+ }
+
+ void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+ inv_txfm_(out, dst, stride, tx_type_);
+ }
+
+ FhtFunc fwd_txfm_;
+ IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
+
+#if CONFIG_AOM_HIGHBITDEPTH
+class AV1HighbdTrans32x32HT
+ : public ::testing::TestWithParam<HighbdHt32x32Param> {
+ public:
+ virtual ~AV1HighbdTrans32x32HT() {}
+
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ fwd_txfm_ref_ = highbd_fht32x32_ref;
+ tx_type_ = GET_PARAM(1);
+ bit_depth_ = GET_PARAM(2);
+ mask_ = (1 << bit_depth_) - 1;
+ num_coeffs_ = 1024;
+
+ input_ = reinterpret_cast<int16_t *>(
+ aom_memalign(32, sizeof(int16_t) * num_coeffs_));
+ output_ = reinterpret_cast<int32_t *>(
+ aom_memalign(32, sizeof(int32_t) * num_coeffs_));
+ output_ref_ = reinterpret_cast<int32_t *>(
+ aom_memalign(32, sizeof(int32_t) * num_coeffs_));
+ }
+
+ virtual void TearDown() {
+ aom_free(input_);
+ aom_free(output_);
+ aom_free(output_ref_);
+ libaom_test::ClearSystemState();
+ }
+
+ protected:
+ void RunBitexactCheck();
+
+ private:
+ HbdHtFunc fwd_txfm_;
+ HbdHtFunc fwd_txfm_ref_;
+ int tx_type_;
+ int bit_depth_;
+ int mask_;
+ int num_coeffs_;
+ int16_t *input_;
+ int32_t *output_;
+ int32_t *output_ref_;
+};
+
+void AV1HighbdTrans32x32HT::RunBitexactCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int i, j;
+ const int stride = 32;
+ const int num_tests = 1000;
+
+ for (i = 0; i < num_tests; ++i) {
+ for (j = 0; j < num_coeffs_; ++j) {
+ input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+ }
+
+ fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
+ ASM_REGISTER_STATE_CHECK(
+ fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_));
+
+ for (j = 0; j < num_coeffs_; ++j) {
+ EXPECT_EQ(output_ref_[j], output_[j])
+ << "Not bit-exact result at index: " << j << " at test block: " << i;
+ }
+ }
+}
+
+TEST_P(AV1HighbdTrans32x32HT, HighbdCoeffCheck) { RunBitexactCheck(); }
+#endif // CONFIG_AOM_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_AVX2
+const Ht32x32Param kArrayHt32x32Param_avx2[] = {
+ // TODO(luoyi): DCT_DCT tx_type is not enabled in av1_fht32x32_c(avx2) yet.
+ // make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 0, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 1, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 2, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 3, AOM_BITS_8, 1024),
+#if CONFIG_EXT_TX
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 4, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 5, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 6, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 7, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 8, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 10, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 11, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 12, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 13, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 14, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 15, AOM_BITS_8, 1024)
+#endif // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans32x32HT,
+ ::testing::ValuesIn(kArrayHt32x32Param_avx2));
+#endif // HAVE_AVX2
+} // namespace
diff --git a/test/test.mk b/test/test.mk
index c071cea..162d7c9 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -144,6 +144,7 @@
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x8_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_iht8x16_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_iht16x8_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fht32x32_test.cc
endif
LIBAOM_TEST_SRCS-$(CONFIG_EXT_TILE) += av1_ext_tile_test.cc
diff --git a/tools/gen_constrained_tokenset.py b/tools/gen_constrained_tokenset.py
new file mode 100755
index 0000000..a0f8280
--- /dev/null
+++ b/tools/gen_constrained_tokenset.py
@@ -0,0 +1,115 @@
+#!/usr/bin/python
+##
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+"""Generate the probability model for the constrained token set.
+
+Model obtained from a 2-sided zero-centered distribution derived
+from a Pareto distribution. The cdf of the distribution is:
+cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
+
+For a given beta and a given probability of the 1-node, alpha is solved for
+first, and the resulting {alpha, beta} pair is then used to generate the
+probabilities for the rest of the nodes.
+"""
+
+import heapq
+import sys
+import numpy as np
+import scipy.optimize
+import scipy.stats
+
+
+def cdf_spareto(x, xm, beta):
+ p = 1 - (xm / (np.abs(x) + xm))**beta
+ p = 0.5 + 0.5 * np.sign(x) * p
+ return p
+
+
+def get_spareto(p, beta):
+ cdf = cdf_spareto
+
+ def func(x):
+ return ((cdf(1.5, x, beta) - cdf(0.5, x, beta)) /
+ (1 - cdf(0.5, x, beta)) - p)**2
+
+ alpha = scipy.optimize.fminbound(func, 1e-12, 10000, xtol=1e-12)
+ parray = np.zeros(11)
+ parray[0] = 2 * (cdf(0.5, alpha, beta) - 0.5)
+ parray[1] = (2 * (cdf(1.5, alpha, beta) - cdf(0.5, alpha, beta)))
+ parray[2] = (2 * (cdf(2.5, alpha, beta) - cdf(1.5, alpha, beta)))
+ parray[3] = (2 * (cdf(3.5, alpha, beta) - cdf(2.5, alpha, beta)))
+ parray[4] = (2 * (cdf(4.5, alpha, beta) - cdf(3.5, alpha, beta)))
+ parray[5] = (2 * (cdf(6.5, alpha, beta) - cdf(4.5, alpha, beta)))
+ parray[6] = (2 * (cdf(10.5, alpha, beta) - cdf(6.5, alpha, beta)))
+ parray[7] = (2 * (cdf(18.5, alpha, beta) - cdf(10.5, alpha, beta)))
+ parray[8] = (2 * (cdf(34.5, alpha, beta) - cdf(18.5, alpha, beta)))
+ parray[9] = (2 * (cdf(66.5, alpha, beta) - cdf(34.5, alpha, beta)))
+ parray[10] = 2 * (1. - cdf(66.5, alpha, beta))
+ return parray
+
+
+def quantize_probs(p, save_first_bin, bits):
+  """Quantize probabilities precisely.
+
+  Quantize probabilities, minimizing dH (the Kullback-Leibler divergence),
+  approximated by sum((p_i - q_i)^2 / p_i).
+ References:
+ https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
+ https://github.com/JarekDuda/AsymmetricNumeralSystemsToolkit
+ """
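+  # For instance, with bits=3 (L = 8) and p = [0.5, 0.3, 0.2]: pL rounds to
+  # q = [4, 2, 2], which already sums to L, so no heap correction is needed.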
+ num_sym = p.size
+ p = np.clip(p, 1e-16, 1)
+ L = 2**bits
+ pL = p * L
+ ip = 1. / p # inverse probability
+ q = np.clip(np.round(pL), 1, L + 1 - num_sym)
+ quant_err = (pL - q)**2 * ip
+ sgn = np.sign(L - q.sum()) # direction of correction
+ if sgn != 0: # correction is needed
+ v = [] # heap of adjustment results (adjustment err, index) of each symbol
+ for i in range(1 if save_first_bin else 0, num_sym):
+ q_adj = q[i] + sgn
+ if q_adj > 0 and q_adj < L:
+ adj_err = (pL[i] - q_adj)**2 * ip[i] - quant_err[i]
+ heapq.heappush(v, (adj_err, i))
+ while q.sum() != L:
+ # apply lowest error adjustment
+ (adj_err, i) = heapq.heappop(v)
+ quant_err[i] += adj_err
+ q[i] += sgn
+ # calculate the cost of adjusting this symbol again
+ q_adj = q[i] + sgn
+ if q_adj > 0 and q_adj < L:
+ adj_err = (pL[i] - q_adj)**2 * ip[i] - quant_err[i]
+ heapq.heappush(v, (adj_err, i))
+ return q
+
+
+def get_quantized_spareto(p, beta, bits):
+ parray = get_spareto(p, beta)
+ parray = parray[1:] / (1 - parray[0])
+ qarray = quantize_probs(parray, True, bits)
+ return qarray.astype(np.int)
+
+
+def main(bits=8):
+ beta = 8
+ for q in range(1, 256):
+ parray = get_quantized_spareto(q / 256., beta, bits)
+ assert parray.sum() == 2**bits
+ print '{', ', '.join('%d' % i for i in parray), '},'
+
+
+if __name__ == '__main__':
+ if len(sys.argv) > 1:
+ main(int(sys.argv[1]))
+ else:
+ main()