Merge changes I3922dea2,I3bab2848,I21f7478a,Ida5de713,Ib9f0eefe, ... into nextgenv2
* changes:
Fix warnings reported by -Wshadow: Part4: main directory
Fix warnings reported by -Wshadow: Part3: test/ directory
Fix warnings reported by -Wshadow: Part2b: more from av1 directory
Fix warnings reported by -Wshadow: Part2: av1 directory
Fix warnings reported by -Wshadow: Part1b: scan_order struct and variable
Fix warnings reported by -Wshadow: Part1: aom_dsp directory
Move STAT_TYPE enum to source file.
Code cleanup: mainly rd_pick_partition and methods called from there.
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index d2fd5f2..68e1339 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -28,6 +28,29 @@
#include "aom_dsp/prob.h"
#include "av1/common/odintrin.h"
+#if CONFIG_ACCOUNTING
+#include "av1/common/accounting.h"
+#define ACCT_STR_NAME acct_str
+#define ACCT_STR_PARAM , const char *ACCT_STR_NAME
+#define ACCT_STR_ARG(s) , s
+#else
+#define ACCT_STR_PARAM
+#define ACCT_STR_ARG(s)
+#endif
+
+#define aom_read(r, prob, ACCT_STR_NAME) \
+ aom_read_(r, prob ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_bit(r, ACCT_STR_NAME) \
+ aom_read_bit_(r ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_tree(r, tree, probs, ACCT_STR_NAME) \
+ aom_read_tree_(r, tree, probs ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_literal(r, bits, ACCT_STR_NAME) \
+ aom_read_literal_(r, bits ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_tree_bits(r, tree, probs, ACCT_STR_NAME) \
+ aom_read_tree_bits_(r, tree, probs ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_symbol(r, cdf, nsymbs, ACCT_STR_NAME) \
+ aom_read_symbol_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -79,6 +102,7 @@
#endif
}
+// Returns the position in the bit reader in bits.
static INLINE ptrdiff_t aom_reader_tell(const aom_reader *r) {
#if CONFIG_ANS
(void)r;
@@ -91,75 +115,113 @@
#endif
}
-static INLINE int aom_read(aom_reader *r, int prob) {
+// Returns the position in the bit reader in 1/8th bits.
+static INLINE ptrdiff_t aom_reader_tell_frac(const aom_reader *r) {
#if CONFIG_ANS
- return uabs_read(r, prob);
+ (void)r;
+ assert(0 && "aom_reader_tell_frac() is unimplemented for ANS");
+ return 0;
#elif CONFIG_DAALA_EC
- return aom_daala_read(r, prob);
+ return aom_daala_reader_tell_frac(r);
#else
- return aom_dk_read(r, prob);
+ return aom_dk_reader_tell_frac(r);
#endif
}
-static INLINE int aom_read_bit(aom_reader *r) {
+#if CONFIG_ACCOUNTING
+// Charges the bits consumed since the previous accounting call to the symbol
+// named by ACCT_STR_NAME. Does nothing when no Accounting is attached to r.
+static INLINE void aom_process_accounting(const aom_reader *r ACCT_STR_PARAM) {
+  Accounting *const acct = r->accounting;
+  uint32_t pos_frac;
+  uint32_t delta_frac;
+  if (acct == NULL) return;
+  // Current reader position as reported by aom_reader_tell_frac() (1/8th
+  // bits), stored in 32 bits like Accounting::last_tell_frac.
+  pos_frac = aom_reader_tell_frac(r);
+  delta_frac = pos_frac - acct->last_tell_frac;
+  aom_accounting_record(acct, ACCT_STR_NAME, delta_frac);
+  // Remember where this symbol ended so the next one is charged from here.
+  acct->last_tell_frac = pos_frac;
+}
+#endif
+
+static INLINE int aom_read_(aom_reader *r, int prob ACCT_STR_PARAM) {
+ int ret;
#if CONFIG_ANS
- return uabs_read_bit(r); // Non trivial optimization at half probability
+ ret = uabs_read(r, prob);
+#elif CONFIG_DAALA_EC
+ ret = aom_daala_read(r, prob);
#else
- return aom_read(r, 128); // aom_prob_half
+ ret = aom_dk_read(r, prob);
#endif
+#if CONFIG_ACCOUNTING
+ if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+#endif
+ return ret;
}
-static INLINE int aom_read_literal(aom_reader *r, int bits) {
+static INLINE int aom_read_bit_(aom_reader *r ACCT_STR_PARAM) {
+ int ret;
+#if CONFIG_ANS
+ ret = uabs_read_bit(r); // Non trivial optimization at half probability
+#else
+ ret = aom_read(r, 128, NULL); // aom_prob_half
+#endif
+#if CONFIG_ACCOUNTING
+ if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+#endif
+ return ret;
+}
+
+static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_STR_PARAM) {
int literal = 0, bit;
- for (bit = bits - 1; bit >= 0; bit--) literal |= aom_read_bit(r) << bit;
-
+ for (bit = bits - 1; bit >= 0; bit--) literal |= aom_read_bit(r, NULL) << bit;
+#if CONFIG_ACCOUNTING
+ if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+#endif
return literal;
}
-static INLINE int aom_read_tree_bits(aom_reader *r, const aom_tree_index *tree,
- const aom_prob *probs) {
+static INLINE int aom_read_tree_bits_(aom_reader *r, const aom_tree_index *tree,
+ const aom_prob *probs ACCT_STR_PARAM) {
aom_tree_index i = 0;
- while ((i = tree[i + aom_read(r, probs[i >> 1])]) > 0) continue;
-
+ while ((i = tree[i + aom_read(r, probs[i >> 1], NULL)]) > 0) continue;
+#if CONFIG_ACCOUNTING
+ if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+#endif
return -i;
}
-static INLINE int aom_read_tree(aom_reader *r, const aom_tree_index *tree,
- const aom_prob *probs) {
+static INLINE int aom_read_tree_(aom_reader *r, const aom_tree_index *tree,
+ const aom_prob *probs ACCT_STR_PARAM) {
+ int ret;
#if CONFIG_DAALA_EC
- return daala_read_tree_bits(r, tree, probs);
+ ret = daala_read_tree_bits(r, tree, probs);
#else
- return aom_read_tree_bits(r, tree, probs);
+ ret = aom_read_tree_bits(r, tree, probs, NULL);
#endif
+#if CONFIG_ACCOUNTING
+ if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+#endif
+ return ret;
}
-static INLINE int aom_read_symbol(aom_reader *r, const aom_cdf_prob *cdf,
- int nsymbs) {
+static INLINE int aom_read_symbol_(aom_reader *r, const aom_cdf_prob *cdf,
+ int nsymbs ACCT_STR_PARAM) {
+ int ret;
#if CONFIG_ANS
(void)nsymbs;
- return rans_read(r, cdf);
+ ret = rans_read(r, cdf);
+#elif CONFIG_DAALA_EC
+ ret = daala_read_symbol(r, cdf, nsymbs);
#else
(void)r;
(void)cdf;
(void)nsymbs;
assert(0 && "Unsupported bitreader operation");
- return -1;
+ ret = -1;
#endif
-}
-
-static INLINE int aom_read_tree_cdf(aom_reader *r, const uint16_t *cdf,
- int nsymbs) {
-#if CONFIG_DAALA_EC
- return daala_read_symbol(r, cdf, nsymbs);
-#else
- (void)r;
- (void)cdf;
- (void)nsymbs;
- assert(0 && "Unsupported bitreader operation");
- return -1;
+#if CONFIG_ACCOUNTING
+ if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
#endif
+ return ret;
}
#ifdef __cplusplus
diff --git a/aom_dsp/daalaboolreader.c b/aom_dsp/daalaboolreader.c
index 8e1c782..f0da8eb 100644
--- a/aom_dsp/daalaboolreader.c
+++ b/aom_dsp/daalaboolreader.c
@@ -18,6 +18,9 @@
r->buffer_end = buffer + size;
r->buffer = buffer;
od_ec_dec_init(&r->ec, buffer, size - 1);
+#if CONFIG_ACCOUNTING
+ r->accounting = NULL;
+#endif
return 0;
}
@@ -28,3 +31,7 @@
ptrdiff_t aom_daala_reader_tell(const daala_reader *r) {
return od_ec_dec_tell(&r->ec);
}
+
+// Fractional-bit counterpart of aom_daala_reader_tell(): forwards the
+// reader's entropy decoder state to od_ec_dec_tell_frac() and returns its
+// result.
+ptrdiff_t aom_daala_reader_tell_frac(const daala_reader *r) {
+  const ptrdiff_t pos_frac = od_ec_dec_tell_frac(&r->ec);
+  return pos_frac;
+}
diff --git a/aom_dsp/daalaboolreader.h b/aom_dsp/daalaboolreader.h
index 4fc7ff4..10dc391 100644
--- a/aom_dsp/daalaboolreader.h
+++ b/aom_dsp/daalaboolreader.h
@@ -12,8 +12,12 @@
#ifndef AOM_DSP_DAALABOOLREADER_H_
#define AOM_DSP_DAALABOOLREADER_H_
+#include "aom/aom_integer.h"
#include "aom_dsp/entdec.h"
#include "aom_dsp/prob.h"
+#if CONFIG_ACCOUNTING
+#include "av1/common/accounting.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -23,6 +27,9 @@
const uint8_t *buffer;
const uint8_t *buffer_end;
od_ec_dec ec;
+#if CONFIG_ACCOUNTING
+ Accounting *accounting;
+#endif
};
typedef struct daala_reader daala_reader;
@@ -30,6 +37,7 @@
int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size);
const uint8_t *aom_daala_reader_find_end(daala_reader *r);
ptrdiff_t aom_daala_reader_tell(const daala_reader *r);
+ptrdiff_t aom_daala_reader_tell_frac(const daala_reader *r);
static INLINE int aom_daala_read(daala_reader *r, int prob) {
if (prob == 128) {
diff --git a/aom_dsp/dkboolreader.c b/aom_dsp/dkboolreader.c
index c3b7782..4079d70 100644
--- a/aom_dsp/dkboolreader.c
+++ b/aom_dsp/dkboolreader.c
@@ -36,6 +36,9 @@
r->decrypt_cb = decrypt_cb;
r->decrypt_state = decrypt_state;
aom_dk_reader_fill(r);
+#if CONFIG_ACCOUNTING
+ r->accounting = NULL;
+#endif
return aom_dk_read_bit(r) != 0; // marker bit
}
}
diff --git a/aom_dsp/dkboolreader.h b/aom_dsp/dkboolreader.h
index 2fd2b37..bc4b02f 100644
--- a/aom_dsp/dkboolreader.h
+++ b/aom_dsp/dkboolreader.h
@@ -26,6 +26,9 @@
#include "aom/aomdx.h"
#include "aom/aom_integer.h"
#include "aom_dsp/prob.h"
+#if CONFIG_ACCOUNTING
+#include "av1/common/accounting.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -51,6 +54,9 @@
aom_decrypt_cb decrypt_cb;
void *decrypt_state;
uint8_t clear_buffer[sizeof(BD_VALUE) + 1];
+#if CONFIG_ACCOUNTING
+ Accounting *accounting;
+#endif
};
int aom_dk_reader_init(struct aom_dk_reader *r, const uint8_t *buffer,
@@ -65,7 +71,29 @@
const size_t bits_read = (r->buffer - r->buffer_start) * CHAR_BIT;
const int count =
(r->count < LOTS_OF_BITS) ? r->count : r->count - LOTS_OF_BITS;
- return bits_read + BD_VALUE_SIZE - (count + CHAR_BIT);
+ return bits_read - (count + CHAR_BIT);
+}
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+ 3 => 1/8th bits.*/
+#define DK_BITRES (3)
+
+/*Returns the reader position with DK_BITRES fractional bits of resolution:
+   the whole-bit position from aom_dk_reader_tell(), scaled by
+   1 << DK_BITRES, minus a correction derived from the current range.*/
+static INLINE ptrdiff_t aom_dk_reader_tell_frac(const struct aom_dk_reader *r) {
+  uint32_t pos_frac = aom_dk_reader_tell(r) << DK_BITRES;
+  uint32_t rng = r->range;
+  int frac = 0;
+  int step;
+  /*Each pass squares the range, rescales it, and shifts the resulting
+     top bit (bit 8) into the correction term.*/
+  for (step = 0; step < DK_BITRES; ++step) {
+    int carry;
+    rng = (rng * rng) >> 7;
+    carry = (int)(rng >> 8);
+    frac = (frac << 1) | carry;
+    /*Renormalise only when the squared range overflowed into bit 8.*/
+    rng >>= carry;
+  }
+  return pos_frac - frac;
+}
static INLINE int aom_dk_reader_has_error(struct aom_dk_reader *r) {
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 9730bee..29b71f3 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -103,6 +103,10 @@
AV1_COMMON_SRCS-yes += common/dering.c
AV1_COMMON_SRCS-yes += common/dering.h
endif
+ifeq ($(CONFIG_ACCOUNTING),yes)
+AV1_COMMON_SRCS-yes += common/accounting.h
+AV1_COMMON_SRCS-yes += common/accounting.c
+endif
AV1_COMMON_SRCS-yes += common/odintrin.c
AV1_COMMON_SRCS-yes += common/odintrin.h
diff --git a/av1/common/accounting.c b/av1/common/accounting.c
new file mode 100644
index 0000000..41280af
--- /dev/null
+++ b/av1/common/accounting.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "aom/aom_integer.h"
+#include "./accounting.h"
+
+/* Maps a NUL-terminated string to a slot index in
+   [0, AOM_ACCOUNTING_HASH_SIZE) by summing its bytes (read as unsigned
+   chars) and reducing the sum modulo the table size. Deliberately simple
+   rather than well distributed. */
+static int aom_accounting_hash(const char *str) {
+  const unsigned char *p;
+  uint32_t sum = 0;
+  for (p = (const unsigned char *)str; *p != 0; p++) sum += *p;
+  return sum % AOM_ACCOUNTING_HASH_SIZE;
+}
+
+/* Dictionary lookup based on an open-addressing hash table.
+   Returns the dictionary id of str, adding a private heap copy of str to the
+   dictionary when it is not present yet. Ids are assigned in insertion order
+   starting at 0. In accounting->hash_dictionary a value of -1 marks an empty
+   slot; collisions are resolved by linear probing with wrap-around.
+   Aborts if the dictionary is full or the copy cannot be allocated. */
+int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str) {
+  int hash;
+  size_t len;
+  char *copy;
+  AccountingDictionary *dictionary;
+  dictionary = &accounting->syms.dictionary;
+  hash = aom_accounting_hash(str);
+  while (accounting->hash_dictionary[hash] != -1) {
+    if (strcmp(dictionary->strs[accounting->hash_dictionary[hash]], str) == 0) {
+      return accounting->hash_dictionary[hash];
+    }
+    hash++;
+    if (hash == AOM_ACCOUNTING_HASH_SIZE) hash = 0;
+  }
+  /* No match found. */
+  assert(dictionary->num_strs + 1 < MAX_SYMBOL_TYPES);
+  /* assert() is compiled out under NDEBUG; never write past strs[]. */
+  if (dictionary->num_strs >= MAX_SYMBOL_TYPES) abort();
+  len = strlen(str);
+  copy = malloc(len + 1);
+  /* Stop here instead of writing through a NULL pointer below. */
+  if (copy == NULL) abort();
+  memcpy(copy, str, len + 1);
+  dictionary->strs[dictionary->num_strs] = copy;
+  accounting->hash_dictionary[hash] = dictionary->num_strs;
+  dictionary->num_strs++;
+  return dictionary->num_strs - 1;
+}
+
+/* Prepares accounting for use: allocates the initial symbol array (1000
+   entries), empties the string dictionary, marks every hash slot as unused
+   (-1) and resets the recording state via aom_accounting_reset().
+   Aborts if the symbol array cannot be allocated. The resources are released
+   by aom_accounting_clear(). */
+void aom_accounting_init(Accounting *accounting) {
+  int i;
+  accounting->num_syms_allocated = 1000;
+  accounting->syms.syms =
+      malloc(sizeof(AccountingSymbol) * accounting->num_syms_allocated);
+  /* Fail here rather than on the first record through a NULL array. */
+  if (accounting->syms.syms == NULL) abort();
+  accounting->syms.dictionary.num_strs = 0;
+  assert(AOM_ACCOUNTING_HASH_SIZE > 2 * MAX_SYMBOL_TYPES);
+  for (i = 0; i < AOM_ACCOUNTING_HASH_SIZE; i++)
+    accounting->hash_dictionary[i] = -1;
+  aom_accounting_reset(accounting);
+}
+
+/* Discards all recorded symbols and returns the recording state to its
+   initial values: context (x, y) = (-1, -1) and a last reader position of 0.
+   The symbol array and the string dictionary are left allocated. */
+void aom_accounting_reset(Accounting *accounting) {
+  AccountingSymbolContext *const ctx = &accounting->context;
+  ctx->x = -1;
+  ctx->y = -1;
+  accounting->last_tell_frac = 0;
+  accounting->syms.num_syms = 0;
+}
+
+/* Releases the symbol array and every dictionary string owned by accounting.
+   Freed pointers are set to NULL and the counts to 0, so a repeated call is
+   harmless. Call aom_accounting_init() again before any further use. */
+void aom_accounting_clear(Accounting *accounting) {
+  int i;
+  AccountingDictionary *dictionary;
+  free(accounting->syms.syms);
+  accounting->syms.syms = NULL;
+  accounting->syms.num_syms = 0;
+  dictionary = &accounting->syms.dictionary;
+  for (i = 0; i < dictionary->num_strs; i++) {
+    free(dictionary->strs[i]);
+    dictionary->strs[i] = NULL;
+  }
+  /* Without this a second call would free the same strings again. */
+  dictionary->num_strs = 0;
+}
+
+/* Sets the (x, y) context that aom_accounting_record() copies into each
+   symbol recorded from now on. */
+void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y) {
+  AccountingSymbolContext *const ctx = &accounting->context;
+  ctx->y = y;
+  ctx->x = x;
+}
+
+/* Records that `bits` (in units of 1/8 bit) were spent on the symbol type
+   named str under the current context. When the most recent entry has the
+   same context and the same symbol id, that entry is extended (bits added,
+   samples incremented) instead of appending a new one. The symbol array
+   doubles in size when it is full; the function aborts if it cannot grow. */
+void aom_accounting_record(Accounting *accounting, const char *str,
+                           uint32_t bits) {
+  AccountingSymbol sym;
+  /* Every path needs the id, so look it up (and possibly insert it) once. */
+  const uint32_t id = aom_accounting_dictionary_lookup(accounting, str);
+  assert(id < MAX_SYMBOL_TYPES);
+  // Reuse previous symbol if it has the same context and symbol id.
+  if (accounting->syms.num_syms) {
+    AccountingSymbol *last_sym;
+    last_sym = &accounting->syms.syms[accounting->syms.num_syms - 1];
+    if (id == last_sym->id &&
+        memcmp(&last_sym->context, &accounting->context,
+               sizeof(AccountingSymbolContext)) == 0) {
+      last_sym->bits += bits;
+      last_sym->samples++;
+      return;
+    }
+  }
+  sym.context = accounting->context;
+  sym.samples = 1;
+  sym.bits = bits;
+  sym.id = id;
+  if (accounting->syms.num_syms == accounting->num_syms_allocated) {
+    const int new_count = accounting->num_syms_allocated * 2;
+    /* Keep the old pointer until realloc() is known to have succeeded. */
+    AccountingSymbol *grown =
+        realloc(accounting->syms.syms, sizeof(AccountingSymbol) * new_count);
+    /* assert() alone would vanish under NDEBUG and leave a NULL array. */
+    if (grown == NULL) abort();
+    accounting->syms.syms = grown;
+    accounting->num_syms_allocated = new_count;
+  }
+  accounting->syms.syms[accounting->syms.num_syms++] = sym;
+}
+
+/* Prints every recorded symbol to stdout: a header line with the number of
+   entries, then one line per entry giving the symbol name, its context, the
+   bits spent (converted from 1/8-bit units to bits) and the sample count. */
+void aom_accounting_dump(Accounting *accounting) {
+  int i;
+  AccountingSymbol *sym;
+  printf("----- %d -----\n", accounting->syms.num_syms);
+  for (i = 0; i < accounting->syms.num_syms; i++) {
+    sym = &accounting->syms.syms[i];
+    /* samples is a uint32_t, so it is printed with %u. bits is divided in
+       double precision so large counts are not rounded by a float cast. */
+    printf("%s x: %d, y: %d bits: %f samples: %u\n",
+           accounting->syms.dictionary.strs[sym->id], sym->context.x,
+           sym->context.y, sym->bits / 8.0, (unsigned int)sym->samples);
+  }
+}
diff --git a/av1/common/accounting.h b/av1/common/accounting.h
new file mode 100644
index 0000000..04be326
--- /dev/null
+++ b/av1/common/accounting.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_ACCOUNTING_H_
+#define AOM_ACCOUNTING_H_
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define AOM_ACCOUNTING_HASH_SIZE (1021)
+
+/* Max number of entries for symbol types in the dictionary (increase as
+ necessary). */
+#define MAX_SYMBOL_TYPES (256)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+ 3 => 1/8th bits.*/
+#define AOM_ACCT_BITRES (3)
+
+typedef struct { /* Context copied into each recorded symbol. */
+ int16_t x; /* Set by aom_accounting_set_context(); -1 after a reset. NOTE(review): presumably a position, units not visible here - confirm. */
+ int16_t y; /* Set by aom_accounting_set_context(); -1 after a reset. */
+} AccountingSymbolContext;
+
+typedef struct { /* One recorded entry; consecutive records with equal context and id are merged into it. */
+ AccountingSymbolContext context; /* Context that was current when the entry was created. */
+ uint32_t id; /* Index of the symbol name in AccountingDictionary::strs. */
+ /** Number of bits in units of 1/8 bit. */
+ uint32_t bits;
+ uint32_t samples; /* Number of records merged into this entry (starts at 1). */
+} AccountingSymbol;
+
+/** Dictionary for translating strings into ids; an id is an index into strs. */
+typedef struct {
+ char *(strs[MAX_SYMBOL_TYPES]); /* Heap copies of the symbol names, owned by the dictionary. */
+ int num_strs; /* Number of entries of strs currently in use. */
+} AccountingDictionary;
+
+typedef struct { /* The recorded entries together with the dictionary that names them. */
+ /** All recorded symbols decoded. */
+ AccountingSymbol *syms;
+ /** Number of symbols actually recorded. */
+ int num_syms;
+ /** Dictionary for translating strings into id. */
+ AccountingDictionary dictionary;
+} AccountingSymbols;
+
+typedef struct { /* Complete accounting state; set up by aom_accounting_init(), released by aom_accounting_clear(). */
+ AccountingSymbols syms; /* Recorded entries and their name dictionary. */
+ /** Size allocated for symbols (not all may be used). */
+ int num_syms_allocated;
+ int16_t hash_dictionary[AOM_ACCOUNTING_HASH_SIZE]; /* Open-addressing table: slot -> index into the dictionary strs, -1 when empty. */
+ AccountingSymbolContext context; /* Context applied to symbols recorded from now on. */
+ uint32_t last_tell_frac; /* Reader position (1/8 bit) at the previous record; 0 after a reset. */
+} Accounting;
+
+void aom_accounting_init(Accounting *accounting);
+void aom_accounting_reset(Accounting *accounting);
+void aom_accounting_clear(Accounting *accounting);
+void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y);
+int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str);
+void aom_accounting_record(Accounting *accounting, const char *str,
+ uint32_t bits);
+void aom_accounting_dump(Accounting *accounting);
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // AOM_ACCOUNTING_H_
diff --git a/av1/common/av1_fwd_txfm1d.c b/av1/common/av1_fwd_txfm1d.c
index 4c695ae..3e9d5ec 100644
--- a/av1/common/av1_fwd_txfm1d.c
+++ b/av1/common/av1_fwd_txfm1d.c
@@ -40,6 +40,7 @@
}
#endif
+// TODO(angiebird): Make 1-d txfm functions static
void av1_fdct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
const int8_t *stage_range) {
const int32_t size = 4;
@@ -1528,3 +1529,798 @@
bf1[31] = -bf0[1];
range_check(stage, input, bf1, size, stage_range[stage]);
}
+
+#if CONFIG_TX64X64
+void av1_fdct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 64;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[64];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf1 = output;
+ bf1[0] = input[0] + input[63];
+ bf1[1] = input[1] + input[62];
+ bf1[2] = input[2] + input[61];
+ bf1[3] = input[3] + input[60];
+ bf1[4] = input[4] + input[59];
+ bf1[5] = input[5] + input[58];
+ bf1[6] = input[6] + input[57];
+ bf1[7] = input[7] + input[56];
+ bf1[8] = input[8] + input[55];
+ bf1[9] = input[9] + input[54];
+ bf1[10] = input[10] + input[53];
+ bf1[11] = input[11] + input[52];
+ bf1[12] = input[12] + input[51];
+ bf1[13] = input[13] + input[50];
+ bf1[14] = input[14] + input[49];
+ bf1[15] = input[15] + input[48];
+ bf1[16] = input[16] + input[47];
+ bf1[17] = input[17] + input[46];
+ bf1[18] = input[18] + input[45];
+ bf1[19] = input[19] + input[44];
+ bf1[20] = input[20] + input[43];
+ bf1[21] = input[21] + input[42];
+ bf1[22] = input[22] + input[41];
+ bf1[23] = input[23] + input[40];
+ bf1[24] = input[24] + input[39];
+ bf1[25] = input[25] + input[38];
+ bf1[26] = input[26] + input[37];
+ bf1[27] = input[27] + input[36];
+ bf1[28] = input[28] + input[35];
+ bf1[29] = input[29] + input[34];
+ bf1[30] = input[30] + input[33];
+ bf1[31] = input[31] + input[32];
+ bf1[32] = -input[32] + input[31];
+ bf1[33] = -input[33] + input[30];
+ bf1[34] = -input[34] + input[29];
+ bf1[35] = -input[35] + input[28];
+ bf1[36] = -input[36] + input[27];
+ bf1[37] = -input[37] + input[26];
+ bf1[38] = -input[38] + input[25];
+ bf1[39] = -input[39] + input[24];
+ bf1[40] = -input[40] + input[23];
+ bf1[41] = -input[41] + input[22];
+ bf1[42] = -input[42] + input[21];
+ bf1[43] = -input[43] + input[20];
+ bf1[44] = -input[44] + input[19];
+ bf1[45] = -input[45] + input[18];
+ bf1[46] = -input[46] + input[17];
+ bf1[47] = -input[47] + input[16];
+ bf1[48] = -input[48] + input[15];
+ bf1[49] = -input[49] + input[14];
+ bf1[50] = -input[50] + input[13];
+ bf1[51] = -input[51] + input[12];
+ bf1[52] = -input[52] + input[11];
+ bf1[53] = -input[53] + input[10];
+ bf1[54] = -input[54] + input[9];
+ bf1[55] = -input[55] + input[8];
+ bf1[56] = -input[56] + input[7];
+ bf1[57] = -input[57] + input[6];
+ bf1[58] = -input[58] + input[5];
+ bf1[59] = -input[59] + input[4];
+ bf1[60] = -input[60] + input[3];
+ bf1[61] = -input[61] + input[2];
+ bf1[62] = -input[62] + input[1];
+ bf1[63] = -input[63] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = -bf0[16] + bf0[15];
+ bf1[17] = -bf0[17] + bf0[14];
+ bf1[18] = -bf0[18] + bf0[13];
+ bf1[19] = -bf0[19] + bf0[12];
+ bf1[20] = -bf0[20] + bf0[11];
+ bf1[21] = -bf0[21] + bf0[10];
+ bf1[22] = -bf0[22] + bf0[9];
+ bf1[23] = -bf0[23] + bf0[8];
+ bf1[24] = -bf0[24] + bf0[7];
+ bf1[25] = -bf0[25] + bf0[6];
+ bf1[26] = -bf0[26] + bf0[5];
+ bf1[27] = -bf0[27] + bf0[4];
+ bf1[28] = -bf0[28] + bf0[3];
+ bf1[29] = -bf0[29] + bf0[2];
+ bf1[30] = -bf0[30] + bf0[1];
+ bf1[31] = -bf0[31] + bf0[0];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = bf0[37];
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit[stage]);
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = bf0[58];
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = -bf0[8] + bf0[7];
+ bf1[9] = -bf0[9] + bf0[6];
+ bf1[10] = -bf0[10] + bf0[5];
+ bf1[11] = -bf0[11] + bf0[4];
+ bf1[12] = -bf0[12] + bf0[3];
+ bf1[13] = -bf0[13] + bf0[2];
+ bf1[14] = -bf0[14] + bf0[1];
+ bf1[15] = -bf0[15] + bf0[0];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[47];
+ bf1[33] = bf0[33] + bf0[46];
+ bf1[34] = bf0[34] + bf0[45];
+ bf1[35] = bf0[35] + bf0[44];
+ bf1[36] = bf0[36] + bf0[43];
+ bf1[37] = bf0[37] + bf0[42];
+ bf1[38] = bf0[38] + bf0[41];
+ bf1[39] = bf0[39] + bf0[40];
+ bf1[40] = -bf0[40] + bf0[39];
+ bf1[41] = -bf0[41] + bf0[38];
+ bf1[42] = -bf0[42] + bf0[37];
+ bf1[43] = -bf0[43] + bf0[36];
+ bf1[44] = -bf0[44] + bf0[35];
+ bf1[45] = -bf0[45] + bf0[34];
+ bf1[46] = -bf0[46] + bf0[33];
+ bf1[47] = -bf0[47] + bf0[32];
+ bf1[48] = -bf0[48] + bf0[63];
+ bf1[49] = -bf0[49] + bf0[62];
+ bf1[50] = -bf0[50] + bf0[61];
+ bf1[51] = -bf0[51] + bf0[60];
+ bf1[52] = -bf0[52] + bf0[59];
+ bf1[53] = -bf0[53] + bf0[58];
+ bf1[54] = -bf0[54] + bf0[57];
+ bf1[55] = -bf0[55] + bf0[56];
+ bf1[56] = bf0[56] + bf0[55];
+ bf1[57] = bf0[57] + bf0[54];
+ bf1[58] = bf0[58] + bf0[53];
+ bf1[59] = bf0[59] + bf0[52];
+ bf1[60] = bf0[60] + bf0[51];
+ bf1[61] = bf0[61] + bf0[50];
+ bf1[62] = bf0[62] + bf0[49];
+ bf1[63] = bf0[63] + bf0[48];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = -bf0[20] + bf0[19];
+ bf1[21] = -bf0[21] + bf0[18];
+ bf1[22] = -bf0[22] + bf0[17];
+ bf1[23] = -bf0[23] + bf0[16];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[28] + bf0[27];
+ bf1[29] = bf0[29] + bf0[26];
+ bf1[30] = bf0[30] + bf0[25];
+ bf1[31] = bf0[31] + bf0[24];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+ bf1[44] = bf0[44];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = bf0[50];
+ bf1[51] = bf0[51];
+ bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit[stage]);
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[39];
+ bf1[33] = bf0[33] + bf0[38];
+ bf1[34] = bf0[34] + bf0[37];
+ bf1[35] = bf0[35] + bf0[36];
+ bf1[36] = -bf0[36] + bf0[35];
+ bf1[37] = -bf0[37] + bf0[34];
+ bf1[38] = -bf0[38] + bf0[33];
+ bf1[39] = -bf0[39] + bf0[32];
+ bf1[40] = -bf0[40] + bf0[47];
+ bf1[41] = -bf0[41] + bf0[46];
+ bf1[42] = -bf0[42] + bf0[45];
+ bf1[43] = -bf0[43] + bf0[44];
+ bf1[44] = bf0[44] + bf0[43];
+ bf1[45] = bf0[45] + bf0[42];
+ bf1[46] = bf0[46] + bf0[41];
+ bf1[47] = bf0[47] + bf0[40];
+ bf1[48] = bf0[48] + bf0[55];
+ bf1[49] = bf0[49] + bf0[54];
+ bf1[50] = bf0[50] + bf0[53];
+ bf1[51] = bf0[51] + bf0[52];
+ bf1[52] = -bf0[52] + bf0[51];
+ bf1[53] = -bf0[53] + bf0[50];
+ bf1[54] = -bf0[54] + bf0[49];
+ bf1[55] = -bf0[55] + bf0[48];
+ bf1[56] = -bf0[56] + bf0[63];
+ bf1[57] = -bf0[57] + bf0[62];
+ bf1[58] = -bf0[58] + bf0[61];
+ bf1[59] = -bf0[59] + bf0[60];
+ bf1[60] = bf0[60] + bf0[59];
+ bf1[61] = bf0[61] + bf0[58];
+ bf1[62] = bf0[62] + bf0[57];
+ bf1[63] = bf0[63] + bf0[56];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = -bf0[18] + bf0[17];
+ bf1[19] = -bf0[19] + bf0[16];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[22] + bf0[21];
+ bf1[23] = bf0[23] + bf0[20];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = -bf0[26] + bf0[25];
+ bf1[27] = -bf0[27] + bf0[24];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[30] + bf0[29];
+ bf1[31] = bf0[31] + bf0[28];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = bf0[41];
+ bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit[stage]);
+ bf1[54] = bf0[54];
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit[stage]);
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[35];
+ bf1[33] = bf0[33] + bf0[34];
+ bf1[34] = -bf0[34] + bf0[33];
+ bf1[35] = -bf0[35] + bf0[32];
+ bf1[36] = -bf0[36] + bf0[39];
+ bf1[37] = -bf0[37] + bf0[38];
+ bf1[38] = bf0[38] + bf0[37];
+ bf1[39] = bf0[39] + bf0[36];
+ bf1[40] = bf0[40] + bf0[43];
+ bf1[41] = bf0[41] + bf0[42];
+ bf1[42] = -bf0[42] + bf0[41];
+ bf1[43] = -bf0[43] + bf0[40];
+ bf1[44] = -bf0[44] + bf0[47];
+ bf1[45] = -bf0[45] + bf0[46];
+ bf1[46] = bf0[46] + bf0[45];
+ bf1[47] = bf0[47] + bf0[44];
+ bf1[48] = bf0[48] + bf0[51];
+ bf1[49] = bf0[49] + bf0[50];
+ bf1[50] = -bf0[50] + bf0[49];
+ bf1[51] = -bf0[51] + bf0[48];
+ bf1[52] = -bf0[52] + bf0[55];
+ bf1[53] = -bf0[53] + bf0[54];
+ bf1[54] = bf0[54] + bf0[53];
+ bf1[55] = bf0[55] + bf0[52];
+ bf1[56] = bf0[56] + bf0[59];
+ bf1[57] = bf0[57] + bf0[58];
+ bf1[58] = -bf0[58] + bf0[57];
+ bf1[59] = -bf0[59] + bf0[56];
+ bf1[60] = -bf0[60] + bf0[63];
+ bf1[61] = -bf0[61] + bf0[62];
+ bf1[62] = bf0[62] + bf0[61];
+ bf1[63] = bf0[63] + bf0[60];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = -bf0[17] + bf0[16];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[19] + bf0[18];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = -bf0[21] + bf0[20];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[23] + bf0[22];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = -bf0[25] + bf0[24];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[27] + bf0[26];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = -bf0[29] + bf0[28];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[31] + bf0[30];
+ bf1[32] = bf0[32];
+ bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+ bf1[43] = bf0[43];
+ bf1[44] = bf0[44];
+ bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit[stage]);
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[52];
+ bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit[stage]);
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit[stage]);
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit[stage]);
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+ bf1[32] = bf0[32] + bf0[33];
+ bf1[33] = -bf0[33] + bf0[32];
+ bf1[34] = -bf0[34] + bf0[35];
+ bf1[35] = bf0[35] + bf0[34];
+ bf1[36] = bf0[36] + bf0[37];
+ bf1[37] = -bf0[37] + bf0[36];
+ bf1[38] = -bf0[38] + bf0[39];
+ bf1[39] = bf0[39] + bf0[38];
+ bf1[40] = bf0[40] + bf0[41];
+ bf1[41] = -bf0[41] + bf0[40];
+ bf1[42] = -bf0[42] + bf0[43];
+ bf1[43] = bf0[43] + bf0[42];
+ bf1[44] = bf0[44] + bf0[45];
+ bf1[45] = -bf0[45] + bf0[44];
+ bf1[46] = -bf0[46] + bf0[47];
+ bf1[47] = bf0[47] + bf0[46];
+ bf1[48] = bf0[48] + bf0[49];
+ bf1[49] = -bf0[49] + bf0[48];
+ bf1[50] = -bf0[50] + bf0[51];
+ bf1[51] = bf0[51] + bf0[50];
+ bf1[52] = bf0[52] + bf0[53];
+ bf1[53] = -bf0[53] + bf0[52];
+ bf1[54] = -bf0[54] + bf0[55];
+ bf1[55] = bf0[55] + bf0[54];
+ bf1[56] = bf0[56] + bf0[57];
+ bf1[57] = -bf0[57] + bf0[56];
+ bf1[58] = -bf0[58] + bf0[59];
+ bf1[59] = bf0[59] + bf0[58];
+ bf1[60] = bf0[60] + bf0[61];
+ bf1[61] = -bf0[61] + bf0[60];
+ bf1[62] = -bf0[62] + bf0[63];
+ bf1[63] = bf0[63] + bf0[62];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit[stage]);
+ bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit[stage]);
+ bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[32];
+ bf1[2] = bf0[16];
+ bf1[3] = bf0[48];
+ bf1[4] = bf0[8];
+ bf1[5] = bf0[40];
+ bf1[6] = bf0[24];
+ bf1[7] = bf0[56];
+ bf1[8] = bf0[4];
+ bf1[9] = bf0[36];
+ bf1[10] = bf0[20];
+ bf1[11] = bf0[52];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[44];
+ bf1[14] = bf0[28];
+ bf1[15] = bf0[60];
+ bf1[16] = bf0[2];
+ bf1[17] = bf0[34];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[50];
+ bf1[20] = bf0[10];
+ bf1[21] = bf0[42];
+ bf1[22] = bf0[26];
+ bf1[23] = bf0[58];
+ bf1[24] = bf0[6];
+ bf1[25] = bf0[38];
+ bf1[26] = bf0[22];
+ bf1[27] = bf0[54];
+ bf1[28] = bf0[14];
+ bf1[29] = bf0[46];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[62];
+ bf1[32] = bf0[1];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[17];
+ bf1[35] = bf0[49];
+ bf1[36] = bf0[9];
+ bf1[37] = bf0[41];
+ bf1[38] = bf0[25];
+ bf1[39] = bf0[57];
+ bf1[40] = bf0[5];
+ bf1[41] = bf0[37];
+ bf1[42] = bf0[21];
+ bf1[43] = bf0[53];
+ bf1[44] = bf0[13];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[29];
+ bf1[47] = bf0[61];
+ bf1[48] = bf0[3];
+ bf1[49] = bf0[35];
+ bf1[50] = bf0[19];
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[11];
+ bf1[53] = bf0[43];
+ bf1[54] = bf0[27];
+ bf1[55] = bf0[59];
+ bf1[56] = bf0[7];
+ bf1[57] = bf0[39];
+ bf1[58] = bf0[23];
+ bf1[59] = bf0[55];
+ bf1[60] = bf0[15];
+ bf1[61] = bf0[47];
+ bf1[62] = bf0[31];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+#endif // CONFIG_TX64X64
diff --git a/av1/common/av1_inv_txfm1d.c b/av1/common/av1_inv_txfm1d.c
index 2e9bbcb..40d8403 100644
--- a/av1/common/av1_inv_txfm1d.c
+++ b/av1/common/av1_inv_txfm1d.c
@@ -40,6 +40,7 @@
}
#endif
+// TODO(angiebird): Make 1-d txfm functions static
void av1_idct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
const int8_t *stage_range) {
const int32_t size = 4;
@@ -1535,3 +1536,798 @@
bf1[31] = bf0[0];
range_check(stage, input, bf1, size, stage_range[stage]);
}
+
+#if CONFIG_TX64X64
+void av1_idct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 64;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[64];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[32];
+ bf1[2] = input[16];
+ bf1[3] = input[48];
+ bf1[4] = input[8];
+ bf1[5] = input[40];
+ bf1[6] = input[24];
+ bf1[7] = input[56];
+ bf1[8] = input[4];
+ bf1[9] = input[36];
+ bf1[10] = input[20];
+ bf1[11] = input[52];
+ bf1[12] = input[12];
+ bf1[13] = input[44];
+ bf1[14] = input[28];
+ bf1[15] = input[60];
+ bf1[16] = input[2];
+ bf1[17] = input[34];
+ bf1[18] = input[18];
+ bf1[19] = input[50];
+ bf1[20] = input[10];
+ bf1[21] = input[42];
+ bf1[22] = input[26];
+ bf1[23] = input[58];
+ bf1[24] = input[6];
+ bf1[25] = input[38];
+ bf1[26] = input[22];
+ bf1[27] = input[54];
+ bf1[28] = input[14];
+ bf1[29] = input[46];
+ bf1[30] = input[30];
+ bf1[31] = input[62];
+ bf1[32] = input[1];
+ bf1[33] = input[33];
+ bf1[34] = input[17];
+ bf1[35] = input[49];
+ bf1[36] = input[9];
+ bf1[37] = input[41];
+ bf1[38] = input[25];
+ bf1[39] = input[57];
+ bf1[40] = input[5];
+ bf1[41] = input[37];
+ bf1[42] = input[21];
+ bf1[43] = input[53];
+ bf1[44] = input[13];
+ bf1[45] = input[45];
+ bf1[46] = input[29];
+ bf1[47] = input[61];
+ bf1[48] = input[3];
+ bf1[49] = input[35];
+ bf1[50] = input[19];
+ bf1[51] = input[51];
+ bf1[52] = input[11];
+ bf1[53] = input[43];
+ bf1[54] = input[27];
+ bf1[55] = input[59];
+ bf1[56] = input[7];
+ bf1[57] = input[39];
+ bf1[58] = input[23];
+ bf1[59] = input[55];
+ bf1[60] = input[15];
+ bf1[61] = input[47];
+ bf1[62] = input[31];
+ bf1[63] = input[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit[stage]);
+ bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit[stage]);
+ bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+ bf1[32] = bf0[32] + bf0[33];
+ bf1[33] = bf0[32] - bf0[33];
+ bf1[34] = -bf0[34] + bf0[35];
+ bf1[35] = bf0[34] + bf0[35];
+ bf1[36] = bf0[36] + bf0[37];
+ bf1[37] = bf0[36] - bf0[37];
+ bf1[38] = -bf0[38] + bf0[39];
+ bf1[39] = bf0[38] + bf0[39];
+ bf1[40] = bf0[40] + bf0[41];
+ bf1[41] = bf0[40] - bf0[41];
+ bf1[42] = -bf0[42] + bf0[43];
+ bf1[43] = bf0[42] + bf0[43];
+ bf1[44] = bf0[44] + bf0[45];
+ bf1[45] = bf0[44] - bf0[45];
+ bf1[46] = -bf0[46] + bf0[47];
+ bf1[47] = bf0[46] + bf0[47];
+ bf1[48] = bf0[48] + bf0[49];
+ bf1[49] = bf0[48] - bf0[49];
+ bf1[50] = -bf0[50] + bf0[51];
+ bf1[51] = bf0[50] + bf0[51];
+ bf1[52] = bf0[52] + bf0[53];
+ bf1[53] = bf0[52] - bf0[53];
+ bf1[54] = -bf0[54] + bf0[55];
+ bf1[55] = bf0[54] + bf0[55];
+ bf1[56] = bf0[56] + bf0[57];
+ bf1[57] = bf0[56] - bf0[57];
+ bf1[58] = -bf0[58] + bf0[59];
+ bf1[59] = bf0[58] + bf0[59];
+ bf1[60] = bf0[60] + bf0[61];
+ bf1[61] = bf0[60] - bf0[61];
+ bf1[62] = -bf0[62] + bf0[63];
+ bf1[63] = bf0[62] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = bf0[16] - bf0[17];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[18] + bf0[19];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = bf0[20] - bf0[21];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[22] + bf0[23];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = bf0[24] - bf0[25];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[26] + bf0[27];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = bf0[28] - bf0[29];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[30] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+ bf1[43] = bf0[43];
+ bf1[44] = bf0[44];
+ bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit[stage]);
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[52];
+ bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit[stage]);
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit[stage]);
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit[stage]);
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[35];
+ bf1[33] = bf0[33] + bf0[34];
+ bf1[34] = bf0[33] - bf0[34];
+ bf1[35] = bf0[32] - bf0[35];
+ bf1[36] = -bf0[36] + bf0[39];
+ bf1[37] = -bf0[37] + bf0[38];
+ bf1[38] = bf0[37] + bf0[38];
+ bf1[39] = bf0[36] + bf0[39];
+ bf1[40] = bf0[40] + bf0[43];
+ bf1[41] = bf0[41] + bf0[42];
+ bf1[42] = bf0[41] - bf0[42];
+ bf1[43] = bf0[40] - bf0[43];
+ bf1[44] = -bf0[44] + bf0[47];
+ bf1[45] = -bf0[45] + bf0[46];
+ bf1[46] = bf0[45] + bf0[46];
+ bf1[47] = bf0[44] + bf0[47];
+ bf1[48] = bf0[48] + bf0[51];
+ bf1[49] = bf0[49] + bf0[50];
+ bf1[50] = bf0[49] - bf0[50];
+ bf1[51] = bf0[48] - bf0[51];
+ bf1[52] = -bf0[52] + bf0[55];
+ bf1[53] = -bf0[53] + bf0[54];
+ bf1[54] = bf0[53] + bf0[54];
+ bf1[55] = bf0[52] + bf0[55];
+ bf1[56] = bf0[56] + bf0[59];
+ bf1[57] = bf0[57] + bf0[58];
+ bf1[58] = bf0[57] - bf0[58];
+ bf1[59] = bf0[56] - bf0[59];
+ bf1[60] = -bf0[60] + bf0[63];
+ bf1[61] = -bf0[61] + bf0[62];
+ bf1[62] = bf0[61] + bf0[62];
+ bf1[63] = bf0[60] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = bf0[17] - bf0[18];
+ bf1[19] = bf0[16] - bf0[19];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[21] + bf0[22];
+ bf1[23] = bf0[20] + bf0[23];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = bf0[25] - bf0[26];
+ bf1[27] = bf0[24] - bf0[27];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[29] + bf0[30];
+ bf1[31] = bf0[28] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = bf0[41];
+ bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit[stage]);
+ bf1[54] = bf0[54];
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit[stage]);
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[39];
+ bf1[33] = bf0[33] + bf0[38];
+ bf1[34] = bf0[34] + bf0[37];
+ bf1[35] = bf0[35] + bf0[36];
+ bf1[36] = bf0[35] - bf0[36];
+ bf1[37] = bf0[34] - bf0[37];
+ bf1[38] = bf0[33] - bf0[38];
+ bf1[39] = bf0[32] - bf0[39];
+ bf1[40] = -bf0[40] + bf0[47];
+ bf1[41] = -bf0[41] + bf0[46];
+ bf1[42] = -bf0[42] + bf0[45];
+ bf1[43] = -bf0[43] + bf0[44];
+ bf1[44] = bf0[43] + bf0[44];
+ bf1[45] = bf0[42] + bf0[45];
+ bf1[46] = bf0[41] + bf0[46];
+ bf1[47] = bf0[40] + bf0[47];
+ bf1[48] = bf0[48] + bf0[55];
+ bf1[49] = bf0[49] + bf0[54];
+ bf1[50] = bf0[50] + bf0[53];
+ bf1[51] = bf0[51] + bf0[52];
+ bf1[52] = bf0[51] - bf0[52];
+ bf1[53] = bf0[50] - bf0[53];
+ bf1[54] = bf0[49] - bf0[54];
+ bf1[55] = bf0[48] - bf0[55];
+ bf1[56] = -bf0[56] + bf0[63];
+ bf1[57] = -bf0[57] + bf0[62];
+ bf1[58] = -bf0[58] + bf0[61];
+ bf1[59] = -bf0[59] + bf0[60];
+ bf1[60] = bf0[59] + bf0[60];
+ bf1[61] = bf0[58] + bf0[61];
+ bf1[62] = bf0[57] + bf0[62];
+ bf1[63] = bf0[56] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = bf0[19] - bf0[20];
+ bf1[21] = bf0[18] - bf0[21];
+ bf1[22] = bf0[17] - bf0[22];
+ bf1[23] = bf0[16] - bf0[23];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[27] + bf0[28];
+ bf1[29] = bf0[26] + bf0[29];
+ bf1[30] = bf0[25] + bf0[30];
+ bf1[31] = bf0[24] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+ bf1[44] = bf0[44];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = bf0[50];
+ bf1[51] = bf0[51];
+ bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit[stage]);
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[47];
+ bf1[33] = bf0[33] + bf0[46];
+ bf1[34] = bf0[34] + bf0[45];
+ bf1[35] = bf0[35] + bf0[44];
+ bf1[36] = bf0[36] + bf0[43];
+ bf1[37] = bf0[37] + bf0[42];
+ bf1[38] = bf0[38] + bf0[41];
+ bf1[39] = bf0[39] + bf0[40];
+ bf1[40] = bf0[39] - bf0[40];
+ bf1[41] = bf0[38] - bf0[41];
+ bf1[42] = bf0[37] - bf0[42];
+ bf1[43] = bf0[36] - bf0[43];
+ bf1[44] = bf0[35] - bf0[44];
+ bf1[45] = bf0[34] - bf0[45];
+ bf1[46] = bf0[33] - bf0[46];
+ bf1[47] = bf0[32] - bf0[47];
+ bf1[48] = -bf0[48] + bf0[63];
+ bf1[49] = -bf0[49] + bf0[62];
+ bf1[50] = -bf0[50] + bf0[61];
+ bf1[51] = -bf0[51] + bf0[60];
+ bf1[52] = -bf0[52] + bf0[59];
+ bf1[53] = -bf0[53] + bf0[58];
+ bf1[54] = -bf0[54] + bf0[57];
+ bf1[55] = -bf0[55] + bf0[56];
+ bf1[56] = bf0[55] + bf0[56];
+ bf1[57] = bf0[54] + bf0[57];
+ bf1[58] = bf0[53] + bf0[58];
+ bf1[59] = bf0[52] + bf0[59];
+ bf1[60] = bf0[51] + bf0[60];
+ bf1[61] = bf0[50] + bf0[61];
+ bf1[62] = bf0[49] + bf0[62];
+ bf1[63] = bf0[48] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = bf0[15] - bf0[16];
+ bf1[17] = bf0[14] - bf0[17];
+ bf1[18] = bf0[13] - bf0[18];
+ bf1[19] = bf0[12] - bf0[19];
+ bf1[20] = bf0[11] - bf0[20];
+ bf1[21] = bf0[10] - bf0[21];
+ bf1[22] = bf0[9] - bf0[22];
+ bf1[23] = bf0[8] - bf0[23];
+ bf1[24] = bf0[7] - bf0[24];
+ bf1[25] = bf0[6] - bf0[25];
+ bf1[26] = bf0[5] - bf0[26];
+ bf1[27] = bf0[4] - bf0[27];
+ bf1[28] = bf0[3] - bf0[28];
+ bf1[29] = bf0[2] - bf0[29];
+ bf1[30] = bf0[1] - bf0[30];
+ bf1[31] = bf0[0] - bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = bf0[37];
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = bf0[58];
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[63];
+ bf1[1] = bf0[1] + bf0[62];
+ bf1[2] = bf0[2] + bf0[61];
+ bf1[3] = bf0[3] + bf0[60];
+ bf1[4] = bf0[4] + bf0[59];
+ bf1[5] = bf0[5] + bf0[58];
+ bf1[6] = bf0[6] + bf0[57];
+ bf1[7] = bf0[7] + bf0[56];
+ bf1[8] = bf0[8] + bf0[55];
+ bf1[9] = bf0[9] + bf0[54];
+ bf1[10] = bf0[10] + bf0[53];
+ bf1[11] = bf0[11] + bf0[52];
+ bf1[12] = bf0[12] + bf0[51];
+ bf1[13] = bf0[13] + bf0[50];
+ bf1[14] = bf0[14] + bf0[49];
+ bf1[15] = bf0[15] + bf0[48];
+ bf1[16] = bf0[16] + bf0[47];
+ bf1[17] = bf0[17] + bf0[46];
+ bf1[18] = bf0[18] + bf0[45];
+ bf1[19] = bf0[19] + bf0[44];
+ bf1[20] = bf0[20] + bf0[43];
+ bf1[21] = bf0[21] + bf0[42];
+ bf1[22] = bf0[22] + bf0[41];
+ bf1[23] = bf0[23] + bf0[40];
+ bf1[24] = bf0[24] + bf0[39];
+ bf1[25] = bf0[25] + bf0[38];
+ bf1[26] = bf0[26] + bf0[37];
+ bf1[27] = bf0[27] + bf0[36];
+ bf1[28] = bf0[28] + bf0[35];
+ bf1[29] = bf0[29] + bf0[34];
+ bf1[30] = bf0[30] + bf0[33];
+ bf1[31] = bf0[31] + bf0[32];
+ bf1[32] = bf0[31] - bf0[32];
+ bf1[33] = bf0[30] - bf0[33];
+ bf1[34] = bf0[29] - bf0[34];
+ bf1[35] = bf0[28] - bf0[35];
+ bf1[36] = bf0[27] - bf0[36];
+ bf1[37] = bf0[26] - bf0[37];
+ bf1[38] = bf0[25] - bf0[38];
+ bf1[39] = bf0[24] - bf0[39];
+ bf1[40] = bf0[23] - bf0[40];
+ bf1[41] = bf0[22] - bf0[41];
+ bf1[42] = bf0[21] - bf0[42];
+ bf1[43] = bf0[20] - bf0[43];
+ bf1[44] = bf0[19] - bf0[44];
+ bf1[45] = bf0[18] - bf0[45];
+ bf1[46] = bf0[17] - bf0[46];
+ bf1[47] = bf0[16] - bf0[47];
+ bf1[48] = bf0[15] - bf0[48];
+ bf1[49] = bf0[14] - bf0[49];
+ bf1[50] = bf0[13] - bf0[50];
+ bf1[51] = bf0[12] - bf0[51];
+ bf1[52] = bf0[11] - bf0[52];
+ bf1[53] = bf0[10] - bf0[53];
+ bf1[54] = bf0[9] - bf0[54];
+ bf1[55] = bf0[8] - bf0[55];
+ bf1[56] = bf0[7] - bf0[56];
+ bf1[57] = bf0[6] - bf0[57];
+ bf1[58] = bf0[5] - bf0[58];
+ bf1[59] = bf0[4] - bf0[59];
+ bf1[60] = bf0[3] - bf0[60];
+ bf1[61] = bf0[2] - bf0[61];
+ bf1[62] = bf0[1] - bf0[62];
+ bf1[63] = bf0[0] - bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+#endif // CONFIG_TX64X64
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 01bcde9..23e0409 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1325,9 +1325,14 @@
#endif // CONFIG_SUPERTX
// FIXME(someone) need real defaults here
-static const struct segmentation_probs default_seg_probs = {
- { 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128 },
+static const aom_prob default_segment_tree_probs[SEG_TREE_PROBS] = {
+ 128, 128, 128, 128, 128, 128, 128
};
+// clang-format off
+static const aom_prob default_segment_pred_probs[PREDICTION_PROBS] = {
+ 128, 128, 128
+};
+// clang-format on
static void init_mode_probs(FRAME_CONTEXT *fc) {
av1_copy(fc->uv_mode_prob, default_uv_probs);
@@ -1372,8 +1377,8 @@
#if CONFIG_SUPERTX
av1_copy(fc->supertx_prob, default_supertx_prob);
#endif // CONFIG_SUPERTX
- av1_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
- av1_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
+ av1_copy(fc->seg.tree_probs, default_segment_tree_probs);
+ av1_copy(fc->seg.pred_probs, default_segment_pred_probs);
#if CONFIG_EXT_INTRA
av1_copy(fc->ext_intra_probs, default_ext_intra_probs);
av1_copy(fc->intra_filter_probs, default_intra_filter_probs);
@@ -1392,6 +1397,7 @@
fc->inter_ext_tx_cdf, EXT_TX_SIZES);
av1_tree_to_cdf_1D(av1_partition_tree, fc->partition_prob, fc->partition_cdf,
PARTITION_CONTEXTS);
+ av1_tree_to_cdf(av1_segment_tree, fc->seg.tree_probs, fc->seg.tree_cdf);
#endif
}
diff --git a/av1/common/odintrin.h b/av1/common/odintrin.h
index 73106df..5b83f8c 100644
--- a/av1/common/odintrin.h
+++ b/av1/common/odintrin.h
@@ -11,10 +11,10 @@
#ifndef AV1_COMMON_ODINTRIN_H_
#define AV1_COMMON_ODINTRIN_H_
-#include "av1/common/enums.h"
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/bitops.h"
+#include "av1/common/enums.h"
#ifdef __cplusplus
extern "C" {
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index ee8cb47..cf8d7b4 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -13,21 +13,21 @@
#define AV1_COMMON_ONYXC_INT_H_
#include "./aom_config.h"
+#include "./av1_rtcd.h"
#include "aom/internal/aom_codec_internal.h"
#include "aom_util/aom_thread.h"
-#include "./av1_rtcd.h"
#include "av1/common/alloccommon.h"
-#include "av1/common/loopfilter.h"
-#include "av1/common/entropymv.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
-#include "av1/common/mv.h"
+#include "av1/common/entropymv.h"
#include "av1/common/frame_buffers.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/mv.h"
#include "av1/common/quant_common.h"
-#include "av1/common/tile_common.h"
#if CONFIG_LOOP_RESTORATION
#include "av1/common/restoration.h"
#endif // CONFIG_LOOP_RESTORATION
+#include "av1/common/tile_common.h"
#ifdef __cplusplus
extern "C" {
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 22479b6..483b0b2 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -233,6 +233,14 @@
const int w = AOMMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
const int step = 1 << txsz;
+ // TODO(bshacklett, huisu): Currently the RD loop traverses 4X8 blocks in
+ // inverted N order while in the bitstream the subblocks are stored in Z
+ // order. This discrepancy makes this function incorrect when considering 4X8
+ // blocks in the RD loop, so we disable the extended right edge for these
+ // blocks. The correct solution is to change the bitstream to store these
+ // blocks in inverted N order, and then update this function appropriately.
+ if (bsize == BLOCK_4X8 && y == 1) return 0;
+
if (!right_available) {
return 0;
} else {
diff --git a/av1/common/seg_common.h b/av1/common/seg_common.h
index 8c85d9a..d833a86 100644
--- a/av1/common/seg_common.h
+++ b/av1/common/seg_common.h
@@ -48,6 +48,9 @@
struct segmentation_probs {
aom_prob tree_probs[SEG_TREE_PROBS];
+#if CONFIG_DAALA_EC
+ aom_cdf_prob tree_cdf[MAX_SEGMENTS];
+#endif
aom_prob pred_probs[PREDICTION_PROBS];
};
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index f853866..6744572 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -54,6 +54,7 @@
#include "av1/decoder/dsubexp.h"
#define MAX_AV1_HEADER_SIZE 80
+#define ACCT_STR __func__
static int is_compound_reference_allowed(const AV1_COMMON *cm) {
int i;
@@ -109,7 +110,7 @@
int i, j;
for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
- av1_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
+ av1_diff_update_prob(r, &fc->switchable_interp_prob[j][i], ACCT_STR);
#if CONFIG_DAALA_EC
av1_tree_to_cdf(av1_switchable_interp_tree, fc->switchable_interp_prob[j],
fc->switchable_interp_cdf[j]);
@@ -121,31 +122,31 @@
int i;
#if CONFIG_REF_MV
for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
- av1_diff_update_prob(r, &fc->newmv_prob[i]);
+ av1_diff_update_prob(r, &fc->newmv_prob[i], ACCT_STR);
for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
- av1_diff_update_prob(r, &fc->zeromv_prob[i]);
+ av1_diff_update_prob(r, &fc->zeromv_prob[i], ACCT_STR);
for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
- av1_diff_update_prob(r, &fc->refmv_prob[i]);
+ av1_diff_update_prob(r, &fc->refmv_prob[i], ACCT_STR);
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- av1_diff_update_prob(r, &fc->drl_prob[i]);
+ av1_diff_update_prob(r, &fc->drl_prob[i], ACCT_STR);
#if CONFIG_EXT_INTER
- av1_diff_update_prob(r, &fc->new2mv_prob);
+ av1_diff_update_prob(r, &fc->new2mv_prob, ACCT_STR);
#endif // CONFIG_EXT_INTER
#else
int j;
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
for (j = 0; j < INTER_MODES - 1; ++j)
- av1_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
+ av1_diff_update_prob(r, &fc->inter_mode_probs[i][j], ACCT_STR);
#endif
}
#if CONFIG_EXT_INTER
static void read_inter_compound_mode_probs(FRAME_CONTEXT *fc, aom_reader *r) {
int i, j;
- if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ if (aom_read(r, GROUP_DIFF_UPDATE_PROB, ACCT_STR)) {
for (j = 0; j < INTER_MODE_CONTEXTS; ++j) {
for (i = 0; i < INTER_COMPOUND_MODES - 1; ++i) {
- av1_diff_update_prob(r, &fc->inter_compound_mode_probs[j][i]);
+ av1_diff_update_prob(r, &fc->inter_compound_mode_probs[j][i], ACCT_STR);
}
}
}
@@ -169,12 +170,12 @@
if (cm->reference_mode == REFERENCE_MODE_SELECT)
for (i = 0; i < COMP_INTER_CONTEXTS; ++i)
- av1_diff_update_prob(r, &fc->comp_inter_prob[i]);
+ av1_diff_update_prob(r, &fc->comp_inter_prob[i], ACCT_STR);
if (cm->reference_mode != COMPOUND_REFERENCE) {
for (i = 0; i < REF_CONTEXTS; ++i) {
for (j = 0; j < (SINGLE_REFS - 1); ++j) {
- av1_diff_update_prob(r, &fc->single_ref_prob[i][j]);
+ av1_diff_update_prob(r, &fc->single_ref_prob[i][j], ACCT_STR);
}
}
}
@@ -183,12 +184,12 @@
for (i = 0; i < REF_CONTEXTS; ++i) {
#if CONFIG_EXT_REFS
for (j = 0; j < (FWD_REFS - 1); ++j)
- av1_diff_update_prob(r, &fc->comp_ref_prob[i][j]);
+ av1_diff_update_prob(r, &fc->comp_ref_prob[i][j], ACCT_STR);
for (j = 0; j < (BWD_REFS - 1); ++j)
- av1_diff_update_prob(r, &fc->comp_bwdref_prob[i][j]);
+ av1_diff_update_prob(r, &fc->comp_bwdref_prob[i][j], ACCT_STR);
#else
for (j = 0; j < (COMP_REFS - 1); ++j)
- av1_diff_update_prob(r, &fc->comp_ref_prob[i][j]);
+ av1_diff_update_prob(r, &fc->comp_ref_prob[i][j], ACCT_STR);
#endif // CONFIG_EXT_REFS
}
}
@@ -196,7 +197,7 @@
static void update_mv_probs(aom_prob *p, int n, aom_reader *r) {
int i;
- for (i = 0; i < n; ++i) av1_diff_update_prob(r, &p[i]);
+ for (i = 0; i < n; ++i) av1_diff_update_prob(r, &p[i], ACCT_STR);
}
static void read_mv_probs(nmv_context *ctx, int allow_hp, aom_reader *r) {
@@ -417,12 +418,9 @@
// passing bsize from decode_partition().
xd->mi[0]->mbmi.sb_type = bsize;
for (y = 0; y < y_mis; ++y)
- for (x = !y; x < x_mis; ++x) {
- xd->mi[y * cm->mi_stride + x] = xd->mi[0];
- }
+ for (x = !y; x < x_mis; ++x) xd->mi[y * cm->mi_stride + x] = xd->mi[0];
set_plane_n4(xd, bw, bh, bwl, bhl);
-
set_skip_context(xd, mi_row, mi_col);
#if CONFIG_VAR_TX
@@ -1170,7 +1168,9 @@
const int bh = 1 << (bhl - 1);
const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
-
+#if CONFIG_ACCOUNTING
+ aom_accounting_set_context(&pbi->accounting, mi_col, mi_row);
+#endif
#if CONFIG_SUPERTX
MB_MODE_INFO *mbmi;
if (supertx_enabled) {
@@ -1415,21 +1415,22 @@
if (has_rows && has_cols)
#if CONFIG_EXT_PARTITION_TYPES
if (bsize <= BLOCK_8X8)
- p = (PARTITION_TYPE)aom_read_tree(r, av1_partition_tree, probs);
+ p = (PARTITION_TYPE)aom_read_tree(r, av1_partition_tree, probs, ACCT_STR);
else
- p = (PARTITION_TYPE)aom_read_tree(r, av1_ext_partition_tree, probs);
+ p = (PARTITION_TYPE)aom_read_tree(r, av1_ext_partition_tree, probs,
+ ACCT_STR);
#else
#if CONFIG_DAALA_EC
p = (PARTITION_TYPE)aom_read_tree_cdf(r, cm->fc->partition_cdf[ctx],
- PARTITION_TYPES);
+ PARTITION_TYPES, ACCT_STR);
#else
- p = (PARTITION_TYPE)aom_read_tree(r, av1_partition_tree, probs);
+ p = (PARTITION_TYPE)aom_read_tree(r, av1_partition_tree, probs, ACCT_STR);
#endif
#endif // CONFIG_EXT_PARTITION_TYPES
else if (!has_rows && has_cols)
- p = aom_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
+ p = aom_read(r, probs[1], ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ;
else if (has_rows && !has_cols)
- p = aom_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT;
+ p = aom_read(r, probs[2], ACCT_STR) ? PARTITION_SPLIT : PARTITION_VERT;
else
p = PARTITION_SPLIT;
@@ -1445,7 +1446,7 @@
return 1;
} else {
const int ctx = av1_get_skip_context(xd);
- const int skip = aom_read(r, cm->fc->skip_probs[ctx]);
+ const int skip = aom_read(r, cm->fc->skip_probs[ctx], ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
if (counts) ++counts->skip[ctx][skip];
return skip;
@@ -1507,8 +1508,8 @@
if (!frame_is_intra_only(cm) && partition != PARTITION_NONE &&
bsize <= MAX_SUPERTX_BLOCK_SIZE && !supertx_enabled && !xd->lossless[0]) {
const int supertx_context = partition_supertx_context_lookup[partition];
- supertx_enabled =
- aom_read(r, cm->fc->supertx_prob[supertx_context][supertx_size]);
+ supertx_enabled = aom_read(
+ r, cm->fc->supertx_prob[supertx_context][supertx_size], ACCT_STR);
if (xd->counts)
xd->counts->supertx[supertx_context][supertx_size][supertx_enabled]++;
#if CONFIG_VAR_TX
@@ -1710,14 +1711,16 @@
int eset = get_ext_tx_set(supertx_size, bsize, 1);
if (eset > 0) {
txfm = aom_read_tree(r, av1_ext_tx_inter_tree[eset],
- cm->fc->inter_ext_tx_prob[eset][supertx_size]);
+ cm->fc->inter_ext_tx_prob[eset][supertx_size],
+ ACCT_STR);
if (xd->counts) ++xd->counts->inter_ext_tx[eset][supertx_size][txfm];
}
}
#else
if (supertx_size < TX_32X32) {
txfm = aom_read_tree(r, av1_ext_tx_tree,
- cm->fc->inter_ext_tx_prob[supertx_size]);
+ cm->fc->inter_ext_tx_prob[supertx_size],
+ ACCT_STR);
if (xd->counts) ++xd->counts->inter_ext_tx[supertx_size][txfm];
}
#endif // CONFIG_EXT_TX
@@ -1809,10 +1812,10 @@
if (!((mi_row * MI_SIZE) & 127) && !((mi_col * MI_SIZE) & 127) &&
cm->clpf_size == CLPF_128X128) {
- cm->clpf_blocks[tl] = aom_read_literal(r, 1);
+ cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
} else if (cm->clpf_size == CLPF_64X64 &&
!clpf_all_skip(cm, mi_col, mi_row, 64 / MI_SIZE)) {
- cm->clpf_blocks[tl] = aom_read_literal(r, 1);
+ cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
} else if (cm->clpf_size == CLPF_32X32) {
const int tr = tl + 1;
const int bl = tl + cm->clpf_stride;
@@ -1821,19 +1824,19 @@
// Up to four bits per SB
if (!clpf_all_skip(cm, mi_col, mi_row, size))
- cm->clpf_blocks[tl] = aom_read_literal(r, 1);
+ cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
if (mi_col + size < cm->mi_cols &&
!clpf_all_skip(cm, mi_col + size, mi_row, size))
- cm->clpf_blocks[tr] = aom_read_literal(r, 1);
+ cm->clpf_blocks[tr] = aom_read_literal(r, 1, ACCT_STR);
if (mi_row + size < cm->mi_rows &&
!clpf_all_skip(cm, mi_col, mi_row + size, size))
- cm->clpf_blocks[bl] = aom_read_literal(r, 1);
+ cm->clpf_blocks[bl] = aom_read_literal(r, 1, ACCT_STR);
if (mi_col + size < cm->mi_cols && mi_row + size < cm->mi_rows &&
!clpf_all_skip(cm, mi_col + size, mi_row + size, size))
- cm->clpf_blocks[br] = aom_read_literal(r, 1);
+ cm->clpf_blocks[br] = aom_read_literal(r, 1, ACCT_STR);
}
}
#endif
@@ -1894,13 +1897,13 @@
aom_reader *r) {
int i, j, k, l, m;
- if (aom_read_bit(r))
+ if (aom_read_bit(r, ACCT_STR))
for (i = 0; i < PLANE_TYPES; ++i)
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
for (m = 0; m < UNCONSTRAINED_NODES; ++m)
- av1_diff_update_prob(r, &coef_probs[i][j][k][l][m]);
+ av1_diff_update_prob(r, &coef_probs[i][j][k][l][m], ACCT_STR);
}
static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, aom_reader *r) {
@@ -1990,38 +1993,39 @@
rsi->wiener_info, sizeof(*rsi->wiener_info) * ntiles);
assert(rsi->wiener_info != NULL);
for (i = 0; i < ntiles; ++i) {
- rsi->restoration_type[i] = aom_read_tree(
- rb, av1_switchable_restore_tree, cm->fc->switchable_restore_prob);
+ rsi->restoration_type[i] =
+ aom_read_tree(rb, av1_switchable_restore_tree,
+ cm->fc->switchable_restore_prob, ACCT_STR);
if (rsi->restoration_type[i] == RESTORE_WIENER) {
rsi->wiener_info[i].level = 1;
rsi->wiener_info[i].vfilter[0] =
- aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP0_BITS, ACCT_STR) +
WIENER_FILT_TAP0_MINV;
rsi->wiener_info[i].vfilter[1] =
- aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP1_BITS, ACCT_STR) +
WIENER_FILT_TAP1_MINV;
rsi->wiener_info[i].vfilter[2] =
- aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP2_BITS, ACCT_STR) +
WIENER_FILT_TAP2_MINV;
rsi->wiener_info[i].hfilter[0] =
- aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP0_BITS, ACCT_STR) +
WIENER_FILT_TAP0_MINV;
rsi->wiener_info[i].hfilter[1] =
- aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP1_BITS, ACCT_STR) +
WIENER_FILT_TAP1_MINV;
rsi->wiener_info[i].hfilter[2] =
- aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP2_BITS, ACCT_STR) +
WIENER_FILT_TAP2_MINV;
} else if (rsi->restoration_type[i] == RESTORE_BILATERAL) {
int s;
for (s = 0; s < BILATERAL_SUBTILES; ++s) {
#if BILATERAL_SUBTILES == 0
rsi->bilateral_info[i].level[s] =
- aom_read_literal(rb, av1_bilateral_level_bits(cm));
+ aom_read_literal(rb, av1_bilateral_level_bits(cm), ACCT_STR);
#else
- if (aom_read(rb, RESTORE_NONE_BILATERAL_PROB)) {
+ if (aom_read(rb, RESTORE_NONE_BILATERAL_PROB, ACCT_STR)) {
rsi->bilateral_info[i].level[s] =
- aom_read_literal(rb, av1_bilateral_level_bits(cm));
+ aom_read_literal(rb, av1_bilateral_level_bits(cm), ACCT_STR);
} else {
rsi->bilateral_info[i].level[s] = -1;
}
@@ -2034,26 +2038,26 @@
rsi->wiener_info, sizeof(*rsi->wiener_info) * ntiles);
assert(rsi->wiener_info != NULL);
for (i = 0; i < ntiles; ++i) {
- if (aom_read(rb, RESTORE_NONE_WIENER_PROB)) {
+ if (aom_read(rb, RESTORE_NONE_WIENER_PROB, ACCT_STR)) {
rsi->wiener_info[i].level = 1;
rsi->restoration_type[i] = RESTORE_WIENER;
rsi->wiener_info[i].vfilter[0] =
- aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP0_BITS, ACCT_STR) +
WIENER_FILT_TAP0_MINV;
rsi->wiener_info[i].vfilter[1] =
- aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP1_BITS, ACCT_STR) +
WIENER_FILT_TAP1_MINV;
rsi->wiener_info[i].vfilter[2] =
- aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP2_BITS, ACCT_STR) +
WIENER_FILT_TAP2_MINV;
rsi->wiener_info[i].hfilter[0] =
- aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP0_BITS, ACCT_STR) +
WIENER_FILT_TAP0_MINV;
rsi->wiener_info[i].hfilter[1] =
- aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP1_BITS, ACCT_STR) +
WIENER_FILT_TAP1_MINV;
rsi->wiener_info[i].hfilter[2] =
- aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+ aom_read_literal(rb, WIENER_FILT_TAP2_BITS, ACCT_STR) +
WIENER_FILT_TAP2_MINV;
} else {
rsi->wiener_info[i].level = 0;
@@ -2068,9 +2072,9 @@
int s;
rsi->restoration_type[i] = RESTORE_BILATERAL;
for (s = 0; s < BILATERAL_SUBTILES; ++s) {
- if (aom_read(rb, RESTORE_NONE_BILATERAL_PROB)) {
+ if (aom_read(rb, RESTORE_NONE_BILATERAL_PROB, ACCT_STR)) {
rsi->bilateral_info[i].level[s] =
- aom_read_literal(rb, av1_bilateral_level_bits(cm));
+ aom_read_literal(rb, av1_bilateral_level_bits(cm), ACCT_STR);
} else {
rsi->bilateral_info[i].level[s] = -1;
}
@@ -2773,7 +2777,9 @@
aom_memalign(32, n_tiles * (sizeof(*pbi->tile_data))));
pbi->allocated_tiles = n_tiles;
}
-
+#if CONFIG_ACCOUNTING
+ aom_accounting_reset(&pbi->accounting);
+#endif
// Load all tile information into tile_data.
for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
@@ -2796,6 +2802,9 @@
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&td->bit_reader, pbi->decrypt_cb, pbi->decrypt_state);
#endif
+#if CONFIG_ACCOUNTING
+ tile_data->bit_reader.accounting = &pbi->accounting;
+#endif
av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
#if CONFIG_PALETTE
td->xd.plane[0].color_index_map = td->color_index_map[0];
@@ -2814,6 +2823,10 @@
for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
const int col = inv_col_order ? tile_cols - 1 - tile_col : tile_col;
TileData *const td = pbi->tile_data + tile_cols * row + col;
+#if CONFIG_ACCOUNTING
+ tile_data->bit_reader.accounting->last_tell_frac =
+ aom_reader_tell_frac(&tile_data->bit_reader);
+#endif
av1_tile_set_col(&tile_info, cm, col);
@@ -3494,22 +3507,23 @@
int i, j, k;
int s;
for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
- if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ if (aom_read(r, GROUP_DIFF_UPDATE_PROB, ACCT_STR)) {
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
if (!use_inter_ext_tx_for_txsize[s][i]) continue;
for (j = 0; j < num_ext_tx_set_inter[s] - 1; ++j)
- av1_diff_update_prob(r, &fc->inter_ext_tx_prob[s][i][j]);
+ av1_diff_update_prob(r, &fc->inter_ext_tx_prob[s][i][j], ACCT_STR);
}
}
}
for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
- if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ if (aom_read(r, GROUP_DIFF_UPDATE_PROB, ACCT_STR)) {
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
if (!use_intra_ext_tx_for_txsize[s][i]) continue;
for (j = 0; j < INTRA_MODES; ++j)
for (k = 0; k < num_ext_tx_set_intra[s] - 1; ++k)
- av1_diff_update_prob(r, &fc->intra_ext_tx_prob[s][i][j][k]);
+ av1_diff_update_prob(r, &fc->intra_ext_tx_prob[s][i][j][k],
+ ACCT_STR);
}
}
}
@@ -3519,11 +3533,11 @@
static void read_ext_tx_probs(FRAME_CONTEXT *fc, aom_reader *r) {
int i, j, k;
- if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ if (aom_read(r, GROUP_DIFF_UPDATE_PROB, ACCT_STR)) {
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
for (j = 0; j < TX_TYPES; ++j) {
for (k = 0; k < TX_TYPES - 1; ++k)
- av1_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k]);
+ av1_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k], ACCT_STR);
#if CONFIG_DAALA_EC
av1_tree_to_cdf(av1_ext_tx_tree, fc->intra_ext_tx_prob[i][j],
fc->intra_ext_tx_cdf[i][j]);
@@ -3531,10 +3545,10 @@
}
}
}
- if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ if (aom_read(r, GROUP_DIFF_UPDATE_PROB, ACCT_STR)) {
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
for (k = 0; k < TX_TYPES - 1; ++k)
- av1_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k]);
+ av1_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k], ACCT_STR);
#if CONFIG_DAALA_EC
av1_tree_to_cdf(av1_ext_tx_tree, fc->inter_ext_tx_prob[i],
fc->inter_ext_tx_cdf[i]);
@@ -3547,10 +3561,10 @@
#if CONFIG_SUPERTX
static void read_supertx_probs(FRAME_CONTEXT *fc, aom_reader *r) {
int i, j;
- if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ if (aom_read(r, GROUP_DIFF_UPDATE_PROB, ACCT_STR)) {
for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
for (j = 1; j < TX_SIZES; ++j) {
- av1_diff_update_prob(r, &fc->supertx_prob[i][j]);
+ av1_diff_update_prob(r, &fc->supertx_prob[i][j], ACCT_STR);
}
}
}
@@ -3561,7 +3575,7 @@
static void read_global_motion_params(Global_Motion_Params *params,
aom_prob *probs, aom_reader *r) {
GLOBAL_MOTION_TYPE gmtype =
- aom_read_tree(r, av1_global_motion_types_tree, probs);
+ aom_read_tree(r, av1_global_motion_types_tree, probs, ACCT_STR);
params->gmtype = gmtype;
params->motion_params.wmtype = gm_to_trans_type(gmtype);
switch (gmtype) {
@@ -3643,48 +3657,52 @@
for (i = 0; i < TX_SIZES - 1; ++i)
for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
for (k = 0; k < i + 1; ++k)
- av1_diff_update_prob(&r, &fc->tx_size_probs[i][j][k]);
+ av1_diff_update_prob(&r, &fc->tx_size_probs[i][j][k], ACCT_STR);
}
read_coef_probs(fc, cm->tx_mode, &r);
#if CONFIG_VAR_TX
for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
- av1_diff_update_prob(&r, &fc->txfm_partition_prob[k]);
+ av1_diff_update_prob(&r, &fc->txfm_partition_prob[k], ACCT_STR);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (cm->tx_mode == TX_MODE_SELECT) {
for (i = 1; i < TX_SIZES - 1; ++i)
- av1_diff_update_prob(&r, &fc->rect_tx_prob[i]);
+ av1_diff_update_prob(&r, &fc->rect_tx_prob[i], ACCT_STR);
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
#endif
for (k = 0; k < SKIP_CONTEXTS; ++k)
- av1_diff_update_prob(&r, &fc->skip_probs[k]);
+ av1_diff_update_prob(&r, &fc->skip_probs[k], ACCT_STR);
if (cm->seg.enabled && cm->seg.update_map) {
if (cm->seg.temporal_update) {
for (k = 0; k < PREDICTION_PROBS; k++)
- av1_diff_update_prob(&r, &cm->fc->seg.pred_probs[k]);
+ av1_diff_update_prob(&r, &cm->fc->seg.pred_probs[k], ACCT_STR);
}
for (k = 0; k < MAX_SEGMENTS - 1; k++)
- av1_diff_update_prob(&r, &cm->fc->seg.tree_probs[k]);
+ av1_diff_update_prob(&r, &cm->fc->seg.tree_probs[k], ACCT_STR);
+#if CONFIG_DAALA_EC
+ av1_tree_to_cdf(av1_segment_tree, cm->fc->seg.tree_probs,
+ cm->fc->seg.tree_cdf);
+#endif
}
for (j = 0; j < INTRA_MODES; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
- av1_diff_update_prob(&r, &fc->uv_mode_prob[j][i]);
+ av1_diff_update_prob(&r, &fc->uv_mode_prob[j][i], ACCT_STR);
#if CONFIG_EXT_PARTITION_TYPES
for (i = 0; i < PARTITION_TYPES - 1; ++i)
- av1_diff_update_prob(&r, &fc->partition_prob[0][i]);
+ av1_diff_update_prob(&r, &fc->partition_prob[0][i], ACCT_STR);
for (j = 1; j < PARTITION_CONTEXTS; ++j)
for (i = 0; i < EXT_PARTITION_TYPES - 1; ++i)
- av1_diff_update_prob(&r, &fc->partition_prob[j][i]);
+ av1_diff_update_prob(&r, &fc->partition_prob[j][i], ACCT_STR);
#else
for (j = 0; j < PARTITION_CONTEXTS; ++j) {
for (i = 0; i < PARTITION_TYPES - 1; ++i)
- av1_diff_update_prob(&r, &fc->partition_prob[j][i]);
+ av1_diff_update_prob(&r, &fc->partition_prob[j][i], ACCT_STR);
#if CONFIG_DAALA_EC
av1_tree_to_cdf(av1_partition_tree, fc->partition_prob[j],
fc->partition_cdf[j]);
@@ -3695,7 +3713,7 @@
#if CONFIG_EXT_INTRA
for (i = 0; i < INTRA_FILTERS + 1; ++i)
for (j = 0; j < INTRA_FILTERS - 1; ++j)
- av1_diff_update_prob(&r, &fc->intra_filter_probs[i][j]);
+ av1_diff_update_prob(&r, &fc->intra_filter_probs[i][j], ACCT_STR);
#endif // CONFIG_EXT_INTRA
if (frame_is_intra_only(cm)) {
@@ -3703,7 +3721,7 @@
for (k = 0; k < INTRA_MODES; k++)
for (j = 0; j < INTRA_MODES; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
- av1_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]);
+ av1_diff_update_prob(&r, &cm->kf_y_prob[k][j][i], ACCT_STR);
} else {
#if !CONFIG_REF_MV
nmv_context *const nmvc = &fc->nmvc;
@@ -3716,23 +3734,23 @@
if (cm->reference_mode != COMPOUND_REFERENCE) {
for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
if (is_interintra_allowed_bsize_group(i)) {
- av1_diff_update_prob(&r, &fc->interintra_prob[i]);
+ av1_diff_update_prob(&r, &fc->interintra_prob[i], ACCT_STR);
}
}
for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
for (j = 0; j < INTERINTRA_MODES - 1; j++)
- av1_diff_update_prob(&r, &fc->interintra_mode_prob[i][j]);
+ av1_diff_update_prob(&r, &fc->interintra_mode_prob[i][j], ACCT_STR);
}
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i)) {
- av1_diff_update_prob(&r, &fc->wedge_interintra_prob[i]);
+ av1_diff_update_prob(&r, &fc->wedge_interintra_prob[i], ACCT_STR);
}
}
}
if (cm->reference_mode != SINGLE_REFERENCE) {
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interinter_wedge_used(i)) {
- av1_diff_update_prob(&r, &fc->wedge_interinter_prob[i]);
+ av1_diff_update_prob(&r, &fc->wedge_interinter_prob[i], ACCT_STR);
}
}
}
@@ -3741,14 +3759,14 @@
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MOTION_MODES - 1; ++j)
- av1_diff_update_prob(&r, &fc->motion_mode_prob[i][j]);
+ av1_diff_update_prob(&r, &fc->motion_mode_prob[i][j], ACCT_STR);
}
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
if (cm->interp_filter == SWITCHABLE) read_switchable_interp_probs(fc, &r);
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
- av1_diff_update_prob(&r, &fc->intra_inter_prob[i]);
+ av1_diff_update_prob(&r, &fc->intra_inter_prob[i], ACCT_STR);
if (cm->reference_mode != SINGLE_REFERENCE)
setup_compound_reference_mode(cm);
@@ -3757,7 +3775,7 @@
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
- av1_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
+ av1_diff_update_prob(&r, &fc->y_mode_prob[j][i], ACCT_STR);
#if CONFIG_REF_MV
for (i = 0; i < NMV_CONTEXTS; ++i)
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index a05820a..66056c0 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -25,23 +25,25 @@
#include "aom_dsp/aom_dsp_common.h"
+#define ACCT_STR __func__
+
#if CONFIG_EXT_INTRA || CONFIG_PALETTE
static INLINE int read_uniform(aom_reader *r, int n) {
int l = get_unsigned_bits(n);
int m = (1 << l) - n;
- int v = aom_read_literal(r, l - 1);
+ int v = aom_read_literal(r, l - 1, ACCT_STR);
assert(l != 0);
if (v < m)
return v;
else
- return (v << 1) - m + aom_read_literal(r, 1);
+ return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR);
}
#endif // CONFIG_EXT_INTRA || CONFIG_PALETTE
static PREDICTION_MODE read_intra_mode(aom_reader *r, const aom_prob *p) {
- return (PREDICTION_MODE)aom_read_tree(r, av1_intra_mode_tree, p);
+ return (PREDICTION_MODE)aom_read_tree(r, av1_intra_mode_tree, p, ACCT_STR);
}
static PREDICTION_MODE read_intra_mode_y(AV1_COMMON *cm, MACROBLOCKD *xd,
@@ -67,7 +69,8 @@
static INTERINTRA_MODE read_interintra_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
aom_reader *r, int size_group) {
const INTERINTRA_MODE ii_mode = (INTERINTRA_MODE)aom_read_tree(
- r, av1_interintra_mode_tree, cm->fc->interintra_mode_prob[size_group]);
+ r, av1_interintra_mode_tree, cm->fc->interintra_mode_prob[size_group],
+ ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
if (counts) ++counts->interintra_mode[size_group][ii_mode];
return ii_mode;
@@ -84,7 +87,7 @@
int16_t mode_ctx = ctx & NEWMV_CTX_MASK;
aom_prob mode_prob = cm->fc->newmv_prob[mode_ctx];
- if (aom_read(r, mode_prob) == 0) {
+ if (aom_read(r, mode_prob, ACCT_STR) == 0) {
if (counts) ++counts->newmv_mode[mode_ctx][0];
#if CONFIG_EXT_INTER
@@ -94,7 +97,7 @@
#if CONFIG_EXT_INTER
} else {
mode_prob = cm->fc->new2mv_prob;
- if (aom_read(r, mode_prob) == 0) {
+ if (aom_read(r, mode_prob, ACCT_STR) == 0) {
if (counts) ++counts->new2mv_mode[0];
return NEWMV;
} else {
@@ -111,7 +114,7 @@
mode_ctx = (ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
mode_prob = cm->fc->zeromv_prob[mode_ctx];
- if (aom_read(r, mode_prob) == 0) {
+ if (aom_read(r, mode_prob, ACCT_STR) == 0) {
if (counts) ++counts->zeromv_mode[mode_ctx][0];
return ZEROMV;
}
@@ -125,7 +128,7 @@
mode_prob = cm->fc->refmv_prob[mode_ctx];
- if (aom_read(r, mode_prob) == 0) {
+ if (aom_read(r, mode_prob, ACCT_STR) == 0) {
if (counts) ++counts->refmv_mode[mode_ctx][0];
return NEARESTMV;
@@ -137,8 +140,8 @@
// Invalid prediction mode.
assert(0);
#else
- const int mode =
- aom_read_tree(r, av1_inter_mode_tree, cm->fc->inter_mode_probs[ctx]);
+ const int mode = aom_read_tree(r, av1_inter_mode_tree,
+ cm->fc->inter_mode_probs[ctx], ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
if (counts) ++counts->inter_mode[ctx][mode];
@@ -158,7 +161,7 @@
if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
uint8_t drl_ctx = av1_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
aom_prob drl_prob = cm->fc->drl_prob[drl_ctx];
- if (!aom_read(r, drl_prob)) {
+ if (!aom_read(r, drl_prob, ACCT_STR)) {
mbmi->ref_mv_idx = idx;
if (xd->counts) ++xd->counts->drl_mode[drl_ctx][0];
return;
@@ -178,7 +181,7 @@
if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
uint8_t drl_ctx = av1_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
aom_prob drl_prob = cm->fc->drl_prob[drl_ctx];
- if (!aom_read(r, drl_prob)) {
+ if (!aom_read(r, drl_prob, ACCT_STR)) {
mbmi->ref_mv_idx = idx - 1;
if (xd->counts) ++xd->counts->drl_mode[drl_ctx][0];
return;
@@ -194,8 +197,9 @@
#if CONFIG_EXT_INTER
static PREDICTION_MODE read_inter_compound_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
aom_reader *r, int16_t ctx) {
- const int mode = aom_read_tree(r, av1_inter_compound_mode_tree,
- cm->fc->inter_compound_mode_probs[ctx]);
+ const int mode =
+ aom_read_tree(r, av1_inter_compound_mode_tree,
+ cm->fc->inter_compound_mode_probs[ctx], ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
if (counts) ++counts->inter_compound_mode[ctx][mode];
@@ -207,7 +211,11 @@
static int read_segment_id(aom_reader *r,
const struct segmentation_probs *segp) {
- return aom_read_tree(r, av1_segment_tree, segp->tree_probs);
+#if CONFIG_DAALA_EC
+ return aom_read_symbol(r, segp->tree_cdf, MAX_SEGMENTS, ACCT_STR);
+#else
+ return aom_read_tree(r, av1_segment_tree, segp->tree_probs, ACCT_STR);
+#endif
}
#if CONFIG_VAR_TX
@@ -231,7 +239,7 @@
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
- is_split = aom_read(r, cm->fc->txfm_partition_prob[ctx]);
+ is_split = aom_read(r, cm->fc->txfm_partition_prob[ctx], ACCT_STR);
if (is_split) {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
@@ -274,8 +282,9 @@
int tx_size_cat, aom_reader *r) {
FRAME_COUNTS *counts = xd->counts;
const int ctx = get_tx_size_context(xd);
- int tx_size = aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
- cm->fc->tx_size_probs[tx_size_cat][ctx]);
+ int tx_size =
+ aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
+ cm->fc->tx_size_probs[tx_size_cat][ctx], ACCT_STR);
if (counts) ++counts->tx_size[tx_size_cat][ctx][tx_size];
return (TX_SIZE)tx_size;
}
@@ -416,7 +425,7 @@
if (seg->temporal_update) {
const int ctx = av1_get_pred_context_seg_id(xd);
const aom_prob pred_prob = segp->pred_probs[ctx];
- mbmi->seg_id_predicted = aom_read(r, pred_prob);
+ mbmi->seg_id_predicted = aom_read(r, pred_prob, ACCT_STR);
if (counts) ++counts->seg.pred[ctx][mbmi->seg_id_predicted];
if (mbmi->seg_id_predicted) {
segment_id = predicted_segment_id;
@@ -438,7 +447,7 @@
return 1;
} else {
const int ctx = av1_get_skip_context(xd);
- const int skip = aom_read(r, cm->fc->skip_probs[ctx]);
+ const int skip = aom_read(r, cm->fc->skip_probs[ctx], ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
if (counts) ++counts->skip[ctx][skip];
return skip;
@@ -461,15 +470,17 @@
palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
if (left_mi)
palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
- if (aom_read(r, av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
- [palette_ctx])) {
+ if (aom_read(
+ r, av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
+ ACCT_STR)) {
pmi->palette_size[0] =
aom_read_tree(r, av1_palette_size_tree,
- av1_default_palette_y_size_prob[bsize - BLOCK_8X8]) +
+ av1_default_palette_y_size_prob[bsize - BLOCK_8X8],
+ ACCT_STR) +
2;
n = pmi->palette_size[0];
for (i = 0; i < n; ++i)
- pmi->palette_colors[i] = aom_read_literal(r, cm->bit_depth);
+ pmi->palette_colors[i] = aom_read_literal(r, cm->bit_depth, ACCT_STR);
xd->plane[0].color_index_map[0] = read_uniform(r, n);
assert(xd->plane[0].color_index_map[0] < n);
@@ -477,18 +488,19 @@
}
if (mbmi->uv_mode == DC_PRED) {
- if (aom_read(r,
- av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0])) {
+ if (aom_read(r, av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0],
+ ACCT_STR)) {
pmi->palette_size[1] =
aom_read_tree(r, av1_palette_size_tree,
- av1_default_palette_uv_size_prob[bsize - BLOCK_8X8]) +
+ av1_default_palette_uv_size_prob[bsize - BLOCK_8X8],
+ ACCT_STR) +
2;
n = pmi->palette_size[1];
for (i = 0; i < n; ++i) {
pmi->palette_colors[PALETTE_MAX_SIZE + i] =
- aom_read_literal(r, cm->bit_depth);
+ aom_read_literal(r, cm->bit_depth, ACCT_STR);
pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] =
- aom_read_literal(r, cm->bit_depth);
+ aom_read_literal(r, cm->bit_depth, ACCT_STR);
}
xd->plane[1].color_index_map[0] = read_uniform(r, n);
assert(xd->plane[1].color_index_map[0] < n);
@@ -513,7 +525,7 @@
#endif // CONFIG_PALETTE
) {
mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
- aom_read(r, cm->fc->ext_intra_probs[0]);
+ aom_read(r, cm->fc->ext_intra_probs[0], ACCT_STR);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
mbmi->ext_intra_mode_info.ext_intra_mode[0] =
read_uniform(r, FILTER_INTRA_MODES);
@@ -527,7 +539,7 @@
#endif // CONFIG_PALETTE
) {
mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
- aom_read(r, cm->fc->ext_intra_probs[1]);
+ aom_read(r, cm->fc->ext_intra_probs[1], ACCT_STR);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
mbmi->ext_intra_mode_info.ext_intra_mode[1] =
read_uniform(r, FILTER_INTRA_MODES);
@@ -552,8 +564,8 @@
p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
if (av1_is_intra_filter_switchable(p_angle)) {
FRAME_COUNTS *counts = xd->counts;
- mbmi->intra_filter = aom_read_tree(r, av1_intra_filter_tree,
- cm->fc->intra_filter_probs[ctx]);
+ mbmi->intra_filter = aom_read_tree(
+ r, av1_intra_filter_tree, cm->fc->intra_filter_probs[ctx], ACCT_STR);
if (counts) ++counts->intra_filter[ctx][mbmi->intra_filter];
} else {
mbmi->intra_filter = INTRA_FILTER_LINEAR;
@@ -641,7 +653,8 @@
if (eset > 0) {
mbmi->tx_type = aom_read_tree(
r, av1_ext_tx_intra_tree[eset],
- cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]);
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+ ACCT_STR);
if (counts)
++counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode]
[mbmi->tx_type];
@@ -654,9 +667,9 @@
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
FRAME_COUNTS *counts = xd->counts;
TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
- mbmi->tx_type =
- aom_read_tree(r, av1_ext_tx_tree,
- cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+ mbmi->tx_type = aom_read_tree(
+ r, av1_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom], ACCT_STR);
if (counts)
++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
} else {
@@ -669,29 +682,31 @@
static int read_mv_component(aom_reader *r, const nmv_component *mvcomp,
int usehp) {
int mag, d, fr, hp;
- const int sign = aom_read(r, mvcomp->sign);
- const int mv_class = aom_read_tree(r, av1_mv_class_tree, mvcomp->classes);
+ const int sign = aom_read(r, mvcomp->sign, ACCT_STR);
+ const int mv_class =
+ aom_read_tree(r, av1_mv_class_tree, mvcomp->classes, ACCT_STR);
const int class0 = mv_class == MV_CLASS_0;
// Integer part
if (class0) {
- d = aom_read_tree(r, av1_mv_class0_tree, mvcomp->class0);
+ d = aom_read_tree(r, av1_mv_class0_tree, mvcomp->class0, ACCT_STR);
mag = 0;
} else {
int i;
const int n = mv_class + CLASS0_BITS - 1; // number of bits
d = 0;
- for (i = 0; i < n; ++i) d |= aom_read(r, mvcomp->bits[i]) << i;
+ for (i = 0; i < n; ++i) d |= aom_read(r, mvcomp->bits[i], ACCT_STR) << i;
mag = CLASS0_SIZE << (mv_class + 2);
}
// Fractional part
fr = aom_read_tree(r, av1_mv_fp_tree,
- class0 ? mvcomp->class0_fp[d] : mvcomp->fp);
+ class0 ? mvcomp->class0_fp[d] : mvcomp->fp, ACCT_STR);
// High precision part (if hp is not used, the default value of the hp is 1)
- hp = usehp ? aom_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) : 1;
+ hp = usehp ? aom_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp, ACCT_STR)
+ : 1;
// Result
mag += ((d << 3) | (fr << 1) | hp) + 1;
@@ -704,7 +719,8 @@
MV_JOINT_TYPE joint_type;
const int use_hp = allow_hp && av1_use_mv_hp(ref);
MV diff = { 0, 0 };
- joint_type = (MV_JOINT_TYPE)aom_read_tree(r, av1_mv_joint_tree, ctx->joints);
+ joint_type =
+ (MV_JOINT_TYPE)aom_read_tree(r, av1_mv_joint_tree, ctx->joints, ACCT_STR);
if (mv_joint_vertical(joint_type))
diff.row = read_mv_component(r, &ctx->comps[0], use_hp);
@@ -724,7 +740,7 @@
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
const int ctx = av1_get_reference_mode_context(cm, xd);
const REFERENCE_MODE mode =
- (REFERENCE_MODE)aom_read(r, cm->fc->comp_inter_prob[ctx]);
+ (REFERENCE_MODE)aom_read(r, cm->fc->comp_inter_prob[ctx], ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
if (counts) ++counts->comp_inter[ctx][mode];
return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE
@@ -754,7 +770,7 @@
const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
#endif // CONFIG_EXT_REFS
const int ctx = av1_get_pred_context_comp_ref_p(cm, xd);
- const int bit = aom_read(r, fc->comp_ref_prob[ctx][0]);
+ const int bit = aom_read(r, fc->comp_ref_prob[ctx][0], ACCT_STR);
if (counts) ++counts->comp_ref[ctx][0][bit];
@@ -762,12 +778,12 @@
// Decode forward references.
if (!bit) {
const int ctx1 = av1_get_pred_context_comp_ref_p1(cm, xd);
- const int bit1 = aom_read(r, fc->comp_ref_prob[ctx1][1]);
+ const int bit1 = aom_read(r, fc->comp_ref_prob[ctx1][1], ACCT_STR);
if (counts) ++counts->comp_ref[ctx1][1][bit1];
ref_frame[!idx] = cm->comp_fwd_ref[bit1 ? 0 : 1];
} else {
const int ctx2 = av1_get_pred_context_comp_ref_p2(cm, xd);
- const int bit2 = aom_read(r, fc->comp_ref_prob[ctx2][2]);
+ const int bit2 = aom_read(r, fc->comp_ref_prob[ctx2][2], ACCT_STR);
if (counts) ++counts->comp_ref[ctx2][2][bit2];
ref_frame[!idx] = cm->comp_fwd_ref[bit2 ? 3 : 2];
}
@@ -775,7 +791,8 @@
// Decode backward references.
{
const int ctx_bwd = av1_get_pred_context_comp_bwdref_p(cm, xd);
- const int bit_bwd = aom_read(r, fc->comp_bwdref_prob[ctx_bwd][0]);
+ const int bit_bwd =
+ aom_read(r, fc->comp_bwdref_prob[ctx_bwd][0], ACCT_STR);
if (counts) ++counts->comp_bwdref[ctx_bwd][0][bit_bwd];
ref_frame[idx] = cm->comp_bwd_ref[bit_bwd];
}
@@ -786,38 +803,38 @@
} else if (mode == SINGLE_REFERENCE) {
#if CONFIG_EXT_REFS
const int ctx0 = av1_get_pred_context_single_ref_p1(xd);
- const int bit0 = aom_read(r, fc->single_ref_prob[ctx0][0]);
+ const int bit0 = aom_read(r, fc->single_ref_prob[ctx0][0], ACCT_STR);
if (counts) ++counts->single_ref[ctx0][0][bit0];
if (bit0) {
const int ctx1 = av1_get_pred_context_single_ref_p2(xd);
- const int bit1 = aom_read(r, fc->single_ref_prob[ctx1][1]);
+ const int bit1 = aom_read(r, fc->single_ref_prob[ctx1][1], ACCT_STR);
if (counts) ++counts->single_ref[ctx1][1][bit1];
ref_frame[0] = bit1 ? ALTREF_FRAME : BWDREF_FRAME;
} else {
const int ctx2 = av1_get_pred_context_single_ref_p3(xd);
- const int bit2 = aom_read(r, fc->single_ref_prob[ctx2][2]);
+ const int bit2 = aom_read(r, fc->single_ref_prob[ctx2][2], ACCT_STR);
if (counts) ++counts->single_ref[ctx2][2][bit2];
if (bit2) {
const int ctx4 = av1_get_pred_context_single_ref_p5(xd);
- const int bit4 = aom_read(r, fc->single_ref_prob[ctx4][4]);
+ const int bit4 = aom_read(r, fc->single_ref_prob[ctx4][4], ACCT_STR);
if (counts) ++counts->single_ref[ctx4][4][bit4];
ref_frame[0] = bit4 ? GOLDEN_FRAME : LAST3_FRAME;
} else {
const int ctx3 = av1_get_pred_context_single_ref_p4(xd);
- const int bit3 = aom_read(r, fc->single_ref_prob[ctx3][3]);
+ const int bit3 = aom_read(r, fc->single_ref_prob[ctx3][3], ACCT_STR);
if (counts) ++counts->single_ref[ctx3][3][bit3];
ref_frame[0] = bit3 ? LAST2_FRAME : LAST_FRAME;
}
}
#else
const int ctx0 = av1_get_pred_context_single_ref_p1(xd);
- const int bit0 = aom_read(r, fc->single_ref_prob[ctx0][0]);
+ const int bit0 = aom_read(r, fc->single_ref_prob[ctx0][0], ACCT_STR);
if (counts) ++counts->single_ref[ctx0][0][bit0];
if (bit0) {
const int ctx1 = av1_get_pred_context_single_ref_p2(xd);
- const int bit1 = aom_read(r, fc->single_ref_prob[ctx1][1]);
+ const int bit1 = aom_read(r, fc->single_ref_prob[ctx1][1], ACCT_STR);
if (counts) ++counts->single_ref[ctx1][1][bit1];
ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
} else {
@@ -839,8 +856,9 @@
int motion_mode;
FRAME_COUNTS *counts = xd->counts;
- motion_mode = aom_read_tree(r, av1_motion_mode_tree,
- cm->fc->motion_mode_prob[mbmi->sb_type]);
+ motion_mode =
+ aom_read_tree(r, av1_motion_mode_tree,
+ cm->fc->motion_mode_prob[mbmi->sb_type], ACCT_STR);
if (counts) ++counts->motion_mode[mbmi->sb_type][motion_mode];
return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
} else {
@@ -869,11 +887,13 @@
FRAME_COUNTS *counts = xd->counts;
#if CONFIG_DAALA_EC
const InterpFilter type =
- (InterpFilter)av1_switchable_interp_inv[aom_read_tree_cdf(
- r, cm->fc->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS)];
+ (InterpFilter)av1_switchable_interp_inv[aom_read_symbol(
+ r, cm->fc->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS,
+ ACCT_STR)];
#else
const InterpFilter type = (InterpFilter)aom_read_tree(
- r, av1_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx]);
+ r, av1_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx],
+ ACCT_STR);
#endif
if (counts) ++counts->switchable_interp[ctx][type];
return type;
@@ -1169,7 +1189,7 @@
return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
} else {
const int ctx = av1_get_intra_inter_context(xd);
- const int is_inter = aom_read(r, cm->fc->intra_inter_prob[ctx]);
+ const int is_inter = aom_read(r, cm->fc->intra_inter_prob[ctx], ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
if (counts) ++counts->intra_inter[ctx][is_inter];
return is_inter;
@@ -1528,7 +1548,8 @@
#endif
is_interintra_allowed(mbmi)) {
const int bsize_group = size_group_lookup[bsize];
- const int interintra = aom_read(r, cm->fc->interintra_prob[bsize_group]);
+ const int interintra =
+ aom_read(r, cm->fc->interintra_prob[bsize_group], ACCT_STR);
if (xd->counts) xd->counts->interintra[bsize_group][interintra]++;
assert(mbmi->ref_frame[1] == NONE);
if (interintra) {
@@ -1545,12 +1566,12 @@
#endif // CONFIG_EXT_INTRA
if (is_interintra_wedge_used(bsize)) {
mbmi->use_wedge_interintra =
- aom_read(r, cm->fc->wedge_interintra_prob[bsize]);
+ aom_read(r, cm->fc->wedge_interintra_prob[bsize], ACCT_STR);
if (xd->counts)
xd->counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
if (mbmi->use_wedge_interintra) {
mbmi->interintra_wedge_index =
- aom_read_literal(r, get_wedge_bits_lookup(bsize));
+ aom_read_literal(r, get_wedge_bits_lookup(bsize), ACCT_STR);
mbmi->interintra_wedge_sign = 0;
}
}
@@ -1579,13 +1600,13 @@
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
is_interinter_wedge_used(bsize)) {
mbmi->use_wedge_interinter =
- aom_read(r, cm->fc->wedge_interinter_prob[bsize]);
+ aom_read(r, cm->fc->wedge_interinter_prob[bsize], ACCT_STR);
if (xd->counts)
xd->counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
if (mbmi->use_wedge_interinter) {
mbmi->interinter_wedge_index =
- aom_read_literal(r, get_wedge_bits_lookup(bsize));
- mbmi->interinter_wedge_sign = aom_read_bit(r);
+ aom_read_literal(r, get_wedge_bits_lookup(bsize), ACCT_STR);
+ mbmi->interinter_wedge_sign = aom_read_bit(r, ACCT_STR);
}
}
#endif // CONFIG_EXT_INTER
@@ -1655,7 +1676,7 @@
int use_rect_tx = 0;
if (is_rect_tx_allowed) {
- use_rect_tx = aom_read(r, cm->fc->rect_tx_prob[tx_size_cat]);
+ use_rect_tx = aom_read(r, cm->fc->rect_tx_prob[tx_size_cat], ACCT_STR);
if (xd->counts) {
++xd->counts->rect_tx[tx_size_cat][use_rect_tx];
}
@@ -1742,7 +1763,8 @@
if (eset > 0) {
mbmi->tx_type = aom_read_tree(
r, av1_ext_tx_inter_tree[eset],
- cm->fc->inter_ext_tx_prob[eset][txsize_sqr_map[mbmi->tx_size]]);
+ cm->fc->inter_ext_tx_prob[eset][txsize_sqr_map[mbmi->tx_size]],
+ ACCT_STR);
if (counts)
++counts->inter_ext_tx[eset][txsize_sqr_map[mbmi->tx_size]]
[mbmi->tx_type];
@@ -1751,7 +1773,8 @@
if (eset > 0) {
mbmi->tx_type = aom_read_tree(
r, av1_ext_tx_intra_tree[eset],
- cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]);
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+ ACCT_STR);
if (counts)
++counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode]
[mbmi->tx_type];
@@ -1769,22 +1792,24 @@
FRAME_COUNTS *counts = xd->counts;
if (inter_block) {
#if CONFIG_DAALA_EC
- mbmi->tx_type = av1_ext_tx_inv[aom_read_tree_cdf(
- r, cm->fc->inter_ext_tx_cdf[mbmi->tx_size], TX_TYPES)];
+ mbmi->tx_type = av1_ext_tx_inv[aom_read_symbol(
+ r, cm->fc->inter_ext_tx_cdf[mbmi->tx_size], TX_TYPES, ACCT_STR)];
#else
- mbmi->tx_type = aom_read_tree(r, av1_ext_tx_tree,
- cm->fc->inter_ext_tx_prob[mbmi->tx_size]);
+ mbmi->tx_type =
+ aom_read_tree(r, av1_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[mbmi->tx_size], ACCT_STR);
#endif
if (counts) ++counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
} else {
const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
#if CONFIG_DAALA_EC
- mbmi->tx_type = av1_ext_tx_inv[aom_read_tree_cdf(
- r, cm->fc->intra_ext_tx_cdf[mbmi->tx_size][tx_type_nom], TX_TYPES)];
+ mbmi->tx_type = av1_ext_tx_inv[aom_read_symbol(
+ r, cm->fc->intra_ext_tx_cdf[mbmi->tx_size][tx_type_nom], TX_TYPES,
+ ACCT_STR)];
#else
mbmi->tx_type = aom_read_tree(
r, av1_ext_tx_tree,
- cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom], ACCT_STR);
#endif
if (counts)
++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index 61b9fc0..9952650 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -126,6 +126,9 @@
#if CONFIG_LOOP_RESTORATION
av1_loop_restoration_precal();
#endif // CONFIG_LOOP_RESTORATION
+#if CONFIG_ACCOUNTING
+ aom_accounting_init(&pbi->accounting);
+#endif
cm->error.setjmp = 0;
@@ -154,6 +157,10 @@
av1_loop_filter_dealloc(&pbi->lf_row_sync);
}
+#if CONFIG_ACCOUNTING
+ aom_accounting_clear(&pbi->accounting);
+#endif
+
aom_free(pbi);
}
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 919e7b8..7575260 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -22,6 +22,9 @@
#include "av1/common/thread_common.h"
#include "av1/common/onyxc_int.h"
#include "av1/decoder/dthread.h"
+#if CONFIG_ACCOUNTING
+#include "av1/common/accounting.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -100,6 +103,10 @@
int tile_col_size_bytes;
int dec_tile_row, dec_tile_col;
#endif // CONFIG_EXT_TILE
+#if CONFIG_ACCOUNTING
+ Accounting accounting;
+#endif
+
} AV1Decoder;
int av1_receive_compressed_data(struct AV1Decoder *pbi, size_t size,
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 24bcd21..1bd4c0d 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -22,6 +22,8 @@
#include "av1/decoder/detokenize.h"
+#define ACCT_STR __func__
+
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
@@ -41,7 +43,7 @@
static INLINE int read_coeff(const aom_prob *probs, int n, aom_reader *r) {
int i, val = 0;
- for (i = 0; i < n; ++i) val = (val << 1) | aom_read(r, probs[i]);
+ for (i = 0; i < n; ++i) val = (val << 1) | aom_read(r, probs[i], ACCT_STR);
return val;
}
@@ -142,7 +144,7 @@
band = *band_translate++;
prob = coef_probs[band][ctx];
if (counts) ++eob_branch_count[band][ctx];
- if (!aom_read(r, prob[EOB_CONTEXT_NODE])) {
+ if (!aom_read(r, prob[EOB_CONTEXT_NODE], ACCT_STR)) {
INCREMENT_COUNT(EOB_MODEL_TOKEN);
break;
}
@@ -151,7 +153,7 @@
dqv_val = &dq_val[band][0];
#endif // CONFIG_NEW_QUANT
- while (!aom_read(r, prob[ZERO_CONTEXT_NODE])) {
+ while (!aom_read(r, prob[ZERO_CONTEXT_NODE], ACCT_STR)) {
INCREMENT_COUNT(ZERO_TOKEN);
dqv = dq[1];
token_cache[scan[c]] = 0;
@@ -166,8 +168,8 @@
}
#if CONFIG_ANS
cdf = &coef_cdfs[band][ctx];
- token =
- ONE_TOKEN + aom_read_symbol(r, *cdf, CATEGORY6_TOKEN - ONE_TOKEN + 1);
+ token = ONE_TOKEN +
+ aom_read_symbol(r, *cdf, CATEGORY6_TOKEN - ONE_TOKEN + 1, ACCT_STR);
INCREMENT_COUNT(ONE_TOKEN + (token > ONE_TOKEN));
switch (token) {
case ONE_TOKEN:
@@ -211,14 +213,14 @@
} break;
}
#else
- if (!aom_read(r, prob[ONE_CONTEXT_NODE])) {
+ if (!aom_read(r, prob[ONE_CONTEXT_NODE], ACCT_STR)) {
INCREMENT_COUNT(ONE_TOKEN);
token = ONE_TOKEN;
val = 1;
} else {
INCREMENT_COUNT(TWO_TOKEN);
token = aom_read_tree(r, av1_coef_con_tree,
- av1_pareto8_full[prob[PIVOT_NODE] - 1]);
+ av1_pareto8_full[prob[PIVOT_NODE] - 1], ACCT_STR);
switch (token) {
case TWO_TOKEN:
case THREE_TOKEN:
@@ -275,12 +277,13 @@
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_AOM_HIGHBITDEPTH
- dqcoeff[scan[c]] = highbd_check_range((aom_read_bit(r) ? -v : v), xd->bd);
+ dqcoeff[scan[c]] =
+ highbd_check_range((aom_read_bit(r, ACCT_STR) ? -v : v), xd->bd);
#else
- dqcoeff[scan[c]] = check_range(aom_read_bit(r) ? -v : v);
+ dqcoeff[scan[c]] = check_range(aom_read_bit(r, ACCT_STR) ? -v : v);
#endif // CONFIG_AOM_HIGHBITDEPTH
#else
- dqcoeff[scan[c]] = aom_read_bit(r) ? -v : v;
+ dqcoeff[scan[c]] = aom_read_bit(r, ACCT_STR) ? -v : v;
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
token_cache[scan[c]] = av1_pt_energy_class[token];
++c;
@@ -355,7 +358,7 @@
color_ctx =
av1_get_palette_color_context(color_map, cols, i, j, n, color_order);
color_idx = aom_read_tree(r, av1_palette_color_tree[n - 2],
- prob[n - 2][color_ctx]);
+ prob[n - 2][color_ctx], ACCT_STR);
assert(color_idx >= 0 && color_idx < n);
color_map[i * cols + j] = color_order[color_idx];
}
diff --git a/av1/decoder/dsubexp.c b/av1/decoder/dsubexp.c
index ebcd784..ee6a295 100644
--- a/av1/decoder/dsubexp.c
+++ b/av1/decoder/dsubexp.c
@@ -21,23 +21,29 @@
return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1);
}
-static int decode_uniform(aom_reader *r) {
+#define decode_uniform(r, ACCT_STR_NAME) \
+ decode_uniform_(r ACCT_STR_ARG(ACCT_STR_NAME))
+#define decode_term_subexp(r, ACCT_STR_NAME) \
+ decode_term_subexp_(r ACCT_STR_ARG(ACCT_STR_NAME))
+
+static int decode_uniform_(aom_reader *r ACCT_STR_PARAM) {
const int l = 8;
const int m = (1 << l) - 190;
- const int v = aom_read_literal(r, l - 1);
- return v < m ? v : (v << 1) - m + aom_read_bit(r);
+ const int v = aom_read_literal(r, l - 1, ACCT_STR_NAME);
+ return v < m ? v : (v << 1) - m + aom_read_bit(r, ACCT_STR_NAME);
}
static int inv_remap_prob(int v, int m) {
+ /* clang-format off */
static uint8_t inv_map_table[MAX_PROB - 1] = {
- 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189,
- 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27,
- 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
- 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60,
- 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76,
- 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92,
- 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
+ 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189,
+ 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27,
+ 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60,
+ 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76,
+ 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92,
+ 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125,
126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141,
142, 143, 144, 145, 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157,
@@ -46,8 +52,8 @@
191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222,
223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
- 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
- };
+ 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253
+ }; /* clang-format on */
assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0])));
v = inv_map_table[v];
m--;
@@ -58,26 +64,31 @@
}
}
-static int decode_term_subexp(aom_reader *r) {
- if (!aom_read_bit(r)) return aom_read_literal(r, 4);
- if (!aom_read_bit(r)) return aom_read_literal(r, 4) + 16;
- if (!aom_read_bit(r)) return aom_read_literal(r, 5) + 32;
- return decode_uniform(r) + 64;
+static int decode_term_subexp_(aom_reader *r ACCT_STR_PARAM) {
+ if (!aom_read_bit(r, ACCT_STR_NAME))
+ return aom_read_literal(r, 4, ACCT_STR_NAME);
+ if (!aom_read_bit(r, ACCT_STR_NAME))
+ return aom_read_literal(r, 4, ACCT_STR_NAME) + 16;
+ if (!aom_read_bit(r, ACCT_STR_NAME))
+ return aom_read_literal(r, 5, ACCT_STR_NAME) + 32;
+ return decode_uniform(r, ACCT_STR_NAME) + 64;
}
-void av1_diff_update_prob(aom_reader *r, aom_prob *p) {
- if (aom_read(r, DIFF_UPDATE_PROB)) {
- const int delp = decode_term_subexp(r);
+void av1_diff_update_prob_(aom_reader *r, aom_prob *p ACCT_STR_PARAM) {
+ if (aom_read(r, DIFF_UPDATE_PROB, ACCT_STR_NAME)) {
+ const int delp = decode_term_subexp(r, ACCT_STR_NAME);
*p = (aom_prob)inv_remap_prob(delp, *p);
}
}
+#if CONFIG_GLOBAL_MOTION
int aom_read_primitive_symmetric(aom_reader *r, unsigned int mag_bits) {
- if (aom_read_bit(r)) {
- int s = aom_read_bit(r);
- int x = aom_read_literal(r, mag_bits) + 1;
+ if (aom_read_bit(r, ACCT_STR_NAME)) {
+ int s = aom_read_bit(r, ACCT_STR_NAME);
+ int x = aom_read_literal(r, mag_bits, ACCT_STR_NAME) + 1;
return (s > 0 ? -x : x);
} else {
return 0;
}
}
+#endif // CONFIG_GLOBAL_MOTION
\ No newline at end of file
diff --git a/av1/decoder/dsubexp.h b/av1/decoder/dsubexp.h
index c0d372a..60aa7df 100644
--- a/av1/decoder/dsubexp.h
+++ b/av1/decoder/dsubexp.h
@@ -18,15 +18,22 @@
extern "C" {
#endif
-void av1_diff_update_prob(aom_reader *r, aom_prob *p);
+#if CONFIG_ACCOUNTING
+#define av1_diff_update_prob(r, p, str) av1_diff_update_prob_(r, p, str)
+#else
+#define av1_diff_update_prob(r, p, str) av1_diff_update_prob_(r, p)
+#endif
+
+void av1_diff_update_prob_(aom_reader *r, aom_prob *p ACCT_STR_PARAM);
#ifdef __cplusplus
} // extern "C"
#endif
-
+#if CONFIG_GLOBAL_MOTION
// mag_bits is number of bits for magnitude. The alphabet is of size
// 2 * 2^mag_bits + 1, symmetric around 0, where one bit is used to
// indicate 0 or non-zero, mag_bits bits are used to indicate magnitide
// and 1 more bit for the sign if non-zero.
int aom_read_primitive_symmetric(aom_reader *r, unsigned int mag_bits);
+#endif // CONFIG_GLOBAL_MOTION
#endif // AV1_DECODER_DSUBEXP_H_
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index c361dbf..948c4f5 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -808,8 +808,13 @@
static void write_segment_id(aom_writer *w, const struct segmentation *seg,
const struct segmentation_probs *segp,
int segment_id) {
- if (seg->enabled && seg->update_map)
+ if (seg->enabled && seg->update_map) {
+#if CONFIG_DAALA_EC
+ aom_write_symbol(w, segment_id, segp->tree_cdf, MAX_SEGMENTS);
+#else
aom_write_tree(w, av1_segment_tree, segp->tree_probs, segment_id, 3, 0);
+#endif
+ }
}
// This function encodes the reference frame
@@ -2736,6 +2741,10 @@
prob_diff_update(av1_segment_tree, cm->fc->seg.tree_probs,
cm->counts.seg.tree_total, MAX_SEGMENTS, w);
}
+#if CONFIG_DAALA_EC
+ av1_tree_to_cdf(av1_segment_tree, cm->fc->seg.tree_probs,
+ cm->fc->seg.tree_cdf);
+#endif
}
static void write_txfm_mode(TX_MODE mode, struct aom_write_bit_buffer *wb) {
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 297c354..004ad68 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4459,7 +4459,9 @@
best_param = curr_param;
for (i = 0; i < n_refinements; i++) {
// look to the left
- *param = curr_param - step;
+ *param =
+ (int16_t)clamp(curr_param - step, p < 2 ? GM_TRANS_MIN : GM_ALPHA_MIN,
+ p < 2 ? GM_TRANS_MAX : GM_ALPHA_MAX);
step_error =
av1_warp_erroradv(wm,
#if CONFIG_AOM_HIGHBITDEPTH
@@ -4476,7 +4478,9 @@
}
// look to the right
- *param = curr_param + step;
+ *param =
+ (int16_t)clamp(curr_param + step, p < 2 ? GM_TRANS_MIN : GM_ALPHA_MIN,
+ p < 2 ? GM_TRANS_MAX : GM_ALPHA_MAX);
step_error =
av1_warp_erroradv(wm,
#if CONFIG_AOM_HIGHBITDEPTH
@@ -5245,11 +5249,14 @@
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 &&
is_inter_block(mbmi) && !(mbmi->skip || seg_skip)) {
- if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (is_rect_tx(mbmi->tx_size)) {
set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
+ } else {
+ if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
}
+#else
+ if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
} else {
TX_SIZE tx_size;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index b9feca9..c39b78a 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1,11 +1,12 @@
/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <limits.h>
@@ -420,8 +421,6 @@
// Delete sementation map
aom_free(cpi->segmentation_map);
cpi->segmentation_map = NULL;
- aom_free(cpi->coding_context.last_frame_seg_map_copy);
- cpi->coding_context.last_frame_seg_map_copy = NULL;
#if CONFIG_REF_MV
for (i = 0; i < NMV_CONTEXTS; ++i) {
@@ -535,9 +534,6 @@
memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
- memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
- (cm->mi_rows * cm->mi_cols));
-
av1_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
av1_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
@@ -576,9 +572,6 @@
memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
- memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
- (cm->mi_rows * cm->mi_cols));
-
av1_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
av1_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
@@ -1899,12 +1892,6 @@
aom_free(cpi->active_map.map);
CHECK_MEM_ERROR(cm, cpi->active_map.map,
aom_calloc(cm->mi_rows * cm->mi_cols, 1));
-
- // And a place holder structure is the coding context
- // for use if we want to save and restore it
- aom_free(cpi->coding_context.last_frame_seg_map_copy);
- CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
- aom_calloc(cm->mi_rows * cm->mi_cols, 1));
}
void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 00b30fd..e2046ec 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1,11 +1,12 @@
/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_ENCODER_ENCODER_H_
@@ -59,8 +60,6 @@
int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
#endif
- unsigned char *last_frame_seg_map_copy;
-
// 0 = Intra, Last, GF, ARF
signed char last_ref_lf_deltas[TOTAL_REFS_PER_FRAME];
// 0 = ZERO_MV, MV
diff --git a/av1/encoder/ransac.c b/av1/encoder/ransac.c
index 0c8ad67..714d567 100644
--- a/av1/encoder/ransac.c
+++ b/av1/encoder/ransac.c
@@ -92,16 +92,16 @@
}
}
-static int get_rand_indices(int npoints, int minpts, int *indices) {
+static int get_rand_indices(int npoints, int minpts, int *indices,
+ unsigned int *seed) {
int i, j;
- unsigned int seed = (unsigned int)npoints;
- int ptr = rand_r(&seed) % npoints;
+ int ptr = rand_r(seed) % npoints;
if (minpts > npoints) return 0;
indices[0] = ptr;
ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
i = 1;
while (i < minpts) {
- int index = rand_r(&seed) % npoints;
+ int index = rand_r(seed) % npoints;
while (index) {
ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
for (j = 0; j < i; ++j) {
@@ -132,6 +132,7 @@
int N = 10000, trial_count = 0;
int i;
int ret_val = 0;
+ unsigned int seed = (unsigned int)npoints;
int max_inliers = 0;
double best_variance = 0.0;
@@ -139,7 +140,7 @@
WarpedMotionParams wm;
double points1[2 * MAX_MINPTS];
double points2[2 * MAX_MINPTS];
- int indices[MAX_MINPTS];
+ int indices[MAX_MINPTS] = { 0 };
double *best_inlier_set1;
double *best_inlier_set2;
@@ -153,10 +154,6 @@
double *cnp1, *cnp2;
double T1[9], T2[9];
- // srand((unsigned)time(NULL)) ;
- // better to make this deterministic for a given sequence for ease of testing
- srand(npoints);
-
*number_of_inliers = 0;
if (npoints < minpts * MINPTS_MULTIPLIER) {
printf("Cannot find motion with %d matches\n", npoints);
@@ -203,7 +200,7 @@
int num_degenerate_iter = 0;
while (degenerate) {
num_degenerate_iter++;
- if (!get_rand_indices(npoints, minpts, indices)) {
+ if (!get_rand_indices(npoints, minpts, indices, &seed)) {
ret_val = 1;
goto finish_ransac;
}
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 8399a85..463570a 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4138,7 +4138,7 @@
bits =
(gm->motion_params.wmmat[0].as_int ? ((GM_ABS_TRANS_BITS + 1) * 2) : 0);
}
- return (bits << AV1_PROB_COST_SHIFT) + gmtype_cost[gm->gmtype];
+ return bits ? (bits << AV1_PROB_COST_SHIFT) + gmtype_cost[gm->gmtype] : 0;
}
#define GLOBAL_MOTION_RATE(ref) \
@@ -7810,47 +7810,6 @@
rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}
-// This function is designed to apply a bias or adjustment to an rd value based
-// on the relative variance of the source and reconstruction.
-#define LOW_VAR_THRESH 16
-#define VLOW_ADJ_MAX 25
-#define VHIGH_ADJ_MAX 8
-static void rd_variance_adjustment(MACROBLOCK *x, int64_t *this_rd,
- MV_REFERENCE_FRAME ref_frame,
- unsigned int source_variance) {
- unsigned int recon_variance = x->recon_variance;
- unsigned int absvar_diff = 0;
- int64_t var_error = 0;
- int64_t var_factor = 0;
-
- if (*this_rd == INT64_MAX) return;
-
- if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
- absvar_diff = (source_variance > recon_variance)
- ? (source_variance - recon_variance)
- : (recon_variance - source_variance);
-
- var_error = ((int64_t)200 * source_variance * recon_variance) /
- (((int64_t)source_variance * source_variance) +
- ((int64_t)recon_variance * recon_variance));
- var_error = 100 - var_error;
- }
-
- // Source variance above a threshold and ref frame is intra.
- // This case is targeted mainly at discouraging intra modes that give rise
- // to a predictor with a low spatial complexity compared to the source.
- if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
- (source_variance > recon_variance)) {
- var_factor = AOMMIN(absvar_diff, AOMMIN(VLOW_ADJ_MAX, var_error));
- // A second possible case of interest is where the source variance
- // is very low and we wish to discourage false texture or motion trails.
- } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
- (recon_variance > source_variance)) {
- var_factor = AOMMIN(absvar_diff, AOMMIN(VHIGH_ADJ_MAX, var_error));
- }
- *this_rd += (*this_rd * var_factor) / 100;
-}
-
// Do we have an internal image edge (e.g. formatting bars).
int av1_internal_image_edge(const AV1_COMP *cpi) {
return (cpi->oxcf.pass == 2) &&
@@ -8109,7 +8068,6 @@
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
}
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- rd_variance_adjustment(x, &this_rd, INTRA_FRAME, x->source_variance);
if (this_rd < *best_intra_rd) {
*best_intra_rd = this_rd;
@@ -9175,10 +9133,6 @@
#endif // CONFIG_MOTION_VAR
}
- // Apply an adjustment to the rd value based on the similarity of the
- // source variance and reconstructed variance.
- rd_variance_adjustment(x, &this_rd, ref_frame, x->source_variance);
-
if (ref_frame == INTRA_FRAME) {
// Keep record of best intra rd
if (this_rd < best_intra_rd) {
diff --git a/av1/encoder/segmentation.c b/av1/encoder/segmentation.c
index dfcb404..3292da4 100644
--- a/av1/encoder/segmentation.c
+++ b/av1/encoder/segmentation.c
@@ -366,6 +366,9 @@
} else {
seg->temporal_update = 0;
}
+#if CONFIG_DAALA_EC
+ av1_tree_to_cdf(av1_segment_tree, segp->tree_probs, segp->tree_cdf);
+#endif
}
void av1_reset_segment_features(AV1_COMMON *cm) {
diff --git a/configure b/configure
index 53ba10e..611756f 100755
--- a/configure
+++ b/configure
@@ -61,6 +61,7 @@
enable av1 temporal denoising
${toggle_webm_io} enable input from and output to WebM container
${toggle_libyuv} enable libyuv
+ ${toggle_accounting} enable bit accounting
Codecs:
Codecs can be selectively enabled or disabled individually, or by family:
@@ -283,6 +284,7 @@
alt_intra
palette
daala_ec
+ cb4x4
"
CONFIG_LIST="
dependency_tracking
@@ -327,6 +329,7 @@
unit_tests
webm_io
libyuv
+ accounting
decode_perf_tests
encode_perf_tests
multi_res_encoding
@@ -387,6 +390,7 @@
unit_tests
webm_io
libyuv
+ accounting
decode_perf_tests
encode_perf_tests
multi_res_encoding
diff --git a/test/accounting_test.cc b/test/accounting_test.cc
new file mode 100644
index 0000000..122f9b8
--- /dev/null
+++ b/test/accounting_test.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+
+using libaom_test::ACMRandom;
+
+TEST(AV1, TestAccounting) {
+ const int kBufferSize = 10000;
+ const int kSymbols = 1024;
+ aom_writer bw;
+ uint8_t bw_buffer[kBufferSize];
+ aom_start_encode(&bw, bw_buffer);
+ for (int i = 0; i < kSymbols; i++) {
+ aom_write(&bw, 0, 32);
+ aom_write(&bw, 0, 32);
+ aom_write(&bw, 0, 32);
+ }
+ aom_stop_encode(&bw);
+ aom_reader br;
+ aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+
+ Accounting accounting;
+ aom_accounting_init(&accounting);
+ br.accounting = &accounting;
+ for (int i = 0; i < kSymbols; i++) {
+ aom_read(&br, 32, "A");
+ }
+ // Consecutive symbols that are the same are coalesced.
+ GTEST_ASSERT_EQ(accounting.syms.num_syms, 1);
+ GTEST_ASSERT_EQ(accounting.syms.syms[0].samples, (unsigned int)kSymbols);
+
+ aom_accounting_reset(&accounting);
+ GTEST_ASSERT_EQ(accounting.syms.num_syms, 0);
+
+ // Should record 2 * kSymbols accounting symbols.
+ aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+ br.accounting = &accounting;
+ for (int i = 0; i < kSymbols; i++) {
+ aom_read(&br, 32, "A");
+ aom_read(&br, 32, "B");
+ aom_read(&br, 32, "B");
+ }
+ GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols * 2);
+ uint32_t tell_frac = aom_reader_tell_frac(&br);
+ for (int i = 0; i < accounting.syms.num_syms; i++) {
+ tell_frac -= accounting.syms.syms[i].bits;
+ }
+ GTEST_ASSERT_EQ(tell_frac, 0U);
+
+ GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, "A"),
+ aom_accounting_dictionary_lookup(&accounting, "A"));
+
+ // Check for collisions. The current aom_accounting_hash function returns
+ // the same hash code for AB and BA.
+ GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, "AB"),
+ aom_accounting_dictionary_lookup(&accounting, "BA"));
+}
diff --git a/test/av1_fwd_txfm1d_test.cc b/test/av1_fwd_txfm1d_test.cc
index 03bed19..f671097 100644
--- a/test/av1_fwd_txfm1d_test.cc
+++ b/test/av1_fwd_txfm1d_test.cc
@@ -23,10 +23,15 @@
const TYPE_TXFM txfm_type_ls[2] = { TYPE_DCT, TYPE_ADST };
const int txfm_size_num = 5;
-const int txfm_size_ls[5] = { 4, 8, 16, 32 };
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
const TxfmFunc fwd_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+ { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
+ av1_fdct64_new },
+#else
{ av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
+#endif
{ av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
};
diff --git a/test/av1_inv_txfm1d_test.cc b/test/av1_inv_txfm1d_test.cc
index 110d4c3..8470fc0 100644
--- a/test/av1_inv_txfm1d_test.cc
+++ b/test/av1_inv_txfm1d_test.cc
@@ -18,15 +18,25 @@
namespace {
const int txfm_type_num = 2;
const int txfm_size_num = 5;
-const int txfm_size_ls[5] = { 4, 8, 16, 32 };
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
const TxfmFunc fwd_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+ { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
+ av1_fdct64_new },
+#else
{ av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
+#endif
{ av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
};
const TxfmFunc inv_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+ { av1_idct4_new, av1_idct8_new, av1_idct16_new, av1_idct32_new,
+ av1_idct64_new },
+#else
{ av1_idct4_new, av1_idct8_new, av1_idct16_new, av1_idct32_new, NULL },
+#endif
{ av1_iadst4_new, av1_iadst8_new, av1_iadst16_new, av1_iadst32_new, NULL }
};
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
index de9b6e4..1000f58 100644
--- a/test/boolcoder_test.cc
+++ b/test/boolcoder_test.cc
@@ -80,7 +80,7 @@
} else if (bit_method == 3) {
bit = bit_rnd(2);
}
- GTEST_ASSERT_EQ(aom_read(&br, probas[i]), bit)
+ GTEST_ASSERT_EQ(aom_read(&br, probas[i], NULL), bit)
<< "pos: " << i << " / " << kBitsToTest
<< " bit_method: " << bit_method << " method: " << method;
}
@@ -88,3 +88,54 @@
}
}
}
+
+#if CONFIG_DAALA_EC
+#define FRAC_DIFF_TOTAL_ERROR 0.07
+#else
+#define FRAC_DIFF_TOTAL_ERROR 0.2
+#endif
+
+TEST(AV1, TestTell) {
+ const int kBufferSize = 10000;
+ aom_writer bw;
+ uint8_t bw_buffer[kBufferSize];
+ const int kSymbols = 1024;
+ // Coders are noisier at low probabilities, so we start at p = 4.
+ for (int p = 4; p <= 256; p++) {
+ double probability = p / 256.;
+ aom_start_encode(&bw, bw_buffer);
+ for (int i = 0; i < kSymbols; i++) {
+ aom_write(&bw, 0, p);
+ }
+ aom_stop_encode(&bw);
+ aom_reader br;
+ aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+ ptrdiff_t last_tell = aom_reader_tell(&br);
+ ptrdiff_t last_tell_frac = aom_reader_tell_frac(&br);
+ double frac_diff_total = 0;
+ GTEST_ASSERT_GE(aom_reader_tell(&br), 0);
+ GTEST_ASSERT_LE(aom_reader_tell(&br), 1);
+ for (int i = 0; i < kSymbols; i++) {
+ aom_read(&br, p, NULL);
+ ptrdiff_t tell = aom_reader_tell(&br);
+ ptrdiff_t tell_frac = aom_reader_tell_frac(&br);
+ GTEST_ASSERT_GE(tell, last_tell) << "tell: " << tell
+ << ", last_tell: " << last_tell;
+ GTEST_ASSERT_GE(tell_frac, last_tell_frac)
+ << "tell_frac: " << tell_frac
+ << ", last_tell_frac: " << last_tell_frac;
+ // Frac tell should round up to tell.
+ GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3);
+ last_tell = tell;
+ frac_diff_total +=
+ fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
+ last_tell_frac = tell_frac;
+ }
+ const int expected = (int)(-kSymbols * log2(probability));
+ // Last tell should be close to the expected value.
+ GTEST_ASSERT_LE(last_tell - expected, 20) << " last_tell: " << last_tell;
+ // The average frac_diff error should be pretty small.
+ GTEST_ASSERT_LE(frac_diff_total / kSymbols, FRAC_DIFF_TOTAL_ERROR)
+ << " frac_diff_total: " << frac_diff_total;
+ }
+}
diff --git a/test/test.mk b/test/test.mk
index 42ff2ba..1146428 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -105,6 +105,9 @@
LIBAOM_TEST_SRCS-yes += ans_test.cc
else
LIBAOM_TEST_SRCS-yes += boolcoder_test.cc
+ifeq ($(CONFIG_ACCOUNTING),yes)
+LIBAOM_TEST_SRCS-yes += accounting_test.cc
+endif
endif
LIBAOM_TEST_SRCS-yes += divu_small_test.cc
#LIBAOM_TEST_SRCS-yes += encoder_parms_get_to_decoder.cc