[NORMATIVE] merge base and br levels coding
av1_read_coeffs_txb() is sped up by 10%
The overall decoder is sped up by 1%
The speed performance is tested by running city_cif.y4m on speed1
with bitrate 1000
BUG=aomedia:1369
Change-Id: I0de5402a88fbb3ea46905a23b32627eba681a250
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c
index e976999..dd6a8bc 100644
--- a/av1/decoder/decodetxb.c
+++ b/av1/decoder/decodetxb.c
@@ -9,12 +9,13 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include "av1/decoder/decodetxb.h"
+
#include "aom_ports/mem.h"
#include "av1/common/idct.h"
#include "av1/common/scan.h"
#include "av1/common/txb_common.h"
#include "av1/decoder/decodemv.h"
-#include "av1/decoder/decodetxb.h"
#define ACCT_STR __func__
@@ -83,7 +84,6 @@
const PLANE_TYPE plane_type = get_plane_type(plane);
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int seg_eob = av1_get_max_eob(tx_size);
- int c = 0, v = 0;
int num_updates = 0;
struct macroblockd_plane *const pd = &xd->plane[plane];
const int16_t *const dequant = pd->seg_dequant_QTX[mbmi->segment_id];
@@ -103,7 +103,6 @@
uint8_t levels_buf[TX_PAD_2D];
uint8_t *const levels = set_levels(levels_buf, width);
DECLARE_ALIGNED(16, uint8_t, level_counts[MAX_TX_SQUARE]);
- int8_t signs[MAX_TX_SQUARE] = { 0 };
uint16_t update_pos[MAX_TX_SQUARE];
const int all_zero = aom_read_symbol(
@@ -218,7 +217,7 @@
// printf("=>[%d, %d], (%d, %d)\n", seg_eob, *eob, eob_pt, eob_extra);
for (int i = 0; i < *eob; ++i) {
- c = *eob - 1 - i;
+ const int c = *eob - 1 - i;
const int pos = scan[c];
const int coeff_ctx = get_nz_map_ctx(levels, pos, bwl, height, c,
c == *eob - 1, tx_size, tx_type);
@@ -231,110 +230,75 @@
cdf = ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx];
nsymbs = 4;
}
- const int level =
- aom_read_symbol(r, cdf, nsymbs, ACCT_STR) + (c == *eob - 1);
+ int level = aom_read_symbol(r, cdf, nsymbs, ACCT_STR) + (c == *eob - 1);
+ if (level > NUM_BASE_LEVELS) {
+#if USE_CAUSAL_BR_CTX
+ const int br_ctx =
+ get_br_ctx(levels, pos, bwl, level_counts[pos], tx_type);
+#else
+ const int br_ctx = get_br_ctx(levels, pos, bwl, level_counts[pos]);
+#endif
+ for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
+ const int k = aom_read_symbol(
+ r,
+ ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx],
+ BR_CDF_SIZE, ACCT_STR);
+ level += k;
+ if (k < BR_CDF_SIZE - 1) break;
+ }
+ if (level > NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
+ update_pos[num_updates] = pos;
+ ++num_updates;
+ }
+ }
levels[get_padded_idx(pos, bwl)] = level;
}
+ for (int i = 0; i < num_updates; ++i) {
+ const int pos = update_pos[i];
+ const int level = levels[get_padded_idx(pos, bwl)];
+ tcoeffs[pos] = level + read_golomb(xd, r);
+ }
+
// Loop to decode all signs in the transform block,
// starting with the sign of the DC (if applicable)
- for (c = 0; c < *eob; ++c) {
+ for (int c = 0; c < *eob; ++c) {
const int pos = scan[c];
- int8_t *const sign = &signs[pos];
- const int level = levels[get_padded_idx(pos, bwl)];
+ int8_t sign;
+ tran_low_t level = levels[get_padded_idx(pos, bwl)];
if (level) {
*max_scan_line = AOMMAX(*max_scan_line, pos);
if (c == 0) {
const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
- *sign = aom_read_symbol(r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx],
- 2, ACCT_STR);
+ sign = aom_read_symbol(r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx],
+ 2, ACCT_STR);
} else {
- *sign = aom_read_bit(r, ACCT_STR);
+ sign = aom_read_bit(r, ACCT_STR);
}
- if (level < 3) {
- cul_level += level;
+ if (level > NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
+ // the quantized coeff with golomb residue is stored in tcoeffs because
+ // levels doesn't have enough bits to store the residue
+ level = tcoeffs[pos];
+ }
+ cul_level += level;
+ tran_low_t dq_coeff;
#if CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
- v = av1_dequant_abscoeff_nuq(level, dequant[!!c], dq_profile, !!c,
- nq_shift);
-#else
- dqv_val = &dq_val[pos != 0][0];
- v = av1_dequant_abscoeff_nuq(level, dequant[!!c], dqv_val, nq_shift);
-#endif // CONFIG_AOM_QM
-#else
- v = level * get_dqv(dequant, scan[c], iqmatrix);
- v = v >> shift;
-#endif // CONFIG_NEW_QUANT
- if (*sign) {
- tcoeffs[pos] = -v;
- } else {
- tcoeffs[pos] = v;
- }
- } else {
- update_pos[num_updates++] = pos;
- }
- }
- }
-
- if (num_updates) {
- for (c = num_updates - 1; c >= 0; --c) {
- const int pos = update_pos[c];
- uint8_t *const level = &levels[get_padded_idx(pos, bwl)];
- int idx = 0;
- int ctx;
-
- assert(*level > NUM_BASE_LEVELS);
-
-#if USE_CAUSAL_BR_CTX
- ctx = get_br_ctx(levels, pos, bwl, level_counts[pos], tx_type);
-#else
- ctx = get_br_ctx(levels, pos, bwl, level_counts[pos]);
-#endif
- for (idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
- int k = aom_read_symbol(
- r, ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][ctx],
- BR_CDF_SIZE, ACCT_STR);
- *level += k;
- if (k < BR_CDF_SIZE - 1) break;
- }
- if (*level <= NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
- cul_level += *level;
- tran_low_t t;
-#if CONFIG_NEW_QUANT
-#if CONFIG_AOM_QM
- t = av1_dequant_abscoeff_nuq(*level, dequant[!!pos], dq_profile, !!pos,
- nq_shift);
-#else
- dqv_val = &dq_val[pos != 0][0];
- t = av1_dequant_abscoeff_nuq(*level, dequant[!!pos], dqv_val, nq_shift);
-#endif // CONFIG_AOM_QM
-#else
- t = *level * get_dqv(dequant, pos, iqmatrix);
- t = t >> shift;
-#endif // CONFIG_NEW_QUANT
- if (signs[pos]) t = -t;
- tcoeffs[pos] = clamp(t, min_value, max_value);
- continue;
- }
- // decode 0-th order Golomb code
- *level = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
- // Save golomb in tcoeffs because adding it to level may incur overflow
- tran_low_t t = *level + read_golomb(xd, r);
- cul_level += (int)t;
-#if CONFIG_NEW_QUANT
-#if CONFIG_AOM_QM
- t = av1_dequant_abscoeff_nuq(t, dequant[!!pos], dq_profile, !!pos,
- nq_shift);
+ dq_coeff = av1_dequant_abscoeff_nuq(level, dequant[!!c], dq_profile, !!c,
+ nq_shift);
#else
dqv_val = &dq_val[pos != 0][0];
- t = av1_dequant_abscoeff_nuq(t, dequant[!!pos], dqv_val, nq_shift);
+ dq_coeff =
+ av1_dequant_abscoeff_nuq(level, dequant[!!c], dqv_val, nq_shift);
#endif // CONFIG_AOM_QM
#else
- t = t * get_dqv(dequant, pos, iqmatrix);
- t = t >> shift;
+ dq_coeff = level * get_dqv(dequant, scan[c], iqmatrix);
+ dq_coeff = dq_coeff >> shift;
#endif // CONFIG_NEW_QUANT
- if (signs[pos]) t = -t;
- tcoeffs[pos] = clamp(t, min_value, max_value);
+ if (sign) {
+ dq_coeff = -dq_coeff;
+ }
+ tcoeffs[pos] = clamp(dq_coeff, min_value, max_value);
}
}
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 3ee6761..4da7a5d 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -9,15 +9,16 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include "av1/encoder/encodetxb.h"
+
#include "aom_ports/mem.h"
-#include "av1/common/scan.h"
#include "av1/common/blockd.h"
#include "av1/common/idct.h"
#include "av1/common/pred_common.h"
+#include "av1/common/scan.h"
#include "av1/encoder/bitstream.h"
-#include "av1/encoder/encodeframe.h"
#include "av1/encoder/cost.h"
-#include "av1/encoder/encodetxb.h"
+#include "av1/encoder/encodeframe.h"
#include "av1/encoder/hash.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
@@ -466,12 +467,13 @@
const int bwl = get_txb_bwl(tx_size);
const int width = get_txb_wide(tx_size);
const int height = get_txb_high(tx_size);
- int update_eob = -1;
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
uint8_t levels_buf[TX_PAD_2D];
uint8_t *const levels = set_levels(levels_buf, width);
DECLARE_ALIGNED(16, uint8_t, level_counts[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
+ uint16_t update_pos[MAX_TX_SQUARE];
+ int num_updates = 0;
aom_write_symbol(w, eob == 0,
ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2);
@@ -544,68 +546,63 @@
const int pos = scan[c];
const int coeff_ctx = coeff_contexts[pos];
const tran_low_t v = tcoeff[pos];
+ const tran_low_t level = abs(v);
if (c == eob - 1) {
aom_write_symbol(
- w, AOMMIN(abs(v), 3) - 1,
+ w, AOMMIN(level, 3) - 1,
ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], 3);
} else {
- aom_write_symbol(w, AOMMIN(abs(v), 3),
+ aom_write_symbol(w, AOMMIN(level, 3),
ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
4);
}
- }
- update_eob = eob - 1;
-
- // Loop to code all signs in the transform block,
- // starting with the sign of DC (if applicable)
- for (c = 0; c < eob; ++c) {
- const tran_low_t v = tcoeff[scan[c]];
- const tran_low_t level = abs(v);
- const int sign = (v < 0) ? 1 : 0;
- if (level == 0) continue;
-
- if (c == 0) {
- aom_write_symbol(
- w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2);
- } else {
- aom_write_bit(w, sign);
- }
- }
-
- if (update_eob >= 0) {
- for (c = update_eob; c >= 0; --c) {
- const int pos = scan[c];
- const tran_low_t level = abs(tcoeff[pos]);
- int idx;
- int ctx;
-
- if (level <= NUM_BASE_LEVELS) continue;
-
+ if (level > NUM_BASE_LEVELS) {
// level is above 1.
const int base_range = level - 1 - NUM_BASE_LEVELS;
#if USE_CAUSAL_BR_CTX
- ctx = get_br_ctx(levels, pos, bwl, level_counts[pos], tx_type);
+ const int br_ctx =
+ get_br_ctx(levels, pos, bwl, level_counts[pos], tx_type);
#else
- ctx = get_br_ctx(levels, pos, bwl, level_counts[pos]);
+ const int br_ctx = get_br_ctx(levels, pos, bwl, level_counts[pos]);
#endif
- for (idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
+ for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
aom_write_symbol(w, k,
#if 0
ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_16X16)][plane_type][ctx],
#else
ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)]
- [plane_type][ctx],
+ [plane_type][br_ctx],
#endif
BR_CDF_SIZE);
if (k < BR_CDF_SIZE - 1) break;
}
- if (base_range < COEFF_BASE_RANGE) continue;
- // use 0-th order Golomb code to handle the residual level.
- write_golomb(w,
- abs(tcoeff[pos]) - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
+ if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS) {
+ update_pos[num_updates] = pos;
+ ++num_updates;
+ }
+ }
+ }
+
+ for (int i = 0; i < num_updates; ++i) {
+ const int pos = update_pos[i];
+ write_golomb(w, abs(tcoeff[pos]) - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
+ }
+
+ // Loop to code all signs in the transform block,
+ // starting with the sign of DC (if applicable)
+ for (c = 0; c < eob; ++c) {
+ const tran_low_t v = tcoeff[scan[c]];
+ const int sign = (v < 0) ? 1 : 0;
+ if (v != 0) {
+ if (c == 0) {
+ aom_write_symbol(
+ w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2);
+ } else {
+ aom_write_bit(w, sign);
+ }
}
}
}