hash_based_trellis speed feature
Add a speed feature that uses hash tables to
reuse previously found optimized coefficients
in av1_optimize_txb, skipping some expensive
optimize_txb calls.
Currently it shows no significant quality
degradation or speed improvement, so it is off
by default. Requires hash_me, lv_map and
lv_map_multi. Adding it to the speed features
required changing AV1_COMMON *cm to AV1_COMP
*cpi in a chain of functions.
Variations that have been tried:
-varying the maximum eob for which the feature
activates: 16, 32, 64. 16 is currently used;
64 has the best hit rate but a longer
execution time.
-varying the data hashed and the length of the
hashes (the first hash is 16 bits and based on
context data, while the second is 16 bits and
based only on pre-optimized qcoeff values).
-softening the data used for the hashes:
ideally this would raise the number of hits
without compromising quality too much.
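For reference, a rough standalone sketch of the
table lookup and eviction scheme (simplified,
with hypothetical names; the authoritative
behavior is the new code in
av1/encoder/encodetxb.c):

  // Illustrative sketch only; these names are not the encoder's.
  #include <stdint.h>

  #define ROWS 65536  // indexed by a 16-bit hash of the block context
  #define WAYS 16     // candidate entries per row, keyed by a qcoeff hash

  typedef struct {
    uint32_t match;      // 16-bit hash of the pre-optimized qcoeffs
    double hits;         // decayed hit counter used for eviction
    int32_t coeffs[16];  // previously optimized qcoeffs (up to eob 16)
  } Entry;

  static Entry table[ROWS][WAYS];

  // Returns the entry holding reusable coefficients for (ctx_hash, qc_hash),
  // or the least-hit entry of the row to overwrite on a miss.
  static Entry *lookup(uint16_t ctx_hash, uint32_t qc_hash, int *is_hit) {
    Entry *row = table[ctx_hash];
    int lowest = 0;
    for (int i = 0; i < WAYS; i++) {
      row[i].hits *= 31.0 / 32.0;  // decay all hit counters
      if (row[i].hits < row[lowest].hits) lowest = i;
    }
    for (int i = 0; i < WAYS; i++) {
      if (row[i].match == qc_hash) {
        row[i].hits += 1.0;
        *is_hit = 1;
        return &row[i];  // hit: copy coeffs[] back, skip the trellis
      }
    }
    *is_hit = 0;
    return &row[lowest];  // miss: run optimize_txb, store the result here
  }

On a hit the stored coefficients replace the
quantized ones and the eob/entropy context are
updated; on a miss the regular optimization runs
and its output overwrites the least-hit slot.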
Change-Id: I94f22be82f3a46637c0489d512f2e334a307575f
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f6e9502..7b36e0f 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4453,8 +4453,7 @@
#endif // CONFIG_CFL
mbmi->skip = 1;
for (int plane = 0; plane < num_planes; ++plane) {
- av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, plane, 1, mi_row,
- mi_col);
+ av1_encode_intra_block_plane(cpi, x, bsize, plane, 1, mi_row, mi_col);
}
#if CONFIG_CFL
xd->cfl.store_y = 0;
@@ -4519,7 +4518,7 @@
}
#endif
- av1_encode_sb((AV1_COMMON *)cm, x, bsize, mi_row, mi_col, dry_run);
+ av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run);
if (mbmi->skip) mbmi->min_tx_size = mbmi->tx_size;
av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate,
tile_data->allow_update_cdf);
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index e0374a1..7b95902 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -419,8 +419,8 @@
}
#endif // !CONFIG_LV_MAP
-int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
- int blk_col, int block, BLOCK_SIZE plane_bsize,
+int av1_optimize_b(const AV1_COMP *const cpi, MACROBLOCK *mb, int plane,
+ int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode) {
MACROBLOCKD *const xd = &mb->e_mbd;
@@ -434,12 +434,13 @@
(void)blk_row;
(void)blk_col;
int ctx = get_entropy_context(tx_size, a, l);
+ const AV1_COMMON *const cm = &cpi->common;
return optimize_b_greedy(cm, mb, plane, blk_row, blk_col, block, tx_size, ctx,
fast_mode);
#else // !CONFIG_LV_MAP
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
- return av1_optimize_txb(cm, mb, plane, blk_row, blk_col, block, tx_size,
+ return av1_optimize_txb(cpi, mb, plane, blk_row, blk_col, block, tx_size,
&txb_ctx, fast_mode);
#endif // !CONFIG_LV_MAP
}
@@ -556,7 +557,7 @@
(void)mi_col;
(void)dry_run;
struct encode_b_args *const args = arg;
- AV1_COMMON *cm = args->cm;
+ const AV1_COMMON *const cm = &args->cpi->common;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
@@ -587,8 +588,8 @@
p->eobs[block] = 0;
}
- av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, a,
- l, CONFIG_LV_MAP);
+ av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
+ tx_size, a, l, CONFIG_LV_MAP);
av1_set_txb_context(x, plane, block, tx_size, a, l);
@@ -738,13 +739,13 @@
encode_block_pass1, &args);
}
-void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
- int mi_col, RUN_TYPE dry_run) {
+void av1_encode_sb(const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ int mi_row, int mi_col, RUN_TYPE dry_run) {
(void)dry_run;
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- struct encode_b_args arg = { cm, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
+ struct encode_b_args arg = { cpi, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
int plane;
mbmi->skip = 1;
@@ -844,7 +845,7 @@
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
struct encode_b_args *const args = arg;
- AV1_COMMON *cm = args->cm;
+ const AV1_COMMON *const cm = &args->cpi->common;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
@@ -885,8 +886,8 @@
if (args->enable_optimize_b) {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
- a, l, CONFIG_LV_MAP);
+ av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
+ tx_size, a, l, CONFIG_LV_MAP);
#if CONFIG_TXK_SEL
if (plane == 0 && p->eobs[block] == 0) {
@@ -913,7 +914,7 @@
#endif // CONFIG_CFL
}
-void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
+void av1_encode_intra_block_plane(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
int mi_col) {
@@ -922,7 +923,7 @@
ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
struct encode_b_args arg = {
- cm, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
+ cpi, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
};
if (!is_chroma_reference(mi_row, mi_col, bsize,
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index cf7d3dd..2f7b109 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -26,7 +26,7 @@
};
struct encode_b_args {
- AV1_COMMON *cm;
+ const struct AV1_COMP *cpi;
MACROBLOCK *x;
struct optimize_ctx *ctx;
int8_t *skip;
@@ -43,15 +43,15 @@
AV1_XFORM_QUANT_TYPES,
} AV1_XFORM_QUANT;
-void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
- int mi_col, RUN_TYPE dry_run);
+void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ int mi_row, int mi_col, RUN_TYPE dry_run);
void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, AV1_XFORM_QUANT xform_quant_idx);
-int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
- int blk_col, int block, BLOCK_SIZE plane_bsize,
+int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
+ int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode);
@@ -66,7 +66,7 @@
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
-void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
+void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
int mi_col);
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 99cd71e..086cda6 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -18,11 +18,25 @@
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/cost.h"
#include "av1/encoder/encodetxb.h"
+#include "av1/encoder/hash.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
#define TEST_OPTIMIZE_TXB 0
+static int hbt_hash_needs_init = 1;
+static CRC_CALCULATOR crc_calculator;
+static CRC_CALCULATOR crc_calculator2;
+static const int HBT_HASH_EOB = 16; // also the length in opt_qcoeff
+
+typedef struct OptTxbQcoeff {
+ uint32_t hbt_hash_match;
+ double hits;
+ tran_low_t opt_qcoeff[16];
+} OptTxbQcoeff;
+
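+// Table of previously optimized qcoeff vectors: rows are indexed by the
+// 16-bit context hash, and each row holds 16 candidate entries matched by
+// the 16-bit hash of the pre-optimized qcoeffs.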
+OptTxbQcoeff hbt_hash_table[65536][16];
+
typedef struct LevelDownStats {
int update;
tran_low_t low_qc;
@@ -291,6 +305,16 @@
stats->update = 0;
stats->rd_low = 0;
stats->rd = 0;
+// TODO(mfo): explore if there's a better way to prevent compiler init
+// warnings
+#if CONFIG_LV_MAP_MULTI
+ stats->nz_rd = 0;
+#else
+ stats->nz_rate = 0;
+#endif
+ stats->dist_low = 0;
+ stats->rate_low = 0;
+ stats->low_qc = 0;
const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
const int dqv = txb_info->dequant[coeff_idx != 0];
@@ -2196,9 +2220,215 @@
{ 17, 13 }, { 16, 10 },
};
-int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
+void hbt_hash_init() {
+ av1_crc_calculator_init(&crc_calculator, 16, 0x5D6DCB); // ctx 16 bit hash
+ av1_crc_calculator_init(&crc_calculator2, 16, 0x5D6DCB); // qc 16 bit hash
+ memset(hbt_hash_table, 0, sizeof(hbt_hash_table[0][0]) * 65536 * 16);
+ hbt_hash_needs_init = 0;
+}
+
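+// Miss: run the regular trellis optimization, then store the optimized
+// qcoeffs in the least-recently-hit slot of this row for later reuse.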
+int hbt_hash_miss(int found_index, uint16_t hbt_hash_index,
+ uint32_t hbt_hash_match, TxbInfo *txb_info,
+ const LV_MAP_COEFF_COST *txb_costs,
+#if CONFIG_LV_MAP_MULTI
+ const LV_MAP_EOB_COST *txb_eob_costs,
+#endif
+ const struct macroblock_plane *p, int block, int fast_mode) {
+ const int16_t *scan = txb_info->scan_order->scan;
+
+ av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
+ txb_info->levels);
+ // The hash_based_trellis speed feature requires lv_map_multi, so always true.
+ const int update = optimize_txb(txb_info, txb_costs,
+#if CONFIG_LV_MAP_MULTI
+ txb_eob_costs,
+#endif
+ NULL, 0, fast_mode);
+
+ if (update) {
+ // Overwrite old lowest entry
+ hbt_hash_table[hbt_hash_index][found_index].hbt_hash_match = hbt_hash_match;
+ hbt_hash_table[hbt_hash_index][found_index].hits = 1.0;
+ for (int i = 0; i < txb_info->eob; i++) {
+ hbt_hash_table[hbt_hash_index][found_index].opt_qcoeff[i] =
+ txb_info->qcoeff[scan[i]];
+ }
+ for (int i = txb_info->eob; i < HBT_HASH_EOB; i++) {
+ hbt_hash_table[hbt_hash_index][found_index].opt_qcoeff[i] = 0;
+ }
+
+ p->eobs[block] = txb_info->eob;
+ p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
+ txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
+ }
+ return txb_info->eob;
+}
+
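+// Hit: copy the stored optimized qcoeffs back into the block, updating eob
+// and the txb entropy context, instead of re-running the trellis.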
+int hbt_hash_hit(uint16_t hbt_hash_index, int found_index, TxbInfo *txb_info,
+ const struct macroblock_plane *p, int block) {
+ const int16_t *scan = txb_info->scan_order->scan;
+ int new_eob = 0;
+ int update = 0;
+
+ for (int i = 0; i < txb_info->eob; i++) {
+ if (txb_info->qcoeff[scan[i]] !=
+ hbt_hash_table[hbt_hash_index][found_index].opt_qcoeff[i]) {
+ txb_info->qcoeff[scan[i]] =
+ hbt_hash_table[hbt_hash_index][found_index].opt_qcoeff[i];
+ update = 1;
+ update_coeff(scan[i], txb_info->qcoeff[scan[i]], txb_info);
+ }
+
+ if (txb_info->qcoeff[scan[i]]) new_eob = i + 1;
+ }
+
+ if (update) {
+ txb_info->eob = new_eob;
+ p->eobs[block] = txb_info->eob;
+ p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
+ txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
+ }
+ return txb_info->eob;
+}
+
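+// Decay the hit counters of every entry in this row, then search the row for
+// an entry whose qcoeff hash matches; dispatch to hbt_hash_hit on a match or
+// to hbt_hash_miss (overwriting the least-hit entry) otherwise.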
+int search_hbt_hash_match(uint16_t hbt_hash_index, uint32_t hbt_hash_match,
+ TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
+#if CONFIG_LV_MAP_MULTI
+ const LV_MAP_EOB_COST *txb_eob_costs,
+#endif
+ const struct macroblock_plane *p, int block,
+ int fast_mode) {
+ // Decay all hits
+ double lowest_hits = 1.0;
+ int lowest_index = 0;
+
+ for (int i = 0; i < 16; i++) {
+ hbt_hash_table[hbt_hash_index][i].hits *= 31.0;
+ hbt_hash_table[hbt_hash_index][i].hits /= 32.0;
+
+ if (hbt_hash_table[hbt_hash_index][i].hits < lowest_hits) {
+ lowest_hits = hbt_hash_table[hbt_hash_index][i].hits;
+ lowest_index = i;
+ }
+ }
+
+ // Search soft hash vector for qcoeff match
+ int found_index = -1;
+ for (int i = 0; i < 16; i++) { // OptTxbQcoeff array has fixed size of 16.
+ if (hbt_hash_table[hbt_hash_index][i].hbt_hash_match == hbt_hash_match) {
+ found_index = i;
+ hbt_hash_table[hbt_hash_index][i].hits += 1.0;
+ break; // Found a match and it's at found_index
+ }
+ }
+
+ if (found_index == -1) { // Add new OptTxbQcoeff into array.
+ return hbt_hash_miss(lowest_index, hbt_hash_index, hbt_hash_match, txb_info,
+ txb_costs,
+#if CONFIG_LV_MAP_MULTI
+ txb_eob_costs,
+#endif
+ p, block, fast_mode);
+ } else { // Retrieve data from array.
+ return hbt_hash_hit(hbt_hash_index, found_index, txb_info, p, block);
+ }
+}
+
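+// Entry point for the speed feature: build a 16-bit context hash (the table
+// index) and a 16-bit qcoeff hash (the match key) from softened block data,
+// then look up previously optimized coefficients in the table.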
+int hash_based_trellis_mode(TxbInfo *txb_info,
+ const LV_MAP_COEFF_COST *txb_costs,
+#if CONFIG_LV_MAP_MULTI
+ const LV_MAP_EOB_COST *txb_eob_costs,
+#endif
+ const struct macroblock_plane *p, int block,
+ int fast_mode, TXB_CTX *txb_ctx) {
+ // Initialize hash table if needed.
+ if (hbt_hash_needs_init) {
+ hbt_hash_init();
+ }
+
+ //// Hash creation
+ // TODO(mfo): use exact length once input finalized
+ uint8_t txb_hash_data[256];
+ const int16_t *scan = txb_info->scan_order->scan;
+ uint8_t chunk = 0;
+
+ uint16_t ctx_hash = 0;
+ uint32_t qc_hash = 0;
+
+ int hash_data_index = 0;
+ for (int i = 0; i < txb_info->eob; i++) {
+ // Data softening: data from -3 -> 3 is left alone,
+ // while 'large' data is put into buckets of 16s
+ // Consider bucketing less than 16 down to 4 instead of 0
+ // if(txb_info->qcoeff[scan[i]] < 4 && txb_info->qcoeff[scan[i]] > -4)
+ chunk = (txb_info->qcoeff[scan[i]]) & 0xff;
+ /*else if(txb_info->qcoeff[scan[i]] < 16 && txb_info->qcoeff[scan[i]] > -16)
+ chunk = (txb_info->qcoeff[scan[i]]) & 0xfc; //
+ else
+ chunk = (txb_info->qcoeff[scan[i]]) & 0xf0; // greater than 16*/
+ txb_hash_data[hash_data_index++] = chunk;
+
+ chunk = ((txb_info->qcoeff[scan[i]]) & 0xff00) >> 8;
+ txb_hash_data[hash_data_index++] = chunk;
+ }
+ assert(hash_data_index <= 256);
+ // 16 bit
+ qc_hash = av1_get_crc_value(&crc_calculator2, txb_hash_data, hash_data_index);
+
+ hash_data_index = 0;
+ // tcoeff
+ for (int i = 0; i < txb_info->eob; i++) {
+ chunk = (txb_info->tcoeff[scan[i]] - txb_info->dqcoeff[scan[i]]) & 0xff;
+ txb_hash_data[hash_data_index++] = chunk;
+ }
+ // txb_ctx
+ chunk = txb_ctx->txb_skip_ctx & 0xff;
+ txb_hash_data[hash_data_index++] = chunk;
+ chunk = txb_ctx->dc_sign_ctx & 0xff;
+ txb_hash_data[hash_data_index++] = chunk;
+ // dequant
+ chunk = txb_info->dequant[0] & 0xff;
+ txb_hash_data[hash_data_index++] = chunk;
+ chunk = (txb_info->dequant[0] & 0xff00) >> 8;
+ txb_hash_data[hash_data_index++] = chunk;
+ chunk = txb_info->dequant[1] & 0xff;
+ txb_hash_data[hash_data_index++] = chunk;
+ chunk = (txb_info->dequant[1] & 0xff00) >> 8;
+ txb_hash_data[hash_data_index++] = chunk;
+ // txb_skip_cost
+ /*for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < TXB_SKIP_CONTEXTS; j++) {
+ chunk = (txb_costs->txb_skip_cost[j][i] & 0xff00) >> 8;
+ txb_hash_data[hash_data_index++] = chunk;
+ }
+ }
+ // base_eob_cost
+ for (int i = 1; i < 3; i++) { // i = 0 are softened away
+ for (int j = 0; j < SIG_COEF_CONTEXTS_EOB; j++) {
+ chunk = (txb_costs->base_eob_cost[j][i] & 0xff00) >> 8;
+ txb_hash_data[hash_data_index++] = chunk;
+ }
+ }*/
+ assert(hash_data_index <= 256);
+ // Gives 16 bit hash for ctx
+ ctx_hash = av1_get_crc_value(&crc_calculator, txb_hash_data, hash_data_index);
+
+ uint16_t hbt_hash_index = ctx_hash; // 16 bit ctx_hash: index to table
+ uint32_t hbt_hash_match = qc_hash; // 16 bit qc_hash: matched in array
+ //// End hash creation
+
+ return search_hbt_hash_match(hbt_hash_index, hbt_hash_match, txb_info,
+ txb_costs,
+#if CONFIG_LV_MAP_MULTI
+ txb_eob_costs,
+#endif
+ p, block, fast_mode);
+}
+
+int av1_optimize_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
TXB_CTX *txb_ctx, int fast_mode) {
+ const AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
@@ -2266,6 +2496,17 @@
&cm->coeff_ctx_table
};
+ // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls
+ // by storing the optimized coefficients in a hash table.
+ // Currently disabled in speedfeatures.c
+ if (eob <= HBT_HASH_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) {
+ return hash_based_trellis_mode(&txb_info, &txb_costs,
+#if CONFIG_LV_MAP_MULTI
+ &txb_eob_costs,
+#endif
+ p, block, fast_mode, txb_ctx);
+ }
+
av1_txb_init_levels(qcoeff, width, height, levels);
const int update = optimize_txb(&txb_info, &txb_costs,
@@ -2623,7 +2864,7 @@
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
+ av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, 1);
}
av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size,
@@ -2662,7 +2903,7 @@
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
+ av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, 1);
}
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index 4d3a8ca..beddb49 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -107,9 +107,10 @@
const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
int use_fast_coef_costing, RD_STATS *rd_stats);
#endif
-int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
- int blk_row, int blk_col, int block, TX_SIZE tx_size,
- TXB_CTX *txb_ctx, int fast_mode);
+
+int av1_optimize_txb(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int blk_row,
+ int blk_col, int block, TX_SIZE tx_size, TXB_CTX *txb_ctx,
+ int fast_mode);
#ifdef __cplusplus
}
#endif
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 18ab9a7..3af8c7a 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -641,7 +641,7 @@
xd->mi[0]->mbmi.mode = DC_PRED;
xd->mi[0]->mbmi.tx_size =
use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
- av1_encode_intra_block_plane(cm, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
+ av1_encode_intra_block_plane(cpi, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
this_error = aom_get_mb_ss(x->plane[0].src_diff);
// Keep a record of blocks that have almost no intra error residual
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index b55c936..e1e1be3 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1900,7 +1900,7 @@
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
-// TX-domain results need to shift down to Q2/D10 to match pixel
+/// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
const int shift = (TX_COEFF_DEPTH - 10) * 2;
@@ -1929,7 +1929,7 @@
disable_early_skip ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
- av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
+ av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
} else {
args->exit_early = 1;
@@ -3581,7 +3581,7 @@
disable_early_skip ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
- av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
+ av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, fast);
} else {
rd_stats->rate += rd_stats->zero_rate;
@@ -8704,8 +8704,8 @@
// during luma RDO, so we can store reconstructed luma values
memcpy(x->blk_skip[0], ctx->blk_skip[0],
sizeof(uint8_t) * ctx->num_4x4_blk);
- av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, AOM_PLANE_Y, 1,
- mi_row, mi_col);
+ av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, 1, mi_row,
+ mi_col);
xd->cfl.store_y = 0;
}
#endif // CONFIG_CFL
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index e13819b..0e5e888 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -149,6 +149,10 @@
#if CONFIG_DUAL_FILTER
sf->use_fast_interpolation_filter_search = 1;
#endif // CONFIG_DUAL_FILTER
+#if 0 // CONFIG_HASH_ME && CONFIG_LV_MAP && CONFIG_LV_MAP_MULTI
+ // TODO(mfo): Activate feature once it gives positive results.
+ sf->use_hash_based_trellis = 1;
+#endif // CONFIG_HASH_ME && CONFIG_LV_MAP && CONFIG_LV_MAP_MULTI
}
if (speed >= 2) {
@@ -515,6 +519,7 @@
sf->use_transform_domain_distortion = 0;
sf->gm_search_type = GM_FULL_SEARCH;
sf->use_fast_interpolation_filter_search = 0;
+ sf->use_hash_based_trellis = 0;
set_dev_sf(cpi, sf, oxcf->dev_sf);
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 9b7d3e6..66128a1 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -520,6 +520,10 @@
// usually includes EIGHTTAP_REGULAR.
int use_fast_interpolation_filter_search;
+ // Use a hash table to store previously computed optimized qcoeffs from
+ // expensive calls to optimize_txb.
+ int use_hash_based_trellis;
+
// flag to drop some ref frames in compound motion search
int drop_ref;
} SPEED_FEATURES;