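Clean up coefficient (trellis) optimization code

- Move TRELLIS_OPT_TYPE from speed_features.h to block.h and use it as the
  type of optimize_seg_arr[] and enable_optimize_b instead of plain ints.
- Add is_trellis_used() and pass dry_run into
  av1_encode_intra_block_plane() / struct encode_b_args so that the
  FINAL_PASS_TRELLIS_OPT decision is made in one helper.
- Rename the MACROBLOCK parameter of av1_optimize_b() from mb to x.
- Give the skip_trellis argument a named local at call sites that
  previously passed a literal 0.
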
Change-Id: Ia75dded05b6def859a7cb88a8e5e2afd1c06fe70
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 38b7e71..af463a7 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -50,6 +50,13 @@
unsigned int var;
} DIFF;
+enum {  // Trellis (coefficient optimization) modes.
+ NO_TRELLIS_OPT, // No trellis optimization; keep first (value 0), callers test this type for truthiness
+ FULL_TRELLIS_OPT, // Trellis optimization in all stages
+ FINAL_PASS_TRELLIS_OPT, // Trellis optimization in only the final encode pass
+ NO_ESTIMATE_YRD_TRELLIS_OPT // Disable trellis in estimate_yrd_for_sb
+} UENUM1BYTE(TRELLIS_OPT_TYPE);
+
typedef struct macroblock_plane {
DECLARE_ALIGNED(32, int16_t, src_diff[MAX_SB_SQUARE]);
tran_low_t *qcoeff;
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index a7e9efc..ac377d8 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -481,8 +481,8 @@
if (ref_best_rd < 0) return INT64_MAX;
av1_subtract_plane(x, bs, 0);
x->rd_model = LOW_TXFM_RD;
- int skip_trellis = cpi->optimize_seg_arr[xd->mi[0]->segment_id] ==
- NO_ESTIMATE_YRD_TRELLIS_OPT;
+ const int skip_trellis = (cpi->optimize_seg_arr[xd->mi[0]->segment_id] ==
+ NO_ESTIMATE_YRD_TRELLIS_OPT);
const int64_t rd =
av1_txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs,
max_txsize_rect_lookup[bs], FTXS_NONE, skip_trellis);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index dff7cd5..5cf1e41 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5658,7 +5658,7 @@
if (xd->lossless[i]) cpi->has_lossless_segment = 1;
xd->qindex[i] = qindex;
if (xd->lossless[i]) {
- cpi->optimize_seg_arr[i] = 0;
+ cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
} else {
cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
}
@@ -6236,7 +6236,7 @@
xd->cfl.store_y = store_cfl_required(cm, xd);
mbmi->skip = 1;
for (int plane = 0; plane < num_planes; ++plane) {
- av1_encode_intra_block_plane(cpi, x, bsize, plane,
+ av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run,
cpi->optimize_seg_arr[mbmi->segment_id]);
}
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 1dce2ba..d855d2f 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -82,22 +82,22 @@
pd->dst.buf, pd->dst.stride);
}
-int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
+int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
int block, TX_SIZE tx_size, TX_TYPE tx_type,
const TXB_CTX *const txb_ctx, int fast_mode,
int *rate_cost) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- struct macroblock_plane *const p = &mb->plane[plane];
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = &x->plane[plane];
const int eob = p->eobs[block];
const int segment_id = xd->mi[0]->segment_id;
if (eob == 0 || !cpi->optimize_seg_arr[segment_id] ||
xd->lossless[segment_id]) {
- *rate_cost = av1_cost_skip_txb(mb, txb_ctx, plane, tx_size);
+ *rate_cost = av1_cost_skip_txb(x, txb_ctx, plane, tx_size);  // Early out (eob == 0, trellis off for this segment, or lossless): eob is returned unchanged.
return eob;
}
- return av1_optimize_txb_new(cpi, mb, plane, block, tx_size, tx_type, txb_ctx,
+ return av1_optimize_txb_new(cpi, x, plane, block, tx_size, tx_type, txb_ctx,  // Otherwise delegate and return av1_optimize_txb_new()'s result.
rate_cost, cpi->oxcf.sharpness, fast_mode);
}
@@ -386,14 +386,13 @@
cm->features.reduced_tx_set_used);
TxfmParam txfm_param;
QUANT_PARAM quant_param;
- int use_trellis = (args->enable_optimize_b != NO_TRELLIS_OPT);
+ const int use_trellis = is_trellis_used(args->enable_optimize_b, dry_run);
int quant_idx;
- if (use_trellis && args->enable_optimize_b != FINAL_PASS_TRELLIS_OPT) {
+ if (use_trellis)
quant_idx = AV1_XFORM_QUANT_FP;
- } else {
+ else
quant_idx =
USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
- }
av1_setup_xform(cm, x, tx_size, tx_type, &txfm_param);
av1_setup_quant(tx_size, use_trellis, quant_idx, cpi->use_quant_b_adapt,
&quant_param);
@@ -617,13 +616,10 @@
if (x->force_skip) return;
struct optimize_ctx ctx;
- struct encode_b_args arg = { cpi,
- x,
- &ctx,
- &mbmi->skip,
- NULL,
- NULL,
- cpi->optimize_seg_arr[mbmi->segment_id] };
+ struct encode_b_args arg = {
+ cpi, x, &ctx, &mbmi->skip,
+ NULL, NULL, dry_run, cpi->optimize_seg_arr[mbmi->segment_id]
+ };
const AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
for (int plane = 0; plane < num_planes; ++plane) {
@@ -718,9 +714,10 @@
cm->features.reduced_tx_set_used);
TxfmParam txfm_param;
QUANT_PARAM quant_param;
- int use_trellis = args->enable_optimize_b != NO_TRELLIS_OPT;
+ const int use_trellis =
+ is_trellis_used(args->enable_optimize_b, args->dry_run);
int quant_idx;
- if (use_trellis && args->enable_optimize_b != FINAL_PASS_TRELLIS_OPT)
+ if (use_trellis)
quant_idx = AV1_XFORM_QUANT_FP;
else
quant_idx =
@@ -792,8 +789,8 @@
}
void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int plane,
- int enable_optimize_b) {
+ BLOCK_SIZE bsize, int plane, RUN_TYPE dry_run,
+ TRELLIS_OPT_TYPE enable_optimize_b) {
assert(bsize < BLOCK_SIZES_ALL);
const MACROBLOCKD *const xd = &x->e_mbd;
if (plane && !xd->is_chroma_ref) return;
@@ -803,9 +800,8 @@
const int ss_y = pd->subsampling_y;
ENTROPY_CONTEXT ta[MAX_MIB_SIZE] = { 0 };
ENTROPY_CONTEXT tl[MAX_MIB_SIZE] = { 0 };
- struct encode_b_args arg = {
- cpi, x, NULL, &(xd->mi[0]->skip), ta, tl, enable_optimize_b
- };
+ struct encode_b_args arg = { cpi, x, NULL, &(xd->mi[0]->skip),
+ ta, tl, dry_run, enable_optimize_b };
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
if (enable_optimize_b) {
av1_get_entropy_contexts(plane_bsize, pd, ta, tl);
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index 02f9d35..2abe8ff 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -34,7 +34,8 @@
int8_t *skip;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
- int8_t enable_optimize_b;
+ RUN_TYPE dry_run;
+ TRELLIS_OPT_TYPE enable_optimize_b;
};
enum {
@@ -126,9 +127,16 @@
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int plane,
- int enable_optimize_b);
+ BLOCK_SIZE bsize, int plane, RUN_TYPE dry_run,
+ TRELLIS_OPT_TYPE enable_optimize_b);
+static INLINE int is_trellis_used(TRELLIS_OPT_TYPE optimize_b,
+ RUN_TYPE dry_run) {  // Returns nonzero iff trellis optimization applies for this mode and run type.
+ if (optimize_b == NO_TRELLIS_OPT) return false;  // Trellis disabled outright.
+ if (optimize_b == FINAL_PASS_TRELLIS_OPT && dry_run != OUTPUT_ENABLED)
+ return false;  // Final-pass-only mode: any run other than OUTPUT_ENABLED skips trellis.
+ return true;  // FULL_TRELLIS_OPT, NO_ESTIMATE_YRD_TRELLIS_OPT, or final-pass mode on an OUTPUT_ENABLED run.
+}
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index b6eba2a..79bbe43 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -788,7 +788,7 @@
struct lookahead_entry *alt_ref_source;
int no_show_kf;
- int optimize_seg_arr[MAX_SEGMENTS];
+ TRELLIS_OPT_TYPE optimize_seg_arr[MAX_SEGMENTS];
int mi_ext_alloc_size;
YV12_BUFFER_CONFIG *source;
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 9183c28..1a32356 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -466,7 +466,7 @@
xd->mi[0]->mode = DC_PRED;
xd->mi[0]->tx_size =
use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
- av1_encode_intra_block_plane(cpi, x, bsize, 0, 0);
+ av1_encode_intra_block_plane(cpi, x, bsize, 0, DRY_RUN_NORMAL, NO_TRELLIS_OPT);
this_intra_error = aom_get_mb_ss(x->plane[0].src_diff);
if (this_intra_error < UL_INTRA_THRESH) {
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index 0b5fb3e..5a85037 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -1216,6 +1216,7 @@
if (xd->cfl.store_y) {
// Restore reconstructed luma values.
av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y,
+ DRY_RUN_NORMAL,
cpi->optimize_seg_arr[mbmi->segment_id]);
xd->cfl.store_y = 0;
}
@@ -1336,6 +1337,7 @@
int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
#endif // CONFIG_DEBUG
+ const int skip_trellis = 0;
for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
RD_STATS rd_stats;
av1_init_rd_stats(&rd_stats);
@@ -1351,9 +1353,9 @@
if (i == CFL_SIGN_NEG) {
mbmi->cfl_alpha_idx = 0;
mbmi->cfl_alpha_signs = joint_sign;
- av1_txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, 0, plane + 1,
- plane_bsize, tx_size,
- cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+ av1_txfm_rd_in_plane(
+ x, cpi, &rd_stats, best_rd, 0, plane + 1, plane_bsize, tx_size,
+ cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, skip_trellis);
if (rd_stats.rate == INT_MAX) break;
}
const int alpha_rate = x->cfl_cost[joint_sign][plane][0];
@@ -1382,7 +1384,7 @@
mbmi->cfl_alpha_signs = joint_sign;
av1_txfm_rd_in_plane(
x, cpi, &rd_stats, best_rd, 0, plane + 1, plane_bsize, tx_size,
- cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+ cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, skip_trellis);
if (rd_stats.rate == INT_MAX) break;
}
const int alpha_rate = x->cfl_cost[joint_sign][plane][c];
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fc7870a..48a9d8b 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2843,7 +2843,7 @@
memcpy(x->blk_skip, ctx->blk_skip,
sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
- av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y,
+ av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, DRY_RUN_NORMAL,
cpi->optimize_seg_arr[mbmi->segment_id]);
av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
xd->cfl.store_y = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 923c0a5..45b0164 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -40,13 +40,6 @@
} UENUM1BYTE(GM_ERRORADV_TYPE);
enum {
- NO_TRELLIS_OPT, // No trellis optimization
- FULL_TRELLIS_OPT, // Trellis optimization in all stages
- FINAL_PASS_TRELLIS_OPT, // Trellis optimization in only the final encode pass
- NO_ESTIMATE_YRD_TRELLIS_OPT // Disable trellis in estimate_yrd_for_sb
-} UENUM1BYTE(TRELLIS_OPT_TYPE);
-
-enum {
FULL_TXFM_RD,
LOW_TXFM_RD,
} UENUM1BYTE(TXFM_RD_MODEL);
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 0ff71ef..5f905c9 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -2117,9 +2117,9 @@
(mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) &&
mi_col >= xd->tile.mi_col_start &&
(mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end);
- skip_trellis |=
- cpi->optimize_seg_arr[mbmi->segment_id] == NO_TRELLIS_OPT ||
- cpi->optimize_seg_arr[mbmi->segment_id] == FINAL_PASS_TRELLIS_OPT;
+
+ skip_trellis |= !is_trellis_used(cpi->optimize_seg_arr[xd->mi[0]->segment_id],
+ DRY_RUN_NORMAL);
if (is_intra_hash_match(cpi, x, plane, blk_row, blk_col, plane_bsize, tx_size,
txb_ctx, &intra_txb_rd_info, within_border,
tx_type_map_idx, &cur_joint_ctx)) {
@@ -2417,8 +2417,11 @@
}
RD_STATS this_rd_stats;
+
+ const int skip_trellis = 0;
search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- txb_ctx, ftxs_mode, 0, 0, ref_rdcost, &this_rd_stats);
+ txb_ctx, ftxs_mode, 0, skip_trellis, ref_rdcost,
+ &this_rd_stats);
av1_merge_rd_stats(rd_stats, &this_rd_stats);
@@ -2692,9 +2695,11 @@
// Skip RDcost is used only for Inter blocks
if (is_inter_block(xd->mi[0])) skip_rd = RDCOST(x->rdmult, s1, 0);
+ const int skip_trellis = 0;
av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOMMIN(this_rd, skip_rd),
AOM_PLANE_Y, bs, mbmi->tx_size,
- cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+ cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE,
+ skip_trellis);
}
static AOM_INLINE void choose_smallest_tx_size(const AV1_COMP *const cpi,
@@ -2707,8 +2712,10 @@
mbmi->tx_size = TX_4X4;
// TODO(any) : Pass this_rd based on skip/non-skip cost
+ const int skip_trellis = 0;
av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, 0, bs, mbmi->tx_size,
- cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+ cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE,
+ skip_trellis);
}
static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
@@ -2737,6 +2744,7 @@
init_depth = MAX_TX_DEPTH;
}
+ const int skip_trellis = 0;
uint8_t best_txk_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
TX_SIZE best_tx_size = max_rect_tx_size;
@@ -2750,8 +2758,8 @@
if (!cpi->oxcf.enable_tx64 && txsize_sqr_up_map[n] == TX_64X64) continue;
RD_STATS this_rd_stats;
- rd[depth] =
- av1_txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, FTXS_NONE, 0);
+ rd[depth] = av1_txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n,
+ FTXS_NONE, skip_trellis);
if (rd[depth] < best_rd) {
av1_copy_array(best_blk_skip, x->blk_skip, n4);
@@ -3367,6 +3375,7 @@
av1_subtract_plane(x, plane_bsize, plane);
}
+ const int skip_trellis = 0;
if (is_cost_valid) {
const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
@@ -3382,7 +3391,8 @@
chroma_ref_best_rd = ref_best_rd - AOMMIN(this_rd, skip_rd);
av1_txfm_rd_in_plane(x, cpi, &pn_rd_stats, chroma_ref_best_rd, 0, plane,
plane_bsize, uv_tx_size,
- cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+ cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE,
+ skip_trellis);
if (pn_rd_stats.rate == INT_MAX) {
is_cost_valid = 0;
break;