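Clean up coefficient (trellis) optimization code

Move TRELLIS_OPT_TYPE from speed_features.h to block.h, type
optimize_seg_arr and encode_b_args.enable_optimize_b with it, pass
dry_run through av1_encode_intra_block_plane(), and centralize the
"is trellis enabled for this pass" decision in is_trellis_used().
Also replace bare 0 skip_trellis arguments with a named constant.

Change-Id: Ia75dded05b6def859a7cb88a8e5e2afd1c06fe70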
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index 38b7e71..af463a7 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -50,6 +50,13 @@ unsigned int var; } DIFF; +enum { + NO_TRELLIS_OPT, // No trellis optimization + FULL_TRELLIS_OPT, // Trellis optimization in all stages + FINAL_PASS_TRELLIS_OPT, // Trellis optimization in only the final encode pass + NO_ESTIMATE_YRD_TRELLIS_OPT // Disable trellis in estimate_yrd_for_sb +} UENUM1BYTE(TRELLIS_OPT_TYPE); + typedef struct macroblock_plane { DECLARE_ALIGNED(32, int16_t, src_diff[MAX_SB_SQUARE]); tran_low_t *qcoeff;
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c index a7e9efc..ac377d8 100644 --- a/av1/encoder/compound_type.c +++ b/av1/encoder/compound_type.c
@@ -481,8 +481,8 @@ if (ref_best_rd < 0) return INT64_MAX; av1_subtract_plane(x, bs, 0); x->rd_model = LOW_TXFM_RD; - int skip_trellis = cpi->optimize_seg_arr[xd->mi[0]->segment_id] == - NO_ESTIMATE_YRD_TRELLIS_OPT; + const int skip_trellis = (cpi->optimize_seg_arr[xd->mi[0]->segment_id] == + NO_ESTIMATE_YRD_TRELLIS_OPT); const int64_t rd = av1_txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs, max_txsize_rect_lookup[bs], FTXS_NONE, skip_trellis);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index dff7cd5..5cf1e41 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -5658,7 +5658,7 @@ if (xd->lossless[i]) cpi->has_lossless_segment = 1; xd->qindex[i] = qindex; if (xd->lossless[i]) { - cpi->optimize_seg_arr[i] = 0; + cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT; } else { cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients; } @@ -6236,7 +6236,7 @@ xd->cfl.store_y = store_cfl_required(cm, xd); mbmi->skip = 1; for (int plane = 0; plane < num_planes; ++plane) { - av1_encode_intra_block_plane(cpi, x, bsize, plane, + av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run, cpi->optimize_seg_arr[mbmi->segment_id]); }
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 1dce2ba..d855d2f 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c
@@ -82,22 +82,22 @@ pd->dst.buf, pd->dst.stride); } -int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane, +int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, TX_TYPE tx_type, const TXB_CTX *const txb_ctx, int fast_mode, int *rate_cost) { - MACROBLOCKD *const xd = &mb->e_mbd; - struct macroblock_plane *const p = &mb->plane[plane]; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = &x->plane[plane]; const int eob = p->eobs[block]; const int segment_id = xd->mi[0]->segment_id; if (eob == 0 || !cpi->optimize_seg_arr[segment_id] || xd->lossless[segment_id]) { - *rate_cost = av1_cost_skip_txb(mb, txb_ctx, plane, tx_size); + *rate_cost = av1_cost_skip_txb(x, txb_ctx, plane, tx_size); return eob; } - return av1_optimize_txb_new(cpi, mb, plane, block, tx_size, tx_type, txb_ctx, + return av1_optimize_txb_new(cpi, x, plane, block, tx_size, tx_type, txb_ctx, rate_cost, cpi->oxcf.sharpness, fast_mode); } @@ -386,14 +386,13 @@ cm->features.reduced_tx_set_used); TxfmParam txfm_param; QUANT_PARAM quant_param; - int use_trellis = (args->enable_optimize_b != NO_TRELLIS_OPT); + const int use_trellis = is_trellis_used(args->enable_optimize_b, dry_run); int quant_idx; - if (use_trellis && args->enable_optimize_b != FINAL_PASS_TRELLIS_OPT) { + if (use_trellis) quant_idx = AV1_XFORM_QUANT_FP; - } else { + else quant_idx = USE_B_QUANT_NO_TRELLIS ? 
AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP; - } av1_setup_xform(cm, x, tx_size, tx_type, &txfm_param); av1_setup_quant(tx_size, use_trellis, quant_idx, cpi->use_quant_b_adapt, &quant_param); @@ -617,13 +616,10 @@ if (x->force_skip) return; struct optimize_ctx ctx; - struct encode_b_args arg = { cpi, - x, - &ctx, - &mbmi->skip, - NULL, - NULL, - cpi->optimize_seg_arr[mbmi->segment_id] }; + struct encode_b_args arg = { + cpi, x, &ctx, &mbmi->skip, + NULL, NULL, dry_run, cpi->optimize_seg_arr[mbmi->segment_id] + }; const AV1_COMMON *const cm = &cpi->common; const int num_planes = av1_num_planes(cm); for (int plane = 0; plane < num_planes; ++plane) { @@ -718,9 +714,10 @@ cm->features.reduced_tx_set_used); TxfmParam txfm_param; QUANT_PARAM quant_param; - int use_trellis = args->enable_optimize_b != NO_TRELLIS_OPT; + const int use_trellis = + is_trellis_used(args->enable_optimize_b, args->dry_run); int quant_idx; - if (use_trellis && args->enable_optimize_b != FINAL_PASS_TRELLIS_OPT) + if (use_trellis) quant_idx = AV1_XFORM_QUANT_FP; else quant_idx = @@ -792,8 +789,8 @@ } void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int plane, - int enable_optimize_b) { + BLOCK_SIZE bsize, int plane, RUN_TYPE dry_run, + TRELLIS_OPT_TYPE enable_optimize_b) { assert(bsize < BLOCK_SIZES_ALL); const MACROBLOCKD *const xd = &x->e_mbd; if (plane && !xd->is_chroma_ref) return; @@ -803,9 +800,8 @@ const int ss_y = pd->subsampling_y; ENTROPY_CONTEXT ta[MAX_MIB_SIZE] = { 0 }; ENTROPY_CONTEXT tl[MAX_MIB_SIZE] = { 0 }; - struct encode_b_args arg = { - cpi, x, NULL, &(xd->mi[0]->skip), ta, tl, enable_optimize_b - }; + struct encode_b_args arg = { cpi, x, NULL, &(xd->mi[0]->skip), + ta, tl, dry_run, enable_optimize_b }; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); if (enable_optimize_b) { av1_get_entropy_contexts(plane_bsize, pd, ta, tl);
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h index 02f9d35..2abe8ff 100644 --- a/av1/encoder/encodemb.h +++ b/av1/encoder/encodemb.h
@@ -34,7 +34,8 @@ int8_t *skip; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - int8_t enable_optimize_b; + RUN_TYPE dry_run; + TRELLIS_OPT_TYPE enable_optimize_b; }; enum { @@ -126,9 +127,16 @@ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int plane, - int enable_optimize_b); + BLOCK_SIZE bsize, int plane, RUN_TYPE dry_run, + TRELLIS_OPT_TYPE enable_optimize_b); +static INLINE int is_trellis_used(TRELLIS_OPT_TYPE optimize_b, + RUN_TYPE dry_run) { + if (optimize_b == NO_TRELLIS_OPT) return false; + if (optimize_b == FINAL_PASS_TRELLIS_OPT && dry_run != OUTPUT_ENABLED) + return false; + return true; +} #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index b6eba2a..79bbe43 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -788,7 +788,7 @@ struct lookahead_entry *alt_ref_source; int no_show_kf; - int optimize_seg_arr[MAX_SEGMENTS]; + TRELLIS_OPT_TYPE optimize_seg_arr[MAX_SEGMENTS]; int mi_ext_alloc_size; YV12_BUFFER_CONFIG *source;
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c index 9183c28..1a32356 100644 --- a/av1/encoder/firstpass.c +++ b/av1/encoder/firstpass.c
@@ -466,7 +466,7 @@ xd->mi[0]->mode = DC_PRED; xd->mi[0]->tx_size = use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; - av1_encode_intra_block_plane(cpi, x, bsize, 0, 0); + av1_encode_intra_block_plane(cpi, x, bsize, 0, DRY_RUN_NORMAL, 0); this_intra_error = aom_get_mb_ss(x->plane[0].src_diff); if (this_intra_error < UL_INTRA_THRESH) {
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c index 0b5fb3e..5a85037 100644 --- a/av1/encoder/intra_mode_search.c +++ b/av1/encoder/intra_mode_search.c
@@ -1216,6 +1216,7 @@ if (xd->cfl.store_y) { // Restore reconstructed luma values. av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y, + DRY_RUN_NORMAL, cpi->optimize_seg_arr[mbmi->segment_id]); xd->cfl.store_y = 0; } @@ -1336,6 +1337,7 @@ int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; #endif // CONFIG_DEBUG + const int skip_trellis = 0; for (int plane = 0; plane < CFL_PRED_PLANES; plane++) { RD_STATS rd_stats; av1_init_rd_stats(&rd_stats); @@ -1351,9 +1353,9 @@ if (i == CFL_SIGN_NEG) { mbmi->cfl_alpha_idx = 0; mbmi->cfl_alpha_signs = joint_sign; - av1_txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, 0, plane + 1, - plane_bsize, tx_size, - cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0); + av1_txfm_rd_in_plane( + x, cpi, &rd_stats, best_rd, 0, plane + 1, plane_bsize, tx_size, + cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, skip_trellis); if (rd_stats.rate == INT_MAX) break; } const int alpha_rate = x->cfl_cost[joint_sign][plane][0]; @@ -1382,7 +1384,7 @@ mbmi->cfl_alpha_signs = joint_sign; av1_txfm_rd_in_plane( x, cpi, &rd_stats, best_rd, 0, plane + 1, plane_bsize, tx_size, - cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0); + cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, skip_trellis); if (rd_stats.rate == INT_MAX) break; } const int alpha_rate = x->cfl_cost[joint_sign][plane][c];
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index fc7870a..48a9d8b 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -2843,7 +2843,7 @@ memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk); - av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, + av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, DRY_RUN_NORMAL, cpi->optimize_seg_arr[mbmi->segment_id]); av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); xd->cfl.store_y = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h index 923c0a5..45b0164 100644 --- a/av1/encoder/speed_features.h +++ b/av1/encoder/speed_features.h
@@ -40,13 +40,6 @@ } UENUM1BYTE(GM_ERRORADV_TYPE); enum { - NO_TRELLIS_OPT, // No trellis optimization - FULL_TRELLIS_OPT, // Trellis optimization in all stages - FINAL_PASS_TRELLIS_OPT, // Trellis optimization in only the final encode pass - NO_ESTIMATE_YRD_TRELLIS_OPT // Disable trellis in estimate_yrd_for_sb -} UENUM1BYTE(TRELLIS_OPT_TYPE); - -enum { FULL_TXFM_RD, LOW_TXFM_RD, } UENUM1BYTE(TXFM_RD_MODEL);
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c index 0ff71ef..5f905c9 100644 --- a/av1/encoder/tx_search.c +++ b/av1/encoder/tx_search.c
@@ -2117,9 +2117,9 @@ (mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) && mi_col >= xd->tile.mi_col_start && (mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end); - skip_trellis |= - cpi->optimize_seg_arr[mbmi->segment_id] == NO_TRELLIS_OPT || - cpi->optimize_seg_arr[mbmi->segment_id] == FINAL_PASS_TRELLIS_OPT; + + skip_trellis |= !is_trellis_used(cpi->optimize_seg_arr[xd->mi[0]->segment_id], + DRY_RUN_NORMAL); if (is_intra_hash_match(cpi, x, plane, blk_row, blk_col, plane_bsize, tx_size, txb_ctx, &intra_txb_rd_info, within_border, tx_type_map_idx, &cur_joint_ctx)) { @@ -2417,8 +2417,11 @@ } RD_STATS this_rd_stats; + + const int skip_trellis = 0; search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - txb_ctx, ftxs_mode, 0, 0, ref_rdcost, &this_rd_stats); + txb_ctx, ftxs_mode, 0, skip_trellis, ref_rdcost, + &this_rd_stats); av1_merge_rd_stats(rd_stats, &this_rd_stats); @@ -2692,9 +2695,11 @@ // Skip RDcost is used only for Inter blocks if (is_inter_block(xd->mi[0])) skip_rd = RDCOST(x->rdmult, s1, 0); + const int skip_trellis = 0; av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOMMIN(this_rd, skip_rd), AOM_PLANE_Y, bs, mbmi->tx_size, - cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0); + cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, + skip_trellis); } static AOM_INLINE void choose_smallest_tx_size(const AV1_COMP *const cpi, @@ -2707,8 +2712,10 @@ mbmi->tx_size = TX_4X4; // TODO(any) : Pass this_rd based on skip/non-skip cost + const int skip_trellis = 0; av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, 0, bs, mbmi->tx_size, - cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0); + cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, + skip_trellis); } static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, @@ -2737,6 +2744,7 @@ init_depth = MAX_TX_DEPTH; } + const int skip_trellis = 0; uint8_t best_txk_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 
TX_SIZE best_tx_size = max_rect_tx_size; @@ -2750,8 +2758,8 @@ if (!cpi->oxcf.enable_tx64 && txsize_sqr_up_map[n] == TX_64X64) continue; RD_STATS this_rd_stats; - rd[depth] = - av1_txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, FTXS_NONE, 0); + rd[depth] = av1_txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, + FTXS_NONE, skip_trellis); if (rd[depth] < best_rd) { av1_copy_array(best_blk_skip, x->blk_skip, n4); @@ -3367,6 +3375,7 @@ av1_subtract_plane(x, plane_bsize, plane); } + const int skip_trellis = 0; if (is_cost_valid) { const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd); for (plane = 1; plane < MAX_MB_PLANE; ++plane) { @@ -3382,7 +3391,8 @@ chroma_ref_best_rd = ref_best_rd - AOMMIN(this_rd, skip_rd); av1_txfm_rd_in_plane(x, cpi, &pn_rd_stats, chroma_ref_best_rd, 0, plane, plane_bsize, uv_tx_size, - cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0); + cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, + skip_trellis); if (pn_rd_stats.rate == INT_MAX) { is_cost_valid = 0; break;