CWG-E171 Intra mode search adjustment 1. Allow more full transform searches for intra by relaxing pruning: (1). Allow 6 instead of 4 best modes. (2). Remove the best_model_rd constraint. 2. Prune tx partition search: Record the rdcost of the none partition for each intra prediction mode. Keep the top 4 and terminate further tx partition search if the none tx partition rdcost is already larger than the 4th best rdcost. STATS_CHANGED Change-Id: Iab9875469a5a82c9a5d97d4e82bb415491fd956d
diff --git a/av1/common/enums.h b/av1/common/enums.h index d3617d9..0935a00 100644 --- a/av1/common/enums.h +++ b/av1/common/enums.h
@@ -916,7 +916,8 @@ } UENUM1BYTE(CFL_TYPE); // Number of top model rd to store for pruning y modes in intra mode decision -#define TOP_INTRA_MODEL_COUNT 4 +#define TOP_INTRA_MODEL_COUNT 6 +#define TOP_TX_PART_COUNT 4 // Total number of luma intra prediction modes (include both directional and // non-directional modes) #define LUMA_MODE_COUNT 61
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index 37a2e18..5711070 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -1905,6 +1905,10 @@ */ int palette_pixels; #endif // CONFIG_SCC_DETERMINATION + /*! \brief Whether to prune current transform partition search. */ + int prune_tx_partition; + /*! \brief Keep records of top rdcosts of transform partition search. */ + int64_t top_tx_part_rd[TOP_TX_PART_COUNT]; } MACROBLOCK; #undef SINGLE_REF_MODES
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c index 6ad0c6d..449dec4 100644 --- a/av1/encoder/intra_mode_search.c +++ b/av1/encoder/intra_mode_search.c
@@ -82,6 +82,7 @@ if (model_intra_yrd_and_prune(cpi, x, bsize, mode_cost, best_model_rd)) { continue; } + x->prune_tx_partition = 0; av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); if (tokenonly_rd_stats.rate == INT_MAX) continue; @@ -225,7 +226,6 @@ */ int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd, int64_t top_intra_model_rd[]) { - const double thresh_best = 1.50; const double thresh_top = 1.00; for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) { if (this_model_rd < top_intra_model_rd[i]) { @@ -241,9 +241,6 @@ thresh_top * top_intra_model_rd[TOP_INTRA_MODEL_COUNT - 1]) return 1; - if (this_model_rd != INT64_MAX && - this_model_rd > thresh_best * (*best_model_rd)) - return 1; if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd; return 0; } @@ -1093,6 +1090,7 @@ MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = xd->mi[0]; RD_STATS rd_stats; + x->prune_tx_partition = 0; // In order to improve txfm search avoid rd based breakouts during winner // mode evaluation. Hence passing ref_best_rd as a maximum value av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats, bsize, INT64_MAX); @@ -1171,6 +1169,7 @@ for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; fi_mode < FILTER_INTRA_MODES; ++fi_mode) { mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode; + x->prune_tx_partition = 0; av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd); if (rd_stats_y_fi.rate == INT_MAX) continue; const int this_rate_tmp = @@ -1374,6 +1373,7 @@ ) return INT64_MAX; av1_init_rd_stats(rd_stats_y); + x->prune_tx_partition = 0; av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd); // Pick filter intra modes. 
@@ -1602,6 +1602,14 @@ if (xd->lossless[mbmi->segment_id]) { dpcm_fsc_loop = 2; } + int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT]; + for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) { + top_intra_model_rd[i] = INT64_MAX; + } + x->prune_tx_partition = 1; + for (int i = 0; i < TOP_TX_PART_COUNT; i++) { + x->top_tx_part_rd[i] = INT64_MAX; + } for (int dpcm_fsc_index = 0; dpcm_fsc_index < dpcm_fsc_loop; dpcm_fsc_index++) { mbmi->use_dpcm_y = dpcm_fsc_index; @@ -1741,19 +1749,20 @@ #if CONFIG_AIMC mode_costs += mrl_idx_cost; #endif // CONFIG_AIMC - if (model_intra_yrd_and_prune(cpi, x, bsize, + int64_t this_model_rd; + this_model_rd = intra_model_yrd(cpi, x, bsize, #if CONFIG_AIMC - mode_costs, + mode_costs); #else - mode_costs[mbmi->mode] + mrl_idx_cost, -#endif - best_model_rd) + mode_costs[mbmi->mode] + mrl_idx_cost); +#endif // CONFIG_AIMC + + if (prune_intra_y_mode(this_model_rd, best_model_rd, top_intra_model_rd) #if CONFIG_LOSSLESS_DPCM && (!xd->lossless[mbmi->segment_id] || mbmi->use_dpcm_y == 0) #endif // CONFIG_LOSSLESS_DPCM - ) { + ) continue; - } av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); if (tokenonly_rd_stats.rate == INT_MAX) continue; @@ -1909,6 +1918,10 @@ for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) { top_intra_model_rd[i] = INT64_MAX; } + x->prune_tx_partition = 1; + for (int i = 0; i < TOP_TX_PART_COUNT; i++) { + x->top_tx_part_rd[i] = INT64_MAX; + } uint8_t enable_mrls_flag = cpi->common.seq_params.enable_mrls; #if CONFIG_LOSSLESS_DPCM int dpcm_loop_num = 1;
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c index 874b7cf..96d3ecf 100644 --- a/av1/encoder/tx_search.c +++ b/av1/encoder/tx_search.c
@@ -4063,6 +4063,20 @@ *rd_stats = this_rd_stats; } if (cur_tx_size == TX_4X4) break; + if (x->prune_tx_partition && type == 0) { + for (int i = 0; i < TOP_TX_PART_COUNT; i++) { + if (cur_rd < x->top_tx_part_rd[i]) { + for (int j = TOP_TX_PART_COUNT - 1; j > i; j--) { + x->top_tx_part_rd[j] = x->top_tx_part_rd[j - 1]; + } + x->top_tx_part_rd[i] = cur_rd; + break; + } + } + if (x->top_tx_part_rd[TOP_TX_PART_COUNT - 1] != INT64_MAX && + cur_rd > x->top_tx_part_rd[TOP_TX_PART_COUNT - 1]) + break; + } } if (rd_stats->rate != INT_MAX) {