Rework fast tx type search speed feature for intra frame
Fix tx type as the default type and search for the best prediction mode;
then do a final search for the best tx type.
When it's enabled at speed 0, encoding speed increases to 3x with 2%
compression loss on keyframes. It's currently turned on for speed
2 and above.
Change-Id: Ic9ca1479da10d467a88a4d692d9c75da40d2798d
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 39873a2..23514e3 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3165,6 +3165,104 @@
}
#endif // CONFIG_EXT_INTRA
+// Given selected prediction mode, search for the best tx type and size.
+static void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, const int *bmode_costs,
+ int plt_ctx, int64_t *best_rd, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, MB_MODE_INFO *best_mbmi,
+ PICK_MODE_CONTEXT *ctx) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ RD_STATS rd_stats;
+ super_block_yrd(cpi, x, &rd_stats, bsize, *best_rd);
+ if (rd_stats.rate == INT_MAX) return;
+ int this_rate_tokenonly = rd_stats.rate;
+ if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
+ // super_block_yrd above includes the cost of the tx_size in the
+ // tokenonly rate, but for intra blocks, tx_size is always coded
+ // (prediction granularity), so we account for it in the full rate,
+ // not the tokenonly rate.
+ this_rate_tokenonly -= tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
+ }
+ int this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];
+ const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0;
+#if CONFIG_FILTER_INTRA
+ const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;
+#endif // CONFIG_FILTER_INTRA
+#if CONFIG_INTRABC
+ const int use_intrabc = mbmi->use_intrabc;
+#endif // CONFIG_INTRABC
+ // Can only activate one mode.
+ assert((mbmi->mode != DC_PRED) + use_palette +
+#if CONFIG_INTRABC
+ use_intrabc +
+#endif
+ use_filter_intra <=
+ 1);
+ const int try_palette =
+ av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
+ if (try_palette && mbmi->mode == DC_PRED) {
+ const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
+ this_rate += x->palette_y_mode_cost[bsize_ctx][plt_ctx][use_palette];
+ if (use_palette) {
+ const uint8_t *const color_map = xd->plane[0].color_index_map;
+ int block_width, block_height, rows, cols;
+ av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
+ &cols);
+ const int plt_size = mbmi->palette_mode_info.palette_size[0];
+ int palette_mode_cost =
+ x->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
+ write_uniform_cost(plt_size, color_map[0]);
+ uint16_t color_cache[2 * PALETTE_MAX_SIZE];
+ const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
+ palette_mode_cost +=
+ av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
+ n_cache, cpi->common.bit_depth);
+ palette_mode_cost +=
+ av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
+ this_rate += palette_mode_cost;
+ }
+ }
+#if CONFIG_FILTER_INTRA
+ if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
+ this_rate += x->filter_intra_cost[mbmi->tx_size][use_filter_intra];
+ if (use_filter_intra) {
+ this_rate += x->filter_intra_mode_cost[mbmi->filter_intra_mode_info
+ .filter_intra_mode];
+ }
+ }
+#endif // CONFIG_FILTER_INTRA
+#if CONFIG_EXT_INTRA
+ if (av1_is_directional_mode(mbmi->mode, bsize)) {
+ if (av1_use_angle_delta(bsize)) {
+#if CONFIG_EXT_INTRA_MOD
+ this_rate += x->angle_delta_cost[mbmi->mode - V_PRED]
+ [MAX_ANGLE_DELTA + mbmi->angle_delta[0]];
+#else
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
+ MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+#endif // CONFIG_EXT_INTRA_MOD
+ }
+ }
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_INTRABC
+ if (bsize >= BLOCK_8X8 && cpi->common.allow_screen_content_tools)
+ this_rate += x->intrabc_cost[use_intrabc];
+#endif // CONFIG_INTRABC
+ const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist);
+ if (this_rd < *best_rd) {
+ *best_mbmi = *mbmi;
+ *best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = rd_stats.dist;
+ *skippable = rd_stats.skip;
+ memcpy(ctx->blk_skip[0], x->blk_skip[0],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+ }
+}
+
// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
@@ -3175,7 +3273,6 @@
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *const mbmi = &mic->mbmi;
assert(!is_inter_block(mbmi));
- MB_MODE_INFO best_mbmi = *mbmi;
int64_t best_model_rd = INT64_MAX;
#if CONFIG_EXT_INTRA
const int rows = block_size_high[bsize];
@@ -3199,7 +3296,6 @@
const MODE_INFO *left_mi = xd->left_mi;
const PREDICTION_MODE A = av1_above_block_mode(above_mi);
const PREDICTION_MODE L = av1_left_block_mode(left_mi);
- const PREDICTION_MODE FINAL_MODE_SEARCH = PAETH_PRED + 1;
#if CONFIG_KF_CTX
const int above_ctx = intra_mode_context[A];
@@ -3238,20 +3334,13 @@
else
x->use_default_intra_tx_type = 0;
+ MB_MODE_INFO best_mbmi = *mbmi;
/* Y Search for intra prediction mode */
- for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
+ for (int mode_idx = DC_PRED; mode_idx < INTRA_MODES; ++mode_idx) {
RD_STATS this_rd_stats;
int this_rate, this_rate_tokenonly, s;
int64_t this_distortion, this_rd, this_model_rd;
- if (mode_idx == FINAL_MODE_SEARCH) {
- if (x->use_default_intra_tx_type == 0) break;
- mbmi->mode = best_mbmi.mode;
- x->use_default_intra_tx_type = 0;
- } else {
- assert(mode_idx < INTRA_MODES);
- mbmi->mode = intra_rd_search_mode_order[mode_idx];
- }
-
+ mbmi->mode = intra_rd_search_mode_order[mode_idx];
#if CONFIG_EXT_INTRA
mbmi->angle_delta[0] = 0;
#endif // CONFIG_EXT_INTRA
@@ -3350,6 +3439,14 @@
}
#endif // CONFIG_FILTER_INTRA
+ if (x->use_default_intra_tx_type) {
+ *mbmi = best_mbmi;
+ x->use_default_intra_tx_type = 0;
+ intra_block_yrd(cpi, x, bsize, bmode_costs, palette_y_mode_ctx, &best_rd,
+ rate, rate_tokenonly, distortion, skippable, &best_mbmi,
+ ctx);
+ }
+
*mbmi = best_mbmi;
return best_rd;
}