AV1 RT: Implement palette mode for nonRD path
The initial implementation only checks luma palette. It kicks in only if
tune-content is set to screen, the intra mode search did not lead to
skip transform, and the frame is a scene change.
Results on screen content at speed 8 and 500k:
3-10% slower and 0.6-1.6 dB better. On scene changes, encoding is ~2x
slower. Visually, the difference is very noticeable on some clips.
Change-Id: I206d21aad4ee545d5defd2cfa45b35f37ac60a08
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index e7d1065..96d3058 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -96,7 +96,7 @@
if (num_pix <= MAX_PALETTE_SQUARE) {
for (int i = 0; i < 2; ++i) {
- if (!cpi->sf.rt_sf.use_nonrd_pick_mode || frame_is_intra_only(cm)) {
+ if (cm->features.allow_screen_content_tools) {
AOM_CHECK_MEM_ERROR(
&error, ctx->color_index_map[i],
aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 327036f..6f759af 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -401,7 +401,7 @@
!tool_cfg->force_video_mode && (oxcf->input_cfg.limit == 1);
seq->reduced_still_picture_hdr =
seq->still_picture && !tool_cfg->full_still_picture_hdr;
- seq->force_screen_content_tools = (oxcf->mode == REALTIME) ? 0 : 2;
+ seq->force_screen_content_tools = 2;
seq->force_integer_mv = 2;
seq->order_hint_info.enable_order_hint = tool_cfg->enable_order_hint;
seq->frame_id_numbers_present_flag =
@@ -1799,26 +1799,27 @@
return;
}
+ if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
+ features->allow_screen_content_tools = features->allow_intrabc = 1;
+ return;
+ }
+
if (cpi->oxcf.mode == REALTIME) {
- assert(cm->seq_params->reduced_still_picture_hdr);
features->allow_screen_content_tools = features->allow_intrabc = 0;
return;
}
- // Screen content tools are not evaluated in non-RD encoding mode, i.e., when
- // use_nonrd_pick_mode = 1 and hybrid_intra_pickmode = 0. Hence, screen
- // content detection is disabled.
+ // Screen content tools are not evaluated in non-RD encoding mode unless
+ // the content type is explicitly set to screen (handled above). I.e., when
+ // cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN, use_nonrd_pick_mode = 1
+ // and hybrid_intra_pickmode = 0, screen content tools are not used. Hence,
+ // screen content detection is disabled.
if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
!cpi->sf.rt_sf.hybrid_intra_pickmode) {
features->allow_screen_content_tools = features->allow_intrabc = 0;
return;
}
- if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
- features->allow_screen_content_tools = features->allow_intrabc = 1;
- return;
- }
-
// Estimate if the source frame is screen content, based on the portion of
// blocks that have few luma colors.
const uint8_t *src = cpi->unfiltered_source->y_buffer;
@@ -3384,7 +3385,7 @@
// Work out whether to force_integer_mv this frame
if (!is_stat_generation_stage(cpi) &&
cpi->common.features.allow_screen_content_tools &&
- !frame_is_intra_only(cm)) {
+ !frame_is_intra_only(cm) && !cpi->sf.rt_sf.use_nonrd_pick_mode) {
if (cpi->common.seq_params->force_integer_mv == 2) {
// Adaptive mode: see what previous frame encoded did
if (cpi->unscaled_last_source != NULL) {
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index f8cc387..c94d65f 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -937,6 +937,67 @@
return skippable;
}
+void av1_search_palette_mode_luma(const AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, unsigned int ref_frame_cost,
+ PICK_MODE_CONTEXT *ctx,
+ RD_STATS *this_rd_cost, int64_t best_rd) {
+ MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
+ PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int64_t best_rd_palette = best_rd, this_rd;
+ uint8_t *const best_palette_color_map =
+ x->palette_buffer->best_palette_color_map;
+ uint8_t *const color_map = xd->plane[0].color_index_map;
+ MB_MODE_INFO best_mbmi_palette = *mbmi;
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ const ModeCosts *mode_costs = &x->mode_costs;
+ const int *const intra_mode_cost =
+ mode_costs->mbmode_cost[size_group_lookup[bsize]];
+ const int rows = block_size_high[bsize];
+ const int cols = block_size_wide[bsize];
+
+ mbmi->mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE_FRAME;
+ av1_zero(pmi->palette_size);
+
+ RD_STATS rd_stats_y;
+ av1_invalid_rd_stats(&rd_stats_y);
+ av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED],
+ &best_mbmi_palette, best_palette_color_map,
+ &best_rd_palette, &rd_stats_y.rate, NULL,
+ &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
+ ctx, best_blk_skip, best_tx_type_map);
+ if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) {
+ this_rd_cost->rdcost = INT64_MAX;
+ return;
+ }
+
+ memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
+ sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
+ av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
+ memcpy(color_map, best_palette_color_map,
+ rows * cols * sizeof(best_palette_color_map[0]));
+
+ rd_stats_y.rate += ref_frame_cost;
+
+ if (rd_stats_y.skip_txfm) {
+ rd_stats_y.rate =
+ ref_frame_cost +
+ mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1];
+ } else {
+ rd_stats_y.rate +=
+ mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
+ }
+ this_rd = RDCOST(x->rdmult, rd_stats_y.rate, rd_stats_y.dist);
+ this_rd_cost->rate = rd_stats_y.rate;
+ this_rd_cost->dist = rd_stats_y.dist;
+ this_rd_cost->rdcost = this_rd;
+ this_rd_cost->skip_txfm = rd_stats_y.skip_txfm;
+}
+
/*!\brief Get the intra prediction by searching through tx_type and tx_size.
*
* \ingroup intra_mode_search
diff --git a/av1/encoder/intra_mode_search.h b/av1/encoder/intra_mode_search.h
index ff03b02..0968558 100644
--- a/av1/encoder/intra_mode_search.h
+++ b/av1/encoder/intra_mode_search.h
@@ -177,6 +177,33 @@
PICK_MODE_CONTEXT *ctx, RD_STATS *this_rd_cost,
int64_t best_rd);
+/*!\brief Evaluate luma palette mode for inter frames.
+ *
+ * \ingroup intra_mode_search
+ * \callergraph
+ * \callgraph
+ * This function handles luma palette mode when the current frame is an
+ * inter frame.
+ *
+ * \param[in] cpi Top-level encoder structure.
+ * \param[in] x Pointer to structure holding all the data
+ * for the current macroblock.
+ * \param[in] bsize Current partition block size.
+ * \param[in] ref_frame_cost The entropy cost for signaling that the
+ * current ref frame is an intra frame.
+ * \param[in] ctx Structure to hold the number of 4x4 blks to
+ * copy the tx_type and txfm_skip arrays.
+ * \param[out] this_rd_cost Struct that receives palette mode's rd_stats;
+ * rdcost is INT64_MAX if no palette is chosen.
+ * \param[in] best_rd Best RD seen for this block so far.
+ *
+ * \return Returns nothing.
+ */
+void av1_search_palette_mode_luma(const AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, unsigned int ref_frame_cost,
+ PICK_MODE_CONTEXT *ctx,
+ RD_STATS *this_rd_cost, int64_t best_rd);
+
/*!\brief Perform intra-mode search on luma channels for intra frames.
*
* \ingroup intra_mode_search
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index f2babe6..903708a 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -34,6 +34,8 @@
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/var_based_part.h"
+#include "av1/encoder/palette.h"
+#include "av1/encoder/intra_mode_search.h"
extern int g_pick_inter_mode_cnt;
/*!\cond */
@@ -56,6 +58,7 @@
WarpedMotionParams wm_params;
int num_proj_ref;
uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE / 4];
+ PALETTE_MODE_INFO pmi;
} BEST_PICKMODE;
typedef struct {
@@ -143,6 +146,7 @@
bp->num_proj_ref = 0;
memset(&bp->wm_params, 0, sizeof(bp->wm_params));
memset(&bp->blk_skip, 0, sizeof(bp->blk_skip));
+ memset(&bp->pmi, 0, sizeof(bp->pmi));
}
static INLINE int subpel_select(AV1_COMP *cpi, BLOCK_SIZE bsize, int_mv *mv) {
@@ -1772,7 +1776,7 @@
mi->mode = best_mode;
// Keep DC for UV since mode test is based on Y channel only.
- mi->uv_mode = DC_PRED;
+ mi->uv_mode = UV_DC_PRED;
*rd_cost = best_rdc;
#if CONFIG_INTERNAL_STATS
@@ -2881,7 +2885,34 @@
&orig_dst, tmp, &this_mode_pred, &best_rdc,
&best_pickmode);
+ int try_palette =
+ cpi->oxcf.tool_cfg.enable_palette &&
+ av1_allow_palette(cpi->common.features.allow_screen_content_tools,
+ mi->bsize);
+ try_palette = try_palette && is_mode_intra(best_pickmode.best_mode) &&
+ best_pickmode.best_mode_skip_txfm != 1 &&
+ cpi->rc.high_source_sad;
+
+ if (try_palette) {
+ const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
+
+ av1_search_palette_mode_luma(cpi, x, bsize, intra_ref_frame_cost, ctx,
+ &this_rdc, best_rdc.rdcost);
+ if (this_rdc.rdcost < best_rdc.rdcost) {
+ best_pickmode.pmi = mi->palette_mode_info;
+ best_pickmode.best_mode = DC_PRED;
+ mi->mv[0].as_int = 0;
+ best_rdc.rate = this_rdc.rate;
+ best_rdc.dist = this_rdc.dist;
+ best_rdc.rdcost = this_rdc.rdcost;
+ memcpy(best_pickmode.blk_skip, txfm_info->blk_skip,
+ sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
+ av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
+ }
+ }
+
pd->dst = orig_dst;
+ if (try_palette) mi->palette_mode_info = best_pickmode.pmi;
mi->mode = best_pickmode.best_mode;
mi->ref_frame[0] = best_pickmode.best_ref_frame;
mi->ref_frame[1] = best_pickmode.best_second_ref_frame;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 38d0a36..8ab710c 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1327,6 +1327,8 @@
sf->lpf_sf.dual_sgr_penalty_level = 1;
sf->lpf_sf.disable_lr_filter = 1;
sf->rt_sf.skip_interp_filter_search = 1;
+ sf->intra_sf.prune_palette_search_level = 2;
+
// End of set
// TODO(any, yunqing): tune these features for real-time use cases.