Speed up palette keyframe encoding with model RD
On keyframes: 18% speedup with 0.02% compression loss.
Change-Id: I29085ec23dd145effbea58852a46cd7f4dea8a46
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index d7e35e8..6cd9e30 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2274,9 +2274,9 @@
BLOCK_SIZE bsize, int palette_ctx,
int dc_mode_cost, MB_MODE_INFO *best_mbmi,
uint8_t *best_palette_color_map,
- int64_t *best_rd, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable) {
+ int64_t *best_rd, int64_t *best_model_rd,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable) {
int rate_overhead = 0;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
@@ -2284,8 +2284,6 @@
const int rows = block_size_high[bsize];
const int cols = block_size_wide[bsize];
int this_rate, colors, n;
- RD_STATS tokenonly_rd_stats;
- int64_t this_rd;
const int src_stride = x->plane[0].src.stride;
const uint8_t *const src = x->plane[0].src.buf;
uint8_t *const color_map = xd->plane[0].color_index_map;
@@ -2304,12 +2302,14 @@
#endif // CONFIG_FILTER_INTRA
if (colors > 1 && colors <= 64) {
- int r, c, i, j, k;
+ int r, c, i, j, k, palette_mode_cost;
const int max_itr = 50;
uint8_t color_order[PALETTE_MAX_SIZE];
float *const data = x->palette_buffer->kmeans_data_buf;
float centroids[PALETTE_MAX_SIZE];
float lb, ub, val;
+ RD_STATS tokenonly_rd_stats;
+ int64_t this_rd, this_model_rd;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#if CONFIG_AOM_HIGHBITDEPTH
uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
@@ -2373,13 +2373,8 @@
pmi->palette_size[0] = k;
av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
-
- super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
- if (tokenonly_rd_stats.rate == INT_MAX) continue;
-
- this_rate =
- tokenonly_rd_stats.rate + dc_mode_cost +
- cpi->common.bit_depth * k * av1_cost_bit(128, 0) +
+ palette_mode_cost =
+ dc_mode_cost + cpi->common.bit_depth * k * av1_cost_bit(128, 0) +
cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2] +
write_uniform_cost(k, color_map[0]) +
av1_cost_bit(
@@ -2391,9 +2386,18 @@
const int color_ctx = av1_get_palette_color_context(
color_map, cols, i, j, k, color_order, &color_idx);
assert(color_idx >= 0 && color_idx < k);
- this_rate += cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
+ palette_mode_cost +=
+ cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
}
}
+ this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
+ if (*best_model_rd != INT64_MAX &&
+ this_model_rd > *best_model_rd + (*best_model_rd >> 1))
+ continue;
+ if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
+ super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
+ if (tokenonly_rd_stats.rate == INT_MAX) continue;
+ this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
@@ -3611,7 +3615,8 @@
if (cpi->common.allow_screen_content_tools) {
rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
&best_mbmi, best_palette_color_map, &best_rd,
- rate, rate_tokenonly, distortion, skippable);
+ &best_model_rd, rate, rate_tokenonly, distortion,
+ skippable);
}
#endif // CONFIG_PALETTE
@@ -10603,7 +10608,7 @@
#if CONFIG_SUPERTX
int best_rate_nocoef;
#endif
- int64_t distortion2 = 0, dummy_rd = best_rd, this_rd;
+ int64_t distortion2 = 0, dummy_rd = best_rd, this_rd, model_rd = INT64_MAX;
int skippable = 0, rate_overhead_palette = 0;
RD_STATS rd_stats_y;
TX_SIZE uv_tx;
@@ -10618,7 +10623,7 @@
mbmi->ref_frame[1] = NONE_FRAME;
rate_overhead_palette = rd_pick_palette_intra_sby(
cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED], &mbmi_dummy,
- best_palette_color_map, &dummy_rd, NULL, NULL, NULL, NULL);
+ best_palette_color_map, &dummy_rd, &model_rd, NULL, NULL, NULL, NULL);
if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
memcpy(color_map, best_palette_color_map,
rows * cols * sizeof(best_palette_color_map[0]));