Extend prune_luma_palette_size_search_level sf to speed >= 3
Extended the speed feature prune_luma_palette_size_search_level to
speed >= 3 with an aggressive header rdcost based breakout.
For allintra video encode (on screen content set),
Instruction Count BD-Rate Loss(%)
cpu-used Reduction(%) avg.psnr ovr.psnr ssim
3 2.305 0.0302 0.0313 0.0800
4 1.842 -0.0287 -0.0283 -0.3707
5 2.687 0.0155 0.0160 0.0132
6 2.042 0.0071 0.0076 0.0097
7 2.050 0.0211 0.0210 0.1402
8 1.442 -0.0015 -0.0007 0.0010
For AVIF still image encode,
Instruction Count BD-Rate Loss(%)
cpu-used Reduction(%) psnr ssim
3 0.933 0.0004 -0.0008
4 0.886 0.0009 0.0011
5 1.031 0.0004 0.0028
6 1.068 0.0002 0.0000
7 1.196 0.0000 0.0000
8 0.180 0.0000 0.0000
BUG=aomedia:3096
STATS_CHANGED
Change-Id: Ib0172c97942733fb94a75c9bd7f3c6e845e08dd4
diff --git a/av1/encoder/palette.c b/av1/encoder/palette.c
index a76f4ef..7b653b2 100644
--- a/av1/encoder/palette.c
+++ b/av1/encoder/palette.c
@@ -329,7 +329,7 @@
MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
- uint8_t *tx_type_map) {
+ uint8_t *tx_type_map, bool *is_header_rd_based_term) {
int centroids[PALETTE_MAX_SIZE];
int n = start_n;
int top_color_winner = end_n;
@@ -348,7 +348,10 @@
tx_type_map, &beat_best_palette_rd,
&do_header_rd_based_breakout);
*last_n_searched = n;
- if (do_header_rd_based_breakout) break;
+ if (do_header_rd_based_breakout) {
+ if (is_header_rd_based_term != NULL) *is_header_rd_based_term = true;
+ break;
+ }
if (beat_best_palette_rd) {
top_color_winner = n;
} else if (cpi->sf.intra_sf.prune_palette_search_level == 2) {
@@ -373,7 +376,7 @@
int64_t *best_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
int *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
uint8_t *best_blk_skip, uint8_t *tx_type_map, uint8_t *color_map,
- int data_points) {
+ int data_points, bool *is_header_rd_based_term) {
int centroids[PALETTE_MAX_SIZE];
const int max_itr = 50;
int n = start_n;
@@ -397,7 +400,10 @@
tx_type_map, &beat_best_palette_rd,
&do_header_rd_based_breakout);
*last_n_searched = n;
- if (do_header_rd_based_breakout) break;
+ if (do_header_rd_based_breakout) {
+ if (is_header_rd_based_term != NULL) *is_header_rd_based_term = true;
+ break;
+ }
if (beat_best_palette_rd) {
top_color_winner = n;
} else if (cpi->sf.intra_sf.prune_palette_search_level == 2) {
@@ -522,6 +528,19 @@
count_buf[top_colors[i]] = 0;
}
+ // The following are the approaches used for header rdcost based gating
+ // for early termination for different values of prune_palette_search_level.
+ // 0: Pruning based on header rdcost for ascending order palette_size
+ // search.
+ // 1: When colors > PALETTE_MIN_SIZE, enabled only for coarse palette_size
+ // search and for finer search do_header_rd_based_gating parameter is
+ // explicitly passed as 'false'.
+ // 2: Enabled only for ascending order palette_size search and for
+ // descending order search do_header_rd_based_gating parameter is explicitly
+ // passed as 'false'.
+ const bool do_header_rd_based_gating =
+ cpi->sf.intra_sf.prune_luma_palette_size_search_level != 0;
+
// TODO(huisu@google.com): Try to avoid duplicate computation in cases
// where the dominant colors and the k-means results are similar.
if ((cpi->sf.intra_sf.prune_palette_search_level == 1) &&
@@ -555,19 +574,13 @@
const int min_n = start_n_lookup_table[max_n];
const int step_size = step_size_lookup_table[max_n];
assert(min_n >= PALETTE_MIN_SIZE);
- // Header rdcost based gating for early termination is currently enabled
- // only for coarse palette size search when prune_palette_search_level
- // is 1 and colors > PALETTE_MIN_SIZE. For finer search, the
- // do_header_rd_based_gating parameter is explicitly passed as 'false'.
- const bool do_header_rd_based_gating =
- cpi->sf.intra_sf.prune_luma_palette_size_search_level != 0;
-
// Perform top color coarse palette search to find the winner candidate
const int top_color_winner = perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
step_size, do_header_rd_based_gating, &unused, color_cache, n_cache,
best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
- distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
+ distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
+ NULL);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for dominant colors
if (top_color_winner <= max_n) {
@@ -581,7 +594,7 @@
/*do_header_rd_based_gating=*/false, &unused, color_cache, n_cache,
best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
distortion, skippable, beat_best_rd, ctx, best_blk_skip,
- tx_type_map);
+ tx_type_map, NULL);
}
// K-means clustering.
// Perform k-means coarse palette search to find the winner candidate
@@ -590,7 +603,7 @@
min_n, max_n + 1, step_size, do_header_rd_based_gating, &unused,
color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map, color_map, rows * cols);
+ best_blk_skip, tx_type_map, color_map, rows * cols, NULL);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for k-means
if (k_means_winner <= max_n) {
@@ -604,21 +617,18 @@
/*do_header_rd_based_gating=*/false, &unused, color_cache, n_cache,
best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
distortion, skippable, beat_best_rd, ctx, best_blk_skip,
- tx_type_map, color_map, rows * cols);
+ tx_type_map, color_map, rows * cols, NULL);
}
} else if (cpi->sf.intra_sf.prune_palette_search_level == 0) {
const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE),
min_n = PALETTE_MIN_SIZE;
- // Perform gating based on header rdcost for
- // prune_palette_search_level == 0.
- const bool do_header_rd_based_gating =
- cpi->sf.intra_sf.prune_luma_palette_size_search_level != 0;
// Perform top color palette search in ascending order.
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
1, do_header_rd_based_gating, &unused, color_cache, n_cache,
best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
- distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
+ distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
+ NULL);
// K-means clustering.
if (colors == PALETTE_MIN_SIZE) {
// Special case: These colors automatically become the centroids.
@@ -637,27 +647,29 @@
min_n, max_n + 1, 1, do_header_rd_based_gating, &unused,
color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map, color_map, rows * cols);
+ best_blk_skip, tx_type_map, color_map, rows * cols, NULL);
}
} else {
const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE),
min_n = PALETTE_MIN_SIZE;
// Perform top color palette search in ascending order
int last_n_searched = min_n;
+ // Flag to terminate further palette_size search based on header rdcost.
+ bool is_header_rd_based_term = false;
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
- 1, /*do_header_rd_based_gating=*/false, &last_n_searched, color_cache,
- n_cache, best_mbmi, best_palette_color_map, best_rd, rate,
- rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map);
- if (last_n_searched < max_n) {
+ 1, do_header_rd_based_gating, &last_n_searched, color_cache, n_cache,
+ best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
+ distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
+ &is_header_rd_based_term);
+ if (last_n_searched < max_n && !is_header_rd_based_term) {
// Search in descending order until we get to the previous best
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, max_n,
last_n_searched, -1, /*do_header_rd_based_gating=*/false, &unused,
color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map);
+ best_blk_skip, tx_type_map, NULL);
}
// K-means clustering.
if (colors == PALETTE_MIN_SIZE) {
@@ -673,14 +685,15 @@
} else {
// Perform k-means palette search in ascending order
last_n_searched = min_n;
+ is_header_rd_based_term = false;
perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
- min_n, max_n + 1, 1, /*do_header_rd_based_gating=*/false,
- &last_n_searched, color_cache, n_cache, best_mbmi,
- best_palette_color_map, best_rd, rate, rate_tokenonly, distortion,
- skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
- rows * cols);
- if (last_n_searched < max_n) {
+ min_n, max_n + 1, 1, do_header_rd_based_gating, &last_n_searched,
+ color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
+ rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
+ best_blk_skip, tx_type_map, color_map, rows * cols,
+ &is_header_rd_based_term);
+ if (last_n_searched < max_n && !is_header_rd_based_term) {
// Search in descending order until we get to the previous best
perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
@@ -688,7 +701,7 @@
&unused, color_cache, n_cache, best_mbmi, best_palette_color_map,
best_rd, rate, rate_tokenonly, distortion, skippable,
beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
- rows * cols);
+ rows * cols, NULL);
}
}
}
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 84ef1fc..e30b459 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -953,11 +953,14 @@
// level 2.
// 2: Terminate early for higher luma palette_size, if header rd cost of lower
// palette_size is more than best_rd.
- // For allintra encode, this sf reduces instruction count by 2.49%, 1.07%
- // and 2.76% for speed 0, 1 and 2 on screen content set with coding
- // performance change less than 0.01%. For AVIF image encode, this sf reduces
- // instruction count by 1.94%, 1.13% and 1.29% for speed 0, 1 and 2 on a
- // typical image dataset with coding performance change less than 0.01%.
+ // For allintra encode, this sf reduces instruction count by 2.49%, 1.07%,
+ // 2.76%, 2.30%, 1.84%, 2.69%, 2.04%, 2.05% and 1.44% for speed 0, 1, 2, 3, 4,
+ // 5, 6, 7 and 8 on screen content set with coding performance change less
+ // than 0.01% for speed <= 2 and less than 0.03% for speed >= 3. For AVIF
+ // image encode, this sf reduces instruction count by 1.94%, 1.13%, 1.29%,
+ // 0.93%, 0.89%, 1.03%, 1.07%, 1.20% and 0.18% for speed 0, 1, 2, 3, 4, 5, 6,
+ // 7 and 8 on a typical image dataset with coding performance change less than
+ // 0.01%.
int prune_luma_palette_size_search_level;
// Prune chroma intra modes based on luma intra mode winner.