[CFL] basic early termination for alpha search
This causes no change in the encoder output.
Relative to the overhead introduced by switching from the simple
SSE-based RDO to txfm_rd_in_plane, the overhead is reduced by 23% to 50%.
The total encode time increase is now 2.3% to 3.1%.
Change-Id: I48c76216871f8ed68631815fd781697139305e94
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index b871dd3..1ed95cb 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5360,18 +5360,19 @@
#if CONFIG_CFL
static void txfm_rd_in_plane_once(MACROBLOCK *const x,
const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- TX_SIZE tx_size, int plane, int64_t *dist,
- int *rate) {
+ TX_SIZE tx_size, int plane, int64_t best_rd,
+ int64_t *dist, int *rate) {
RD_STATS rd_stats;
av1_init_rd_stats(&rd_stats);
- txfm_rd_in_plane(x, cpi, &rd_stats, INT64_MAX, plane, bsize, tx_size,
+ txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane, bsize, tx_size,
cpi->sf.use_fast_coef_costing);
*dist = rd_stats.dist;
*rate = rd_stats.rate;
}
static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
- BLOCK_SIZE bsize, TX_SIZE tx_size) {
+ BLOCK_SIZE bsize, TX_SIZE tx_size,
+ int64_t best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
@@ -5381,10 +5382,10 @@
int64_t dists[CFL_PRED_PLANES][CFL_MAGS_SIZE];
mbmi->cfl_alpha_idx = 0;
mbmi->cfl_alpha_signs = CFL_SIGN_ZERO * CFL_SIGNS + CFL_SIGN_POS - 1;
- txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U,
+ txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd,
&dists[CFL_PRED_U][0], &rates[CFL_PRED_U][0]);
mbmi->cfl_alpha_signs = CFL_SIGN_POS * CFL_SIGNS + CFL_SIGN_ZERO - 1;
- txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V,
+ txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd,
&dists[CFL_PRED_V][0], &rates[CFL_PRED_V][0]);
for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
@@ -5392,9 +5393,9 @@
for (int sign = CFL_SIGN_NEG; sign < CFL_SIGNS; sign++) {
const int m = c * 2 + 1 + (sign == CFL_SIGN_NEG);
mbmi->cfl_alpha_signs = sign * CFL_SIGNS + sign - 1;
- txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U,
+ txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd,
&dists[CFL_PRED_U][m], &rates[CFL_PRED_U][m]);
- txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V,
+ txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd,
&dists[CFL_PRED_V][m], &rates[CFL_PRED_V][m]);
}
}
@@ -5402,9 +5403,9 @@
int64_t dist;
int64_t cost;
int64_t best_cost = INT64_MAX;
- int best_rate_overhead = 0;
+ int best_rate_overhead = INT_MAX;
#if CONFIG_DEBUG
- int best_rate = 0;
+ int best_rate = INT_MAX;
#endif // CONFIG_DEBUG
int ind = 0;
@@ -5416,17 +5417,19 @@
const int size_u = (sign_u == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
const int size_v = (sign_v == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
for (int u = 0; u < size_u; u++) {
- const int idx_u = (sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1;
+ const int idx_u = ((sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1) +
+ (sign_u == CFL_SIGN_NEG);
+ if (rates[CFL_PRED_U][idx_u] == INT_MAX) continue;
for (int v = 0; v < size_v; v++) {
- const int idx_v = (sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1;
- dist = dists[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
- dists[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
+ const int idx_v = ((sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1) +
+ (sign_v == CFL_SIGN_NEG);
+ if (rates[CFL_PRED_V][idx_v] == INT_MAX) continue;
+ dist = dists[CFL_PRED_U][idx_u] + dists[CFL_PRED_V][idx_v];
int rate_overhead = x->cfl_cost[joint_sign][CFL_PRED_U][u] +
x->cfl_cost[joint_sign][CFL_PRED_V][v];
int rate = x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED] +
- rate_overhead +
- rates[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
- rates[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
+ rate_overhead + rates[CFL_PRED_U][idx_u] +
+ rates[CFL_PRED_V][idx_v];
cost = RDCOST(x->rdmult, rate, dist);
if (cost < best_cost) {
best_cost = cost;
@@ -5489,7 +5492,8 @@
if (mode == UV_CFL_PRED) {
assert(!is_directional_mode);
const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
- cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, bsize, uv_tx_size);
+ cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, bsize, uv_tx_size, best_rd);
+ if (cfl_alpha_rate == INT_MAX) continue;
}
#endif
#if CONFIG_EXT_INTRA