AV1 RT: Move CurvFit mode selection to run-time
The CurvFit model is disabled for now because it degrades BDRate by 11% while
giving a 5% speedup (on the RTC test set, Speed 8).
Change-Id: I7f828afe80b559f2f4a69c7ff708f68a69f275cf
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 764bfe0..6e07491 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -34,8 +34,6 @@
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
-#define _TMP_USE_CURVFIT_ 0
-
extern int g_pick_inter_mode_cnt;
typedef struct {
uint8_t *data;
@@ -451,7 +449,6 @@
}
}
-#if _TMP_USE_CURVFIT_
static void model_rd_with_curvfit(const AV1_COMP *const cpi,
const MACROBLOCK *const x,
BLOCK_SIZE plane_bsize, int plane,
@@ -477,7 +474,9 @@
double rate_f, dist_by_sse_norm_f;
av1_model_rd_curvfit(plane_bsize, sse_norm, xqr, &rate_f,
&dist_by_sse_norm_f);
-
+ // A threshold of 9.0 gave the best quality gain on a test video,
+ // but it should probably depend on qstep.
+ if (rate_f < 9.0) rate_f = 0.0;
const double dist_f = dist_by_sse_norm_f * sse_norm;
int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5);
int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5);
@@ -495,7 +494,6 @@
if (rate) *rate = rate_i;
if (dist) *dist = dist_i;
}
-#endif
static TX_SIZE calculate_tx_size(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
MACROBLOCKD *const xd, unsigned int var,
@@ -704,13 +702,15 @@
pd->dst.buf, pd->dst.stride, &sse);
xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, xd, var, sse);
-#if _TMP_USE_CURVFIT_
- model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist);
-#else
- (void)cpi;
- rate = INT_MAX; // this will be overwritten later with block_yrd
- dist = INT_MAX;
-#endif
+ if (cpi->sf.use_modeled_non_rd_cost) {
+ const int bwide = block_size_wide[bsize];
+ const int bhigh = block_size_high[bsize];
+ model_rd_with_curvfit(cpi, x, bsize, AOM_PLANE_Y, sse, bwide * bhigh, &rate,
+ &dist);
+ } else {
+ rate = INT_MAX; // this will be overwritten later with block_yrd
+ dist = INT_MAX;
+ }
*var_y = var;
*sse_y = sse;
x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
@@ -1172,10 +1172,8 @@
int rate_mv = 0;
int mode_rd_thresh;
int mode_index;
-#if !_TMP_USE_CURVFIT_
int64_t this_sse;
int is_skippable;
-#endif
int this_early_term = 0;
int skip_this_mv = 0;
int comp_pred = 0;
@@ -1349,17 +1347,20 @@
: av1_broadcast_interp_filter(filter_ref);
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
AOM_PLANE_Y, AOM_PLANE_Y);
-#if !_TMP_USE_CURVFIT_
- if (use_model_yrd_large) {
- model_skip_for_sb_y_large(cpi, bsize, x, xd, &var_y, &sse_y,
- &this_early_term);
- } else {
-#endif
+ if (cpi->sf.use_modeled_non_rd_cost) {
model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
&this_rdc.skip, NULL, &var_y, &sse_y);
-#if !_TMP_USE_CURVFIT_
+
+ } else {
+ if (use_model_yrd_large) {
+ model_skip_for_sb_y_large(cpi, bsize, x, xd, &var_y, &sse_y,
+ &this_early_term);
+ } else {
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
+ &this_rdc.skip, NULL, &var_y, &sse_y);
+ }
}
-#endif
+
if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) {
sse_zeromv_norm =
sse_y >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
@@ -1371,30 +1372,32 @@
const int skip_cost = x->skip_cost[skip_ctx][1];
const int no_skip_cost = x->skip_cost[skip_ctx][0];
if (!this_early_term) {
-#if !_TMP_USE_CURVFIT_
- this_sse = (int64_t)sse_y;
- block_yrd(cpi, x, mi_row, mi_col, &this_rdc, &is_skippable, &this_sse,
- bsize, mi->tx_size);
-#endif
-
- x->skip = this_rdc.skip;
- if (this_rdc.skip) {
- this_rdc.rate = skip_cost;
- } else {
-#if !_TMP_USE_CURVFIT_
- // on CurvFit this condition is checked inside curvfit modeling
- if (RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist) >=
- RDCOST(
- x->rdmult, 0,
- this_sse)) { // this_sse already multiplied by 16 in block_yrd
- x->skip = 1;
+ if (cpi->sf.use_modeled_non_rd_cost) {
+ x->skip = this_rdc.skip;
+ if (this_rdc.skip) {
this_rdc.rate = skip_cost;
- this_rdc.dist = this_sse;
- } else
-#endif
- {
+ } else {
this_rdc.rate += no_skip_cost;
}
+ } else {
+ this_sse = (int64_t)sse_y;
+ block_yrd(cpi, x, mi_row, mi_col, &this_rdc, &is_skippable, &this_sse,
+ bsize, mi->tx_size);
+ x->skip = this_rdc.skip;
+ if (this_rdc.skip) {
+ this_rdc.rate = skip_cost;
+ } else {
+ if (RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist) >=
+ RDCOST(x->rdmult, 0,
+ this_sse)) { // this_sse already multiplied by 16 in
+ // block_yrd
+ x->skip = 1;
+ this_rdc.rate = skip_cost;
+ this_rdc.dist = this_sse;
+ } else {
+ this_rdc.rate += no_skip_cost;
+ }
+ }
}
} else {
x->skip = 1;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 3119f22..b61666b 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -614,6 +614,12 @@
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->tx_size_search_method = USE_FAST_RD;
sf->estimate_motion_for_var_based_partition = 0;
+// TODO(kyslov): Enable when a better model is available.
+// It currently gives a 5% speedup but an 11% overall BDRate degradation,
+// so it cannot be enabled until the CurvFit model is improved.
+#if 0
+ sf->use_modeled_non_rd_cost = 1;
+#endif
}
}
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 45e63e2..c449c8d 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -724,6 +724,9 @@
// Use CNN with luma pixels on source frame on each of the 64x64 subblock to
// perform split/no_split decision on intra-frames.
int intra_cnn_split;
+
+ // Use modeled (currently CurvFit model) RDCost for fast non-RD mode
+ int use_modeled_non_rd_cost;
} SPEED_FEATURES;
struct AV1_COMP;