Prune obmc evaluation
Prune OBMC evaluation based on stats gathered from previously encoded
frames. This feature is currently enabled at speed 4.
Ran Borg test at speed 4.
         avg_psnr:  ovr_psnr:  ssim:   avg_speedup (whole set)
hdres:   0.051      0.050      0.027   2.6%
midres:  0.137      0.114      0.154   3.4%
STATS_CHANGED
Change-Id: I6677986d8e153933d34cb10c0654dcf763f522a6
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 77d5a3d..9bcc3c7 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1643,6 +1643,24 @@
if (tile_data->allow_update_cdf) {
update_stats(&cpi->common, td, mi_row, mi_col);
}
+
+ // Gather obmc count to update the probability.
+ if (cpi->sf.prune_obmc_using_stats) {
+ const int inter_block = is_inter_block(mbmi);
+ const int seg_ref_active =
+ segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
+ if (!seg_ref_active && inter_block) {
+ const MOTION_MODE motion_allowed =
+ cm->switchable_motion_mode
+ ? motion_mode_allowed(xd->global_motion, xd, mbmi,
+ cm->allow_warped_motion)
+ : SIMPLE_TRANSLATION;
+ if (mbmi->ref_frame[1] != INTRA_FRAME &&
+ motion_allowed == OBMC_CAUSAL) {
+ td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
+ }
+ }
+ }
}
// TODO(Ravi/Remya): Move this copy function to a better logical place
copy_winner_ref_mode_from_mbmi_ext(x);
@@ -4999,6 +5017,7 @@
av1_zero(*td->counts);
av1_zero(rdc->comp_pred_diff);
av1_zero(rdc->tx_type_used);
+ av1_zero(rdc->obmc_used);
// Reset the flag.
cpi->intrabc_used = 0;
@@ -5297,6 +5316,19 @@
}
}
}
+
+ if (cpi->sf.prune_obmc_using_stats) {
+ const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+
+ for (i = 0; i < BLOCK_SIZES_ALL; i++) {
+ int sum = 0;
+ for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
+
+ int new_prob = sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
+ cpi->obmc_probs[update_type][i] =
+ (cpi->obmc_probs[update_type][i] + new_prob) >> 1;
+ }
+ }
}
#define CHECK_PRECOMPUTED_REF_FRAME_MAP 0
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 191b0b7..a94467d 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -232,6 +232,20 @@
{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }
};
+const int default_obmc_probs[FRAME_UPDATE_TYPES][BLOCK_SIZES_ALL] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 106, 90, 90, 97, 67, 59, 70, 28,
+ 30, 38, 16, 16, 16, 0, 0, 44, 50, 26, 25 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 98, 93, 97, 68, 82, 85, 33, 30,
+ 33, 16, 16, 16, 16, 0, 0, 43, 37, 26, 16 },
+ { 0, 0, 0, 91, 80, 76, 78, 55, 49, 24, 16,
+ 16, 16, 16, 16, 16, 0, 0, 29, 45, 16, 38 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 103, 89, 89, 89, 62, 63, 76, 34,
+ 35, 32, 19, 16, 16, 0, 0, 49, 55, 29, 19 }
+};
+
static INLINE void Scale2Ratio(AOM_SCALING mode, int *hr, int *hs) {
switch (mode) {
case NORMAL:
@@ -5032,6 +5046,12 @@
}
}
+ if (cpi->sf.prune_obmc_using_stats &&
+ cm->current_frame.frame_type == KEY_FRAME) {
+ av1_copy(cpi->obmc_probs, default_obmc_probs);
+ cpi->obmc_probs_thresh = 16;
+ }
+
// Loop variables
int loop_count = 0;
int loop_at_this_size = 0;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 44b765c..3781139 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -609,6 +609,7 @@
int compound_ref_used_flag;
int skip_mode_used_flag;
int tx_type_used[FRAME_UPDATE_TYPES][TX_SIZES_ALL][TX_TYPES];
+ int obmc_used[BLOCK_SIZES_ALL][2];
} RD_COUNTS;
typedef struct ThreadData {
@@ -984,6 +985,8 @@
int64_t vbp_threshold_copy;
BLOCK_SIZE vbp_bsize_min;
+ int obmc_probs[FRAME_UPDATE_TYPES][BLOCK_SIZES_ALL];
+ int obmc_probs_thresh;
int tx_type_probs[FRAME_UPDATE_TYPES][TX_SIZES_ALL][TX_TYPES];
int tx_type_probs_thresh[FRAME_UPDATE_TYPES];
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 6424d63..5741f89 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -35,6 +35,12 @@
td_t->rd_counts.tx_type_used[i][j][k];
}
}
+
+ for (int i = 0; i < BLOCK_SIZES_ALL; i++) {
+ for (int j = 0; j < 2; j++) {
+ td->rd_counts.obmc_used[i][j] += td_t->rd_counts.obmc_used[i][j];
+ }
+ }
}
static AOM_INLINE void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 16a9a32..8c456a4 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9747,7 +9747,12 @@
assert(mbmi->ref_frame[1] != INTRA_FRAME);
}
- if ((cpi->oxcf.enable_obmc == 0 || cpi->sf.use_fast_nonrd_pick_mode) &&
+ const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const int prune_obmc =
+ cpi->sf.prune_obmc_using_stats &&
+ (cpi->obmc_probs[update_type][bsize] < cpi->obmc_probs_thresh);
+ if ((cpi->oxcf.enable_obmc == 0 || cpi->sf.use_fast_nonrd_pick_mode ||
+ prune_obmc) &&
mbmi->motion_mode == OBMC_CAUSAL)
continue;
@@ -11903,8 +11908,11 @@
}
av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
-
- if (cpi->oxcf.enable_obmc) {
+ const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const int prune_obmc =
+ cpi->sf.prune_obmc_using_stats &&
+ (cpi->obmc_probs[update_type][bsize] < cpi->obmc_probs_thresh);
+ if (cpi->oxcf.enable_obmc && !prune_obmc) {
if (check_num_overlappable_neighbors(mbmi) &&
is_motion_variation_allowed_bsize(bsize)) {
int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 7addf4d..22f355f 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -443,6 +443,7 @@
// sf->tx_domain_dist_level = 2;
sf->tx_domain_dist_thres_level = 2;
sf->simple_motion_search_prune_agg = 2;
+ sf->prune_obmc_using_stats = 1;
}
}
@@ -903,6 +904,7 @@
sf->prune_comp_type_by_model_rd = 0;
sf->disable_smooth_intra = 0;
sf->perform_best_rd_based_gating_for_chroma = 0;
+ sf->prune_obmc_using_stats = 0;
if (oxcf->mode == GOOD)
set_good_speed_features_framesize_independent(cpi, sf, speed);
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 2478d97..7cb63c6 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -790,6 +790,9 @@
// frame.
int adaptive_overlay_encoding;
+ // Prune obmc search using previous frame stats.
+ int prune_obmc_using_stats;
+
// Use ALTREF frame in non-RD mode decision.
int use_nonrd_altref_frame;