Optimization for EXT_INTER + OBMC combination
In the RD loop, also evaluate OBMC (with its MV copied from the regular
inter predictor) when wedge interinter is better than regular inter;
previously this case forced allow_obmc = 0. The early-termination
condition before this step is relaxed to avoid skipping too many OBMC
predictions. The overhead rates for these tools are now calculated properly.
Bitstream syntax logic:
(single ref) the interintra flag is sent first; the obmc flag is sent
only if the interintra flag is 0;
(compound refs) the obmc flag is sent first; the wedge interinter flag
is sent only if the obmc flag is 0.
Coding gain
lowres: 0.428% (2.287%->2.715%)
Change-Id: I5f3a34640b398e313cbf84235c9fe2073eb2173f
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 5cecf79..65a8298 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -700,7 +700,11 @@
#if CONFIG_OBMC
static INLINE int is_obmc_allowed(const MB_MODE_INFO *mbmi) {
+#if CONFIG_EXT_INTER
+ return (mbmi->sb_type >= BLOCK_8X8 && mbmi->ref_frame[1] != INTRA_FRAME);
+#else
return (mbmi->sb_type >= BLOCK_8X8);
+#endif // CONFIG_EXT_INTER
}
static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) {
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index b52696d..32ed200 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -1230,14 +1230,6 @@
mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
}
-#if CONFIG_OBMC
- mbmi->obmc = 0;
-#if CONFIG_SUPERTX
- if (!supertx_enabled)
-#endif // CONFIG_SUPERTX
- mbmi->obmc = read_is_obmc_block(cm, xd, r);
-#endif // CONFIG_OBMC
-
#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (is_compound)
@@ -1516,12 +1508,8 @@
}
#if CONFIG_EXT_INTER
- mbmi->use_wedge_interintra = 0;
- mbmi->use_wedge_interinter = 0;
+ mbmi->use_wedge_interintra = 0;
if (cm->reference_mode != COMPOUND_REFERENCE &&
-#if CONFIG_OBMC
- !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
-#endif // CONFIG_OBMC
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif
@@ -1557,6 +1545,21 @@
}
}
}
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+ mbmi->obmc = 0;
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_INTER
+ if (mbmi->ref_frame[1] != INTRA_FRAME)
+#endif // CONFIG_EXT_INTER
+ mbmi->obmc = read_is_obmc_block(cm, xd, r);
+#endif // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+ mbmi->use_wedge_interinter = 0;
if (cm->reference_mode != SINGLE_REFERENCE &&
is_inter_compound_mode(mbmi->mode) &&
#if CONFIG_OBMC
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index da1885d..dccdf3b 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -1115,14 +1115,6 @@
int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
write_ref_frames(cm, xd, w);
-#if CONFIG_OBMC
-#if CONFIG_SUPERTX
- if (!supertx_enabled)
-#endif // CONFIG_SUPERTX
- if (is_obmc_allowed(mbmi))
- vp10_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]);
-#endif // CONFIG_OBMC
-
#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (is_compound)
@@ -1286,9 +1278,6 @@
#if CONFIG_EXT_INTER
if (cpi->common.reference_mode != COMPOUND_REFERENCE &&
-#if CONFIG_OBMC
- !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
-#endif // CONFIG_OBMC
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif // CONFIG_SUPERTX
@@ -1311,6 +1300,20 @@
}
}
}
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_INTER
+ if (mbmi->ref_frame[1] != INTRA_FRAME)
+#endif // CONFIG_EXT_INTER
+ if (is_obmc_allowed(mbmi))
+ vp10_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]);
+#endif // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
if (cpi->common.reference_mode != SINGLE_REFERENCE &&
is_inter_compound_mode(mbmi->mode) &&
#if CONFIG_OBMC
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 6aba475..d93d7f3 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2008,21 +2008,8 @@
#endif // CONFIG_EXT_REFS
}
-#if CONFIG_OBMC
-#if CONFIG_SUPERTX
- if (!supertx_enabled)
-#endif // CONFIG_SUPERTX
- if (is_obmc_allowed(mbmi))
- counts->obmc[mbmi->sb_type][mbmi->obmc]++;
-#endif // CONFIG_OBMC
- }
- }
-
#if CONFIG_EXT_INTER
if (cm->reference_mode != COMPOUND_REFERENCE &&
-#if CONFIG_OBMC
- !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
-#endif
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif
@@ -2037,16 +2024,32 @@
counts->interintra[bsize_group][0]++;
}
}
- if (cm->reference_mode != SINGLE_REFERENCE &&
- is_inter_compound_mode(mbmi->mode) &&
-#if CONFIG_OBMC
- !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
-#endif // CONFIG_OBMC
- is_interinter_wedge_used(bsize)) {
- counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
- }
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_INTER
+ if (mbmi->ref_frame[1] != INTRA_FRAME)
+#endif // CONFIG_EXT_INTER
+ if (is_obmc_allowed(mbmi))
+ counts->obmc[mbmi->sb_type][mbmi->obmc]++;
+#endif // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+ if (cm->reference_mode != SINGLE_REFERENCE &&
+ is_inter_compound_mode(mbmi->mode) &&
+#if CONFIG_OBMC
+ !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
+#endif // CONFIG_OBMC
+ is_interinter_wedge_used(bsize)) {
+ counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
+ }
+#endif // CONFIG_EXT_INTER
+ }
+ }
+
if (inter_block &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 918ad3e..2b08c46 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -6248,6 +6248,10 @@
int64_t best_distortion = INT64_MAX;
unsigned int best_pred_var = UINT_MAX;
MB_MODE_INFO best_mbmi;
+#if CONFIG_EXT_INTER
+ int rate2_bmc_nocoeff;
+ MB_MODE_INFO best_bmc_mbmi;
+#endif // CONFIG_EXT_INTER
#endif // CONFIG_OBMC
int pred_exists = 0;
@@ -6278,9 +6282,6 @@
assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
#endif // CONFIG_EXT_INTER
-#if CONFIG_OBMC
- tmp_rd = 0;
-#endif // CONFIG_OBMC
#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (is_comp_pred)
@@ -6643,6 +6644,12 @@
rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
#if CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ best_bmc_mbmi = *mbmi;
+ rate2_bmc_nocoeff = *rate2;
+ if (cm->interp_filter == SWITCHABLE)
+ rate2_bmc_nocoeff += rs;
+#endif // CONFIG_OBMC
if (is_comp_pred && is_interinter_wedge_used(bsize)) {
int wedge_index, best_wedge_index = WEDGE_NONE, rs;
int rate_sum;
@@ -6755,7 +6762,6 @@
xd, bsize, mi_row, mi_col, 1, preds1, strides);
for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
mbmi->interinter_wedge_index = wedge_index;
- // vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
vp10_build_wedge_inter_predictor_from_buf(xd, bsize, mi_row, mi_col,
preds0, strides,
preds1, strides);
@@ -6774,12 +6780,8 @@
mbmi->use_wedge_interinter = 0;
}
}
-#if CONFIG_OBMC
- if (mbmi->use_wedge_interinter)
- allow_obmc = 0;
-#endif // CONFIG_OBMC
if (ref_best_rd < INT64_MAX &&
- VPXMIN(best_rd_wedge, best_rd_nowedge) / 2 > ref_best_rd)
+ VPXMIN(best_rd_wedge, best_rd_nowedge) / 3 > ref_best_rd)
return INT64_MAX;
pred_exists = 0;
@@ -7079,7 +7081,6 @@
memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
memcpy(x->bsse, bsse, sizeof(bsse));
-
#if CONFIG_OBMC
best_rd = INT64_MAX;
for (mbmi->obmc = 0; mbmi->obmc <= allow_obmc; mbmi->obmc++) {
@@ -7087,6 +7088,12 @@
int tmp_rate;
if (mbmi->obmc) {
+#if CONFIG_EXT_INTER
+ *mbmi = best_bmc_mbmi;
+ assert(!mbmi->use_wedge_interinter);
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+ mbmi->obmc = 1;
+#endif // CONFIG_EXT_INTER
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0,
NULL, NULL,
dst_buf1, dst_stride1,
@@ -7106,13 +7113,16 @@
x->pred_variance =
vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
#endif // CONFIG_VP9_HIGHBITDEPTH
-
x->skip = 0;
+#if CONFIG_EXT_INTER
+ *rate2 = mbmi->obmc ? rate2_bmc_nocoeff : rate2_nocoeff;
+#else
*rate2 = rate2_nocoeff;
- *distortion = 0;
+#endif // CONFIG_EXT_INTER
if (allow_obmc)
*rate2 += cpi->obmc_cost[bsize][mbmi->obmc];
+ *distortion = 0;
#endif // CONFIG_OBMC
if (!skip_txfm_sb) {
int skippable_y, skippable_uv;
@@ -8472,6 +8482,9 @@
#if CONFIG_EXT_INTER
rate2 += compmode_interintra_cost;
if (cm->reference_mode != SINGLE_REFERENCE && comp_pred)
+#if CONFIG_OBMC
+ if (mbmi->obmc == 0)
+#endif // CONFIG_OBMC
rate2 += compmode_wedge_cost;
#endif // CONFIG_EXT_INTER
@@ -8578,7 +8591,7 @@
*returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
mbmi->ref_frame[0] != INTRA_FRAME);
#if CONFIG_OBMC
- if (is_neighbor_overlappable(mbmi) && is_obmc_allowed(mbmi))
+ if (is_inter_block(mbmi) && is_obmc_allowed(mbmi))
*returnrate_nocoef -= cpi->obmc_cost[bsize][mbmi->obmc];
#endif // CONFIG_OBMC
#endif // CONFIG_SUPERTX