Speed up wedge mask index search
Bypass secondary motion estimation for each mask index. Reuse the
outcome from the two-side motion search results. This brings back
the encoding speed by another 30%. The compression performance loss
ranges 0.05% - 0.17%.
STATS_CHANGED
Change-Id: I3bdc000082f0d9de342666aa22eee851a8082257
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index 6f9cb56..cce0ea2 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -1354,9 +1354,9 @@
int_mv tmp_mv[2] = { mbmi->mv[0], mbmi->mv[1] };
int best_rate_mv = *rate_mv;
const int wedge_mask_size = get_wedge_types_lookup(bsize);
-
- int need_mask_search =
- args->wedge_index == -1 || !have_newmv_in_inter_mode(this_mode);
+ int ref_frame = av1_ref_frame_type(mbmi->ref_frame);
+ int need_mask_search = args->wedge_index[ref_frame] == -1 ||
+ !have_newmv_in_inter_mode(this_mode);
for (int wedge_mask = 0; wedge_mask < wedge_mask_size && need_mask_search;
++wedge_mask) {
@@ -1397,18 +1397,20 @@
}
if (need_mask_search) {
- args->wedge_index = best_mask_index;
- args->wedge_sign = best_wedge_sign;
+ if (have_newmv_in_inter_mode(this_mode)) {
+ args->wedge_index[ref_frame] = best_mask_index;
+ args->wedge_sign[ref_frame] = best_wedge_sign;
+ }
} else {
- mbmi->interinter_comp.wedge_index = args->wedge_index;
- mbmi->interinter_comp.wedge_sign = args->wedge_sign;
+ mbmi->interinter_comp.wedge_index = args->wedge_index[ref_frame];
+ mbmi->interinter_comp.wedge_sign = args->wedge_sign[ref_frame];
rs2 = masked_type_cost[cur_type];
rs2 += get_interinter_compound_mask_rate(&x->mode_costs, mbmi);
tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
bsize, this_mode);
- best_mask_index = args->wedge_index;
- best_wedge_sign = args->wedge_sign;
+ best_mask_index = args->wedge_index[ref_frame];
+ best_wedge_sign = args->wedge_sign[ref_frame];
tmp_mv[0] = mbmi->mv[0];
tmp_mv[1] = mbmi->mv[1];
best_rate_mv = tmp_rate_mv;
diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h
index 9a6dcfa..69b6a36 100644
--- a/av1/encoder/interp_search.h
+++ b/av1/encoder/interp_search.h
@@ -126,11 +126,11 @@
/*!
* Estimated wedge index.
*/
- int wedge_index;
+ int wedge_index[MODE_CTX_REF_FRAMES];
/*!
* Estimated wedge sign.
*/
- int wedge_sign;
+ int wedge_sign[MODE_CTX_REF_FRAMES];
/*!
* Estimated diff wtd index.
*/
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 05ee48c..862726d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -155,131 +155,131 @@
THR_COMP_NEAREST_NEARESTBA,
THR_COMP_NEAR_NEARLA,
+ THR_COMP_NEW_NEWLA,
THR_COMP_NEW_NEARESTLA,
THR_COMP_NEAREST_NEWLA,
THR_COMP_NEW_NEARLA,
THR_COMP_NEAR_NEWLA,
- THR_COMP_NEW_NEWLA,
THR_COMP_GLOBAL_GLOBALLA,
THR_COMP_NEAR_NEARL2A,
+ THR_COMP_NEW_NEWL2A,
THR_COMP_NEW_NEARESTL2A,
THR_COMP_NEAREST_NEWL2A,
THR_COMP_NEW_NEARL2A,
THR_COMP_NEAR_NEWL2A,
- THR_COMP_NEW_NEWL2A,
THR_COMP_GLOBAL_GLOBALL2A,
THR_COMP_NEAR_NEARL3A,
+ THR_COMP_NEW_NEWL3A,
THR_COMP_NEW_NEARESTL3A,
THR_COMP_NEAREST_NEWL3A,
THR_COMP_NEW_NEARL3A,
THR_COMP_NEAR_NEWL3A,
- THR_COMP_NEW_NEWL3A,
THR_COMP_GLOBAL_GLOBALL3A,
THR_COMP_NEAR_NEARGA,
+ THR_COMP_NEW_NEWGA,
THR_COMP_NEW_NEARESTGA,
THR_COMP_NEAREST_NEWGA,
THR_COMP_NEW_NEARGA,
THR_COMP_NEAR_NEWGA,
- THR_COMP_NEW_NEWGA,
THR_COMP_GLOBAL_GLOBALGA,
THR_COMP_NEAR_NEARLB,
+ THR_COMP_NEW_NEWLB,
THR_COMP_NEW_NEARESTLB,
THR_COMP_NEAREST_NEWLB,
THR_COMP_NEW_NEARLB,
THR_COMP_NEAR_NEWLB,
- THR_COMP_NEW_NEWLB,
THR_COMP_GLOBAL_GLOBALLB,
THR_COMP_NEAR_NEARL2B,
+ THR_COMP_NEW_NEWL2B,
THR_COMP_NEW_NEARESTL2B,
THR_COMP_NEAREST_NEWL2B,
THR_COMP_NEW_NEARL2B,
THR_COMP_NEAR_NEWL2B,
- THR_COMP_NEW_NEWL2B,
THR_COMP_GLOBAL_GLOBALL2B,
THR_COMP_NEAR_NEARL3B,
+ THR_COMP_NEW_NEWL3B,
THR_COMP_NEW_NEARESTL3B,
THR_COMP_NEAREST_NEWL3B,
THR_COMP_NEW_NEARL3B,
THR_COMP_NEAR_NEWL3B,
- THR_COMP_NEW_NEWL3B,
THR_COMP_GLOBAL_GLOBALL3B,
THR_COMP_NEAR_NEARGB,
+ THR_COMP_NEW_NEWGB,
THR_COMP_NEW_NEARESTGB,
THR_COMP_NEAREST_NEWGB,
THR_COMP_NEW_NEARGB,
THR_COMP_NEAR_NEWGB,
- THR_COMP_NEW_NEWGB,
THR_COMP_GLOBAL_GLOBALGB,
THR_COMP_NEAR_NEARLA2,
+ THR_COMP_NEW_NEWLA2,
THR_COMP_NEW_NEARESTLA2,
THR_COMP_NEAREST_NEWLA2,
THR_COMP_NEW_NEARLA2,
THR_COMP_NEAR_NEWLA2,
- THR_COMP_NEW_NEWLA2,
THR_COMP_GLOBAL_GLOBALLA2,
THR_COMP_NEAR_NEARL2A2,
+ THR_COMP_NEW_NEWL2A2,
THR_COMP_NEW_NEARESTL2A2,
THR_COMP_NEAREST_NEWL2A2,
THR_COMP_NEW_NEARL2A2,
THR_COMP_NEAR_NEWL2A2,
- THR_COMP_NEW_NEWL2A2,
THR_COMP_GLOBAL_GLOBALL2A2,
THR_COMP_NEAR_NEARL3A2,
+ THR_COMP_NEW_NEWL3A2,
THR_COMP_NEW_NEARESTL3A2,
THR_COMP_NEAREST_NEWL3A2,
THR_COMP_NEW_NEARL3A2,
THR_COMP_NEAR_NEWL3A2,
- THR_COMP_NEW_NEWL3A2,
THR_COMP_GLOBAL_GLOBALL3A2,
THR_COMP_NEAR_NEARGA2,
+ THR_COMP_NEW_NEWGA2,
THR_COMP_NEW_NEARESTGA2,
THR_COMP_NEAREST_NEWGA2,
THR_COMP_NEW_NEARGA2,
THR_COMP_NEAR_NEWGA2,
- THR_COMP_NEW_NEWGA2,
THR_COMP_GLOBAL_GLOBALGA2,
THR_COMP_NEAR_NEARLL2,
+ THR_COMP_NEW_NEWLL2,
THR_COMP_NEW_NEARESTLL2,
THR_COMP_NEAREST_NEWLL2,
THR_COMP_NEW_NEARLL2,
THR_COMP_NEAR_NEWLL2,
- THR_COMP_NEW_NEWLL2,
THR_COMP_GLOBAL_GLOBALLL2,
THR_COMP_NEAR_NEARLL3,
+ THR_COMP_NEW_NEWLL3,
THR_COMP_NEW_NEARESTLL3,
THR_COMP_NEAREST_NEWLL3,
THR_COMP_NEW_NEARLL3,
THR_COMP_NEAR_NEWLL3,
- THR_COMP_NEW_NEWLL3,
THR_COMP_GLOBAL_GLOBALLL3,
THR_COMP_NEAR_NEARLG,
+ THR_COMP_NEW_NEWLG,
THR_COMP_NEW_NEARESTLG,
THR_COMP_NEAREST_NEWLG,
THR_COMP_NEW_NEARLG,
THR_COMP_NEAR_NEWLG,
- THR_COMP_NEW_NEWLG,
THR_COMP_GLOBAL_GLOBALLG,
THR_COMP_NEAR_NEARBA,
+ THR_COMP_NEW_NEWBA,
THR_COMP_NEW_NEARESTBA,
THR_COMP_NEAREST_NEWBA,
THR_COMP_NEW_NEARBA,
THR_COMP_NEAR_NEWBA,
- THR_COMP_NEW_NEWBA,
THR_COMP_GLOBAL_GLOBALBA,
THR_DC,
@@ -2766,8 +2766,6 @@
save_mv[i][1].as_int = INVALID_MV;
}
- args->wedge_index = -1;
- args->wedge_sign = -1;
args->diffwtd_index = -1;
// Main loop of this function. This will iterate over all of the ref mvs
@@ -5350,9 +5348,13 @@
interintra_modes,
{ { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
0,
- 0,
- 0,
+ { 0 },
+ { 0 },
0 };
+ for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) {
+ args.wedge_index[i] = -1;
+ args.wedge_sign[i] = -1;
+ }
// Indicates the appropriate number of simple translation winner modes for
// exhaustive motion mode evaluation
const int max_winner_motion_mode_cand =
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index e636df8..695aa48 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -87,131 +87,131 @@
{ NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },
// intra modes