Add the new coding tool "speed_refs"
This patch will not cause any performance change regardless of whether
speed_refs is on or off.
This coding tool aims to speed up the encoder-side reference frame
selection process. The essential idea is to run two scanning passes
over each superblock of size 64x64, as described below. This CL lays out
the initial framework only; no reference frame selection is done yet.
First scanning pass - Simplify the partition and the mode
candidates (e.g. consider nearestmv / nearmv / zeromv only) and
identify the best reference frame prediction candidates;
Second scanning pass - Use the best reference frame candidate(s)
obtained from the first pass to encode the current superblock.
Change-Id: I11266d468de3077271a5e866eebd341a8014d136
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 98913db..96bcfea 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1979,6 +1979,11 @@
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+#if CONFIG_SPEED_REFS
+ // First scanning pass of an SB is dry run only.
+ if (cpi->sb_scanning_pass_idx == 0) assert(dry_run == DRY_RUN_NORMAL);
+#endif // CONFIG_SPEED_REFS
+
if (!dry_run && ctx >= 0) td->counts->partition[ctx][partition]++;
#if CONFIG_SUPERTX
@@ -3387,6 +3392,17 @@
}
#endif
+#if CONFIG_SPEED_REFS
+ if (cpi->sb_scanning_pass_idx == 0) {
+ // NOTE: For the 1st pass of scanning, check all the subblocks of equal size
+ // only.
+ partition_none_allowed = (bsize == MIN_SPEED_REFS_BLKSIZE);
+ partition_horz_allowed = 0;
+ partition_vert_allowed = 0;
+ do_square_split = (bsize > MIN_SPEED_REFS_BLKSIZE);
+ }
+#endif // CONFIG_SPEED_REFS
+
// PARTITION_NONE
if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
@@ -4117,6 +4133,11 @@
}
#endif // CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_SPEED_REFS
+ // First scanning is done.
+ if (cpi->sb_scanning_pass_idx == 0 && bsize == cm->sb_size) return;
+#endif // CONFIG_SPEED_REFS
+
// TODO(jbb): This code added so that we avoid static analysis
// warning related to the fact that best_rd isn't used after this
// point. This code should be refactored so that the duplicate
@@ -4319,12 +4340,30 @@
rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
&x->min_partition_size, &x->max_partition_size);
}
+#if CONFIG_SPEED_REFS
+ // NOTE: Two scanning passes for the current superblock - the first pass
+ // is only targeted to collect stats.
+ for (int sb_pass_idx = 0; sb_pass_idx < 2; ++sb_pass_idx) {
+ cpi->sb_scanning_pass_idx = sb_pass_idx;
+ rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
+ &dummy_rdc,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ INT64_MAX, pc_root);
+ if (sb_pass_idx == 0) {
+ av1_zero(x->pred_mv);
+ pc_root->index = 0;
+ }
+ }
+#else // !CONFIG_SPEED_REFS
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
&dummy_rdc,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
INT64_MAX, pc_root);
+#endif // CONFIG_SPEED_REFS
}
}
}
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 6ad1197..1b81a41 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -53,6 +53,10 @@
extern "C" {
#endif
+#if CONFIG_SPEED_REFS
+#define MIN_SPEED_REFS_BLKSIZE BLOCK_16X16
+#endif // CONFIG_SPEED_REFS
+
typedef struct {
int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
@@ -650,6 +654,10 @@
#if CONFIG_LV_MAP
tran_low_t *tcoeff_buf[MAX_MB_PLANE];
#endif
+
+#if CONFIG_SPEED_REFS
+ int sb_scanning_pass_idx;
+#endif // CONFIG_SPEED_REFS
} AV1_COMP;
void av1_initialize_enc(void);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index e820f19..1f4f161 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9252,6 +9252,10 @@
*returnrate_nocoef = INT_MAX;
#endif // CONFIG_SUPERTX
+#if CONFIG_SPEED_REFS
+ memset(x->mbmi_ext->ref_mvs, 0, sizeof(x->mbmi_ext->ref_mvs));
+#endif // CONFIG_SPEED_REFS
+
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
x->mbmi_ext->mode_context[ref_frame] = 0;