RT: fix variance low flag for mode skipping.
Fix vertical and horizontal partitions being mapped to the same flag.
The flag is used to skip the golden reference frame.
Only enabled for 720p and above at speed 8 for now.
2% speed-up and 0.17% quality loss on 720p.
Change-Id: I6c5862d54ec8521a8d9e3cf9aafbcd3b766f6263
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index a8cc8ff..89fd4d3 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -443,6 +443,14 @@
float log_q;
#endif
int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
+ // 0 - 128x128
+ // 1-2 - 128x64
+ // 3-4 - 64x128
+ // 5-8 - 64x64
+ // 9-16 - 64x32
+ // 17-24 - 32x64
+ // 25-40 - 32x32
+ // 41-104 - 16x16
uint8_t variance_low[105];
uint8_t content_state_sb;
// Strong color activity detection. Used in REALTIME coding mode to enhance
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 9a1fc14..28002b9 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1170,51 +1170,70 @@
int mi_col, BLOCK_SIZE bsize) {
int force_skip_low_temp_var = 0;
int x, y;
+ x = (mi_col & 0x1F) >> 4;
+ // y = (mi_row & 0x1F) >> 4;
+ // const int idx64 = (y << 1) + x;
+ y = (mi_row & 0x17) >> 3;
+ const int idx64 = y + x;
+
+ x = (mi_col & 0xF) >> 3;
+ // y = (mi_row & 0xF) >> 3;
+ // const int idx32 = (y << 1) + x;
+ y = (mi_row & 0xB) >> 2;
+ const int idx32 = y + x;
+
+ x = (mi_col & 0x7) >> 2;
+ // y = (mi_row & 0x7) >> 2;
+ // const int idx16 = (y << 1) + x;
+ y = (mi_row & 0x5) >> 1;
+ const int idx16 = y + x;
// Set force_skip_low_temp_var based on the block size and block offset.
switch (bsize) {
case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break;
+ case BLOCK_128X64:
+ assert((mi_col & 0x1F) == 0);
+ force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)];
+ break;
+ case BLOCK_64X128:
+ assert((mi_row & 0x1F) == 0);
+ force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)];
+ break;
case BLOCK_64X64:
+ // Location of this 64x64 block inside the 128x128 superblock
+ force_skip_low_temp_var = variance_low[5 + idx64];
+ break;
+ case BLOCK_64X32:
+ x = (mi_col & 0x1F) >> 4;
+ y = (mi_row & 0x1F) >> 3;
+ /*
+ .---------------.---------------.
+ | x=0,y=0,idx=0 | x=1,y=0,idx=2 |
+ :---------------+---------------:
+ | x=0,y=1,idx=1 | x=1,y=1,idx=3 |
+ :---------------+---------------:
+ | x=0,y=2,idx=4 | x=1,y=2,idx=6 |
+ :---------------+---------------:
+ | x=0,y=3,idx=5 | x=1,y=3,idx=7 |
+ '---------------'---------------'
+ */
+ const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2);
+ force_skip_low_temp_var = variance_low[9 + idx64x32];
+ break;
+ case BLOCK_32X64:
+ x = (mi_col & 0x1F) >> 3;
+ y = (mi_row & 0x1F) >> 4;
+ const int idx32x64 = (y << 2) + x;
+ force_skip_low_temp_var = variance_low[17 + idx32x64];
+ break;
case BLOCK_32X32:
+ force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32];
+ break;
+ case BLOCK_32X16:
+ case BLOCK_16X32:
case BLOCK_16X16:
- x = mi_col % 32;
- y = mi_row % 32;
- if (bsize == BLOCK_64X64) {
- assert((x == 0 || x == 16) && (y == 0 || y == 16));
- }
- x >>= 4;
- y >>= 4;
- const int idx64 = y * 2 + x;
- if (bsize == BLOCK_64X64) {
- force_skip_low_temp_var = variance_low[1 + idx64];
- break;
- }
-
- x = mi_col % 16;
- y = mi_row % 16;
- if (bsize == BLOCK_32X32) {
- assert((x == 0 || x == 8) && (y == 0 || y == 8));
- }
- x >>= 3;
- y >>= 3;
- const int idx32 = y * 2 + x;
- if (bsize == BLOCK_32X32) {
- force_skip_low_temp_var = variance_low[5 + (idx64 << 2) + idx32];
- break;
- }
-
- x = mi_col % 8;
- y = mi_row % 8;
- if (bsize == BLOCK_16X16) {
- assert((x == 0 || x == 4) && (y == 0 || y == 4));
- }
- x >>= 2;
- y >>= 2;
- const int idx16 = y * 2 + x;
- if (bsize == BLOCK_16X16) {
- force_skip_low_temp_var =
- variance_low[21 + (idx64 << 4) + (idx32 << 2) + idx16];
- break;
- }
+ force_skip_low_temp_var =
+ variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
+ break;
default: break;
}
return force_skip_low_temp_var;
@@ -1542,15 +1561,13 @@
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
- if (cpi->sf.rt_sf.short_circuit_low_temp_var &&
+ const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
+ if (!is_small_sb && cpi->sf.rt_sf.short_circuit_low_temp_var &&
x->nonrd_prune_ref_frame_search) {
force_skip_low_temp_var =
get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
- // If force_skip_low_temp_var is set, and for short circuit mode = 1 and 3,
- // skip golden reference.
- if ((cpi->sf.rt_sf.short_circuit_low_temp_var == 1 ||
- cpi->sf.rt_sf.short_circuit_low_temp_var == 3) &&
- force_skip_low_temp_var) {
+ // If force_skip_low_temp_var is set, skip golden reference.
+ if (force_skip_low_temp_var) {
usable_ref_frame = LAST_FRAME;
}
}
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index c74526e..2202d37 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -468,51 +468,61 @@
xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
xd->mi[0]->mv[0].as_mv.row < mv_thr &&
xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
- if (xd->mi[0]->sb_type == BLOCK_128X128 ||
- xd->mi[0]->sb_type == BLOCK_64X128 ||
- xd->mi[0]->sb_type == BLOCK_128X64) {
- if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
+ if (xd->mi[0]->sb_type == BLOCK_128X128) {
+ if (vt->part_variances.none.variance < (thresholds[0] >> 1))
x->variance_low[0] = 1;
+ } else if (xd->mi[0]->sb_type == BLOCK_128X64) {
+ for (i = 0; i < 2; i++) {
+ if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
+ x->variance_low[i + 1] = 1;
+ }
+ } else if (xd->mi[0]->sb_type == BLOCK_64X128) {
+ for (i = 0; i < 2; i++) {
+ if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
+ x->variance_low[i + 3] = 1;
+ }
} else {
for (i = 0; i < 4; i++) {
const int idx[4][2] = { { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 } };
const int idx_str =
cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
MB_MODE_INFO **mi_64 = cm->mi_grid_base + idx_str;
-
+ if (*mi_64 == NULL) continue;
if (cm->mi_cols <= mi_col + idx[i][1] ||
cm->mi_rows <= mi_row + idx[i][0])
continue;
-
- if ((*mi_64)->sb_type == BLOCK_64X64 ||
- (*mi_64)->sb_type == BLOCK_64X32 ||
- (*mi_64)->sb_type == BLOCK_32X64) {
- int64_t threshold_64x64 =
- (cpi->sf.rt_sf.short_circuit_low_temp_var == 1 ||
- cpi->sf.rt_sf.short_circuit_low_temp_var == 3)
- ? ((5 * thresholds[1]) >> 3)
- : (thresholds[1] >> 1);
+ const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3;
+ if ((*mi_64)->sb_type == BLOCK_64X64) {
if (vt->split[i].part_variances.none.variance < threshold_64x64)
- x->variance_low[1 + i] = 1;
+ x->variance_low[5 + i] = 1;
+ } else if ((*mi_64)->sb_type == BLOCK_64X32) {
+ for (j = 0; j < 2; j++)
+ if (vt->split[i].part_variances.horz[j].variance <
+ (threshold_64x64 >> 1))
+ x->variance_low[9 + (i << 1) + j] = 1;
+ } else if ((*mi_64)->sb_type == BLOCK_32X64) {
+ for (j = 0; j < 2; j++)
+ if (vt->split[i].part_variances.vert[j].variance <
+ (threshold_64x64 >> 1))
+ x->variance_low[17 + (i << 1) + j] = 1;
} else {
for (k = 0; k < 4; k++) {
const int idx1[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
const int idx_str1 = cm->mi_stride * idx1[k][0] + idx1[k][1];
MB_MODE_INFO **mi_32 = cm->mi_grid_base + idx_str + idx_str1;
+ if (*mi_32 == NULL) continue;
if (cm->mi_cols <= mi_col + idx[i][1] + idx1[k][1] ||
cm->mi_rows <= mi_row + idx[i][0] + idx1[k][0])
continue;
+ const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3;
if ((*mi_32)->sb_type == BLOCK_32X32) {
- int64_t threshold_32x32 =
- (cpi->sf.rt_sf.short_circuit_low_temp_var == 1 ||
- cpi->sf.rt_sf.short_circuit_low_temp_var == 3)
- ? ((5 * thresholds[2]) >> 3)
- : (thresholds[2] >> 1);
if (vt->split[i].split[k].part_variances.none.variance <
threshold_32x32)
- x->variance_low[5 + (i << 2) + k] = 1;
- } else if (cpi->sf.rt_sf.short_circuit_low_temp_var >= 2) {
+ x->variance_low[25 + (i << 2) + k] = 1;
+ } else {
+ // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
+ // inside.
if ((*mi_32)->sb_type == BLOCK_16X16 ||
(*mi_32)->sb_type == BLOCK_32X16 ||
(*mi_32)->sb_type == BLOCK_16X32) {
@@ -521,7 +531,7 @@
.split[k]
.split[j]
.part_variances.none.variance < (thresholds[3] >> 8))
- x->variance_low[21 + (i << 4) + (k << 2) + j] = 1;
+ x->variance_low[41 + (i << 4) + (k << 2) + j] = 1;
}
}
}