AV1 RealTime path. Initial implementation
Change-Id: If264acb0f6df620a2af2fd19706e023eadee5382
diff --git a/aom/aom_encoder.h b/aom/aom_encoder.h
index 60c1c82..f8a7cec 100644
--- a/aom/aom_encoder.h
+++ b/aom/aom_encoder.h
@@ -861,6 +861,11 @@
*/
aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx);
+/*!\brief usage parameter analogous to AV1 GOOD QUALITY mode. */
+#define AOM_USAGE_GOOD_QUALITY (0)
+/*!\brief usage parameter analogous to AV1 REALTIME mode. */
+#define AOM_USAGE_REALTIME (1)
+
/*!\brief Encode a frame
*
* Encodes a video frame at the given "presentation time." The presentation
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 927e352..c83a99f 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -836,6 +836,18 @@
specialize qw/aom_highbd_sad64x16x4d sse2/;
#
+ # Avg
+ #
+ add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p";
+ specialize qw/aom_avg_8x8 sse2/;
+
+ add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p";
+ specialize qw/aom_avg_4x4 sse2/;
+
+ add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ specialize qw/aom_minmax_8x8 sse2/;
+
+ #
# hamadard transform and satd for implmenting temporal dependency model
#
add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
diff --git a/aom_dsp/avg.c b/aom_dsp/avg.c
index 4d78c9c..a97cd0f 100644
--- a/aom_dsp/avg.c
+++ b/aom_dsp/avg.c
@@ -14,6 +14,40 @@
#include "config/aom_dsp_rtcd.h"
#include "aom_ports/mem.h"
+void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
+ int *min, int *max) {
+ int i, j;
+ *min = 255;
+ *max = 0;
+ for (i = 0; i < 8; ++i, s += p, d += dp) {
+ for (j = 0; j < 8; ++j) {
+ int diff = abs(s[j] - d[j]);
+ *min = diff < *min ? diff : *min;
+ *max = diff > *max ? diff : *max;
+ }
+ }
+}
+
+unsigned int aom_avg_4x4_c(const uint8_t *s, int p) {
+ int i, j;
+ int sum = 0;
+ for (i = 0; i < 4; ++i, s += p)
+ for (j = 0; j < 4; sum += s[j], ++j) {
+ }
+
+ return (sum + 8) >> 4;
+}
+
+unsigned int aom_avg_8x8_c(const uint8_t *s, int p) {
+ int i, j;
+ int sum = 0;
+ for (i = 0; i < 8; ++i, s += p)
+ for (j = 0; j < 8; sum += s[j], ++j) {
+ }
+
+ return (sum + 32) >> 6;
+}
+
// src_diff: first pass, 9 bit, dynamic range [-255, 255]
// second pass, 12 bit, dynamic range [-2040, 2040]
static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
diff --git a/aom_dsp/x86/avg_intrin_sse2.c b/aom_dsp/x86/avg_intrin_sse2.c
index 969e4e1..0c20261 100644
--- a/aom_dsp/x86/avg_intrin_sse2.c
+++ b/aom_dsp/x86/avg_intrin_sse2.c
@@ -16,6 +16,129 @@
#include "aom_dsp/x86/bitdepth_conversion_sse2.h"
#include "aom_ports/mem.h"
+void aom_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
+ int *min, int *max) {
+ __m128i u0, s0, d0, diff, maxabsdiff, minabsdiff, negdiff, absdiff0, absdiff;
+ u0 = _mm_setzero_si128();
+ // Row 0
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff0 = _mm_max_epi16(diff, negdiff);
+ // Row 1
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + dp)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff = _mm_max_epi16(diff, negdiff);
+ maxabsdiff = _mm_max_epi16(absdiff0, absdiff);
+ minabsdiff = _mm_min_epi16(absdiff0, absdiff);
+ // Row 2
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 2 * dp)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff = _mm_max_epi16(diff, negdiff);
+ maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
+ minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
+ // Row 3
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 3 * dp)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff = _mm_max_epi16(diff, negdiff);
+ maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
+ minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
+ // Row 4
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 4 * dp)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff = _mm_max_epi16(diff, negdiff);
+ maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
+ minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
+ // Row 5
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 5 * dp)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff = _mm_max_epi16(diff, negdiff);
+ maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
+ minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
+ // Row 6
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 6 * dp)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff = _mm_max_epi16(diff, negdiff);
+ maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
+ minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
+ // Row 7
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
+ d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 7 * dp)), u0);
+ diff = _mm_subs_epi16(s0, d0);
+ negdiff = _mm_subs_epi16(u0, diff);
+ absdiff = _mm_max_epi16(diff, negdiff);
+ maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
+ minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
+
+ maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_si128(maxabsdiff, 8));
+ maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 32));
+ maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 16));
+ *max = _mm_extract_epi16(maxabsdiff, 0);
+
+ minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_si128(minabsdiff, 8));
+ minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 32));
+ minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 16));
+ *min = _mm_extract_epi16(minabsdiff, 0);
+}
+
+unsigned int aom_avg_8x8_sse2(const uint8_t *s, int p) {
+ __m128i s0, s1, u0;
+ unsigned int avg = 0;
+ u0 = _mm_setzero_si128();
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+
+ s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
+ s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32));
+ s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
+ avg = _mm_extract_epi16(s0, 0);
+ return (avg + 32) >> 6;
+}
+
+unsigned int aom_avg_4x4_sse2(const uint8_t *s, int p) {
+ __m128i s0, s1, u0;
+ unsigned int avg = 0;
+ u0 = _mm_setzero_si128();
+ s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+ s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
+ s0 = _mm_adds_epu16(s0, s1);
+
+ s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));
+ s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
+ avg = _mm_extract_epi16(s0, 0);
+ return (avg + 8) >> 4;
+}
+
static void hadamard_col8_sse2(__m128i *in, int iter) {
__m128i a0 = in[0];
__m128i a1 = in[1];
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 83c1353..0123354 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -154,6 +154,8 @@
ARG_DEF(NULL, "skip", 1, "Skip the first n input frames");
static const arg_def_t good_dl =
ARG_DEF(NULL, "good", 0, "Use Good Quality Deadline");
+static const arg_def_t rt_dl =
+ ARG_DEF(NULL, "rt", 0, "Use Realtime Quality Deadline");
static const arg_def_t quietarg =
ARG_DEF("q", "quiet", 0, "Do not print encode progress");
static const arg_def_t verbosearg =
@@ -219,6 +221,7 @@
&limit,
&skip,
&good_dl,
+ &rt_dl,
&quietarg,
&verbosearg,
&psnrarg,
@@ -1064,7 +1067,9 @@
} else if (arg_match(&arg, &usage, argi))
global->usage = arg_parse_uint(&arg);
else if (arg_match(&arg, &good_dl, argi))
- warn("Deprecated --good option! Ignoring\n");
+ global->usage = AOM_USAGE_GOOD_QUALITY; // Good quality usage
+ else if (arg_match(&arg, &rt_dl, argi))
+ global->usage = AOM_USAGE_REALTIME; // Real-time usage
else if (arg_match(&arg, &use_yv12, argi))
global->color_type = YV12;
else if (arg_match(&arg, &use_i420, argi))
@@ -1117,11 +1122,19 @@
// Make default AV1 passes = 2 until there is a better quality 1-pass
// encoder
if (global->codec != NULL && global->codec->name != NULL)
- global->passes = (strcmp(global->codec->name, "av1") == 0) ? 2 : 1;
+ global->passes = (strcmp(global->codec->name, "av1") == 0 &&
+ global->usage != AOM_USAGE_REALTIME)
+ ? 2
+ : 1;
#else
global->passes = 1;
#endif
}
+
+ if (global->usage == AOM_USAGE_REALTIME && global->passes > 1) {
+ warn("Enforcing one-pass encoding in realtime mode\n");
+ global->passes = 1;
+ }
}
static void open_input_file(struct AvxInputContext *input,
@@ -1355,6 +1368,12 @@
config->cfg.g_error_resilient = arg_parse_uint(&arg);
} else if (arg_match(&arg, &lag_in_frames, argi)) {
config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
+ if (global->usage == AOM_USAGE_REALTIME &&
+ config->cfg.rc_end_usage == AOM_CBR &&
+ config->cfg.g_lag_in_frames != 0) {
+ warn("non-zero %s option ignored in realtime CBR mode.\n", arg.name);
+ config->cfg.g_lag_in_frames = 0;
+ }
} else if (arg_match(&arg, &large_scale_tile, argi)) {
config->cfg.large_scale_tile = arg_parse_uint(&arg);
if (config->cfg.large_scale_tile) global->codec = get_aom_lst_encoder();
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 0b4901a..e99a2ed 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -194,6 +194,8 @@
"${AOM_ROOT}/av1/encoder/tpl_model.c"
"${AOM_ROOT}/av1/encoder/tpl_model.h"
"${AOM_ROOT}/av1/encoder/wedge_utils.c"
+ "${AOM_ROOT}/av1/encoder/var_based_part.c"
+ "${AOM_ROOT}/av1/encoder/var_based_part.h"
"${AOM_ROOT}/third_party/fastfeat/fast.c"
"${AOM_ROOT}/third_party/fastfeat/fast.h"
"${AOM_ROOT}/third_party/fastfeat/fast_9.c"
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 4e84958..54531e5 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -295,6 +295,7 @@
RANGE_CHECK_HI(extra_cfg, aq_mode, AQ_MODE_COUNT - 1);
RANGE_CHECK_HI(extra_cfg, deltaq_mode, DELTAQ_MODE_COUNT - 1);
RANGE_CHECK_HI(extra_cfg, frame_periodic_boost, 1);
+ RANGE_CHECK_HI(cfg, g_usage, 1);
RANGE_CHECK_HI(cfg, g_threads, MAX_NUM_THREADS);
RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS);
RANGE_CHECK(cfg, rc_end_usage, AOM_VBR, AOM_Q);
@@ -498,6 +499,7 @@
oxcf->profile = cfg->g_profile;
oxcf->fwd_kf_enabled = cfg->fwd_kf_enabled;
oxcf->max_threads = (int)cfg->g_threads;
+ oxcf->mode = (cfg->g_usage == 1) ? REALTIME : GOOD;
oxcf->width = cfg->g_w;
oxcf->height = cfg->g_h;
oxcf->forced_max_frame_width = cfg->g_forced_max_frame_width;
@@ -540,7 +542,6 @@
oxcf->init_framerate = 30;
oxcf->timing_info_present = 0;
}
- oxcf->mode = GOOD;
oxcf->cfg = &cfg->cfg;
switch (cfg->g_pass) {
@@ -1507,8 +1508,7 @@
}
}
}
-
- if (ctx->oxcf.mode != GOOD) {
+ if (ctx->oxcf.mode != GOOD && ctx->oxcf.mode != REALTIME) {
ctx->oxcf.mode = GOOD;
av1_change_config(ctx->cpi, &ctx->oxcf);
}
@@ -2093,7 +2093,7 @@
{ 0,
{
// NOLINT
- 0, // g_usage
+ 0, // g_usage - non-realtime usage
0, // g_threads
0, // g_profile
@@ -2158,6 +2158,74 @@
{ 0 }, // tile_heights
{ 1 }, // config file
} },
+ { 1,
+ {
+ // NOLINT
+ 1, // g_usage - real-time usage
+ 0, // g_threads
+ 0, // g_profile
+
+ 320, // g_width
+ 240, // g_height
+ 0, // g_limit
+ 0, // g_forced_max_frame_width
+ 0, // g_forced_max_frame_height
+ AOM_BITS_8, // g_bit_depth
+ 8, // g_input_bit_depth
+
+ { 1, 30 }, // g_timebase
+
+ 0, // g_error_resilient
+
+ AOM_RC_ONE_PASS, // g_pass
+
+ 1, // g_lag_in_frames
+
+ 0, // rc_dropframe_thresh
+ RESIZE_NONE, // rc_resize_mode
+ SCALE_NUMERATOR, // rc_resize_denominator
+ SCALE_NUMERATOR, // rc_resize_kf_denominator
+
+ 0, // rc_superres_mode
+ SCALE_NUMERATOR, // rc_superres_denominator
+ SCALE_NUMERATOR, // rc_superres_kf_denominator
+ 63, // rc_superres_qthresh
+ 32, // rc_superres_kf_qthresh
+
+ AOM_CBR, // rc_end_usage
+ { NULL, 0 }, // rc_twopass_stats_in
+ { NULL, 0 }, // rc_firstpass_mb_stats_in
+ 256, // rc_target_bandwidth
+ 0, // rc_min_quantizer
+ 63, // rc_max_quantizer
+ 25, // rc_undershoot_pct
+ 25, // rc_overshoot_pct
+
+ 6000, // rc_max_buffer_size
+ 4000, // rc_buffer_initial_size
+ 5000, // rc_buffer_optimal_size
+
+ 50, // rc_two_pass_vbrbias
+ 0, // rc_two_pass_vbrmin_section
+ 2000, // rc_two_pass_vbrmax_section
+
+ // keyframing settings (kf)
+ 0, // fwd_kf_enabled
+ AOM_KF_AUTO, // g_kfmode
+ 0, // kf_min_dist
+ 9999, // kf_max_dist
+ 0, // sframe_dist
+ 1, // sframe_mode
+ 0, // large_scale_tile
+ 0, // monochrome
+ 0, // full_still_picture_hdr
+ 0, // save_as_annexb
+ 0, // tile_width_count
+ 0, // tile_height_count
+ { 0 }, // tile_widths
+ { 0 }, // tile_heights
+ { 1 }, // config file
+ } },
};
#ifndef VERSION_STRING
@@ -2181,7 +2249,7 @@
},
{
// NOLINT
- 1, // 1 cfg map
+ 2, // 2 cfg map
encoder_usage_cfg_map, // aom_codec_enc_cfg_map_t
encoder_encode, // aom_codec_encode_fn_t
encoder_get_cxdata, // aom_codec_get_cx_data_fn_t
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 94bd4f0..9774678 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -60,6 +60,7 @@
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
+#include "av1/encoder/var_based_part.h"
static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
@@ -74,7 +75,7 @@
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
-static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
+const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
@@ -221,18 +222,6 @@
return BLOCK_8X8;
}
-// Lighter version of set_offsets that only sets the mode info
-// pointers.
-static void set_mode_info_offsets(const AV1_COMP *const cpi,
- MACROBLOCK *const x, MACROBLOCKD *const xd,
- int mi_row, int mi_col) {
- const AV1_COMMON *const cm = &cpi->common;
- const int idx_str = xd->mi_stride * mi_row + mi_col;
- xd->mi = cm->mi_grid_visible + idx_str;
- xd->mi[0] = cm->mi + idx_str;
- x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-}
-
static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
const TileInfo *const tile,
MACROBLOCK *const x, int mi_row,
@@ -524,11 +513,11 @@
return av1_edge_exists(ref->buf, ref->stride, width, height, high_bd, bd);
}
-static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
- MACROBLOCK *const x, int mi_row, int mi_col,
- RD_STATS *rd_cost, PARTITION_TYPE partition,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd) {
+static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
+ MACROBLOCK *const x, int mi_row, int mi_col,
+ RD_STATS *rd_cost, PARTITION_TYPE partition,
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd, int use_nonrd_pick_mode) {
AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
TileInfo *const tile_info = &tile_data->tile_info;
@@ -657,8 +646,15 @@
ctx->seg_feat = 1;
#endif
} else {
- av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
- bsize, ctx, best_rd);
+ // TODO(kyslov): do the same for pick_intra_mode and
+ // pick_inter_mode_sb_seg_skip
+ if (use_nonrd_pick_mode) {
+ av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
+ bsize, ctx, best_rd);
+ } else {
+ av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
+ bsize, ctx, best_rd);
+ }
#if CONFIG_ONE_PASS_SVM
ctx->seg_feat = 0;
#endif
@@ -1627,25 +1623,6 @@
update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
}
-// Check to see if the given partition size is allowed for a specified number
-// of mi block rows and columns remaining in the image.
-// If not then return the largest allowed partition size
-static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
- int cols_left, int *bh, int *bw) {
- if (rows_left <= 0 || cols_left <= 0) {
- return AOMMIN(bsize, BLOCK_8X8);
- } else {
- for (; bsize > 0; bsize -= 3) {
- *bh = mi_size_high[bsize];
- *bw = mi_size_wide[bsize];
- if ((*bh <= rows_left) && (*bw <= cols_left)) {
- break;
- }
- }
- }
- return bsize;
-}
-
static void set_partial_sb_partition(const AV1_COMMON *const cm,
MB_MODE_INFO *mi, int bh_in, int bw_in,
int mi_rows_remaining,
@@ -1769,8 +1746,8 @@
if (partition != PARTITION_NONE && !splits_below &&
mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
pc_tree->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
- PARTITION_NONE, bsize, ctx_none, INT64_MAX);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
+ PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0);
if (none_rdc.rate < INT_MAX) {
none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
@@ -1798,13 +1775,13 @@
}
switch (partition) {
case PARTITION_NONE:
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_NONE, bsize, ctx_none, INT64_MAX);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+ PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0);
break;
case PARTITION_HORZ:
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
- INT64_MAX);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+ PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
+ 0);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
mi_row + hbs < cm->mi_rows) {
RD_STATS tmp_rdc;
@@ -1813,9 +1790,9 @@
update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
mi_col, subsize, NULL);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
- INT64_MAX);
+ pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
+ PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
+ INT64_MAX, 0);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
av1_invalid_rd_stats(&last_part_rdc);
break;
@@ -1826,9 +1803,9 @@
}
break;
case PARTITION_VERT:
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_VERT, subsize, &pc_tree->vertical[0],
- INT64_MAX);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+ PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
+ 0);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
mi_col + hbs < cm->mi_cols) {
RD_STATS tmp_rdc;
@@ -1837,9 +1814,9 @@
update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
mi_col, subsize, NULL);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
- PARTITION_VERT, subsize,
- &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
+ PARTITION_VERT, subsize,
+ &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 0);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
av1_invalid_rd_stats(&last_part_rdc);
break;
@@ -1913,9 +1890,9 @@
save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
pc_tree->split[i]->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
- &tmp_rdc, PARTITION_SPLIT, split_subsize,
- &pc_tree->split[i]->none, INT64_MAX);
+ pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
+ PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none,
+ INT64_MAX, 0);
restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
@@ -1976,6 +1953,186 @@
*dist = chosen_rdc.dist;
}
+// TODO(kyslov): now this is very similar to rd_use_partition (except that
+// doesn't do extra search arounf suggested partitioning)
+// consider passing a flag to select non-rd path (similar to
+// encode_sb_row)
+static void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
+ TileDataEnc *tile_data, MB_MODE_INFO **mib,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *rate, int64_t *dist,
+ int do_recon, PC_TREE *pc_tree) {
+ AV1_COMMON *const cm = &cpi->common;
+ const int num_planes = av1_num_planes(cm);
+ TileInfo *const tile_info = &tile_data->tile_info;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int bs = mi_size_wide[bsize];
+ const int hbs = bs / 2;
+ int i;
+ const int pl = (bsize >= BLOCK_8X8)
+ ? partition_plane_context(xd, mi_row, mi_col, bsize)
+ : 0;
+ const PARTITION_TYPE partition =
+ (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
+ : PARTITION_NONE;
+ const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
+ RD_STATS last_part_rdc;
+ PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+ assert(mi_size_wide[bsize] == mi_size_high[bsize]);
+
+ av1_invalid_rd_stats(&last_part_rdc);
+
+ pc_tree->partitioning = partition;
+
+ xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+
+ if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+ x->mb_energy = av1_log_block_var(cpi, x, bsize);
+ }
+
+ for (int b = 0; b < 2; ++b) {
+ pc_tree->horizontal[b].skip_ref_frame_mask = 0;
+ pc_tree->vertical[b].skip_ref_frame_mask = 0;
+ }
+ for (int b = 0; b < 3; ++b) {
+ pc_tree->horizontala[b].skip_ref_frame_mask = 0;
+ pc_tree->horizontalb[b].skip_ref_frame_mask = 0;
+ pc_tree->verticala[b].skip_ref_frame_mask = 0;
+ pc_tree->verticalb[b].skip_ref_frame_mask = 0;
+ }
+ for (int b = 0; b < 4; ++b) {
+ pc_tree->horizontal4[b].skip_ref_frame_mask = 0;
+ pc_tree->vertical4[b].skip_ref_frame_mask = 0;
+ }
+ switch (partition) {
+ case PARTITION_NONE:
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+ PARTITION_NONE, bsize, ctx_none, INT64_MAX, 1);
+ break;
+ case PARTITION_HORZ:
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+ PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
+ 1);
+ if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
+ mi_row + hbs < cm->mi_rows) {
+ RD_STATS tmp_rdc;
+ const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
+ av1_init_rd_stats(&tmp_rdc);
+ update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
+ encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
+ mi_col, subsize, NULL);
+ pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
+ PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
+ INT64_MAX, 1);
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ av1_invalid_rd_stats(&last_part_rdc);
+ break;
+ }
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+ last_part_rdc.rdcost += tmp_rdc.rdcost;
+ }
+ break;
+ case PARTITION_VERT:
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+ PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
+ 1);
+ if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
+ mi_col + hbs < cm->mi_cols) {
+ RD_STATS tmp_rdc;
+ const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
+ av1_init_rd_stats(&tmp_rdc);
+ update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
+ encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
+ mi_col, subsize, NULL);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
+ PARTITION_VERT, subsize,
+ &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 1);
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ av1_invalid_rd_stats(&last_part_rdc);
+ break;
+ }
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+ last_part_rdc.rdcost += tmp_rdc.rdcost;
+ }
+ break;
+ case PARTITION_SPLIT:
+ last_part_rdc.rate = 0;
+ last_part_rdc.dist = 0;
+ last_part_rdc.rdcost = 0;
+ for (i = 0; i < 4; i++) {
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ int jj = i >> 1, ii = i & 0x01;
+ RD_STATS tmp_rdc;
+ if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
+ continue;
+
+ av1_init_rd_stats(&tmp_rdc);
+ nonrd_use_partition(
+ cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
+ mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
+ &tmp_rdc.dist, i != 3, pc_tree->split[i]);
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ av1_invalid_rd_stats(&last_part_rdc);
+ break;
+ }
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+ }
+ break;
+ case PARTITION_VERT_A:
+ case PARTITION_VERT_B:
+ case PARTITION_HORZ_A:
+ case PARTITION_HORZ_B:
+ case PARTITION_HORZ_4:
+ case PARTITION_VERT_4:
+ assert(0 && "Cannot handle extended partition types");
+ default: assert(0); break;
+ }
+
+ if (last_part_rdc.rate < INT_MAX) {
+ last_part_rdc.rate += x->partition_cost[pl][partition];
+ last_part_rdc.rdcost =
+ RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
+ }
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+
+ // We must have chosen a partitioning and encoding or we'll fail later on.
+ // No other opportunities for success.
+ if (bsize == cm->seq_params.sb_size)
+ assert(last_part_rdc.rate < INT_MAX && last_part_rdc.dist < INT64_MAX);
+
+ if (do_recon) {
+ if (bsize == cm->seq_params.sb_size) {
+ // NOTE: To get estimate for rate due to the tokens, use:
+ // int rate_coeffs = 0;
+ // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
+ // bsize, pc_tree, &rate_coeffs);
+ x->cb_offset = 0;
+ encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
+ pc_tree, NULL);
+ } else {
+ encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
+ pc_tree, NULL);
+ }
+ }
+
+ *rate = last_part_rdc.rate;
+ *dist = last_part_rdc.dist;
+}
+
/* clang-format off */
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = {
BLOCK_4X4, // 4x4
@@ -2405,9 +2562,9 @@
? INT64_MAX
: (best_rdc->rdcost - sum_rdc->rdcost);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
- RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
- rdcost_remaining);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
+ RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
+ rdcost_remaining, 0);
if (this_rdc->rate == INT_MAX) {
sum_rdc->rdcost = INT64_MAX;
@@ -2623,8 +2780,8 @@
const int64_t best_remain_rdcost =
best_rdc.rdcost == INT64_MAX ? INT64_MAX
: (best_rdc.rdcost - partition_rd_cost);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
+ bsize, ctx_none, best_remain_rdcost, 0);
pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost;
pc_tree->pc_tree_stats.skip = ctx_none->skip;
@@ -4103,8 +4260,8 @@
partition_attempts[PARTITION_NONE] += 1;
}
#endif
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
+ bsize, ctx_none, best_remain_rdcost, 0);
pb_source_variance = x->source_variance;
if (none_rd) *none_rd = this_rdc.rdcost;
cur_none_rd = this_rdc.rdcost;
@@ -4433,9 +4590,8 @@
partition_attempts[PARTITION_HORZ] += 1;
}
#endif
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
- best_remain_rdcost);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ,
+ subsize, &pc_tree->horizontal[0], best_remain_rdcost, 0);
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -4465,9 +4621,9 @@
pc_tree->horizontal[1].pred_interp_filter =
av1_extract_interp_filter(ctx_h->mic.interp_filters, 0);
}
- rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
- best_rdc.rdcost - sum_rdc.rdcost);
+ pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
+ PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
+ best_rdc.rdcost - sum_rdc.rdcost, 0);
horz_rd[1] = this_rdc.rdcost;
if (this_rdc.rate == INT_MAX) {
@@ -4515,9 +4671,8 @@
partition_attempts[PARTITION_VERT] += 1;
}
#endif
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_VERT, subsize, &pc_tree->vertical[0],
- best_remain_rdcost);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT,
+ subsize, &pc_tree->vertical[0], best_remain_rdcost, 0);
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -4546,9 +4701,9 @@
pc_tree->vertical[1].pred_interp_filter =
av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
}
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
- PARTITION_VERT, subsize, &pc_tree->vertical[1],
- best_rdc.rdcost - sum_rdc.rdcost);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
+ PARTITION_VERT, subsize, &pc_tree->vertical[1],
+ best_rdc.rdcost - sum_rdc.rdcost, 0);
vert_rd[1] = this_rdc.rdcost;
if (this_rdc.rate == INT_MAX) {
@@ -5531,9 +5686,8 @@
CFL_ALPHABET_SIZE);
}
-static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
- TileDataEnc *tile_data, int mi_row,
- TOKENEXTRA **tp) {
+static void encode_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
+ int mi_row, TOKENEXTRA **tp, int use_nonrd_mode) {
AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
const TileInfo *const tile_info = &tile_data->tile_info;
@@ -5642,6 +5796,13 @@
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
&dummy_rate, &dummy_dist, 1, pc_root);
+ } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
+ use_nonrd_mode) {
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
+ av1_choose_var_based_partitioning(cpi, tile_info, x, mi_row, mi_col);
+ nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
+ &dummy_rate, &dummy_dist, 1, pc_root);
+
} else {
const int orig_rdmult = cpi->rd.RDMULT;
x->cb_rdmult = orig_rdmult;
@@ -5812,7 +5973,7 @@
cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok;
- encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
+ encode_sb_row(cpi, td, this_tile, mi_row, &tok, cpi->sf.use_nonrd_pick_mode);
cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok;
cpi->tplist[tile_row][tile_col][sb_row_in_tile].count =
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 10ca859..57cce12 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -71,6 +71,7 @@
#include "av1/encoder/segmentation.h"
#include "av1/encoder/speed_features.h"
#include "av1/encoder/reconinter_enc.h"
+#include "av1/encoder/var_based_part.h"
#define DEFAULT_EXPLICIT_ORDER_HINT_BITS 7
@@ -4729,6 +4730,9 @@
}
av1_set_quantizer(cm, q);
av1_init_quantizer(cpi);
+
+ av1_set_variance_partition_thresholds(cpi, q, 0);
+
// printf("Frame %d/%d: q = %d, frame_type = %d superres_denom = %d\n",
// cm->current_frame.frame_number, cm->show_frame, q,
// cm->current_frame.frame_type, cm->superres_scale_denominator);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 7b0af8f..9942280 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -24,6 +24,8 @@
#include "av1/common/onyxc_int.h"
#include "av1/common/resize.h"
#include "av1/common/timing.h"
+#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/context_tree.h"
@@ -36,6 +38,7 @@
#include "av1/encoder/rd.h"
#include "av1/encoder/speed_features.h"
#include "av1/encoder/tokenize.h"
+#include "av1/encoder/block.h"
#if CONFIG_INTERNAL_STATS
#include "aom_dsp/ssim.h"
@@ -85,7 +88,10 @@
enum {
// Good Quality Fast Encoding. The encoder balances quality with the amount of
// time it takes to encode the output. Speed setting controls how fast.
- GOOD
+ GOOD,
+ // Realtime Fast Encoding. Will force some restrictions on bitrate
+ // constraints.
+ REALTIME
} UENUM1BYTE(MODE);
enum {
@@ -132,6 +138,16 @@
SUPERRES_MODES
} UENUM1BYTE(SUPERRES_MODE);
+typedef enum {
+ kInvalid = 0,
+ kLowSadLowSumdiff = 1,
+ kLowSadHighSumdiff = 2,
+ kHighSadLowSumdiff = 3,
+ kHighSadHighSumdiff = 4,
+ kLowVarHighSumdiff = 5,
+ kVeryHighSad = 6,
+} CONTENT_STATE_SB;
+
typedef struct TplDepStats {
int64_t intra_cost;
int64_t inter_cost;
@@ -825,6 +841,16 @@
// VARIANCE_AQ segment map refresh
int vaq_refresh;
+ // VAR_BASED_PARTITION thresholds
+ // 0 - threshold_128x128; 1 - threshold_64x64;
+ // 2 - threshold_32x32; 3 - threshold_16x16;
+ // 4 - vbp_threshold_8x8;
+ int64_t vbp_thresholds[5];
+ int64_t vbp_threshold_minmax;
+ int64_t vbp_threshold_sad;
+ int64_t vbp_threshold_copy;
+ BLOCK_SIZE vbp_bsize_min;
+
// Multi-threading
int num_workers;
AVxWorker *workers;
@@ -1113,6 +1139,39 @@
cm->current_frame.frame_type == KEY_FRAME);
}
+// Lighter version of set_offsets that only sets the mode info
+// pointers.
+static INLINE void set_mode_info_offsets(const AV1_COMP *const cpi,
+ MACROBLOCK *const x,
+ MACROBLOCKD *const xd, int mi_row,
+ int mi_col) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int idx_str = xd->mi_stride * mi_row + mi_col;
+ xd->mi = cm->mi_grid_visible + idx_str;
+ xd->mi[0] = cm->mi + idx_str;
+ x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
+}
+
+// Check to see if the given partition size is allowed for a specified number
+// of mi block rows and columns remaining in the image.
+// If not then return the largest allowed partition size
+static INLINE BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
+ int cols_left, int *bh, int *bw) {
+ int int_size = (int)bsize;
+ if (rows_left <= 0 || cols_left <= 0) {
+ return AOMMIN(bsize, BLOCK_8X8);
+ } else {
+ for (; int_size > 0; int_size -= 3) {
+ *bh = mi_size_high[int_size];
+ *bw = mi_size_wide[int_size];
+ if ((*bh <= rows_left) && (*bw <= cols_left)) {
+ break;
+ }
+ }
+ }
+ return (BLOCK_SIZE)int_size;
+}
+
// Returns a Sequence Header OBU stored in an aom_fixed_buf_t, or NULL upon
// failure. When a non-NULL aom_fixed_buf_t pointer is returned by this
// function, the memory must be freed by the caller. Both the buf member of the
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 7598d7c..cc7ad97 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -11107,6 +11107,121 @@
}
x->comp_rd_stats_idx = 0;
}
+// TODO(kyslov): now this is very similar to set_params_rd_pick_inter_mode
+// (except that doesn't set ALTREF parameters)
+// consider passing a flag to select non-rd path (similar to
+// encode_sb_row)
+static void set_params_nonrd_pick_inter_mode(
+ const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
+ BLOCK_SIZE bsize, int mi_row, int mi_col, mode_skip_mask_t *mode_skip_mask,
+ int skip_ref_frame_mask, unsigned int ref_costs_single[REF_FRAMES],
+ unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES],
+ struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int num_planes = av1_num_planes(cm);
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = xd->mi[0];
+ MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ unsigned char segment_id = mbmi->segment_id;
+ int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
+ MAX_SB_SIZE >> 1 };
+ int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
+ MAX_SB_SIZE >> 1 };
+ int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+
+ for (int i = 0; i < MB_MODE_COUNT; ++i)
+ for (int k = 0; k < REF_FRAMES; ++k) args->single_filter[i][k] = SWITCHABLE;
+
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
+ args->above_pred_buf[1] =
+ CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len);
+ args->above_pred_buf[2] =
+ CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
+ args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
+ args->left_pred_buf[1] =
+ CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len);
+ args->left_pred_buf[2] =
+ CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
+ } else {
+ args->above_pred_buf[0] = x->above_pred_buf;
+ args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1);
+ args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE;
+ args->left_pred_buf[0] = x->left_pred_buf;
+ args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1);
+ args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE;
+ }
+
+ av1_collect_neighbors_ref_counts(xd);
+
+ estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
+ ref_costs_comp);
+
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ x->pred_mv_sad[ref_frame] = INT_MAX;
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+ x->mbmi_ext->compound_mode_context[ref_frame] = 0;
+ mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
+ if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame]) {
+ if (mbmi->partition != PARTITION_NONE &&
+ mbmi->partition != PARTITION_SPLIT) {
+ if (skip_ref_frame_mask & (1 << ref_frame)) {
+ int skip = 1;
+ for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
+ if (!(skip_ref_frame_mask & (1 << r))) {
+ const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
+ if (rf[0] == ref_frame || rf[1] == ref_frame) {
+ skip = 0;
+ break;
+ }
+ }
+ }
+ if (skip) continue;
+ }
+ }
+ assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
+ setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
+ yv12_mb);
+ }
+ }
+
+ av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
+
+ if (check_num_overlappable_neighbors(mbmi) &&
+ is_motion_variation_allowed_bsize(bsize)) {
+ av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
+ args->above_pred_buf, dst_width1,
+ dst_height1, args->above_pred_stride);
+ av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
+ args->left_pred_buf, dst_width2,
+ dst_height2, args->left_pred_stride);
+ av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col,
+ 0, num_planes);
+ calc_target_weighted_pred(
+ cm, x, xd, mi_row, mi_col, args->above_pred_buf[0],
+ args->above_pred_stride[0], args->left_pred_buf[0],
+ args->left_pred_stride[0]);
+ }
+
+ init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
+
+ if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
+ x->use_default_intra_tx_type = 1;
+ else
+ x->use_default_intra_tx_type = 0;
+
+ if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
+ x->use_default_inter_tx_type = 1;
+ else
+ x->use_default_inter_tx_type = 0;
+ if (cpi->sf.skip_repeat_interpolation_filter_search) {
+ x->interp_filter_stats_idx[0] = 0;
+ x->interp_filter_stats_idx[1] = 0;
+ }
+}
static void search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
int mi_col, RD_STATS *rd_cost,
@@ -12582,7 +12697,6 @@
#endif
}
}
-
// In effect only when speed >= 2.
sf_refine_fast_tx_type_search(
cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index,
@@ -12594,7 +12708,6 @@
search_palette_mode(cpi, x, mi_row, mi_col, rd_cost, ctx, bsize, mbmi, pmi,
ref_costs_single, &search_state);
}
-
search_state.best_mbmode.skip_mode = 0;
if (cm->current_frame.skip_mode_info.skip_mode_flag &&
!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
@@ -12672,6 +12785,535 @@
}
}
+// TODO(kyslov): now this is very similar to av1_rd_pick_inter_mode_sb except:
+// it only checks non-compound mode and
+// it doesn't check palette mode
+// it doesn't refine tx search
+// this function is likely to be heavily modified with nonrd mode
+// decision
+void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
+ MACROBLOCK *x, int mi_row, int mi_col,
+ RD_STATS *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
+ AV1_COMMON *const cm = &cpi->common;
+ const int num_planes = av1_num_planes(cm);
+ const SPEED_FEATURES *const sf = &cpi->sf;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = xd->mi[0];
+ const struct segmentation *const seg = &cm->seg;
+ PREDICTION_MODE this_mode;
+ unsigned char segment_id = mbmi->segment_id;
+ int i;
+ struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
+ unsigned int ref_costs_single[REF_FRAMES];
+ unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
+ int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)];
+ mode_skip_mask_t mode_skip_mask;
+ uint8_t motion_mode_skip_mask = 0; // second pass of single ref modes
+#if CONFIG_ONE_PASS_SVM
+ int temp_y_eob = 0, temp_y_eob_0 = 0, temp_y_eob_1 = 0, temp_y_eob_2 = 0,
+ temp_y_eob_3 = 0;
+ int64_t temp_y_rd = 0, temp_y_rd_0 = 0, temp_y_rd_1 = 0, temp_y_rd_2 = 0,
+ temp_y_rd_3 = 0;
+#endif
+
+ InterModeSearchState search_state;
+ init_inter_mode_search_state(&search_state, cpi, tile_data, x, bsize,
+ best_rd_so_far);
+ INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
+ INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
+ INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
+ };
+ HandleInterModeArgs args = {
+ { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
+ { NULL }, { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
+ NULL, NULL,
+ NULL, search_state.modelled_rd,
+ { { 0 } }, INT_MAX,
+ INT_MAX, search_state.simple_rd,
+ 0, interintra_modes,
+ 1, NULL
+ };
+ for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
+
+ av1_invalid_rd_stats(rd_cost);
+
+ // init params, set frame modes, speed features
+ set_params_nonrd_pick_inter_mode(cpi, x, &args, bsize, mi_row, mi_col,
+ &mode_skip_mask, ctx->skip_ref_frame_mask,
+ ref_costs_single, ref_costs_comp, yv12_mb);
+
+ int64_t best_est_rd = INT64_MAX;
+ // TODO(angiebird): Turn this on when this speed feature is well tested
+ const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
+ const int do_tx_search =
+ !((cpi->sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
+ (cpi->sf.inter_mode_rd_model_estimation == 2 &&
+ x->source_variance < 512));
+ InterModesInfo *inter_modes_info = x->inter_modes_info;
+ inter_modes_info->num = 0;
+
+ int intra_mode_num = 0;
+ int intra_mode_idx_ls[MAX_MODES];
+ int reach_first_comp_mode = 0;
+
+ // Temporary buffers used by handle_inter_mode().
+ uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]);
+
+ CompoundTypeRdBuffers rd_buffers;
+ alloc_compound_type_rd_buffers(cm, &rd_buffers);
+
+ for (int midx = 0; midx < LAST_SINGLE_REF_MODES + 1; ++midx) {
+ const MODE_DEFINITION *mode_order = &av1_mode_order[midx];
+ this_mode = mode_order->mode;
+ const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0];
+ const MV_REFERENCE_FRAME second_ref_frame = mode_order->ref_frame[1];
+ const int comp_pred = second_ref_frame > INTRA_FRAME;
+
+ if (ref_frame > LAST_FRAME) continue;
+
+ // When single ref motion search ends:
+ // 1st pass: To evaluate single ref RD results and rewind to the beginning;
+ // 2nd pass: To continue with compound ref search.
+ if (sf->prune_single_motion_modes_by_simple_trans) {
+ if (comp_pred && args.single_ref_first_pass) {
+ args.single_ref_first_pass = 0;
+ // Reach the first comp ref mode
+ // Reset midx to start the 2nd pass for single ref motion search
+ midx = -1;
+ motion_mode_skip_mask = analyze_simple_trans_states(cpi, x);
+ continue;
+ }
+ if (!comp_pred) { // single ref mode
+ if (args.single_ref_first_pass) {
+ // clear stats
+ for (int k = 0; k < MAX_REF_MV_SERCH; ++k) {
+ x->simple_rd_state[midx][k].rd_stats.rdcost = INT64_MAX;
+ x->simple_rd_state[midx][k].early_skipped = 0;
+ }
+ } else {
+ if (motion_mode_skip_mask & (1 << ref_frame)) {
+ continue;
+ }
+ }
+ }
+ }
+
+ // Reach the first compound prediction mode
+ if (sf->prune_comp_search_by_single_result > 0 && comp_pred &&
+ reach_first_comp_mode == 0) {
+ analyze_single_states(cpi, &search_state);
+ reach_first_comp_mode = 1;
+ }
+ int64_t this_rd = INT64_MAX;
+ int disable_skip = 0;
+ int rate2 = 0, rate_y = 0, rate_uv = 0;
+ int64_t distortion2 = 0;
+ int skippable = 0;
+ int this_skip2 = 0;
+
+ init_mbmi(mbmi, midx, cm);
+
+ x->skip = 0;
+ set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
+
+ if (inter_mode_compatible_skip(cpi, x, bsize, midx)) continue;
+
+ const int ret = inter_mode_search_order_independent_skip(
+ cpi, ctx, x, bsize, midx, mi_row, mi_col, &mode_skip_mask,
+ &search_state);
+ if (ret == 1) continue;
+ args.skip_motion_mode = (ret == 2);
+
+ if (sf->drop_ref && comp_pred) {
+ if (sf_check_is_drop_ref(mode_order, &search_state)) {
+ continue;
+ }
+ }
+
+ if (search_state.best_rd < search_state.mode_threshold[midx]) continue;
+
+ if (sf->prune_comp_search_by_single_result > 0 && comp_pred) {
+ if (compound_skip_by_single_states(cpi, &search_state, this_mode,
+ ref_frame, second_ref_frame, x))
+ continue;
+ }
+
+ const int ref_frame_cost = comp_pred
+ ? ref_costs_comp[ref_frame][second_ref_frame]
+ : ref_costs_single[ref_frame];
+ const int compmode_cost =
+ is_comp_ref_allowed(mbmi->sb_type) ? comp_inter_cost[comp_pred] : 0;
+ const int real_compmode_cost =
+ cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
+ ? compmode_cost
+ : 0;
+
+ if (comp_pred) {
+ if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+ search_state.best_mode_index >= 0 &&
+ search_state.best_mbmode.ref_frame[0] == INTRA_FRAME)
+ continue;
+ }
+
+ if (ref_frame == INTRA_FRAME) {
+ if (!cpi->oxcf.enable_smooth_intra &&
+ (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
+ mbmi->mode == SMOOTH_V_PRED))
+ continue;
+ if (!cpi->oxcf.enable_paeth_intra && mbmi->mode == PAETH_PRED) continue;
+ if (sf->adaptive_mode_search > 1)
+ if ((x->source_variance << num_pels_log2_lookup[bsize]) >
+ search_state.best_pred_sse)
+ continue;
+
+ if (this_mode != DC_PRED) {
+ // Only search the oblique modes if the best so far is
+ // one of the neighboring directional modes
+ if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
+ (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
+ if (search_state.best_mode_index >= 0 &&
+ search_state.best_mbmode.ref_frame[0] > INTRA_FRAME)
+ continue;
+ }
+ if (sf->mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
+ if (conditional_skipintra(this_mode, search_state.best_intra_mode))
+ continue;
+ }
+ }
+ }
+
+ // Select prediction reference frames.
+ for (i = 0; i < num_planes; i++) {
+ xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
+ if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
+ }
+
+ if (ref_frame == INTRA_FRAME) {
+ intra_mode_idx_ls[intra_mode_num++] = midx;
+ continue;
+ } else {
+ mbmi->angle_delta[PLANE_TYPE_Y] = 0;
+ mbmi->angle_delta[PLANE_TYPE_UV] = 0;
+ mbmi->filter_intra_mode_info.use_filter_intra = 0;
+ mbmi->ref_mv_idx = 0;
+ int64_t ref_best_rd = search_state.best_rd;
+ {
+ RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
+ av1_init_rd_stats(&rd_stats);
+ rd_stats.rate = rate2;
+
+ // Point to variables that are maintained between loop iterations
+ args.single_newmv = search_state.single_newmv;
+ args.single_newmv_rate = search_state.single_newmv_rate;
+ args.single_newmv_valid = search_state.single_newmv_valid;
+ args.single_comp_cost = real_compmode_cost;
+ args.ref_frame_cost = ref_frame_cost;
+ if (midx < MAX_SINGLE_REF_MODES) {
+ args.simple_rd_state = x->simple_rd_state[midx];
+ }
+ this_rd = handle_inter_mode(
+ cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
+ &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf,
+ &rd_buffers, &best_est_rd, do_tx_search, inter_modes_info);
+ rate2 = rd_stats.rate;
+ skippable = rd_stats.skip;
+ distortion2 = rd_stats.dist;
+ rate_y = rd_stats_y.rate;
+ rate_uv = rd_stats_uv.rate;
+#if CONFIG_ONE_PASS_SVM
+ av1_unpack_reg_stat(&rd_stats_y, &temp_y_eob, &temp_y_eob_0,
+ &temp_y_eob_1, &temp_y_eob_2, &temp_y_eob_3,
+ &temp_y_rd, &temp_y_rd_0, &temp_y_rd_1,
+ &temp_y_rd_2, &temp_y_rd_3);
+#endif
+ }
+
+ if (sf->prune_comp_search_by_single_result > 0 &&
+ is_inter_singleref_mode(this_mode) && args.single_ref_first_pass) {
+ collect_single_states(x, &search_state, mbmi);
+ }
+
+ if (this_rd == INT64_MAX) continue;
+
+ this_skip2 = mbmi->skip;
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
+ if (this_skip2) {
+ rate_y = 0;
+ rate_uv = 0;
+ }
+ }
+
+ // Did this mode help.. i.e. is it the new best mode
+ if (this_rd < search_state.best_rd || x->skip) {
+ int mode_excluded = 0;
+ if (comp_pred) {
+ mode_excluded = cm->current_frame.reference_mode == SINGLE_REFERENCE;
+ }
+ if (!mode_excluded) {
+ // Note index of best mode so far
+ search_state.best_mode_index = midx;
+
+ if (ref_frame == INTRA_FRAME) {
+ /* required for left and above block mv */
+ mbmi->mv[0].as_int = 0;
+ } else {
+ search_state.best_pred_sse = x->pred_sse[ref_frame];
+ }
+
+ rd_cost->rate = rate2;
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
+ search_state.best_rd = this_rd;
+ search_state.best_mbmode = *mbmi;
+ search_state.best_skip2 = this_skip2;
+ search_state.best_mode_skippable = skippable;
+ if (do_tx_search) {
+ // When do_tx_search == 0, handle_inter_mode won't provide correct
+ // rate_y and rate_uv because txfm_search process is replaced by
+ // rd estimation.
+ // Therfore, we should avoid updating best_rate_y and best_rate_uv
+ // here. These two values will be updated when txfm_search is called
+ search_state.best_rate_y =
+ rate_y +
+ x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
+ search_state.best_rate_uv = rate_uv;
+
+#if CONFIG_ONE_PASS_SVM
+ av1_set_reg_stat(rd_cost, temp_y_eob, temp_y_eob_0, temp_y_eob_1,
+ temp_y_eob_2, temp_y_eob_3, temp_y_rd, temp_y_rd_0,
+ temp_y_rd_1, temp_y_rd_2, temp_y_rd_3);
+#endif
+ }
+ memcpy(ctx->blk_skip, x->blk_skip,
+ sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
+ }
+ }
+
+ /* keep record of best compound/single-only prediction */
+ if (!disable_skip && ref_frame != INTRA_FRAME) {
+ int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
+
+ if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
+ single_rate = rate2 - compmode_cost;
+ hybrid_rate = rate2;
+ } else {
+ single_rate = rate2;
+ hybrid_rate = rate2 + compmode_cost;
+ }
+
+ single_rd = RDCOST(x->rdmult, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
+
+ if (!comp_pred) {
+ if (single_rd < search_state.best_pred_rd[SINGLE_REFERENCE])
+ search_state.best_pred_rd[SINGLE_REFERENCE] = single_rd;
+ } else {
+ if (single_rd < search_state.best_pred_rd[COMPOUND_REFERENCE])
+ search_state.best_pred_rd[COMPOUND_REFERENCE] = single_rd;
+ }
+ if (hybrid_rd < search_state.best_pred_rd[REFERENCE_MODE_SELECT])
+ search_state.best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
+ }
+ if (sf->drop_ref && second_ref_frame == NONE_FRAME) {
+ // Collect data from single ref mode, and analyze data.
+ sf_drop_ref_analyze(&search_state, mode_order, distortion2);
+ }
+
+ if (x->skip && !comp_pred) break;
+ }
+
+ release_compound_type_rd_buffers(&rd_buffers);
+
+ if (!do_tx_search) {
+ inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
+ search_state.best_rd = INT64_MAX;
+
+ int64_t top_est_rd =
+ inter_modes_info->num > 0
+ ? inter_modes_info
+ ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
+ : INT64_MAX;
+ for (int j = 0; j < inter_modes_info->num; ++j) {
+ const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
+ *mbmi = inter_modes_info->mbmi_arr[data_idx];
+ int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
+ if (curr_est_rd * 0.80 > top_est_rd) break;
+
+ const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
+
+ x->skip = 0;
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+
+ // Select prediction reference frames.
+ const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
+ for (i = 0; i < num_planes; i++) {
+ xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
+ if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
+ }
+
+ RD_STATS rd_stats;
+ RD_STATS rd_stats_y;
+ RD_STATS rd_stats_uv;
+
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
+ av1_num_planes(cm) - 1);
+ if (mbmi->motion_mode == OBMC_CAUSAL)
+ av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
+
+ if (!txfm_search(cpi, tile_data, x, bsize, mi_row, mi_col, &rd_stats,
+ &rd_stats_y, &rd_stats_uv, mode_rate,
+ search_state.best_rd)) {
+ continue;
+ } else if (cpi->sf.inter_mode_rd_model_estimation == 1) {
+ const int skip_ctx = av1_get_skip_context(xd);
+ inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats.sse,
+ rd_stats.dist,
+ rd_stats_y.rate + rd_stats_uv.rate +
+ x->skip_cost[skip_ctx][mbmi->skip]);
+ }
+ rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
+
+ if (rd_stats.rdcost < search_state.best_rd) {
+ search_state.best_rd = rd_stats.rdcost;
+ // Note index of best mode so far
+ const int mode_index = get_prediction_mode_idx(
+ mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ search_state.best_mode_index = mode_index;
+ *rd_cost = rd_stats;
+ search_state.best_rd = rd_stats.rdcost;
+ search_state.best_mbmode = *mbmi;
+ search_state.best_skip2 = mbmi->skip;
+ search_state.best_mode_skippable = rd_stats.skip;
+ search_state.best_rate_y =
+ rd_stats_y.rate +
+ x->skip_cost[av1_get_skip_context(xd)][rd_stats.skip || mbmi->skip];
+ search_state.best_rate_uv = rd_stats_uv.rate;
+ memcpy(ctx->blk_skip, x->blk_skip,
+ sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
+#if CONFIG_ONE_PASS_SVM
+ av1_copy_reg_stat(rd_cost, &rd_stats_y);
+#endif
+ }
+ }
+ }
+
+ for (int j = 0; j < intra_mode_num; ++j) {
+ const int mode_index = intra_mode_idx_ls[j];
+ const MV_REFERENCE_FRAME ref_frame =
+ av1_mode_order[mode_index].ref_frame[0];
+ assert(av1_mode_order[mode_index].ref_frame[1] == NONE_FRAME);
+ assert(ref_frame == INTRA_FRAME);
+ if (sf->skip_intra_in_interframe && search_state.skip_intra_modes) break;
+ init_mbmi(mbmi, mode_index, cm);
+ x->skip = 0;
+ set_ref_ptrs(cm, xd, INTRA_FRAME, NONE_FRAME);
+
+ // Select prediction reference frames.
+ for (i = 0; i < num_planes; i++) {
+ xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
+ }
+
+ RD_STATS intra_rd_stats, intra_rd_stats_y, intra_rd_stats_uv;
+
+ const int ref_frame_cost = ref_costs_single[ref_frame];
+ intra_rd_stats.rdcost = handle_intra_mode(
+ &search_state, cpi, x, bsize, mi_row, mi_col, ref_frame_cost, ctx, 0,
+ &intra_rd_stats, &intra_rd_stats_y, &intra_rd_stats_uv);
+ if (intra_rd_stats.rdcost < search_state.best_rd) {
+ search_state.best_rd = intra_rd_stats.rdcost;
+ // Note index of best mode so far
+ search_state.best_mode_index = mode_index;
+ *rd_cost = intra_rd_stats;
+ search_state.best_rd = intra_rd_stats.rdcost;
+ search_state.best_mbmode = *mbmi;
+ search_state.best_skip2 = 0;
+ search_state.best_mode_skippable = intra_rd_stats.skip;
+ search_state.best_rate_y =
+ intra_rd_stats_y.rate +
+ x->skip_cost[av1_get_skip_context(xd)][intra_rd_stats.skip];
+ search_state.best_rate_uv = intra_rd_stats_uv.rate;
+ memcpy(ctx->blk_skip, x->blk_skip,
+ sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
+#if CONFIG_ONE_PASS_SVM
+ av1_copy_reg_stat(rd_cost, &intra_rd_stats_y);
+#endif
+ }
+ }
+
+ search_state.best_mbmode.skip_mode = 0;
+ if (cm->current_frame.skip_mode_info.skip_mode_flag &&
+ !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ is_comp_ref_allowed(bsize)) {
+ rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, mi_row, mi_col,
+ yv12_mb);
+ }
+
+ // Make sure that the ref_mv_idx is only nonzero when we're
+ // using a mode which can support ref_mv_idx
+ if (search_state.best_mbmode.ref_mv_idx != 0 &&
+ !(search_state.best_mbmode.mode == NEWMV ||
+ search_state.best_mbmode.mode == NEW_NEWMV ||
+ have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
+ search_state.best_mbmode.ref_mv_idx = 0;
+ }
+
+ if (search_state.best_mode_index < 0 ||
+ search_state.best_rd >= best_rd_so_far) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->rdcost = INT64_MAX;
+ return;
+ }
+
+ assert(
+ (cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter ==
+ av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 0)) ||
+ !is_inter_block(&search_state.best_mbmode));
+ assert(
+ (cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter ==
+ av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 1)) ||
+ !is_inter_block(&search_state.best_mbmode));
+
+ if (!cpi->rc.is_src_frame_alt_ref)
+ av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
+ sf->adaptive_rd_thresh, bsize,
+ search_state.best_mode_index);
+
+ // macroblock modes
+ *mbmi = search_state.best_mbmode;
+ x->skip |= search_state.best_skip2;
+
+ // Note: this section is needed since the mode may have been forced to
+ // GLOBALMV by the all-zero mode handling of ref-mv.
+ if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
+ // Correct the interp filters for GLOBALMV
+ if (is_nontrans_global_motion(xd, xd->mi[0])) {
+ assert(mbmi->interp_filters ==
+ av1_broadcast_interp_filter(
+ av1_unswitchable_filter(cm->interp_filter)));
+ }
+ }
+
+ for (i = 0; i < REFERENCE_MODES; ++i) {
+ if (search_state.best_pred_rd[i] == INT64_MAX)
+ search_state.best_pred_diff[i] = INT_MIN;
+ else
+ search_state.best_pred_diff[i] =
+ search_state.best_rd - search_state.best_pred_rd[i];
+ }
+
+ x->skip |= search_state.best_mode_skippable;
+
+ assert(search_state.best_mode_index >= 0);
+
+ store_coding_context(x, ctx, search_state.best_mode_index,
+ search_state.best_pred_diff,
+ search_state.best_mode_skippable);
+}
+
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
TileDataEnc *tile_data, MACROBLOCK *x,
int mi_row, int mi_col,
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 0ce931a..7ba1b18 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -123,6 +123,13 @@
struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
+void av1_nonrd_pick_inter_mode_sb(struct AV1_COMP *cpi,
+ struct TileDataEnc *tile_data,
+ struct macroblock *x, int mi_row, int mi_col,
+ struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far);
+
void av1_rd_pick_inter_mode_sb_seg_skip(
const struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
struct macroblock *x, int mi_row, int mi_col, struct RD_STATS *rd_cost,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index bbb6842..5237b03 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -237,6 +237,7 @@
sf->disable_wedge_search_edge_thresh = 0;
sf->prune_motion_mode_level = 1;
sf->cb_pred_filter_search = 0;
+ sf->use_nonrd_pick_mode = 0;
if (speed >= 1) {
sf->gm_erroradv_type = GM_ERRORADV_TR_1;
@@ -407,6 +408,211 @@
}
}
+// TODO(kyslov): now this is very similar to
+// set_good_speed_features_framesize_independent
+// except it sets non-rd flag on speed8. This function will likely
+// be modified in the future with RT-specific speed features
+static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
+ SPEED_FEATURES *sf,
+ int speed) {
+ AV1_COMMON *const cm = &cpi->common;
+ const int boosted = frame_is_boosted(cpi);
+
+ // Speed 0 for all speed features that give neutral coding performance change.
+ sf->reduce_inter_modes = 1;
+ sf->prune_ext_partition_types_search_level = 1;
+ sf->ml_prune_rect_partition = 1;
+ sf->ml_prune_ab_partition = 1;
+ sf->ml_prune_4_partition = 1;
+ sf->adaptive_txb_search_level = 1;
+ sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
+ sf->model_based_prune_tx_search_level = 1;
+ sf->model_based_post_interp_filter_breakout = 1;
+ sf->model_based_motion_mode_rd_breakout = 1;
+
+ // TODO(debargha): Test, tweak and turn on either 1 or 2
+ sf->inter_mode_rd_model_estimation = 0;
+
+ sf->prune_ref_frame_for_rect_partitions =
+ !(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame);
+ sf->prune_ref_mode_for_partitions = sf->prune_ref_frame_for_rect_partitions;
+ sf->less_rectangular_check_level = 1;
+ sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
+ sf->gm_disable_recode = 1;
+ sf->use_fast_interpolation_filter_search = 1;
+ sf->intra_tx_size_search_init_depth_sqr = 1;
+ sf->intra_angle_estimation = 1;
+ sf->selective_ref_frame = 1;
+ sf->prune_wedge_pred_diff_based = 1;
+ sf->disable_wedge_search_var_thresh = 0;
+ sf->disable_wedge_search_edge_thresh = 0;
+ sf->prune_motion_mode_level = 1;
+ sf->cb_pred_filter_search = 0;
+ sf->use_nonrd_pick_mode = 0;
+
+ if (speed >= 1) {
+ sf->gm_erroradv_type = GM_ERRORADV_TR_1;
+ sf->selective_ref_frame = 2;
+
+ sf->inter_tx_size_search_init_depth_rect = 1;
+ sf->inter_tx_size_search_init_depth_sqr = 1;
+ sf->intra_tx_size_search_init_depth_rect = 1;
+ sf->tx_size_search_lgr_block = 1;
+ sf->prune_ext_partition_types_search_level = 2;
+ sf->skip_repeat_interpolation_filter_search = 1;
+ sf->tx_type_search.skip_tx_search = 1;
+ sf->tx_type_search.ml_tx_split_thresh = 40;
+ sf->model_based_prune_tx_search_level = 0;
+ sf->adaptive_txb_search_level = 2;
+ sf->use_intra_txb_hash = 1;
+ sf->optimize_b_precheck = 1;
+ sf->dual_sgr_penalty_level = 1;
+ sf->use_accurate_subpel_search = USE_4_TAPS;
+ sf->reuse_inter_intra_mode = 1;
+ sf->prune_comp_search_by_single_result = 1;
+ sf->skip_repeated_newmv = 1;
+ sf->obmc_full_pixel_search_level = 1;
+ // TODO(anyone): Following speed feature will be further explored to
+ // identify the appropriate tradeoff between encoder performance and its
+ // speed.
+ sf->prune_single_motion_modes_by_simple_trans = 1;
+
+ sf->simple_motion_search_prune_rect = 1;
+
+ sf->disable_wedge_search_var_thresh = 0;
+ sf->disable_wedge_search_edge_thresh = 0;
+ sf->prune_comp_type_by_comp_avg = 1;
+ sf->prune_motion_mode_level = 2;
+ sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
+ sf->cb_pred_filter_search = 1;
+ sf->use_transform_domain_distortion = boosted ? 0 : 1;
+ }
+
+ if (speed >= 2) {
+ sf->gm_erroradv_type = GM_ERRORADV_TR_2;
+
+ sf->selective_ref_frame = 3;
+ sf->fast_cdef_search = 1;
+
+ sf->adaptive_rd_thresh = 1;
+ sf->mv.auto_mv_step_size = 1;
+ sf->mv.subpel_iters_per_step = 1;
+ sf->disable_filter_search_var_thresh = 100;
+ sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
+
+ sf->partition_search_breakout_rate_thr = 80;
+ // Note: This speed feature is disable as it seems to be worse in
+ // compression/quality and is also slower.
+ // sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
+ sf->allow_partition_search_skip = 1;
+ sf->disable_wedge_search_var_thresh = 100;
+ sf->disable_wedge_search_edge_thresh = 0;
+ sf->fast_wedge_sign_estimate = 1;
+ sf->disable_dual_filter = 1;
+ sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
+ sf->prune_comp_type_by_comp_avg = 2;
+ sf->cb_pred_filter_search = 0;
+ sf->adaptive_interp_filter_search = 1;
+ }
+
+ if (speed >= 3) {
+ sf->selective_ref_frame = 4;
+ sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL;
+ sf->less_rectangular_check_level = 2;
+ sf->adaptive_pred_interp_filter = 1;
+ // adaptive_motion_search breaks encoder multi-thread tests.
+ // The values in x->pred_mv[] differ for single and multi-thread cases.
+ // See aomedia:1778.
+ // sf->adaptive_motion_search = 1;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
+ sf->use_transform_domain_distortion = 1;
+ sf->use_accurate_subpel_search = USE_2_TAPS;
+ sf->adaptive_rd_thresh = 2;
+ sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
+ sf->gm_search_type = GM_DISABLE_SEARCH;
+ sf->prune_comp_search_by_single_result = 2;
+ sf->prune_motion_mode_level = boosted ? 2 : 3;
+ sf->prune_warp_using_wmtype = 1;
+ // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
+ // it with cpi->sf.disable_wedge_search_var_thresh.
+ sf->disable_wedge_interintra_search = 1;
+ }
+
+ if (speed >= 4) {
+ sf->use_intra_txb_hash = 0;
+ sf->use_mb_rd_hash = 0;
+ sf->tx_type_search.fast_intra_tx_type_search = 1;
+ sf->tx_type_search.fast_inter_tx_type_search = 1;
+ sf->use_square_partition_only_threshold =
+ boosted ? BLOCK_128X128 : BLOCK_4X4;
+ sf->tx_size_search_method =
+ frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
+ sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
+ sf->adaptive_pred_interp_filter = 0;
+ sf->adaptive_mode_search = 1;
+ sf->cb_partition_search = !boosted;
+ sf->alt_ref_search_fp = 1;
+ sf->skip_sharp_interp_filter_search = 1;
+ }
+
+ if (speed >= 5) {
+ sf->recode_loop = ALLOW_RECODE_KFMAXBW;
+ sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
+ sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
+ sf->use_square_partition_only_threshold = BLOCK_4X4;
+ sf->tx_size_search_method = USE_LARGESTALL;
+ sf->mv.search_method = BIGDIA;
+ sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
+ sf->adaptive_rd_thresh = 4;
+ sf->mode_search_skip_flags =
+ (cm->current_frame.frame_type == KEY_FRAME)
+ ? 0
+ : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
+ FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
+ FLAG_EARLY_TERMINATE;
+ sf->disable_filter_search_var_thresh = 200;
+ sf->use_fast_coef_costing = 1;
+ sf->partition_search_breakout_rate_thr = 300;
+ sf->use_transform_domain_distortion = 2;
+ }
+
+ if (speed >= 6) {
+ int i;
+ sf->optimize_coefficients = NO_TRELLIS_OPT;
+ sf->mv.search_method = HEX;
+ sf->disable_filter_search_var_thresh = 500;
+ for (i = 0; i < TX_SIZES; ++i) {
+ sf->intra_y_mode_mask[i] = INTRA_DC;
+ sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
+ }
+ sf->partition_search_breakout_rate_thr = 500;
+ sf->mv.reduce_first_step_size = 1;
+ sf->simple_model_rd_from_var = 1;
+ }
+ if (speed >= 7) {
+ sf->default_max_partition_size = BLOCK_32X32;
+ sf->default_min_partition_size = BLOCK_8X8;
+ sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
+ sf->frame_parameter_update = 0;
+ sf->mv.search_method = FAST_HEX;
+ sf->partition_search_type = REFERENCE_PARTITION;
+ sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
+ }
+ if (speed >= 8) {
+ sf->mv.search_method = FAST_DIAMOND;
+ sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+ sf->default_max_partition_size = BLOCK_128X128;
+ sf->default_min_partition_size = BLOCK_8X8;
+ sf->partition_search_type = VAR_BASED_PARTITION;
+ sf->use_nonrd_pick_mode = 1;
+ }
+}
+
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
SPEED_FEATURES *const sf = &cpi->sf;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
@@ -570,12 +776,13 @@
sf->disable_interinter_wedge_newmv_search = 0;
sf->prune_motion_mode_level = 0;
sf->prune_warp_using_wmtype = 0;
-
sf->disable_wedge_interintra_search = 0;
sf->perform_coeff_opt = 0;
if (oxcf->mode == GOOD)
set_good_speed_features_framesize_independent(cpi, sf, speed);
+ else if (oxcf->mode == REALTIME)
+ set_rt_speed_features_framesize_independent(cpi, sf, speed);
if (!cpi->seq_params_locked) {
cpi->common.seq_params.enable_dual_filter &= !sf->disable_dual_filter;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 2216c7b..b055904 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -191,7 +191,9 @@
// Always use a fixed size partition
FIXED_PARTITION,
- REFERENCE_PARTITION
+ REFERENCE_PARTITION,
+
+ VAR_BASED_PARTITION
} UENUM1BYTE(PARTITION_SEARCH_TYPE);
enum {
@@ -652,6 +654,9 @@
// Flag used to control the extent of coeff R-D optimization
int perform_coeff_opt;
+
+ // This flag controls the use of non-RD mode decision.
+ int use_nonrd_pick_mode;
} SPEED_FEATURES;
struct AV1_COMP;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
new file mode 100644
index 0000000..7f19344
--- /dev/null
+++ b/av1/encoder/var_based_part.c
@@ -0,0 +1,778 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/binary_codes_writer.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/system_state.h"
+
+#include "av1/common/reconinter.h"
+#include "av1/common/blockd.h"
+
+#include "av1/encoder/encodeframe.h"
+#include "av1/encoder/var_based_part.h"
+#include "av1/encoder/reconinter_enc.h"
+
+extern const uint8_t AV1_VAR_OFFS[];
+
+typedef struct {
+ // TODO(kyslov): consider changing to 64bit
+
+ // This struct is used for computing variance in choose_partitioning(), where
+ // the max number of samples within a superblock is 32x32 (with 4x4 avg).
+ // With 8bit bitdepth, uint32_t is enough for sum_square_error (2^8 * 2^8 * 32
+ // * 32 = 2^26). For high bitdepth we need to consider changing this to 64 bit
+ uint32_t sum_square_error;
+ int32_t sum_error;
+ int log2_count;
+ int variance;
+} var;
+
+typedef struct {
+ var none;
+ var horz[2];
+ var vert[2];
+} partition_variance;
+
+typedef struct {
+ partition_variance part_variances;
+ var split[4];
+} v4x4;
+
+typedef struct {
+ partition_variance part_variances;
+ v4x4 split[4];
+} v8x8;
+
+typedef struct {
+ partition_variance part_variances;
+ v8x8 split[4];
+} v16x16;
+
+typedef struct {
+ partition_variance part_variances;
+ v16x16 split[4];
+} v32x32;
+
+typedef struct {
+ partition_variance part_variances;
+ v32x32 split[4];
+} v64x64;
+
+typedef struct {
+ partition_variance part_variances;
+ v64x64 split[4];
+} v128x128;
+
+typedef struct {
+ partition_variance *part_variances;
+ var *split[4];
+} variance_node;
+
+static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
+ int i;
+ node->part_variances = NULL;
+ switch (bsize) {
+ case BLOCK_128X128: {
+ v128x128 *vt = (v128x128 *)data;
+ node->part_variances = &vt->part_variances;
+ for (i = 0; i < 4; i++)
+ node->split[i] = &vt->split[i].part_variances.none;
+ break;
+ }
+ case BLOCK_64X64: {
+ v64x64 *vt = (v64x64 *)data;
+ node->part_variances = &vt->part_variances;
+ for (i = 0; i < 4; i++)
+ node->split[i] = &vt->split[i].part_variances.none;
+ break;
+ }
+ case BLOCK_32X32: {
+ v32x32 *vt = (v32x32 *)data;
+ node->part_variances = &vt->part_variances;
+ for (i = 0; i < 4; i++)
+ node->split[i] = &vt->split[i].part_variances.none;
+ break;
+ }
+ case BLOCK_16X16: {
+ v16x16 *vt = (v16x16 *)data;
+ node->part_variances = &vt->part_variances;
+ for (i = 0; i < 4; i++)
+ node->split[i] = &vt->split[i].part_variances.none;
+ break;
+ }
+ case BLOCK_8X8: {
+ v8x8 *vt = (v8x8 *)data;
+ node->part_variances = &vt->part_variances;
+ for (i = 0; i < 4; i++)
+ node->split[i] = &vt->split[i].part_variances.none;
+ break;
+ }
+ default: {
+ v4x4 *vt = (v4x4 *)data;
+ assert(bsize == BLOCK_4X4);
+ node->part_variances = &vt->part_variances;
+ for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
+ break;
+ }
+ }
+}
+
+// Set variance values given sum square error, sum error, count.
+static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
+ v->sum_square_error = s2;
+ v->sum_error = s;
+ v->log2_count = c;
+}
+
+static void get_variance(var *v) {
+ v->variance =
+ (int)(256 * (v->sum_square_error -
+ (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
+ v->log2_count)) >>
+ v->log2_count);
+}
+
+static void sum_2_variances(const var *a, const var *b, var *r) {
+ assert(a->log2_count == b->log2_count);
+ fill_variance(a->sum_square_error + b->sum_square_error,
+ a->sum_error + b->sum_error, a->log2_count + 1, r);
+}
+
+static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
+ variance_node node;
+ memset(&node, 0, sizeof(node));
+ tree_to_node(data, bsize, &node);
+ sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
+ sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
+ sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
+ sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
+ sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
+ &node.part_variances->none);
+}
+
+static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
+ MACROBLOCKD *const xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+ xd->mi[0]->sb_type = bsize;
+ }
+}
+
+static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
+ MACROBLOCKD *const xd, void *data,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int64_t threshold, BLOCK_SIZE bsize_min,
+ int force_split) {
+ AV1_COMMON *const cm = &cpi->common;
+ variance_node vt;
+ const int block_width = mi_size_wide[bsize];
+ const int block_height = mi_size_high[bsize];
+
+ assert(block_height == block_width);
+ tree_to_node(data, bsize, &vt);
+
+ if (force_split == 1) return 0;
+
+ // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
+ // variance is below threshold, otherwise split will be selected.
+ // No check for vert/horiz split as too few samples for variance.
+ if (bsize == bsize_min) {
+ // Variance already computed to set the force_split.
+ if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
+ if (mi_col + block_width / 2 < cm->mi_cols &&
+ mi_row + block_height / 2 < cm->mi_rows &&
+ vt.part_variances->none.variance < threshold) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
+ return 1;
+ }
+ return 0;
+ } else if (bsize > bsize_min) {
+ // Variance already computed to set the force_split.
+ if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
+ // For key frame: take split for bsize above 32X32 or very high variance.
+ if (frame_is_intra_only(cm) &&
+ (bsize > BLOCK_32X32 ||
+ vt.part_variances->none.variance > (threshold << 4))) {
+ return 0;
+ }
+ // If variance is low, take the bsize (no split).
+ if (mi_col + block_width / 2 < cm->mi_cols &&
+ mi_row + block_height / 2 < cm->mi_rows &&
+ vt.part_variances->none.variance < threshold) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
+ return 1;
+ }
+
+ // Check vertical split.
+ if (mi_row + block_height / 2 < cm->mi_rows) {
+ BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
+ get_variance(&vt.part_variances->vert[0]);
+ get_variance(&vt.part_variances->vert[1]);
+ if (vt.part_variances->vert[0].variance < threshold &&
+ vt.part_variances->vert[1].variance < threshold &&
+ get_plane_block_size(subsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y) < BLOCK_INVALID) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
+ set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
+ return 1;
+ }
+ }
+ // Check horizontal split.
+ if (mi_col + block_width / 2 < cm->mi_cols) {
+ BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
+ get_variance(&vt.part_variances->horz[0]);
+ get_variance(&vt.part_variances->horz[1]);
+ if (vt.part_variances->horz[0].variance < threshold &&
+ vt.part_variances->horz[1].variance < threshold &&
+ get_plane_block_size(subsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y) < BLOCK_INVALID) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
+ set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
+ return 1;
+ }
+ }
+
+ return 0;
+ }
+ return 0;
+}
+
+static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
+ int dp, int x16_idx, int y16_idx, v16x16 *vst,
+ int pixels_wide, int pixels_high,
+ int is_key_frame) {
+ int k;
+ for (k = 0; k < 4; k++) {
+ int x8_idx = x16_idx + ((k & 1) << 3);
+ int y8_idx = y16_idx + ((k >> 1) << 3);
+ unsigned int sse = 0;
+ int sum = 0;
+ if (x8_idx < pixels_wide && y8_idx < pixels_high) {
+ int s_avg;
+ int d_avg = 128;
+ s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+
+ sum = s_avg - d_avg;
+ sse = sum * sum;
+ }
+ fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
+ }
+}
+
+static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
+ int dp, int x16_idx, int y16_idx, int pixels_wide,
+ int pixels_high) {
+ int k;
+ int minmax_max = 0;
+ int minmax_min = 255;
+ // Loop over the 4 8x8 subblocks.
+ for (k = 0; k < 4; k++) {
+ int x8_idx = x16_idx + ((k & 1) << 3);
+ int y8_idx = y16_idx + ((k >> 1) << 3);
+ int min = 0;
+ int max = 0;
+ if (x8_idx < pixels_wide && y8_idx < pixels_high) {
+ aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
+ &min, &max);
+ if ((max - min) > minmax_max) minmax_max = (max - min);
+ if ((max - min) < minmax_min) minmax_min = (max - min);
+ }
+ }
+ return (minmax_max - minmax_min);
+}
+
+static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
+ int dp, int x8_idx, int y8_idx, v8x8 *vst,
+ int pixels_wide, int pixels_high,
+ int is_key_frame) {
+ int k;
+ for (k = 0; k < 4; k++) {
+ int x4_idx = x8_idx + ((k & 1) << 2);
+ int y4_idx = y8_idx + ((k >> 1) << 2);
+ unsigned int sse = 0;
+ int sum = 0;
+ if (x4_idx < pixels_wide && y4_idx < pixels_high) {
+ int s_avg;
+ int d_avg = 128;
+ s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ sum = s_avg - d_avg;
+ sse = sum * sum;
+ }
+ fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
+ }
+}
+
+static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
+ int width, int height,
+ int content_state) {
+ if (speed >= 8) {
+ if (width <= 640 && height <= 480)
+ return (5 * threshold_base) >> 2;
+ else if ((content_state == kLowSadLowSumdiff) ||
+ (content_state == kHighSadLowSumdiff) ||
+ (content_state == kLowVarHighSumdiff))
+ return (5 * threshold_base) >> 2;
+ } else if (speed == 7) {
+ if ((content_state == kLowSadLowSumdiff) ||
+ (content_state == kHighSadLowSumdiff) ||
+ (content_state == kLowVarHighSumdiff)) {
+ return (5 * threshold_base) >> 2;
+ }
+ }
+ return threshold_base;
+}
+
+// Set the variance split thresholds for following the block sizes:
+// 0 - threshold_128x128, 1 - threshold_64x64, 2 - threshold_32x32,
+// 3 - vbp_threshold_16x16. 4 - vbp_threshold_8x8 (to split to 4x4 partition) is
+// currently only used on key frame.
+static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q,
+ int content_state) {
+ AV1_COMMON *const cm = &cpi->common;
+ const int is_key_frame = frame_is_intra_only(cm);
+ const int threshold_multiplier = is_key_frame ? 40 : 1;
+ int64_t threshold_base =
+ (int64_t)(threshold_multiplier * cpi->dequants.y_dequant_QTX[q][1]);
+
+ if (is_key_frame) {
+ thresholds[0] = threshold_base;
+ thresholds[1] = threshold_base;
+ thresholds[2] = threshold_base >> 2;
+ thresholds[3] = threshold_base >> 2;
+ thresholds[4] = threshold_base << 2;
+ } else {
+ // Increase base variance threshold based on content_state/sum_diff level.
+ threshold_base = scale_part_thresh_sumdiff(
+ threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
+
+ thresholds[1] = threshold_base;
+ thresholds[3] = threshold_base << cpi->oxcf.speed;
+ if (cm->width >= 1280 && cm->height >= 720)
+ thresholds[3] = thresholds[3] << 1;
+ if (cm->width <= 352 && cm->height <= 288) {
+ thresholds[1] = threshold_base >> 3;
+ thresholds[2] = threshold_base >> 1;
+ thresholds[3] = threshold_base << 3;
+ } else if (cm->width < 1280 && cm->height < 720) {
+ thresholds[2] = (5 * threshold_base) >> 2;
+ } else if (cm->width < 1920 && cm->height < 1080) {
+ thresholds[2] = threshold_base << 1;
+ thresholds[3] <<= 2;
+ } else {
+ thresholds[2] = (5 * threshold_base) >> 1;
+ }
+ }
+}
+
+void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
+ int content_state) {
+ AV1_COMMON *const cm = &cpi->common;
+ SPEED_FEATURES *const sf = &cpi->sf;
+ const int is_key_frame = frame_is_intra_only(cm);
+ if (sf->partition_search_type != VAR_BASED_PARTITION) {
+ return;
+ } else {
+ set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
+ // The thresholds below are not changed locally.
+ if (is_key_frame) {
+ cpi->vbp_threshold_sad = 0;
+ cpi->vbp_threshold_copy = 0;
+ cpi->vbp_bsize_min = BLOCK_8X8;
+ } else {
+ if (cm->width <= 352 && cm->height <= 288)
+ cpi->vbp_threshold_sad = 10;
+ else
+ cpi->vbp_threshold_sad = (cpi->dequants.y_dequant_QTX[q][1] << 1) > 1000
+ ? (cpi->dequants.y_dequant_QTX[q][1] << 1)
+ : 1000;
+ cpi->vbp_bsize_min = BLOCK_16X16;
+ if (cm->width <= 352 && cm->height <= 288)
+ cpi->vbp_threshold_copy = 4000;
+ else if (cm->width <= 640 && cm->height <= 360)
+ cpi->vbp_threshold_copy = 8000;
+ else
+ cpi->vbp_threshold_copy =
+ (cpi->dequants.y_dequant_QTX[q][1] << 3) > 8000
+ ? (cpi->dequants.y_dequant_QTX[q][1] << 3)
+ : 8000;
+ }
+ cpi->vbp_threshold_minmax = 15 + (q >> 3);
+ }
+}
+
+// This function chooses partitioning based on the variance between source and
+// reconstructed last, where variance is computed for down-sampled inputs.
+// TODO(kyslov): lot of things. Bring back noise estimation, brush up partition
+// selection
+// and most of all - retune the thresholds
+int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
+ MACROBLOCK *x, int mi_row, int mi_col) {
+ AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+
+ int i, j, k, m;
+ v128x128 *vt;
+ v16x16 *vt2 = NULL;
+ unsigned char force_split[85];
+ int avg_32x32;
+ int max_var_32x32 = 0;
+ int min_var_32x32 = INT_MAX;
+ int var_32x32;
+ int var_64x64;
+ int min_var_64x64 = INT_MAX;
+ int max_var_64x64 = 0;
+ int avg_16x16[4];
+ int maxvar_16x16[4];
+ int minvar_16x16[4];
+ int64_t threshold_4x4avg;
+ int content_state = 0;
+ uint8_t *s;
+ const uint8_t *d;
+ int sp;
+ int dp;
+ int compute_minmax_variance = 1;
+ int is_key_frame = frame_is_intra_only(cm);
+ int pixels_wide = 128, pixels_high = 128;
+
+ CHECK_MEM_ERROR(cm, vt, aom_calloc(1, sizeof(*vt)));
+
+ int64_t thresholds[5] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
+ cpi->vbp_thresholds[2], cpi->vbp_thresholds[3],
+ cpi->vbp_thresholds[4] };
+
+ const int low_res = (cm->width <= 352 && cm->height <= 288);
+ int variance4x4downsample[64];
+ int segment_id;
+ const int num_planes = av1_num_planes(cm);
+
+ segment_id = xd->mi[0]->segment_id;
+
+ set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
+
+ // For non keyframes, disable 4x4 average for low resolution when speed = 8
+ threshold_4x4avg = INT64_MAX;
+
+ if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
+ if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+ s = x->plane[0].src.buf;
+ sp = x->plane[0].src.stride;
+
+ // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
+ // 5-20 for the 16x16 blocks.
+ force_split[0] = 0;
+
+ if (!is_key_frame) {
+ // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
+ // is!!
+ MB_MODE_INFO *mi = xd->mi[0];
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
+
+ assert(yv12 != NULL);
+
+ av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ get_ref_scale_factors(cm, LAST_FRAME), num_planes);
+ mi->ref_frame[0] = LAST_FRAME;
+ mi->ref_frame[1] = NONE_FRAME;
+ mi->sb_type = BLOCK_128X128;
+ mi->mv[0].as_int = 0;
+ mi->interp_filters = av1_make_interp_filters(BILINEAR, BILINEAR);
+
+// TODO(kyslov): bring the small SAD functionality back
+#if 0
+ y_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
+#endif
+ x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+
+ set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, BLOCK_128X128,
+ AOM_PLANE_Y, AOM_PLANE_Y);
+
+ d = xd->plane[0].dst.buf;
+ dp = xd->plane[0].dst.stride;
+
+ // If the y_sad is very small, take 64x64 as partition and exit.
+ // Don't check on boosted segment for now, as 64x64 is suppressed there.
+#if 0
+ if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad)
+ { const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const
+ int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; if (mi_col +
+ block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows)
+ { set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_128X128);
+ x->variance_low[0] = 1;
+ return 0;
+ }
+ }
+#endif
+ } else {
+ d = AV1_VAR_OFFS;
+ dp = 0;
+ }
+
+ if (low_res && threshold_4x4avg < INT64_MAX)
+ CHECK_MEM_ERROR(cm, vt2, aom_calloc(64, sizeof(*vt2)));
+ // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
+ // for splits.
+ for (m = 0; m < 4; m++) {
+ const int x64_idx = ((m & 1) << 6);
+ const int y64_idx = ((m >> 1) << 6);
+ const int m2 = m << 2;
+ force_split[m + 1] = 0;
+ for (i = 0; i < 4; i++) {
+ const int x32_idx = x64_idx + ((i & 1) << 5);
+ const int y32_idx = y64_idx + ((i >> 1) << 5);
+ const int i2 = (m2 + i) << 2;
+ force_split[5 + m2 + i] = 0;
+ avg_16x16[i] = 0;
+ maxvar_16x16[i] = 0;
+ minvar_16x16[i] = INT_MAX;
+ for (j = 0; j < 4; j++) {
+ const int x16_idx = x32_idx + ((j & 1) << 4);
+ const int y16_idx = y32_idx + ((j >> 1) << 4);
+ const int split_index = 21 + i2 + j;
+ v16x16 *vst = &vt->split[m].split[i].split[j];
+ force_split[split_index] = 0;
+ variance4x4downsample[i2 + j] = 0;
+ if (!is_key_frame) {
+ fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide,
+ pixels_high, is_key_frame);
+ fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16);
+ get_variance(&vt->split[m].split[i].split[j].part_variances.none);
+ avg_16x16[i] +=
+ vt->split[m].split[i].split[j].part_variances.none.variance;
+ if (vt->split[m].split[i].split[j].part_variances.none.variance <
+ minvar_16x16[i])
+ minvar_16x16[i] =
+ vt->split[m].split[i].split[j].part_variances.none.variance;
+ if (vt->split[m].split[i].split[j].part_variances.none.variance >
+ maxvar_16x16[i])
+ maxvar_16x16[i] =
+ vt->split[m].split[i].split[j].part_variances.none.variance;
+ if (vt->split[m].split[i].split[j].part_variances.none.variance >
+ thresholds[3]) {
+ // 16X16 variance is above threshold for split, so force split to
+ // 8x8 for this 16x16 block (this also forces splits for upper
+ // levels).
+ force_split[split_index] = 1;
+ force_split[5 + m2 + i] = 1;
+ force_split[m + 1] = 1;
+ force_split[0] = 1;
+ } else if (compute_minmax_variance &&
+ vt->split[m]
+ .split[i]
+ .split[j]
+ .part_variances.none.variance > thresholds[2] &&
+ !cyclic_refresh_segment_id_boosted(segment_id)) {
+ // We have some nominal amount of 16x16 variance (based on average),
+ // compute the minmax over the 8x8 sub-blocks, and if above
+ // threshold, force split to 8x8 block for this 16x16 block.
+ int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
+ pixels_wide, pixels_high);
+ int thresh_minmax = (int)cpi->vbp_threshold_minmax;
+ if (minmax > thresh_minmax) {
+ force_split[split_index] = 1;
+ force_split[5 + m2 + i] = 1;
+ force_split[m + 1] = 1;
+ force_split[0] = 1;
+ }
+ }
+ }
+ if (is_key_frame) {
+ force_split[split_index] = 0;
+ // Go down to 4x4 down-sampling for variance.
+ variance4x4downsample[i2 + j] = 1;
+ for (k = 0; k < 4; k++) {
+ int x8_idx = x16_idx + ((k & 1) << 3);
+ int y8_idx = y16_idx + ((k >> 1) << 3);
+ v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
+ fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
+ pixels_wide, pixels_high, is_key_frame);
+ }
+ }
+ }
+ }
+ }
+
+ // Fill the rest of the variance tree by summing split partition values.
+ for (m = 0; m < 4; ++m) {
+ avg_32x32 = 0;
+ const int m2 = m << 2;
+ for (i = 0; i < 4; i++) {
+ const int i2 = (m2 + i) << 2;
+ for (j = 0; j < 4; j++) {
+ const int split_index = 21 + i2 + j;
+ if (variance4x4downsample[i2 + j] == 1) {
+ v16x16 *vtemp =
+ (!is_key_frame) ? &vt2[i2 + j] : &vt->split[m].split[i].split[j];
+ for (k = 0; k < 4; k++)
+ fill_variance_tree(&vtemp->split[k], BLOCK_8X8);
+ fill_variance_tree(vtemp, BLOCK_16X16);
+ // If variance of this 16x16 block is above the threshold, force block
+ // to split. This also forces a split on the upper levels.
+ get_variance(&vtemp->part_variances.none);
+ if (vtemp->part_variances.none.variance > thresholds[3]) {
+ force_split[split_index] = 1;
+ force_split[5 + m2 + i] = 1;
+ force_split[m + 1] = 1;
+ force_split[0] = 1;
+ }
+ }
+ }
+ fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32);
+ // If variance of this 32x32 block is above the threshold, or if its above
+ // (some threshold of) the average variance over the sub-16x16 blocks,
+ // then force this block to split. This also forces a split on the upper
+ // (64x64) level.
+ if (!force_split[5 + m2 + i]) {
+ get_variance(&vt->split[m].split[i].part_variances.none);
+ var_32x32 = vt->split[m].split[i].part_variances.none.variance;
+ max_var_32x32 = AOMMAX(var_32x32, max_var_32x32);
+ min_var_32x32 = AOMMIN(var_32x32, min_var_32x32);
+ if (vt->split[m].split[i].part_variances.none.variance >
+ thresholds[2] ||
+ (!is_key_frame &&
+ vt->split[m].split[i].part_variances.none.variance >
+ (thresholds[2] >> 1) &&
+ vt->split[m].split[i].part_variances.none.variance >
+ (avg_16x16[i] >> 1))) {
+ force_split[5 + m2 + i] = 1;
+ force_split[m + 1] = 1;
+ force_split[0] = 1;
+ } else if (!is_key_frame && cm->height <= 360 &&
+ (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[2] >> 1) &&
+ maxvar_16x16[i] > thresholds[2]) {
+ force_split[5 + m2 + i] = 1;
+ force_split[m + 1] = 1;
+ force_split[0] = 1;
+ }
+ avg_32x32 += var_32x32;
+ }
+ }
+ if (!force_split[1 + m]) {
+ fill_variance_tree(&vt->split[m], BLOCK_64X64);
+ get_variance(&vt->split[m].part_variances.none);
+ var_64x64 = vt->split[m].part_variances.none.variance;
+ max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
+ min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
+ // If variance of this 64x64 block is above (some threshold of) the
+ // average variance over the sub-32x32 blocks, then force this block to
+ // split. Only checking this for noise level >= medium for now.
+
+ if (!is_key_frame &&
+ (max_var_32x32 - min_var_32x32) > 3 * (thresholds[1] >> 3) &&
+ max_var_32x32 > thresholds[1] >> 1)
+ force_split[1 + m] = 1;
+ }
+ }
+
+ if (!force_split[0]) {
+ fill_variance_tree(vt, BLOCK_128X128);
+ get_variance(&vt->part_variances.none);
+ if (!is_key_frame &&
+ (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
+ max_var_64x64 > thresholds[0] >> 1)
+ force_split[0] = 1;
+ }
+
+ const int mi_rows_remaining = tile->mi_row_end - mi_row;
+ const int mi_cols_remaining = tile->mi_col_end - mi_col;
+ if (!((mi_cols_remaining >= cm->seq_params.mib_size) &&
+ (mi_rows_remaining >= cm->seq_params.mib_size))) {
+ int bh = mi_size_high[BLOCK_128X128];
+ int bw = mi_size_wide[BLOCK_128X128];
+
+ int r, c;
+ for (r = 0; r < cm->seq_params.mib_size; r += bh) {
+ bw = mi_size_wide[BLOCK_128X128];
+ for (c = 0; c < cm->seq_params.mib_size; c += bw) {
+ BLOCK_SIZE sb_type =
+ find_partition_size(BLOCK_128X128, mi_rows_remaining - r,
+ mi_cols_remaining - c, &bh, &bw);
+ set_block_size(cpi, x, xd, mi_row + r, mi_col + c, sb_type);
+ }
+ }
+
+ } else {
+ if (!set_vt_partitioning(cpi, x, xd, vt, BLOCK_128X128, mi_row, mi_col,
+ thresholds[0], BLOCK_16X16, force_split[0])) {
+ for (m = 0; m < 4; ++m) {
+ const int x64_idx = ((m & 1) << 4);
+ const int y64_idx = ((m >> 1) << 4);
+ const int m2 = m << 2;
+
+ // Now go through the entire structure, splitting every block size until
+ // we get to one that's got a variance lower than our threshold.
+ if (!set_vt_partitioning(cpi, x, xd, &vt->split[m], BLOCK_64X64,
+ mi_row + y64_idx, mi_col + x64_idx,
+ thresholds[1], BLOCK_16X16,
+ force_split[1 + m])) {
+ for (i = 0; i < 4; ++i) {
+ const int x32_idx = ((i & 1) << 3);
+ const int y32_idx = ((i >> 1) << 3);
+ const int i2 = (m2 + i) << 2;
+ if (!set_vt_partitioning(
+ cpi, x, xd, &vt->split[m].split[i], BLOCK_32X32,
+ (mi_row + y64_idx + y32_idx), (mi_col + x64_idx + x32_idx),
+ thresholds[2], BLOCK_16X16, force_split[5 + m2 + i])) {
+ for (j = 0; j < 4; ++j) {
+ const int x16_idx = ((j & 1) << 2);
+ const int y16_idx = ((j >> 1) << 2);
+ const int split_index = 21 + i2 + j;
+ // For inter frames: if variance4x4downsample[] == 1 for this
+ // 16x16 block, then the variance is based on 4x4 down-sampling,
+ // so use vt2 in set_vt_partioning(), otherwise use vt.
+ v16x16 *vtemp =
+ (!is_key_frame && variance4x4downsample[i2 + j] == 1)
+ ? &vt2[i2 + j]
+ : &vt->split[m].split[i].split[j];
+ if (!set_vt_partitioning(cpi, x, xd, vtemp, BLOCK_16X16,
+ mi_row + y64_idx + y32_idx + y16_idx,
+ mi_col + x64_idx + x32_idx + x16_idx,
+ thresholds[3], BLOCK_8X8,
+ force_split[split_index])) {
+ for (k = 0; k < 4; ++k) {
+ const int x8_idx = (k & 1) << 1;
+ const int y8_idx = (k >> 1) << 1;
+ set_block_size(
+ cpi, x, xd,
+ (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
+ (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
+ BLOCK_8X8);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (vt2) aom_free(vt2);
+ if (vt) aom_free(vt);
+ return 0;
+}
diff --git a/av1/encoder/var_based_part.h b/av1/encoder/var_based_part.h
new file mode 100644
index 0000000..c355224
--- /dev/null
+++ b/av1/encoder/var_based_part.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_ENCODER_VAR_BASED_PART_H_
+#define AOM_AV1_ENCODER_VAR_BASED_PART_H_
+
+#include <stdio.h>
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "av1/encoder/encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
+ int content_state);
+
+int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
+ MACROBLOCK *x, int mi_row, int mi_col);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AOM_AV1_ENCODER_VAR_BASED_PART_H_
diff --git a/test/encode_api_test.cc b/test/encode_api_test.cc
index c26f572..235480a 100644
--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@@ -50,7 +50,7 @@
EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
aom_codec_enc_init(&enc, kCodecs[i], NULL, 0));
EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_enc_config_default(kCodecs[i], &cfg, 1));
+ aom_codec_enc_config_default(kCodecs[i], &cfg, 2));
EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(kCodecs[i], &cfg, 0));
EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, kCodecs[i], &cfg, 0));
diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc
new file mode 100644
index 0000000..63b602b
--- /dev/null
+++ b/test/rt_end_to_end_test.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+
+// List of psnr thresholds for speed settings 0-8
+const double kPsnrThreshold[9] = { 36.9, 36.9, 36.85, 36.8, 36.6,
+ 36.4, 36.0, 35.5, 34.5 };
+
+typedef struct {
+ const char *filename;
+ unsigned int input_bit_depth;
+ aom_img_fmt fmt;
+ aom_bit_depth_t bit_depth;
+ unsigned int profile;
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+ return os << "TestVideoParam { filename:" << test_arg.filename
+ << " input_bit_depth:" << test_arg.input_bit_depth
+ << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+ << " profile:" << test_arg.profile << "}";
+}
+
+// TODO(kyslov): Add more test vectors
+const TestVideoParam kTestVectors[] = {
+ { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+};
+
+// Speed settings tested
+const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+class RTEndToEndTest
+ : public ::libaom_test::CodecTestWith2Params<TestVideoParam, int>,
+ public ::libaom_test::EncoderTest {
+ protected:
+ RTEndToEndTest()
+ : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+ cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0) {}
+
+ virtual ~RTEndToEndTest() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(::libaom_test::kRealTime);
+
+ cfg_.g_usage = 1; // TODO(kyslov): Move it to encode_test_driver.cc
+ cfg_.rc_end_usage = AOM_CBR;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 600;
+ }
+
+ virtual void BeginPassHook(unsigned int) {
+ psnr_ = 0.0;
+ nframes_ = 0;
+ }
+
+ virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+ psnr_ += pkt->data.psnr.psnr[0];
+ nframes_++;
+ }
+
+ virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+ ::libaom_test::Encoder *encoder) {
+ if (video->frame() == 0) {
+ encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+ encoder->Control(AV1E_SET_TILE_COLUMNS, 1);
+ encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+ encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+ }
+ }
+
+ double GetAveragePsnr() const {
+ if (nframes_) return psnr_ / nframes_;
+ return 0.0;
+ }
+
+ double GetPsnrThreshold() { return kPsnrThreshold[cpu_used_]; }
+
+ void DoTest() {
+ cfg_.rc_target_bitrate = kBitrate;
+ cfg_.g_error_resilient = 0;
+ cfg_.g_profile = test_video_param_.profile;
+ cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+ cfg_.g_bit_depth = test_video_param_.bit_depth;
+ init_flags_ = AOM_CODEC_USE_PSNR;
+ if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+ std::unique_ptr<libaom_test::VideoSource> video;
+ video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+ kFrames));
+ ASSERT_TRUE(video.get() != NULL);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+ const double psnr = GetAveragePsnr();
+ EXPECT_GT(psnr, GetPsnrThreshold()) << "cpu used = " << cpu_used_;
+ }
+
+ TestVideoParam test_video_param_;
+ int cpu_used_;
+
+ private:
+ double psnr_;
+ unsigned int nframes_;
+};
+
+class RTEndToEndTestLarge : public RTEndToEndTest {};
+
+TEST_P(RTEndToEndTestLarge, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(RTEndToEndTest, EndtoEndPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_CASE(RTEndToEndTestLarge,
+ ::testing::ValuesIn(kTestVectors),
+ ::testing::ValuesIn(kCpuUsedVectors));
+
+AV1_INSTANTIATE_TEST_CASE(RTEndToEndTest, ::testing::Values(kTestVectors[0]),
+ ::testing::Values(kCpuUsedVectors[8]));
+} // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 1b11373..5ae0279 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -64,6 +64,7 @@
"${AOM_ROOT}/test/encode_test_driver.cc"
"${AOM_ROOT}/test/encode_test_driver.h"
"${AOM_ROOT}/test/end_to_end_test.cc"
+ "${AOM_ROOT}/test/rt_end_to_end_test.cc"
"${AOM_ROOT}/test/error_resilience_test.cc"
"${AOM_ROOT}/test/frame_size_tests.cc"
"${AOM_ROOT}/test/horz_superres_test.cc"