Introduce full_pixel_motion_search_based_split
This speed feature performs a full_pixel_motion_search with a single
reference frame prior to PARTITION_NONE and uses the residue variance to
decide whether to split directly and skip other modes.
Currently it is only enabled on speed 1.
Performance:
        | LOWRES | MIDRES |
BDBR    | 0.030% | 0.078% |
Delta-T | 6.602% | 6.396% |
BDBR is calculated over 60 frames on the lowres and midres set. Delta-T
(average time reduction) is calculated with 5 clips over 4 bitrates
uniformly sampled on the default bitrate sets over 30 frames.
STATS_CHANGED
Change-Id: I9021c53e588f6647f90699521a93a902a90516dd
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e58b56b..845830b 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3362,6 +3362,122 @@
#undef FEATURES
#endif
+// Performs a full-pixel motion search against a single reference frame and
+// extracts the variance of the residue. bsize must be square, and features
+// must point to at least 6 floats, which are filled in as follows:
+// features[0] = log(1 + dc_q**2/256)
+// features[1] = log(1 + variance_of_residue)
+// for i in [2, 3, 4, 5] (the four quadrants of the block, in raster order):
+// features[i] = (1 + quadrant_residue_variance) / (4 + variance_of_residue)
+static void get_res_var_features(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ float *features) {
+ assert(mi_size_wide[bsize] == mi_size_high[bsize]);
+
+ AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = xd->mi[0];
+
+ mbmi->ref_frame[1] = NONE_FRAME;
+ mbmi->sb_type = bsize;
+
+ int pred_stride = 128;
+ DECLARE_ALIGNED(16, uint16_t, pred_buffer[MAX_SB_SQUARE]);
+ uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ ? CONVERT_TO_BYTEPTR(pred_buffer)
+ : (uint8_t *)pred_buffer;
+
+ // Run one motion search on the Y plane only (num_planes is 1) to predict
+ const MV_REFERENCE_FRAME ref =
+ cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
+ YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref);
+ const YV12_BUFFER_CONFIG *scaled_ref_frame =
+ av1_get_scaled_ref_frame(cpi, ref);
+ struct buf_2d backup_yv12;
+ // ref_mv is in units of 1/8-pel whereas ref_mv_full is in units of pel
+ MV ref_mv = { 0, 0 };
+ MV ref_mv_full = { 0, 0 };
+ const int step_param = 1;
+ const MvLimits tmp_mv_limits = x->mv_limits;
+ const SEARCH_METHODS search_methods = NSTEP;
+ const int do_mesh_search = 0;
+ const int sadpb = x->sadperbit16;
+ int cost_list[5];
+ int num_planes = 1;
+ const int ref_idx = 0;
+
+ if (scaled_ref_frame) {
+ backup_yv12 = xd->plane[AOM_PLANE_Y].pre[ref_idx];
+ av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
+ num_planes);
+ } else {
+ av1_setup_pre_planes(xd, ref_idx, yv12, mi_row, mi_col,
+ &cm->frame_refs[ref - LAST_FRAME].sf, num_planes);
+ }
+
+ mbmi->ref_frame[0] = ref;
+ av1_set_mv_search_range(&x->mv_limits, &ref_mv);
+ av1_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_methods,
+ do_mesh_search, sadpb, cond_cost_list(cpi, cost_list),
+ &ref_mv, INT_MAX, 1, mi_col * MI_SIZE, mi_row * MI_SIZE,
+ 0);
+ // Restore the MV limits saved before av1_set_mv_search_range() was called
+ x->mv_limits = tmp_mv_limits;
+
+ // Convert the best MV from units of pixel to 1/8-pixels
+ x->best_mv.as_mv.row *= 8;
+ x->best_mv.as_mv.col *= 8;
+ mbmi->mv[0].as_mv = x->best_mv.as_mv;
+
+ // Predict into pred_buf. NOTE(review): dst is not restored afterwards.
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ xd->plane[0].dst.buf = pred_buf;
+ xd->plane[0].dst.stride = pred_stride;
+ av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
+
+ aom_clear_system_state();
+
+ if (scaled_ref_frame) {
+ xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12;
+ }
+
+ // Now that we have the prediction, we can compute the features
+ int f_idx = 0;
+
+ // Q_INDEX: DC quantizer for x->qindex, shifted down by (bd - 8) bits
+ const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8);
+ features[f_idx++] = logf(1.0f + (float)(dc_q * dc_q) / 256.0f);
+
+ // VARIANCE of the residue between the source and the prediction
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes,
+ BLOCK_128X128);
+ const uint8_t *src = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ unsigned int sse = 0;
+
+ // Whole block
+ const unsigned int var =
+ cpi->fn_ptr[bsize].vf(src, src_stride, pred_buf, pred_stride, &sse);
+ features[f_idx++] = logf(1.0f + (float)var);
+
+ // Regional: one variance per quadrant, relative to the whole-block variance
+ const int bw = block_size_wide[bsize];
+ const int bh = block_size_high[bsize];
+ const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
+ int r_idx = 0;
+ for (r_idx = 0; r_idx < 4; r_idx++) {
+ const int x_idx = (r_idx & 1) * bw / 2;
+ const int y_idx = (r_idx >> 1) * bh / 2;
+ const int src_offset = y_idx * src_stride + x_idx;
+ const int pred_offset = y_idx * pred_stride + x_idx;
+ const unsigned int sub_var =
+ cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+ pred_buf + pred_offset, pred_stride, &sse);
+ const float var_ratio = (1.0f + (float)sub_var) / (4.0f + (float)var);
+ features[f_idx++] = var_ratio;
+ }
+}
+
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
@@ -3625,6 +3741,52 @@
MB_MODE_INFO *split_mbmi[4] = { 0 };
+ // Perform a full_pixel_search and use the residue to estimate whether we
+ // should split directly.
+ // TODO(chiyotsai@google.com): Try the algorithm on hbd and speed 0.
+ // Also try pruning PARTITION_SPLIT
+ if (cpi->sf.full_pixel_motion_search_based_split && bsize >= BLOCK_8X8 &&
+ do_square_split && mi_row + mi_size_high[bsize] <= cm->mi_rows &&
+ mi_col + mi_size_wide[bsize] <= cm->mi_cols && !frame_is_intra_only(cm) &&
+ !cm->seq_params.enable_superres) {
+ const NN_CONFIG *nn_config = NULL;
+ float split_only_thresh = 0.0f;
+ if (bsize == BLOCK_128X128) {
+ nn_config = &full_pixel_motion_search_based_split_nn_config_128;
+ split_only_thresh = full_pixel_motion_search_based_split_thresh_128;
+ } else if (bsize == BLOCK_64X64) {
+ nn_config = &full_pixel_motion_search_based_split_nn_config_64;
+ split_only_thresh = full_pixel_motion_search_based_split_thresh_64;
+ } else if (bsize == BLOCK_32X32) {
+ nn_config = &full_pixel_motion_search_based_split_nn_config_32;
+ split_only_thresh = full_pixel_motion_search_based_split_thresh_32;
+ } else if (bsize == BLOCK_16X16) {
+ nn_config = &full_pixel_motion_search_based_split_nn_config_16;
+ split_only_thresh = full_pixel_motion_search_based_split_thresh_16;
+ } else if (bsize == BLOCK_8X8) {
+#if !CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8
+ // Only compiled in when BLOCK_8X8 is not disabled by the config flag
+ nn_config = &full_pixel_motion_search_based_split_nn_config_8;
+ split_only_thresh = full_pixel_motion_search_based_split_thresh_8;
+#endif
+ } else {
+ assert(0 && "Unexpected block size in full_pixel_motion_based_split");
+ }
+ if (nn_config) {
+ float features[6] = { 0 };
+ float score = 0;
+ get_res_var_features(cpi, x, mi_row, mi_col, bsize, features);
+ av1_nn_predict(features, nn_config, &score);
+
+ if (score > split_only_thresh) {
+ partition_none_allowed = 0;
+ partition_horz_allowed = 0;
+ partition_vert_allowed = 0;
+ do_rectangular_split = 0;
+ }
+ }
+ }
+
BEGIN_PARTITION_SEARCH:
if (x->must_find_valid_partition) {
partition_none_allowed = has_rows && has_cols;
diff --git a/av1/encoder/partition_model_weights.h b/av1/encoder/partition_model_weights.h
index 735869b..271764a 100644
--- a/av1/encoder/partition_model_weights.h
+++ b/av1/encoder/partition_model_weights.h
@@ -2572,6 +2572,334 @@
#undef FEATURE_SIZE
#endif // CONFIG_ONE_PASS_SVM
+// Below are the models used for full_pixel_motion_search_based_split
+// BLOCK_128X128
+#define NUM_HIDDEN_LAYERS_128 1
+#define NUM_FEATURES_128 6
+#define NUM_LAYER_0_UNITS_128 16
+#define NUM_LOGITS_128 1
+
+static const float full_pixel_motion_search_based_split_layer_0_kernel_128[] = {
+ -0.807346f, 0.242298f, 12.9862f, -1.19161f, 5.21734f, -1.1363f,
+ -2.39127f, 0.930915f, -2.44285f, -2.42966f, 5.73476f, 0.0506879f,
+ -0.234878f, -0.317875f, 0.361322f, 0.431648f, -0.39105f, -0.110225f,
+ -2.46236f, 0.979713f, -10.5596f, -7.76653f, -3.06518f, 2.42554f,
+ 0.0492961f, -0.467176f, 0.130746f, 0.494527f, -0.0336645f, 0.501755f,
+ 0.176486f, -0.869541f, 7.77757f, 6.81303f, 6.00771f, 7.35696f,
+ 0.150731f, -0.307017f, -0.437639f, -0.082924f, 0.379107f, 0.452278f,
+ -0.0143836f, -0.183691f, -0.604698f, -9.2681f, -2.06087f, 11.0256f,
+ 0.0487599f, -0.249168f, -0.180407f, 0.304772f, 0.218642f, -0.406073f,
+ -0.0289919f, -0.794381f, 5.45092f, 5.38374f, 3.25745f, 5.32903f,
+ 1.12718f, -0.0215478f, 2.78552f, 4.8951f, -0.959671f, 0.694264f,
+ -0.0611219f, -0.331937f, 0.258252f, -0.495331f, -0.285923f, 0.294713f,
+ -0.119947f, 0.0753204f, 10.2021f, -5.82147f, -12.0137f, 3.0365f,
+ 0.366697f, 0.142683f, -3.29731f, -5.76651f, -5.62578f, 10.9462f,
+ -0.325459f, 0.092602f, -0.868027f, -0.691768f, -0.292017f, -0.00841203f,
+ 0.702545f, -0.612227f, -7.68881f, 9.52225f, -1.18581f, -2.56762f
+};
+
+static const float full_pixel_motion_search_based_split_logits_kernel_128[] = {
+ 0.364895f, 0.577553f, 0.115758f, -0.999496f, 0.124885f, 3.23193f,
+ -0.00386642f, 0.970794f, 0.136637f, -4.28052f, -1.49234f, 0.370436f,
+ 0.576981f, -0.469656f, -0.124071f, 1.07669f
+};
+
+static const float full_pixel_motion_search_based_split_layer_0_bias_128[] = {
+ 1.32916f, 0.817212f, 0.0f, -0.921066f, 0.0f, 3.57649f,
+ -0.0204517f, 2.97286f, 0.0f, 5.49957f, -8.14518f, 0.0f,
+ 1.30826f, -0.349536f, -0.638933f, 5.4496f
+};
+
+static const float full_pixel_motion_search_based_split_logits_bias_128[] = {
+ 0.683442f
+};
+
+static const NN_CONFIG full_pixel_motion_search_based_split_nn_config_128 = {
+ NUM_FEATURES_128,
+ NUM_LOGITS_128,
+ NUM_HIDDEN_LAYERS_128,
+ {
+ NUM_LAYER_0_UNITS_128,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_kernel_128,
+ full_pixel_motion_search_based_split_logits_kernel_128,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_bias_128,
+ full_pixel_motion_search_based_split_logits_bias_128,
+ },
+};
+
+static const float full_pixel_motion_search_based_split_thresh_128 = 2.0f;
+
+#undef NUM_HIDDEN_LAYERS_128
+#undef NUM_FEATURES_128
+#undef NUM_LAYER_0_UNITS_128
+#undef NUM_LOGITS_128
+
+// BLOCK_64X64
+#define NUM_HIDDEN_LAYERS_64 1
+#define NUM_FEATURES_64 6
+#define NUM_LAYER_0_UNITS_64 16
+#define NUM_LOGITS_64 1
+
+static const float full_pixel_motion_search_based_split_layer_0_kernel_64[] = {
+ 0.0345945f, -0.394064f, 0.0919978f, 0.270358f, -0.384502f, -0.504608f,
+ -0.25759f, 0.155981f, 2.62567f, -10.7204f, -0.709802f, 8.15948f,
+ 0.589866f, -0.445645f, -1.68232f, 10.0061f, -3.17671f, 4.87259f,
+ -0.448886f, -0.205568f, -0.462388f, 0.385001f, -0.451687f, 0.49602f,
+ -0.256708f, 0.803322f, 3.25594f, 0.38541f, -1.83867f, -2.15132f,
+ 0.936059f, -0.203056f, -5.92959f, -6.24554f, -6.68631f, -6.85977f,
+ -0.0407565f, -0.258902f, 0.195053f, -0.366515f, 0.339543f, -0.433017f,
+ -2.67026f, 0.385457f, 1.86683f, 1.9501f, 0.0381398f, 1.086f,
+ -0.153729f, 0.173772f, -42.9029f, -36.8934f, -2.892f, -0.0540691f,
+ 0.77469f, -0.380145f, 2.2689f, -9.53332f, 1.15712f, 2.86601f,
+ -0.437036f, 0.247132f, -8.51058f, -3.62972f, -8.99449f, -0.638738f,
+ 0.0609263f, -0.0614603f, 5.42307f, 5.35926f, 5.27437f, 5.26599f,
+ -0.0729677f, 0.0306104f, -7.77867f, 5.03598f, -8.17832f, 5.85461f,
+ -0.253269f, 0.164582f, -4.49713f, 3.83265f, 9.04851f, -2.85668f,
+ 1.22618f, 0.166904f, -1.51975f, -4.01576f, -1.44374f, -2.22147f,
+ -0.217072f, -0.0984913f, -0.265515f, 0.360021f, 0.0779512f, 0.361516f
+};
+
+static const float full_pixel_motion_search_based_split_logits_kernel_64[] = {
+ 0.470821f, 0.474747f, -0.571292f, 0.403221f, 0.628966f, -0.617029f,
+ 0.501105f, 0.499962f, -1.5451f, -0.473518f, -0.730568f, -5.55817f,
+ 0.776761f, 0.42569f, 0.311925f, 0.469968f
+};
+
+static const float full_pixel_motion_search_based_split_layer_0_bias_64[] = {
+ -0.134085f, 0.0758715f, 1.10419f, 0.0f, -5.75737f, 1.65494f,
+ 0.0f, 3.44047f, 0.394852f, 3.43858f, 3.65871f, -4.84987f,
+ 1.21207f, -1.7705f, -5.46469f, -0.0889634f
+};
+
+static const float full_pixel_motion_search_based_split_logits_bias_64[] = {
+ -0.479491f
+};
+
+static const NN_CONFIG full_pixel_motion_search_based_split_nn_config_64 = {
+ NUM_FEATURES_64,
+ NUM_LOGITS_64,
+ NUM_HIDDEN_LAYERS_64,
+ {
+ NUM_LAYER_0_UNITS_64,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_kernel_64,
+ full_pixel_motion_search_based_split_logits_kernel_64,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_bias_64,
+ full_pixel_motion_search_based_split_logits_bias_64,
+ },
+};
+
+static const float full_pixel_motion_search_based_split_thresh_64 = 2.0f;
+
+#undef NUM_HIDDEN_LAYERS_64
+#undef NUM_FEATURES_64
+#undef NUM_LAYER_0_UNITS_64
+#undef NUM_LOGITS_64
+
+// BLOCK_32X32
+#define NUM_HIDDEN_LAYERS_32 1
+#define NUM_FEATURES_32 6
+#define NUM_LAYER_0_UNITS_32 16
+#define NUM_LOGITS_32 1
+
+static const float full_pixel_motion_search_based_split_layer_0_kernel_32[] = {
+ -1.61796f, 0.0585128f, 1.57904f, 1.52703f, 0.367779f, 0.220434f,
+ 1.66652f, -1.77782f, 6.41118f, 4.16976f, 4.97299f, 4.84111f,
+ -0.0956536f, -0.163284f, -0.143662f, 0.129329f, 0.449659f, -0.528844f,
+ -1.00067f, 1.17203f, -4.26777f, -4.78521f, 8.45658f, -3.49498f,
+ -1.78386f, 0.111488f, 4.176f, 6.31911f, -10.5369f, 6.26983f,
+ -1.32233f, 1.22999f, -4.1666f, -10.0359f, -4.14779f, -10.4695f,
+ 1.83011f, -0.333152f, -9.87986f, -8.11992f, -8.2775f, -7.79918f,
+ -0.101404f, 0.00401393f, 8.89046f, -7.32186f, -6.59597f, 9.66257f,
+ -1.1492f, 1.23067f, -3.6341f, 6.59275f, -3.2373f, -3.42564f,
+ 0.371736f, -0.140902f, -2.75715f, 5.92487f, -7.9185f, 9.13743f,
+ -3.52698f, -0.191044f, 5.96691f, 6.26327f, 4.36378f, 5.69354f,
+ -0.608845f, -0.191236f, -0.482191f, -0.180474f, -3.8838f, -3.92934f,
+ -1.03191f, 0.994568f, 7.95516f, -4.0035f, -2.86266f, -4.96105f,
+ 1.75022f, 0.125058f, -1.52159f, -3.59304f, -2.82634f, -2.49556f,
+ -2.05557f, -0.222577f, 3.7608f, 5.50475f, 2.7046f, 5.25952f,
+ -1.91327f, -0.0356497f, 1.47611f, 1.27499f, -1.76108f, -0.578954f
+};
+
+static const float full_pixel_motion_search_based_split_logits_kernel_32[] = {
+ -0.220382f, -0.693902f, 0.424827f, 0.379952f, -0.413791f, -0.326785f,
+ -0.455086f, 0.242402f, 0.307986f, 0.175746f, 0.498901f, -0.628053f,
+ 0.285447f, 0.230052f, 0.415151f, -0.842946f
+};
+
+static const float full_pixel_motion_search_based_split_layer_0_bias_32[] = {
+ -1.80751f, 6.40356f, -0.0512058f, -4.59163f, -0.369933f, -0.195755f,
+ -0.16648f, -0.599755f, -5.35975f, -1.21349f, 2.48414f, 1.07096f,
+ -3.66684f, -6.17761f, 4.2159f, -1.05286f
+};
+
+static const float full_pixel_motion_search_based_split_logits_bias_32[] = {
+ -2.58676f
+};
+
+static const NN_CONFIG full_pixel_motion_search_based_split_nn_config_32 = {
+ NUM_FEATURES_32,
+ NUM_LOGITS_32,
+ NUM_HIDDEN_LAYERS_32,
+ {
+ NUM_LAYER_0_UNITS_32,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_kernel_32,
+ full_pixel_motion_search_based_split_logits_kernel_32,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_bias_32,
+ full_pixel_motion_search_based_split_logits_bias_32,
+ },
+};
+
+static const float full_pixel_motion_search_based_split_thresh_32 = 2.0f;
+
+#undef NUM_HIDDEN_LAYERS_32
+#undef NUM_FEATURES_32
+#undef NUM_LAYER_0_UNITS_32
+#undef NUM_LOGITS_32
+
+// BLOCK_16X16
+#define NUM_HIDDEN_LAYERS_16 1
+#define NUM_FEATURES_16 6
+#define NUM_LAYER_0_UNITS_16 16
+#define NUM_LOGITS_16 1
+
+static const float full_pixel_motion_search_based_split_layer_0_kernel_16[] = {
+ -0.611497f, -0.0422086f, -0.555957f, -0.632451f, -0.144179f, -0.152722f,
+ -0.330265f, -0.419866f, 0.287343f, 0.385295f, -0.424486f, 0.424281f,
+ 2.27442f, -2.47933f, 5.24731f, 4.33827f, 4.73215f, 3.41909f,
+ 1.16058f, -0.364505f, 0.12207f, -0.287749f, 0.0509783f, -0.0200119f,
+ 1.52907f, -1.1905f, -2.56978f, -3.00186f, -3.56084f, -3.89276f,
+ 0.00365657f, 1.57125f, -4.421f, -2.48803f, -2.51531f, -4.28646f,
+ 2.52248f, -1.03377f, -1.09607f, -1.44633f, -1.58736f, -1.25927f,
+ -1.45841f, -0.566619f, -0.246166f, -0.182289f, -0.238156f, 0.177991f,
+ 0.0112509f, -0.17677f, -0.485877f, 0.0812852f, 0.104975f, 0.222793f,
+ -0.372858f, -0.48624f, -0.00870389f, -0.385019f, 0.405842f, 0.288523f,
+ 0.167374f, -0.204208f, -8.74148f, -8.59267f, -8.42492f, -8.3778f,
+ -5.57063f, -0.406818f, -0.873199f, -0.896224f, -0.701479f, -0.985736f,
+ -0.625956f, -0.0446202f, -0.509987f, -0.321804f, -0.470759f, -0.248556f,
+ -0.369436f, -0.160828f, 0.0591148f, 0.405218f, 0.142584f, -0.130106f,
+ 0.125321f, 0.0888179f, 7.34822f, -6.71488f, -7.06592f, 6.33224f,
+ 0.0333619f, -0.377782f, 0.160767f, -0.128169f, -0.484818f, -0.311973f
+};
+
+static const float full_pixel_motion_search_based_split_logits_kernel_16[] = {
+ -0.132207f, 0.15176f, -0.680086f, 0.605921f, -0.43294f, 0.485811f,
+ -0.306286f, 0.551368f, 0.413904f, 0.548748f, -0.437391f, 0.560778f,
+ -0.00685266f, -0.558657f, 0.122127f, 0.260165f
+};
+
+static const float full_pixel_motion_search_based_split_layer_0_bias_16[] = {
+ -0.200928f, -0.074132f, 8.69963f, -9.00807f, 9.08983f, -6.83586f,
+ -3.89329f, 10.4881f, -0.0670618f, 0.0f, 9.21614f, 8.41773f,
+ -0.145851f, 0.0f, -1.43038f, -0.0460311f
+};
+
+static const float full_pixel_motion_search_based_split_logits_bias_16[] = {
+ -4.19885f
+};
+
+static const NN_CONFIG full_pixel_motion_search_based_split_nn_config_16 = {
+ NUM_FEATURES_16,
+ NUM_LOGITS_16,
+ NUM_HIDDEN_LAYERS_16,
+ {
+ NUM_LAYER_0_UNITS_16,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_kernel_16,
+ full_pixel_motion_search_based_split_logits_kernel_16,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_bias_16,
+ full_pixel_motion_search_based_split_logits_bias_16,
+ },
+};
+
+static const float full_pixel_motion_search_based_split_thresh_16 = 2.0f;
+
+#undef NUM_HIDDEN_LAYERS_16
+#undef NUM_FEATURES_16
+#undef NUM_LAYER_0_UNITS_16
+#undef NUM_LOGITS_16
+
+#if !CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8
+// BLOCK_8X8
+#define NUM_HIDDEN_LAYERS_8 1
+#define NUM_FEATURES_8 6
+#define NUM_LAYER_0_UNITS_8 16
+#define NUM_LOGITS_8 1
+
+static const float full_pixel_motion_search_based_split_layer_0_kernel_8[] = {
+ 0.0370236f, -0.580211f, 2.0134f, 1.69637f, 2.43181f, -0.521648f,
+ -0.00375187f, 0.122712f, -4.74411f, 7.36187f, 5.42574f, -5.53557f,
+ 0.0993344f, -0.358843f, 0.0765453f, -0.615987f, -0.754633f, -0.175846f,
+ 0.714976f, 0.492862f, 0.346604f, -1.23922f, -2.67031f, 2.12749f,
+ 1.71511f, -1.4239f, 2.09396f, 2.42478f, 2.40151f, 2.90487f,
+ 0.540813f, -0.0954257f, -4.57571f, -4.88078f, -4.62386f, -5.75167f,
+ 1.35351f, -1.08114f, 1.43744f, 1.44333f, 0.608153f, 0.193742f,
+ -0.405512f, -0.155164f, 0.0771456f, -0.473182f, -0.057984f, 0.140435f,
+ 0.743021f, -0.418589f, -0.377622f, -0.531411f, -0.668025f, -0.826607f,
+ 1.37834f, -1.07753f, 0.870466f, 0.516756f, 0.708689f, 0.286795f,
+ -3.97895f, -0.338629f, 2.79427f, 1.80561f, 1.46275f, 1.50438f,
+ 0.0232533f, -0.43174f, -0.348251f, 0.0863006f, 0.0321103f, 0.129674f,
+ -1.12024f, -0.0990596f, -0.283472f, -0.238713f, -0.239175f, -0.40816f,
+ -0.00106566f, 0.0972736f, 5.19284f, -3.70862f, 6.39657f, -5.27588f,
+ -2.08003f, 0.38825f, 2.38771f, -1.27501f, -2.45619f, 3.07324f,
+ 0.616966f, -0.451472f, -0.319365f, 0.00807278f, -0.303261f, -0.351679f
+};
+
+static const float full_pixel_motion_search_based_split_logits_kernel_8[] = {
+ -0.625847f, 0.381323f, 0.342475f, 0.526161f, -0.665965f, -0.515317f,
+ -0.406218f, 0.568007f, 0.479397f, -0.426116f, 0.615638f, 0.338572f,
+ 0.185583f, 0.308031f, 0.260748f, 0.531619f
+};
+
+static const float full_pixel_motion_search_based_split_layer_0_bias_8[] = {
+ 4.73775f, -1.12658f, -0.258038f, -6.06696f, 1.79131f, 2.49609f,
+ 4.28388f, 0.0f, -4.63598f, 3.06034f, 5.31994f, -0.152142f,
+ 0.514738f, -1.30098f, 3.00296f, -3.83481f
+};
+
+static const float full_pixel_motion_search_based_split_logits_bias_8[] = {
+ -3.44508f
+};
+
+static const NN_CONFIG full_pixel_motion_search_based_split_nn_config_8 = {
+ NUM_FEATURES_8,
+ NUM_LOGITS_8,
+ NUM_HIDDEN_LAYERS_8,
+ {
+ NUM_LAYER_0_UNITS_8,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_kernel_8,
+ full_pixel_motion_search_based_split_logits_kernel_8,
+ },
+ {
+ full_pixel_motion_search_based_split_layer_0_bias_8,
+ full_pixel_motion_search_based_split_logits_bias_8,
+ },
+};
+
+static const float full_pixel_motion_search_based_split_thresh_8 = 2.0f;
+
+#undef NUM_HIDDEN_LAYERS_8
+#undef NUM_FEATURES_8
+#undef NUM_LAYER_0_UNITS_8
+#undef NUM_LOGITS_8
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 48abe7c..79aff37 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -235,6 +235,8 @@
// identify the appropriate tradeoff between encoder performance and its
// speed.
sf->prune_single_motion_modes_by_simple_trans = 1;
+
+ sf->full_pixel_motion_search_based_split = 1;
}
if (speed >= 2) {
@@ -480,8 +482,10 @@
sf->ml_prune_ab_partition = 0;
sf->ml_prune_4_partition = 0;
sf->fast_cdef_search = 0;
- for (i = 0; i < PARTITION_BLOCK_SIZES; ++i)
+ for (i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
sf->ml_partition_search_breakout_thresh[i] = -1; // -1 means not enabled.
+ }
+ sf->full_pixel_motion_search_based_split = 0;
// Set this at the appropriate speed levels
sf->use_transform_domain_distortion = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7041eab..070d199 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -576,6 +576,10 @@
// Prune intra mode candidates based on source block gradient stats.
int intra_angle_estimation;
+
+ // Performs full pixel motion search before none_partition to decide if we
+ // want to split directly without trying other partition types.
+ int full_pixel_motion_search_based_split;
} SPEED_FEATURES;
struct AV1_COMP;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 37e3c1b..feb9b5e 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -137,6 +137,8 @@
"AV1 experiment flag.")
set_aom_config_var(CONFIG_SHARP_SETTINGS 0 NUMBER "AV1 experiment flag.")
set_aom_config_var(CONFIG_ONE_PASS_SVM 0 NUMBER "AV1 experiment flag.")
+set_aom_config_var(CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 1 NUMBER
+ "Disable full_pixel_motion_search_based_split on BLOCK_8X8")
#
# Variables in this section control optional features of the build system.