Add a feature to terminate early in firstpass partition search

This commit adds an early termination speed feature to the first pass of
two pass partition search. Currently it is only enabled on speed >= 1
when resolution < 720p.

Performance:
 AVG_PSNR | OVR_PSNR |  SSIM   | AVG_SPDUP | SPDUP:PSNR |
 +0.009%  | +0.013%  | -0.002% |  +2.070%  |  222.2:1   |

STATS_CHANGED

Change-Id: Ic0f542b26d8c26a819a813da40a0044e15c401c9
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index cdff30a..98c1487 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -75,6 +75,9 @@ int mi_col, BLOCK_SIZE bsize, int ref, MV ref_mv_full, int num_planes, int use_subpixel); +static void firstpass_simple_motion_search_features( + AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, + int mi_col, BLOCK_SIZE bsize, float *features); // This is used as a reference when computing the source variance for the // purposes of activity masking. @@ -2421,6 +2424,74 @@ } } +#define NUM_FEATURES 20 +static void av1_firstpass_simple_motion_search_early_term( + AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, + int mi_col, BLOCK_SIZE bsize, const RD_STATS *none_rdc, + int *do_square_split) { + const NN_CONFIG *nn_config = NULL; + float thresh = 0.0f; + const float *ml_mean = NULL, *ml_std = NULL; + if (bsize == BLOCK_128X128) { + nn_config = &av1_fp_simple_motion_search_term_none_nn_config_128; + ml_mean = av1_fp_simple_motion_search_term_none_mean_128; + ml_std = av1_fp_simple_motion_search_term_none_std_128; + thresh = av1_fp_simple_motion_search_term_none_thresh_128; + } else if (bsize == BLOCK_64X64) { + nn_config = &av1_fp_simple_motion_search_term_none_nn_config_64; + ml_mean = av1_fp_simple_motion_search_term_none_mean_64; + ml_std = av1_fp_simple_motion_search_term_none_std_64; + thresh = av1_fp_simple_motion_search_term_none_thresh_64; + } else if (bsize == BLOCK_32X32) { + nn_config = &av1_fp_simple_motion_search_term_none_nn_config_32; + ml_mean = av1_fp_simple_motion_search_term_none_mean_32; + ml_std = av1_fp_simple_motion_search_term_none_std_32; + thresh = av1_fp_simple_motion_search_term_none_thresh_32; + } else if (bsize == BLOCK_16X16) { + nn_config = &av1_fp_simple_motion_search_term_none_nn_config_16; + ml_mean = av1_fp_simple_motion_search_term_none_mean_16; + ml_std = av1_fp_simple_motion_search_term_none_std_16; + thresh = av1_fp_simple_motion_search_term_none_thresh_16; + } else if (bsize == BLOCK_8X8) { + nn_config = &av1_fp_simple_motion_search_term_none_nn_config_8; + ml_mean = 
av1_fp_simple_motion_search_term_none_mean_8; + ml_std = av1_fp_simple_motion_search_term_none_std_8; + thresh = av1_fp_simple_motion_search_term_none_thresh_8; + } else { + assert(0 && + "Unexpected bsize in firstpass_simple_motion_search_early_term"); + } + + if (nn_config && thresh <= -100.0f) { + float ml_features[NUM_FEATURES] = { 0.0f }; + + firstpass_simple_motion_search_features(cpi, x, pc_tree, mi_row, mi_col, + bsize, ml_features); + int f_idx = 17; + + ml_features[f_idx++] = logf(1.0f + (float)none_rdc->rate); + ml_features[f_idx++] = logf(1.0f + (float)none_rdc->dist); + ml_features[f_idx++] = logf(1.0f + (float)none_rdc->rdcost); + + for (f_idx = 0; f_idx < 20; f_idx++) { + ml_features[f_idx] = + (ml_features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; + } + + // Get probabilities + float score = 0.0f; + + av1_nn_predict(ml_features, nn_config, &score); + aom_clear_system_state(); + + // Determine if we should prune square partitions. + if (score < thresh) { + *do_square_split = 0; + } + } +} +#undef NUM_FEATURES + static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -2592,6 +2663,16 @@ do_square_split = 0; } } + + if (cpi->sf.firstpass_simple_motion_search_early_term && + cm->show_frame && bsize >= BLOCK_8X8 && !frame_is_intra_only(cm) && + mi_row + mi_step < cm->mi_rows && mi_col + mi_step < cm->mi_cols && + this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 && + this_rdc.rate < INT_MAX && this_rdc.rate >= 0 && do_square_split) { + av1_firstpass_simple_motion_search_early_term( + cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc, + &do_square_split); + } } } @@ -3344,15 +3425,18 @@ best_ref = ref; } + const int new_mv_row = x->best_mv.as_mv.row / 8; + const int new_mv_col = x->best_mv.as_mv.col / 8; if (save_mv_code == 4) { - pc_tree->mv_ref_fulls[ref].row = x->best_mv.as_mv.row / 8; - pc_tree->mv_ref_fulls[ref].col = x->best_mv.as_mv.col / 8; + 
pc_tree->mv_ref_fulls[ref].row = new_mv_row; + pc_tree->mv_ref_fulls[ref].col = new_mv_col; } else if (save_mv_code >= 0 && save_mv_code < 4) { // Propagate the new motion vectors to a lower level - pc_tree->split[save_mv_code]->mv_ref_fulls[ref].row = - x->best_mv.as_mv.row / 8; - pc_tree->split[save_mv_code]->mv_ref_fulls[ref].col = - x->best_mv.as_mv.col / 8; + pc_tree->split[save_mv_code]->mv_ref_fulls[ref].row = new_mv_row; + pc_tree->split[save_mv_code]->mv_ref_fulls[ref].col = new_mv_col; + } else { + assert(save_mv_code == -1 && + "Unknown code in simple_motion_search_get_best_ref."); } } } @@ -3782,6 +3866,66 @@ } } +static void firstpass_simple_motion_search_features( + AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, + int mi_col, BLOCK_SIZE bsize, float *features) { + assert(mi_size_wide[bsize] == mi_size_high[bsize]); + assert(cpi->ref_frame_flags & ref_frame_flag_list[LAST_FRAME] || + cpi->ref_frame_flags & ref_frame_flag_list[ALTREF_FRAME]); + + // Setting up motion search + const int ref_list[] = { LAST_FRAME, ALTREF_FRAME }; + const int num_refs = 2; + const int use_subpixel = 0; + + unsigned int int_features[10] = { 0 }; + + int f_idx = 0; + // Doing whole block first to update the mv + simple_motion_search_get_best_ref( + cpi, x, pc_tree, mi_row, mi_col, bsize, ref_list, num_refs, use_subpixel, + 4, &int_features[f_idx], &int_features[f_idx + 1]); + f_idx += 2; + + // Split subblocks + const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); + const int w_mi = mi_size_wide[bsize]; + const int h_mi = mi_size_high[bsize]; + for (int r_idx = 0; r_idx < 4; r_idx++) { + const int sub_mi_col = mi_col + (r_idx & 1) * w_mi / 2; + const int sub_mi_row = mi_row + (r_idx >> 1) * h_mi / 2; + + simple_motion_search_get_best_ref( + cpi, x, pc_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, + use_subpixel, r_idx, &int_features[f_idx], &int_features[f_idx + 1]); + f_idx += 2; + } + + aom_clear_system_state(); + for 
(int idx = 0; idx < f_idx; idx++) { + features[idx] = logf(1.0f + (float)int_features[idx]); + } + + const MACROBLOCKD *xd = &x->e_mbd; + set_offsets(cpi, &xd->tile, x, mi_row, mi_col, bsize); + + // Q_INDEX + const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); + features[f_idx++] = logf(1.0f + (float)(dc_q * dc_q) / 256.0f); + + // Neighbor stuff + const int has_above = !!xd->above_mbmi; + const int has_left = !!xd->left_mbmi; + const BLOCK_SIZE above_bsize = has_above ? xd->above_mbmi->sb_type : bsize; + const BLOCK_SIZE left_bsize = has_left ? xd->left_mbmi->sb_type : bsize; + features[f_idx++] = (float)has_above; + features[f_idx++] = (float)mi_size_wide_log2[above_bsize]; + features[f_idx++] = (float)mi_size_high_log2[above_bsize]; + features[f_idx++] = (float)has_left; + features[f_idx++] = (float)mi_size_wide_log2[left_bsize]; + features[f_idx++] = (float)mi_size_high_log2[left_bsize]; +} + // TODO(jinging,jimbankoski,rbultje): properly skip partition types that are // unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. @@ -5411,7 +5555,8 @@ pc_root->index = 0; if ((sf->simple_motion_search_prune_rect || - sf->simple_motion_search_early_term_none) && + sf->simple_motion_search_early_term_none || + sf->firstpass_simple_motion_search_early_term) && !frame_is_intra_only(cm)) { init_simple_motion_search_mvs(pc_root); }
diff --git a/av1/encoder/partition_model_weights.h b/av1/encoder/partition_model_weights.h index d571932..297bb68 100644 --- a/av1/encoder/partition_model_weights.h +++ b/av1/encoder/partition_model_weights.h
@@ -4383,6 +4383,470 @@
 static const float *av1_simple_motion_search_term_none_model_8 = NULL;
 
+static const float av1_fp_simple_motion_search_term_none_mean_128[20] = {  // Per-feature means subtracted from the 20 model inputs for BLOCK_128X128.
+  13.252574f, 13.231112f, 11.389298f, 11.357560f, 11.452713f,
+  11.422464f, 11.566155f, 11.533492f, 11.619192f, 11.588921f,
+  5.494323f, 0.681358f, 4.152762f, 4.180036f, 0.827457f,
+  3.988274f, 3.996767f, 10.000490f, 15.253966f, 20.198053f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_std_128[20] = {  // Per-feature divisors applied after mean subtraction for BLOCK_128X128.
+  1.903357f, 1.904164f, 2.023522f, 2.023138f, 1.973689f, 1.972377f, 1.910667f,
+  1.914049f, 1.866226f, 1.868023f, 2.273331f, 0.465951f, 1.209549f, 1.164535f,
+  0.377852f, 1.205961f, 1.188774f, 2.926349f, 1.562097f, 1.579077f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_mean_64[20] = {  // Per-feature means for BLOCK_64X64.
+  11.702271f, 11.665538f, 9.880902f, 9.832423f, 9.916618f,
+  9.870026f, 9.944735f, 9.895662f, 9.954350f, 9.906938f,
+  4.547847f, 0.844597f, 3.280917f, 3.298704f, 0.908336f,
+  3.235983f, 3.243364f, 10.002173f, 13.499030f, 18.515766f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_std_64[20] = {  // Per-feature divisors for BLOCK_64X64.
+  1.947901f, 1.948236f, 2.005683f, 2.003258f, 1.942182f, 1.940073f, 1.944885f,
+  1.945165f, 1.891692f, 1.892137f, 1.925008f, 0.362289f, 1.088381f, 1.069284f,
+  0.288552f, 1.095237f, 1.084837f, 2.320003f, 1.517385f, 1.545330f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_mean_32[20] = {  // Per-feature means for BLOCK_32X32.
+  10.216787f, 10.167575f, 8.405353f, 8.340786f, 8.436503f,
+  8.373259f, 8.444113f, 8.379074f, 8.448215f, 8.384669f,
+  4.107491f, 0.923902f, 2.702687f, 2.712742f, 0.953166f,
+  2.703244f, 2.707070f, 9.549801f, 12.013671f, 17.059454f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_std_32[20] = {  // Per-feature divisors for BLOCK_32X32.
+  1.886182f, 1.886638f, 1.884324f, 1.883410f, 1.851800f, 1.851652f, 1.847129f,
+  1.848014f, 1.832187f, 1.832360f, 1.758185f, 0.265155f, 0.939592f, 0.932395f,
+  0.211284f, 0.950024f, 0.945295f, 1.846744f, 1.453674f, 1.505994f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_mean_16[20] = {  // Per-feature means for BLOCK_16X16.
+  9.131485f, 9.065489f, 7.254479f, 7.158092f, 7.274240f, 7.178158f, 7.278780f,
+  7.182110f, 7.278793f, 7.182714f, 3.981902f, 0.964040f, 2.080875f, 2.087185f,
+  0.973397f, 2.088189f, 2.090166f, 9.386505f, 10.826546f, 15.985614f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_std_16[20] = {  // Per-feature divisors for BLOCK_16X16.
+  1.681172f, 1.688587f, 1.710854f, 1.717533f, 1.684010f, 1.691476f, 1.683537f,
+  1.691523f, 1.674699f, 1.682130f, 1.639731f, 0.186191f, 0.796448f, 0.795075f,
+  0.160921f, 0.791005f, 0.790048f, 1.430960f, 1.337976f, 1.370498f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_mean_8[20] = {  // Per-feature means for BLOCK_8X8.
+  7.821461f, 7.714526f, 5.799360f, 5.606948f, 5.805885f, 5.614357f, 5.794252f,
+  5.599669f, 5.798780f, 5.605399f, 4.069016f, 0.977720f, 1.577513f, 1.581266f,
+  0.983371f, 1.524603f, 1.524952f, 9.221803f, 9.508886f, 14.972815f,
+};
+
+static const float av1_fp_simple_motion_search_term_none_std_8[20] = {  // Per-feature divisors for BLOCK_8X8.
+  1.618036f, 1.634415f, 1.652861f, 1.672006f, 1.646337f, 1.664935f, 1.650876f,
+  1.670476f, 1.645141f, 1.664301f, 1.502258f, 0.147592f, 0.760353f, 0.762547f,
+  0.127879f, 0.741096f, 0.742186f, 1.042003f, 1.292524f, 1.250398f,
+};
+
+static const NN_CONFIG av1_fp_simple_motion_search_term_none_nn_config_128 = {};  // Empty initializer: no layer sizes, weights or biases are given for 128x128.
+
+static const NN_CONFIG av1_fp_simple_motion_search_term_none_nn_config_64 = {};  // Empty initializer: no layer sizes, weights or biases are given for 64x64.
+
+#define NUM_HIDDEN_LAYERS_32 1
+#define NUM_FEATURES_32 20
+#define NUM_LAYER_0_UNITS_32 20
+#define NUM_LOGITS_32 1
+
+static const float
+    av1_fp_simple_motion_search_term_none_hiddenlayer_0_kernel_32[] = {  // presumably NUM_LAYER_0_UNITS_32 x NUM_FEATURES_32 weights; confirm layout against av1_nn_predict()
+      -0.293987f, 0.796773f, -0.0888487f, -0.00796495f, -0.343768f,
+      0.0783252f, 0.0596814f, -0.235432f, -0.0780005f, -0.409017f,
+      -0.256821f, -0.281654f, 1.00889f, 0.701893f, -0.0181661f,
+      0.119718f, 0.0956582f, 0.76792f, 0.235693f, 0.351628f,
+      -1.28111f, -1.45847f, 0.387732f, 0.476054f, 0.384561f,
+      0.427465f, 0.11875f, -0.0176598f, -0.0528453f, 0.395589f,
+      -0.331994f, 0.0442108f, 0.195171f, -0.0377402f, -0.0736457f,
+      -0.0490903f, 0.116165f, -0.549512f, 0.12968f, 0.641055f,
+      -1.03066f, -0.601979f, 0.351981f, -0.122019f, 0.00869275f,
+      0.399222f, -0.343995f, -0.444257f, -0.160805f, -0.537537f,
+      0.261478f, -0.163785f, 0.218916f, 0.106506f, -0.103819f,
+      0.0121841f, 0.284757f, -0.362989f, 1.10793f, 0.477236f,
+      -0.424117f, -0.884156f, -0.468291f, -0.510531f, 0.791441f,
+      0.75243f, 0.839871f, 0.604127f, -0.182956f, -0.246703f,
+      -1.25861f, 0.0546303f, 0.0811323f, 0.00655988f, 0.0286305f,
+      -0.00938366f, -0.0291418f, -0.231632f, -0.331077f, 1.12479f,
+      -0.635514f, -0.146066f, 0.853122f, 0.923699f, 0.180011f,
+      -0.252973f, 0.1474f, -0.454344f, 0.354736f, 0.576872f,
+      -1.43275f, 0.0327868f, 0.140849f, -0.102523f, 0.0524867f,
+      0.007091f, -0.00232578f, -0.536116f, -0.700144f, 0.166646f,
+      0.0636548f, 0.44645f, -0.346062f, -0.685779f, -1.0792f,
+      -0.999219f, 0.442744f, 0.371198f, 0.777914f, 0.719409f,
+      -0.417984f, 0.0602868f, 0.0225539f, 0.0457407f, 0.0249501f,
+      0.0126021f, 0.00450792f, 0.0485095f, 0.203485f, 0.584116f,
+      -0.599426f, -0.244633f, 0.168231f, -0.00134934f, -0.106987f,
+      -0.0490239f, -0.22029f, 0.138017f, 0.373674f, 0.00638684f,
+      -2.08003f, 0.106453f, 0.124456f, -0.0286108f, 0.0422698f,
+      0.013734f, 0.0780971f, -0.40173f, 0.473453f, 1.16836f,
+      -0.251035f, 0.0119074f, 0.319241f, 0.0422023f, -0.730454f,
+      -0.745948f, 0.796709f, 0.277634f, 0.09711f, -0.212224f,
+      0.825348f, 0.0208521f, -0.0238098f, 0.00929265f, 0.0516351f,
+      -0.02329f, 0.0983163f, -0.180721f, 0.0122096f, -0.246159f,
+      0.61468f, 0.923765f, 0.240435f, -0.294845f, -0.495317f,
+      -0.0563837f, -0.417936f, 0.154874f, -0.604407f, -0.0681337f,
+      -0.65738f, -0.0270073f, 0.0920023f, -0.0742724f, 0.820862f,
+      -0.602758f, -1.20617f, -0.201707f, 0.869499f, -0.0539076f,
+      0.403097f, 0.429168f, -0.938227f, -0.830894f, -0.362462f,
+      -0.0658648f, 0.471469f, -0.264827f, 0.610275f, 0.367995f,
+      0.735662f, -0.0473157f, -0.0380545f, -0.0848067f, -0.146108f,
+      -0.125875f, -0.0576117f, -0.296198f, -0.100443f, -0.212971f,
+      0.593524f, 1.23111f, -0.810009f, -0.604572f, 0.203021f,
+      0.256285f, -1.17049f, -1.19156f, 0.24365f, 0.727876f,
+      -0.466826f, 0.0298762f, -0.0331735f, -0.0109056f, 0.0114862f,
+      0.00396703f, 0.0385985f, -0.0587946f, 0.821079f, 0.0582033f,
+      0.349156f, 1.03529f, -0.407036f, 0.200308f, -0.265649f,
+      -0.104567f, 0.161149f, -0.0717528f, -0.0112724f, 0.0681578f,
+      0.103809f, -0.0807997f, 0.0316814f, -0.332323f, 0.112254f,
+      -0.163981f, 0.118988f, -0.777055f, -1.34047f, -0.910482f,
+      0.74599f, -0.59633f, 0.165649f, -0.594998f, 0.0845802f,
+      0.00440975f, 0.122606f, -0.463991f, 0.418502f, -0.339126f,
+      1.41847f, -0.109594f, -0.411879f, -0.444865f, -0.0404821f,
+      -0.0607352f, -0.663753f, -0.724327f, -0.138642f, 0.834144f,
+      -0.811695f, -0.930264f, 0.150993f, -0.325565f, 0.0615853f,
+      -0.473993f, 0.0966587f, 0.315197f, 1.0345f, 0.35441f,
+      0.703234f, -0.335715f, 0.783153f, 0.467976f, -0.0234736f,
+      0.549724f, 0.539107f, -0.510182f, -0.154442f, 0.0126656f,
+      1.66711f, 0.884555f, 0.118675f, -0.341705f, 0.195316f,
+      -0.0366564f, -0.619244f, -0.634092f, -0.559951f, 0.0564255f,
+      0.765917f, 0.0510238f, 0.0667615f, 0.0699302f, -0.0351751f,
+      -0.0484402f, -0.000792665f, -0.10775f, -0.337121f, -0.983947f,
+      0.517793f, 1.34977f, -0.567602f, 0.129921f, -0.443722f,
+      -0.276277f, -0.501404f, -0.183234f, -0.553055f, -0.447434f,
+      -0.35529f, -0.0444689f, 0.0192031f, 0.0372702f, -0.195202f,
+      -0.020753f, -0.0247035f, 0.420298f, 1.39373f, 0.203699f,
+      -0.218818f, 0.250734f, -0.0282348f, 0.411986f, -0.262946f,
+      0.526339f, 0.242769f, -0.159857f, -0.546788f, -0.0410147f,
+      0.954238f, -0.0252765f, 0.639488f, -0.491367f, -0.0572638f,
+      0.285763f, -0.45764f, 0.121657f, -1.24374f, -0.372479f,
+      -0.111521f, 0.194134f, -0.271364f, 0.179678f, 0.121237f,
+      -0.14305f, -0.205662f, 0.216891f, 0.344568f, -0.523745f,
+      -1.00908f, 0.180965f, 0.0263031f, -0.0556144f, 0.0831083f,
+      -0.0623274f, 0.112748f, 0.597137f, -0.502616f, -1.10624f,
+      -0.0487462f, -1.10744f, -0.125653f, 0.277049f, -0.141329f,
+      -0.00457003f, -0.161038f, 0.588462f, 0.323317f, 0.49762f,
+      0.477561f, 0.901705f, -0.264511f, 0.256557f, 0.076023f,
+      -0.0460696f, 0.0830666f, -0.0651269f, -0.881245f, -0.285999f,
+      0.53127f, 0.914533f, 0.0505795f, -0.3054f, -0.0988696f,
+      -0.0658403f, 0.15979f, -0.453316f, -0.824834f, -0.280222f,
+      -0.686952f, -0.0768344f, -1.12235f, -0.815408f, 0.0202134f,
+      -0.111892f, 0.0847659f, -0.18763f, 0.597782f, 0.364016f
+    };
+
+static const float
+    av1_fp_simple_motion_search_term_none_hiddenlayer_0_bias_32[] = {  // One bias per hidden unit (20 values).
+      -1.541f, -0.00935641f, -1.50754f, -0.638648f, -0.679403f,
+      -0.0387804f, -0.714791f, -1.69522f, 0.435677f, -1.5846f,
+      0.108788f, 0.614982f, 0.111048f, -0.465826f, -0.611358f,
+      0.637197f, 0.929621f, -1.20889f, 0.954558f, 0.716529f
+    };
+
+static const float av1_fp_simple_motion_search_term_none_logits_kernel_32[] = {  // Output-layer weights (20 values).
+  0.396195f, -0.791364f, -0.881893f, 1.0542069f, 0.772562f,
+  0.60815647f, 1.117405f, -1.272638f, 0.483183f, -0.917147f,
+  0.690799f, -0.601466f, -0.545536f, -0.416353f, -0.927874f,
+  0.972198f, -0.3770457f, 0.542694f, -0.591889f, 0.464565f
+};
+
+static const float av1_fp_simple_motion_search_term_none_logits_bias_32[] = {  // Output-layer bias (single value).
+  -0.590318f
+};
+
+static const NN_CONFIG av1_fp_simple_motion_search_term_none_nn_config_32 = {  // Network used when bsize == BLOCK_32X32.
+  NUM_FEATURES_32,
+  NUM_LOGITS_32,
+  NUM_HIDDEN_LAYERS_32,
+  {
+      NUM_LAYER_0_UNITS_32,
+  },
+  {
+      av1_fp_simple_motion_search_term_none_hiddenlayer_0_kernel_32,
+      av1_fp_simple_motion_search_term_none_logits_kernel_32,
+  },
+  {
+      av1_fp_simple_motion_search_term_none_hiddenlayer_0_bias_32,
+      av1_fp_simple_motion_search_term_none_logits_bias_32,
+  },
+};
+
+#undef NUM_HIDDEN_LAYERS_32
+#undef NUM_FEATURES_32
+#undef NUM_LAYER_0_UNITS_32
+#undef NUM_LOGITS_32
+
+#define NUM_HIDDEN_LAYERS_16 1
+#define NUM_FEATURES_16 20
+#define NUM_LAYER_0_UNITS_16 24
+#define NUM_LOGITS_16 1
+
+static const float
+    av1_fp_simple_motion_search_term_none_hiddenlayer_0_kernel_16[] = {  // presumably NUM_LAYER_0_UNITS_16 x NUM_FEATURES_16 weights; confirm layout against av1_nn_predict()
+      -0.315922f, 0.74455f, -0.0196939f, 0.238336f, 0.288554f,
+      0.0845902f, -0.0121831f, 0.455303f, 0.0235902f, 0.218997f,
+      -0.0445164f, 0.0752211f, 0.0539915f, -0.0439682f, -0.397139f,
+      -0.0030004f, -0.106365f, 0.845384f, 0.684638f, -0.965702f,
+      0.307643f, -0.0433377f, -0.0644826f, -0.214946f, -0.44467f,
+      0.142967f, 0.0109982f, -0.344458f, -0.42947f, 0.269175f,
+      -0.88534f, -0.28077f, -1.36018f, -0.33725f, -0.0885953f,
+      -0.123887f, 0.218107f, -0.0759977f, 0.739124f, 0.684048f,
+      0.577964f, -0.328481f, -0.247837f, 0.00546713f, 0.191895f,
+      -0.145274f, 0.320121f, -0.482379f, 0.534585f, -0.1582f,
+      0.944784f, 0.944665f, 0.0494451f, -0.0399724f, -0.170375f,
+      -0.0869746f, 0.106216f, -0.120556f, -1.57849f, -0.752895f,
+      0.424454f, -0.0269515f, 0.00398589f, 0.214165f, -0.142986f,
+      0.199223f, 0.049624f, -0.116783f, -0.648119f, -0.311599f,
+      0.122629f, -0.0338422f, 0.345092f, -0.408254f, 0.601037f,
+      -0.00146985f, 0.00133926f, 0.0392668f, -0.931156f, 0.31429f,
+      -0.150243f, 0.0755763f, -0.32177f, 0.258521f, -0.104078f,
+      -0.144506f, 0.0199566f, -0.454723f, -0.292959f, -0.0953681f,
+      -1.24843f, 0.446814f, -0.311363f, 0.0590878f, -0.0568717f,
+      -0.421585f, 0.179852f, 0.668763f, 0.48914f, 0.290584f,
+      -1.14053f, -1.37576f, 0.420112f, -0.158582f, 0.268231f,
+      0.252999f, 0.276423f, 0.529033f, 0.141127f, 0.702762f,
+      0.181407f, -0.0279289f, -0.0194757f, 0.0752152f, -0.136963f,
+      0.00902489f, 0.125334f, 0.0680212f, -0.370449f, 0.438003f,
+      -0.600869f, 0.154209f, -0.36306f, -0.484209f, 0.140093f,
+      0.0743079f, -0.143317f, 0.0442872f, 0.272089f, 0.601531f,
+      1.20687f, -0.280695f, 0.222235f, -0.0106747f, -0.017026f,
+      0.204008f, -0.0316111f, -0.64679f, -0.866749f, -0.774231f,
+      0.306231f, -0.0940114f, -0.56555f, -0.34399f, 0.425142f,
+      0.424064f, -0.50189f, -0.146558f, 0.544899f, 0.141728f,
+      1.14592f, -0.0124826f, 0.111613f, -0.0862228f, 0.0211737f,
+      0.0614017f, 0.0245077f, -0.454523f, -0.0766391f, -0.436808f,
+      0.251409f, -0.13354f, -0.242447f, -0.311807f, -0.844505f,
+      -0.671486f, 0.0946297f, 0.241702f, 0.856521f, 0.529763f,
+      -0.869772f, -0.0016341f, 0.14511f, 0.0136254f, -0.0359721f,
+      -0.0454713f, 0.00664495f, 0.0373555f, 0.653991f, -0.075867f,
+      -0.102728f, -0.947685f, -0.119479f, -0.145413f, 0.148364f,
+      0.310885f, -0.266837f, 0.354087f, 0.299469f, 0.603911f,
+      0.257161f, 0.0190527f, 0.152862f, -0.0987196f, -0.293369f,
+      0.139026f, -0.128421f, 0.0505933f, -0.703803f, 1.08628f,
+      -0.562294f, -0.818943f, 0.102178f, 0.727399f, -0.228433f,
+      0.484057f, 0.0595919f, -0.0559087f, -0.549447f, 0.176168f,
+      1.41744f, -0.126284f, 0.0987251f, -0.00123073f, 0.00510827f,
+      0.105209f, 0.0671775f, -0.438525f, 0.211028f, -0.782459f,
+      0.286411f, -0.459887f, 0.0633669f, 0.329958f, -0.0736945f,
+      0.45188f, -0.2447f, 0.676601f, 0.600321f, -0.0336198f,
+      0.108531f, 0.0452834f, -0.0848577f, 0.0731281f, 1.32381f,
+      -0.118349f, 0.129497f, -0.840938f, -1.45444f, -0.559047f,
+      -0.248109f, -0.491559f, -0.139812f, 0.175964f, 0.168687f,
+      0.123031f, 0.201625f, 0.422849f, 0.34436f, 0.0426694f,
+      0.558045f, -0.246772f, 0.679483f, -0.0959578f, -0.102879f,
+      0.391029f, 0.280906f, 0.0867408f, -1.10932f, 0.402526f,
+      -0.227285f, 0.336087f, -0.237765f, 0.185619f, -0.309732f,
+      0.0781132f, -0.0234955f, 0.0828806f, 0.19966f, -0.241288f,
+      -0.224634f, 0.0638918f, -0.143521f, -0.0206692f, -0.27131f,
+      0.973051f, 1.12031f, 0.262846f, 0.471585f, 0.105231f,
+      -0.386434f, -0.355846f, 0.7359f, 0.567308f, 0.130768f,
+      0.242369f, -0.0272523f, -0.118436f, 0.374145f, 0.24802f,
+      -1.00186f, -0.0241195f, 0.0140446f, 0.0202831f, 0.163197f,
+      0.0399298f, -0.00912791f, -0.280572f, -0.309893f, -0.644495f,
+      0.243838f, 0.731391f, 0.0725078f, 0.350308f, -0.136691f,
+      0.208814f, 0.0218567f, -0.0805393f, -0.18681f, -0.214638f,
+      0.273354f, -0.355047f, 0.242748f, 0.472951f, -0.202705f,
+      0.405247f, 0.161622f, -0.284883f, -1.31181f, -0.661056f,
+      -0.248219f, -0.827307f, 0.289221f, 0.660529f, 0.48563f,
+      0.407366f, 0.0327303f, -0.0610309f, -0.647064f, 0.0899991f,
+      0.376267f, 1.27555f, 0.0264175f, 0.153931f, 1.07345f,
+      0.0715052f, 0.174473f, 0.01322f, -0.715723f, 0.113909f,
+      0.100968f, -0.457287f, -0.672022f, -0.20532f, 0.895176f,
+      0.357034f, 0.5413f, 0.918393f, -0.455f, -0.499617f,
+      -1.21799f, 0.0634338f, 0.144944f, -0.106715f, 0.0227713f,
+      -0.0203213f, 0.030851f, -0.0726756f, 0.589192f, -0.060841f,
+      -0.198521f, 0.497179f, -0.0591156f, -0.135466f, -0.132638f,
+      -0.181333f, -0.332358f, 0.0349959f, 0.212885f, -0.536206f,
+      -0.425009f, -0.035525f, 0.0384449f, 0.0360549f, -0.0383953f,
+      -0.0263281f, -0.0228435f, 1.11771f, 0.928061f, -0.163923f,
+      -0.327868f, -0.894518f, 0.00448907f, 0.0805977f, 0.329559f,
+      0.157429f, 0.292729f, 0.497688f, 0.188659f, 0.203724f,
+      -1.26001f, -0.0392533f, -0.0566088f, 0.000859925f, 0.125254f,
+      0.054261f, 0.0357295f, -0.393813f, -0.275944f, 0.299657f,
+      -0.211421f, 0.038172f, -0.439829f, -0.913949f, 0.35642f,
+      0.865473f, -0.472033f, -0.752376f, 0.995255f, 0.417965f,
+      -0.680645f, 0.0622027f, 0.128878f, -0.0357859f, 0.0793577f,
+      0.203629f, -0.0600867f, 0.0512268f, 0.528584f, 0.23889f,
+      0.38255f, -0.216407f, -0.0338828f, 0.0328103f, -0.885678f,
+      -0.716634f, 0.438663f, 0.320841f, -0.119656f, 0.626092f,
+      0.8526f, -0.0325005f, -0.0275416f, -0.171131f, 0.0260563f,
+      -0.0162027f, 0.0879367f, -0.340473f, 0.0220265f, -0.1731f,
+      0.512539f, 0.587822f, -0.175619f, 0.177215f, -0.35458f,
+      -0.159059f, -0.423754f, 0.0198413f, -0.336208f, -0.359052f,
+      -1.50819f, 0.0628184f, 0.054506f, 0.0048834f, 0.361657f,
+      0.00986886f, -0.0721521f, -0.256765f, 1.41173f, 0.376196f,
+      -0.0783331f, 0.174803f, -0.00240091f, -0.306571f, -0.304654f,
+      -0.0348377f, 0.115569f, -0.20359f, -0.162341f, -0.0443526f,
+      -0.848317f, -0.228167f, 0.699534f, 0.482092f, -0.0921484f,
+      -0.172425f, -0.0610094f, -0.188327f, 0.836209f, 0.541725f
+    };
+
+static const float
+    av1_fp_simple_motion_search_term_none_hiddenlayer_0_bias_16[] = {  // One bias per hidden unit (24 values).
+      -0.388147f, -0.0868767f, 0.702129f, 0.376659f, -0.709988f, 0.496603f,
+      -0.238442f, -1.35761f, -0.391887f, 0.235468f, -0.327982f, 0.731842f,
+      1.0949f, -0.789218f, -0.881452f, 0.514341f, 0.727894f, -0.494498f,
+      -1.32304f, -1.22643f, -0.294287f, -1.3974f, -0.128148f, -0.0956137f
+    };
+
+static const float av1_fp_simple_motion_search_term_none_logits_kernel_16[] = {  // Output-layer weights (24 values).
+  0.456147f, 0.248707f, -0.5205241f, -0.1506567f, 0.388359f, -0.6074409f,
+  -0.4719775f, -0.733864f, 0.5588447f, -0.4021345f, -1.140733f, -0.73399f,
+  -0.4299591f, 0.450688f, 0.817564f, -0.265486f, -0.3525806f, 0.55188314f,
+  1.365457f, 1.180764f, 0.587772f, -0.870683f, 0.818839f, 0.318488f
+};
+
+static const float av1_fp_simple_motion_search_term_none_logits_bias_16[] = {  // Output-layer bias (single value).
+  -0.1046478f
+};
+
+static const NN_CONFIG av1_fp_simple_motion_search_term_none_nn_config_16 = {  // Network used when bsize == BLOCK_16X16.
+  NUM_FEATURES_16,
+  NUM_LOGITS_16,
+  NUM_HIDDEN_LAYERS_16,
+  {
+      NUM_LAYER_0_UNITS_16,
+  },
+  {
+      av1_fp_simple_motion_search_term_none_hiddenlayer_0_kernel_16,
+      av1_fp_simple_motion_search_term_none_logits_kernel_16,
+  },
+  {
+      av1_fp_simple_motion_search_term_none_hiddenlayer_0_bias_16,
+      av1_fp_simple_motion_search_term_none_logits_bias_16,
+  },
+};
+
+#undef NUM_HIDDEN_LAYERS_16
+#undef NUM_FEATURES_16
+#undef NUM_LAYER_0_UNITS_16
+#undef NUM_LOGITS_16
+
+#define NUM_HIDDEN_LAYERS_8 1
+#define NUM_FEATURES_8 20
+#define NUM_LAYER_0_UNITS_8 16
+#define NUM_LOGITS_8 1
+
+static const float
+    av1_fp_simple_motion_search_term_none_hiddenlayer_0_kernel_8[] = {  // presumably NUM_LAYER_0_UNITS_8 x NUM_FEATURES_8 weights; confirm layout against av1_nn_predict()
+      -1.11024f, -0.530449f, -0.164768f, 0.675431f, 0.456155f,
+      0.711099f, -0.248095f, 0.112132f, -0.131481f, 0.234457f,
+      0.128073f, 0.306214f, 0.175471f, 0.220189f, -0.270533f,
+      0.293534f, -0.0795547f, 0.234901f, -0.191754f, 0.101171f,
+      -0.108621f, 0.395477f, -0.529459f, -0.354854f, -0.941334f,
+      -0.237689f, 0.39357f, 0.527129f, 0.174333f, -0.00520422f,
+      1.22219f, -0.21815f, 0.0866816f, -0.29591f, -0.212968f,
+      0.00431436f, -0.295382f, -0.582317f, -0.284654f, 0.486427f,
+      -0.202448f, -0.0421883f, -0.116346f, -0.345832f, -0.0471637f,
+      -0.149954f, -0.0969526f, -0.59491f, 0.594364f, 0.298285f,
+      -1.33301f, 0.149562f, 0.097433f, 0.157641f, -0.231132f,
+      -0.0191656f, 0.149396f, 0.811553f, 1.07336f, 0.140674f,
+      1.02134f, 0.455909f, -0.0548795f, 0.0459996f, -0.0589837f,
+      -0.116328f, -0.607502f, -0.232595f, -0.517977f, -0.325901f,
+      1.35047f, -0.148698f, 0.0313182f, 0.181634f, 0.06539f,
+      0.00820322f, 0.0522113f, -1.06071f, -0.817999f, -0.527422f,
+      -1.39175f, -0.110088f, 0.0858626f, -0.247541f, 0.29043f,
+      1.13767f, 0.185834f, 0.390613f, -0.501175f, -0.214176f,
+      -0.256376f, 0.496687f, 0.240471f, 0.218852f, 0.513543f,
+      0.400559f, -0.249168f, -0.752987f, 0.430491f, -0.72299f,
+      0.339754f, 0.396623f, -0.0638322f, 0.353122f, 0.355662f,
+      -0.0704821f, 0.195448f, 0.179396f, 0.486533f, 0.0815535f,
+      -0.503726f, -0.000321223f, 0.501591f, -0.117849f, 0.217667f,
+      -0.123391f, -0.4026f, 0.149756f, -0.0359276f, -0.0990213f,
+      -0.215278f, -0.293649f, 0.301629f, -0.11081f, -0.206725f,
+      -0.00147108f, 0.363644f, -0.430092f, 0.169524f, 0.116091f,
+      -0.583605f, -0.0974948f, 0.253256f, 0.22648f, 0.136902f,
+      -0.882541f, -0.75078f, -0.0629343f, 0.411035f, 0.265742f,
+      -0.360904f, -0.899324f, 0.605871f, 0.0318372f, 0.0735312f,
+      -0.00960722f, 0.691249f, 0.127449f, -0.133021f, -0.0793589f,
+      0.665591f, -0.0682262f, -0.0437626f, 0.0783621f, 2.25727f,
+      0.126529f, -0.0320763f, -0.261759f, -1.19987f, 0.216295f,
+      -0.253886f, -0.642908f, 0.1865f, 0.00299179f, 0.0246782f,
+      -0.00750628f, 0.566367f, 0.99916f, -0.0209625f, 0.273254f,
+      1.09724f, 0.30026f, 0.21585f, -0.0276715f, 0.338996f,
+      0.129884f, -0.00628438f, 0.0461783f, -1.36378f, -0.394756f,
+      -0.395261f, 0.215928f, 0.252803f, -0.207108f, -0.0506214f,
+      -0.0138889f, 0.124197f, -0.0522996f, 0.533803f, -0.25729f,
+      -0.463514f, 0.128322f, -1.04751f, -0.605498f, -0.107235f,
+      -0.00813289f, 0.539742f, -0.0524178f, 0.272101f, 0.151935f,
+      0.607511f, -0.0608427f, 0.36342f, 0.0999134f, 0.69712f,
+      -0.152471f, 0.364244f, 0.410644f, 0.312606f, 0.405679f,
+      -0.371656f, -0.0492209f, -0.148911f, 0.214996f, -0.274749f,
+      -0.0372888f, 0.079023f, -0.429136f, -1.30393f, -0.833824f,
+      -1.31373f, -0.445343f, 0.526917f, 1.30569f, -0.0626746f,
+      0.282353f, -0.28552f, 0.28084f, -0.234934f, 0.227076f,
+      1.09919f, 0.33248f, -0.114933f, 0.40629f, 0.331031f,
+      0.245334f, -0.0318782f, 0.00735305f, -1.58715f, 0.126443f,
+      -0.09472f, -0.182152f, 0.311673f, -0.186136f, 0.817743f,
+      0.928961f, 0.117334f, -0.373644f, -0.0797864f, 0.205565f,
+      0.0789797f, 0.0757131f, -0.152409f, 0.30301f, -0.0170824f,
+      -0.194496f, 0.485547f, 0.370124f, -0.802044f, -0.789671f,
+      0.669258f, 0.55082f, -0.438853f, 0.0597597f, -0.0148101f,
+      -0.41603f, 0.0486339f, -0.464523f, -0.413725f, 0.00907629f,
+      0.70351f, -0.136422f, -0.145957f, -0.0626726f, -0.115773f,
+      -0.333937f, 0.135474f, -0.379598f, -0.134422f, 0.227595f,
+      0.908927f, 0.759504f, -0.0088258f, -0.349333f, 0.122667f,
+      -0.682175f, 0.2201f, -0.332003f, -0.44433f, -0.620308f,
+      -1.36716f, -0.0167907f, -0.538969f, 0.256824f, -0.0706724f,
+      -0.0392471f, -0.156312f, 0.153699f, 1.41967f, 0.0434739f,
+      0.428178f, -0.0714879f, 0.0912104f, 0.00687985f, 0.341789f,
+      0.217381f, 0.128288f, 0.0286751f, 0.527344f, -0.428139f,
+      0.60908f, 1.02074f, -0.0977894f, 0.158067f, 0.28958f,
+      -0.065152f, 0.120616f, -0.882976f, -1.10413f, -1.37497f
+    };
+
+static const float
+    av1_fp_simple_motion_search_term_none_hiddenlayer_0_bias_8[] = {  // One bias per hidden unit (16 values).
+      1.37086f, -1.61858f, -1.32395f, 0.276031f, -0.124696f, -1.71489f,
+      -1.68429f, 1.79103f, -0.335306f, -1.81523f, 0.841083f, -0.542628f,
+      -1.82168f, 0.459829f, 0.0949306f, 0.918486f
+    };
+
+static const float av1_fp_simple_motion_search_term_none_logits_kernel_8[] = {  // Output-layer weights (16 values).
+  -0.283418f, -0.444453f, 0.4977782f, -0.4138758f, 0.41890771f, 0.22149438f,
+  0.545079f, -0.729164f, 0.619389f, 0.5169534f, -0.4236282f, 0.7304213f,
+  0.531938f, -0.14828f, 0.75119f, -0.464074f
+};
+
+static const float av1_fp_simple_motion_search_term_none_logits_bias_8[] = {  // Output-layer bias (single value).
+  -2.22338f
+};
+
+static const NN_CONFIG av1_fp_simple_motion_search_term_none_nn_config_8 = {  // Network used when bsize == BLOCK_8X8.
+  NUM_FEATURES_8,
+  NUM_LOGITS_8,
+  NUM_HIDDEN_LAYERS_8,
+  {
+      NUM_LAYER_0_UNITS_8,
+  },
+  {
+      av1_fp_simple_motion_search_term_none_hiddenlayer_0_kernel_8,
+      av1_fp_simple_motion_search_term_none_logits_kernel_8,
+  },
+  {
+      av1_fp_simple_motion_search_term_none_hiddenlayer_0_bias_8,
+      av1_fp_simple_motion_search_term_none_logits_bias_8,
+  },
+};
+
+#undef NUM_HIDDEN_LAYERS_8
+#undef NUM_FEATURES_8
+#undef NUM_LAYER_0_UNITS_8
+#undef NUM_LOGITS_8
+
+static const float av1_fp_simple_motion_search_term_none_thresh_128 = -101.0f;  // Score threshold selected for BLOCK_128X128.
+static const float av1_fp_simple_motion_search_term_none_thresh_64 = -101.0f;  // Score threshold selected for BLOCK_64X64.
+static const float av1_fp_simple_motion_search_term_none_thresh_32 =
+    -2.2884985045792563f;  // Score threshold selected for BLOCK_32X32.
+static const float av1_fp_simple_motion_search_term_none_thresh_16 =
+    -1.6656874577527165f;  // Score threshold selected for BLOCK_16X16.
+static const float av1_fp_simple_motion_search_term_none_thresh_8 =
+    -3.608804354309157f;  // Score threshold selected for BLOCK_8X8.
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index ac7225b..9e4c9ff 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -127,6 +127,8 @@ sf->ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32 sf->ml_partition_search_breakout_thresh[3] = 300; // BLOCK_64X64 sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128 + + sf->firstpass_simple_motion_search_early_term = 1; } } @@ -694,6 +696,7 @@ sf->txb_split_cap = 1; sf->adaptive_txb_search_level = 0; sf->two_pass_partition_search = 0; + sf->firstpass_simple_motion_search_early_term = 0; sf->use_intra_txb_hash = 0; // TODO(any) : clean use_inter_txb_hash code sf->use_inter_txb_hash = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h index fc6f15f..7663be6 100644 --- a/av1/encoder/speed_features.h +++ b/av1/encoder/speed_features.h
@@ -384,6 +384,10 @@ // in the initial partition search to prune mode candidates, e.g. ref frames. int two_pass_partition_search; + // Terminate early in firstpass of two_pass partition search for faster + // firstpass. + int firstpass_simple_motion_search_early_term; + // Skip rectangular partition test when partition type none gives better // rd than partition type split. Can take values 0 - 2, 0 referring to no // skipping, and 1 - 2 increasing aggressiveness of skipping in order.