| /* |
| * Copyright (c) 2019, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include <float.h> |
| |
| #include "aom_ports/system_state.h" |
| |
| #include "av1/common/enums.h" |
| #include "av1/common/reconinter.h" |
| |
| #include "av1/encoder/encoder.h" |
| #include "av1/encoder/partition_model_weights.h" |
| #include "av1/encoder/partition_strategy.h" |
| #include "av1/encoder/rdopt.h" |
| |
| static void simple_motion_search_prune_part_features( |
| AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, float *features, int features_to_get); |
| |
| // Performs a simple_motion_search with a single reference frame and extract |
| // the variance of residues. Here features is assumed to be a length 6 array. |
| // After this function is called, we will store the following in to features: |
| // features[0] = log(1 + dc_q**2/256) |
| // features[1] = log(1 + variance_of_residue) |
| // for i in [2, 3, 4, 5]: |
| // features[i] = log(1 + variance_of_residue_in_block[i]/variance_of_residue) |
| static void get_res_var_features(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, |
| float *features) { |
| // TODO(chiyotsai@google.com): The data this model trained on did not also use |
| // SIMPLE_TRANSLATION to build the inter_predictor. Retraining and tuning the |
| // model with the correct data should give better performance. |
| assert(mi_size_wide[bsize] == mi_size_high[bsize]); |
| |
| MACROBLOCKD *xd = &x->e_mbd; |
| |
| // Perform a single motion search in Y_PLANE to make a prediction |
| const int use_subpixel = 0; |
| |
| // Start getting the features |
| int f_idx = 0; |
| |
| // Q_INDEX |
| const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
| aom_clear_system_state(); |
| features[f_idx++] = logf(1.0f + (float)(dc_q * dc_q) / 256.0f); |
| |
| // VARIANCE |
| unsigned int sse = 0; |
| unsigned int var = 0; |
| const MV ref_mv_full = { .row = 0, .col = 0 }; |
| av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full, |
| use_subpixel, &sse, &var); |
| aom_clear_system_state(); |
| features[f_idx++] = logf(1.0f + (float)var); |
| |
| // Regional |
| const uint8_t *src = x->plane[0].src.buf; |
| const int src_stride = x->plane[0].src.stride; |
| const uint8_t *dst = xd->plane[0].dst.buf; |
| const int dst_stride = xd->plane[0].dst.stride; |
| const int bw = block_size_wide[bsize]; |
| const int bh = block_size_high[bsize]; |
| const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
| int r_idx = 0; |
| for (r_idx = 0; r_idx < 4; r_idx++) { |
| const int x_idx = (r_idx & 1) * bw / 2; |
| const int y_idx = (r_idx >> 1) * bh / 2; |
| const int src_offset = y_idx * src_stride + x_idx; |
| const int dst_offset = y_idx * dst_stride + x_idx; |
| const unsigned int sub_var = cpi->fn_ptr[subsize].vf( |
| src + src_offset, src_stride, dst + dst_offset, dst_stride, &sse); |
| aom_clear_system_state(); |
| const float var_ratio = (1.0f + (float)sub_var) / (4.0f + (float)var); |
| features[f_idx++] = var_ratio; |
| } |
| } |
| |
| static void simple_motion_search_based_split_fast( |
| AV1_COMP *const cpi, MACROBLOCK *x, int mi_row, int mi_col, |
| BLOCK_SIZE bsize, int *partition_none_allowed, int *partition_horz_allowed, |
| int *partition_vert_allowed, int *do_rectangular_split, |
| int *do_square_split) { |
| const NN_CONFIG *nn_config = NULL; |
| float split_only_thresh = 1.0f; |
| if (bsize == BLOCK_128X128) { |
| nn_config = &av1_simple_motion_search_based_split_nn_config_128; |
| split_only_thresh = av1_simple_motion_search_based_split_thresh_128; |
| } else if (bsize == BLOCK_64X64) { |
| nn_config = &av1_simple_motion_search_based_split_nn_config_64; |
| split_only_thresh = av1_simple_motion_search_based_split_thresh_64; |
| } else if (bsize == BLOCK_32X32) { |
| nn_config = &av1_simple_motion_search_based_split_nn_config_32; |
| split_only_thresh = av1_simple_motion_search_based_split_thresh_32; |
| } else if (bsize == BLOCK_16X16) { |
| nn_config = &av1_simple_motion_search_based_split_nn_config_16; |
| split_only_thresh = av1_simple_motion_search_based_split_thresh_16; |
| } else if (bsize == BLOCK_8X8) { |
| return; |
| } else { |
| assert(0 && "Unexpected block size in simple_motion_based_split"); |
| return; |
| } |
| |
| float features[FEATURE_SIZE_SMS_SPLIT_FAST] = { 0.0f }; |
| float score = 0.0f; |
| get_res_var_features(cpi, x, mi_row, mi_col, bsize, features); |
| av1_nn_predict(features, nn_config, &score); |
| |
| if (score > split_only_thresh) { |
| *partition_none_allowed = 0; |
| *partition_horz_allowed = 0; |
| *partition_vert_allowed = 0; |
| *do_rectangular_split = 0; |
| } |
| if (cpi->sf.simple_motion_search_split_only >= 2) { |
| if (score < -split_only_thresh) *do_square_split = 0; |
| // For larger scores (>split_only_thresh), none and rectangular partitions |
| // are skipped. As score reduces, possibility of split decreases. Hence |
| // for near larger scores (.875 * split_only_thresh to split_only_thresh) |
| // none partition is disabled, but rectangular partitions are evaluated |
| // additionally. |
| if (score > (split_only_thresh * 0.875)) *partition_none_allowed = 0; |
| } |
| } |
| |
| void av1_simple_motion_search_based_split( |
| AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed, |
| int *partition_horz_allowed, int *partition_vert_allowed, |
| int *do_rectangular_split, int *do_square_split) { |
| if (cpi->sf.simple_motion_search_split_speed >= 2) { |
| simple_motion_search_based_split_fast( |
| cpi, x, mi_row, mi_col, bsize, partition_none_allowed, |
| partition_horz_allowed, partition_vert_allowed, do_rectangular_split, |
| do_square_split); |
| |
| return; |
| } |
| |
| aom_clear_system_state(); |
| const NN_CONFIG *nn_config = NULL; |
| const float *ml_mean = NULL, *ml_std = NULL; |
| float split_only_thresh = 10.0f, no_split_thresh = -10.0f; |
| if (bsize == BLOCK_128X128) { |
| ml_mean = av1_simple_motion_search_split_mean_128; |
| ml_std = av1_simple_motion_search_split_std_128; |
| nn_config = &av1_simple_motion_search_split_nn_config_128; |
| split_only_thresh = av1_simple_motion_search_split_thresh_128; |
| no_split_thresh = av1_simple_motion_search_no_split_thresh_128; |
| } else if (bsize == BLOCK_64X64) { |
| ml_mean = av1_simple_motion_search_split_mean_64; |
| ml_std = av1_simple_motion_search_split_std_64; |
| nn_config = &av1_simple_motion_search_split_nn_config_64; |
| split_only_thresh = av1_simple_motion_search_split_thresh_64; |
| no_split_thresh = av1_simple_motion_search_no_split_thresh_64; |
| } else if (bsize == BLOCK_32X32) { |
| ml_mean = av1_simple_motion_search_split_mean_32; |
| ml_std = av1_simple_motion_search_split_std_32; |
| nn_config = &av1_simple_motion_search_split_nn_config_32; |
| split_only_thresh = av1_simple_motion_search_split_thresh_32; |
| no_split_thresh = av1_simple_motion_search_no_split_thresh_32; |
| } else if (bsize == BLOCK_16X16) { |
| ml_mean = av1_simple_motion_search_split_mean_16; |
| ml_std = av1_simple_motion_search_split_std_16; |
| nn_config = &av1_simple_motion_search_split_nn_config_16; |
| split_only_thresh = av1_simple_motion_search_split_thresh_16; |
| no_split_thresh = av1_simple_motion_search_no_split_thresh_16; |
| } else if (bsize == BLOCK_8X8) { |
| ml_mean = av1_simple_motion_search_split_mean_8; |
| ml_std = av1_simple_motion_search_split_std_8; |
| nn_config = &av1_simple_motion_search_split_nn_config_8; |
| split_only_thresh = av1_simple_motion_search_split_thresh_8; |
| no_split_thresh = av1_simple_motion_search_no_split_thresh_8; |
| } else { |
| assert(0 && "Unexpected block size in simple_motion_based_split"); |
| return; |
| } |
| |
| float features[FEATURE_SIZE_SMS_SPLIT] = { 0.0f }; |
| simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col, |
| bsize, features, |
| FEATURE_SMS_SPLIT_MODEL_FLAG); |
| for (int idx = 0; idx < FEATURE_SIZE_SMS_SPLIT; idx++) { |
| features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx]; |
| } |
| |
| float score = 0.0f; |
| |
| av1_nn_predict(features, nn_config, &score); |
| aom_clear_system_state(); |
| |
| if (score > split_only_thresh) { |
| *partition_none_allowed = 0; |
| *partition_horz_allowed = 0; |
| *partition_vert_allowed = 0; |
| *do_rectangular_split = 0; |
| } |
| |
| if (cpi->sf.simple_motion_search_split_only >= 2 && score < no_split_thresh) { |
| *do_square_split = 0; |
| } |
| } |
| |
| // Given a list of ref frames in refs, performs simple_motion_search on each of |
| // the refs and returns the ref with the smallest sse. Returns -1 if none of the |
| // ref in the list is available. Also stores the best sse and var in best_sse, |
| // best_var, respectively. If save_mv_code is -1, don't update mv_ref_fulls in |
| // pc_tree. If save_mv_code is between 0 and 3, update mv_ref_fulls under |
| // pc_tree->split[i]. If save_mv_code is 4, update mv_ref_fulls under pc_tree. |
| static int simple_motion_search_get_best_ref( |
| AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, const int *const refs, int num_refs, |
| int use_subpixel, int save_mv_code, unsigned int *best_sse, |
| unsigned int *best_var) { |
| // TODO(chiyotsai@google.com): The calculation of variance currently uses |
| // bsize, so we might take area outside of the image into account. We need to |
| // modify the SIMD functions to fix this later. |
| const AV1_COMMON *const cm = &cpi->common; |
| int best_ref = -1; |
| |
| if (mi_col >= cm->mi_cols || mi_row >= cm->mi_rows) { |
| // If the whole block is outside of the image, set the var and sse to 0. |
| *best_var = 0; |
| *best_sse = 0; |
| |
| return best_ref; |
| } |
| |
| // Otherwise do loop through the reference frames and find the one with the |
| // minimum SSE |
| const MACROBLOCKD *xd = &x->e_mbd; |
| const MV *mv_ref_fulls = pc_tree->mv_ref_fulls; |
| |
| const int num_planes = 1; |
| |
| *best_sse = INT_MAX; |
| |
| for (int ref_idx = 0; ref_idx < num_refs; ref_idx++) { |
| const int ref = refs[ref_idx]; |
| |
| if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref]) { |
| unsigned int curr_sse = 0, curr_var = 0; |
| av1_simple_motion_search(cpi, x, mi_row, mi_col, bsize, ref, |
| mv_ref_fulls[ref], num_planes, use_subpixel); |
| curr_var = cpi->fn_ptr[bsize].vf( |
| x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, |
| xd->plane[0].dst.stride, &curr_sse); |
| if (curr_sse < *best_sse) { |
| *best_sse = curr_sse; |
| *best_var = curr_var; |
| best_ref = ref; |
| } |
| |
| const int new_mv_row = x->best_mv.as_mv.row / 8; |
| const int new_mv_col = x->best_mv.as_mv.col / 8; |
| if (save_mv_code == 4) { |
| pc_tree->mv_ref_fulls[ref].row = new_mv_row; |
| pc_tree->mv_ref_fulls[ref].col = new_mv_col; |
| } else if (save_mv_code >= 0 && save_mv_code < 4) { |
| // Propagate the new motion vectors to a lower level |
| pc_tree->split[save_mv_code]->mv_ref_fulls[ref].row = new_mv_row; |
| pc_tree->split[save_mv_code]->mv_ref_fulls[ref].col = new_mv_col; |
| } else { |
| assert(save_mv_code == -1 && |
| "Unknown code in simple_motion_search_get_best_ref."); |
| } |
| } |
| } |
| |
| return best_ref; |
| } |
| |
| // Performs fullpixel simple_motion_search with LAST_FRAME and ALTREF_FRAME on |
| // each subblock and extract the variance and sse of residues. Then store the |
| // var and sse from each partition subblock to features. The DC qindex is also |
| // stored in features. |
| // Here features is assumed to be a length 19 array. |
| // After this function is called, we will store the following to features: |
| // features[0:17] = var and sse from subblocks |
| // features[18] = DC q_index |
| static void simple_motion_search_prune_part_features( |
| AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, float *features, int features_to_get) { |
| // TODO(chiyotsai@google.com): Cache the result of the motion search from the |
| // larger bsize. |
| const int w_mi = mi_size_wide[bsize]; |
| const int h_mi = mi_size_high[bsize]; |
| assert(mi_size_wide[bsize] == mi_size_high[bsize]); |
| assert(cpi->ref_frame_flags & av1_ref_frame_flag_list[LAST_FRAME] || |
| cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME]); |
| |
| // Setting up motion search |
| const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME |
| : LAST_FRAME }; |
| const int num_refs = 1; |
| const int use_subpixel = 1; |
| |
| // Doing whole block first to update the mv |
| if (!pc_tree->sms_none_valid && features_to_get & FEATURE_SMS_NONE_FLAG) { |
| simple_motion_search_get_best_ref(cpi, x, pc_tree, mi_row, mi_col, bsize, |
| ref_list, num_refs, use_subpixel, 4, |
| &pc_tree->sms_none_feat[0], |
| &pc_tree->sms_none_feat[1]); |
| pc_tree->sms_none_valid = 1; |
| } |
| |
| // Split subblocks |
| if (!pc_tree->sms_split_valid && features_to_get & FEATURE_SMS_SPLIT_FLAG) { |
| const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
| for (int r_idx = 0; r_idx < 4; r_idx++) { |
| const int sub_mi_col = mi_col + (r_idx & 1) * w_mi / 2; |
| const int sub_mi_row = mi_row + (r_idx >> 1) * h_mi / 2; |
| |
| simple_motion_search_get_best_ref( |
| cpi, x, pc_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
| use_subpixel, r_idx, &pc_tree->sms_split_feat[2 * r_idx], |
| &pc_tree->sms_split_feat[2 * r_idx + 1]); |
| } |
| |
| pc_tree->sms_split_valid = 1; |
| } |
| |
| // Rectangular subblocks |
| if (!pc_tree->sms_rect_valid && features_to_get & FEATURE_SMS_RECT_FLAG) { |
| // Horz subblock |
| BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); |
| for (int r_idx = 0; r_idx < 2; r_idx++) { |
| const int sub_mi_col = mi_col + 0; |
| const int sub_mi_row = mi_row + r_idx * h_mi / 2; |
| |
| simple_motion_search_get_best_ref( |
| cpi, x, pc_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
| use_subpixel, -1, &pc_tree->sms_rect_feat[2 * r_idx], |
| &pc_tree->sms_rect_feat[2 * r_idx + 1]); |
| } |
| |
| // Vert subblock |
| subsize = get_partition_subsize(bsize, PARTITION_VERT); |
| for (int r_idx = 0; r_idx < 2; r_idx++) { |
| const int sub_mi_col = mi_col + r_idx * w_mi / 2; |
| const int sub_mi_row = mi_row + 0; |
| |
| simple_motion_search_get_best_ref( |
| cpi, x, pc_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
| use_subpixel, -1, &pc_tree->sms_rect_feat[4 + 2 * r_idx], |
| &pc_tree->sms_rect_feat[4 + 2 * r_idx + 1]); |
| } |
| pc_tree->sms_rect_valid = 1; |
| } |
| |
| aom_clear_system_state(); |
| int f_idx = 0; |
| if (features_to_get & FEATURE_SMS_NONE_FLAG) { |
| for (int sub_idx = 0; sub_idx < 2; sub_idx++) { |
| features[f_idx++] = logf(1.0f + pc_tree->sms_none_feat[sub_idx]); |
| } |
| } |
| |
| if (features_to_get & FEATURE_SMS_SPLIT_FLAG) { |
| for (int sub_idx = 0; sub_idx < 8; sub_idx++) { |
| features[f_idx++] = logf(1.0f + pc_tree->sms_split_feat[sub_idx]); |
| } |
| } |
| |
| if (features_to_get & FEATURE_SMS_RECT_FLAG) { |
| for (int sub_idx = 0; sub_idx < 8; sub_idx++) { |
| features[f_idx++] = logf(1.0f + pc_tree->sms_rect_feat[sub_idx]); |
| } |
| } |
| |
| const MACROBLOCKD *xd = &x->e_mbd; |
| set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); |
| |
| // Q_INDEX |
| const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
| features[f_idx++] = logf(1.0f + (float)(dc_q * dc_q) / 256.0f); |
| |
| // Neighbor stuff |
| const int has_above = !!xd->above_mbmi; |
| const int has_left = !!xd->left_mbmi; |
| const BLOCK_SIZE above_bsize = has_above ? xd->above_mbmi->sb_type : bsize; |
| const BLOCK_SIZE left_bsize = has_left ? xd->left_mbmi->sb_type : bsize; |
| features[f_idx++] = (float)has_above; |
| features[f_idx++] = (float)mi_size_wide_log2[above_bsize]; |
| features[f_idx++] = (float)mi_size_high_log2[above_bsize]; |
| features[f_idx++] = (float)has_left; |
| features[f_idx++] = (float)mi_size_wide_log2[left_bsize]; |
| features[f_idx++] = (float)mi_size_high_log2[left_bsize]; |
| } |
| |
| void av1_simple_motion_search_prune_part( |
| AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed, |
| int *partition_horz_allowed, int *partition_vert_allowed, |
| int *do_square_split, int *do_rectangular_split, int *prune_horz, |
| int *prune_vert) { |
| const AV1_COMMON *const cm = &cpi->common; |
| // Get model parameters |
| const NN_CONFIG *nn_config = NULL; |
| const float *prune_thresh = NULL, *only_thresh = NULL; |
| const float *ml_mean = NULL, *ml_std = NULL; |
| float features[FEATURE_SIZE_SMS_PRUNE_PART] = { 0.0f }; |
| |
| if (bsize == BLOCK_128X128) { |
| nn_config = &av1_simple_motion_search_prune_part_nn_config_128; |
| ml_mean = av1_simple_motion_search_prune_part_mean_128; |
| ml_std = av1_simple_motion_search_prune_part_std_128; |
| prune_thresh = av1_simple_motion_search_prune_part_prune_thresh_128; |
| only_thresh = av1_simple_motion_search_prune_part_only_thresh_128; |
| } else if (bsize == BLOCK_64X64) { |
| nn_config = &av1_simple_motion_search_prune_part_nn_config_64; |
| ml_mean = av1_simple_motion_search_prune_part_mean_64; |
| ml_std = av1_simple_motion_search_prune_part_std_64; |
| prune_thresh = av1_simple_motion_search_prune_part_prune_thresh_64; |
| only_thresh = av1_simple_motion_search_prune_part_only_thresh_64; |
| } else if (bsize == BLOCK_32X32) { |
| nn_config = &av1_simple_motion_search_prune_part_nn_config_32; |
| ml_mean = av1_simple_motion_search_prune_part_mean_32; |
| ml_std = av1_simple_motion_search_prune_part_std_32; |
| prune_thresh = av1_simple_motion_search_prune_part_prune_thresh_32; |
| only_thresh = av1_simple_motion_search_prune_part_only_thresh_32; |
| } else if (bsize == BLOCK_16X16) { |
| nn_config = &av1_simple_motion_search_prune_part_nn_config_16; |
| ml_mean = av1_simple_motion_search_prune_part_mean_16; |
| ml_std = av1_simple_motion_search_prune_part_std_16; |
| prune_thresh = av1_simple_motion_search_prune_part_prune_thresh_16; |
| only_thresh = av1_simple_motion_search_prune_part_only_thresh_16; |
| } else if (bsize == BLOCK_8X8) { |
| nn_config = &av1_simple_motion_search_prune_part_nn_config_8; |
| ml_mean = av1_simple_motion_search_prune_part_mean_8; |
| ml_std = av1_simple_motion_search_prune_part_std_8; |
| prune_thresh = av1_simple_motion_search_prune_part_prune_thresh_8; |
| only_thresh = av1_simple_motion_search_prune_part_only_thresh_8; |
| } else { |
| assert(0 && "Unexpected block size in simple_motion_prune_part"); |
| } |
| |
| // If there is no valid threshold, return immediately. |
| if (!nn_config || (prune_thresh[PARTITION_HORZ] == 0.0f && |
| prune_thresh[PARTITION_VERT] == 0.0f)) { |
| return; |
| } |
| if (bsize < BLOCK_8X8) { |
| return; |
| } |
| |
| // Get features |
| simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col, |
| bsize, features, |
| FEATURE_SMS_PRUNE_PART_FLAG); |
| for (int f_idx = 0; f_idx < FEATURE_SIZE_SMS_PRUNE_PART; f_idx++) { |
| features[f_idx] = (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; |
| } |
| |
| // Get probabilities |
| float scores[EXT_PARTITION_TYPES] = { 0.0f }, |
| probs[EXT_PARTITION_TYPES] = { 0.0f }; |
| const int num_classes = (bsize == BLOCK_128X128 || bsize == BLOCK_8X8) |
| ? PARTITION_TYPES |
| : EXT_PARTITION_TYPES; |
| |
| av1_nn_predict(features, nn_config, scores); |
| aom_clear_system_state(); |
| |
| av1_nn_softmax(scores, probs, num_classes); |
| |
| // Determine if we should prune rectangular partitions. |
| if (cpi->sf.simple_motion_search_prune_rect && !frame_is_intra_only(cm) && |
| (*partition_horz_allowed || *partition_vert_allowed) && |
| bsize >= BLOCK_8X8 && !av1_superres_scaled(cm)) { |
| *prune_horz = probs[PARTITION_HORZ] <= prune_thresh[PARTITION_HORZ]; |
| *prune_vert = probs[PARTITION_VERT] <= prune_thresh[PARTITION_VERT]; |
| } |
| |
| // Silence compiler warnings |
| (void)only_thresh; |
| (void)partition_none_allowed; |
| (void)do_square_split; |
| (void)do_rectangular_split; |
| } |
| |
| // Early terminates PARTITION_NONE using simple_motion_search features and the |
| // rate, distortion, and rdcost of PARTITION_NONE. This is only called when: |
| // - The frame is a show frame |
| // - The frame is not intra only |
| // - The current bsize is > BLOCK_8X8 |
| // - blk_row + blk_height/2 < total_rows and blk_col + blk_width/2 < total_cols |
| void av1_simple_motion_search_early_term_none(AV1_COMP *const cpi, |
| MACROBLOCK *x, PC_TREE *pc_tree, |
| int mi_row, int mi_col, |
| BLOCK_SIZE bsize, |
| const RD_STATS *none_rdc, |
| int *early_terminate) { |
| // TODO(chiyotsai@google.com): There are other features we can extract from |
| // PARTITION_NONE. Play with this later. |
| float features[FEATURE_SIZE_SMS_TERM_NONE] = { 0.0f }; |
| simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col, |
| bsize, features, |
| FEATURE_SMS_PRUNE_PART_FLAG); |
| int f_idx = FEATURE_SIZE_SMS_PRUNE_PART; |
| |
| features[f_idx++] = logf(1.0f + (float)none_rdc->rate); |
| features[f_idx++] = logf(1.0f + (float)none_rdc->dist); |
| features[f_idx++] = logf(1.0f + (float)none_rdc->rdcost); |
| |
| assert(f_idx == FEATURE_SIZE_SMS_TERM_NONE); |
| |
| const float *ml_mean = NULL; |
| const float *ml_std = NULL; |
| const float *ml_model = NULL; |
| |
| if (bsize == BLOCK_128X128) { |
| ml_mean = av1_simple_motion_search_term_none_mean_128; |
| ml_std = av1_simple_motion_search_term_none_std_128; |
| ml_model = av1_simple_motion_search_term_none_model_128; |
| } else if (bsize == BLOCK_64X64) { |
| ml_mean = av1_simple_motion_search_term_none_mean_64; |
| ml_std = av1_simple_motion_search_term_none_std_64; |
| ml_model = av1_simple_motion_search_term_none_model_64; |
| } else if (bsize == BLOCK_32X32) { |
| ml_mean = av1_simple_motion_search_term_none_mean_32; |
| ml_std = av1_simple_motion_search_term_none_std_32; |
| ml_model = av1_simple_motion_search_term_none_model_32; |
| } else if (bsize == BLOCK_16X16) { |
| ml_mean = av1_simple_motion_search_term_none_mean_16; |
| ml_std = av1_simple_motion_search_term_none_std_16; |
| ml_model = av1_simple_motion_search_term_none_model_16; |
| } else { |
| assert(0 && "Unexpected block size in simple_motion_term_none"); |
| } |
| |
| if (ml_model) { |
| float score = 0.0f; |
| for (f_idx = 0; f_idx < FEATURE_SIZE_SMS_TERM_NONE; f_idx++) { |
| score += |
| ml_model[f_idx] * (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; |
| } |
| score += ml_model[FEATURE_SIZE_SMS_TERM_NONE]; |
| |
| if (score >= 0.0f) { |
| *early_terminate = 1; |
| } |
| } |
| } |
| |
| static void firstpass_simple_motion_search_features( |
| AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, float *features) { |
| assert(mi_size_wide[bsize] == mi_size_high[bsize]); |
| assert(cpi->ref_frame_flags & av1_ref_frame_flag_list[LAST_FRAME] || |
| cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME]); |
| |
| // Setting up motion search |
| const int ref_list[] = { LAST_FRAME, ALTREF_FRAME }; |
| const int num_refs = 2; |
| const int use_subpixel = 0; |
| |
| unsigned int int_features[10] = { 0 }; |
| |
| int f_idx = 0; |
| // Doing whole block first to update the mv |
| simple_motion_search_get_best_ref( |
| cpi, x, pc_tree, mi_row, mi_col, bsize, ref_list, num_refs, use_subpixel, |
| 4, &int_features[f_idx], &int_features[f_idx + 1]); |
| f_idx += 2; |
| |
| // Split subblocks |
| const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
| const int w_mi = mi_size_wide[bsize]; |
| const int h_mi = mi_size_high[bsize]; |
| for (int r_idx = 0; r_idx < 4; r_idx++) { |
| const int sub_mi_col = mi_col + (r_idx & 1) * w_mi / 2; |
| const int sub_mi_row = mi_row + (r_idx >> 1) * h_mi / 2; |
| |
| simple_motion_search_get_best_ref( |
| cpi, x, pc_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
| use_subpixel, r_idx, &int_features[f_idx], &int_features[f_idx + 1]); |
| f_idx += 2; |
| } |
| |
| aom_clear_system_state(); |
| for (int idx = 0; idx < f_idx; idx++) { |
| features[idx] = logf(1.0f + (float)int_features[idx]); |
| } |
| |
| const MACROBLOCKD *xd = &x->e_mbd; |
| set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); |
| |
| // Q_INDEX |
| const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
| features[f_idx++] = logf(1.0f + (float)(dc_q * dc_q) / 256.0f); |
| |
| // Neighbor stuff |
| const int has_above = !!xd->above_mbmi; |
| const int has_left = !!xd->left_mbmi; |
| const BLOCK_SIZE above_bsize = has_above ? xd->above_mbmi->sb_type : bsize; |
| const BLOCK_SIZE left_bsize = has_left ? xd->left_mbmi->sb_type : bsize; |
| features[f_idx++] = (float)has_above; |
| features[f_idx++] = (float)mi_size_wide_log2[above_bsize]; |
| features[f_idx++] = (float)mi_size_high_log2[above_bsize]; |
| features[f_idx++] = (float)has_left; |
| features[f_idx++] = (float)mi_size_wide_log2[left_bsize]; |
| features[f_idx++] = (float)mi_size_high_log2[left_bsize]; |
| } |
| |
| void av1_firstpass_simple_motion_search_early_term(AV1_COMP *const cpi, |
| MACROBLOCK *x, |
| PC_TREE *pc_tree, int mi_row, |
| int mi_col, BLOCK_SIZE bsize, |
| const RD_STATS *none_rdc, |
| int *do_square_split) { |
| const NN_CONFIG *nn_config = NULL; |
| float thresh = 0.0f; |
| const float *ml_mean = NULL, *ml_std = NULL; |
| if (bsize == BLOCK_32X32) { |
| nn_config = &av1_fp_simple_motion_search_term_none_nn_config_32; |
| ml_mean = av1_fp_simple_motion_search_term_none_mean_32; |
| ml_std = av1_fp_simple_motion_search_term_none_std_32; |
| thresh = av1_fp_simple_motion_search_term_none_thresh_32; |
| } else if (bsize == BLOCK_16X16) { |
| nn_config = &av1_fp_simple_motion_search_term_none_nn_config_16; |
| ml_mean = av1_fp_simple_motion_search_term_none_mean_16; |
| ml_std = av1_fp_simple_motion_search_term_none_std_16; |
| thresh = av1_fp_simple_motion_search_term_none_thresh_16; |
| } else if (bsize == BLOCK_8X8) { |
| nn_config = &av1_fp_simple_motion_search_term_none_nn_config_8; |
| ml_mean = av1_fp_simple_motion_search_term_none_mean_8; |
| ml_std = av1_fp_simple_motion_search_term_none_std_8; |
| thresh = av1_fp_simple_motion_search_term_none_thresh_8; |
| } else { |
| assert(0 && |
| "Unexpected bsize in firstpass_simple_motion_search_early_term"); |
| return; |
| } |
| |
| float ml_features[FEATURE_SIZE_FP_SMS_TERM_NONE] = { 0.0f }; |
| |
| firstpass_simple_motion_search_features(cpi, x, pc_tree, mi_row, mi_col, |
| bsize, ml_features); |
| int f_idx = 17; |
| |
| ml_features[f_idx++] = logf(1.0f + (float)none_rdc->rate); |
| ml_features[f_idx++] = logf(1.0f + (float)none_rdc->dist); |
| ml_features[f_idx++] = logf(1.0f + (float)none_rdc->rdcost); |
| |
| for (f_idx = 0; f_idx < 20; f_idx++) { |
| ml_features[f_idx] = (ml_features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; |
| } |
| |
| // Get probabilities |
| float score = 0.0f; |
| |
| av1_nn_predict(ml_features, nn_config, &score); |
| aom_clear_system_state(); |
| |
| // Determine if we should prune square partitions. |
| if (score < thresh) { |
| *do_square_split = 0; |
| } |
| } |
| |
| void av1_get_max_min_partition_features(AV1_COMP *const cpi, MACROBLOCK *x, |
| int mi_row, int mi_col, |
| float *features) { |
| AV1_COMMON *const cm = &cpi->common; |
| MACROBLOCKD *xd = &x->e_mbd; |
| const BLOCK_SIZE sb_size = cm->seq_params.sb_size; |
| |
| assert(sb_size == BLOCK_128X128); |
| |
| int f_idx = 0; |
| |
| const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
| aom_clear_system_state(); |
| const float log_q_sq = logf(1.0f + (float)(dc_q * dc_q) / 256.0f); |
| |
| // Perform full-pixel single motion search in Y plane of 16x16 mbs in the sb |
| float sum_mv_row_sq = 0; |
| float sum_mv_row = 0; |
| float min_abs_mv_row = FLT_MAX; |
| float max_abs_mv_row = 0; |
| |
| float sum_mv_col_sq = 0; |
| float sum_mv_col = 0; |
| float min_abs_mv_col = FLT_MAX; |
| float max_abs_mv_col = 0; |
| |
| float sum_log_sse_sq = 0; |
| float sum_log_sse = 0; |
| float min_log_sse = FLT_MAX; |
| float max_log_sse = 0; |
| |
| const BLOCK_SIZE mb_size = BLOCK_16X16; |
| const int mb_rows = block_size_high[sb_size] / block_size_high[mb_size]; |
| const int mb_cols = block_size_wide[sb_size] / block_size_wide[mb_size]; |
| const int mb_in_mi_size_high_log2 = mi_size_high_log2[mb_size]; |
| const int mb_in_mi_size_wide_log2 = mi_size_wide_log2[mb_size]; |
| |
| for (int mb_row = 0; mb_row < mb_rows; mb_row++) |
| for (int mb_col = 0; mb_col < mb_cols; mb_col++) { |
| const int this_mi_row = mi_row + (mb_row << mb_in_mi_size_high_log2); |
| const int this_mi_col = mi_col + (mb_col << mb_in_mi_size_wide_log2); |
| unsigned int sse = 0; |
| unsigned int var = 0; |
| const MV ref_mv_full = { .row = 0, .col = 0 }; |
| |
| av1_simple_motion_sse_var(cpi, x, this_mi_row, this_mi_col, mb_size, |
| ref_mv_full, 0, &sse, &var); |
| |
| aom_clear_system_state(); |
| const float mv_row = (float)(x->best_mv.as_mv.row / 8); |
| const float mv_col = (float)(x->best_mv.as_mv.col / 8); |
| const float log_sse = logf(1.0f + (float)sse); |
| const float abs_mv_row = fabsf(mv_row); |
| const float abs_mv_col = fabsf(mv_col); |
| |
| sum_mv_row_sq += mv_row * mv_row; |
| sum_mv_row += mv_row; |
| sum_mv_col_sq += mv_col * mv_col; |
| sum_mv_col += mv_col; |
| |
| if (abs_mv_row < min_abs_mv_row) min_abs_mv_row = abs_mv_row; |
| if (abs_mv_row > max_abs_mv_row) max_abs_mv_row = abs_mv_row; |
| if (abs_mv_col < min_abs_mv_col) min_abs_mv_col = abs_mv_col; |
| if (abs_mv_col > max_abs_mv_col) max_abs_mv_col = abs_mv_col; |
| |
| sum_log_sse_sq += log_sse * log_sse; |
| sum_log_sse += log_sse; |
| if (log_sse < min_log_sse) min_log_sse = log_sse; |
| if (log_sse > max_log_sse) max_log_sse = log_sse; |
| } |
| aom_clear_system_state(); |
| const float avg_mv_row = sum_mv_row / 64.0f; |
| const float var_mv_row = sum_mv_row_sq / 64.0f - avg_mv_row * avg_mv_row; |
| |
| const float avg_mv_col = sum_mv_col / 64.0f; |
| const float var_mv_col = sum_mv_col_sq / 64.0f - avg_mv_col * avg_mv_col; |
| |
| const float avg_log_sse = sum_log_sse / 64.0f; |
| const float var_log_sse = sum_log_sse_sq / 64.0f - avg_log_sse * avg_log_sse; |
| |
| features[f_idx++] = avg_log_sse; |
| features[f_idx++] = avg_mv_col; |
| features[f_idx++] = avg_mv_row; |
| features[f_idx++] = log_q_sq; |
| features[f_idx++] = max_abs_mv_col; |
| features[f_idx++] = max_abs_mv_row; |
| features[f_idx++] = max_log_sse; |
| features[f_idx++] = min_abs_mv_col; |
| features[f_idx++] = min_abs_mv_row; |
| features[f_idx++] = min_log_sse; |
| features[f_idx++] = var_log_sse; |
| features[f_idx++] = var_mv_col; |
| features[f_idx++] = var_mv_row; |
| |
| assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED); |
| } |
| |
| BLOCK_SIZE av1_predict_max_partition(AV1_COMP *const cpi, MACROBLOCK *const x, |
| const float *features) { |
| float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }, |
| probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; |
| const NN_CONFIG *nn_config = &av1_max_part_pred_nn_config; |
| |
| assert(cpi->sf.auto_max_partition_based_on_simple_motion != NOT_IN_USE); |
| |
| aom_clear_system_state(); |
| av1_nn_predict(features, nn_config, scores); |
| av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED); |
| |
| int result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; |
| if (cpi->sf.auto_max_partition_based_on_simple_motion == DIRECT_PRED) { |
| result = 0; |
| float max_prob = probs[0]; |
| for (int i = 1; i < MAX_NUM_CLASSES_MAX_MIN_PART_PRED; ++i) { |
| if (probs[i] > max_prob) { |
| max_prob = probs[i]; |
| result = i; |
| } |
| } |
| } else if (cpi->sf.auto_max_partition_based_on_simple_motion == |
| RELAXED_PRED) { |
| for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; |
| --result) { |
| if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { |
| probs[result] += probs[result + 1]; |
| } |
| if (probs[result] > 0.2) break; |
| } |
| } else if (cpi->sf.auto_max_partition_based_on_simple_motion == ADAPT_PRED) { |
| const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size; |
| MACROBLOCKD *const xd = &x->e_mbd; |
| // TODO(debargha): x->source_variance is unavailable at this point, |
| // so compute. The redundant recomputation later can be removed. |
| const unsigned int source_variance = |
| is_cur_buf_hbd(xd) |
| ? av1_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size, |
| xd->bd) |
| : av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size); |
| if (source_variance > 16) { |
| const double thresh = source_variance < 128 ? 0.05 : 0.1; |
| for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; |
| --result) { |
| if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { |
| probs[result] += probs[result + 1]; |
| } |
| if (probs[result] > thresh) break; |
| } |
| } |
| } |
| |
| return (BLOCK_SIZE)((result + 2) * 3); |
| } |