Pack a couple partition-related buffers to PartitionSearchInfo Bug=aomedia:2618 Change-Id: I240c962e05c91c847242990118aef3bcb02a85c3
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index ba6892f..5ab598b 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -36,6 +36,10 @@ // SuperblockEnc stores superblock level information used by the encoder for // more efficient encoding. typedef struct { + // The maximum and minimum allowed partition size + BLOCK_SIZE min_partition_size; + BLOCK_SIZE max_partition_size; + // Below are information gathered from tpl_model used to speed up the encoding // process. int tpl_data_count; @@ -268,6 +272,33 @@ int **mv_cost_stack; } MvCostInfo; +// This struct holds some parameters related to partitioning schemes in av1. +// TODO(chiyotsai@google.com): Consolidate this with SIMPLE_MOTION_DATA_TREE +typedef struct { +#if !CONFIG_REALTIME_ONLY + // The following 4 parameters are used for cnn-based partitioning on intra + // frame. + // Where we are on the quad tree. Used to index into the cnn buffer for + // partition decision. + int quad_tree_idx; + // Whether the CNN buffer contains valid output + int cnn_output_valid; + // A buffer used by our segmentation CNN for intra-frame partitioning. + float cnn_buffer[CNN_OUT_BUF_SIZE]; + // log of the quantization parameter of the current BLOCK_64X64 that includes + // the current block. Used as an input to the CNN. + float log_q; +#endif + + // Holds the variance of various subblocks. This is used by rt mode for + // variance based partitioning. + // 0 - 128x128 | 1-2 - 128x64 | 3-4 - 64x128 + // 5-8 - 64x64 | 9-16 - 64x32 | 17-24 - 32x64 + // 25-40 - 32x32 + // 41-104 - 16x16 + uint8_t variance_low[105]; +} PartitionSearchInfo; + // This struct stores the parameters used to perform the txfm search. For the // most part, this determines how various speed features are used. typedef struct { @@ -384,11 +415,6 @@ int mb_energy; int sb_energy_level; - // These are set to their default values at the beginning, and then adjusted - // further in the encoding process. 
- BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - unsigned int max_mv_context[REF_FRAMES]; unsigned int source_variance; unsigned int simple_motion_pred_sse; @@ -526,22 +552,7 @@ COMP_RD_STATS comp_rd_stats[MAX_COMP_RD_STATS]; int comp_rd_stats_idx; -#if !CONFIG_REALTIME_ONLY - int quad_tree_idx; - int cnn_output_valid; - float cnn_buffer[CNN_OUT_BUF_SIZE]; - float log_q; -#endif int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES]; - // 0 - 128x128 - // 1-2 - 128x64 - // 3-4 - 64x128 - // 5-8 - 64x64 - // 9-16 - 64x32 - // 17-24 - 32x64 - // 25-40 - 32x32 - // 41-104 - 16x16 - uint8_t variance_low[105]; uint8_t content_state_sb; // Strong color activity detection. Used in REALTIME coding mode to enhance // the visual quality at the boundary of moving color objects. @@ -550,6 +561,9 @@ uint8_t search_ref_frame[REF_FRAMES]; + // Stores some partition-search related buffers. + PartitionSearchInfo part_search_info; + // Stores various txfm search related parameters such as txfm_type, txfm_size, // trellis eob search, etc. TxfmSearchParams txfm_search_params;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index b52c85c..bacdec3 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -2736,6 +2736,12 @@ return 1; } +#ifndef NDEBUG +static AOM_INLINE int is_bsize_square(BLOCK_SIZE bsize) { + return block_size_wide[bsize] == block_size_high[bsize]; +} +#endif // NDEBUG + // Searches for the best partition pattern for a block based on the // rate-distortion cost, and returns a bool value to indicate whether a valid // partition pattern is found. The partition can recursively go down to @@ -2749,8 +2755,6 @@ // mi_row: row coordinate of the block in a step size of MI_SIZE // mi_col: column coordinate of the block in a step size of MI_SIZE // bsize: block size -// max_sq_part: the largest square block size for prediction blocks -// min_sq_part: the smallest square block size for prediction blocks // rd_cost: the pointer to the final rd cost of the current block // best_rdc: the upper bound of rd cost for a valid partition // pc_tree: the pointer to the PC_TREE node storing the picked partitions @@ -2763,19 +2767,22 @@ // // Output: // a bool value indicating whether a valid partition is found -static bool rd_pick_partition( - AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, - TokenExtra **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, - BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part, RD_STATS *rd_cost, - RD_STATS best_rdc, PC_TREE *pc_tree, SIMPLE_MOTION_DATA_TREE *sms_tree, - int64_t *none_rd, SB_MULTI_PASS_MODE multi_pass_mode, - RD_RECT_PART_WIN_INFO *rect_part_win_info) { +static bool rd_pick_partition(AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, + int mi_row, int mi_col, BLOCK_SIZE bsize, + RD_STATS *rd_cost, RD_STATS best_rdc, + PC_TREE *pc_tree, + SIMPLE_MOTION_DATA_TREE *sms_tree, + int64_t *none_rd, + SB_MULTI_PASS_MODE multi_pass_mode, + RD_RECT_PART_WIN_INFO *rect_part_win_info) { const AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; const int num_planes = av1_num_planes(cm); TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK 
*const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; + PartitionSearchInfo *part_info = &x->part_search_info; const int mi_step = mi_size_wide[bsize] / 2; RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; const TokenExtra *const tp_orig = *tp; @@ -2815,9 +2822,10 @@ return found_best_partition; } + // Prepare for segmentation CNN-based partitioning for intra-frame. if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) { - x->quad_tree_idx = 0; - x->cnn_output_valid = 0; + part_info->quad_tree_idx = 0; + part_info->cnn_output_valid = 0; } if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0; @@ -2925,9 +2933,9 @@ if (try_intra_cnn_split) { av1_intra_mode_cnn_partition( - &cpi->common, x, bsize, x->quad_tree_idx, &partition_none_allowed, - &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split, - &do_square_split); + &cpi->common, x, bsize, part_info->quad_tree_idx, + &partition_none_allowed, &partition_horz_allowed, + &partition_vert_allowed, &do_rectangular_split, &do_square_split); } // Use simple_motion_search to prune partitions. This must be done prior to @@ -2963,14 +2971,16 @@ // Max and min square partition levels are defined as the partition nodes that // the recursive function rd_pick_partition() can reach. To implement this: - // only PARTITION_NONE is allowed if the current node equals min_sq_part, - // only PARTITION_SPLIT is allowed if the current node exceeds max_sq_part. - assert(block_size_wide[min_sq_part] == block_size_high[min_sq_part]); - assert(block_size_wide[max_sq_part] == block_size_high[max_sq_part]); - assert(min_sq_part <= max_sq_part); - assert(block_size_wide[bsize] == block_size_high[bsize]); - const int max_partition_size = block_size_wide[max_sq_part]; - const int min_partition_size = block_size_wide[min_sq_part]; + // only PARTITION_NONE is allowed if the current node equals + // min_partition_size, only PARTITION_SPLIT is allowed if the current node + // exceeds max_partition_size. 
+ SuperBlockEnc *sb_enc = &x->sb_enc; + assert(is_bsize_square(sb_enc->max_partition_size)); + assert(is_bsize_square(sb_enc->min_partition_size)); + assert(sb_enc->min_partition_size <= sb_enc->max_partition_size); + assert(is_bsize_square(bsize)); + const int max_partition_size = block_size_wide[sb_enc->max_partition_size]; + const int min_partition_size = block_size_wide[sb_enc->min_partition_size]; const int blksize = block_size_wide[bsize]; assert(min_partition_size <= max_partition_size); const int is_le_min_sq_part = blksize <= min_partition_size; @@ -3177,19 +3187,19 @@ int curr_quad_tree_idx = 0; if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) { - curr_quad_tree_idx = x->quad_tree_idx; - x->quad_tree_idx = 4 * curr_quad_tree_idx + idx + 1; + curr_quad_tree_idx = part_info->quad_tree_idx; + part_info->quad_tree_idx = 4 * curr_quad_tree_idx + idx + 1; } if (!rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, - mi_col + x_idx, subsize, max_sq_part, min_sq_part, - &this_rdc, best_remain_rdcost, pc_tree->split[idx], + mi_col + x_idx, subsize, &this_rdc, + best_remain_rdcost, pc_tree->split[idx], sms_tree->split[idx], p_split_rd, multi_pass_mode, &split_part_rect_win[idx])) { av1_invalid_rd_stats(&sum_rdc); break; } if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) { - x->quad_tree_idx = curr_quad_tree_idx; + part_info->quad_tree_idx = curr_quad_tree_idx; } sum_rdc.rate += this_rdc.rate; @@ -4841,7 +4851,7 @@ #if !CONFIG_REALTIME_ONLY init_ref_frame_space(cpi, td, mi_row, mi_col); x->sb_energy_level = 0; - x->cnn_output_valid = 0; + x->part_search_info.cnn_output_valid = 0; if (gather_tpl_data) { if (cm->delta_q_info.delta_q_present_flag) { const int num_planes = av1_num_planes(cm); @@ -4867,6 +4877,49 @@ av1_invalid_rd_stats(rd_cost); } +#if !CONFIG_REALTIME_ONLY +static AOM_INLINE BLOCK_SIZE dim_to_size(int dim) { + switch (dim) { + case 4: return BLOCK_4X4; + case 8: return BLOCK_8X8; + case 16: return BLOCK_16X16; + case 32: return 
BLOCK_32X32; + case 64: return BLOCK_64X64; + case 128: return BLOCK_128X128; + default: assert(0); return 0; + } +} + +static AOM_INLINE void set_max_min_partition_size(SuperBlockEnc *sb_enc, + AV1_COMP *cpi, MACROBLOCK *x, + const SPEED_FEATURES *sf, + BLOCK_SIZE sb_size, + int mi_row, int mi_col) { + const AV1_COMMON *cm = &cpi->common; + + sb_enc->max_partition_size = + AOMMIN(sf->part_sf.default_max_partition_size, + dim_to_size(cpi->oxcf.max_partition_size)); + sb_enc->min_partition_size = + AOMMAX(sf->part_sf.default_min_partition_size, + dim_to_size(cpi->oxcf.min_partition_size)); + sb_enc->max_partition_size = + AOMMIN(sb_enc->max_partition_size, cm->seq_params.sb_size); + sb_enc->min_partition_size = + AOMMIN(sb_enc->min_partition_size, cm->seq_params.sb_size); + + if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) { + float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f }; + + av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features); + sb_enc->max_partition_size = + AOMMAX(AOMMIN(av1_predict_max_partition(cpi, x, features), + sb_enc->max_partition_size), + sb_enc->min_partition_size); + } +} +#endif // !CONFIG_REALTIME_ONLY + static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TokenExtra **tp, const int mi_row, const int mi_col, @@ -4928,33 +4981,24 @@ #if CONFIG_COLLECT_COMPONENT_TIMING start_timing(cpi, rd_pick_partition_time); #endif - BLOCK_SIZE max_sq_size = x->max_partition_size; - BLOCK_SIZE min_sq_size = x->min_partition_size; - if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) { - float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f }; - - av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features); - max_sq_size = AOMMAX( - AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size), - min_sq_size); - } + set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col); const int num_passes = cpi->oxcf.sb_multipass_unit_test ? 
2 : 1; if (num_passes == 1) { PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size); rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, - max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc, - pc_root, sms_root, NULL, SB_SINGLE_PASS, NULL); + &dummy_rdc, dummy_rdc, pc_root, sms_root, NULL, + SB_SINGLE_PASS, NULL); } else { // First pass SB_FIRST_PASS_STATS sb_fp_stats; backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col); PC_TREE *const pc_root_p0 = av1_alloc_pc_tree_node(sb_size); rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, - max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc, - pc_root_p0, sms_root, NULL, SB_DRY_PASS, NULL); + &dummy_rdc, dummy_rdc, pc_root_p0, sms_root, NULL, + SB_DRY_PASS, NULL); // Second pass init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, @@ -4966,8 +5010,8 @@ PC_TREE *const pc_root_p1 = av1_alloc_pc_tree_node(sb_size); rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, - max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc, - pc_root_p1, sms_root, NULL, SB_WET_PASS, NULL); + &dummy_rdc, dummy_rdc, pc_root_p1, sms_root, NULL, + SB_WET_PASS, NULL); } // Reset to 0 so that it wouldn't be used elsewhere mistakenly. sb_enc->tpl_data_count = 0;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c index 93528e3..c563c44 100644 --- a/av1/encoder/nonrd_pickmode.c +++ b/av1/encoder/nonrd_pickmode.c
@@ -1509,10 +1509,10 @@ x->nonrd_prune_ref_frame_search) { if (is_small_sb) *force_skip_low_temp_var = get_force_skip_low_temp_var_small_sb( - &x->variance_low[0], mi_row, mi_col, bsize); + &x->part_search_info.variance_low[0], mi_row, mi_col, bsize); else *force_skip_low_temp_var = get_force_skip_low_temp_var( - &x->variance_low[0], mi_row, mi_col, bsize); + &x->part_search_info.variance_low[0], mi_row, mi_col, bsize); // If force_skip_low_temp_var is set, skip golden reference. if (*force_skip_low_temp_var) { use_golden_ref_frame = 0;
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c index 96992d8..a41863c 100644 --- a/av1/encoder/partition_strategy.c +++ b/av1/encoder/partition_strategy.c
@@ -69,8 +69,10 @@ return; } + PartitionSearchInfo *part_info = &x->part_search_info; + // Precompute the CNN part and cache the result in MACROBLOCK - if (bsize == BLOCK_64X64 && !x->cnn_output_valid) { + if (bsize == BLOCK_64X64 && !part_info->cnn_output_valid) { aom_clear_system_state(); const CNN_CONFIG *cnn_config = &av1_intra_mode_cnn_partition_cnn_config; @@ -83,7 +85,7 @@ float *output_buffer[CNN_TOT_OUT_CH]; float **cur_output_buf = output_buffer; - float *curr_buf_ptr = x->cnn_buffer; + float *curr_buf_ptr = part_info->cnn_buffer; for (int output_idx = 0; output_idx < num_outputs; output_idx++) { const int num_chs = out_chs[output_idx]; const int ch_size = output_dims[output_idx] * output_dims[output_idx]; @@ -106,9 +108,10 @@ const int bit_depth = xd->bd; const int dc_q = av1_dc_quant_QTX(x->qindex, 0, bit_depth) >> (bit_depth - 8); - x->log_q = logf(1.0f + (float)(dc_q * dc_q) / 256.0f); - x->log_q = (x->log_q - av1_intra_mode_cnn_partition_mean[0]) / - av1_intra_mode_cnn_partition_std[0]; + part_info->log_q = logf(1.0f + (float)(dc_q * dc_q) / 256.0f); + part_info->log_q = + (part_info->log_q - av1_intra_mode_cnn_partition_mean[0]) / + av1_intra_mode_cnn_partition_std[0]; const int width = 65, height = 65, stride = x->plane[AOM_PLANE_Y].src.stride; @@ -128,10 +131,10 @@ &thread_data, &output); } - x->cnn_output_valid = 1; + part_info->cnn_output_valid = 1; } - if (!x->cnn_output_valid) { + if (!part_info->cnn_output_valid) { return; } @@ -149,7 +152,7 @@ float dnn_features[100]; float logits[4] = { 0.0f }; - const float *branch_0 = x->cnn_buffer; + const float *branch_0 = part_info->cnn_buffer; const float *branch_1 = branch_0 + CNN_BRANCH_0_OUT_SIZE; const float *branch_2 = branch_1 + CNN_BRANCH_1_OUT_SIZE; const float *branch_3 = branch_2 + CNN_BRANCH_2_OUT_SIZE; @@ -166,7 +169,7 @@ dnn_features[f_idx++] = branch_1[lin_idx + ch_idx * spa_stride]; } } - dnn_features[f_idx++] = x->log_q; + dnn_features[f_idx++] = part_info->log_q; } else if (bsize == 
BLOCK_32X32) { int f_idx = 0; for (int idx = 0; idx < CNN_BRANCH_0_OUT_CH; idx++) { @@ -178,7 +181,7 @@ for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { dnn_features[f_idx++] = branch_1[curr_lin_idx + ch_idx * spa_stride]; } - dnn_features[f_idx++] = x->log_q; + dnn_features[f_idx++] = part_info->log_q; } else if (bsize == BLOCK_16X16) { int f_idx = 0; const int prev_quad_idx = (quad_tree_idx - 1) / 4; @@ -193,7 +196,7 @@ for (int ch_idx = 0; ch_idx < CNN_BRANCH_2_OUT_CH; ch_idx++) { dnn_features[f_idx++] = branch_2[curr_lin_idx + ch_idx * spa_stride]; } - dnn_features[f_idx++] = x->log_q; + dnn_features[f_idx++] = part_info->log_q; } else if (bsize == BLOCK_8X8) { int f_idx = 0; const int prev_quad_idx = (quad_tree_idx - 1) / 4; @@ -208,7 +211,7 @@ for (int ch_idx = 0; ch_idx < CNN_BRANCH_3_OUT_CH; ch_idx++) { dnn_features[f_idx++] = branch_3[curr_lin_idx + ch_idx * spa_stride]; } - dnn_features[f_idx++] = x->log_q; + dnn_features[f_idx++] = part_info->log_q; } else { assert(0 && "Invalid bsize in intra_cnn partition"); } @@ -713,7 +716,8 @@ assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED); } -BLOCK_SIZE av1_predict_max_partition(AV1_COMP *const cpi, MACROBLOCK *const x, +BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi, + const MACROBLOCK *const x, const float *features) { float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }, probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; @@ -749,7 +753,7 @@ } else if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == ADAPT_PRED) { const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size; - MACROBLOCKD *const xd = &x->e_mbd; + const MACROBLOCKD *const xd = &x->e_mbd; // TODO(debargha): x->source_variance is unavailable at this point, // so compute. The redundant recomputation later can be removed. const unsigned int source_variance =
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h index c149ec8..e89451d 100644 --- a/av1/encoder/partition_strategy.h +++ b/av1/encoder/partition_strategy.h
@@ -78,7 +78,8 @@ float *features); // Predict the maximum BLOCK_SIZE to be used to encoder the current superblock. -BLOCK_SIZE av1_predict_max_partition(AV1_COMP *const cpi, MACROBLOCK *const x, +BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi, + const MACROBLOCK *const x, const float *features); // Attempts an early termination after PARTITION_SPLIT. @@ -200,12 +201,12 @@ // Do not use this criteria for screen content videos. // Since screen content videos could often find good predictors and the largest // block size is likely to be used. -static INLINE int use_auto_max_partition(AV1_COMP *const cpi, +static INLINE int use_auto_max_partition(const AV1_COMP *const cpi, BLOCK_SIZE sb_size, int mi_row, int mi_col) { assert(IMPLIES(cpi->gf_group.size > 0, cpi->gf_group.index < cpi->gf_group.size)); - AV1_COMMON *const cm = &cpi->common; + const AV1_COMMON *const cm = &cpi->common; return !frame_is_intra_only(cm) && !cpi->is_screen_content_type && cpi->sf.part_sf.auto_max_partition_based_on_simple_motion != NOT_IN_USE &&
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c index f12dfed..8f4f0d0 100644 --- a/av1/encoder/rd.c +++ b/av1/encoder/rd.c
@@ -1006,7 +1006,7 @@ pred_mv[num_mv_refs++] = ref_mv1.as_mv; } if (cpi->sf.mv_sf.adaptive_motion_search && - block_size < x->max_partition_size) { + block_size < x->sb_enc.max_partition_size) { pred_mv[num_mv_refs++] = x->pred_mv[ref_frame]; }
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index c6e554c..80772ae 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -112,18 +112,6 @@ return frame_is_kf_gf_arf(cpi); } -static BLOCK_SIZE dim_to_size(int dim) { - switch (dim) { - case 4: return BLOCK_4X4; - case 8: return BLOCK_8X8; - case 16: return BLOCK_16X16; - case 32: return BLOCK_32X32; - case 64: return BLOCK_64X64; - case 128: return BLOCK_128X128; - default: assert(0); return 0; - } -} - static void set_good_speed_feature_framesize_dependent( const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { const AV1_COMMON *const cm = &cpi->common; @@ -1163,21 +1151,10 @@ cpi->mv_search_params.find_fractional_mv_step = av1_return_max_sub_pixel_mv; else if (cpi->oxcf.motion_vector_unit_test == 2) cpi->mv_search_params.find_fractional_mv_step = av1_return_min_sub_pixel_mv; - - MACROBLOCK *const x = &cpi->td.mb; - AV1_COMMON *const cm = &cpi->common; - x->min_partition_size = AOMMAX(sf->part_sf.default_min_partition_size, - dim_to_size(cpi->oxcf.min_partition_size)); - x->max_partition_size = AOMMIN(sf->part_sf.default_max_partition_size, - dim_to_size(cpi->oxcf.max_partition_size)); - x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size); - x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size); } void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) { - AV1_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; - MACROBLOCK *const x = &cpi->td.mb; WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params; const AV1EncoderConfig *const oxcf = &cpi->oxcf; int i; @@ -1255,13 +1232,6 @@ av1_find_best_sub_pixel_tree_pruned_evenmore; } - x->min_partition_size = AOMMAX(sf->part_sf.default_min_partition_size, - dim_to_size(cpi->oxcf.min_partition_size)); - x->max_partition_size = AOMMIN(sf->part_sf.default_max_partition_size, - dim_to_size(cpi->oxcf.max_partition_size)); - x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size); - x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size); 
- // This is only used in motion vector unit test. if (cpi->oxcf.motion_vector_unit_test == 1) mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c index 53bef5e..995ee5e 100644 --- a/av1/encoder/var_based_part.c +++ b/av1/encoder/var_based_part.c
@@ -409,20 +409,21 @@ // Set temporal variance low flag for superblock 64x64. // Only first 25 in the array are used in this case. static AOM_INLINE void set_low_temp_var_flag_64x64( - CommonModeInfoParams *mi_params, MACROBLOCK *x, MACROBLOCKD *xd, - VP64x64 *vt, const int64_t thresholds[], int mi_col, int mi_row) { + CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info, + MACROBLOCKD *xd, VP64x64 *vt, const int64_t thresholds[], int mi_col, + int mi_row) { if (xd->mi[0]->sb_type == BLOCK_64X64) { if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) - x->variance_low[0] = 1; + part_info->variance_low[0] = 1; } else if (xd->mi[0]->sb_type == BLOCK_64X32) { for (int i = 0; i < 2; i++) { if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) - x->variance_low[i + 1] = 1; + part_info->variance_low[i + 1] = 1; } } else if (xd->mi[0]->sb_type == BLOCK_32X64) { for (int i = 0; i < 2; i++) { if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) - x->variance_low[i + 3] = 1; + part_info->variance_low[i + 3] = 1; } } else { static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } }; @@ -440,7 +441,7 @@ if ((*this_mi)->sb_type == BLOCK_32X32) { int64_t threshold_32x32 = (5 * thresholds[1]) >> 3; if (vt->split[i].part_variances.none.variance < threshold_32x32) - x->variance_low[i + 5] = 1; + part_info->variance_low[i + 5] = 1; } else { // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block // inside. 
@@ -450,7 +451,7 @@ for (int j = 0; j < 4; j++) { if (vt->split[i].split[j].part_variances.none.variance < (thresholds[2] >> 8)) - x->variance_low[(i << 2) + j + 9] = 1; + part_info->variance_low[(i << 2) + j + 9] = 1; } } } @@ -459,20 +460,21 @@ } static AOM_INLINE void set_low_temp_var_flag_128x128( - CommonModeInfoParams *mi_params, MACROBLOCK *x, MACROBLOCKD *xd, - VP128x128 *vt, const int64_t thresholds[], int mi_col, int mi_row) { + CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info, + MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col, + int mi_row) { if (xd->mi[0]->sb_type == BLOCK_128X128) { if (vt->part_variances.none.variance < (thresholds[0] >> 1)) - x->variance_low[0] = 1; + part_info->variance_low[0] = 1; } else if (xd->mi[0]->sb_type == BLOCK_128X64) { for (int i = 0; i < 2; i++) { if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) - x->variance_low[i + 1] = 1; + part_info->variance_low[i + 1] = 1; } } else if (xd->mi[0]->sb_type == BLOCK_64X128) { for (int i = 0; i < 2; i++) { if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) - x->variance_low[i + 3] = 1; + part_info->variance_low[i + 3] = 1; } } else { static const int idx64[4][2] = { @@ -490,17 +492,17 @@ const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3; if ((*mi_64)->sb_type == BLOCK_64X64) { if (vt->split[i].part_variances.none.variance < threshold_64x64) - x->variance_low[5 + i] = 1; + part_info->variance_low[5 + i] = 1; } else if ((*mi_64)->sb_type == BLOCK_64X32) { for (int j = 0; j < 2; j++) if (vt->split[i].part_variances.horz[j].variance < (threshold_64x64 >> 1)) - x->variance_low[9 + (i << 1) + j] = 1; + part_info->variance_low[9 + (i << 1) + j] = 1; } else if ((*mi_64)->sb_type == BLOCK_32X64) { for (int j = 0; j < 2; j++) if (vt->split[i].part_variances.vert[j].variance < (threshold_64x64 >> 1)) - x->variance_low[17 + (i << 1) + j] = 1; + part_info->variance_low[17 + (i << 1) + j] = 1; } else { for (int k = 0; k < 4; 
k++) { const int idx_str1 = mi_params->mi_stride * idx32[k][0] + idx32[k][1]; @@ -514,7 +516,7 @@ if ((*mi_32)->sb_type == BLOCK_32X32) { if (vt->split[i].split[k].part_variances.none.variance < threshold_32x32) - x->variance_low[25 + (i << 2) + k] = 1; + part_info->variance_low[25 + (i << 2) + k] = 1; } else { // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block // inside. @@ -526,7 +528,7 @@ .split[k] .split[j] .part_variances.none.variance < (thresholds[3] >> 8)) - x->variance_low[41 + (i << 4) + (k << 2) + j] = 1; + part_info->variance_low[41 + (i << 4) + (k << 2) + j] = 1; } } } @@ -537,9 +539,9 @@ } static AOM_INLINE void set_low_temp_var_flag( - AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, VP128x128 *vt, - int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition, int mi_col, - int mi_row) { + AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd, + VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition, + int mi_col, int mi_row) { AV1_COMMON *const cm = &cpi->common; const int mv_thr = cm->width > 640 ? 8 : 4; // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and @@ -555,11 +557,11 @@ xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64); if (is_small_sb) - set_low_temp_var_flag_64x64(&cm->mi_params, x, xd, &(vt->split[0]), - thresholds, mi_col, mi_row); + set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd, + &(vt->split[0]), thresholds, mi_col, mi_row); else - set_low_temp_var_flag_128x128(&cm->mi_params, x, xd, vt, thresholds, - mi_col, mi_row); + set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt, + thresholds, mi_col, mi_row); } } @@ -861,7 +863,8 @@ // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, // 5-20 for the 16x16 blocks. 
force_split[0] = 0; - memset(x->variance_low, 0, sizeof(x->variance_low)); + memset(x->part_search_info.variance_low, 0, + sizeof(x->part_search_info.variance_low)); if (!is_key_frame) { setup_planes(cpi, x, &y_sad, &y_sad_g, &ref_frame_partition, mi_row, @@ -1022,8 +1025,8 @@ } if (cpi->sf.rt_sf.short_circuit_low_temp_var) { - set_low_temp_var_flag(cpi, x, xd, vt, thresholds, ref_frame_partition, - mi_col, mi_row); + set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds, + ref_frame_partition, mi_col, mi_row); } chroma_check(cpi, x, bsize, y_sad, is_key_frame);