| /* |
| * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include <assert.h> |
| #include <math.h> |
| #include <stdbool.h> |
| #include <string.h> |
| |
| #include "config/aom_dsp_rtcd.h" |
| #include "config/aom_scale_rtcd.h" |
| |
| #include "aom/aom_integer.h" |
| #include "av1/common/av1_common_int.h" |
| #include "av1/common/reconinter.h" |
| #include "av1/encoder/encoder.h" |
| #include "av1/encoder/ethread.h" |
| #include "av1/encoder/pickcdef.h" |
| #include "av1/encoder/mcomp.h" |
| |
| // Get primary and secondary filter strength for the given strength index and |
| // search method |
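| // For example, with CDEF_FULL_SEARCH there are CDEF_SEC_STRENGTHS (4) |
| // secondary strengths per primary strength, so strength_idx 9 decomposes to |
| // pri_strength 9 / 4 = 2 and sec_strength 9 % 4 = 1. The fast search levels |
| // operate on reduced strength sets and remap the reduced indices to actual |
| // strengths through the priconv_*/secconv_* lookup tables. |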
| static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method, |
| int *pri_strength, |
| int *sec_strength, |
| int strength_idx) { |
| const int tot_sec_filter = |
| (pick_method == CDEF_FAST_SEARCH_LVL5) |
| ? REDUCED_SEC_STRENGTHS_LVL5 |
| : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3 |
| : CDEF_SEC_STRENGTHS); |
| const int pri_idx = strength_idx / tot_sec_filter; |
| const int sec_idx = strength_idx % tot_sec_filter; |
| *pri_strength = pri_idx; |
| *sec_strength = sec_idx; |
| if (pick_method == CDEF_FULL_SEARCH) return; |
| |
| switch (pick_method) { |
| case CDEF_FAST_SEARCH_LVL1: |
| assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1); |
| *pri_strength = priconv_lvl1[pri_idx]; |
| break; |
| case CDEF_FAST_SEARCH_LVL2: |
| assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2); |
| *pri_strength = priconv_lvl2[pri_idx]; |
| break; |
| case CDEF_FAST_SEARCH_LVL3: |
| assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2); |
| assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3); |
| *pri_strength = priconv_lvl2[pri_idx]; |
| *sec_strength = secconv_lvl3[sec_idx]; |
| break; |
| case CDEF_FAST_SEARCH_LVL4: |
| assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4); |
| assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3); |
| *pri_strength = priconv_lvl4[pri_idx]; |
| *sec_strength = secconv_lvl3[sec_idx]; |
| break; |
| case CDEF_FAST_SEARCH_LVL5: |
| assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4); |
| assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5); |
| *pri_strength = priconv_lvl5[pri_idx]; |
| *sec_strength = secconv_lvl5[sec_idx]; |
| break; |
| default: assert(0 && "Invalid CDEF search method"); |
| } |
| } |
| |
| // Store CDEF filter strength calculated from strength index for given search |
| // method |
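| // The packed value follows the cdef_strengths[]/cdef_uv_strengths[] |
| // convention of pri_strength * CDEF_SEC_STRENGTHS + sec_strength, e.g. a |
| // primary strength of 2 and a secondary strength of 1 pack to 2 * 4 + 1 = 9. |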
| #define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \ |
| do { \ |
| get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength, \ |
| (strength_idx)); \ |
| cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength; \ |
| } while (0) |
| |
| /* Search for the best strength to add as an option, knowing we |
| already selected nb_strengths options. */ |
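| /* For each candidate strength j, accumulate over all filter blocks the |
| smaller of the block's best MSE among the already-selected strengths and |
| its MSE with j, then append the candidate with the smallest total to lev[]. */ |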
| static uint64_t search_one(int *lev, int nb_strengths, |
| uint64_t mse[][TOTAL_STRENGTHS], int sb_count, |
| CDEF_PICK_METHOD pick_method) { |
| uint64_t tot_mse[TOTAL_STRENGTHS]; |
| const int total_strengths = nb_cdef_strengths[pick_method]; |
| int i, j; |
| uint64_t best_tot_mse = (uint64_t)1 << 63; |
| int best_id = 0; |
| memset(tot_mse, 0, sizeof(tot_mse)); |
| for (i = 0; i < sb_count; i++) { |
| int gi; |
| uint64_t best_mse = (uint64_t)1 << 63; |
| /* Find best mse among already selected options. */ |
| for (gi = 0; gi < nb_strengths; gi++) { |
| if (mse[i][lev[gi]] < best_mse) { |
| best_mse = mse[i][lev[gi]]; |
| } |
| } |
| /* Find best mse when adding each possible new option. */ |
| for (j = 0; j < total_strengths; j++) { |
| uint64_t best = best_mse; |
| if (mse[i][j] < best) best = mse[i][j]; |
| tot_mse[j] += best; |
| } |
| } |
| for (j = 0; j < total_strengths; j++) { |
| if (tot_mse[j] < best_tot_mse) { |
| best_tot_mse = tot_mse[j]; |
| best_id = j; |
| } |
| } |
| lev[nb_strengths] = best_id; |
| return best_tot_mse; |
| } |
| |
| /* Search for the best luma+chroma strength to add as an option, knowing we |
| already selected nb_strengths options. */ |
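| /* Same greedy step as search_one(), except that a candidate is a |
| (luma, chroma) strength pair and the per-block cost is the sum of the luma |
| and chroma MSEs. */ |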
| static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, |
| uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, |
| CDEF_PICK_METHOD pick_method) { |
| uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS]; |
| int i, j; |
| uint64_t best_tot_mse = (uint64_t)1 << 63; |
| int best_id0 = 0; |
| int best_id1 = 0; |
| const int total_strengths = nb_cdef_strengths[pick_method]; |
| memset(tot_mse, 0, sizeof(tot_mse)); |
| for (i = 0; i < sb_count; i++) { |
| int gi; |
| uint64_t best_mse = (uint64_t)1 << 63; |
| /* Find best mse among already selected options. */ |
| for (gi = 0; gi < nb_strengths; gi++) { |
| uint64_t curr = mse[0][i][lev0[gi]]; |
| curr += mse[1][i][lev1[gi]]; |
| if (curr < best_mse) { |
| best_mse = curr; |
| } |
| } |
| /* Find best mse when adding each possible new option. */ |
| for (j = 0; j < total_strengths; j++) { |
| int k; |
| for (k = 0; k < total_strengths; k++) { |
| uint64_t best = best_mse; |
| uint64_t curr = mse[0][i][j]; |
| curr += mse[1][i][k]; |
| if (curr < best) best = curr; |
| tot_mse[j][k] += best; |
| } |
| } |
| } |
| for (j = 0; j < total_strengths; j++) { |
| int k; |
| for (k = 0; k < total_strengths; k++) { |
| if (tot_mse[j][k] < best_tot_mse) { |
| best_tot_mse = tot_mse[j][k]; |
| best_id0 = j; |
| best_id1 = k; |
| } |
| } |
| } |
| lev0[nb_strengths] = best_id0; |
| lev1[nb_strengths] = best_id1; |
| return best_tot_mse; |
| } |
| |
| /* Search for the set of strengths that minimizes mse. */ |
| static uint64_t joint_strength_search(int *best_lev, int nb_strengths, |
| uint64_t mse[][TOTAL_STRENGTHS], |
| int sb_count, |
| CDEF_PICK_METHOD pick_method) { |
| uint64_t best_tot_mse; |
| int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && |
| pick_method <= CDEF_FAST_SEARCH_LVL5); |
| int i; |
| best_tot_mse = (uint64_t)1 << 63; |
| /* Greedy search: add one strength option at a time. */ |
| for (i = 0; i < nb_strengths; i++) { |
| best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method); |
| } |
| /* Trying to refine the greedy search by reconsidering each |
| already-selected option. */ |
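| /* Each pass drops the oldest selection by shifting the remaining entries |
| down, then re-runs the greedy step so the freed slot can be re-chosen |
| against the current set; 4 * nb_strengths passes give every entry several |
| chances to be replaced. */ |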
| if (!fast) { |
| for (i = 0; i < 4 * nb_strengths; i++) { |
| int j; |
| for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1]; |
| best_tot_mse = |
| search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method); |
| } |
| } |
| return best_tot_mse; |
| } |
| |
| /* Search for the set of luma+chroma strengths that minimizes mse. */ |
| static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1, |
| int nb_strengths, |
| uint64_t (**mse)[TOTAL_STRENGTHS], |
| int sb_count, |
| CDEF_PICK_METHOD pick_method) { |
| uint64_t best_tot_mse; |
| int i; |
| best_tot_mse = (uint64_t)1 << 63; |
| /* Greedy search: add one strength option at a time. */ |
| for (i = 0; i < nb_strengths; i++) { |
| best_tot_mse = |
| search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method); |
| } |
| /* Trying to refine the greedy search by reconsidering each |
| already-selected option. */ |
| for (i = 0; i < 4 * nb_strengths; i++) { |
| int j; |
| for (j = 0; j < nb_strengths - 1; j++) { |
| best_lev0[j] = best_lev0[j + 1]; |
| best_lev1[j] = best_lev1[j + 1]; |
| } |
| best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, |
| sb_count, pick_method); |
| } |
| return best_tot_mse; |
| } |
| |
| static inline void init_src_params(int *src_stride, int *width, int *height, |
| int *width_log2, int *height_log2, |
| BLOCK_SIZE bsize) { |
| *src_stride = block_size_wide[bsize]; |
| *width = block_size_wide[bsize]; |
| *height = block_size_high[bsize]; |
| *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; |
| *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize]; |
| } |
| #if CONFIG_AV1_HIGHBITDEPTH |
| /* Compute MSE only on the blocks we filtered. */ |
| static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src, |
| cdef_list *dlist, int cdef_count, |
| BLOCK_SIZE bsize, int coeff_shift, |
| int row, int col) { |
| assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || |
| bsize == BLOCK_8X8); |
| uint64_t sum = 0; |
| int bi, bx, by; |
| uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst); |
| uint16_t *dst_buff = &dst16[row * dstride + col]; |
| int src_stride, width, height, width_log2, height_log2; |
| init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, |
| bsize); |
| for (bi = 0; bi < cdef_count; bi++) { |
| by = dlist[bi].by; |
| bx = dlist[bi].bx; |
| sum += aom_mse_wxh_16bit_highbd( |
| &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride, |
| &src[bi << (height_log2 + width_log2)], src_stride, width, height); |
| } |
| return sum >> 2 * coeff_shift; |
| } |
| #endif |
| |
| // Checks whether dual or quad block processing is applicable for block |
| // widths 8 and 4, respectively. |
| static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width, |
| int cdef_count, int bi, int iter) { |
| assert(width == 8 || width == 4); |
| const int blk_offset = (width == 8) ? 1 : 3; |
| if ((iter + blk_offset) >= cdef_count) return 0; |
| |
| if (dlist[bi].by == dlist[bi + blk_offset].by && |
| dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx) |
| return 1; |
| |
| return 0; |
| } |
| |
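| /* Compute MSE only on the blocks we filtered (low bitdepth path). When the |
| cdef_list contains two or four horizontally adjacent filter units on the |
| same row, their MSE is computed with a single 16-pixel-wide call to reduce |
| per-block overhead. */ |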
| static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src, |
| cdef_list *dlist, int cdef_count, |
| BLOCK_SIZE bsize, int coeff_shift, int row, |
| int col) { |
| assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || |
| bsize == BLOCK_8X8); |
| uint64_t sum = 0; |
| int bi, bx, by; |
| int iter = 0; |
| int inc = 1; |
| uint8_t *dst8 = (uint8_t *)dst; |
| uint8_t *dst_buff = &dst8[row * dstride + col]; |
| int src_stride, width, height, width_log2, height_log2; |
| init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, |
| bsize); |
| |
| const int num_blks = 16 / width; |
| for (bi = 0; bi < cdef_count; bi += inc) { |
| by = dlist[bi].by; |
| bx = dlist[bi].bx; |
| uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)]; |
| uint8_t *dst_tmp = |
| &dst_buff[(by << height_log2) * dstride + (bx << width_log2)]; |
| |
| if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) { |
| sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height); |
| iter += num_blks; |
| inc = num_blks; |
| } else { |
| sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width, |
| height); |
| iter += 1; |
| inc = 1; |
| } |
| } |
| |
| return sum >> 2 * coeff_shift; |
| } |
| |
| // Fill the boundary regions of the block with CDEF_VERY_LARGE, but only for |
| // regions that lie outside the frame boundary. |
| static inline void fill_borders_for_fbs_on_frame_boundary( |
| uint16_t *inbuf, int hfilt_size, int vfilt_size, |
| bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary, |
| bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) { |
| if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary && |
| !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary) |
| return; |
| if (is_fb_on_frm_bottom_boundary) { |
| // Fill bottom region of the block |
| const int buf_offset = |
| (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER; |
| fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size, |
| CDEF_VERY_LARGE); |
| } |
| if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) { |
| const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE; |
| // Fill bottom-left region of the block |
| fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, |
| CDEF_VERY_LARGE); |
| } |
| if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) { |
| const int buf_offset = |
| (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER; |
| // Fill bottom-right region of the block |
| fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, |
| CDEF_VERY_LARGE); |
| } |
| if (is_fb_on_frm_top_boundary) { |
| // Fill top region of the block |
| fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size, |
| CDEF_VERY_LARGE); |
| } |
| if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) { |
| // Fill top-left region of the block |
| fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); |
| } |
| if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) { |
| const int buf_offset = hfilt_size + CDEF_HBORDER; |
| // Fill top-right region of the block |
| fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, |
| CDEF_VERY_LARGE); |
| } |
| if (is_fb_on_frm_left_boundary) { |
| const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE; |
| // Fill left region of the block |
| fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER, |
| CDEF_VERY_LARGE); |
| } |
| if (is_fb_on_frm_right_boundary) { |
| const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE; |
| // Fill right region of the block |
| fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE, |
| vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE); |
| } |
| } |
| |
| // Calculates the number of 8x8/4x4 filter units for which SSE can be |
| // computed in a single function call after CDEF filtering. |
| static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units( |
| cdef_list *dlist, int cdef_count, int bi, int subsampling_x, |
| int subsampling_y) { |
| // TODO(Ranjit): Extend the optimization for 422 |
| if (subsampling_x != subsampling_y) return 1; |
| |
| // Combining more blocks seems to increase encode time due to the increase |
| // in control code. |
| if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by && |
| dlist[bi].bx + 3 == dlist[bi + 3].bx) { |
| /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific |
| * logic if y co-ordinates match and x co-ordinates are |
| * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */ |
| return 4; |
| } |
| if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by && |
| dlist[bi].bx + 1 == dlist[bi + 1].bx) { |
| /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific |
| * logic if their y co-ordinates match and x co-ordinates are |
| * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */ |
| return 2; |
| } |
| return 1; |
| } |
| |
| // Returns the block error after CDEF filtering for a given strength |
| static inline uint64_t get_filt_error( |
| const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd, |
| cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit, |
| int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer, |
| int ref_stride, int row, int col, int pri_strength, int sec_strength, |
| int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) { |
| uint64_t curr_sse = 0; |
| const BLOCK_SIZE plane_bsize = |
| get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y); |
| const int bw_log2 = 3 - pd->subsampling_x; |
| const int bh_log2 = 3 - pd->subsampling_y; |
| |
| // TODO(Ranjit): Extend this optimization for HBD |
| if (!cdef_search_ctx->use_highbitdepth) { |
| // If all 8x8/4x4 blocks in the CDEF block need to be filtered, calculate |
| // the error at the CDEF block level. |
| const int tot_blk_count = |
| (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >> |
| (bw_log2 + bh_log2); |
| if (cdef_count == tot_blk_count) { |
| // Calculate the offset in the buffer based on block position |
| const FULLPEL_MV this_mv = { row, col }; |
| const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); |
| if (pri_strength == 0 && sec_strength == 0) { |
| // When the CDEF strength is zero, filtering is not applied, so the error |
| // is calculated between the source and the unfiltered pixels. |
| curr_sse = |
| aom_sse(&ref_buffer[buf_offset], ref_stride, |
| get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride, |
| block_size_wide[plane_bsize], block_size_high[plane_bsize]); |
| } else { |
| DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
| |
| av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in, |
| cdef_search_ctx->xdec[pli], |
| cdef_search_ctx->ydec[pli], dir, dirinit, var, pli, |
| dlist, cdef_count, pri_strength, |
| sec_strength + (sec_strength == 3), |
| cdef_search_ctx->damping, coeff_shift); |
| curr_sse = |
| aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8, |
| (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize], |
| block_size_high[plane_bsize]); |
| } |
| } else { |
| // If only a subset of the 8x8/4x4 blocks in the CDEF block need to be |
| // filtered, the filtering functions produce 8-bit output and the error is |
| // calculated in the 8-bit domain. |
| if (pri_strength == 0 && sec_strength == 0) { |
| int num_error_calc_filt_units = 1; |
| for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) { |
| const uint8_t by = dlist[bi].by; |
| const uint8_t bx = dlist[bi].bx; |
| const int16_t by_pos = (by << bh_log2); |
| const int16_t bx_pos = (bx << bw_log2); |
| // Calculate the offset in the buffer based on block position |
| const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos }; |
| const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); |
| num_error_calc_filt_units = get_error_calc_width_in_filt_units( |
| dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y); |
| curr_sse += aom_sse( |
| &ref_buffer[buf_offset], ref_stride, |
| get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride, |
| num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2)); |
| } |
| } else { |
| DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
| av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in, |
| cdef_search_ctx->xdec[pli], |
| cdef_search_ctx->ydec[pli], dir, dirinit, var, pli, |
| dlist, cdef_count, pri_strength, |
| sec_strength + (sec_strength == 3), |
| cdef_search_ctx->damping, coeff_shift); |
| int num_error_calc_filt_units = 1; |
| for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) { |
| const uint8_t by = dlist[bi].by; |
| const uint8_t bx = dlist[bi].bx; |
| const int16_t by_pos = (by << bh_log2); |
| const int16_t bx_pos = (bx << bw_log2); |
| // Calculate the offset in the buffer based on block position |
| const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos }; |
| const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos }; |
| const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); |
| const int tmp_buf_offset = |
| get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2)); |
| num_error_calc_filt_units = get_error_calc_width_in_filt_units( |
| dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y); |
| curr_sse += aom_sse( |
| &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset], |
| (1 << MAX_SB_SIZE_LOG2), |
| num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2)); |
| } |
| } |
| } |
| } else { |
| DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
| |
| av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, |
| cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli], |
| dir, dirinit, var, pli, dlist, cdef_count, pri_strength, |
| sec_strength + (sec_strength == 3), |
| cdef_search_ctx->damping, coeff_shift); |
| curr_sse = cdef_search_ctx->compute_cdef_dist_fn( |
| ref_buffer, ref_stride, tmp_dst, dlist, cdef_count, |
| cdef_search_ctx->bsize[pli], coeff_shift, row, col); |
| } |
| return curr_sse; |
| } |
| |
| // Calculates MSE at block level. |
| // Inputs: |
| // cdef_search_ctx: Pointer to the structure containing parameters related to |
| // CDEF search context. |
| // fbr: Row index in units of 64x64 block |
| // fbc: Column index in units of 64x64 block |
| // Returns: |
| // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
| void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, |
| struct aom_internal_error_info *error_info, |
| int fbr, int fbc, int sb_count) { |
| // TODO(aomedia:3276): Pass error_info to the low-level functions as required |
| // in future to handle error propagation. |
| (void)error_info; |
| const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params; |
| const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref; |
| const int coeff_shift = cdef_search_ctx->coeff_shift; |
| const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2; |
| const int *mi_high_l2 = cdef_search_ctx->mi_high_l2; |
| |
| // Declare and initialize the temporary buffers. |
| DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]); |
| cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128]; |
| int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; |
| int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; |
| uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER; |
| int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc); |
| int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr); |
| int hb_step = 1, vb_step = 1; |
| BLOCK_SIZE bs; |
| |
| const MB_MODE_INFO *const mbmi = |
| mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride + |
| MI_SIZE_64X64 * fbc]; |
| |
| uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer, |
| ref->v_buffer }; |
| int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride, |
| ref->uv_stride }; |
| |
| if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 || |
| mbmi->bsize == BLOCK_64X128) { |
| bs = mbmi->bsize; |
| if (bs == BLOCK_128X128 || bs == BLOCK_128X64) { |
| nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc); |
| hb_step = 2; |
| } |
| if (bs == BLOCK_128X128 || bs == BLOCK_64X128) { |
| nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr); |
| vb_step = 2; |
| } |
| } else { |
| bs = BLOCK_64X64; |
| } |
| // Get the number of 8x8 blocks that are not skipped. CDEF processing |
| // happens only for non-skip 8x8 blocks. |
| const int cdef_count = av1_cdef_compute_sb_list( |
| mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs); |
| const bool is_fb_on_frm_left_boundary = (fbc == 0); |
| const bool is_fb_on_frm_right_boundary = |
| (fbc + hb_step == cdef_search_ctx->nhfb); |
| const bool is_fb_on_frm_top_boundary = (fbr == 0); |
| const bool is_fb_on_frm_bottom_boundary = |
| (fbr + vb_step == cdef_search_ctx->nvfb); |
| const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary); |
| const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary); |
| int dirinit = 0; |
| for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) { |
| /* We avoid filtering the pixels for which some of the pixels to |
| average are outside the frame. We could change the filter instead, |
| but it would add special cases for any future vectorization. */ |
| const int hfilt_size = (nhb << mi_wide_l2[pli]); |
| const int vfilt_size = (nvb << mi_high_l2[pli]); |
| const int ysize = |
| vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff; |
| const int xsize = |
| hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff; |
| const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli]; |
| const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli]; |
| struct macroblockd_plane pd = cdef_search_ctx->plane[pli]; |
| cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE, |
| pd.dst.buf, row - yoff, col - xoff, pd.dst.stride, |
| ysize, xsize); |
| fill_borders_for_fbs_on_frame_boundary( |
| inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary, |
| is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary, |
| is_fb_on_frm_bottom_boundary); |
| for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) { |
| int pri_strength, sec_strength; |
| get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength, |
| &sec_strength, gi); |
| const uint64_t curr_mse = get_filt_error( |
| cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli], |
| ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count, |
| pli, coeff_shift, bs); |
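| // The two chroma planes share one set of strengths, so the V-plane |
| // (pli == 2) MSE is accumulated into the U-plane entry (index 1). |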
| if (pli < 2) |
| cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse; |
| else |
| cdef_search_ctx->mse[1][sb_count][gi] += curr_mse; |
| } |
| } |
| cdef_search_ctx->sb_index[sb_count] = |
| MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc; |
| } |
| |
| // MSE calculation at frame level. |
| // Inputs: |
| // cdef_search_ctx: Pointer to the structure containing parameters related to |
| // CDEF search context. |
| // Returns: |
| // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
| static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx, |
| struct aom_internal_error_info *error_info) { |
| // Loop over each sb. |
| for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) { |
| for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) { |
| // Check if cdef processing can be skipped for a particular sb. |
| if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue; |
| // Calculate mse for each sb and store the relevant sb index. |
| av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc, |
| cdef_search_ctx->sb_count); |
| cdef_search_ctx->sb_count++; |
| } |
| } |
| } |
| |
| // Allocates memory for members of CdefSearchCtx. |
| // Inputs: |
| // cdef_search_ctx: Pointer to the structure containing parameters |
| // related to CDEF search context. |
| // Returns: |
| // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
| static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) { |
| const int nvfb = cdef_search_ctx->nvfb; |
| const int nhfb = cdef_search_ctx->nhfb; |
| CHECK_MEM_ERROR( |
| cm, cdef_search_ctx->sb_index, |
| aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0]))); |
| cdef_search_ctx->sb_count = 0; |
| CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0], |
| aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb)); |
| CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1], |
| aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb)); |
| } |
| |
| // Deallocates the memory allocated for members of CdefSearchCtx. |
| // Inputs: |
| // cdef_search_ctx: Pointer to the structure containing parameters |
| // related to CDEF search context. |
| // Returns: |
| // Nothing will be returned. |
| void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) { |
| if (cdef_search_ctx) { |
| aom_free(cdef_search_ctx->mse[0]); |
| cdef_search_ctx->mse[0] = NULL; |
| aom_free(cdef_search_ctx->mse[1]); |
| cdef_search_ctx->mse[1] = NULL; |
| aom_free(cdef_search_ctx->sb_index); |
| cdef_search_ctx->sb_index = NULL; |
| } |
| } |
| |
| // Initialize the parameters related to CDEF search context. |
| // Inputs: |
| // frame: Pointer to compressed frame buffer |
| // ref: Pointer to the frame buffer holding the source frame |
| // cm: Pointer to top level common structure |
| // xd: Pointer to common current coding block structure |
| // cdef_search_ctx: Pointer to the structure containing parameters related to |
| // CDEF search context. |
| // pick_method: Search method used to select CDEF parameters |
| // Returns: |
| // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
| static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame, |
| const YV12_BUFFER_CONFIG *ref, |
| AV1_COMMON *cm, MACROBLOCKD *xd, |
| CdefSearchCtx *cdef_search_ctx, |
| CDEF_PICK_METHOD pick_method) { |
| const CommonModeInfoParams *const mi_params = &cm->mi_params; |
| const int num_planes = av1_num_planes(cm); |
| cdef_search_ctx->mi_params = &cm->mi_params; |
| cdef_search_ctx->ref = ref; |
| cdef_search_ctx->nvfb = |
| (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
| cdef_search_ctx->nhfb = |
| (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
| cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0); |
| cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6); |
| cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method]; |
| cdef_search_ctx->num_planes = num_planes; |
| cdef_search_ctx->pick_method = pick_method; |
| cdef_search_ctx->sb_count = 0; |
| cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth; |
| av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0, |
| num_planes); |
| // Initialize plane-wise information. |
| for (int pli = 0; pli < num_planes; pli++) { |
| cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x; |
| cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y; |
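| // Per-plane filter unit size: an 8x8 luma unit scaled down by the chroma |
| // subsampling, i.e. 8x8, 8x4, 4x8 or 4x4. |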
| cdef_search_ctx->bsize[pli] = |
| cdef_search_ctx->ydec[pli] |
| ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4) |
| : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8); |
| cdef_search_ctx->mi_wide_l2[pli] = |
| MI_SIZE_LOG2 - xd->plane[pli].subsampling_x; |
| cdef_search_ctx->mi_high_l2[pli] = |
| MI_SIZE_LOG2 - xd->plane[pli].subsampling_y; |
| cdef_search_ctx->plane[pli] = xd->plane[pli]; |
| } |
| // Function pointer initialization. |
| #if CONFIG_AV1_HIGHBITDEPTH |
| if (cm->seq_params->use_highbitdepth) { |
| cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd; |
| cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd; |
| } else { |
| cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd; |
| cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; |
| } |
| #else |
| cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd; |
| cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; |
| #endif |
| } |
| |
| void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef, |
| int is_screen_content) { |
| const int bd = cm->seq_params->bit_depth; |
| const int q = |
| av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8); |
| CdefInfo *const cdef_info = &cm->cdef_info; |
| // Check the speed feature to avoid extra signaling. |
| if (skip_cdef) { |
| cdef_info->cdef_bits = 1; |
| cdef_info->nb_cdef_strengths = 2; |
| } else { |
| cdef_info->cdef_bits = 0; |
| cdef_info->nb_cdef_strengths = 1; |
| } |
| cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6); |
| |
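| // Predict the luma/chroma primary (f1) and secondary (f2) strengths from q |
| // with quadratic fits, then pack each pair below as |
| // pri_strength * CDEF_SEC_STRENGTHS + sec_strength. |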
| int predicted_y_f1 = 0; |
| int predicted_y_f2 = 0; |
| int predicted_uv_f1 = 0; |
| int predicted_uv_f2 = 0; |
| if (is_screen_content) { |
| predicted_y_f1 = |
| (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02); |
| predicted_y_f2 = |
| (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01); |
| predicted_uv_f1 = |
| (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01); |
| predicted_uv_f2 = |
| (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0); |
| predicted_y_f1 = clamp(predicted_y_f1, 0, 15); |
| predicted_y_f2 = clamp(predicted_y_f2, 0, 3); |
| predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15); |
| predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3); |
| } else { |
| if (!frame_is_intra_only(cm)) { |
| predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f + |
| q * 0.0068615186f + 0.02709886f), |
| 0, 15); |
| predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f + |
| q * 0.0013993345f + 0.03831067f), |
| 0, 3); |
| predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f + |
| q * 0.0034628846f + 0.00887099f), |
| 0, 15); |
| predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f + |
| q * 0.00028223585f + 0.05576307f), |
| 0, 3); |
| } else { |
| predicted_y_f1 = clamp( |
| (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f), |
| 0, 15); |
| predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f + |
| q * 0.0027798624f + 0.0079405f), |
| 0, 3); |
| predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f + |
| q * 0.012892405f - 0.00748388f), |
| 0, 15); |
| predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f + |
| q * 0.00035520183f + 0.00228092f), |
| 0, 3); |
| } |
| } |
| cdef_info->cdef_strengths[0] = |
| predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2; |
| cdef_info->cdef_uv_strengths[0] = |
| predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2; |
| |
| // mbmi->cdef_strength is already set in the encoding stage. We don't need to |
| // set it again here. |
| if (skip_cdef) { |
| cdef_info->cdef_strengths[1] = 0; |
| cdef_info->cdef_uv_strengths[1] = 0; |
| return; |
| } |
| |
| const CommonModeInfoParams *const mi_params = &cm->mi_params; |
| const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
| const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
| MB_MODE_INFO **mbmi = mi_params->mi_grid_base; |
| // mbmi is NULL when real-time rate control library is used. |
| if (!mbmi) return; |
| for (int r = 0; r < nvfb; ++r) { |
| for (int c = 0; c < nhfb; ++c) { |
| MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c]; |
| current_mbmi->cdef_strength = 0; |
| } |
| mbmi += MI_SIZE_64X64 * mi_params->mi_stride; |
| } |
| } |
| |
| void av1_cdef_search(AV1_COMP *cpi) { |
| AV1_COMMON *cm = &cpi->common; |
| CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control; |
| |
| assert(cdef_control != CDEF_NONE); |
| // For CDEF_ADAPTIVE, turning off CDEF below qindex 100 was found to be best |
| // for still pictures. |
| if ((cdef_control == CDEF_REFERENCE && |
| cpi->ppi->rtc_ref.non_reference_frame) || |
| (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA && |
| (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) && |
| cpi->oxcf.rc_cfg.cq_level < 100)) { |
| CdefInfo *const cdef_info = &cm->cdef_info; |
| cdef_info->nb_cdef_strengths = 1; |
| cdef_info->cdef_bits = 0; |
| cdef_info->cdef_strengths[0] = 0; |
| cdef_info->cdef_uv_strengths[0] = 0; |
| return; |
| } |
| |
| // Indicate if external RC is used for testing |
| const int rtc_ext_rc = cpi->rc.rtc_external_ratectrl; |
| if (rtc_ext_rc) { |
| av1_pick_cdef_from_qp(cm, 0, 0); |
| return; |
| } |
| CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method; |
| if (pick_method == CDEF_PICK_FROM_Q) { |
| const int use_screen_content_model = |
| cm->quant_params.base_qindex > |
| AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh, |
| cpi->rc.best_quality + 5) && |
| cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN; |
| av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb, |
| use_screen_content_model); |
| return; |
| } |
| const CommonModeInfoParams *const mi_params = &cm->mi_params; |
| const int damping = 3 + (cm->quant_params.base_qindex >> 6); |
| const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && |
| pick_method <= CDEF_FAST_SEARCH_LVL5); |
| const int num_planes = av1_num_planes(cm); |
| MACROBLOCKD *xd = &cpi->td.mb.e_mbd; |
| |
| if (!cpi->cdef_search_ctx) |
| CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx, |
| aom_malloc(sizeof(*cpi->cdef_search_ctx))); |
| CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx; |
| |
| // Initialize parameters related to CDEF search context. |
| cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx, |
| pick_method); |
| // Allocate CDEF search context buffers. |
| cdef_alloc_data(cm, cdef_search_ctx); |
| // Frame level mse calculation. |
| if (cpi->mt_info.num_workers > 1) { |
| av1_cdef_mse_calc_frame_mt(cpi); |
| } else { |
| cdef_mse_calc_frame(cdef_search_ctx, cm->error); |
| } |
| |
| /* Search over different numbers of signaling bits. */ |
| int nb_strength_bits = 0; |
| uint64_t best_rd = UINT64_MAX; |
| CdefInfo *const cdef_info = &cm->cdef_info; |
| int sb_count = cdef_search_ctx->sb_count; |
| uint64_t(*mse[2])[TOTAL_STRENGTHS]; |
| mse[0] = cdef_search_ctx->mse[0]; |
| mse[1] = cdef_search_ctx->mse[1]; |
| /* Calculate the maximum number of bits required to signal CDEF strengths at |
| * block level */ |
| const int total_strengths = nb_cdef_strengths[pick_method]; |
| const int joint_strengths = |
| num_planes > 1 ? total_strengths * total_strengths : total_strengths; |
| const int max_signaling_bits = |
| joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1; |
| int rdmult = cpi->td.mb.rdmult; |
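| // Evaluate signaling 1, 2, 4 or 8 strengths (i bits per SB): the rate is |
| // i bits per SB for the strength index plus CDEF_STRENGTH_BITS per signaled |
| // strength (doubled when chroma is present); the distortion is the |
| // accumulated MSE of the best assignment, scaled by 16. |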
| for (int i = 0; i <= 3; i++) { |
| if (i > max_signaling_bits) break; |
| int best_lev0[CDEF_MAX_STRENGTHS] = { 0 }; |
| int best_lev1[CDEF_MAX_STRENGTHS] = { 0 }; |
| const int nb_strengths = 1 << i; |
| uint64_t tot_mse; |
| if (num_planes > 1) { |
| tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, |
| mse, sb_count, pick_method); |
| } else { |
| tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, |
| pick_method); |
| } |
| |
| const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * |
| (num_planes > 1 ? 2 : 1); |
| const int rate_cost = av1_cost_literal(total_bits); |
| const uint64_t dist = tot_mse * 16; |
| const uint64_t rd = RDCOST(rdmult, rate_cost, dist); |
| if (rd < best_rd) { |
| best_rd = rd; |
| nb_strength_bits = i; |
| memcpy(cdef_info->cdef_strengths, best_lev0, |
| nb_strengths * sizeof(best_lev0[0])); |
| if (num_planes > 1) { |
| memcpy(cdef_info->cdef_uv_strengths, best_lev1, |
| nb_strengths * sizeof(best_lev1[0])); |
| } |
| } |
| } |
| |
| cdef_info->cdef_bits = nb_strength_bits; |
| cdef_info->nb_cdef_strengths = 1 << nb_strength_bits; |
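| // For each stored SB, pick the signaled strength index whose luma (plus |
| // chroma, if present) MSE is smallest and record it in the SB's mode info. |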
| for (int i = 0; i < sb_count; i++) { |
| uint64_t best_mse = UINT64_MAX; |
| int best_gi = 0; |
| for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) { |
| uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]]; |
| if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]]; |
| if (curr < best_mse) { |
| best_gi = gi; |
| best_mse = curr; |
| } |
| } |
| mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength = |
| best_gi; |
| } |
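| // For the fast search methods the selected values are indices into the |
| // reduced strength sets; convert them back to actual primary/secondary |
| // strengths before they are signaled. |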
| if (fast) { |
| for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) { |
| const int luma_strength = cdef_info->cdef_strengths[j]; |
| const int chroma_strength = cdef_info->cdef_uv_strengths[j]; |
| int pri_strength, sec_strength; |
| |
| STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method, |
| luma_strength); |
| STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method, |
| chroma_strength); |
| } |
| } |
| |
| cdef_info->cdef_damping = damping; |
| // Deallocate CDEF search context buffers. |
| av1_cdef_dealloc_data(cdef_search_ctx); |
| } |