Fix CCSO filter unit size (CONFIG_CCSO_FU_BUGFIX)
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h index 22c233d..69d1325 100644 --- a/av1/common/av1_common_int.h +++ b/av1/common/av1_common_int.h
@@ -2209,12 +2209,21 @@ if (buf->ccso_info.sb_filter_control[pli]) { aom_free(buf->ccso_info.sb_filter_control[pli]); } +#if CONFIG_CCSO_FU_BUGFIX + const int log2_filter_unit_size_y = + pli == 0 ? CCSO_BLK_SIZE + : CCSO_BLK_SIZE - cm->seq_params.subsampling_y; + const int log2_filter_unit_size_x = + pli == 0 ? CCSO_BLK_SIZE + : CCSO_BLK_SIZE - cm->seq_params.subsampling_x; +#else const int log2_filter_unit_size_y = pli > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + cm->seq_params.subsampling_y; const int log2_filter_unit_size_x = pli > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + cm->seq_params.subsampling_x; +#endif const int ccso_nvfb = ((cm->mi_params.mi_rows >> (pli ? cm->seq_params.subsampling_y : 0)) + @@ -2231,6 +2240,9 @@ 32, sizeof(*buf->ccso_info.sb_filter_control[pli]) * sb_count)); memset(buf->ccso_info.sb_filter_control[pli], 0, sizeof(*buf->ccso_info.sb_filter_control[pli]) * sb_count); +#if CONFIG_CCSO_DEBUG + printf("CCSO: plane %d nvfb %d nhfb %d sb_count %d @ %s\n", pli, ccso_nvfb, ccso_nhfb, sb_count, __FUNCTION__); +#endif } } #endif // CONFIG_CCSO_IMPROVE
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 1c9a2b4..1e089f7 100644 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -620,31 +620,63 @@ } # Cross-component Sample Offset -add_proto qw/void ccso_filter_block_hbd_wo_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const int x, const int y, const int pic_width, const int pic_height, int *src_cls, const int8_t *offset_buf, const int scaled_ext_stride, const int dst_stride, const int y_uv_hscale, const int y_uv_vscale, const int thr, const int neg_thr, const int *src_loc, const int max_val, const int blk_size, const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only"; +if (aom_config("CONFIG_CCSO_FU_BUGFIX") eq "yes") { + add_proto qw/void ccso_filter_block_hbd_wo_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const int x, const int y, const int pic_width, const int pic_height, int *src_cls, const int8_t *offset_buf, const int scaled_ext_stride, const int dst_stride, const int y_uv_hscale, const int y_uv_vscale, const int thr, const int neg_thr, const int *src_loc, const int max_val, const int blk_size_x, const int blk_size_y, const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only"; +} +else { + add_proto qw/void ccso_filter_block_hbd_wo_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const int x, const int y, const int pic_width, const int pic_height, int *src_cls, const int8_t *offset_buf, const int scaled_ext_stride, const int dst_stride, const int y_uv_hscale, const int y_uv_vscale, const int thr, const int neg_thr, const int *src_loc, const int max_val, const int blk_size, const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only"; +} specialize qw/ccso_filter_block_hbd_wo_buf avx2/; if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { - add_proto qw/void ccso_filter_block_hbd_with_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, - const int src_y_stride, const int dst_stride, - const int ccso_stride, - const int x, const int y, - const int pic_width, const 
int pic_height, - const int8_t *filter_offset, const int blk_size, - const int y_uv_hscale, const int y_uv_vscale, - const int max_val, const uint8_t shift_bits, - const uint8_t ccso_bo_only"; - specialize qw/ccso_filter_block_hbd_with_buf avx2/; + if (aom_config("CONFIG_CCSO_FU_BUGFIX") eq "yes") { + add_proto qw/void ccso_filter_block_hbd_with_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, + const int src_y_stride, const int dst_stride, + const int ccso_stride, + const int x, const int y, + const int pic_width, const int pic_height, + const int8_t *filter_offset, const int blk_size_x, + const int blk_size_y, + const int y_uv_hscale, const int y_uv_vscale, + const int max_val, const uint8_t shift_bits, + const uint8_t ccso_bo_only"; + specialize qw/ccso_filter_block_hbd_with_buf avx2/; - add_proto qw/void ccso_filter_block_hbd_with_buf_bo_only/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, - const int src_y_stride, const int dst_stride, - const int ccso_stride, - const int x, const int y, - const int pic_width, const int pic_height, - const int8_t *filter_offset, const int blk_size, - const int y_uv_hscale, const int y_uv_vscale, - const int max_val, const uint8_t shift_bits, - const uint8_t ccso_bo_only"; - specialize qw/ccso_filter_block_hbd_with_buf_bo_only avx2/; + add_proto qw/void ccso_filter_block_hbd_with_buf_bo_only/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, + const int src_y_stride, const int dst_stride, + const int ccso_stride, + const int x, const int y, + const int pic_width, const int pic_height, + const int8_t *filter_offset, const int blk_size_x, + const int blk_size_y, + const int y_uv_hscale, const int y_uv_vscale, + const int max_val, const uint8_t shift_bits, + const uint8_t ccso_bo_only"; + specialize qw/ccso_filter_block_hbd_with_buf_bo_only avx2/; + } + else { + add_proto qw/void 
ccso_filter_block_hbd_with_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, + const int src_y_stride, const int dst_stride, + const int ccso_stride, + const int x, const int y, + const int pic_width, const int pic_height, + const int8_t *filter_offset, const int blk_size, + const int y_uv_hscale, const int y_uv_vscale, + const int max_val, const uint8_t shift_bits, + const uint8_t ccso_bo_only"; + specialize qw/ccso_filter_block_hbd_with_buf avx2/; + + add_proto qw/void ccso_filter_block_hbd_with_buf_bo_only/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, + const int src_y_stride, const int dst_stride, + const int ccso_stride, + const int x, const int y, + const int pic_width, const int pic_height, + const int8_t *filter_offset, const int blk_size, + const int y_uv_hscale, const int y_uv_vscale, + const int max_val, const uint8_t shift_bits, + const uint8_t ccso_bo_only"; + specialize qw/ccso_filter_block_hbd_with_buf_bo_only avx2/; + } add_proto qw/uint64_t compute_distortion_block/, "const uint16_t *org, const int org_stride, const uint16_t *rec16, const int rec_stride, const int x, const int y, @@ -652,12 +684,23 @@ const int width"; specialize qw/compute_distortion_block avx2/; - add_proto qw/void ccso_derive_src_block/, "const uint16_t *src_y, uint8_t *const src_cls0, - uint8_t *const src_cls1, const int src_y_stride, const int ccso_stride, - const int x, const int y, const int pic_width, const int pic_height, - const int y_uv_hscale, const int y_uv_vscale, const int qstep, - const int neg_qstep, const int *src_loc, const int blk_size, const int edge_clf"; - specialize qw/ccso_derive_src_block avx2/ + if (aom_config("CONFIG_CCSO_FU_BUGFIX") eq "yes") { + add_proto qw/void ccso_derive_src_block/, "const uint16_t *src_y, uint8_t *const src_cls0, + uint8_t *const src_cls1, const int src_y_stride, const int ccso_stride, + const int x, const int y, const int pic_width, 
const int pic_height, + const int y_uv_hscale, const int y_uv_vscale, const int qstep, + const int neg_qstep, const int *src_loc, const int blk_size_x, + const int blk_size_y, const int edge_clf"; + specialize qw/ccso_derive_src_block avx2/ + } + else { + add_proto qw/void ccso_derive_src_block/, "const uint16_t *src_y, uint8_t *const src_cls0, + uint8_t *const src_cls1, const int src_y_stride, const int ccso_stride, + const int x, const int y, const int pic_width, const int pic_height, + const int y_uv_hscale, const int y_uv_vscale, const int qstep, + const int neg_qstep, const int *src_loc, const int blk_size, const int edge_clf"; + specialize qw/ccso_derive_src_block avx2/ + } } # WARPED_MOTION / GLOBAL_MOTION functions
diff --git a/av1/common/ccso.c b/av1/common/ccso.c index a5c148e..794d0e7 100644 --- a/av1/common/ccso.c +++ b/av1/common/ccso.c
@@ -150,10 +150,20 @@ const int8_t *offset_buf, const int src_y_stride, const int dst_stride, const int y_uv_hscale, const int y_uv_vscale, const int thr, const int neg_thr, const int *src_loc, const int max_val, - const int blk_size, const bool isSingleBand, const uint8_t shift_bits, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, const int blk_size_y, +#else + const int blk_size, +#endif + const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only) { +#if CONFIG_CCSO_FU_BUGFIX + const int y_end = AOMMIN(pic_height - y, blk_size_y); + const int x_end = AOMMIN(pic_width - x, blk_size_x); +#else const int y_end = AOMMIN(pic_height - y, blk_size); const int x_end = AOMMIN(pic_width - x, blk_size); +#endif for (int y_start = 0; y_start < y_end; y_start++) { const int y_pos = y_start; for (int x_start = 0; x_start < x_end; x_start++) { @@ -202,34 +212,67 @@ const int neg_thr = thr * -1; int src_loc[2]; derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup); +#if CONFIG_CCSO_FU_BUGFIX + assert(plane == 0); // function must only be called for plane == 0 + const int blk_log2 = CCSO_BLK_SIZE; +#else const int blk_log2 = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; +#endif const int blk_size = 1 << blk_log2; src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE; +#if CONFIG_CCSO_DEBUG + printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n", + plane, cm->ccso_info.ccso_bo_only[plane], + thr, neg_thr, __FUNCTION__); +#endif for (int y = 0; y < pic_height; y += blk_size) { +#if CONFIG_CCSO_DEBUG + printf("CCSO: y %d", y); +#endif for (int x = 0; x < pic_width; x += blk_size) { +#if CONFIG_CCSO_FU_BUGFIX + const int ccso_blk_idx = + (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2) * mi_params->mi_stride + + (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2); +#else const int ccso_blk_idx = (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) * (y >> blk_log2) * mi_params->mi_stride + (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) * (x >> blk_log2); +#endif const bool use_ccso = mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_y; +#if CONFIG_CCSO_DEBUG + printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, mi_params->mi_grid_base[ccso_blk_idx]); +#endif if (!use_ccso) continue; if (cm->ccso_info.ccso_bo_only[plane]) { ccso_filter_block_hbd_wo_buf_c( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0, - 0, thr, neg_thr, src_loc, max_val, blk_size, false, shift_bits, + 0, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size, +#endif + blk_size, false, shift_bits, edge_clf, cm->ccso_info.ccso_bo_only[plane]); } else { ccso_filter_block_hbd_wo_buf( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0, - 0, thr, neg_thr, src_loc, max_val, blk_size, false, shift_bits, + 0, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size, +#endif + blk_size, false, shift_bits, edge_clf, 0); } } dst_yuv += (dst_stride << blk_log2); src_y += (ccso_ext_stride << blk_log2); +#if CONFIG_CCSO_DEBUG + printf("\n"); +#endif } 
} @@ -256,34 +299,67 @@ const int neg_thr = thr * -1; int src_loc[2]; derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup); +#if CONFIG_CCSO_FU_BUGFIX + assert(plane == 0); // function must only be called for plane == 0 + const int blk_log2 = CCSO_BLK_SIZE; +#else const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; +#endif const int blk_size = 1 << blk_log2; src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE; +#if CONFIG_CCSO_DEBUG + printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n", + plane, cm->ccso_info.ccso_bo_only[plane], + thr, neg_thr, __FUNCTION__); +#endif for (int y = 0; y < pic_height; y += blk_size) { +#if CONFIG_CCSO_DEBUG + printf("CCSO: y %d", y); +#endif for (int x = 0; x < pic_width; x += blk_size) { +#if CONFIG_CCSO_FU_BUGFIX + const int ccso_blk_idx = + (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2) * mi_params->mi_stride + + (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2); +#else const int ccso_blk_idx = (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) * (y >> blk_log2) * mi_params->mi_stride + (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) * (x >> blk_log2); +#endif const bool use_ccso = mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_y; +#if CONFIG_CCSO_DEBUG + printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, mi_params->mi_grid_base[ccso_blk_idx]); +#endif if (!use_ccso) continue; if (cm->ccso_info.ccso_bo_only[plane]) { ccso_filter_block_hbd_wo_buf_c( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0, - 0, thr, neg_thr, src_loc, max_val, blk_size, true, shift_bits, + 0, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size, +#endif + blk_size, true, shift_bits, edge_clf, cm->ccso_info.ccso_bo_only[plane]); } else { ccso_filter_block_hbd_wo_buf( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0, - 0, thr, 
neg_thr, src_loc, max_val, blk_size, true, shift_bits, + 0, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size, +#endif + blk_size, true, shift_bits, edge_clf, 0); } } dst_yuv += (dst_stride << blk_log2); src_y += (ccso_ext_stride << blk_log2); +#if CONFIG_CCSO_DEBUG + printf("\n"); +#endif } } @@ -311,36 +387,74 @@ const int neg_thr = thr * -1; int src_loc[2]; derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup); - const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; - const int blk_size = 1 << blk_log2; +#if CONFIG_CCSO_FU_BUGFIX + assert(plane > 0); // function must only be called for plane > 0 + const int blk_size = 1 << CCSO_BLK_SIZE; + const int blk_log2_y = CCSO_BLK_SIZE - cm->seq_params.subsampling_y; + const int blk_log2_x = CCSO_BLK_SIZE - cm->seq_params.subsampling_x; + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = 1 << blk_log2_x; +#else + const int blk_log2_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = blk_size_y; // square filter unit: x size equals y size +#endif src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE; - for (int y = 0; y < pic_height; y += blk_size) { - for (int x = 0; x < pic_width; x += blk_size) { +#if CONFIG_CCSO_DEBUG + printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n", + plane, cm->ccso_info.ccso_bo_only[plane], + thr, neg_thr, __FUNCTION__); +#endif + for (int y = 0; y < pic_height; y += blk_size_y) { +#if CONFIG_CCSO_DEBUG + printf("CCSO: y %d", y); +#endif + for (int x = 0; x < pic_width; x += blk_size_x) { +#if CONFIG_CCSO_FU_BUGFIX const int ccso_blk_idx = - (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) * + (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2_y) * mi_params->mi_stride + + (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2_x); +#else + const int ccso_blk_idx = + (blk_size_y >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) * - (y >> blk_log2) * mi_params->mi_stride + + (y >> blk_log2_y) * mi_params->mi_stride + - (blk_size >> (MI_SIZE_LOG2
- xd->plane[plane].subsampling_x)) * + (blk_size_x >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) * - (x >> blk_log2); + (x >> blk_log2_y); +#endif const bool use_ccso = (plane == 1) ? mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_u : mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_v; +#if CONFIG_CCSO_DEBUG + printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, mi_params->mi_grid_base[ccso_blk_idx]); +#endif if (!use_ccso) continue; if (cm->ccso_info.ccso_bo_only[plane]) { ccso_filter_block_hbd_wo_buf_c( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, - y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size, + y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size_x, +#endif + blk_size_y, false, shift_bits, edge_clf, cm->ccso_info.ccso_bo_only[plane]); } else { ccso_filter_block_hbd_wo_buf( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, - y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size, + y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size_x, +#endif + blk_size_y, false, shift_bits, edge_clf, 0); } } - dst_yuv += (dst_stride << blk_log2); - src_y += (ccso_ext_stride << (blk_log2 + y_uv_vscale)); + dst_yuv += (dst_stride << blk_log2_y); + src_y += (ccso_ext_stride << (blk_log2_y + y_uv_vscale)); +#if CONFIG_CCSO_DEBUG + printf("\n"); +#endif } } @@ -369,36 +483,74 @@ const int neg_thr = thr * -1; int src_loc[2]; derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup); - const int blk_log2 = plane > 0 ?
CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; - const int blk_size = 1 << blk_log2; +#if CONFIG_CCSO_FU_BUGFIX + assert(plane > 0); // function must only be called for plane > 0 + const int blk_size = 1 << CCSO_BLK_SIZE; + const int blk_log2_y = CCSO_BLK_SIZE - cm->seq_params.subsampling_y; + const int blk_log2_x = CCSO_BLK_SIZE - cm->seq_params.subsampling_x; + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = 1 << blk_log2_x; +#else + const int blk_log2_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = blk_size_y; // square filter unit: x size equals y size +#endif src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE; - for (int y = 0; y < pic_height; y += blk_size) { - for (int x = 0; x < pic_width; x += blk_size) { +#if CONFIG_CCSO_DEBUG + printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n", + plane, cm->ccso_info.ccso_bo_only[plane], + thr, neg_thr, __FUNCTION__); +#endif + for (int y = 0; y < pic_height; y += blk_size_y) { +#if CONFIG_CCSO_DEBUG + printf("CCSO: y %d", y); +#endif + for (int x = 0; x < pic_width; x += blk_size_x) { +#if CONFIG_CCSO_FU_BUGFIX const int ccso_blk_idx = - (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) * + (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2_y) * mi_params->mi_stride + + (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2_x); +#else + const int ccso_blk_idx = + (blk_size_y >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) * - (y >> blk_log2) * mi_params->mi_stride + + (y >> blk_log2_y) * mi_params->mi_stride + - (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) * + (blk_size_x >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) * - (x >> blk_log2); + (x >> blk_log2_y); +#endif const bool use_ccso = (plane == 1) ?
mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_u : mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_v; +#if CONFIG_CCSO_DEBUG + printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, mi_params->mi_grid_base[ccso_blk_idx]); +#endif if (!use_ccso) continue; if (cm->ccso_info.ccso_bo_only[plane]) { ccso_filter_block_hbd_wo_buf_c( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, - y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size, + y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size_x, +#endif + blk_size_y, true, shift_bits, edge_clf, cm->ccso_info.ccso_bo_only[plane]); } else { ccso_filter_block_hbd_wo_buf( src_y, dst_yuv, x, y, pic_width, pic_height, src_cls, cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, - y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size, + y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, +#if CONFIG_CCSO_FU_BUGFIX + blk_size_x, +#endif + blk_size_y, true, shift_bits, edge_clf, 0); } } - dst_yuv += (dst_stride << blk_log2); - src_y += (ccso_ext_stride << (blk_log2 + y_uv_vscale)); + dst_yuv += (dst_stride << blk_log2_y); + src_y += (ccso_ext_stride << (blk_log2_y + y_uv_vscale)); +#if CONFIG_CCSO_DEBUG + printf("\n"); +#endif } } @@ -465,4 +617,4 @@ to->ccso_enable[plane] = from->ccso_enable[plane]; } -#endif // CONFIG_CCSO_IMPROVE \ No newline at end of file +#endif // CONFIG_CCSO_IMPROVE
diff --git a/av1/common/enums.h b/av1/common/enums.h index 90dc0e6..8a86fbd 100644 --- a/av1/common/enums.h +++ b/av1/common/enums.h
@@ -131,11 +131,13 @@ #define IBP_WEIGHT_SIZE (1 << IBP_WEIGHT_SIZE_LOG2) #endif // CONFIG_IBP_WEIGHT +#if !CONFIG_CCSO_FU_BUGFIX // Cross-Component Sample Offset (CCSO) #define CCSO_BLK_SIZE 7 #define CCSO_PADDING_SIZE 5 #define CCSO_BAND_NUM 128 #define CCSO_NUM_COMPONENTS 3 +#endif #define BUGFIX_AMVD_AMVR 1 // Supported scale modes for JOINT_NEWMV @@ -153,6 +155,14 @@ #define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) #define BLOCK_128_MI_SIZE_LOG2 5 +#if CONFIG_CCSO_FU_BUGFIX +// Cross-Component Sample Offset (CCSO) +#define CCSO_BLK_SIZE MAX_SB_SIZE_LOG2 +#define CCSO_PADDING_SIZE 5 +#define CCSO_BAND_NUM 128 +#define CCSO_NUM_COMPONENTS 3 +#endif + #if CONFIG_ENABLE_MHCCP #define MHCCP_CONTEXT_GROUP_SIZE 7 #define LINE_NUM 3
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c index 0725e2b..a856199 100644 --- a/av1/common/pred_common.c +++ b/av1/common/pred_common.c
@@ -524,10 +524,15 @@ bool av1_check_ccso_mbmi_inside_tile(const MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi) { const TileInfo *const tile = &xd->tile; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; + const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; +#else const int blk_size_y = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1; const int blk_size_x = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1; +#endif return (((mbmi->mi_row_start & ~blk_size_y) >= tile->mi_row_start) && ((mbmi->mi_col_start & ~blk_size_x) >= tile->mi_col_start) && @@ -559,10 +564,15 @@ neighbor1_ccso_available = av1_check_ccso_mbmi_inside_tile(xd, neighbor1); } +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; + const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; +#else const int blk_size_y = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1; const int blk_size_x = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1; +#endif if (neighbor0_ccso_available && neighbor1_ccso_available) { int is_neighbor0_ccso = 0;
diff --git a/av1/common/x86/highbd_ccso_avx2.c b/av1/common/x86/highbd_ccso_avx2.c index 9c2ac24..b796018 100644 --- a/av1/common/x86/highbd_ccso_avx2.c +++ b/av1/common/x86/highbd_ccso_avx2.c
@@ -51,7 +51,13 @@ const int y_uv_vscale, // const int pad_stride, no pad size anymore const int quant_step_size, const int inv_quant_step, const int *rec_idx, - const int max_val, const int blk_size, const bool isSingleBand, + const int max_val, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, const int blk_size_y, +#else + const int blk_size, +#endif + const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only) { assert(ccso_bo_only == 0); (void)ccso_bo_only; @@ -82,16 +88,22 @@ int y_offset; int x_offset, x_remainder; - if (y + blk_size >= pic_height) + +#if !CONFIG_CCSO_FU_BUGFIX + const int blk_size_x = blk_size; + const int blk_size_y = blk_size; +#endif + + if (y + blk_size_y >= pic_height) y_offset = pic_height - y; else - y_offset = blk_size; + y_offset = blk_size_y; - if (x + blk_size >= pic_width) { + if (x + blk_size_x >= pic_width) { x_offset = ((pic_width - x) >> 4) << 4; x_remainder = pic_width - x - x_offset; } else { - x_offset = blk_size; + x_offset = blk_size_x; x_remainder = 0; } for (int yOff = 0; yOff < y_offset; yOff++) { @@ -245,7 +257,12 @@ const int pic_width, const int pic_height, const int y_uv_hscale, const int y_uv_vscale, const int qstep, const int neg_qstep, - const int *src_loc, const int blk_size, + const int *src_loc, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, const int blk_size_y, +#else + const int blk_size, +#endif const int edge_clf) { const int quant_step_size = qstep; const int inv_quant_step = neg_qstep; @@ -275,16 +292,22 @@ int y_offset; int x_offset, x_remainder; - if (y + blk_size >= pic_height) + +#if !CONFIG_CCSO_FU_BUGFIX + const int blk_size_x = blk_size; + const int blk_size_y = blk_size; +#endif + + if (y + blk_size_y >= pic_height) y_offset = pic_height - y; else - y_offset = blk_size; + y_offset = blk_size_y; - if (x + blk_size >= pic_width) { + if (x + blk_size_x >= pic_width) { x_offset = ((pic_width - x) >> 4) << 4; x_remainder = pic_width - x - x_offset; } 
else { - x_offset = blk_size; + x_offset = blk_size_x; x_remainder = 0; } for (int yOff = 0; yOff < y_offset; yOff++) { @@ -421,7 +444,12 @@ const uint16_t *src_y, uint16_t *dts_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, const int src_y_stride, const int dst_stride, const int ccso_stride, const int x, const int y, const int pic_width, - const int pic_height, const int8_t *filter_offset, const int blk_size, + const int pic_height, const int8_t *filter_offset, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, const int blk_size_y, +#else + const int blk_size, +#endif const int y_uv_hscale, const int y_uv_vscale, const int max_val, const uint8_t shift_bits, const uint8_t ccso_bo_only) { (void)ccso_bo_only; @@ -440,16 +468,22 @@ int y_offset; int x_offset, x_remainder; - if (y + blk_size >= pic_height) + +#if !CONFIG_CCSO_FU_BUGFIX + const int blk_size_x = blk_size; + const int blk_size_y = blk_size; +#endif + + if (y + blk_size_y >= pic_height) y_offset = pic_height - y; else - y_offset = blk_size; + y_offset = blk_size_y; - if (x + blk_size >= pic_width) { + if (x + blk_size_x >= pic_width) { x_offset = ((pic_width - x) >> 4) << 4; x_remainder = pic_width - x - x_offset; } else { - x_offset = blk_size; + x_offset = blk_size_x; x_remainder = 0; } for (int yOff = 0; yOff < y_offset; yOff++) { @@ -528,7 +562,12 @@ const uint16_t *src_y, uint16_t *dts_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, const int src_y_stride, const int dst_stride, const int ccso_stride, const int x, const int y, const int pic_width, - const int pic_height, const int8_t *filter_offset, const int blk_size, + const int pic_height, const int8_t *filter_offset, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, const int blk_size_y, +#else + const int blk_size, +#endif const int y_uv_hscale, const int y_uv_vscale, const int max_val, const uint8_t shift_bits, const uint8_t ccso_bo_only) { (void)ccso_bo_only; @@ -547,16 +586,22 @@ int y_offset; int x_offset, x_remainder; - if (y 
+ blk_size >= pic_height) + +#if !CONFIG_CCSO_FU_BUGFIX + const int blk_size_x = blk_size; + const int blk_size_y = blk_size; +#endif + + if (y + blk_size_y >= pic_height) y_offset = pic_height - y; else - y_offset = blk_size; + y_offset = blk_size_y; - if (x + blk_size >= pic_width) { + if (x + blk_size_x >= pic_width) { x_offset = ((pic_width - x) >> 4) << 4; x_remainder = pic_width - x - x_offset; } else { - x_offset = blk_size; + x_offset = blk_size_x; x_remainder = 0; } for (int yOff = 0; yOff < y_offset; yOff++) {
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index 344268c..d42bb86 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c
@@ -3894,6 +3894,12 @@ #endif // CONFIG_CCSO_SIGFIX cm->ccso_info.max_band_log2[plane] = aom_rb_read_literal(rb, 2); } +#if CONFIG_CCSO_DEBUG + printf("CCSO: plane %d quant_idx %d ext_filter_support %d edge_clf %d ccso_bo_only %d max_band_log2 %d scale_idx %d @ %s\n", + plane, cm->ccso_info.quant_idx[plane], cm->ccso_info.ext_filter_support[plane], + cm->ccso_info.edge_clf[plane], cm->ccso_info.ccso_bo_only[plane], + cm->ccso_info.max_band_log2[plane], cm->ccso_info.scale_idx[plane], __FUNCTION__); +#endif const int max_band = 1 << cm->ccso_info.max_band_log2[plane]; #if !CONFIG_CCSO_SIGFIX cm->ccso_info.edge_clf[plane] = aom_rb_read_bit(rb);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c index 305dc9a..3a81162 100644 --- a/av1/decoder/decodemv.c +++ b/av1/decoder/decodemv.c
@@ -103,8 +103,13 @@ const BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y]; const int bw = mi_size_wide[bsize]; const int bh = mi_size_high[bsize]; +#if CONFIG_CCSO_FU_BUGFIX + const int log2_w = CCSO_BLK_SIZE; + const int log2_h = CCSO_BLK_SIZE; +#else const int log2_w = CCSO_BLK_SIZE + xd->plane[1].subsampling_x; const int log2_h = CCSO_BLK_SIZE + xd->plane[1].subsampling_y; +#endif const int f_w = 1 << log2_w >> MI_SIZE_LOG2; const int f_h = 1 << log2_h >> MI_SIZE_LOG2; const int ccso_nhfb = (mi_params->mi_cols + f_w - 1) / f_w; @@ -122,18 +127,27 @@ const CommonModeInfoParams *const mi_params = &cm->mi_params; const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; + const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; +#else const int blk_size_y = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1; const int blk_size_x = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1; +#endif #if CONFIG_CCSO_IMPROVE int blk_idc; #endif if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) && cm->ccso_info.ccso_enable[0]) { #if CONFIG_CCSO_IMPROVE +#if CONFIG_CCSO_FU_BUGFIX + const int log2_filter_unit_size = CCSO_BLK_SIZE; +#else const int log2_filter_unit_size = CCSO_BLK_SIZE + xd->plane[1].subsampling_x; +#endif const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[0].subsampling_x) + (1 << log2_filter_unit_size >> 2) - 1) / (1 << log2_filter_unit_size >> 2); @@ -144,10 +158,18 @@ const int ccso_ctx = av1_get_ccso_context(xd, 0); blk_idc = aom_read_symbol(r, xd->tile_ctx->ccso_cdf[0][ccso_ctx], 2, ACCT_INFO("blk_idc")); +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] read ccso_blk_y %d @ %s\n", mi_row, mi_col, blk_idc, __FUNCTION__); +#endif } else { CcsoInfo *ref_frame_ccso_info = &get_ref_frame_buf(cm, cm->ccso_info.ccso_ref_idx[0])->ccso_info; blk_idc = ref_frame_ccso_info->sb_filter_control[0][sb_idx]; +#if 
CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] copy [%d] ccso_blk_y %d : 0x%p @ %s\n", mi_row, mi_col, sb_idx, blk_idc, + mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride + + (mi_col & ~blk_size_x)], __FUNCTION__); +#endif } #else const int blk_idc = @@ -174,7 +196,11 @@ if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) && cm->ccso_info.ccso_enable[1]) { #if CONFIG_CCSO_IMPROVE +#if CONFIG_CCSO_FU_BUGFIX + const int log2_filter_unit_size = (CCSO_BLK_SIZE - xd->plane[1].subsampling_x); +#else const int log2_filter_unit_size = CCSO_BLK_SIZE; +#endif const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[1].subsampling_x) + (1 << log2_filter_unit_size >> 2) - 1) / (1 << log2_filter_unit_size >> 2); @@ -185,10 +211,18 @@ const int ccso_ctx = av1_get_ccso_context(xd, 1); blk_idc = aom_read_symbol(r, xd->tile_ctx->ccso_cdf[1][ccso_ctx], 2, ACCT_INFO("blk_idc")); +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] read ccso_blk_u %d @ %s\n", mi_row, mi_col, blk_idc, __FUNCTION__); +#endif } else { CcsoInfo *ref_frame_ccso_info = &get_ref_frame_buf(cm, cm->ccso_info.ccso_ref_idx[1])->ccso_info; blk_idc = ref_frame_ccso_info->sb_filter_control[1][sb_idx]; +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] copy [%d] ccso_blk_u %d : 0x%p @ %s\n", mi_row, mi_col, sb_idx, blk_idc, + mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride + + (mi_col & ~blk_size_x)], __FUNCTION__); +#endif } #else const int blk_idc = @@ -215,7 +249,11 @@ if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) && cm->ccso_info.ccso_enable[2]) { #if CONFIG_CCSO_IMPROVE +#if CONFIG_CCSO_FU_BUGFIX + const int log2_filter_unit_size = (CCSO_BLK_SIZE - xd->plane[2].subsampling_x); +#else const int log2_filter_unit_size = CCSO_BLK_SIZE; +#endif const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[2].subsampling_x) + (1 << log2_filter_unit_size >> 2) - 1) / (1 << log2_filter_unit_size >> 2); @@ -226,10 +264,18 @@ const int ccso_ctx = av1_get_ccso_context(xd, 2); blk_idc = 
aom_read_symbol(r, xd->tile_ctx->ccso_cdf[2][ccso_ctx], 2, ACCT_INFO("blk_idc")); +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] read ccso_blk_v %d @ %s\n", mi_row, mi_col, blk_idc, __FUNCTION__); +#endif } else { CcsoInfo *ref_frame_ccso_info = &get_ref_frame_buf(cm, cm->ccso_info.ccso_ref_idx[2])->ccso_info; blk_idc = ref_frame_ccso_info->sb_filter_control[2][sb_idx]; +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] copy [%d] ccso_blk_v %d : 0x%p @ %s\n", mi_row, mi_col, sb_idx, blk_idc, + mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride + + (mi_col & ~blk_size_x)], __FUNCTION__); +#endif } #else const int blk_idc =
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index 1cd32bc..bf6bd0d 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c
@@ -1949,10 +1949,15 @@ const CommonModeInfoParams *const mi_params = &cm->mi_params; const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; + const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; +#else const int blk_size_y = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1; const int blk_size_x = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1; +#endif const MB_MODE_INFO *mbmi = mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride + (mi_col & ~blk_size_x)]; @@ -1964,6 +1969,9 @@ const int ccso_ctx = av1_get_ccso_context(xd, 0); aom_write_symbol(w, mbmi->ccso_blk_y == 0 ? 0 : 1, xd->tile_ctx->ccso_cdf[0][ccso_ctx], 2); +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] write ccso_blk_y %d @ %s\n", mi_row, mi_col, mbmi->ccso_blk_y == 0 ? 0 : 1, __FUNCTION__); +#endif } #else aom_write_symbol(w, mbmi->ccso_blk_y == 0 ? 0 : 1, @@ -1979,6 +1987,9 @@ const int ccso_ctx = av1_get_ccso_context(xd, 1); aom_write_symbol(w, mbmi->ccso_blk_u == 0 ? 0 : 1, xd->tile_ctx->ccso_cdf[1][ccso_ctx], 2); +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] write ccso_blk_u %d @ %s\n", mi_row, mi_col, mbmi->ccso_blk_u == 0 ? 0 : 1, __FUNCTION__); +#endif } #else aom_write_symbol(w, mbmi->ccso_blk_u == 0 ? 0 : 1, @@ -1994,6 +2005,9 @@ const int ccso_ctx = av1_get_ccso_context(xd, 2); aom_write_symbol(w, mbmi->ccso_blk_v == 0 ? 0 : 1, xd->tile_ctx->ccso_cdf[2][ccso_ctx], 2); +#if CONFIG_CCSO_DEBUG + printf("CCSO: [%d,%d] write ccso_blk_v %d @ %s\n", mi_row, mi_col, mbmi->ccso_blk_v == 0 ? 0 : 1, __FUNCTION__); +#endif } #else aom_write_symbol(w, mbmi->ccso_blk_v == 0 ? 0 : 1,
diff --git a/av1/encoder/pickccso.c b/av1/encoder/pickccso.c index 794b248..3d21b04 100644 --- a/av1/encoder/pickccso.c +++ b/av1/encoder/pickccso.c
@@ -75,11 +75,15 @@ const int pic_width, const int pic_height, const int y_uv_hscale, const int y_uv_vscale, const int qstep, const int neg_qstep, - const int *src_loc, const int blk_size, + const int *src_loc, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, +#endif + const int blk_size_y, const int edge_clf) { int src_cls[2]; - const int y_end = AOMMIN(pic_height - y, blk_size); - const int x_end = AOMMIN(pic_width - x, blk_size); + const int y_end = AOMMIN(pic_height - y, blk_size_y); + const int x_end = AOMMIN(pic_width - x, blk_size_x); for (int y_start = 0; y_start < y_end; y_start++) { const int y_pos = y_start; for (int x_start = 0; x_start < x_end; x_start++) { @@ -114,19 +118,29 @@ const int neg_qstep = qstep * -1; int src_loc[2]; derive_ccso_sample_pos(cm, src_loc, ccso_stride_ext, filter_sup); - const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; - const int blk_size = 1 << blk_log2; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y; + const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x; +#else + const int blk_log2_y = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; + const int blk_log2_x = blk_log2_y; +#endif + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = 1 << blk_log2_x; src_y += CCSO_PADDING_SIZE * ccso_stride_ext + CCSO_PADDING_SIZE; - for (int y = 0; y < pic_height; y += blk_size) { - for (int x = 0; x < pic_width; x += blk_size) { + for (int y = 0; y < pic_height; y += blk_size_y) { + for (int x = 0; x < pic_width; x += blk_size_x) { ccso_derive_src_block(src_y, src_cls0, src_cls1, ccso_stride_ext, ccso_stride, x, y, pic_width, pic_height, y_uv_hscale, y_uv_vscale, qstep, neg_qstep, src_loc, - blk_size, edge_clf); +#if CONFIG_CCSO_FU_BUGFIX + blk_size_x, +#endif + blk_size_y, edge_clf); } - src_y += (ccso_stride_ext << (blk_log2 + y_uv_vscale)); - src_cls0 += (ccso_stride << (blk_log2 + y_uv_vscale)); - src_cls1 += (ccso_stride << (blk_log2 + y_uv_vscale)); + src_y += (ccso_stride_ext << (blk_log2_y + y_uv_vscale)); + src_cls0 += (ccso_stride << (blk_log2_y + y_uv_vscale)); + src_cls1 += (ccso_stride << (blk_log2_y + y_uv_vscale)); } } @@ -144,16 +158,23 @@ int fb_idx = 0; uint8_t cur_src_cls0; uint8_t cur_src_cls1; - const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; - const int blk_size = 1 << blk_log2; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y; + const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x; +#else + const int blk_log2_y = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; + const int blk_log2_x = blk_log2_y; +#endif + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = 1 << blk_log2_x; const int scaled_ext_stride = (ctx->ccso_stride_ext << y_uv_vscale); const int scaled_stride = (ctx->ccso_stride << y_uv_vscale); src_y += CCSO_PADDING_SIZE * ctx->ccso_stride_ext + CCSO_PADDING_SIZE; - for (int y = 0; y < pic_height; y += blk_size) { - for (int x = 0; x < pic_width; x += blk_size) { + for (int y = 0; y < pic_height; y += blk_size_y) { + for (int x = 0; x < pic_width; x += blk_size_x) { fb_idx++; - const int y_end = AOMMIN(pic_height - y, blk_size); - const int x_end = AOMMIN(pic_width - x, blk_size); + const int y_end = AOMMIN(pic_height - y, blk_size_y); + const int x_end = AOMMIN(pic_width - x, blk_size_x); for (int y_start = 0; y_start < y_end; y_start++) { for (int x_start = 0; x_start < x_end; x_start++) { const int x_pos = x + x_start; @@ -177,11 +198,11 @@ src_cls0 -= scaled_stride * y_end; src_cls1 -= scaled_stride * y_end; } - ref += (ctx->ccso_stride << blk_log2); - dst += (ctx->ccso_stride << blk_log2); - src_y += (ctx->ccso_stride_ext << (blk_log2 + y_uv_vscale)); - src_cls0 += (ctx->ccso_stride << (blk_log2 + y_uv_vscale)); - src_cls1 += (ctx->ccso_stride << (blk_log2 + y_uv_vscale)); + ref += (ctx->ccso_stride << blk_log2_y); + dst += (ctx->ccso_stride << blk_log2_y); + src_y += (ctx->ccso_stride_ext << (blk_log2_y + y_uv_vscale)); + src_cls0 += (ctx->ccso_stride << (blk_log2_y + y_uv_vscale)); + src_cls1 += (ctx->ccso_stride << (blk_log2_y + y_uv_vscale)); } } @@ -194,15 +215,22 @@ const int y_uv_hscale = xd->plane[plane].subsampling_x; const int y_uv_vscale = xd->plane[plane].subsampling_y; int fb_idx = 0; - const int blk_log2 = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; - const int blk_size = 1 << blk_log2; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y; + const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x; +#else + const int blk_log2_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; + const int blk_log2_x = blk_log2_y; +#endif + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = 1 << blk_log2_x; const int scaled_ext_stride = (ctx->ccso_stride_ext << y_uv_vscale); src_y += CCSO_PADDING_SIZE * ctx->ccso_stride_ext + CCSO_PADDING_SIZE; - for (int y = 0; y < pic_height; y += blk_size) { - for (int x = 0; x < pic_width; x += blk_size) { + for (int y = 0; y < pic_height; y += blk_size_y) { + for (int x = 0; x < pic_width; x += blk_size_x) { fb_idx++; - const int y_end = AOMMIN(pic_height - y, blk_size); - const int x_end = AOMMIN(pic_width - x, blk_size); + const int y_end = AOMMIN(pic_height - y, blk_size_y); + const int x_end = AOMMIN(pic_width - x, blk_size_x); for (int y_start = 0; y_start < y_end; y_start++) { for (int x_start = 0; x_start < x_end; x_start++) { const int x_pos = x + x_start; @@ -219,9 +247,9 @@ dst -= ctx->ccso_stride * y_end; src_y -= scaled_ext_stride * y_end; } - ref += (ctx->ccso_stride << blk_log2); - dst += (ctx->ccso_stride << blk_log2); - src_y += (ctx->ccso_stride_ext << (blk_log2 + y_uv_vscale)); + ref += (ctx->ccso_stride << blk_log2_y); + dst += (ctx->ccso_stride << blk_log2_y); + src_y += (ctx->ccso_stride_ext << (blk_log2_y + y_uv_vscale)); } } @@ -230,7 +258,12 @@ const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, const int src_y_stride, const int dst_stride, const int src_cls_stride, const int x, const int y, const int pic_width, - const int pic_height, const int8_t *filter_offset, const int blk_size, + const int pic_height, const int8_t *filter_offset, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, const int blk_size_y, +#else + const 
int blk_size, +#endif const int y_uv_hscale, const int y_uv_vscale, const int max_val, const uint8_t shift_bits, const uint8_t ccso_bo_only) { assert(ccso_bo_only == 1); @@ -242,8 +275,13 @@ int cur_src_cls0; int cur_src_cls1; +#if CONFIG_CCSO_FU_BUGFIX + const int y_end = AOMMIN(pic_height - y, blk_size_y); + const int x_end = AOMMIN(pic_width - x, blk_size_x); +#else const int y_end = AOMMIN(pic_height - y, blk_size); const int x_end = AOMMIN(pic_width - x, blk_size); +#endif for (int y_start = 0; y_start < y_end; y_start++) { const int y_pos = y_start; for (int x_start = 0; x_start < x_end; x_start++) { @@ -266,7 +304,12 @@ const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1, const int src_y_stride, const int dst_stride, const int src_cls_stride, const int x, const int y, const int pic_width, - const int pic_height, const int8_t *filter_offset, const int blk_size, + const int pic_height, const int8_t *filter_offset, +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_x, const int blk_size_y, +#else + const int blk_size, +#endif const int y_uv_hscale, const int y_uv_vscale, const int max_val, const uint8_t shift_bits, const uint8_t ccso_bo_only) { if (ccso_bo_only) { @@ -275,8 +318,13 @@ } int cur_src_cls0; int cur_src_cls1; +#if CONFIG_CCSO_FU_BUGFIX + const int y_end = AOMMIN(pic_height - y, blk_size_y); + const int x_end = AOMMIN(pic_width - x, blk_size_x); +#else const int y_end = AOMMIN(pic_height - y, blk_size); const int x_end = AOMMIN(pic_width - x, blk_size); +#endif for (int y_start = 0; y_start < y_end; y_start++) { const int y_pos = y_start; for (int x_start = 0; x_start < x_end; x_start++) { @@ -311,7 +359,11 @@ const int pic_height = xd->plane[plane].dst.height; const int pic_width = xd->plane[plane].dst.width; const int max_val = (1 << cm->seq_params.bit_depth) - 1; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_log2 = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y; +#else const int blk_log2 = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; +#endif const int blk_size = 1 << blk_log2; src_y += CCSO_PADDING_SIZE * ccso_stride_ext + CCSO_PADDING_SIZE; for (int y = 0; y < pic_height; y += blk_size) { @@ -324,12 +376,18 @@ #endif // CONFIG_CCSO_IMPROVE src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride, ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size, +#if CONFIG_CCSO_FU_BUGFIX + blk_size, +#endif // y_uv_scale in h and v shall be zero 0, 0, max_val, shift_bits, ccso_bo_only); } else { ccso_filter_block_hbd_with_buf( src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride, ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size, +#if CONFIG_CCSO_FU_BUGFIX + blk_size, +#endif // y_uv_scale in h and v shall be zero 0, 0, max_val, shift_bits, 0); } @@ -352,11 +410,18 @@ const int y_uv_hscale = xd->plane[plane].subsampling_x; const int y_uv_vscale = xd->plane[plane].subsampling_y; const int max_val = (1 << cm->seq_params.bit_depth) - 1; - const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; - const int blk_size = 1 << blk_log2; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y; + const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x; +#else + const int blk_log2_y = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; + const int blk_log2_x = blk_log2_y; +#endif + const int blk_size_y = 1 << blk_log2_y; + const int blk_size_x = 1 << blk_log2_x; src_y += CCSO_PADDING_SIZE * ccso_stride_ext + CCSO_PADDING_SIZE; - for (int y = 0; y < pic_height; y += blk_size) { - for (int x = 0; x < pic_width; x += blk_size) { + for (int y = 0; y < pic_height; y += blk_size_y) { + for (int x = 0; x < pic_width; x += blk_size_x) { if (ccso_bo_only) { #if CONFIG_CCSO_IMPROVE ccso_filter_block_hbd_with_buf_bo_only( @@ -364,19 +429,27 @@ ccso_filter_block_hbd_with_buf_c( #endif // CONFIG_CCSO_IMPROVE src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride, - ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size, + ccso_stride, x, y, pic_width, pic_height, filter_offset, +#if CONFIG_CCSO_FU_BUGFIX + blk_size_x, +#endif + blk_size_y, y_uv_hscale, y_uv_vscale, max_val, shift_bits, ccso_bo_only); } else { ccso_filter_block_hbd_with_buf( src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride, - ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size, + ccso_stride, x, y, pic_width, pic_height, filter_offset, +#if CONFIG_CCSO_FU_BUGFIX + blk_size_x, +#endif + blk_size_y, y_uv_hscale, y_uv_vscale, max_val, shift_bits, 0); } } - dst_yuv += (dst_stride << blk_log2); - src_y += (ccso_stride_ext << (blk_log2 + y_uv_vscale)); - src_cls0 += (ccso_stride << (blk_log2 + y_uv_vscale)); - src_cls1 += (ccso_stride << (blk_log2 + y_uv_vscale)); + dst_yuv += (dst_stride << blk_log2_y); + src_y += (ccso_stride_ext << (blk_log2_y + y_uv_vscale)); + src_cls0 += (ccso_stride << (blk_log2_y + y_uv_vscale)); + src_cls1 += (ccso_stride << (blk_log2_y + y_uv_vscale)); } } @@ -427,8 +500,8 @@ (x >> log2_filter_unit_size_x)] = ssd; *total_distortion += ssd; } - org += (org_stride << log2_filter_unit_size_x); - rec16 += (rec_stride << log2_filter_unit_size_x); + org += (org_stride << log2_filter_unit_size_y); + rec16 += (rec_stride << 
log2_filter_unit_size_y); } } @@ -484,9 +557,13 @@ uint64_t *cur_total_dist, int *cur_total_rate, bool *filter_enable, const int rdmult) { aom_cdf_prob ccso_cdf[CCSO_CONTEXT][CDF_SIZE(2)]; +#if CONFIG_CCSO_FU_BUGFIX + const int log2_filter_unit_size = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x; +#else const int log2_filter_unit_size = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_x; ; +#endif const CommonModeInfoParams *const mi_params = &cm->mi_params; const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[plane].subsampling_x) + @@ -499,10 +576,15 @@ const int tile_cols = tiles->cols; const int tile_rows = tiles->rows; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; + const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; +#else const int blk_size_y = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1; const int blk_size_x = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1; +#endif *cur_total_dist = 0; @@ -590,9 +672,13 @@ bool *filter_enable, const int rdmult) { (void)rdmult; +#if CONFIG_CCSO_FU_BUGFIX + const int log2_filter_unit_size = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x; +#else const int log2_filter_unit_size = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_x; ; +#endif const CommonModeInfoParams *const mi_params = &cm->mi_params; const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[plane].subsampling_x) + @@ -605,10 +691,15 @@ const int tile_cols = tiles->cols; const int tile_rows = tiles->rows; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; + const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1; +#else const int blk_size_y = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1; const int blk_size_x = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1; +#endif *cur_total_dist = 0; *cur_total_rate = 0; @@ -724,7 +815,11 @@ const int max_edge_interval, const uint8_t ccso_bo_only) { const CommonModeInfoParams *const mi_params = &cm->mi_params; +#if CONFIG_CCSO_FU_BUGFIX + const int blk_log2 = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y; +#else const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1; +#endif const int nvfb = ((mi_params->mi_rows >> xd->plane[plane].subsampling_y) + (1 << blk_log2 >> MI_SIZE_LOG2) - 1) / (1 << blk_log2 >> MI_SIZE_LOG2); @@ -879,10 +974,15 @@ #endif ) { const CommonModeInfoParams *const mi_params = &cm->mi_params; +#if CONFIG_CCSO_FU_BUGFIX + const int log2_filter_unit_size_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y; + const int log2_filter_unit_size_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x; +#else const int log2_filter_unit_size_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_y; const int log2_filter_unit_size_x = plane > 0 ? 
CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_x; +#endif const int ccso_nvfb = ((mi_params->mi_rows >> xd->plane[plane].subsampling_y) + @@ -1458,8 +1558,13 @@ const BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y]; const int bw = mi_size_wide[bsize]; const int bh = mi_size_high[bsize]; +#if CONFIG_CCSO_FU_BUGFIX + const int log2_w = CCSO_BLK_SIZE; + const int log2_h = CCSO_BLK_SIZE; +#else const int log2_w = CCSO_BLK_SIZE + xd->plane[1].subsampling_x; const int log2_h = CCSO_BLK_SIZE + xd->plane[1].subsampling_y; +#endif const int f_w = 1 << log2_w >> MI_SIZE_LOG2; const int f_h = 1 << log2_h >> MI_SIZE_LOG2; const int step_h = (bh + f_h - 1) / f_h; @@ -1476,18 +1581,42 @@ cm->cur_frame->ccso_info.sb_filter_control[plane][sb_idx] = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb]; #endif // CONFIG_CCSO_IMPROVE +#if CONFIG_CCSO_FU_BUGFIX + const int grid_idx_mbmi = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * + row * mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col; + MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx_mbmi]; +#endif if (plane == AOM_PLANE_Y) { +#if CONFIG_CCSO_FU_BUGFIX + mbmi->ccso_blk_y = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb]; +#else mi_params ->mi_grid_base [(1 << CCSO_BLK_SIZE >> (MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) * row * mi_params->mi_stride + (1 << CCSO_BLK_SIZE >> - (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) * + (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) * col] ->ccso_blk_y = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb]; +#endif +#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX + printf("CCSO: [%d,%d] copy [%d] ccso_blk_y %d : 0x%p @ %s\n", + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row, + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col, + sb_idx, + ctx->final_filter_control[y_sb * ccso_nhfb + x_sb], + mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * + row * mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col], + __FUNCTION__); +#endif } else if 
(plane == AOM_PLANE_U) { +#if CONFIG_CCSO_FU_BUGFIX + mbmi->ccso_blk_u = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb]; +#else mi_params ->mi_grid_base [(1 << CCSO_BLK_SIZE >> @@ -1498,7 +1627,22 @@ col] ->ccso_blk_u = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb]; +#endif +#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX + printf("CCSO: [%d,%d] copy [%d] ccso_blk_u %d : 0x%p @ %s\n", + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row, + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col, + sb_idx, + ctx->final_filter_control[y_sb * ccso_nhfb + x_sb], + mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * + row * mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col], + __FUNCTION__); +#endif } else { +#if CONFIG_CCSO_FU_BUGFIX + mbmi->ccso_blk_v = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb]; +#else mi_params ->mi_grid_base [(1 << CCSO_BLK_SIZE >> @@ -1509,8 +1653,27 @@ col] ->ccso_blk_v = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb]; +#endif +#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX + printf("CCSO: [%d,%d] copy [%d] ccso_blk_v %d : 0x%p @ %s\n", + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row, + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col, + sb_idx, + ctx->final_filter_control[y_sb * ccso_nhfb + x_sb], + mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * + row * mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col], + __FUNCTION__); +#endif } #if CONFIG_CCSO_IMPROVE +#if CONFIG_CCSO_FU_BUGFIX + const int ccso_mib_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)); + const int ccso_mib_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)); + + int mi_row = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row; + int mi_col = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col; +#else const int ccso_mib_size_y = (1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)); @@ -1524,6 +1687,7 @@ int mi_col = (1 << CCSO_BLK_SIZE >> (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) * col; +#endif for (int j = 0; j < AOMMIN(ccso_mib_size_y, 
cm->mi_params.mi_rows - mi_row); j++) { @@ -1572,18 +1736,45 @@ for (int y_sb = 0; y_sb < ccso_nvfb; y_sb++) { for (int x_sb = 0; x_sb < ccso_nhfb; x_sb++) { +#if CONFIG_CCSO_FU_BUGFIX + const int grid_idx = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb * + mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb; + MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx]; +#endif if (plane == AOM_PLANE_Y) { +#if CONFIG_CCSO_FU_BUGFIX + mbmi->ccso_blk_y = + ref_frame_ccso_info + ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb]; +#else mi_params ->mi_grid_base[(1 << CCSO_BLK_SIZE >> - (MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) * + (MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) * y_sb * mi_params->mi_stride + (1 << CCSO_BLK_SIZE >> - (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) * + (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) * x_sb] ->ccso_blk_y = ref_frame_ccso_info ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb]; +#endif +#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX + printf("CCSO: [%d,%d] copy [%d] ccso_blk_y %d : 0x%p @ %s\n", + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb, + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb, + y_sb * ccso_nhfb + x_sb, + ref_frame_ccso_info->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb], + mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb * mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb], + __FUNCTION__); +#endif } else if (plane == AOM_PLANE_U) { +#if CONFIG_CCSO_FU_BUGFIX + mbmi->ccso_blk_u = + ref_frame_ccso_info + ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb]; +#else mi_params ->mi_grid_base[(1 << CCSO_BLK_SIZE >> (MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) * @@ -1594,7 +1785,23 @@ ->ccso_blk_u = ref_frame_ccso_info ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb]; +#endif +#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX + printf("CCSO: [%d,%d] copy [%d] ccso_blk_u %d : 0x%p @ %s\n", + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb, + (1 << CCSO_BLK_SIZE >> 
MI_SIZE_LOG2) * x_sb, + y_sb * ccso_nhfb + x_sb, + ref_frame_ccso_info->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb], + mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb * mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb], + __FUNCTION__); +#endif } else { +#if CONFIG_CCSO_FU_BUGFIX + mbmi->ccso_blk_v = + ref_frame_ccso_info + ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb]; +#else mi_params ->mi_grid_base[(1 << CCSO_BLK_SIZE >> (MI_SIZE_LOG2 - xd->plane[2].subsampling_y)) * @@ -1605,6 +1812,17 @@ ->ccso_blk_v = ref_frame_ccso_info ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb]; +#endif +#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX + printf("CCSO: [%d,%d] copy [%d] ccso_blk_v %d : 0x%p @ %s\n", + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb, + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb, + y_sb * ccso_nhfb + x_sb, + ref_frame_ccso_info->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb], + mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb * mi_params->mi_stride + + (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb], + __FUNCTION__); +#endif } } } @@ -1643,6 +1861,12 @@ cm->cur_frame->ccso_info.reuse_root_ref[plane] = ref_frame_ccso_info->reuse_root_ref[plane]; } +#if CONFIG_CCSO_DEBUG + printf("CCSO: plane %d quant_idx %d ext_filter_support %d edge_clf %d ccso_bo_only %d max_band_log2 %d scale_idx %d @ %s\n", + plane, cm->ccso_info.quant_idx[plane], cm->ccso_info.ext_filter_support[plane], + cm->ccso_info.edge_clf[plane], cm->ccso_info.ccso_bo_only[plane], + cm->ccso_info.max_band_log2[plane], cm->ccso_info.scale_idx[plane], __FUNCTION__); +#endif } else { cm->cur_frame->ccso_info.ccso_enable[plane] = 0; }
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c index 90bfc3b..4f1dcad 100644 --- a/av1/encoder/pickcdef.c +++ b/av1/encoder/pickcdef.c
@@ -244,7 +244,7 @@ *width = block_size_wide[bsize]; *height = block_size_high[bsize]; *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; - *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; + *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize]; } /* Compute MSE only on the blocks we filtered. */
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake index 61cb8e2..6bd3618 100644 --- a/build/cmake/aom_config_defaults.cmake +++ b/build/cmake/aom_config_defaults.cmake
@@ -358,6 +358,8 @@ set_aom_config_var(CONFIG_PARTITION_CONTEXT_REDUCE 1 "Enable to reduce partition contexts") set_aom_config_var(CONFIG_CCSO_IMPROVE 1 "Enable CCSO improvements") +set_aom_config_var(CONFIG_CCSO_DEBUG 0 "Enable CCSO debug") +set_aom_config_var(CONFIG_CCSO_FU_BUGFIX 1 "Bugfix to CCSO FU size") set_aom_config_var(CONFIG_OPT_INTER_MODE_CTX 1 "Improvement of all inter mode related contexts")