Fix CCSO filter unit size (chroma units now scale with subsampling)
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index 22c233d..69d1325 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -2209,12 +2209,21 @@
if (buf->ccso_info.sb_filter_control[pli]) {
aom_free(buf->ccso_info.sb_filter_control[pli]);
}
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_filter_unit_size_y =
+ pli == 0 ? CCSO_BLK_SIZE
+ : CCSO_BLK_SIZE - cm->seq_params.subsampling_y;
+ const int log2_filter_unit_size_x =
+ pli == 0 ? CCSO_BLK_SIZE
+ : CCSO_BLK_SIZE - cm->seq_params.subsampling_x;
+#else
const int log2_filter_unit_size_y =
pli > 0 ? CCSO_BLK_SIZE
: CCSO_BLK_SIZE + cm->seq_params.subsampling_y;
const int log2_filter_unit_size_x =
pli > 0 ? CCSO_BLK_SIZE
: CCSO_BLK_SIZE + cm->seq_params.subsampling_x;
+#endif
const int ccso_nvfb =
((cm->mi_params.mi_rows >> (pli ? cm->seq_params.subsampling_y : 0)) +
@@ -2231,6 +2240,9 @@
32, sizeof(*buf->ccso_info.sb_filter_control[pli]) * sb_count));
memset(buf->ccso_info.sb_filter_control[pli], 0,
sizeof(*buf->ccso_info.sb_filter_control[pli]) * sb_count);
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: plane %d nvfb %d nhfb %d sb_count %d @ %s\n", pli, ccso_nvfb, ccso_nhfb, sb_count, __FUNCTION__);
+#endif
}
}
#endif // CONFIG_CCSO_IMPROVE
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 1c9a2b4..1e089f7 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -620,31 +620,63 @@
}
# Cross-component Sample Offset
-add_proto qw/void ccso_filter_block_hbd_wo_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const int x, const int y, const int pic_width, const int pic_height, int *src_cls, const int8_t *offset_buf, const int scaled_ext_stride, const int dst_stride, const int y_uv_hscale, const int y_uv_vscale, const int thr, const int neg_thr, const int *src_loc, const int max_val, const int blk_size, const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only";
+if (aom_config("CONFIG_CCSO_FU_BUGFIX") eq "yes") {
+ add_proto qw/void ccso_filter_block_hbd_wo_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const int x, const int y, const int pic_width, const int pic_height, int *src_cls, const int8_t *offset_buf, const int scaled_ext_stride, const int dst_stride, const int y_uv_hscale, const int y_uv_vscale, const int thr, const int neg_thr, const int *src_loc, const int max_val, const int blk_size_x, const int blk_size_y, const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only";
+}
+else {
+ add_proto qw/void ccso_filter_block_hbd_wo_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const int x, const int y, const int pic_width, const int pic_height, int *src_cls, const int8_t *offset_buf, const int scaled_ext_stride, const int dst_stride, const int y_uv_hscale, const int y_uv_vscale, const int thr, const int neg_thr, const int *src_loc, const int max_val, const int blk_size, const bool isSingleBand, const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only";
+}
specialize qw/ccso_filter_block_hbd_wo_buf avx2/;
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void ccso_filter_block_hbd_with_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1,
- const int src_y_stride, const int dst_stride,
- const int ccso_stride,
- const int x, const int y,
- const int pic_width, const int pic_height,
- const int8_t *filter_offset, const int blk_size,
- const int y_uv_hscale, const int y_uv_vscale,
- const int max_val, const uint8_t shift_bits,
- const uint8_t ccso_bo_only";
- specialize qw/ccso_filter_block_hbd_with_buf avx2/;
+ if (aom_config("CONFIG_CCSO_FU_BUGFIX") eq "yes") {
+ add_proto qw/void ccso_filter_block_hbd_with_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1,
+ const int src_y_stride, const int dst_stride,
+ const int ccso_stride,
+ const int x, const int y,
+ const int pic_width, const int pic_height,
+ const int8_t *filter_offset, const int blk_size_x,
+ const int blk_size_y,
+ const int y_uv_hscale, const int y_uv_vscale,
+ const int max_val, const uint8_t shift_bits,
+ const uint8_t ccso_bo_only";
+ specialize qw/ccso_filter_block_hbd_with_buf avx2/;
- add_proto qw/void ccso_filter_block_hbd_with_buf_bo_only/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1,
- const int src_y_stride, const int dst_stride,
- const int ccso_stride,
- const int x, const int y,
- const int pic_width, const int pic_height,
- const int8_t *filter_offset, const int blk_size,
- const int y_uv_hscale, const int y_uv_vscale,
- const int max_val, const uint8_t shift_bits,
- const uint8_t ccso_bo_only";
- specialize qw/ccso_filter_block_hbd_with_buf_bo_only avx2/;
+ add_proto qw/void ccso_filter_block_hbd_with_buf_bo_only/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1,
+ const int src_y_stride, const int dst_stride,
+ const int ccso_stride,
+ const int x, const int y,
+ const int pic_width, const int pic_height,
+ const int8_t *filter_offset, const int blk_size_x,
+ const int blk_size_y,
+ const int y_uv_hscale, const int y_uv_vscale,
+ const int max_val, const uint8_t shift_bits,
+ const uint8_t ccso_bo_only";
+ specialize qw/ccso_filter_block_hbd_with_buf_bo_only avx2/;
+ }
+ else {
+ add_proto qw/void ccso_filter_block_hbd_with_buf/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1,
+ const int src_y_stride, const int dst_stride,
+ const int ccso_stride,
+ const int x, const int y,
+ const int pic_width, const int pic_height,
+ const int8_t *filter_offset, const int blk_size,
+ const int y_uv_hscale, const int y_uv_vscale,
+ const int max_val, const uint8_t shift_bits,
+ const uint8_t ccso_bo_only";
+ specialize qw/ccso_filter_block_hbd_with_buf avx2/;
+
+ add_proto qw/void ccso_filter_block_hbd_with_buf_bo_only/, "const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0, const uint8_t *src_cls1,
+ const int src_y_stride, const int dst_stride,
+ const int ccso_stride,
+ const int x, const int y,
+ const int pic_width, const int pic_height,
+ const int8_t *filter_offset, const int blk_size,
+ const int y_uv_hscale, const int y_uv_vscale,
+ const int max_val, const uint8_t shift_bits,
+ const uint8_t ccso_bo_only";
+ specialize qw/ccso_filter_block_hbd_with_buf_bo_only avx2/;
+ }
add_proto qw/uint64_t compute_distortion_block/, "const uint16_t *org, const int org_stride,
const uint16_t *rec16, const int rec_stride, const int x, const int y,
@@ -652,12 +684,23 @@
const int width";
specialize qw/compute_distortion_block avx2/;
- add_proto qw/void ccso_derive_src_block/, "const uint16_t *src_y, uint8_t *const src_cls0,
- uint8_t *const src_cls1, const int src_y_stride, const int ccso_stride,
- const int x, const int y, const int pic_width, const int pic_height,
- const int y_uv_hscale, const int y_uv_vscale, const int qstep,
- const int neg_qstep, const int *src_loc, const int blk_size, const int edge_clf";
- specialize qw/ccso_derive_src_block avx2/
+ if (aom_config("CONFIG_CCSO_FU_BUGFIX") eq "yes") { # prototype with separate x/y block sizes
+ add_proto qw/void ccso_derive_src_block/, "const uint16_t *src_y, uint8_t *const src_cls0,
+ uint8_t *const src_cls1, const int src_y_stride, const int ccso_stride,
+ const int x, const int y, const int pic_width, const int pic_height,
+ const int y_uv_hscale, const int y_uv_vscale, const int qstep,
+ const int neg_qstep, const int *src_loc, const int blk_size_x,
+ const int blk_size_y, const int edge_clf";
+ specialize qw/ccso_derive_src_block avx2/;
+ }
+ else { # prototype with a single blk_size
+ add_proto qw/void ccso_derive_src_block/, "const uint16_t *src_y, uint8_t *const src_cls0,
+ uint8_t *const src_cls1, const int src_y_stride, const int ccso_stride,
+ const int x, const int y, const int pic_width, const int pic_height,
+ const int y_uv_hscale, const int y_uv_vscale, const int qstep,
+ const int neg_qstep, const int *src_loc, const int blk_size, const int edge_clf";
+ specialize qw/ccso_derive_src_block avx2/;
+ }
}
# WARPED_MOTION / GLOBAL_MOTION functions
diff --git a/av1/common/ccso.c b/av1/common/ccso.c
index a5c148e..794d0e7 100644
--- a/av1/common/ccso.c
+++ b/av1/common/ccso.c
@@ -150,10 +150,20 @@
const int8_t *offset_buf, const int src_y_stride, const int dst_stride,
const int y_uv_hscale, const int y_uv_vscale, const int thr,
const int neg_thr, const int *src_loc, const int max_val,
- const int blk_size, const bool isSingleBand, const uint8_t shift_bits,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x, const int blk_size_y,
+#else
+ const int blk_size,
+#endif
+ const bool isSingleBand, const uint8_t shift_bits,
const int edge_clf, const uint8_t ccso_bo_only) {
+#if CONFIG_CCSO_FU_BUGFIX
+ const int y_end = AOMMIN(pic_height - y, blk_size_y);
+ const int x_end = AOMMIN(pic_width - x, blk_size_x);
+#else
const int y_end = AOMMIN(pic_height - y, blk_size);
const int x_end = AOMMIN(pic_width - x, blk_size);
+#endif
for (int y_start = 0; y_start < y_end; y_start++) {
const int y_pos = y_start;
for (int x_start = 0; x_start < x_end; x_start++) {
@@ -202,34 +212,67 @@
const int neg_thr = thr * -1;
int src_loc[2];
derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup);
+#if CONFIG_CCSO_FU_BUGFIX
+ assert(plane == 0); // function must only be called for plane == 0
+ const int blk_log2 = CCSO_BLK_SIZE;
+#else
const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+#endif
const int blk_size = 1 << blk_log2;
src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE;
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n",
+ plane, cm->ccso_info.ccso_bo_only[plane],
+ thr, neg_thr, __FUNCTION__);
+#endif
for (int y = 0; y < pic_height; y += blk_size) {
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: y %d", y);
+#endif
for (int x = 0; x < pic_width; x += blk_size) {
+#if CONFIG_CCSO_FU_BUGFIX
+ const int ccso_blk_idx =
+ (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2) * mi_params->mi_stride +
+ (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2);
+#else
const int ccso_blk_idx =
(blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) *
(y >> blk_log2) * mi_params->mi_stride +
(blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) *
(x >> blk_log2);
+#endif
const bool use_ccso = mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_y;
+#if CONFIG_CCSO_DEBUG // debug trace; %p takes a void * argument
+ printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, (void *)mi_params->mi_grid_base[ccso_blk_idx]);
+#endif // CONFIG_CCSO_DEBUG
if (!use_ccso) continue;
if (cm->ccso_info.ccso_bo_only[plane]) {
ccso_filter_block_hbd_wo_buf_c(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0,
- 0, thr, neg_thr, src_loc, max_val, blk_size, false, shift_bits,
+ 0, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size,
+#endif
+ blk_size, false, shift_bits,
edge_clf, cm->ccso_info.ccso_bo_only[plane]);
} else {
ccso_filter_block_hbd_wo_buf(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0,
- 0, thr, neg_thr, src_loc, max_val, blk_size, false, shift_bits,
+ 0, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size,
+#endif
+ blk_size, false, shift_bits,
edge_clf, 0);
}
}
dst_yuv += (dst_stride << blk_log2);
src_y += (ccso_ext_stride << blk_log2);
+#if CONFIG_CCSO_DEBUG
+ printf("\n");
+#endif
}
}
@@ -256,34 +299,67 @@
const int neg_thr = thr * -1;
int src_loc[2];
derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup);
+#if CONFIG_CCSO_FU_BUGFIX
+ assert(plane == 0); // function must only be called for plane == 0
+ const int blk_log2 = CCSO_BLK_SIZE;
+#else
const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+#endif
const int blk_size = 1 << blk_log2;
src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE;
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n",
+ plane, cm->ccso_info.ccso_bo_only[plane],
+ thr, neg_thr, __FUNCTION__);
+#endif
for (int y = 0; y < pic_height; y += blk_size) {
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: y %d", y);
+#endif
for (int x = 0; x < pic_width; x += blk_size) {
+#if CONFIG_CCSO_FU_BUGFIX
+ const int ccso_blk_idx =
+ (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2) * mi_params->mi_stride +
+ (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2);
+#else
const int ccso_blk_idx =
(blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) *
(y >> blk_log2) * mi_params->mi_stride +
(blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) *
(x >> blk_log2);
+#endif
const bool use_ccso = mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_y;
+#if CONFIG_CCSO_DEBUG // debug trace; %p takes a void * argument
+ printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, (void *)mi_params->mi_grid_base[ccso_blk_idx]);
+#endif // CONFIG_CCSO_DEBUG
if (!use_ccso) continue;
if (cm->ccso_info.ccso_bo_only[plane]) {
ccso_filter_block_hbd_wo_buf_c(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0,
- 0, thr, neg_thr, src_loc, max_val, blk_size, true, shift_bits,
+ 0, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size,
+#endif
+ blk_size, true, shift_bits,
edge_clf, cm->ccso_info.ccso_bo_only[plane]);
} else {
ccso_filter_block_hbd_wo_buf(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride, 0,
- 0, thr, neg_thr, src_loc, max_val, blk_size, true, shift_bits,
+ 0, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size,
+#endif
+ blk_size, true, shift_bits,
edge_clf, 0);
}
}
dst_yuv += (dst_stride << blk_log2);
src_y += (ccso_ext_stride << blk_log2);
+#if CONFIG_CCSO_DEBUG
+ printf("\n");
+#endif
}
}
@@ -311,36 +387,74 @@
const int neg_thr = thr * -1;
int src_loc[2];
derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup);
- const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
- const int blk_size = 1 << blk_log2;
+#if CONFIG_CCSO_FU_BUGFIX
+ assert(plane > 0); // function must only be called for plane > 0
+ const int blk_size = 1 << CCSO_BLK_SIZE; // unsubsampled unit size, used for the mi-grid index
+ const int blk_log2_y = CCSO_BLK_SIZE - cm->seq_params.subsampling_y;
+ const int blk_log2_x = CCSO_BLK_SIZE - cm->seq_params.subsampling_x;
+#else // square unit; blk_log2 is still referenced by the ccso_blk_idx computation below
+ const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+ const int blk_log2_y = blk_log2;
+ const int blk_log2_x = blk_log2;
+#endif // CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = 1 << blk_log2_y; // vertical step of the y loop
+ const int blk_size_x = 1 << blk_log2_x; // horizontal step of the x loop
src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE;
- for (int y = 0; y < pic_height; y += blk_size) {
- for (int x = 0; x < pic_width; x += blk_size) {
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n",
+ plane, cm->ccso_info.ccso_bo_only[plane],
+ thr, neg_thr, __FUNCTION__);
+#endif
+ for (int y = 0; y < pic_height; y += blk_size_y) {
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: y %d", y);
+#endif
+ for (int x = 0; x < pic_width; x += blk_size_x) {
+#if CONFIG_CCSO_FU_BUGFIX
const int ccso_blk_idx =
- (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) *
+ (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2_y) * mi_params->mi_stride +
+ (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2_x);
+#else
+ const int ccso_blk_idx =
+ (blk_size_y >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) *
(y >> blk_log2) * mi_params->mi_stride +
- (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) *
+ (blk_size_x >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) *
(x >> blk_log2);
+#endif
const bool use_ccso =
(plane == 1) ? mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_u
: mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_v;
+#if CONFIG_CCSO_DEBUG // debug trace; %p takes a void * argument
+ printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, (void *)mi_params->mi_grid_base[ccso_blk_idx]);
+#endif // CONFIG_CCSO_DEBUG
if (!use_ccso) continue;
if (cm->ccso_info.ccso_bo_only[plane]) {
ccso_filter_block_hbd_wo_buf_c(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride,
- y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size,
+ y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size_x,
+#endif
+ blk_size_y,
false, shift_bits, edge_clf, cm->ccso_info.ccso_bo_only[plane]);
} else {
ccso_filter_block_hbd_wo_buf(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride,
- y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size,
+ y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size_x,
+#endif
+ blk_size_y,
false, shift_bits, edge_clf, 0);
}
}
- dst_yuv += (dst_stride << blk_log2);
- src_y += (ccso_ext_stride << (blk_log2 + y_uv_vscale));
+ dst_yuv += (dst_stride << blk_log2_y);
+ src_y += (ccso_ext_stride << (blk_log2_y + y_uv_vscale));
+#if CONFIG_CCSO_DEBUG
+ printf("\n");
+#endif
}
}
@@ -369,36 +483,74 @@
const int neg_thr = thr * -1;
int src_loc[2];
derive_ccso_sample_pos(cm, src_loc, ccso_ext_stride, filter_sup);
- const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
- const int blk_size = 1 << blk_log2;
+#if CONFIG_CCSO_FU_BUGFIX
+ assert(plane > 0); // function must only be called for plane > 0
+ const int blk_size = 1 << CCSO_BLK_SIZE; // unsubsampled unit size, used for the mi-grid index
+ const int blk_log2_y = CCSO_BLK_SIZE - cm->seq_params.subsampling_y;
+ const int blk_log2_x = CCSO_BLK_SIZE - cm->seq_params.subsampling_x;
+#else // square unit; blk_log2 is still referenced by the ccso_blk_idx computation below
+ const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+ const int blk_log2_y = blk_log2;
+ const int blk_log2_x = blk_log2;
+#endif // CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = 1 << blk_log2_y; // vertical step of the y loop
+ const int blk_size_x = 1 << blk_log2_x; // horizontal step of the x loop
src_y += CCSO_PADDING_SIZE * ccso_ext_stride + CCSO_PADDING_SIZE;
- for (int y = 0; y < pic_height; y += blk_size) {
- for (int x = 0; x < pic_width; x += blk_size) {
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: plane %d bo_only %d thr %d neg_thr %d @ %s\n",
+ plane, cm->ccso_info.ccso_bo_only[plane],
+ thr, neg_thr, __FUNCTION__);
+#endif
+ for (int y = 0; y < pic_height; y += blk_size_y) {
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: y %d", y);
+#endif
+ for (int x = 0; x < pic_width; x += blk_size_x) {
+#if CONFIG_CCSO_FU_BUGFIX
const int ccso_blk_idx =
- (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) *
+ (blk_size >> MI_SIZE_LOG2) * (y >> blk_log2_y) * mi_params->mi_stride +
+ (blk_size >> MI_SIZE_LOG2) * (x >> blk_log2_x);
+#else
+ const int ccso_blk_idx =
+ (blk_size_y >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) *
(y >> blk_log2) * mi_params->mi_stride +
- (blk_size >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) *
+ (blk_size_x >> (MI_SIZE_LOG2 - xd->plane[plane].subsampling_x)) *
(x >> blk_log2);
+#endif
const bool use_ccso =
(plane == 1) ? mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_u
: mi_params->mi_grid_base[ccso_blk_idx]->ccso_blk_v;
+#if CONFIG_CCSO_DEBUG // debug trace; %p takes a void * argument
+ printf(" use [%d] %d (%p)", ccso_blk_idx, use_ccso, (void *)mi_params->mi_grid_base[ccso_blk_idx]);
+#endif // CONFIG_CCSO_DEBUG
if (!use_ccso) continue;
if (cm->ccso_info.ccso_bo_only[plane]) {
ccso_filter_block_hbd_wo_buf_c(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride,
- y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size,
+ y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size_x,
+#endif
+ blk_size_y,
true, shift_bits, edge_clf, cm->ccso_info.ccso_bo_only[plane]);
} else {
ccso_filter_block_hbd_wo_buf(
src_y, dst_yuv, x, y, pic_width, pic_height, src_cls,
cm->ccso_info.filter_offset[plane], ccso_ext_stride, dst_stride,
- y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val, blk_size,
+ y_uv_hscale, y_uv_vscale, thr, neg_thr, src_loc, max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size_x,
+#endif
+ blk_size_y,
true, shift_bits, edge_clf, 0);
}
}
- dst_yuv += (dst_stride << blk_log2);
- src_y += (ccso_ext_stride << (blk_log2 + y_uv_vscale));
+ dst_yuv += (dst_stride << blk_log2_y);
+ src_y += (ccso_ext_stride << (blk_log2_y + y_uv_vscale));
+#if CONFIG_CCSO_DEBUG
+ printf("\n");
+#endif
}
}
@@ -465,4 +617,4 @@
to->ccso_enable[plane] = from->ccso_enable[plane];
}
-#endif // CONFIG_CCSO_IMPROVE
\ No newline at end of file
+#endif // CONFIG_CCSO_IMPROVE
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 90dc0e6..8a86fbd 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -131,11 +131,13 @@
#define IBP_WEIGHT_SIZE (1 << IBP_WEIGHT_SIZE_LOG2)
#endif // CONFIG_IBP_WEIGHT
+#if !CONFIG_CCSO_FU_BUGFIX
// Cross-Component Sample Offset (CCSO)
#define CCSO_BLK_SIZE 7
#define CCSO_PADDING_SIZE 5
#define CCSO_BAND_NUM 128
#define CCSO_NUM_COMPONENTS 3
+#endif
#define BUGFIX_AMVD_AMVR 1
// Supported scale modes for JOINT_NEWMV
@@ -153,6 +155,14 @@
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
#define BLOCK_128_MI_SIZE_LOG2 5
+#if CONFIG_CCSO_FU_BUGFIX
+// Cross-Component Sample Offset (CCSO)
+#define CCSO_BLK_SIZE MAX_SB_SIZE_LOG2
+#define CCSO_PADDING_SIZE 5
+#define CCSO_BAND_NUM 128
+#define CCSO_NUM_COMPONENTS 3
+#endif
+
#if CONFIG_ENABLE_MHCCP
#define MHCCP_CONTEXT_GROUP_SIZE 7
#define LINE_NUM 3
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c
index 0725e2b..a856199 100644
--- a/av1/common/pred_common.c
+++ b/av1/common/pred_common.c
@@ -524,10 +524,15 @@
bool av1_check_ccso_mbmi_inside_tile(const MACROBLOCKD *xd,
const MB_MODE_INFO *const mbmi) {
const TileInfo *const tile = &xd->tile;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+ const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+#else
const int blk_size_y =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1;
const int blk_size_x =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1;
+#endif
return (((mbmi->mi_row_start & ~blk_size_y) >= tile->mi_row_start) &&
((mbmi->mi_col_start & ~blk_size_x) >= tile->mi_col_start) &&
@@ -559,10 +564,15 @@
neighbor1_ccso_available = av1_check_ccso_mbmi_inside_tile(xd, neighbor1);
}
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+ const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+#else
const int blk_size_y =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1;
const int blk_size_x =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1;
+#endif
if (neighbor0_ccso_available && neighbor1_ccso_available) {
int is_neighbor0_ccso = 0;
diff --git a/av1/common/x86/highbd_ccso_avx2.c b/av1/common/x86/highbd_ccso_avx2.c
index 9c2ac24..b796018 100644
--- a/av1/common/x86/highbd_ccso_avx2.c
+++ b/av1/common/x86/highbd_ccso_avx2.c
@@ -51,7 +51,13 @@
const int y_uv_vscale,
// const int pad_stride, no pad size anymore
const int quant_step_size, const int inv_quant_step, const int *rec_idx,
- const int max_val, const int blk_size, const bool isSingleBand,
+ const int max_val,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x, const int blk_size_y,
+#else
+ const int blk_size,
+#endif
+ const bool isSingleBand,
const uint8_t shift_bits, const int edge_clf, const uint8_t ccso_bo_only) {
assert(ccso_bo_only == 0);
(void)ccso_bo_only;
@@ -82,16 +88,22 @@
int y_offset;
int x_offset, x_remainder;
- if (y + blk_size >= pic_height)
+
+#if !CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x = blk_size;
+ const int blk_size_y = blk_size;
+#endif
+
+ if (y + blk_size_y >= pic_height)
y_offset = pic_height - y;
else
- y_offset = blk_size;
+ y_offset = blk_size_y;
- if (x + blk_size >= pic_width) {
+ if (x + blk_size_x >= pic_width) {
x_offset = ((pic_width - x) >> 4) << 4;
x_remainder = pic_width - x - x_offset;
} else {
- x_offset = blk_size;
+ x_offset = blk_size_x;
x_remainder = 0;
}
for (int yOff = 0; yOff < y_offset; yOff++) {
@@ -245,7 +257,12 @@
const int pic_width, const int pic_height,
const int y_uv_hscale, const int y_uv_vscale,
const int qstep, const int neg_qstep,
- const int *src_loc, const int blk_size,
+ const int *src_loc,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x, const int blk_size_y,
+#else
+ const int blk_size,
+#endif
const int edge_clf) {
const int quant_step_size = qstep;
const int inv_quant_step = neg_qstep;
@@ -275,16 +292,22 @@
int y_offset;
int x_offset, x_remainder;
- if (y + blk_size >= pic_height)
+
+#if !CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x = blk_size;
+ const int blk_size_y = blk_size;
+#endif
+
+ if (y + blk_size_y >= pic_height)
y_offset = pic_height - y;
else
- y_offset = blk_size;
+ y_offset = blk_size_y;
- if (x + blk_size >= pic_width) {
+ if (x + blk_size_x >= pic_width) {
x_offset = ((pic_width - x) >> 4) << 4;
x_remainder = pic_width - x - x_offset;
} else {
- x_offset = blk_size;
+ x_offset = blk_size_x;
x_remainder = 0;
}
for (int yOff = 0; yOff < y_offset; yOff++) {
@@ -421,7 +444,12 @@
const uint16_t *src_y, uint16_t *dts_yuv, const uint8_t *src_cls0,
const uint8_t *src_cls1, const int src_y_stride, const int dst_stride,
const int ccso_stride, const int x, const int y, const int pic_width,
- const int pic_height, const int8_t *filter_offset, const int blk_size,
+ const int pic_height, const int8_t *filter_offset,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x, const int blk_size_y,
+#else
+ const int blk_size,
+#endif
const int y_uv_hscale, const int y_uv_vscale, const int max_val,
const uint8_t shift_bits, const uint8_t ccso_bo_only) {
(void)ccso_bo_only;
@@ -440,16 +468,22 @@
int y_offset;
int x_offset, x_remainder;
- if (y + blk_size >= pic_height)
+
+#if !CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x = blk_size;
+ const int blk_size_y = blk_size;
+#endif
+
+ if (y + blk_size_y >= pic_height)
y_offset = pic_height - y;
else
- y_offset = blk_size;
+ y_offset = blk_size_y;
- if (x + blk_size >= pic_width) {
+ if (x + blk_size_x >= pic_width) {
x_offset = ((pic_width - x) >> 4) << 4;
x_remainder = pic_width - x - x_offset;
} else {
- x_offset = blk_size;
+ x_offset = blk_size_x;
x_remainder = 0;
}
for (int yOff = 0; yOff < y_offset; yOff++) {
@@ -528,7 +562,12 @@
const uint16_t *src_y, uint16_t *dts_yuv, const uint8_t *src_cls0,
const uint8_t *src_cls1, const int src_y_stride, const int dst_stride,
const int ccso_stride, const int x, const int y, const int pic_width,
- const int pic_height, const int8_t *filter_offset, const int blk_size,
+ const int pic_height, const int8_t *filter_offset,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x, const int blk_size_y,
+#else
+ const int blk_size,
+#endif
const int y_uv_hscale, const int y_uv_vscale, const int max_val,
const uint8_t shift_bits, const uint8_t ccso_bo_only) {
(void)ccso_bo_only;
@@ -547,16 +586,22 @@
int y_offset;
int x_offset, x_remainder;
- if (y + blk_size >= pic_height)
+
+#if !CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x = blk_size;
+ const int blk_size_y = blk_size;
+#endif
+
+ if (y + blk_size_y >= pic_height)
y_offset = pic_height - y;
else
- y_offset = blk_size;
+ y_offset = blk_size_y;
- if (x + blk_size >= pic_width) {
+ if (x + blk_size_x >= pic_width) {
x_offset = ((pic_width - x) >> 4) << 4;
x_remainder = pic_width - x - x_offset;
} else {
- x_offset = blk_size;
+ x_offset = blk_size_x;
x_remainder = 0;
}
for (int yOff = 0; yOff < y_offset; yOff++) {
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 344268c..d42bb86 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3894,6 +3894,12 @@
#endif // CONFIG_CCSO_SIGFIX
cm->ccso_info.max_band_log2[plane] = aom_rb_read_literal(rb, 2);
}
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: plane %d quant_idx %d ext_filter_support %d edge_clf %d ccso_bo_only %d max_band_log2 %d scale_idx %d @ %s\n",
+ plane, cm->ccso_info.quant_idx[plane], cm->ccso_info.ext_filter_support[plane],
+ cm->ccso_info.edge_clf[plane], cm->ccso_info.ccso_bo_only[plane],
+ cm->ccso_info.max_band_log2[plane], cm->ccso_info.scale_idx[plane], __FUNCTION__);
+#endif
const int max_band = 1 << cm->ccso_info.max_band_log2[plane];
#if !CONFIG_CCSO_SIGFIX
cm->ccso_info.edge_clf[plane] = aom_rb_read_bit(rb);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 305dc9a..3a81162 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -103,8 +103,13 @@
const BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y];
const int bw = mi_size_wide[bsize];
const int bh = mi_size_high[bsize];
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_w = CCSO_BLK_SIZE;
+ const int log2_h = CCSO_BLK_SIZE;
+#else
const int log2_w = CCSO_BLK_SIZE + xd->plane[1].subsampling_x;
const int log2_h = CCSO_BLK_SIZE + xd->plane[1].subsampling_y;
+#endif
const int f_w = 1 << log2_w >> MI_SIZE_LOG2;
const int f_h = 1 << log2_h >> MI_SIZE_LOG2;
const int ccso_nhfb = (mi_params->mi_cols + f_w - 1) / f_w;
@@ -122,18 +127,27 @@
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+ const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+#else
const int blk_size_y =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1;
const int blk_size_x =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1;
+#endif
#if CONFIG_CCSO_IMPROVE
int blk_idc;
#endif
if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) &&
cm->ccso_info.ccso_enable[0]) {
#if CONFIG_CCSO_IMPROVE
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_filter_unit_size = CCSO_BLK_SIZE;
+#else
const int log2_filter_unit_size =
CCSO_BLK_SIZE + xd->plane[1].subsampling_x;
+#endif
const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[0].subsampling_x) +
(1 << log2_filter_unit_size >> 2) - 1) /
(1 << log2_filter_unit_size >> 2);
@@ -144,10 +158,18 @@
const int ccso_ctx = av1_get_ccso_context(xd, 0);
blk_idc = aom_read_symbol(r, xd->tile_ctx->ccso_cdf[0][ccso_ctx], 2,
ACCT_INFO("blk_idc"));
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: [%d,%d] read ccso_blk_y %d @ %s\n", mi_row, mi_col, blk_idc, __FUNCTION__);
+#endif
} else {
CcsoInfo *ref_frame_ccso_info =
&get_ref_frame_buf(cm, cm->ccso_info.ccso_ref_idx[0])->ccso_info;
blk_idc = ref_frame_ccso_info->sb_filter_control[0][sb_idx];
+#if CONFIG_CCSO_DEBUG // debug trace; %p takes a void * argument
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_y %d : 0x%p @ %s\n", mi_row, mi_col, sb_idx, blk_idc,
+ (void *)mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride +
+ (mi_col & ~blk_size_x)], __FUNCTION__);
+#endif // CONFIG_CCSO_DEBUG
}
#else
const int blk_idc =
@@ -174,7 +196,11 @@
if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) &&
cm->ccso_info.ccso_enable[1]) {
#if CONFIG_CCSO_IMPROVE
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_filter_unit_size = (CCSO_BLK_SIZE - xd->plane[1].subsampling_x);
+#else
const int log2_filter_unit_size = CCSO_BLK_SIZE;
+#endif
const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[1].subsampling_x) +
(1 << log2_filter_unit_size >> 2) - 1) /
(1 << log2_filter_unit_size >> 2);
@@ -185,10 +211,18 @@
const int ccso_ctx = av1_get_ccso_context(xd, 1);
blk_idc = aom_read_symbol(r, xd->tile_ctx->ccso_cdf[1][ccso_ctx], 2,
ACCT_INFO("blk_idc"));
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: [%d,%d] read ccso_blk_u %d @ %s\n", mi_row, mi_col, blk_idc, __FUNCTION__);
+#endif
} else {
CcsoInfo *ref_frame_ccso_info =
&get_ref_frame_buf(cm, cm->ccso_info.ccso_ref_idx[1])->ccso_info;
blk_idc = ref_frame_ccso_info->sb_filter_control[1][sb_idx];
+#if CONFIG_CCSO_DEBUG // debug trace; %p takes a void * argument
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_u %d : 0x%p @ %s\n", mi_row, mi_col, sb_idx, blk_idc,
+ (void *)mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride +
+ (mi_col & ~blk_size_x)], __FUNCTION__);
+#endif // CONFIG_CCSO_DEBUG
}
#else
const int blk_idc =
@@ -215,7 +249,11 @@
if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) &&
cm->ccso_info.ccso_enable[2]) {
#if CONFIG_CCSO_IMPROVE
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_filter_unit_size = (CCSO_BLK_SIZE - xd->plane[2].subsampling_x);
+#else
const int log2_filter_unit_size = CCSO_BLK_SIZE;
+#endif
const int ccso_nhfb = ((mi_params->mi_cols >> xd->plane[2].subsampling_x) +
(1 << log2_filter_unit_size >> 2) - 1) /
(1 << log2_filter_unit_size >> 2);
@@ -226,10 +264,18 @@
const int ccso_ctx = av1_get_ccso_context(xd, 2);
blk_idc = aom_read_symbol(r, xd->tile_ctx->ccso_cdf[2][ccso_ctx], 2,
ACCT_INFO("blk_idc"));
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: [%d,%d] read ccso_blk_v %d @ %s\n", mi_row, mi_col, blk_idc, __FUNCTION__);
+#endif
} else {
CcsoInfo *ref_frame_ccso_info =
&get_ref_frame_buf(cm, cm->ccso_info.ccso_ref_idx[2])->ccso_info;
blk_idc = ref_frame_ccso_info->sb_filter_control[2][sb_idx];
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_v %d : 0x%p @ %s\n", mi_row, mi_col, sb_idx, blk_idc,
+ mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride +
+ (mi_col & ~blk_size_x)], __FUNCTION__);
+#endif
}
#else
const int blk_idc =
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 1cd32bc..bf6bd0d 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1949,10 +1949,15 @@
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+ const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+#else
const int blk_size_y =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1;
const int blk_size_x =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1;
+#endif
const MB_MODE_INFO *mbmi =
mi_params->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride +
(mi_col & ~blk_size_x)];
@@ -1964,6 +1969,9 @@
const int ccso_ctx = av1_get_ccso_context(xd, 0);
aom_write_symbol(w, mbmi->ccso_blk_y == 0 ? 0 : 1,
xd->tile_ctx->ccso_cdf[0][ccso_ctx], 2);
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: [%d,%d] write ccso_blk_y %d @ %s\n", mi_row, mi_col, mbmi->ccso_blk_y == 0 ? 0 : 1, __FUNCTION__);
+#endif
}
#else
aom_write_symbol(w, mbmi->ccso_blk_y == 0 ? 0 : 1,
@@ -1979,6 +1987,9 @@
const int ccso_ctx = av1_get_ccso_context(xd, 1);
aom_write_symbol(w, mbmi->ccso_blk_u == 0 ? 0 : 1,
xd->tile_ctx->ccso_cdf[1][ccso_ctx], 2);
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: [%d,%d] write ccso_blk_u %d @ %s\n", mi_row, mi_col, mbmi->ccso_blk_u == 0 ? 0 : 1, __FUNCTION__);
+#endif
}
#else
aom_write_symbol(w, mbmi->ccso_blk_u == 0 ? 0 : 1,
@@ -1994,6 +2005,9 @@
const int ccso_ctx = av1_get_ccso_context(xd, 2);
aom_write_symbol(w, mbmi->ccso_blk_v == 0 ? 0 : 1,
xd->tile_ctx->ccso_cdf[2][ccso_ctx], 2);
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: [%d,%d] write ccso_blk_v %d @ %s\n", mi_row, mi_col, mbmi->ccso_blk_v == 0 ? 0 : 1, __FUNCTION__);
+#endif
}
#else
aom_write_symbol(w, mbmi->ccso_blk_v == 0 ? 0 : 1,
diff --git a/av1/encoder/pickccso.c b/av1/encoder/pickccso.c
index 794b248..3d21b04 100644
--- a/av1/encoder/pickccso.c
+++ b/av1/encoder/pickccso.c
@@ -75,11 +75,15 @@
const int pic_width, const int pic_height,
const int y_uv_hscale, const int y_uv_vscale,
const int qstep, const int neg_qstep,
- const int *src_loc, const int blk_size,
+ const int *src_loc,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x,
+#endif
+ const int blk_size_y,
const int edge_clf) {
int src_cls[2];
- const int y_end = AOMMIN(pic_height - y, blk_size);
- const int x_end = AOMMIN(pic_width - x, blk_size);
+ const int y_end = AOMMIN(pic_height - y, blk_size_y);
+ const int x_end = AOMMIN(pic_width - x, blk_size_x);
for (int y_start = 0; y_start < y_end; y_start++) {
const int y_pos = y_start;
for (int x_start = 0; x_start < x_end; x_start++) {
@@ -114,19 +118,29 @@
const int neg_qstep = qstep * -1;
int src_loc[2];
derive_ccso_sample_pos(cm, src_loc, ccso_stride_ext, filter_sup);
- const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
- const int blk_size = 1 << blk_log2;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y;
+ const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x;
+#else
+ const int blk_log2_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+ const int blk_log2_x = blk_log2_y;
+#endif
+ const int blk_size_y = 1 << blk_log2_y;
+ const int blk_size_x = 1 << blk_log2_x;
src_y += CCSO_PADDING_SIZE * ccso_stride_ext + CCSO_PADDING_SIZE;
- for (int y = 0; y < pic_height; y += blk_size) {
- for (int x = 0; x < pic_width; x += blk_size) {
+ for (int y = 0; y < pic_height; y += blk_size_y) {
+ for (int x = 0; x < pic_width; x += blk_size_x) {
ccso_derive_src_block(src_y, src_cls0, src_cls1, ccso_stride_ext,
ccso_stride, x, y, pic_width, pic_height,
y_uv_hscale, y_uv_vscale, qstep, neg_qstep, src_loc,
- blk_size, edge_clf);
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size_x,
+#endif
+ blk_size_y, edge_clf);
}
- src_y += (ccso_stride_ext << (blk_log2 + y_uv_vscale));
- src_cls0 += (ccso_stride << (blk_log2 + y_uv_vscale));
- src_cls1 += (ccso_stride << (blk_log2 + y_uv_vscale));
+ src_y += (ccso_stride_ext << (blk_log2_y + y_uv_vscale));
+ src_cls0 += (ccso_stride << (blk_log2_y + y_uv_vscale));
+ src_cls1 += (ccso_stride << (blk_log2_y + y_uv_vscale));
}
}
@@ -144,16 +158,23 @@
int fb_idx = 0;
uint8_t cur_src_cls0;
uint8_t cur_src_cls1;
- const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
- const int blk_size = 1 << blk_log2;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y;
+ const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x;
+#else
+ const int blk_log2_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+ const int blk_log2_x = blk_log2_y;
+#endif
+ const int blk_size_y = 1 << blk_log2_y;
+ const int blk_size_x = 1 << blk_log2_x;
const int scaled_ext_stride = (ctx->ccso_stride_ext << y_uv_vscale);
const int scaled_stride = (ctx->ccso_stride << y_uv_vscale);
src_y += CCSO_PADDING_SIZE * ctx->ccso_stride_ext + CCSO_PADDING_SIZE;
- for (int y = 0; y < pic_height; y += blk_size) {
- for (int x = 0; x < pic_width; x += blk_size) {
+ for (int y = 0; y < pic_height; y += blk_size_y) {
+ for (int x = 0; x < pic_width; x += blk_size_x) {
fb_idx++;
- const int y_end = AOMMIN(pic_height - y, blk_size);
- const int x_end = AOMMIN(pic_width - x, blk_size);
+ const int y_end = AOMMIN(pic_height - y, blk_size_y);
+ const int x_end = AOMMIN(pic_width - x, blk_size_x);
for (int y_start = 0; y_start < y_end; y_start++) {
for (int x_start = 0; x_start < x_end; x_start++) {
const int x_pos = x + x_start;
@@ -177,11 +198,11 @@
src_cls0 -= scaled_stride * y_end;
src_cls1 -= scaled_stride * y_end;
}
- ref += (ctx->ccso_stride << blk_log2);
- dst += (ctx->ccso_stride << blk_log2);
- src_y += (ctx->ccso_stride_ext << (blk_log2 + y_uv_vscale));
- src_cls0 += (ctx->ccso_stride << (blk_log2 + y_uv_vscale));
- src_cls1 += (ctx->ccso_stride << (blk_log2 + y_uv_vscale));
+ ref += (ctx->ccso_stride << blk_log2_y);
+ dst += (ctx->ccso_stride << blk_log2_y);
+ src_y += (ctx->ccso_stride_ext << (blk_log2_y + y_uv_vscale));
+ src_cls0 += (ctx->ccso_stride << (blk_log2_y + y_uv_vscale));
+ src_cls1 += (ctx->ccso_stride << (blk_log2_y + y_uv_vscale));
}
}
@@ -194,15 +215,22 @@
const int y_uv_hscale = xd->plane[plane].subsampling_x;
const int y_uv_vscale = xd->plane[plane].subsampling_y;
int fb_idx = 0;
- const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
- const int blk_size = 1 << blk_log2;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y;
+ const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x;
+#else
+ const int blk_log2_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+ const int blk_log2_x = blk_log2_y;
+#endif
+ const int blk_size_y = 1 << blk_log2_y;
+ const int blk_size_x = 1 << blk_log2_x;
const int scaled_ext_stride = (ctx->ccso_stride_ext << y_uv_vscale);
src_y += CCSO_PADDING_SIZE * ctx->ccso_stride_ext + CCSO_PADDING_SIZE;
- for (int y = 0; y < pic_height; y += blk_size) {
- for (int x = 0; x < pic_width; x += blk_size) {
+ for (int y = 0; y < pic_height; y += blk_size_y) {
+ for (int x = 0; x < pic_width; x += blk_size_x) {
fb_idx++;
- const int y_end = AOMMIN(pic_height - y, blk_size);
- const int x_end = AOMMIN(pic_width - x, blk_size);
+ const int y_end = AOMMIN(pic_height - y, blk_size_y);
+ const int x_end = AOMMIN(pic_width - x, blk_size_x);
for (int y_start = 0; y_start < y_end; y_start++) {
for (int x_start = 0; x_start < x_end; x_start++) {
const int x_pos = x + x_start;
@@ -219,9 +247,9 @@
dst -= ctx->ccso_stride * y_end;
src_y -= scaled_ext_stride * y_end;
}
- ref += (ctx->ccso_stride << blk_log2);
- dst += (ctx->ccso_stride << blk_log2);
- src_y += (ctx->ccso_stride_ext << (blk_log2 + y_uv_vscale));
+ ref += (ctx->ccso_stride << blk_log2_y);
+ dst += (ctx->ccso_stride << blk_log2_y);
+ src_y += (ctx->ccso_stride_ext << (blk_log2_y + y_uv_vscale));
}
}
@@ -230,7 +258,12 @@
const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0,
const uint8_t *src_cls1, const int src_y_stride, const int dst_stride,
const int src_cls_stride, const int x, const int y, const int pic_width,
- const int pic_height, const int8_t *filter_offset, const int blk_size,
+ const int pic_height, const int8_t *filter_offset,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x, const int blk_size_y,
+#else
+ const int blk_size,
+#endif
const int y_uv_hscale, const int y_uv_vscale, const int max_val,
const uint8_t shift_bits, const uint8_t ccso_bo_only) {
assert(ccso_bo_only == 1);
@@ -242,8 +275,13 @@
int cur_src_cls0;
int cur_src_cls1;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int y_end = AOMMIN(pic_height - y, blk_size_y);
+ const int x_end = AOMMIN(pic_width - x, blk_size_x);
+#else
const int y_end = AOMMIN(pic_height - y, blk_size);
const int x_end = AOMMIN(pic_width - x, blk_size);
+#endif
for (int y_start = 0; y_start < y_end; y_start++) {
const int y_pos = y_start;
for (int x_start = 0; x_start < x_end; x_start++) {
@@ -266,7 +304,12 @@
const uint16_t *src_y, uint16_t *dst_yuv, const uint8_t *src_cls0,
const uint8_t *src_cls1, const int src_y_stride, const int dst_stride,
const int src_cls_stride, const int x, const int y, const int pic_width,
- const int pic_height, const int8_t *filter_offset, const int blk_size,
+ const int pic_height, const int8_t *filter_offset,
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_x, const int blk_size_y,
+#else
+ const int blk_size,
+#endif
const int y_uv_hscale, const int y_uv_vscale, const int max_val,
const uint8_t shift_bits, const uint8_t ccso_bo_only) {
if (ccso_bo_only) {
@@ -275,8 +318,13 @@
}
int cur_src_cls0;
int cur_src_cls1;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int y_end = AOMMIN(pic_height - y, blk_size_y);
+ const int x_end = AOMMIN(pic_width - x, blk_size_x);
+#else
const int y_end = AOMMIN(pic_height - y, blk_size);
const int x_end = AOMMIN(pic_width - x, blk_size);
+#endif
for (int y_start = 0; y_start < y_end; y_start++) {
const int y_pos = y_start;
for (int x_start = 0; x_start < x_end; x_start++) {
@@ -311,7 +359,11 @@
const int pic_height = xd->plane[plane].dst.height;
const int pic_width = xd->plane[plane].dst.width;
const int max_val = (1 << cm->seq_params.bit_depth) - 1;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_log2 = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y;
+#else
const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+#endif
const int blk_size = 1 << blk_log2;
src_y += CCSO_PADDING_SIZE * ccso_stride_ext + CCSO_PADDING_SIZE;
for (int y = 0; y < pic_height; y += blk_size) {
@@ -324,12 +376,18 @@
#endif // CONFIG_CCSO_IMPROVE
src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride,
ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size,
+#endif
// y_uv_scale in h and v shall be zero
0, 0, max_val, shift_bits, ccso_bo_only);
} else {
ccso_filter_block_hbd_with_buf(
src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride,
ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size,
+#endif
// y_uv_scale in h and v shall be zero
0, 0, max_val, shift_bits, 0);
}
@@ -352,11 +410,18 @@
const int y_uv_hscale = xd->plane[plane].subsampling_x;
const int y_uv_vscale = xd->plane[plane].subsampling_y;
const int max_val = (1 << cm->seq_params.bit_depth) - 1;
- const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
- const int blk_size = 1 << blk_log2;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_log2_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y;
+ const int blk_log2_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x;
+#else
+ const int blk_log2_y = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+ const int blk_log2_x = blk_log2_y;
+#endif
+ const int blk_size_y = 1 << blk_log2_y;
+ const int blk_size_x = 1 << blk_log2_x;
src_y += CCSO_PADDING_SIZE * ccso_stride_ext + CCSO_PADDING_SIZE;
- for (int y = 0; y < pic_height; y += blk_size) {
- for (int x = 0; x < pic_width; x += blk_size) {
+ for (int y = 0; y < pic_height; y += blk_size_y) {
+ for (int x = 0; x < pic_width; x += blk_size_x) {
if (ccso_bo_only) {
#if CONFIG_CCSO_IMPROVE
ccso_filter_block_hbd_with_buf_bo_only(
@@ -364,19 +429,27 @@
ccso_filter_block_hbd_with_buf_c(
#endif // CONFIG_CCSO_IMPROVE
src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride,
- ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size,
+ ccso_stride, x, y, pic_width, pic_height, filter_offset,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size_x,
+#endif
+ blk_size_y,
y_uv_hscale, y_uv_vscale, max_val, shift_bits, ccso_bo_only);
} else {
ccso_filter_block_hbd_with_buf(
src_y, dst_yuv, src_cls0, src_cls1, ccso_stride_ext, dst_stride,
- ccso_stride, x, y, pic_width, pic_height, filter_offset, blk_size,
+ ccso_stride, x, y, pic_width, pic_height, filter_offset,
+#if CONFIG_CCSO_FU_BUGFIX
+ blk_size_x,
+#endif
+ blk_size_y,
y_uv_hscale, y_uv_vscale, max_val, shift_bits, 0);
}
}
- dst_yuv += (dst_stride << blk_log2);
- src_y += (ccso_stride_ext << (blk_log2 + y_uv_vscale));
- src_cls0 += (ccso_stride << (blk_log2 + y_uv_vscale));
- src_cls1 += (ccso_stride << (blk_log2 + y_uv_vscale));
+ dst_yuv += (dst_stride << blk_log2_y);
+ src_y += (ccso_stride_ext << (blk_log2_y + y_uv_vscale));
+ src_cls0 += (ccso_stride << (blk_log2_y + y_uv_vscale));
+ src_cls1 += (ccso_stride << (blk_log2_y + y_uv_vscale));
}
}
@@ -427,8 +500,8 @@
(x >> log2_filter_unit_size_x)] = ssd;
*total_distortion += ssd;
}
- org += (org_stride << log2_filter_unit_size_x);
- rec16 += (rec_stride << log2_filter_unit_size_x);
+ org += (org_stride << log2_filter_unit_size_y);
+ rec16 += (rec_stride << log2_filter_unit_size_y);
}
}
@@ -484,9 +557,13 @@
uint64_t *cur_total_dist, int *cur_total_rate,
bool *filter_enable, const int rdmult) {
aom_cdf_prob ccso_cdf[CCSO_CONTEXT][CDF_SIZE(2)];
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_filter_unit_size = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x;
+#else
const int log2_filter_unit_size =
plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_x;
;
+#endif
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int ccso_nhfb =
((mi_params->mi_cols >> xd->plane[plane].subsampling_x) +
@@ -499,10 +576,15 @@
const int tile_cols = tiles->cols;
const int tile_rows = tiles->rows;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+ const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+#else
const int blk_size_y =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1;
const int blk_size_x =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1;
+#endif
*cur_total_dist = 0;
@@ -590,9 +672,13 @@
bool *filter_enable, const int rdmult) {
(void)rdmult;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_filter_unit_size = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x;
+#else
const int log2_filter_unit_size =
plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_x;
;
+#endif
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int ccso_nhfb =
((mi_params->mi_cols >> xd->plane[plane].subsampling_x) +
@@ -605,10 +691,15 @@
const int tile_cols = tiles->cols;
const int tile_rows = tiles->rows;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+ const int blk_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2)) - 1;
+#else
const int blk_size_y =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y - MI_SIZE_LOG2)) - 1;
const int blk_size_x =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_x - MI_SIZE_LOG2)) - 1;
+#endif
*cur_total_dist = 0;
*cur_total_rate = 0;
@@ -724,7 +815,11 @@
const int max_edge_interval,
const uint8_t ccso_bo_only) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int blk_log2 = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y;
+#else
const int blk_log2 = plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + 1;
+#endif
const int nvfb = ((mi_params->mi_rows >> xd->plane[plane].subsampling_y) +
(1 << blk_log2 >> MI_SIZE_LOG2) - 1) /
(1 << blk_log2 >> MI_SIZE_LOG2);
@@ -879,10 +974,15 @@
#endif
) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_filter_unit_size_y = CCSO_BLK_SIZE - xd->plane[plane].subsampling_y;
+ const int log2_filter_unit_size_x = CCSO_BLK_SIZE - xd->plane[plane].subsampling_x;
+#else
const int log2_filter_unit_size_y =
plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_y;
const int log2_filter_unit_size_x =
plane > 0 ? CCSO_BLK_SIZE : CCSO_BLK_SIZE + xd->plane[1].subsampling_x;
+#endif
const int ccso_nvfb =
((mi_params->mi_rows >> xd->plane[plane].subsampling_y) +
@@ -1458,8 +1558,13 @@
const BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y];
const int bw = mi_size_wide[bsize];
const int bh = mi_size_high[bsize];
+#if CONFIG_CCSO_FU_BUGFIX
+ const int log2_w = CCSO_BLK_SIZE;
+ const int log2_h = CCSO_BLK_SIZE;
+#else
const int log2_w = CCSO_BLK_SIZE + xd->plane[1].subsampling_x;
const int log2_h = CCSO_BLK_SIZE + xd->plane[1].subsampling_y;
+#endif
const int f_w = 1 << log2_w >> MI_SIZE_LOG2;
const int f_h = 1 << log2_h >> MI_SIZE_LOG2;
const int step_h = (bh + f_h - 1) / f_h;
@@ -1476,18 +1581,42 @@
cm->cur_frame->ccso_info.sb_filter_control[plane][sb_idx] =
ctx->final_filter_control[y_sb * ccso_nhfb + x_sb];
#endif // CONFIG_CCSO_IMPROVE
+#if CONFIG_CCSO_FU_BUGFIX
+ const int grid_idx_mbmi = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) *
+ row * mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col;
+ MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx_mbmi];
+#endif
if (plane == AOM_PLANE_Y) {
+#if CONFIG_CCSO_FU_BUGFIX
+ mbmi->ccso_blk_y = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb];
+#else
mi_params
->mi_grid_base
[(1 << CCSO_BLK_SIZE >>
(MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) *
row * mi_params->mi_stride +
(1 << CCSO_BLK_SIZE >>
- (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) *
+ (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) *
col]
->ccso_blk_y =
ctx->final_filter_control[y_sb * ccso_nhfb + x_sb];
+#endif
+#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_y %d : 0x%p @ %s\n",
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row,
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col,
+ sb_idx,
+ ctx->final_filter_control[y_sb * ccso_nhfb + x_sb],
+ mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) *
+ row * mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col],
+ __FUNCTION__);
+#endif
} else if (plane == AOM_PLANE_U) {
+#if CONFIG_CCSO_FU_BUGFIX
+ mbmi->ccso_blk_u = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb];
+#else
mi_params
->mi_grid_base
[(1 << CCSO_BLK_SIZE >>
@@ -1498,7 +1627,22 @@
col]
->ccso_blk_u =
ctx->final_filter_control[y_sb * ccso_nhfb + x_sb];
+#endif
+#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_u %d : 0x%p @ %s\n",
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row,
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col,
+ sb_idx,
+ ctx->final_filter_control[y_sb * ccso_nhfb + x_sb],
+ mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) *
+ row * mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col],
+ __FUNCTION__);
+#endif
} else {
+#if CONFIG_CCSO_FU_BUGFIX
+ mbmi->ccso_blk_v = ctx->final_filter_control[y_sb * ccso_nhfb + x_sb];
+#else
mi_params
->mi_grid_base
[(1 << CCSO_BLK_SIZE >>
@@ -1509,8 +1653,27 @@
col]
->ccso_blk_v =
ctx->final_filter_control[y_sb * ccso_nhfb + x_sb];
+#endif
+#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_v %d : 0x%p @ %s\n",
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row,
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col,
+ sb_idx,
+ ctx->final_filter_control[y_sb * ccso_nhfb + x_sb],
+ mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) *
+ row * mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col],
+ __FUNCTION__);
+#endif
}
#if CONFIG_CCSO_IMPROVE
+#if CONFIG_CCSO_FU_BUGFIX
+ const int ccso_mib_size_y = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2));
+ const int ccso_mib_size_x = (1 << (CCSO_BLK_SIZE - MI_SIZE_LOG2));
+
+ int mi_row = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * row;
+ int mi_col = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * col;
+#else
const int ccso_mib_size_y =
(1 << (CCSO_BLK_SIZE + xd->plane[1].subsampling_y -
MI_SIZE_LOG2));
@@ -1524,6 +1687,7 @@
int mi_col = (1 << CCSO_BLK_SIZE >>
(MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) *
col;
+#endif
for (int j = 0;
j < AOMMIN(ccso_mib_size_y, cm->mi_params.mi_rows - mi_row);
j++) {
@@ -1572,18 +1736,45 @@
for (int y_sb = 0; y_sb < ccso_nvfb; y_sb++) {
for (int x_sb = 0; x_sb < ccso_nhfb; x_sb++) {
+#if CONFIG_CCSO_FU_BUGFIX
+ const int grid_idx = (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb *
+ mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb;
+ MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx];
+#endif
if (plane == AOM_PLANE_Y) {
+#if CONFIG_CCSO_FU_BUGFIX
+ mbmi->ccso_blk_y =
+ ref_frame_ccso_info
+ ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb];
+#else
mi_params
->mi_grid_base[(1 << CCSO_BLK_SIZE >>
- (MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) *
+ (MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) *
y_sb * mi_params->mi_stride +
(1 << CCSO_BLK_SIZE >>
- (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) *
+ (MI_SIZE_LOG2 - xd->plane[1].subsampling_x)) *
x_sb]
->ccso_blk_y =
ref_frame_ccso_info
->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb];
+#endif
+#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_y %d : 0x%p @ %s\n",
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb,
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb,
+ y_sb * ccso_nhfb + x_sb,
+ ref_frame_ccso_info->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb],
+ mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb * mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb],
+ __FUNCTION__);
+#endif
} else if (plane == AOM_PLANE_U) {
+#if CONFIG_CCSO_FU_BUGFIX
+ mbmi->ccso_blk_u =
+ ref_frame_ccso_info
+ ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb];
+#else
mi_params
->mi_grid_base[(1 << CCSO_BLK_SIZE >>
(MI_SIZE_LOG2 - xd->plane[1].subsampling_y)) *
@@ -1594,7 +1785,23 @@
->ccso_blk_u =
ref_frame_ccso_info
->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb];
+#endif
+#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_u %d : 0x%p @ %s\n",
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb,
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb,
+ y_sb * ccso_nhfb + x_sb,
+ ref_frame_ccso_info->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb],
+ mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb * mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb],
+ __FUNCTION__);
+#endif
} else {
+#if CONFIG_CCSO_FU_BUGFIX
+ mbmi->ccso_blk_v =
+ ref_frame_ccso_info
+ ->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb];
+#else
mi_params
->mi_grid_base[(1 << CCSO_BLK_SIZE >>
(MI_SIZE_LOG2 - xd->plane[2].subsampling_y)) *
@@ -1605,6 +1812,17 @@
->ccso_blk_v =
ref_frame_ccso_info
->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb];
+#endif
+#if CONFIG_CCSO_DEBUG && CONFIG_CCSO_FU_BUGFIX
+ printf("CCSO: [%d,%d] copy [%d] ccso_blk_v %d : 0x%p @ %s\n",
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb,
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb,
+ y_sb * ccso_nhfb + x_sb,
+ ref_frame_ccso_info->sb_filter_control[plane][y_sb * ccso_nhfb + x_sb],
+ mi_params->mi_grid_base[(1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * y_sb * mi_params->mi_stride +
+ (1 << CCSO_BLK_SIZE >> MI_SIZE_LOG2) * x_sb],
+ __FUNCTION__);
+#endif
}
}
}
@@ -1643,6 +1861,12 @@
cm->cur_frame->ccso_info.reuse_root_ref[plane] =
ref_frame_ccso_info->reuse_root_ref[plane];
}
+#if CONFIG_CCSO_DEBUG
+ printf("CCSO: plane %d quant_idx %d ext_filter_support %d edge_clf %d ccso_bo_only %d max_band_log2 %d scale_idx %d @ %s\n",
+ plane, cm->ccso_info.quant_idx[plane], cm->ccso_info.ext_filter_support[plane],
+ cm->ccso_info.edge_clf[plane], cm->ccso_info.ccso_bo_only[plane],
+ cm->ccso_info.max_band_log2[plane], cm->ccso_info.scale_idx[plane], __FUNCTION__);
+#endif
} else {
cm->cur_frame->ccso_info.ccso_enable[plane] = 0;
}
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 90bfc3b..4f1dcad 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -244,7 +244,7 @@
*width = block_size_wide[bsize];
*height = block_size_high[bsize];
*width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
- *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
+ *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
}
/* Compute MSE only on the blocks we filtered. */
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 61cb8e2..6bd3618 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -358,6 +358,8 @@
set_aom_config_var(CONFIG_PARTITION_CONTEXT_REDUCE 1
"Enable to reduce partition contexts")
set_aom_config_var(CONFIG_CCSO_IMPROVE 1 "Enable CCSO improvements")
+set_aom_config_var(CONFIG_CCSO_DEBUG 0 "Enable CCSO debug")
+set_aom_config_var(CONFIG_CCSO_FU_BUGFIX 1 "Bugfix to CCSO FU size")
set_aom_config_var(CONFIG_OPT_INTER_MODE_CTX 1
"Improvement of all inter mode related contexts")