CDEF cleanup

Name changes and code moves to bring code more in line with the design
doc and an upcoming single-pass patch.

No functional changes.

Change-Id: I2bccd58c644e534b139f420b623390aa971fbdb0
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 2efb016..91af8dd 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -24,7 +24,6 @@ struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; EOF } forward_decls qw/av1_common_forward_decls/; @@ -561,9 +560,9 @@ add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd"; add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd"; add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd"; - add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift"; - add_proto qw/void od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping"; - add_proto qw/void od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping"; + add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift"; + add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping"; + add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping"; add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride"; add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride"; @@ -580,9 +579,9 @@ specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/; specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/; specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/; - specialize qw/od_dir_find8 sse2 ssse3 sse4_1 neon/; - specialize qw/od_filter_dering_direction_4x4 sse2 ssse3 sse4_1 neon/; - specialize qw/od_filter_dering_direction_8x8 sse2 ssse3 sse4_1 neon/; + 
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/; + specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/; + specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/; specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/; specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
diff --git a/av1/common/cdef.c b/av1/common/cdef.c index ba8abbb..8bb3874 100644 --- a/av1/common/cdef.c +++ b/av1/common/cdef.c
@@ -16,7 +16,7 @@ #include "./aom_scale_rtcd.h" #include "aom/aom_integer.h" #include "av1/common/cdef.h" -#include "av1/common/od_dering.h" +#include "av1/common/cdef_block.h" #include "av1/common/onyxc_int.h" #include "av1/common/reconinter.h" @@ -50,8 +50,8 @@ return is_skip; } -int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col, - dering_list *dlist, int filter_skip) { +int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col, + cdef_list *dlist, int filter_skip) { int r, c; int maxc, maxr; MODE_INFO **grid; @@ -156,82 +156,81 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd) { - int sbr, sbc; - int nhsb, nvsb; - uint16_t src[OD_DERING_INBUF_SIZE]; + int fbr, fbc; + int nhfb, nvfb; + uint16_t src[CDEF_INBUF_SIZE]; uint16_t *linebuf[3]; uint16_t *colbuf[3]; - dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64]; - unsigned char *row_dering, *prev_row_dering, *curr_row_dering; - int dering_count; - int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } }; - int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } }; + cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64]; + unsigned char *row_cdef, *prev_row_cdef, *curr_row_cdef; + int cdef_count; + int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; + int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; int stride; int mi_wide_l2[3]; int mi_high_l2[3]; int xdec[3]; int ydec[3]; int pli; - int dering_left; + int cdef_left; int coeff_shift = AOMMAX(cm->bit_depth - 8, 0); - int nplanes = 3; - int chroma_dering = - xd->plane[1].subsampling_x == xd->plane[1].subsampling_y && - xd->plane[2].subsampling_x == xd->plane[2].subsampling_y; - nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; - nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; + int nplanes = MAX_MB_PLANE; + int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y && + xd->plane[2].subsampling_x == xd->plane[2].subsampling_y; + nvfb = (cm->mi_rows + MI_SIZE_64X64 - 
1) / MI_SIZE_64X64; + nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0); - row_dering = aom_malloc(sizeof(*row_dering) * (nhsb + 2) * 2); - memset(row_dering, 1, sizeof(*row_dering) * (nhsb + 2) * 2); - prev_row_dering = row_dering + 1; - curr_row_dering = prev_row_dering + nhsb + 2; + row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2); + memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2); + prev_row_cdef = row_cdef + 1; + curr_row_cdef = prev_row_cdef + nhfb + 2; for (pli = 0; pli < nplanes; pli++) { xdec[pli] = xd->plane[pli].subsampling_x; ydec[pli] = xd->plane[pli].subsampling_y; mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x; mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y; } - stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * OD_FILT_HBORDER; + stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER; for (pli = 0; pli < nplanes; pli++) { - linebuf[pli] = aom_malloc(sizeof(*linebuf) * OD_FILT_VBORDER * stride); + linebuf[pli] = aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride); colbuf[pli] = aom_malloc(sizeof(*colbuf) * - ((MAX_SB_SIZE << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER) * - OD_FILT_HBORDER); + ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) * + CDEF_HBORDER); } - for (sbr = 0; sbr < nvsb; sbr++) { + for (fbr = 0; fbr < nvfb; fbr++) { for (pli = 0; pli < nplanes; pli++) { const int block_height = - (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER; - fill_rect(colbuf[pli], OD_FILT_HBORDER, block_height, OD_FILT_HBORDER, - OD_DERING_VERY_LARGE); + (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER; + fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER, + CDEF_VERY_LARGE); } - dering_left = 1; - for (sbc = 0; sbc < nhsb; sbc++) { - int level, clpf_strength; - int uv_level, uv_clpf_strength; + cdef_left = 1; + for (fbc = 0; fbc < nhfb; fbc++) { + int level, sec_strength; + int uv_level, uv_sec_strength; int nhb, nvb; int 
cstart = 0; - curr_row_dering[sbc] = 0; - if (cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride + - MI_SIZE_64X64 * sbc] == NULL || - cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride + - MI_SIZE_64X64 * sbc] + curr_row_cdef[fbc] = 0; + if (cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + + MI_SIZE_64X64 * fbc] == NULL || + cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + + MI_SIZE_64X64 * fbc] ->mbmi.cdef_strength == -1) { - dering_left = 0; + cdef_left = 0; continue; } - if (!dering_left) cstart = -OD_FILT_HBORDER; - nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc); - nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr); + if (!cdef_left) cstart = -CDEF_HBORDER; + nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc); + nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr); int tile_top, tile_left, tile_bottom, tile_right; - int mi_idx = MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc; + int mi_idx = MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc; MODE_INFO *const mi_tl = cm->mi + mi_idx; BOUNDARY_TYPE boundary_tl = mi_tl->mbmi.boundary_info; tile_top = boundary_tl & TILE_ABOVE_BOUNDARY; tile_left = boundary_tl & TILE_LEFT_BOUNDARY; - if (sbr != nvsb - 1 && + if (fbr != nvfb - 1 && (&cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride])) tile_bottom = cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride] .mbmi.boundary_info & @@ -239,197 +238,189 @@ else tile_bottom = 1; - if (sbc != nhsb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1])) + if (fbc != nhfb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1])) tile_right = cm->mi[mi_idx + MI_SIZE_64X64 - 1].mbmi.boundary_info & TILE_RIGHT_BOUNDARY; else tile_right = 1; const int mbmi_cdef_strength = - cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride + - MI_SIZE_64X64 * sbc] + cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + + MI_SIZE_64X64 * fbc] ->mbmi.cdef_strength; - level = cm->cdef_strengths[mbmi_cdef_strength] / 
CLPF_STRENGTHS; - clpf_strength = cm->cdef_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS; - clpf_strength += clpf_strength == 3; - uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CLPF_STRENGTHS; - uv_clpf_strength = - cm->cdef_uv_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS; - uv_clpf_strength += uv_clpf_strength == 3; - if ((level == 0 && clpf_strength == 0 && uv_level == 0 && - uv_clpf_strength == 0) || - (dering_count = sb_compute_dering_list( - cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64, dlist, + level = cm->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS; + sec_strength = + cm->cdef_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS; + sec_strength += sec_strength == 3; + uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS; + uv_sec_strength = + cm->cdef_uv_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS; + uv_sec_strength += uv_sec_strength == 3; + if ((level == 0 && sec_strength == 0 && uv_level == 0 && + uv_sec_strength == 0) || + (cdef_count = sb_compute_cdef_list( + cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, get_filter_skip(level) || get_filter_skip(uv_level))) == 0) { - dering_left = 0; + cdef_left = 0; continue; } - curr_row_dering[sbc] = 1; + curr_row_cdef[fbc] = 1; for (pli = 0; pli < nplanes; pli++) { - uint16_t dst[MAX_SB_SIZE * MAX_SB_SIZE]; + uint16_t dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]; int coffset; int rend, cend; - int clpf_damping = cm->cdef_clpf_damping; - int dering_damping = cm->cdef_dering_damping; + int pri_damping = cm->cdef_pri_damping; + int sec_damping = cm->cdef_sec_damping; int hsize = nhb << mi_wide_l2[pli]; int vsize = nvb << mi_high_l2[pli]; if (pli) { - if (chroma_dering) + if (chroma_cdef) level = uv_level; else level = 0; - clpf_strength = uv_clpf_strength; + sec_strength = uv_sec_strength; } - if (sbc == nhsb - 1) + if (fbc == nhfb - 1) cend = hsize; else - cend = hsize + OD_FILT_HBORDER; + cend = hsize + CDEF_HBORDER; - if (sbr == nvsb - 1) + if (fbr == nvfb - 1) rend = 
vsize; else - rend = vsize + OD_FILT_VBORDER; + rend = vsize + CDEF_VBORDER; - coffset = sbc * MI_SIZE_64X64 << mi_wide_l2[pli]; - if (sbc == nhsb - 1) { + coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli]; + if (fbc == nhfb - 1) { /* On the last superblock column, fill in the right border with - OD_DERING_VERY_LARGE to avoid filtering with the outside. */ - fill_rect(&src[cend + OD_FILT_HBORDER], OD_FILT_BSTRIDE, - rend + OD_FILT_VBORDER, hsize + OD_FILT_HBORDER - cend, - OD_DERING_VERY_LARGE); + CDEF_VERY_LARGE to avoid filtering with the outside. */ + fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE, + rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend, + CDEF_VERY_LARGE); } - if (sbr == nvsb - 1) { + if (fbr == nvfb - 1) { /* On the last superblock row, fill in the bottom border with - OD_DERING_VERY_LARGE to avoid filtering with the outside. */ - fill_rect(&src[(rend + OD_FILT_VBORDER) * OD_FILT_BSTRIDE], - OD_FILT_BSTRIDE, OD_FILT_VBORDER, - hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE); + CDEF_VERY_LARGE to avoid filtering with the outside. 
*/ + fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE, + CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE); } /* Copy in the pixels we need from the current superblock for deringing.*/ - copy_sb8_16( - cm, - &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart], - OD_FILT_BSTRIDE, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr, coffset + cstart, - xd->plane[pli].dst.stride, rend, cend - cstart); - if (!prev_row_dering[sbc]) { - copy_sb8_16( - cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE, - xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER, - coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize); - } else if (sbr > 0) { - copy_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE, - &linebuf[pli][coffset], stride, OD_FILT_VBORDER, hsize); + copy_sb8_16(cm, + &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart], + CDEF_BSTRIDE, xd->plane[pli].dst.buf, + (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, coffset + cstart, + xd->plane[pli].dst.stride, rend, cend - cstart); + if (!prev_row_cdef[fbc]) { + copy_sb8_16(cm, &src[CDEF_HBORDER], CDEF_BSTRIDE, + xd->plane[pli].dst.buf, + (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, + coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize); + } else if (fbr > 0) { + copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset], + stride, CDEF_VBORDER, hsize); } else { - fill_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE, OD_FILT_VBORDER, - hsize, OD_DERING_VERY_LARGE); + fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, + CDEF_VERY_LARGE); } - if (!prev_row_dering[sbc - 1]) { - copy_sb8_16( - cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER, - coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride, - OD_FILT_VBORDER, OD_FILT_HBORDER); - } else if (sbr > 0 && sbc > 0) { - copy_rect(src, OD_FILT_BSTRIDE, - &linebuf[pli][coffset - OD_FILT_HBORDER], stride, - 
OD_FILT_VBORDER, OD_FILT_HBORDER); + if (!prev_row_cdef[fbc - 1]) { + copy_sb8_16(cm, src, CDEF_BSTRIDE, xd->plane[pli].dst.buf, + (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, + coffset - CDEF_HBORDER, xd->plane[pli].dst.stride, + CDEF_VBORDER, CDEF_HBORDER); + } else if (fbr > 0 && fbc > 0) { + copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER], + stride, CDEF_VBORDER, CDEF_HBORDER); } else { - fill_rect(src, OD_FILT_BSTRIDE, OD_FILT_VBORDER, OD_FILT_HBORDER, - OD_DERING_VERY_LARGE); + fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, + CDEF_VERY_LARGE); } - if (!prev_row_dering[sbc + 1]) { - copy_sb8_16( - cm, &src[OD_FILT_HBORDER + (nhb << mi_wide_l2[pli])], - OD_FILT_BSTRIDE, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER, - coffset + hsize, xd->plane[pli].dst.stride, OD_FILT_VBORDER, - OD_FILT_HBORDER); - } else if (sbr > 0 && sbc < nhsb - 1) { - copy_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE, - &linebuf[pli][coffset + hsize], stride, OD_FILT_VBORDER, - OD_FILT_HBORDER); + if (!prev_row_cdef[fbc + 1]) { + copy_sb8_16(cm, &src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])], + CDEF_BSTRIDE, xd->plane[pli].dst.buf, + (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, + coffset + hsize, xd->plane[pli].dst.stride, CDEF_VBORDER, + CDEF_HBORDER); + } else if (fbr > 0 && fbc < nhfb - 1) { + copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, + &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER, + CDEF_HBORDER); } else { - fill_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE, - OD_FILT_VBORDER, OD_FILT_HBORDER, OD_DERING_VERY_LARGE); + fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, + CDEF_HBORDER, CDEF_VERY_LARGE); } - if (dering_left) { + if (cdef_left) { /* If we deringed the superblock on the left then we need to copy in saved pixels. 
*/ - copy_rect(src, OD_FILT_BSTRIDE, colbuf[pli], OD_FILT_HBORDER, - rend + OD_FILT_VBORDER, OD_FILT_HBORDER); + copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER, + rend + CDEF_VBORDER, CDEF_HBORDER); } /* Saving pixels in case we need to dering the superblock on the right. */ - copy_rect(colbuf[pli], OD_FILT_HBORDER, src + hsize, OD_FILT_BSTRIDE, - rend + OD_FILT_VBORDER, OD_FILT_HBORDER); + copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE, + rend + CDEF_VBORDER, CDEF_HBORDER); copy_sb8_16( cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * (sbr + 1) - OD_FILT_VBORDER, - coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize); + (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER, + coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize); if (tile_top) { - fill_rect(src, OD_FILT_BSTRIDE, OD_FILT_VBORDER, - hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE); + fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, + CDEF_VERY_LARGE); } if (tile_left) { - fill_rect(src, OD_FILT_BSTRIDE, vsize + 2 * OD_FILT_VBORDER, - OD_FILT_HBORDER, OD_DERING_VERY_LARGE); + fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, + CDEF_VERY_LARGE); } if (tile_bottom) { - fill_rect(&src[(vsize + OD_FILT_VBORDER) * OD_FILT_BSTRIDE], - OD_FILT_BSTRIDE, OD_FILT_VBORDER, - hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE); + fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE, + CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE); } if (tile_right) { - fill_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE, - vsize + 2 * OD_FILT_VBORDER, OD_FILT_HBORDER, - OD_DERING_VERY_LARGE); + fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, + vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); } #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) { - od_dering( + cdef_filter_fb( (uint8_t *)&CONVERT_TO_SHORTPTR( xd->plane[pli] 
.dst.buf)[xd->plane[pli].dst.stride * - (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) + - (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])], + (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + + (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])], xd->plane[pli].dst.stride, dst, - &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER], - xdec[pli], ydec[pli], dir, NULL, var, pli, dlist, dering_count, - level, clpf_strength, clpf_damping, dering_damping, coeff_shift, - 0, 1); + &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli], + ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level, + sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1); } else { #endif - od_dering(&xd->plane[pli] - .dst.buf[xd->plane[pli].dst.stride * - (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) + - (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])], - xd->plane[pli].dst.stride, dst, - &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER], - xdec[pli], ydec[pli], dir, NULL, var, pli, dlist, - dering_count, level, clpf_strength, clpf_damping, - dering_damping, coeff_shift, 0, 0); + cdef_filter_fb( + &xd->plane[pli] + .dst.buf[xd->plane[pli].dst.stride * + (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + + (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])], + xd->plane[pli].dst.stride, dst, + &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli], + ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level, + sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0); #if CONFIG_HIGHBITDEPTH } #endif } - dering_left = 1; + cdef_left = 1; } { unsigned char *tmp; - tmp = prev_row_dering; - prev_row_dering = curr_row_dering; - curr_row_dering = tmp; + tmp = prev_row_cdef; + prev_row_cdef = curr_row_cdef; + curr_row_cdef = tmp; } } - aom_free(row_dering); + aom_free(row_cdef); for (pli = 0; pli < nplanes; pli++) { aom_free(linebuf[pli]); aom_free(colbuf[pli]);
diff --git a/av1/common/cdef.h b/av1/common/cdef.h index a0dd0a6..acb609e 100644 --- a/av1/common/cdef.h +++ b/av1/common/cdef.h
@@ -8,20 +8,19 @@ * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ -#ifndef AV1_COMMON_DERING_H_ -#define AV1_COMMON_DERING_H_ +#ifndef AV1_COMMON_CDEF_H_ +#define AV1_COMMON_CDEF_H_ #define CDEF_STRENGTH_BITS 7 -#define DERING_STRENGTHS 32 -#define CLPF_STRENGTHS 4 +#define CDEF_PRI_STRENGTHS 32 +#define CDEF_SEC_STRENGTHS 4 #include "./aom_config.h" #include "aom/aom_integer.h" #include "aom_ports/mem.h" -#include "av1/common/od_dering.h" +#include "av1/common/cdef_block.h" #include "av1/common/onyxc_int.h" -#include "./od_dering.h" static INLINE int sign(int i) { return i < 0 ? -1 : 1; } @@ -40,8 +39,8 @@ #endif int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col); -int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col, - dering_list *dlist, int filter_skip); +int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col, + cdef_list *dlist, int filter_skip); void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd); void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, @@ -50,4 +49,4 @@ #ifdef __cplusplus } // extern "C" #endif -#endif // AV1_COMMON_DERING_H_ +#endif // AV1_COMMON_CDEF_H_
diff --git a/av1/common/od_dering.c b/av1/common/cdef_block.c similarity index 67% rename from av1/common/od_dering.c rename to av1/common/cdef_block.c index df4fb2a..3fe836a 100644 --- a/av1/common/od_dering.c +++ b/av1/common/cdef_block.c
@@ -21,17 +21,15 @@ #include "./cdef.h" /* Generated from gen_filter_tables.c. */ -const int OD_DIRECTION_OFFSETS_TABLE[8][3] = { - { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2, - -3 * OD_FILT_BSTRIDE + 3 }, - { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2, - -1 * OD_FILT_BSTRIDE + 3 }, - { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 }, - { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 }, - { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 }, - { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 }, - { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 }, - { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 }, +const int cdef_directions[8][3] = { + { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 }, + { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 }, + { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 }, + { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 }, + { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 }, + { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 }, + { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 }, + { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 } }; /* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on. @@ -41,8 +39,8 @@ in a particular direction. Since each direction have the same sum(x^2) term, that term is never computed. See Section 2, step 2, of: http://jmvalin.ca/notes/intra_paint.pdf */ -int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var, - int coeff_shift) { +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, + int coeff_shift) { int i; int32_t cost[8] = { 0 }; int partial[8][15] = { { 0 } }; @@ -113,9 +111,8 @@ } /* Smooth in the direction detected. 
*/ -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, - const uint16_t *in, int threshold, - int dir, int damping) { +void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, + int threshold, int dir, int damping) { int i; int j; int k; @@ -125,15 +122,13 @@ int16_t sum; int16_t xx; int16_t yy; - xx = in[i * OD_FILT_BSTRIDE + j]; + xx = in[i * CDEF_BSTRIDE + j]; sum = 0; for (k = 0; k < 3; k++) { int16_t p0; int16_t p1; - p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] - - xx; - p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] - - xx; + p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx; + p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx; sum += taps[k] * constrain(p0, threshold, damping); sum += taps[k] * constrain(p1, threshold, damping); } @@ -145,9 +140,8 @@ } /* Smooth in the direction detected. */ -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, - const uint16_t *in, int threshold, - int dir, int damping) { +void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, + int threshold, int dir, int damping) { int i; int j; int k; @@ -157,15 +151,13 @@ int16_t sum; int16_t xx; int16_t yy; - xx = in[i * OD_FILT_BSTRIDE + j]; + xx = in[i * CDEF_BSTRIDE + j]; sum = 0; for (k = 0; k < 2; k++) { int16_t p0; int16_t p1; - p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] - - xx; - p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] - - xx; + p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx; + p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx; sum += taps[k] * constrain(p0, threshold, damping); sum += taps[k] * constrain(p1, threshold, damping); } @@ -176,16 +168,16 @@ } } -/* Compute deringing filter threshold for an 8x8 block based on the - directional variance difference. A high variance difference means that we - have a highly directional pattern (e.g. 
a high contrast edge), so we can - apply more deringing. A low variance means that we either have a low - contrast edge, or a non-directional texture, so we want to be careful not - to blur. */ -static INLINE int od_adjust_thresh(int threshold, int32_t var) { +/* Compute the primary filter strength for an 8x8 block based on the + directional variance difference. A high variance difference means + that we have a highly directional pattern (e.g. a high contrast + edge), so we can apply more deringing. A low variance means that we + either have a low contrast edge, or a non-directional texture, so + we want to be careful not to blur. */ +static INLINE int adjust_strength(int strength, int32_t var) { const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0; - /* We use the variance of 8x8 blocks to adjust the threshold. */ - return var ? (threshold * (4 + i) + 8) >> 4 : 0; + /* We use the variance of 8x8 blocks to adjust the strength. */ + return var ? (strength * (4 + i) + 8) >> 4 : 0; } void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, @@ -202,20 +194,20 @@ for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j]; } -static void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, - uint16_t *src, dering_list *dlist, - int dering_count, int bsize) { +static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src, + cdef_list *dlist, int cdef_count, + int bsize) { int bi, bx, by; if (bsize == BLOCK_8X8) { - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride, &src[bi << (3 + 3)], 8); } } else if (bsize == BLOCK_4X8) { - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride, @@ -224,7 +216,7 @@ dstride, &src[(bi << (3 + 2)) + 4 * 4], 4); } } 
else if (bsize == BLOCK_8X4) { - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride, @@ -234,7 +226,7 @@ } } else { assert(bsize == BLOCK_4X4); - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride, @@ -259,19 +251,19 @@ dst[i * dstride + j] = (uint8_t)src[i * sstride + j]; } -static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, - const uint16_t *src, dering_list *dlist, - int dering_count, int bsize) { +static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride, + const uint16_t *src, cdef_list *dlist, + int cdef_count, int bsize) { int bi, bx, by; if (bsize == BLOCK_8X8) { - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride, &src[bi << (3 + 3)], 8); } } else if (bsize == BLOCK_4X8) { - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride, @@ -280,7 +272,7 @@ dstride, &src[(bi << (3 + 2)) + 4 * 4], 4); } } else if (bsize == BLOCK_8X4) { - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride, @@ -290,7 +282,7 @@ } } else { assert(bsize == BLOCK_4X4); - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride, @@ -305,12 +297,12 @@ return filter_skip; } -void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec, - int ydec, int 
dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], - int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], - int pli, dering_list *dlist, int dering_count, int level, - int clpf_strength, int clpf_damping, int dering_damping, - int coeff_shift, int skip_dering, int hbd) { +void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, + int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], + int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli, + cdef_list *dlist, int cdef_count, int level, + int sec_strength, int sec_damping, int pri_damping, + int coeff_shift, int skip_dering, int hbd) { int bi; int bx; int by; @@ -320,11 +312,10 @@ int filter_skip = get_filter_skip(level); if (level == 1) threshold = 31 << coeff_shift; - od_filter_dering_direction_func filter_dering_direction[] = { - od_filter_dering_direction_4x4, od_filter_dering_direction_8x8 - }; - clpf_damping += coeff_shift - (pli != AOM_PLANE_Y); - dering_damping += coeff_shift - (pli != AOM_PLANE_Y); + cdef_direction_func cdef_direction[] = { cdef_direction_4x4, + cdef_direction_8x8 }; + sec_damping += coeff_shift - (pli != AOM_PLANE_Y); + pri_damping += coeff_shift - (pli != AOM_PLANE_Y); bsize = ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8); bsizex = 3 - xdec; @@ -333,12 +324,11 @@ if (!skip_dering) { if (pli == 0) { if (!dirinit || !*dirinit) { - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; - dir[by][bx] = - od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx], - OD_FILT_BSTRIDE, &var[by][bx], coeff_shift); + dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx], + CDEF_BSTRIDE, &var[by][bx], coeff_shift); } if (dirinit) *dirinit = 1; } @@ -348,24 +338,23 @@ // something out in y[] later. 
if (threshold != 0) { assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4); - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { int t = !filter_skip && dlist[bi].skip ? 0 : threshold; by = dlist[bi].by; bx = dlist[bi].bx; - (filter_dering_direction[bsize == BLOCK_8X8])( + (cdef_direction[bsize == BLOCK_8X8])( &y[bi << (bsizex + bsizey)], 1 << bsizex, - &in[(by * OD_FILT_BSTRIDE << bsizey) + (bx << bsizex)], - pli ? t : od_adjust_thresh(t, var[by][bx]), dir[by][bx], - dering_damping); + &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)], + pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx], + pri_damping); } } } - if (clpf_strength) { + if (sec_strength) { if (threshold && !skip_dering) - copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count, - bsize); - for (bi = 0; bi < dering_count; bi++) { + copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize); + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; int py = by << bsizey; @@ -378,31 +367,31 @@ : aom_clpf_hblock_hbd)( dst ? (uint16_t *)dst + py * dstride + px : &y[bi << (bsizex + bsizey)], - in + py * OD_FILT_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex, - OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey, - clpf_strength << coeff_shift, clpf_damping); + in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex, + CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift, + sec_damping); } else { // Do clpf and write the result to an 8 bit destination (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? 
aom_clpf_block : aom_clpf_hblock)( - dst + py * dstride + px, in + py * OD_FILT_BSTRIDE + px, dstride, - OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey, - clpf_strength << coeff_shift, clpf_damping); + dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride, + CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift, + sec_damping); } } } else if (threshold != 0) { // No clpf, so copy instead if (hbd) { - copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, - dering_count, bsize); + copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count, + bsize); } else { - copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize); + copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize); } } else if (dirinit) { // If we're here, both dering and clpf are off, and we still haven't written // anything to y[] yet, so we just copy the input to y[]. This is necessary // only for av1_cdef_search() and only av1_cdef_search() sets dirinit. - for (bi = 0; bi < dering_count; bi++) { + for (bi = 0; bi < cdef_count; bi++) { by = dlist[bi].by; bx = dlist[bi].bx; int iy, ix; @@ -410,7 +399,7 @@ for (iy = 0; iy < 1 << bsizey; iy++) for (ix = 0; ix < 1 << bsizex; ix++) y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] = - in[((by << bsizey) + iy) * OD_FILT_BSTRIDE + (bx << bsizex) + ix]; + in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix]; } } }
diff --git a/av1/common/cdef_block.h b/av1/common/cdef_block.h new file mode 100644 index 0000000..3891e2b --- /dev/null +++ b/av1/common/cdef_block.h
@@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#if !defined(_CDEF_BLOCK_H) +#define _CDEF_BLOCK_H (1) + +#include "./odintrin.h" + +#define CDEF_BLOCKSIZE 64 +#define CDEF_BLOCKSIZE_LOG2 6 +#define CDEF_NBLOCKS (CDEF_BLOCKSIZE / 8) + +/* We need to buffer three vertical lines. */ +#define CDEF_VBORDER (3) +/* We only need to buffer three horizontal pixels too, but let's align to + 16 bytes (8 x 16 bits) to make vectorization easier. */ +#define CDEF_HBORDER (8) +#define CDEF_BSTRIDE ALIGN_POWER_OF_TWO(CDEF_BLOCKSIZE + 2 * CDEF_HBORDER, 3) + +#define CDEF_VERY_LARGE (30000) +#define CDEF_INBUF_SIZE (CDEF_BSTRIDE * (CDEF_BLOCKSIZE + 2 * CDEF_VBORDER)) + +extern const int cdef_directions[8][3]; + +typedef struct { + uint8_t by; + uint8_t bx; + uint8_t skip; +} cdef_list; + +typedef void (*cdef_direction_func)(uint16_t *y, int ystride, + const uint16_t *in, int threshold, int dir, + int damping); + +int get_filter_skip(int level); + +void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, + int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], + int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli, + cdef_list *dlist, int cdef_count, int level, + int sec_strength, int sec_damping, int pri_damping, + int coeff_shift, int skip_dering, int hbd); +#endif
diff --git a/av1/common/od_dering_neon.c b/av1/common/cdef_block_neon.c similarity index 94% rename from av1/common/od_dering_neon.c rename to av1/common/cdef_block_neon.c index 9944105..030b325 100644 --- a/av1/common/od_dering_neon.c +++ b/av1/common/cdef_block_neon.c
@@ -11,4 +11,4 @@ #include "aom_dsp/aom_simd.h" #define SIMD_FUNC(name) name##_neon -#include "./od_dering_simd.h" +#include "./cdef_block_simd.h"
diff --git a/av1/common/od_dering_simd.h b/av1/common/cdef_block_simd.h similarity index 80% rename from av1/common/od_dering_simd.h rename to av1/common/cdef_block_simd.h index 4074e7e..358d919 100644 --- a/av1/common/od_dering_simd.h +++ b/av1/common/cdef_block_simd.h
@@ -10,8 +10,7 @@ */ #include "./av1_rtcd.h" -#include "./cdef_simd.h" -#include "./od_dering.h" +#include "./cdef_block.h" /* partial A is a 16-bit vector of the form: [x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form: @@ -155,8 +154,8 @@ res[0] = v128_ziphi_64(tr1_7, tr1_6); } -int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, - int coeff_shift) { +int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, + int coeff_shift) { int i; int32_t cost[8]; int32_t best_cost = 0; @@ -211,42 +210,51 @@ return best_dir; } -void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride, - const uint16_t *in, - int threshold, int dir, - int damping) { +// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp))) +SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold, + unsigned int adjdamp) { + v128 diff = v128_sub_16(a, b); + const v128 sign = v128_shr_n_s16(diff, 15); + diff = v128_abs_s16(diff); + const v128 s = + v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp)); + return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign); +} + +void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, + int threshold, int dir, int damping) { int i; v128 p0, p1, sum, row, res; - int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0]; - int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1]; + int o1 = cdef_directions[dir][0]; + int o2 = cdef_directions[dir][1]; if (threshold) damping -= get_msb(threshold); for (i = 0; i < 4; i += 2) { sum = v128_zero(); - row = v128_from_v64(v64_load_aligned(&in[i * OD_FILT_BSTRIDE]), - v64_load_aligned(&in[(i + 1) * OD_FILT_BSTRIDE])); + row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]), + v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE])); - // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping) - p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]), - v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + 
o1])); + // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping) + p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]), + v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1])); p0 = constrain16(p0, row, threshold, damping); - // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping) - p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]), - v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o1])); + // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping) + p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]), + v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1])); p1 = constrain16(p1, row, threshold, damping); // sum += 4 * (p0 + p1) sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2)); - // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping) - p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]), - v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + o2])); + // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping) + p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]), + v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2])); p0 = constrain16(p0, row, threshold, damping); - // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping) - p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]), - v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o2])); + // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping) + p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]), + v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2])); p1 = constrain16(p1, row, threshold, damping); // sum += 1 * (p0 + p1) @@ -261,27 +269,25 @@ } } -void SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride, - const uint16_t *in, - int threshold, int dir, - int damping) { +void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, + int 
threshold, int dir, int damping) { int i; v128 sum, p0, p1, row, res; - int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0]; - int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1]; - int o3 = OD_DIRECTION_OFFSETS_TABLE[dir][2]; + int o1 = cdef_directions[dir][0]; + int o2 = cdef_directions[dir][1]; + int o3 = cdef_directions[dir][2]; if (threshold) damping -= get_msb(threshold); for (i = 0; i < 8; i++) { sum = v128_zero(); - row = v128_load_aligned(&in[i * OD_FILT_BSTRIDE]); + row = v128_load_aligned(&in[i * CDEF_BSTRIDE]); - // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping) - p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]); + // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping) + p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]); p0 = constrain16(p0, row, threshold, damping); - // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping) - p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]); + // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping) + p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]); p1 = constrain16(p1, row, threshold, damping); // sum += 3 * (p0 + p1) @@ -289,24 +295,24 @@ p0 = v128_add_16(p0, v128_shl_n_16(p0, 1)); sum = v128_add_16(sum, p0); - // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping) - p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]); + // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping) + p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]); p0 = constrain16(p0, row, threshold, damping); - // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping) - p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]); + // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping) + p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]); p1 = constrain16(p1, row, threshold, damping); // sum += 2 * (p0 + p1) p0 = v128_shl_n_16(v128_add_16(p0, p1), 1); sum = v128_add_16(sum, p0); - // 
p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping) - p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o3]); + // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping) + p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]); p0 = constrain16(p0, row, threshold, damping); - // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping) - p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o3]); + // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping) + p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]); p1 = constrain16(p1, row, threshold, damping); // sum += (p0 + p1)
diff --git a/av1/common/od_dering_sse2.c b/av1/common/cdef_block_sse2.c similarity index 94% rename from av1/common/od_dering_sse2.c rename to av1/common/cdef_block_sse2.c index 8a2a62f..f3de763 100644 --- a/av1/common/od_dering_sse2.c +++ b/av1/common/cdef_block_sse2.c
@@ -11,4 +11,4 @@ #include "aom_dsp/aom_simd.h" #define SIMD_FUNC(name) name##_sse2 -#include "./od_dering_simd.h" +#include "./cdef_block_simd.h"
diff --git a/av1/common/od_dering_sse4.c b/av1/common/cdef_block_sse4.c similarity index 94% rename from av1/common/od_dering_sse4.c rename to av1/common/cdef_block_sse4.c index 0769db9..27e9ff3 100644 --- a/av1/common/od_dering_sse4.c +++ b/av1/common/cdef_block_sse4.c
@@ -11,4 +11,4 @@ #include "aom_dsp/aom_simd.h" #define SIMD_FUNC(name) name##_sse4_1 -#include "./od_dering_simd.h" +#include "./cdef_block_simd.h"
diff --git a/av1/common/od_dering_ssse3.c b/av1/common/cdef_block_ssse3.c similarity index 94% rename from av1/common/od_dering_ssse3.c rename to av1/common/cdef_block_ssse3.c index 99df62b..8635221 100644 --- a/av1/common/od_dering_ssse3.c +++ b/av1/common/cdef_block_ssse3.c
@@ -11,4 +11,4 @@ #include "aom_dsp/aom_simd.h" #define SIMD_FUNC(name) name##_ssse3 -#include "./od_dering_simd.h" +#include "./cdef_block_simd.h"
diff --git a/av1/common/cdef_simd.h b/av1/common/cdef_simd.h deleted file mode 100644 index 2649099..0000000 --- a/av1/common/cdef_simd.h +++ /dev/null
@@ -1,27 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AV1_COMMON_CDEF_SIMD_H_ -#define AV1_COMMON_CDEF_SIMD_H_ - -#include "aom_dsp/aom_simd.h" - -// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp))) -SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold, - unsigned int adjdamp) { - v128 diff = v128_sub_16(a, b); - const v128 sign = v128_shr_n_s16(diff, 15); - diff = v128_abs_s16(diff); - const v128 s = - v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp)); - return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign); -} - -#endif // AV1_COMMON_CDEF_SIMD_H_
diff --git a/av1/common/clpf_simd.h b/av1/common/clpf_simd.h index a615b5e..c7ffc56 100644 --- a/av1/common/clpf_simd.h +++ b/av1/common/clpf_simd.h
@@ -10,10 +10,20 @@ */ #include "./av1_rtcd.h" -#include "./cdef_simd.h" #include "aom_ports/bitops.h" #include "aom_ports/mem.h" +// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp))) +SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold, + unsigned int adjdamp) { + v128 diff = v128_sub_16(a, b); + const v128 sign = v128_shr_n_s16(diff, 15); + diff = v128_abs_s16(diff); + const v128 s = + v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp)); + return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign); +} + // sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp))) SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength, unsigned int adjdamp) {
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h deleted file mode 100644 index 031112b..0000000 --- a/av1/common/od_dering.h +++ /dev/null
@@ -1,51 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#if !defined(_dering_H) -#define _dering_H (1) - -#include "odintrin.h" - -#define OD_DERING_NBLOCKS (MAX_SB_SIZE / 8) - -/* We need to buffer three vertical lines. */ -#define OD_FILT_VBORDER (3) -/* We only need to buffer three horizontal pixels too, but let's align to - 16 bytes (8 x 16 bits) to make vectorization easier. */ -#define OD_FILT_HBORDER (8) -#define OD_FILT_BSTRIDE ALIGN_POWER_OF_TWO(MAX_SB_SIZE + 2 * OD_FILT_HBORDER, 3) - -#define OD_DERING_VERY_LARGE (30000) -#define OD_DERING_INBUF_SIZE \ - (OD_FILT_BSTRIDE * (MAX_SB_SIZE + 2 * OD_FILT_VBORDER)) - -extern const int OD_DIRECTION_OFFSETS_TABLE[8][3]; - -typedef struct { - uint8_t by; - uint8_t bx; - uint8_t skip; -} dering_list; - -typedef void (*od_filter_dering_direction_func)(uint16_t *y, int ystride, - const uint16_t *in, - int threshold, int dir, - int damping); - -int get_filter_skip(int level); - -void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec, - int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], - int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], - int pli, dering_list *dlist, int dering_count, int level, - int clpf_strength, int clpf_damping, int dering_damping, - int coeff_shift, int skip_dering, int hbd); -#endif
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h index cc0f7c8..2896deb 100644 --- a/av1/common/onyxc_int.h +++ b/av1/common/onyxc_int.h
@@ -425,8 +425,8 @@ int mib_size; // Size of the superblock in units of MI blocks int mib_size_log2; // Log 2 of above. #if CONFIG_CDEF - int cdef_dering_damping; - int cdef_clpf_damping; + int cdef_pri_damping; + int cdef_sec_damping; int nb_cdef_strengths; int cdef_strengths[CDEF_MAX_STRENGTHS]; int cdef_uv_strengths[CDEF_MAX_STRENGTHS];