CDEF cleanup
Name changes and code moves to bring the code more in line with the
design doc and an upcoming single-pass patch. No functional changes.
Change-Id: I2bccd58c644e534b139f420b623390aa971fbdb0
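
For orientation: the packing of a combined CDEF strength value is unchanged;
only the names move from the old clpf/dering vocabulary to secondary/primary.
A minimal sketch using the new names, assuming s holds one entry of
cm->cdef_strengths:

  int level        = s / CDEF_SEC_STRENGTHS;  /* primary level, was s / CLPF_STRENGTHS */
  int sec_strength = s % CDEF_SEC_STRENGTHS;  /* was clpf_strength */
  sec_strength += sec_strength == 3;          /* coded value 3 means strength 4 */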
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 945166b..4472019 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -236,12 +236,11 @@
${AOM_AV1_COMMON_SOURCES}
"${AOM_ROOT}/av1/common/clpf.c"
"${AOM_ROOT}/av1/common/clpf_simd.h"
- "${AOM_ROOT}/av1/common/cdef_simd.h"
"${AOM_ROOT}/av1/common/cdef.c"
"${AOM_ROOT}/av1/common/cdef.h"
- "${AOM_ROOT}/av1/common/od_dering.c"
- "${AOM_ROOT}/av1/common/od_dering.h"
- "${AOM_ROOT}/av1/common/od_dering_simd.h")
+ "${AOM_ROOT}/av1/common/cdef_block.c"
+ "${AOM_ROOT}/av1/common/cdef_block.h"
+ "${AOM_ROOT}/av1/common/cdef_block_simd.h")
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
@@ -250,22 +249,22 @@
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/clpf_sse2.c"
- "${AOM_ROOT}/av1/common/od_dering_sse2.c")
+ "${AOM_ROOT}/av1/common/cdef_block_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/clpf_ssse3.c"
- "${AOM_ROOT}/av1/common/od_dering_ssse3.c")
+ "${AOM_ROOT}/av1/common/cdef_block_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/clpf_sse4.c"
- "${AOM_ROOT}/av1/common/od_dering_sse4.c")
+ "${AOM_ROOT}/av1/common/cdef_block_sse4.c")
set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/clpf_neon.c"
- "${AOM_ROOT}/av1/common/od_dering_neon.c")
+ "${AOM_ROOT}/av1/common/cdef_block_neon.c")
endif ()
if (CONFIG_CONVOLVE_ROUND)
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index a8ba720..5411229 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -92,18 +92,17 @@
ifeq ($(CONFIG_CDEF),yes)
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf_simd.h
-AV1_COMMON_SRCS-yes += common/cdef_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
-AV1_COMMON_SRCS-$(HAVE_SSE2) += common/od_dering_sse2.c
-AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/od_dering_ssse3.c
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/od_dering_sse4.c
-AV1_COMMON_SRCS-$(HAVE_NEON) += common/od_dering_neon.c
-AV1_COMMON_SRCS-yes += common/od_dering.c
-AV1_COMMON_SRCS-yes += common/od_dering.h
-AV1_COMMON_SRCS-yes += common/od_dering_simd.h
+AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
+AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
+AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
+AV1_COMMON_SRCS-$(HAVE_NEON) += common/cdef_block_neon.c
+AV1_COMMON_SRCS-yes += common/cdef_block.c
+AV1_COMMON_SRCS-yes += common/cdef_block.h
+AV1_COMMON_SRCS-yes += common/cdef_block_simd.h
AV1_COMMON_SRCS-yes += common/cdef.c
AV1_COMMON_SRCS-yes += common/cdef.h
endif
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 2efb016..91af8dd 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -24,7 +24,6 @@
struct mv;
union int_mv;
struct yv12_buffer_config;
-typedef uint16_t od_dering_in;
EOF
}
forward_decls qw/av1_common_forward_decls/;
@@ -561,9 +560,9 @@
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
- add_proto qw/void od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
- add_proto qw/void od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
+ add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
+ add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
+ add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
@@ -580,9 +579,9 @@
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
- specialize qw/od_dir_find8 sse2 ssse3 sse4_1 neon/;
- specialize qw/od_filter_dering_direction_4x4 sse2 ssse3 sse4_1 neon/;
- specialize qw/od_filter_dering_direction_8x8 sse2 ssse3 sse4_1 neon/;
+ specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
+ specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
+ specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
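
For reference, these prototypes go through the usual RTCD dispatch, so call
sites use the base names and get the best available variant at run time. A
minimal call-site sketch, assuming img points at a padded 16-bit block with
stride CDEF_BSTRIDE and that coeff_shift is in scope:

  int32_t var;
  const int dir = cdef_find_dir(img, CDEF_BSTRIDE, &var, coeff_shift);
  /* resolves to cdef_find_dir_c or the sse2/ssse3/sse4_1/neon variant */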
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index ba8abbb..8bb3874 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -16,7 +16,7 @@
#include "./aom_scale_rtcd.h"
#include "aom/aom_integer.h"
#include "av1/common/cdef.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/reconinter.h"
@@ -50,8 +50,8 @@
return is_skip;
}
-int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- dering_list *dlist, int filter_skip) {
+int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
+ cdef_list *dlist, int filter_skip) {
int r, c;
int maxc, maxr;
MODE_INFO **grid;
@@ -156,82 +156,81 @@
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd) {
- int sbr, sbc;
- int nhsb, nvsb;
- uint16_t src[OD_DERING_INBUF_SIZE];
+ int fbr, fbc;
+ int nhfb, nvfb;
+ uint16_t src[CDEF_INBUF_SIZE];
uint16_t *linebuf[3];
uint16_t *colbuf[3];
- dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
- unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
- int dering_count;
- int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
- int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+ cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
+ unsigned char *row_cdef, *prev_row_cdef, *curr_row_cdef;
+ int cdef_count;
+ int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
+ int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
int stride;
int mi_wide_l2[3];
int mi_high_l2[3];
int xdec[3];
int ydec[3];
int pli;
- int dering_left;
+ int cdef_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
- int nplanes = 3;
- int chroma_dering =
- xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
- xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
- nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int nplanes = MAX_MB_PLANE;
+ int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
+ xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
+ nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
- row_dering = aom_malloc(sizeof(*row_dering) * (nhsb + 2) * 2);
- memset(row_dering, 1, sizeof(*row_dering) * (nhsb + 2) * 2);
- prev_row_dering = row_dering + 1;
- curr_row_dering = prev_row_dering + nhsb + 2;
+ row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
+ memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
+ prev_row_cdef = row_cdef + 1;
+ curr_row_cdef = prev_row_cdef + nhfb + 2;
for (pli = 0; pli < nplanes; pli++) {
xdec[pli] = xd->plane[pli].subsampling_x;
ydec[pli] = xd->plane[pli].subsampling_y;
mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
}
- stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * OD_FILT_HBORDER;
+ stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
for (pli = 0; pli < nplanes; pli++) {
- linebuf[pli] = aom_malloc(sizeof(*linebuf) * OD_FILT_VBORDER * stride);
+ linebuf[pli] = aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
colbuf[pli] =
aom_malloc(sizeof(*colbuf) *
- ((MAX_SB_SIZE << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER) *
- OD_FILT_HBORDER);
+ ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
+ CDEF_HBORDER);
}
- for (sbr = 0; sbr < nvsb; sbr++) {
+ for (fbr = 0; fbr < nvfb; fbr++) {
for (pli = 0; pli < nplanes; pli++) {
const int block_height =
- (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER;
- fill_rect(colbuf[pli], OD_FILT_HBORDER, block_height, OD_FILT_HBORDER,
- OD_DERING_VERY_LARGE);
+ (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
+ fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
- dering_left = 1;
- for (sbc = 0; sbc < nhsb; sbc++) {
- int level, clpf_strength;
- int uv_level, uv_clpf_strength;
+ cdef_left = 1;
+ for (fbc = 0; fbc < nhfb; fbc++) {
+ int level, sec_strength;
+ int uv_level, uv_sec_strength;
int nhb, nvb;
int cstart = 0;
- curr_row_dering[sbc] = 0;
- if (cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc] == NULL ||
- cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc]
+ curr_row_cdef[fbc] = 0;
+ if (cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc] == NULL ||
+ cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc]
->mbmi.cdef_strength == -1) {
- dering_left = 0;
+ cdef_left = 0;
continue;
}
- if (!dering_left) cstart = -OD_FILT_HBORDER;
- nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
- nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
+ if (!cdef_left) cstart = -CDEF_HBORDER;
+ nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
+ nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
int tile_top, tile_left, tile_bottom, tile_right;
- int mi_idx = MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
+ int mi_idx = MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
MODE_INFO *const mi_tl = cm->mi + mi_idx;
BOUNDARY_TYPE boundary_tl = mi_tl->mbmi.boundary_info;
tile_top = boundary_tl & TILE_ABOVE_BOUNDARY;
tile_left = boundary_tl & TILE_LEFT_BOUNDARY;
- if (sbr != nvsb - 1 &&
+ if (fbr != nvfb - 1 &&
(&cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]))
tile_bottom = cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]
.mbmi.boundary_info &
@@ -239,197 +238,189 @@
else
tile_bottom = 1;
- if (sbc != nhsb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
+ if (fbc != nhfb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
tile_right = cm->mi[mi_idx + MI_SIZE_64X64 - 1].mbmi.boundary_info &
TILE_RIGHT_BOUNDARY;
else
tile_right = 1;
const int mbmi_cdef_strength =
- cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc]
+ cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc]
->mbmi.cdef_strength;
- level = cm->cdef_strengths[mbmi_cdef_strength] / CLPF_STRENGTHS;
- clpf_strength = cm->cdef_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS;
- clpf_strength += clpf_strength == 3;
- uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CLPF_STRENGTHS;
- uv_clpf_strength =
- cm->cdef_uv_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS;
- uv_clpf_strength += uv_clpf_strength == 3;
- if ((level == 0 && clpf_strength == 0 && uv_level == 0 &&
- uv_clpf_strength == 0) ||
- (dering_count = sb_compute_dering_list(
- cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64, dlist,
+ level = cm->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
+ sec_strength =
+ cm->cdef_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
+ sec_strength += sec_strength == 3;
+ uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
+ uv_sec_strength =
+ cm->cdef_uv_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
+ uv_sec_strength += uv_sec_strength == 3;
+ if ((level == 0 && sec_strength == 0 && uv_level == 0 &&
+ uv_sec_strength == 0) ||
+ (cdef_count = sb_compute_cdef_list(
+ cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist,
get_filter_skip(level) || get_filter_skip(uv_level))) == 0) {
- dering_left = 0;
+ cdef_left = 0;
continue;
}
- curr_row_dering[sbc] = 1;
+ curr_row_cdef[fbc] = 1;
for (pli = 0; pli < nplanes; pli++) {
- uint16_t dst[MAX_SB_SIZE * MAX_SB_SIZE];
+ uint16_t dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE];
int coffset;
int rend, cend;
- int clpf_damping = cm->cdef_clpf_damping;
- int dering_damping = cm->cdef_dering_damping;
+ int pri_damping = cm->cdef_pri_damping;
+ int sec_damping = cm->cdef_sec_damping;
int hsize = nhb << mi_wide_l2[pli];
int vsize = nvb << mi_high_l2[pli];
if (pli) {
- if (chroma_dering)
+ if (chroma_cdef)
level = uv_level;
else
level = 0;
- clpf_strength = uv_clpf_strength;
+ sec_strength = uv_sec_strength;
}
- if (sbc == nhsb - 1)
+ if (fbc == nhfb - 1)
cend = hsize;
else
- cend = hsize + OD_FILT_HBORDER;
+ cend = hsize + CDEF_HBORDER;
- if (sbr == nvsb - 1)
+ if (fbr == nvfb - 1)
rend = vsize;
else
- rend = vsize + OD_FILT_VBORDER;
+ rend = vsize + CDEF_VBORDER;
- coffset = sbc * MI_SIZE_64X64 << mi_wide_l2[pli];
- if (sbc == nhsb - 1) {
+ coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
+ if (fbc == nhfb - 1) {
/* On the last superblock column, fill in the right border with
- OD_DERING_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[cend + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- rend + OD_FILT_VBORDER, hsize + OD_FILT_HBORDER - cend,
- OD_DERING_VERY_LARGE);
+ CDEF_VERY_LARGE to avoid filtering with the outside. */
+ fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE,
+ rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend,
+ CDEF_VERY_LARGE);
}
- if (sbr == nvsb - 1) {
+ if (fbr == nvfb - 1) {
/* On the last superblock row, fill in the bottom border with
- OD_DERING_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[(rend + OD_FILT_VBORDER) * OD_FILT_BSTRIDE],
- OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ CDEF_VERY_LARGE to avoid filtering with the outside. */
+ fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
+ CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
}
/* Copy in the pixels we need from the current superblock for
deringing.*/
- copy_sb8_16(
- cm,
- &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart],
- OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr, coffset + cstart,
- xd->plane[pli].dst.stride, rend, cend - cstart);
- if (!prev_row_dering[sbc]) {
- copy_sb8_16(
- cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
- } else if (sbr > 0) {
- copy_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- &linebuf[pli][coffset], stride, OD_FILT_VBORDER, hsize);
+ copy_sb8_16(cm,
+ &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
+ CDEF_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, coffset + cstart,
+ xd->plane[pli].dst.stride, rend, cend - cstart);
+ if (!prev_row_cdef[fbc]) {
+ copy_sb8_16(cm, &src[CDEF_HBORDER], CDEF_BSTRIDE,
+ xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
+ coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize);
+ } else if (fbr > 0) {
+ copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset],
+ stride, CDEF_VBORDER, hsize);
} else {
- fill_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize, OD_DERING_VERY_LARGE);
+ fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize,
+ CDEF_VERY_LARGE);
}
- if (!prev_row_dering[sbc - 1]) {
- copy_sb8_16(
- cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
- OD_FILT_VBORDER, OD_FILT_HBORDER);
- } else if (sbr > 0 && sbc > 0) {
- copy_rect(src, OD_FILT_BSTRIDE,
- &linebuf[pli][coffset - OD_FILT_HBORDER], stride,
- OD_FILT_VBORDER, OD_FILT_HBORDER);
+ if (!prev_row_cdef[fbc - 1]) {
+ copy_sb8_16(cm, src, CDEF_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
+ coffset - CDEF_HBORDER, xd->plane[pli].dst.stride,
+ CDEF_VBORDER, CDEF_HBORDER);
+ } else if (fbr > 0 && fbc > 0) {
+ copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER],
+ stride, CDEF_VBORDER, CDEF_HBORDER);
} else {
- fill_rect(src, OD_FILT_BSTRIDE, OD_FILT_VBORDER, OD_FILT_HBORDER,
- OD_DERING_VERY_LARGE);
+ fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
- if (!prev_row_dering[sbc + 1]) {
- copy_sb8_16(
- cm, &src[OD_FILT_HBORDER + (nhb << mi_wide_l2[pli])],
- OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset + hsize, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
- OD_FILT_HBORDER);
- } else if (sbr > 0 && sbc < nhsb - 1) {
- copy_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- &linebuf[pli][coffset + hsize], stride, OD_FILT_VBORDER,
- OD_FILT_HBORDER);
+ if (!prev_row_cdef[fbc + 1]) {
+ copy_sb8_16(cm, &src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
+ CDEF_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
+ coffset + hsize, xd->plane[pli].dst.stride, CDEF_VBORDER,
+ CDEF_HBORDER);
+ } else if (fbr > 0 && fbc < nhfb - 1) {
+ copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
+ &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER,
+ CDEF_HBORDER);
} else {
- fill_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- OD_FILT_VBORDER, OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
+ CDEF_HBORDER, CDEF_VERY_LARGE);
}
- if (dering_left) {
+ if (cdef_left) {
/* If we deringed the superblock on the left then we need to copy in
saved pixels. */
- copy_rect(src, OD_FILT_BSTRIDE, colbuf[pli], OD_FILT_HBORDER,
- rend + OD_FILT_VBORDER, OD_FILT_HBORDER);
+ copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER,
+ rend + CDEF_VBORDER, CDEF_HBORDER);
}
/* Saving pixels in case we need to dering the superblock on the
right. */
- copy_rect(colbuf[pli], OD_FILT_HBORDER, src + hsize, OD_FILT_BSTRIDE,
- rend + OD_FILT_VBORDER, OD_FILT_HBORDER);
+ copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
+ rend + CDEF_VBORDER, CDEF_HBORDER);
copy_sb8_16(
cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * (sbr + 1) - OD_FILT_VBORDER,
- coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
+ coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize);
if (tile_top) {
- fill_rect(src, OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
if (tile_left) {
- fill_rect(src, OD_FILT_BSTRIDE, vsize + 2 * OD_FILT_VBORDER,
- OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
if (tile_bottom) {
- fill_rect(&src[(vsize + OD_FILT_VBORDER) * OD_FILT_BSTRIDE],
- OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
+ CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
}
if (tile_right) {
- fill_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- vsize + 2 * OD_FILT_VBORDER, OD_FILT_HBORDER,
- OD_DERING_VERY_LARGE);
+ fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
+ vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
}
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- od_dering(
+ cdef_filter_fb(
(uint8_t *)&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
+ (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride, dst,
- &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
- xdec[pli], ydec[pli], dir, NULL, var, pli, dlist, dering_count,
- level, clpf_strength, clpf_damping, dering_damping, coeff_shift,
- 0, 1);
+ &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
+ ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
+ sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
} else {
#endif
- od_dering(&xd->plane[pli]
- .dst.buf[xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
- xd->plane[pli].dst.stride, dst,
- &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
- xdec[pli], ydec[pli], dir, NULL, var, pli, dlist,
- dering_count, level, clpf_strength, clpf_damping,
- dering_damping, coeff_shift, 0, 0);
+ cdef_filter_fb(
+ &xd->plane[pli]
+ .dst.buf[xd->plane[pli].dst.stride *
+ (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
+ xd->plane[pli].dst.stride, dst,
+ &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
+ ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
+ sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
#if CONFIG_HIGHBITDEPTH
}
#endif
}
- dering_left = 1;
+ cdef_left = 1;
}
{
unsigned char *tmp;
- tmp = prev_row_dering;
- prev_row_dering = curr_row_dering;
- curr_row_dering = tmp;
+ tmp = prev_row_cdef;
+ prev_row_cdef = curr_row_cdef;
+ curr_row_cdef = tmp;
}
}
- aom_free(row_dering);
+ aom_free(row_cdef);
for (pli = 0; pli < nplanes; pli++) {
aom_free(linebuf[pli]);
aom_free(colbuf[pli]);
diff --git a/av1/common/cdef.h b/av1/common/cdef.h
index a0dd0a6..acb609e 100644
--- a/av1/common/cdef.h
+++ b/av1/common/cdef.h
@@ -8,20 +8,19 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#ifndef AV1_COMMON_DERING_H_
-#define AV1_COMMON_DERING_H_
+#ifndef AV1_COMMON_CDEF_H_
+#define AV1_COMMON_CDEF_H_
#define CDEF_STRENGTH_BITS 7
-#define DERING_STRENGTHS 32
-#define CLPF_STRENGTHS 4
+#define CDEF_PRI_STRENGTHS 32
+#define CDEF_SEC_STRENGTHS 4
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "av1/common/onyxc_int.h"
-#include "./od_dering.h"
static INLINE int sign(int i) { return i < 0 ? -1 : 1; }
@@ -40,8 +39,8 @@
#endif
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
-int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- dering_list *dlist, int filter_skip);
+int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
+ cdef_list *dlist, int filter_skip);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
@@ -50,4 +49,4 @@
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // AV1_COMMON_DERING_H_
+#endif // AV1_COMMON_CDEF_H_
diff --git a/av1/common/od_dering.c b/av1/common/cdef_block.c
similarity index 67%
rename from av1/common/od_dering.c
rename to av1/common/cdef_block.c
index df4fb2a..3fe836a 100644
--- a/av1/common/od_dering.c
+++ b/av1/common/cdef_block.c
@@ -21,17 +21,15 @@
#include "./cdef.h"
/* Generated from gen_filter_tables.c. */
-const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
- { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
- -3 * OD_FILT_BSTRIDE + 3 },
- { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
- -1 * OD_FILT_BSTRIDE + 3 },
- { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
- { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
- { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
- { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
- { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
- { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
+const int cdef_directions[8][3] = {
+ { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
+ { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
+ { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 },
+ { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 },
+ { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
};
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
@@ -41,8 +39,8 @@
in a particular direction. Since each direction has the same sum(x^2) term,
that term is never computed. See Section 2, step 2, of:
http://jmvalin.ca/notes/intra_paint.pdf */
-int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift) {
+int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift) {
int i;
int32_t cost[8] = { 0 };
int partial[8][15] = { { 0 } };
@@ -113,9 +111,8 @@
}
/* Smooth in the direction detected. */
-void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
- const uint16_t *in, int threshold,
- int dir, int damping) {
+void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
int i;
int j;
int k;
@@ -125,15 +122,13 @@
int16_t sum;
int16_t xx;
int16_t yy;
- xx = in[i * OD_FILT_BSTRIDE + j];
+ xx = in[i * CDEF_BSTRIDE + j];
sum = 0;
for (k = 0; k < 3; k++) {
int16_t p0;
int16_t p1;
- p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
- p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
+ p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
+ p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
sum += taps[k] * constrain(p0, threshold, damping);
sum += taps[k] * constrain(p1, threshold, damping);
}
@@ -145,9 +140,8 @@
}
/* Smooth in the direction detected. */
-void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
- const uint16_t *in, int threshold,
- int dir, int damping) {
+void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
int i;
int j;
int k;
@@ -157,15 +151,13 @@
int16_t sum;
int16_t xx;
int16_t yy;
- xx = in[i * OD_FILT_BSTRIDE + j];
+ xx = in[i * CDEF_BSTRIDE + j];
sum = 0;
for (k = 0; k < 2; k++) {
int16_t p0;
int16_t p1;
- p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
- p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
+ p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
+ p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
sum += taps[k] * constrain(p0, threshold, damping);
sum += taps[k] * constrain(p1, threshold, damping);
}
@@ -176,16 +168,16 @@
}
}
-/* Compute deringing filter threshold for an 8x8 block based on the
- directional variance difference. A high variance difference means that we
- have a highly directional pattern (e.g. a high contrast edge), so we can
- apply more deringing. A low variance means that we either have a low
- contrast edge, or a non-directional texture, so we want to be careful not
- to blur. */
-static INLINE int od_adjust_thresh(int threshold, int32_t var) {
+/* Compute the primary filter strength for an 8x8 block based on the
+ directional variance difference. A high variance difference means
+ that we have a highly directional pattern (e.g. a high contrast
+ edge), so we can apply more deringing. A low variance means that we
+ either have a low contrast edge, or a non-directional texture, so
+ we want to be careful not to blur. */
+static INLINE int adjust_strength(int strength, int32_t var) {
const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
- /* We use the variance of 8x8 blocks to adjust the threshold. */
- return var ? (threshold * (4 + i) + 8) >> 4 : 0;
+ /* We use the variance of 8x8 blocks to adjust the strength. */
+ return var ? (strength * (4 + i) + 8) >> 4 : 0;
}
void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
@@ -202,20 +194,20 @@
for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
}
-static void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride,
- uint16_t *src, dering_list *dlist,
- int dering_count, int bsize) {
+static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
+ cdef_list *dlist, int cdef_count,
+ int bsize) {
int bi, bx, by;
if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
&src[bi << (3 + 3)], 8);
}
} else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
@@ -224,7 +216,7 @@
dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
}
} else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
@@ -234,7 +226,7 @@
}
} else {
assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
@@ -259,19 +251,19 @@
dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
}
-static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride,
- const uint16_t *src, dering_list *dlist,
- int dering_count, int bsize) {
+static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
+ const uint16_t *src, cdef_list *dlist,
+ int cdef_count, int bsize) {
int bi, bx, by;
if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
&src[bi << (3 + 3)], 8);
}
} else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
@@ -280,7 +272,7 @@
dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
}
} else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
@@ -290,7 +282,7 @@
}
} else {
assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
@@ -305,12 +297,12 @@
return filter_skip;
}
-void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
- int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int pli, dering_list *dlist, int dering_count, int level,
- int clpf_strength, int clpf_damping, int dering_damping,
- int coeff_shift, int skip_dering, int hbd) {
+void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
+ int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
+ int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
+ cdef_list *dlist, int cdef_count, int level,
+ int sec_strength, int sec_damping, int pri_damping,
+ int coeff_shift, int skip_dering, int hbd) {
int bi;
int bx;
int by;
@@ -320,11 +312,10 @@
int filter_skip = get_filter_skip(level);
if (level == 1) threshold = 31 << coeff_shift;
- od_filter_dering_direction_func filter_dering_direction[] = {
- od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
- };
- clpf_damping += coeff_shift - (pli != AOM_PLANE_Y);
- dering_damping += coeff_shift - (pli != AOM_PLANE_Y);
+ cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
+ cdef_direction_8x8 };
+ sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
+ pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
bsize =
ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
bsizex = 3 - xdec;
@@ -333,12 +324,11 @@
if (!skip_dering) {
if (pli == 0) {
if (!dirinit || !*dirinit) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
- dir[by][bx] =
- od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
- OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
+ dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
+ CDEF_BSTRIDE, &var[by][bx], coeff_shift);
}
if (dirinit) *dirinit = 1;
}
@@ -348,24 +338,23 @@
// something out in y[] later.
if (threshold != 0) {
assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
by = dlist[bi].by;
bx = dlist[bi].bx;
- (filter_dering_direction[bsize == BLOCK_8X8])(
+ (cdef_direction[bsize == BLOCK_8X8])(
&y[bi << (bsizex + bsizey)], 1 << bsizex,
- &in[(by * OD_FILT_BSTRIDE << bsizey) + (bx << bsizex)],
- pli ? t : od_adjust_thresh(t, var[by][bx]), dir[by][bx],
- dering_damping);
+ &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
+ pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
+ pri_damping);
}
}
}
- if (clpf_strength) {
+ if (sec_strength) {
if (threshold && !skip_dering)
- copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
- bsize);
- for (bi = 0; bi < dering_count; bi++) {
+ copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
int py = by << bsizey;
@@ -378,31 +367,31 @@
: aom_clpf_hblock_hbd)(
dst ? (uint16_t *)dst + py * dstride + px
: &y[bi << (bsizex + bsizey)],
- in + py * OD_FILT_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
- OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
- clpf_strength << coeff_shift, clpf_damping);
+ in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
+ CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
+ sec_damping);
} else {
// Do clpf and write the result to an 8 bit destination
(!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
: aom_clpf_hblock)(
- dst + py * dstride + px, in + py * OD_FILT_BSTRIDE + px, dstride,
- OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
- clpf_strength << coeff_shift, clpf_damping);
+ dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
+ CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
+ sec_damping);
}
}
} else if (threshold != 0) {
// No clpf, so copy instead
if (hbd) {
- copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
- dering_count, bsize);
+ copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
+ bsize);
} else {
- copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
+ copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
}
} else if (dirinit) {
// If we're here, both dering and clpf are off, and we still haven't written
// anything to y[] yet, so we just copy the input to y[]. This is necessary
// only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
int iy, ix;
@@ -410,7 +399,7 @@
for (iy = 0; iy < 1 << bsizey; iy++)
for (ix = 0; ix < 1 << bsizex; ix++)
y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
- in[((by << bsizey) + iy) * OD_FILT_BSTRIDE + (bx << bsizex) + ix];
+ in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
}
}
}
diff --git a/av1/common/cdef_block.h b/av1/common/cdef_block.h
new file mode 100644
index 0000000..3891e2b
--- /dev/null
+++ b/av1/common/cdef_block.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if !defined(_CDEF_BLOCK_H)
+#define _CDEF_BLOCK_H (1)
+
+#include "./odintrin.h"
+
+#define CDEF_BLOCKSIZE 64
+#define CDEF_BLOCKSIZE_LOG2 6
+#define CDEF_NBLOCKS (CDEF_BLOCKSIZE / 8)
+
+/* We need to buffer three vertical lines. */
+#define CDEF_VBORDER (3)
+/* We only need to buffer three horizontal pixels too, but let's align to
+ 16 bytes (8 x 16 bits) to make vectorization easier. */
+#define CDEF_HBORDER (8)
+#define CDEF_BSTRIDE ALIGN_POWER_OF_TWO(CDEF_BLOCKSIZE + 2 * CDEF_HBORDER, 3)
+
+#define CDEF_VERY_LARGE (30000)
+#define CDEF_INBUF_SIZE (CDEF_BSTRIDE * (CDEF_BLOCKSIZE + 2 * CDEF_VBORDER))
+
+extern const int cdef_directions[8][3];
+
+typedef struct {
+ uint8_t by;
+ uint8_t bx;
+ uint8_t skip;
+} cdef_list;
+
+typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
+ const uint16_t *in, int threshold, int dir,
+ int damping);
+
+int get_filter_skip(int level);
+
+void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
+ int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
+ int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
+ cdef_list *dlist, int cdef_count, int level,
+ int sec_strength, int sec_damping, int pri_damping,
+ int coeff_shift, int skip_dering, int hbd);
+#endif
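
The border constants above describe the padded 16-bit staging buffer that
av1_cdef_frame() and av1_cdef_search() fill before filtering. A minimal sketch
of the indexing they use, mirroring the expressions elsewhere in this patch:

  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
  /* Skip the CDEF_VBORDER padding rows and CDEF_HBORDER padding columns to
     reach the first pixel of the block being filtered. */
  uint16_t *in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;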
diff --git a/av1/common/od_dering_neon.c b/av1/common/cdef_block_neon.c
similarity index 94%
rename from av1/common/od_dering_neon.c
rename to av1/common/cdef_block_neon.c
index 9944105..030b325 100644
--- a/av1/common/od_dering_neon.c
+++ b/av1/common/cdef_block_neon.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/av1/common/od_dering_simd.h b/av1/common/cdef_block_simd.h
similarity index 80%
rename from av1/common/od_dering_simd.h
rename to av1/common/cdef_block_simd.h
index 4074e7e..358d919 100644
--- a/av1/common/od_dering_simd.h
+++ b/av1/common/cdef_block_simd.h
@@ -10,8 +10,7 @@
*/
#include "./av1_rtcd.h"
-#include "./cdef_simd.h"
-#include "./od_dering.h"
+#include "./cdef_block.h"
/* partial A is a 16-bit vector of the form:
[x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
@@ -155,8 +154,8 @@
res[0] = v128_ziphi_64(tr1_7, tr1_6);
}
-int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var,
- int coeff_shift) {
+int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift) {
int i;
int32_t cost[8];
int32_t best_cost = 0;
@@ -211,42 +210,51 @@
return best_dir;
}
-void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
- const uint16_t *in,
- int threshold, int dir,
- int damping) {
+// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
+SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
+ unsigned int adjdamp) {
+ v128 diff = v128_sub_16(a, b);
+ const v128 sign = v128_shr_n_s16(diff, 15);
+ diff = v128_abs_s16(diff);
+ const v128 s =
+ v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
+ return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
+}
+
+void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
int i;
v128 p0, p1, sum, row, res;
- int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
- int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
+ int o1 = cdef_directions[dir][0];
+ int o2 = cdef_directions[dir][1];
if (threshold) damping -= get_msb(threshold);
for (i = 0; i < 4; i += 2) {
sum = v128_zero();
- row = v128_from_v64(v64_load_aligned(&in[i * OD_FILT_BSTRIDE]),
- v64_load_aligned(&in[(i + 1) * OD_FILT_BSTRIDE]));
+ row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + o1]));
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1]));
p0 = constrain16(p0, row, threshold, damping);
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o1]));
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1]));
p1 = constrain16(p1, row, threshold, damping);
// sum += 4 * (p0 + p1)
sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + o2]));
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2]));
p0 = constrain16(p0, row, threshold, damping);
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o2]));
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2]));
p1 = constrain16(p1, row, threshold, damping);
// sum += 1 * (p0 + p1)
@@ -261,27 +269,25 @@
}
}
-void SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
- const uint16_t *in,
- int threshold, int dir,
- int damping) {
+void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
int i;
v128 sum, p0, p1, row, res;
- int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
- int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
- int o3 = OD_DIRECTION_OFFSETS_TABLE[dir][2];
+ int o1 = cdef_directions[dir][0];
+ int o2 = cdef_directions[dir][1];
+ int o3 = cdef_directions[dir][2];
if (threshold) damping -= get_msb(threshold);
for (i = 0; i < 8; i++) {
sum = v128_zero();
- row = v128_load_aligned(&in[i * OD_FILT_BSTRIDE]);
+ row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]);
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]);
p0 = constrain16(p0, row, threshold, damping);
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]);
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]);
p1 = constrain16(p1, row, threshold, damping);
// sum += 3 * (p0 + p1)
@@ -289,24 +295,24 @@
p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
sum = v128_add_16(sum, p0);
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]);
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]);
p0 = constrain16(p0, row, threshold, damping);
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]);
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]);
p1 = constrain16(p1, row, threshold, damping);
// sum += 2 * (p0 + p1)
p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
sum = v128_add_16(sum, p0);
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o3]);
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]);
p0 = constrain16(p0, row, threshold, damping);
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o3]);
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]);
p1 = constrain16(p1, row, threshold, damping);
// sum += (p0 + p1)
diff --git a/av1/common/od_dering_sse2.c b/av1/common/cdef_block_sse2.c
similarity index 94%
rename from av1/common/od_dering_sse2.c
rename to av1/common/cdef_block_sse2.c
index 8a2a62f..f3de763 100644
--- a/av1/common/od_dering_sse2.c
+++ b/av1/common/cdef_block_sse2.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse2
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/av1/common/od_dering_sse4.c b/av1/common/cdef_block_sse4.c
similarity index 94%
rename from av1/common/od_dering_sse4.c
rename to av1/common/cdef_block_sse4.c
index 0769db9..27e9ff3 100644
--- a/av1/common/od_dering_sse4.c
+++ b/av1/common/cdef_block_sse4.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse4_1
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/av1/common/od_dering_ssse3.c b/av1/common/cdef_block_ssse3.c
similarity index 94%
rename from av1/common/od_dering_ssse3.c
rename to av1/common/cdef_block_ssse3.c
index 99df62b..8635221 100644
--- a/av1/common/od_dering_ssse3.c
+++ b/av1/common/cdef_block_ssse3.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_ssse3
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/av1/common/cdef_simd.h b/av1/common/cdef_simd.h
deleted file mode 100644
index 2649099..0000000
--- a/av1/common/cdef_simd.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AV1_COMMON_CDEF_SIMD_H_
-#define AV1_COMMON_CDEF_SIMD_H_
-
-#include "aom_dsp/aom_simd.h"
-
-// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
-SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
- unsigned int adjdamp) {
- v128 diff = v128_sub_16(a, b);
- const v128 sign = v128_shr_n_s16(diff, 15);
- diff = v128_abs_s16(diff);
- const v128 s =
- v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
- return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
-}
-
-#endif // AV1_COMMON_CDEF_SIMD_H_
diff --git a/av1/common/clpf_simd.h b/av1/common/clpf_simd.h
index a615b5e..c7ffc56 100644
--- a/av1/common/clpf_simd.h
+++ b/av1/common/clpf_simd.h
@@ -10,10 +10,20 @@
*/
#include "./av1_rtcd.h"
-#include "./cdef_simd.h"
#include "aom_ports/bitops.h"
#include "aom_ports/mem.h"
+// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
+SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
+ unsigned int adjdamp) {
+ v128 diff = v128_sub_16(a, b);
+ const v128 sign = v128_shr_n_s16(diff, 15);
+ diff = v128_abs_s16(diff);
+ const v128 s =
+ v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
+ return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
+}
+
// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
unsigned int adjdamp) {
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h
deleted file mode 100644
index 031112b..0000000
--- a/av1/common/od_dering.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if !defined(_dering_H)
-#define _dering_H (1)
-
-#include "odintrin.h"
-
-#define OD_DERING_NBLOCKS (MAX_SB_SIZE / 8)
-
-/* We need to buffer three vertical lines. */
-#define OD_FILT_VBORDER (3)
-/* We only need to buffer three horizontal pixels too, but let's align to
- 16 bytes (8 x 16 bits) to make vectorization easier. */
-#define OD_FILT_HBORDER (8)
-#define OD_FILT_BSTRIDE ALIGN_POWER_OF_TWO(MAX_SB_SIZE + 2 * OD_FILT_HBORDER, 3)
-
-#define OD_DERING_VERY_LARGE (30000)
-#define OD_DERING_INBUF_SIZE \
- (OD_FILT_BSTRIDE * (MAX_SB_SIZE + 2 * OD_FILT_VBORDER))
-
-extern const int OD_DIRECTION_OFFSETS_TABLE[8][3];
-
-typedef struct {
- uint8_t by;
- uint8_t bx;
- uint8_t skip;
-} dering_list;
-
-typedef void (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
- const uint16_t *in,
- int threshold, int dir,
- int damping);
-
-int get_filter_skip(int level);
-
-void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
- int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int pli, dering_list *dlist, int dering_count, int level,
- int clpf_strength, int clpf_damping, int dering_damping,
- int coeff_shift, int skip_dering, int hbd);
-#endif
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index cc0f7c8..2896deb 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -425,8 +425,8 @@
int mib_size; // Size of the superblock in units of MI blocks
int mib_size_log2; // Log 2 of above.
#if CONFIG_CDEF
- int cdef_dering_damping;
- int cdef_clpf_damping;
+ int cdef_pri_damping;
+ int cdef_sec_damping;
int nb_cdef_strengths;
int cdef_strengths[CDEF_MAX_STRENGTHS];
int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 4e8d99a..1b572ae 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3015,8 +3015,8 @@
#if CONFIG_CDEF
static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
int i;
- cm->cdef_dering_damping = aom_rb_read_literal(rb, 1) + 5;
- cm->cdef_clpf_damping = aom_rb_read_literal(rb, 2) + 3;
+ cm->cdef_pri_damping = aom_rb_read_literal(rb, 1) + 5;
+ cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
cm->cdef_bits = aom_rb_read_literal(rb, 2);
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (i = 0; i < cm->nb_cdef_strengths; i++) {
diff --git a/av1/decoder/inspection.c b/av1/decoder/inspection.c
index f43470b..1728333 100644
--- a/av1/decoder/inspection.c
+++ b/av1/decoder/inspection.c
@@ -101,9 +101,10 @@
mi->tx_size = mbmi->tx_size;
#if CONFIG_CDEF
- mi->cdef_level = cm->cdef_strengths[mbmi->cdef_strength] / CLPF_STRENGTHS;
+ mi->cdef_level =
+ cm->cdef_strengths[mbmi->cdef_strength] / CDEF_SEC_STRENGTHS;
mi->cdef_strength =
- cm->cdef_strengths[mbmi->cdef_strength] % CLPF_STRENGTHS;
+ cm->cdef_strengths[mbmi->cdef_strength] % CDEF_SEC_STRENGTHS;
mi->cdef_strength += mi->cdef_strength == 3;
#endif
#if CONFIG_CFL
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 9e20819..a7e1dfc 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3474,8 +3474,8 @@
#if CONFIG_CDEF
static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
int i;
- aom_wb_write_literal(wb, cm->cdef_dering_damping - 5, 1);
- aom_wb_write_literal(wb, cm->cdef_clpf_damping - 3, 2);
+ aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
+ aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index e4ec388..443e9e5 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -19,11 +19,11 @@
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
-#define REDUCED_STRENGTHS 8
-#define REDUCED_TOTAL_STRENGTHS (REDUCED_STRENGTHS * CLPF_STRENGTHS)
-#define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
+#define REDUCED_PRI_STRENGTHS 8
+#define REDUCED_TOTAL_STRENGTHS (REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
+#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
-static int priconv[REDUCED_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 };
+static int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 };
/* Search for the best strength to add as an option, knowing we
already selected nb_strengths options. */
@@ -232,13 +232,13 @@
}
/* Compute MSE only on the blocks we filtered. */
-uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
- dering_list *dlist, int dering_count,
- BLOCK_SIZE bsize, int coeff_shift, int pli) {
+uint64_t compute_cdef_dist(uint16_t *dst, int dstride, uint16_t *src,
+ cdef_list *dlist, int cdef_count, BLOCK_SIZE bsize,
+ int coeff_shift, int pli) {
uint64_t sum = 0;
int bi, bx, by;
if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
if (pli == 0) {
@@ -250,7 +250,7 @@
}
}
} else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
sum += mse_4x4_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
@@ -259,7 +259,7 @@
&src[(bi << (3 + 2)) + 4 * 4], 4);
}
} else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
@@ -269,7 +269,7 @@
}
} else {
assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
@@ -282,12 +282,12 @@
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
int r, c;
- int sbr, sbc;
+ int fbr, fbc;
uint16_t *src[3];
uint16_t *ref_coeff[3];
- dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
- int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
- int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+ cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
+ int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
+ int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
int stride[3];
int bsize[3];
int mi_wide_l2[3];
@@ -295,18 +295,18 @@
int xdec[3];
int ydec[3];
int pli;
- int dering_count;
+ int cdef_count;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
uint64_t best_tot_mse = (uint64_t)1 << 63;
uint64_t tot_mse;
int sb_count;
- int nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- int nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
- int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
+ int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
+ int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
- int clpf_damping = 3 + (cm->base_qindex >> 6);
- int dering_damping = 6;
+ int pri_damping = 6;
+ int sec_damping = 3 + (cm->base_qindex >> 6);
int i;
int nb_strengths;
int nb_strength_bits;
@@ -314,19 +314,18 @@
double lambda;
int nplanes = 3;
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
- DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
+ DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
uint16_t *in;
- DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SQUARE]);
- int chroma_dering =
- xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
- xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
+ DECLARE_ALIGNED(32, uint16_t, tmp_dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]);
+ int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
+ xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
quantizer =
av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
lambda = .12 * quantizer * quantizer / 256.;
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
- mse[0] = aom_malloc(sizeof(**mse) * nvsb * nhsb);
- mse[1] = aom_malloc(sizeof(**mse) * nvsb * nhsb);
+ mse[0] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
+ mse[1] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
for (pli = 0; pli < nplanes; pli++) {
uint8_t *ref_buffer;
int ref_stride;
@@ -380,65 +379,64 @@
}
}
}
- in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
+ in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
sb_count = 0;
- for (sbr = 0; sbr < nvsb; ++sbr) {
- for (sbc = 0; sbc < nhsb; ++sbc) {
+ for (fbr = 0; fbr < nvfb; ++fbr) {
+ for (fbc = 0; fbc < nhfb; ++fbc) {
int nvb, nhb;
int gi;
int dirinit = 0;
- nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
- nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
- cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc]
+ nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
+ nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
+ cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc]
->mbmi.cdef_strength = -1;
- if (sb_all_skip(cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64)) continue;
- dering_count = sb_compute_dering_list(cm, sbr * MI_SIZE_64X64,
- sbc * MI_SIZE_64X64, dlist, 1);
+ if (sb_all_skip(cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) continue;
+ cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64,
+ fbc * MI_SIZE_64X64, dlist, 1);
for (pli = 0; pli < nplanes; pli++) {
- for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
- inbuf[i] = OD_DERING_VERY_LARGE;
+ for (i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
for (gi = 0; gi < total_strengths; gi++) {
int threshold;
uint64_t curr_mse;
- int clpf_strength;
- threshold = gi / CLPF_STRENGTHS;
+ int sec_strength;
+ threshold = gi / CDEF_SEC_STRENGTHS;
if (fast) threshold = priconv[threshold];
- if (pli > 0 && !chroma_dering) threshold = 0;
+ if (pli > 0 && !chroma_cdef) threshold = 0;
/* We avoid filtering the pixels for which some of the pixels to
average
are outside the frame. We could change the filter instead, but it
would add special cases for any future vectorization. */
- int yoff = OD_FILT_VBORDER * (sbr != 0);
- int xoff = OD_FILT_HBORDER * (sbc != 0);
+ int yoff = CDEF_VBORDER * (fbr != 0);
+ int xoff = CDEF_HBORDER * (fbc != 0);
int ysize = (nvb << mi_high_l2[pli]) +
- OD_FILT_VBORDER * (sbr != nvsb - 1) + yoff;
+ CDEF_VBORDER * (fbr != nvfb - 1) + yoff;
int xsize = (nhb << mi_wide_l2[pli]) +
- OD_FILT_HBORDER * (sbc != nhsb - 1) + xoff;
- clpf_strength = gi % CLPF_STRENGTHS;
- if (clpf_strength == 0)
- copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE,
+ CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
+ sec_strength = gi % CDEF_SEC_STRENGTHS;
+ if (sec_strength == 0)
+ copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
src[pli],
- (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
+ (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
stride[pli], ysize, xsize);
- od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
- tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var, pli,
- dlist, dering_count, threshold,
- clpf_strength + (clpf_strength == 3), clpf_damping,
- dering_damping, coeff_shift, clpf_strength != 0, 1);
- curr_mse = compute_dering_dist(
+ cdef_filter_fb(sec_strength ? NULL : (uint8_t *)in, CDEF_BSTRIDE,
+ tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var,
+ pli, dlist, cdef_count, threshold,
+ sec_strength + (sec_strength == 3), sec_damping,
+ pri_damping, coeff_shift, sec_strength != 0, 1);
+ curr_mse = compute_cdef_dist(
ref_coeff[pli] +
- (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
- stride[pli], tmp_dst, dlist, dering_count, bsize[pli],
- coeff_shift, pli);
+ (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
+ stride[pli], tmp_dst, dlist, cdef_count, bsize[pli], coeff_shift,
+ pli);
if (pli < 2)
mse[pli][sb_count][gi] = curr_mse;
else
mse[1][sb_count][gi] += curr_mse;
sb_index[sb_count] =
- MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
+ MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
}
}
sb_count++;
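The table filled above drives the strength selection later in this file: for any candidate set of nb_strengths (luma, chroma) strength pairs, each 64x64 filter block picks the pair with the lowest stored MSE and the frame cost is the sum, roughly as in this illustrative helper (names are mine, not from the patch; the actual search code may differ in detail):
/* Illustrative reduction of the per-block, per-strength MSE table. */
static uint64_t total_frame_mse(uint64_t (*mse_y)[TOTAL_STRENGTHS],
                                uint64_t (*mse_uv)[TOTAL_STRENGTHS],
                                int sb_count, const int *lev_y,
                                const int *lev_uv, int nb_strengths) {
  uint64_t tot = 0;
  for (int i = 0; i < sb_count; i++) {
    uint64_t best = (uint64_t)1 << 63;
    for (int k = 0; k < nb_strengths; k++) {
      const uint64_t cur = mse_y[i][lev_y[k]] + mse_uv[i][lev_uv[k]];
      if (cur < best) best = cur;
    }
    tot += best;
  }
  return tot;
}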
@@ -494,15 +492,17 @@
if (fast) {
for (int j = 0; j < nb_strengths; j++) {
cm->cdef_strengths[j] =
- priconv[cm->cdef_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
- (cm->cdef_strengths[j] % CLPF_STRENGTHS);
+ priconv[cm->cdef_strengths[j] / CDEF_SEC_STRENGTHS] *
+ CDEF_SEC_STRENGTHS +
+ (cm->cdef_strengths[j] % CDEF_SEC_STRENGTHS);
cm->cdef_uv_strengths[j] =
- priconv[cm->cdef_uv_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
- (cm->cdef_uv_strengths[j] % CLPF_STRENGTHS);
+ priconv[cm->cdef_uv_strengths[j] / CDEF_SEC_STRENGTHS] *
+ CDEF_SEC_STRENGTHS +
+ (cm->cdef_uv_strengths[j] % CDEF_SEC_STRENGTHS);
}
}
- cm->cdef_dering_damping = dering_damping;
- cm->cdef_clpf_damping = clpf_damping;
+ cm->cdef_pri_damping = pri_damping;
+ cm->cdef_sec_damping = sec_damping;
aom_free(mse[0]);
aom_free(mse[1]);
for (pli = 0; pli < nplanes; pli++) {
diff --git a/test/clpf_test.cc b/test/clpf_test.cc
index 2c0f8cf..ecb0428 100644
--- a/test/clpf_test.cc
+++ b/test/clpf_test.cc
@@ -17,7 +17,7 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "aom_ports/aom_timer.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -128,22 +128,22 @@
if (boundary & 1) { // Left
for (int i = 0; i < size; i++)
for (int j = 0; j < xpos; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 2) { // Right
for (int i = 0; i < size; i++)
for (int j = xpos + w; j < size; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 4) { // Above
for (int i = 0; i < ypos; i++)
for (int j = 0; j < size; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 8) { // Below
for (int i = ypos + h; i < size; i++)
for (int j = 0; j < size; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
}
for (strength = depth - 8; strength < depth - 5 && !error;
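Both this test and the dering test below mark samples outside the valid area with CDEF_VERY_LARGE so the filters treat them as unavailable; the four boundary cases above amount to something like this illustrative helper (not shared test code):
/* Illustrative only: pad everything outside the valid window with the
   CDEF_VERY_LARGE sentinel used for out-of-frame samples. */
static void pad_invalid(uint16_t *buf, int stride, int w, int h, int valid_x,
                        int valid_y, int valid_w, int valid_h) {
  for (int i = 0; i < h; i++)
    for (int j = 0; j < w; j++)
      if (i < valid_y || i >= valid_y + valid_h || j < valid_x ||
          j >= valid_x + valid_w)
        buf[i * stride + j] = CDEF_VERY_LARGE;
}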
diff --git a/test/dering_test.cc b/test/dering_test.cc
index 195a60f..6b76561 100644
--- a/test/dering_test.cc
+++ b/test/dering_test.cc
@@ -17,7 +17,7 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "aom_ports/aom_timer.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -27,8 +27,7 @@
namespace {
-typedef std::tr1::tuple<od_filter_dering_direction_func,
- od_filter_dering_direction_func, int>
+typedef std::tr1::tuple<cdef_direction_func, cdef_direction_func, int>
dering_dir_param_t;
class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
@@ -44,19 +43,18 @@
protected:
int bsize;
- od_filter_dering_direction_func dering;
- od_filter_dering_direction_func ref_dering;
+ cdef_direction_func dering;
+ cdef_direction_func ref_dering;
};
typedef CDEFDeringDirTest CDEFDeringSpeedTest;
-void test_dering(int bsize, int iterations,
- od_filter_dering_direction_func dering,
- od_filter_dering_direction_func ref_dering) {
+void test_dering(int bsize, int iterations, cdef_direction_func dering,
+ cdef_direction_func ref_dering) {
const int size = 8;
- const int ysize = size + 2 * OD_FILT_VBORDER;
+ const int ysize = size + 2 * CDEF_VBORDER;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, s[ysize * OD_FILT_BSTRIDE]);
+ DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
memset(ref_d, 0, sizeof(ref_d));
@@ -80,38 +78,36 @@
if (boundary) {
if (boundary & 1) { // Left
for (int i = 0; i < ysize; i++)
- for (int j = 0; j < OD_FILT_HBORDER; j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int j = 0; j < CDEF_HBORDER; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
if (boundary & 2) { // Right
for (int i = 0; i < ysize; i++)
- for (int j = OD_FILT_HBORDER + size; j < OD_FILT_BSTRIDE;
- j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
if (boundary & 4) { // Above
- for (int i = 0; i < OD_FILT_VBORDER; i++)
- for (int j = 0; j < OD_FILT_BSTRIDE; j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int i = 0; i < CDEF_VBORDER; i++)
+ for (int j = 0; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
if (boundary & 8) { // Below
- for (int i = OD_FILT_VBORDER + size; i < ysize; i++)
- for (int j = 0; j < OD_FILT_BSTRIDE; j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int i = CDEF_VBORDER + size; i < ysize; i++)
+ for (int j = 0; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
}
for (dir = 0; dir < 8; dir++) {
for (threshold = 0; threshold < 64 << (depth - 8) && !error;
threshold += (1 + 4 * !!boundary) << (depth - 8)) {
- ref_dering(ref_d, size, s + OD_FILT_HBORDER +
- OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+ ref_dering(ref_d, size,
+ s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
threshold, dir, damping);
// If dering and ref_dering are the same, we're just testing
// speed
if (dering != ref_dering)
ASM_REGISTER_STATE_CHECK(dering(
- d, size,
- s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+ d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
threshold, dir, damping));
if (ref_dering != dering) {
for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error;
@@ -146,9 +142,8 @@
<< std::endl;
}
-void test_dering_speed(int bsize, int iterations,
- od_filter_dering_direction_func dering,
- od_filter_dering_direction_func ref_dering) {
+void test_dering_speed(int bsize, int iterations, cdef_direction_func dering,
+ cdef_direction_func ref_dering) {
aom_usec_timer ref_timer;
aom_usec_timer timer;
@@ -173,7 +168,7 @@
<< "SIMD time: " << elapsed_time << " us" << std::endl;
}
-typedef int (*find_dir_t)(const od_dering_in *img, int stride, int32_t *var,
+typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
int coeff_shift);
typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
@@ -196,9 +191,9 @@
typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest;
-void test_finddir(int (*finddir)(const od_dering_in *img, int stride,
- int32_t *var, int coeff_shift),
- int (*ref_finddir)(const od_dering_in *img, int stride,
+void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift),
+ int (*ref_finddir)(const uint16_t *img, int stride,
int32_t *var, int coeff_shift)) {
const int size = 8;
ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -238,9 +233,9 @@
<< std::endl;
}
-void test_finddir_speed(int (*finddir)(const od_dering_in *img, int stride,
+void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
int32_t *var, int coeff_shift),
- int (*ref_finddir)(const od_dering_in *img, int stride,
+ int (*ref_finddir)(const uint16_t *img, int stride,
int32_t *var, int coeff_shift)) {
aom_usec_timer ref_timer;
aom_usec_timer timer;
@@ -289,99 +284,99 @@
// hard to support, so optimizations for this target are disabled.
#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse2,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse2,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse2,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_ssse3,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_ssse3,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_ssse3,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse4_1,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse4_1,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+ &cdef_find_dir_c)));
#endif
#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_neon,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_neon,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_neon,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_neon,
+ &cdef_find_dir_c)));
#endif
// Test speed for all supported architectures
#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse2,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse2,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse2,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_ssse3,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_ssse3,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_ssse3,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse4_1,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse4_1,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+ &cdef_find_dir_c)));
#endif
#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_neon,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_neon,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_neon,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_neon,
+ &cdef_find_dir_c)));
#endif
#endif // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)