Combine vertical and horizontal loop filtering at 64x64 block level
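Within each row of 64x64 blocks, vertical-edge loop filtering of a
block is immediately followed by horizontal-edge loop filtering of the
previous 64x64 block; the last block in the row is filtered
horizontally once the row's column loop finishes. This ordering is
intended to give better cache performance.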
Change-Id: Ib237358f738187d84f521050c6eab03379fd5e6b
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index acf5a37..751e9b7 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -577,6 +577,8 @@
struct loopfilter *lf = &cm->lf;
int lvl;
+ lf->combine_vert_horz_lf = 1;
+
// init limits for given sharpness
update_sharpness(lfi, lf->sharpness_level);
@@ -1904,47 +1906,109 @@
#if LOOP_FILTER_BITMASK
enum lf_path path = get_loop_filter_path(plane, pd);
- // filter all vertical edges in every super block
- for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
+ if (cm->lf.combine_vert_horz_lf) {
+ // filter all vertical and horizontal edges in every super block
+ for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
+ // filter vertical edges
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
- LoopFilterMask *lf_mask = get_loop_filter_mask(cm, mi_row, mi_col);
- av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x,
- pd[plane].subsampling_y, lf_mask);
- loop_filter_block_plane_vert(cm, pd, plane, mi_row, mi_col, path,
- lf_mask);
+ LoopFilterMask *lf_mask = get_loop_filter_mask(cm, mi_row, mi_col);
+ av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x,
+ pd[plane].subsampling_y, lf_mask);
+ loop_filter_block_plane_vert(cm, pd, plane, mi_row, mi_col, path,
+ lf_mask);
+
+ // filter horizontal edges
+ if (mi_col - MIN_MIB_SIZE >= 0) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
+ mi_row, mi_col - MIN_MIB_SIZE, plane,
+ plane + 1);
+
+ LoopFilterMask *lf_mask =
+ get_loop_filter_mask(cm, mi_row, mi_col - MIN_MIB_SIZE);
+ loop_filter_block_plane_horz(cm, pd, plane, mi_row,
+ mi_col - MIN_MIB_SIZE, path, lf_mask);
+ }
+ }
+ // filter horizontal edges
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col - MIN_MIB_SIZE, plane, plane + 1);
+
+ LoopFilterMask *lf_mask =
+ get_loop_filter_mask(cm, mi_row, mi_col - MIN_MIB_SIZE);
+ loop_filter_block_plane_horz(cm, pd, plane, mi_row,
+ mi_col - MIN_MIB_SIZE, path, lf_mask);
}
- }
+ } else {
+ // filter all vertical edges in every super block
+ for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
- // filter all horizontal edges in every super block
- for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
+ LoopFilterMask *lf_mask = get_loop_filter_mask(cm, mi_row, mi_col);
+ av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x,
+ pd[plane].subsampling_y, lf_mask);
+ loop_filter_block_plane_vert(cm, pd, plane, mi_row, mi_col, path,
+ lf_mask);
+ }
+ }
- LoopFilterMask *lf_mask = get_loop_filter_mask(cm, mi_row, mi_col);
- loop_filter_block_plane_horz(cm, pd, plane, mi_row, mi_col, path,
- lf_mask);
+ // filter all horizontal edges in every super block
+ for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+
+ LoopFilterMask *lf_mask = get_loop_filter_mask(cm, mi_row, mi_col);
+ loop_filter_block_plane_horz(cm, pd, plane, mi_row, mi_col, path,
+ lf_mask);
+ }
}
}
#else
- // filter all vertical edges in every 64x64 super block
- for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
+ if (cm->lf.combine_vert_horz_lf) {
+ // filter all vertical and horizontal edges in every 64x64 super block
+ for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
+ // filter vertical edges
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+ filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col);
+ // filter horizontal edges
+ if (mi_col - MIN_MIB_SIZE >= 0) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
+ mi_row, mi_col - MIN_MIB_SIZE, plane,
+ plane + 1);
+ filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MIN_MIB_SIZE);
+ }
+ }
+ // filter horizontal edges
av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
- filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col);
+ mi_col - MIN_MIB_SIZE, plane, plane + 1);
+ filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MIN_MIB_SIZE);
}
- }
+ } else {
+ // filter all vertical edges in every 64x64 super block
+ for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+ filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col);
+ }
+ }
- // filter all horizontal edges in every 64x64 super block
- for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
- filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col);
+ // filter all horizontal edges in every 64x64 super block
+ for (mi_row = start; mi_row < stop; mi_row += MIN_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MIN_MIB_SIZE) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+ filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col);
+ }
}
}
#endif // LOOP_FILTER_BITMASK
diff --git a/av1/common/av1_loopfilter.h b/av1/common/av1_loopfilter.h
index ebf8422..136b27e 100644
--- a/av1/common/av1_loopfilter.h
+++ b/av1/common/av1_loopfilter.h
@@ -131,6 +131,8 @@
// 0 = ZERO_MV, MV
int8_t mode_deltas[MAX_MODE_LF_DELTAS];
+ int combine_vert_horz_lf;
+
#if LOOP_FILTER_BITMASK
LoopFilterMask *lfm;
size_t lfm_num;