CWG-E191 for alt-v1-anchor: Filling residual pixels outside the actual frame border
STATS_CHANGED
diff --git a/aom_mem/aom_mem.c b/aom_mem/aom_mem.c
index e977b01..78444c2 100644
--- a/aom_mem/aom_mem.c
+++ b/aom_mem/aom_mem.c
@@ -82,3 +82,12 @@
for (i = 0; i < length; i++) *dest16++ = val;
return dest;
}
+
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+void *aom_memset_int16(void *dest, int16_t val, size_t length) {  // length counts int16_t elements, not bytes
+ size_t i;
+ int16_t *dest16 = (int16_t *)dest;  // dest is written through as an int16_t array
+ for (i = 0; i < length; i++) *dest16++ = val;  // store val into each of the length slots
+ return dest;  // hands back the pointer that was passed in
+}
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
diff --git a/aom_mem/aom_mem.h b/aom_mem/aom_mem.h
index bc5d8bc..1b6855f 100644
--- a/aom_mem/aom_mem.h
+++ b/aom_mem/aom_mem.h
@@ -37,6 +37,9 @@
void *aom_calloc(size_t num, size_t size);
void aom_free(void *memblk);
void *aom_memset16(void *dest, int val, size_t length);
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+void *aom_memset_int16(void *dest, int16_t val, size_t length);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
/*returns an addr aligned to the byte boundary specified by align*/
#define aom_align_addr(addr, align) \
diff --git a/aom_ports/mem.h b/aom_ports/mem.h
index 9e3d424..7ce170e 100644
--- a/aom_ports/mem.h
+++ b/aom_ports/mem.h
@@ -62,6 +62,10 @@
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+#define DIVIDE_AND_ROUND_SIGNED(n, d) \
+ ((((n) < 0) ^ ((d) < 0)) ? (((n) - (d) / 2) / (d)) : (((n) + (d) / 2) / (d)))  // n / d rounded to nearest, halves away from zero; n and d are evaluated more than once
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index f33094d..ba820ea 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -475,7 +475,9 @@
RD_STATS *rd_stats) {
MACROBLOCKD *const xd = &x->e_mbd;
if (ref_best_rd < 0) return INT64_MAX;
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
av1_subtract_plane(x, bs, 0);
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
x->rd_model = LOW_TXFM_RD;
const int skip_trellis = (cpi->optimize_seg_arr[xd->mi[0]->segment_id] ==
NO_ESTIMATE_YRD_TRELLIS_OPT);
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 1a4bddf..28587b0 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -34,23 +34,158 @@
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+#include "av1/encoder/rdopt_utils.h"
+
+// Returns the rounded mean of the w x h region that starts at diff.
+static AOM_INLINE int16_t avg_wxh_block_c(int16_t *diff, ptrdiff_t diff_stride,
+ int w, int h) {
+ int32_t sum = 0;
+ for (int row = 0; row < h; ++row) {
+ for (int col = 0; col < w; ++col) {
+ sum += *(diff + row * diff_stride + col);  // rows are diff_stride elements apart
+ }
+ }
+ return (int16_t)(DIVIDE_AND_ROUND_SIGNED(sum, w * h));  // w * h is the divisor, so it must be nonzero
+}
+
+// For each of the h rows, stores the rounded mean of its first w samples in out[row].
+static AOM_INLINE void avg_wxh_block_horiz_c(int16_t *diff,
+ ptrdiff_t diff_stride, int w,
+ int h, int16_t *out) {
+ for (int row = 0; row < h; ++row) {
+ int32_t sum = 0;
+ for (int col = 0; col < w; ++col) {
+ sum += *(diff + row * diff_stride + col);  // rows are diff_stride elements apart
+ }
+ out[row] = (int16_t)DIVIDE_AND_ROUND_SIGNED(sum, w);  // w is the divisor, so it must be nonzero
+ }
+}
+
+// For each of the w columns, stores the rounded mean of its first h samples in out[col].
+static AOM_INLINE void avg_wxh_block_vert_c(int16_t *diff,
+ ptrdiff_t diff_stride, int w, int h,
+ int16_t *out) {
+ for (int col = 0; col < w; ++col) {
+ int32_t sum = 0;
+ for (int row = 0; row < h; ++row) {
+ sum += *(diff + row * diff_stride + col);  // rows are diff_stride elements apart
+ }
+ out[col] = (int16_t)DIVIDE_AND_ROUND_SIGNED(sum, h);  // h is the divisor, so it must be nonzero
+ }
+}
+
+// Overwrites the residues of a tx_cols x tx_rows block that lie outside its
+// top-left visible_tx_cols x visible_tx_rows region; the fill depends on tx_type.
+static AOM_INLINE void fill_residue_outside_frame(
+ int16_t *diff, ptrdiff_t diff_stride, int tx_cols, int tx_rows,
+ int visible_tx_cols, int visible_tx_rows, TX_TYPE tx_type) {
+ const int complete_block_outside =
+ (visible_tx_cols == 0 || visible_tx_rows == 0);  // no visible sample at all
+
+ if (tx_type <= IDTX) {  // a single fill value is used for the whole outside area
+ int16_t avg = 0;  // stays 0 for IDTX or when nothing is visible
+ if (tx_type != IDTX && !complete_block_outside)
+ avg =
+ avg_wxh_block_c(diff, diff_stride, visible_tx_cols, visible_tx_rows);
+
+ // Fill the columns to the right of the visible region, on every row, with avg.
+ const int right_pixels = tx_cols - visible_tx_cols;
+ for (int i = 0; i < tx_rows; ++i) {
+ aom_memset_int16(diff + i * diff_stride + visible_tx_cols, avg,
+ right_pixels);
+ }
+ // Fill the visible columns of the rows below the visible region with avg.
+ for (int i = visible_tx_rows; i < tx_rows; ++i) {
+ aom_memset_int16(diff + i * diff_stride, avg, visible_tx_cols);
+ }
+ } else if (htx_tab[tx_type] == IDTX_1D) {  // below: per-column means; right: zeros
+ if (visible_tx_rows < tx_rows) {
+ int16_t out[64] = { 0 };  // per-column means; assumes visible_tx_cols <= 64 — TODO confirm
+ if (!complete_block_outside)
+ avg_wxh_block_vert_c(diff, diff_stride, visible_tx_cols,
+ visible_tx_rows, out);
+
+ for (int j = 0; j < visible_tx_cols; j++) {
+ for (int i = visible_tx_rows; i < tx_rows; ++i) {
+ *(diff + i * diff_stride + j) = out[j];  // extend each visible column downwards with its mean
+ }
+ }
+ }
+ // Columns to the right of the visible region are zeroed on every row.
+ const int right_pixels = tx_cols - visible_tx_cols;
+ if (right_pixels) {
+ for (int i = 0; i < tx_rows; ++i) {
+ memset(diff + i * diff_stride + visible_tx_cols, 0,
+ right_pixels * sizeof(*diff));
+ }
+ }
+ } else {  // right of visible rows: per-row means; rows below: zeros
+ assert(vtx_tab[tx_type] == IDTX_1D);
+
+ const int right_pixels = tx_cols - visible_tx_cols;
+ if (right_pixels) {
+ int16_t out[64] = { 0 };  // per-row means; assumes visible_tx_rows <= 64 — TODO confirm
+ if (!complete_block_outside)
+ avg_wxh_block_horiz_c(diff, diff_stride, visible_tx_cols,
+ visible_tx_rows, out);
+
+ for (int i = 0; i < visible_tx_rows; ++i) {
+ aom_memset_int16(diff + i * diff_stride + visible_tx_cols, out[i],
+ right_pixels);
+ }
+ }
+ // Rows below the visible region are zeroed across the full block width.
+ for (int i = visible_tx_rows; i < tx_rows; ++i) {
+ memset(diff + i * diff_stride, 0, tx_cols * sizeof(*diff));
+ }
+ }
+}
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
void av1_subtract_block(const MACROBLOCKD *xd, int rows, int cols,
int16_t *diff, ptrdiff_t diff_stride,
const uint8_t *src8, ptrdiff_t src_stride,
- const uint8_t *pred8, ptrdiff_t pred_stride) {
+ const uint8_t *pred8, ptrdiff_t pred_stride
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ int plane, int blk_col, int blk_row, int frame_width,
+ int frame_height, TX_TYPE tx_type
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+) {
assert(rows >= 4 && cols >= 4);
if (is_cur_buf_hbd(xd)) {
aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
pred8, pred_stride, xd->bd);
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
return;
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
}
- aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
- pred_stride);
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ else {
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
+ pred_stride);
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ }
+ // Runs after either subtract path: refill the residues that lie outside the frame.
+ int visible_cols, visible_rows;
+ const int is_border_block = get_visible_dimensions(
+ xd, plane, blk_col, blk_row, cols, rows, frame_width, frame_height,
+ &visible_cols, &visible_rows);
+ if (is_border_block)
+ fill_residue_outside_frame(diff, diff_stride, cols, rows, visible_cols,
+ visible_rows, tx_type);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
}
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
- int blk_col, int blk_row, TX_SIZE tx_size) {
+ int blk_col, int blk_row, TX_SIZE tx_size
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ int frame_width, int frame_height, TX_TYPE tx_type
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
@@ -64,10 +199,20 @@
int16_t *src_diff =
&p->src_diff[(blk_row * diff_stride + blk_col) << MI_SIZE_LOG2];
av1_subtract_block(xd, tx1d_height, tx1d_width, src_diff, diff_stride, src,
- src_stride, dst, dst_stride);
+ src_stride, dst, dst_stride
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ plane, blk_col, blk_row, frame_width, frame_height, tx_type
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ );
}
-void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE plane_bsize, int plane) {
+void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE plane_bsize, int plane
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ int frame_width, int frame_height
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+) {
struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
assert(plane_bsize < BLOCK_SIZES_ALL);
@@ -76,7 +221,12 @@
const MACROBLOCKD *xd = &x->e_mbd;
av1_subtract_block(xd, bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride);
+ pd->dst.buf, pd->dst.stride
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ plane, 0, 0, frame_width, frame_height, DCT_DCT
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ );
}
int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
@@ -392,6 +542,10 @@
!mbmi->skip_mode) {
tx_type = av1_get_tx_type(xd, pd->plane_type, blk_row, blk_col, tx_size,
cm->features.reduced_tx_set_used);
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size,
+ cm->width, cm->height, tx_type);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
TxfmParam txfm_param;
QUANT_PARAM quant_param;
const int use_trellis = is_trellis_used(args->enable_optimize_b, dry_run);
@@ -620,7 +774,12 @@
void av1_encode_sby_pass1(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize) {
encode_block_pass1_args args = { cpi, x };
- av1_subtract_plane(x, bsize, 0);
+ av1_subtract_plane(x, bsize, 0
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ cpi->common.width, cpi->common.height
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ );
av1_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
encode_block_pass1, &args);
}
@@ -675,7 +834,9 @@
const int step =
tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
av1_get_entropy_contexts(plane_bsize, pd, ctx.ta[plane], ctx.tl[plane]);
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
av1_subtract_plane(x, plane_bsize, plane);
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
arg.ta = ctx.ta[plane];
arg.tl = ctx.tl[plane];
const BLOCK_SIZE max_unit_bsize =
@@ -756,12 +917,19 @@
}
#endif
} else {
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
-
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
const ENTROPY_CONTEXT *a = &args->ta[blk_col];
const ENTROPY_CONTEXT *l = &args->tl[blk_row];
tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size,
cm->features.reduced_tx_set_used);
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ TX_TYPE primary_tx_type = is_stat_generation_stage(cpi) ? DCT_DCT : tx_type;
+ av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size,
+ cm->width, cm->height, primary_tx_type);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
TxfmParam txfm_param;
QUANT_PARAM quant_param;
const int use_trellis =
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index fcd34a3..c8df446 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -127,12 +127,28 @@
void av1_subtract_block(const MACROBLOCKD *xd, int rows, int cols,
int16_t *diff, ptrdiff_t diff_stride,
const uint8_t *src8, ptrdiff_t src_stride,
- const uint8_t *pred8, ptrdiff_t pred_stride);
+ const uint8_t *pred8, ptrdiff_t pred_stride
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ int plane, int blk_col, int blk_row, int frame_width,
+ int frame_height, TX_TYPE tx_type
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+);
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
- int blk_col, int blk_row, TX_SIZE tx_size);
+ int blk_col, int blk_row, TX_SIZE tx_size
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ int frame_width, int frame_height, TX_TYPE tx_type
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+);
-void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE plane_bsize, int plane);
+void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE plane_bsize, int plane
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ int frame_width, int frame_height
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+);
static INLINE void av1_set_txb_context(MACROBLOCK *x, int plane, int block,
TX_SIZE tx_size, ENTROPY_CONTEXT *a,
diff --git a/av1/encoder/model_rd.h b/av1/encoder/model_rd.h
index d4d6e4d..67dd5d6 100644
--- a/av1/encoder/model_rd.h
+++ b/av1/encoder/model_rd.h
@@ -242,10 +242,22 @@
int rate;
int bw, bh;
const struct macroblock_plane *const p = &x->plane[plane];
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ const AV1_COMMON *const cm = &cpi->common;
+ const int block_width = block_size_wide[plane_bsize];
+ const int block_height = block_size_high[plane_bsize];
+ get_visible_dimensions(xd, plane, 0, 0, block_width, block_height,
+ cm->width, cm->height, &bw, &bh);
+ sse = pixel_dist_visible_only(cpi, x, p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, plane_bsize,
+ block_height, block_width, bh, bw);
+#else
get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL,
&bw, &bh);
sse = calculate_sse(xd, p, pd, bw, bh);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate,
&dist);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 693bdba..00ec8d0 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1697,7 +1697,12 @@
const int bw = block_size_wide[plane_bsize];
const int bh = block_size_high[plane_bsize];
- av1_subtract_plane(x, plane_bsize, plane);
+ av1_subtract_plane(x, plane_bsize, plane
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ cm->width, cm->height
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ );
int64_t sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh) << 4;
total_sse += sse;
}
@@ -3533,7 +3538,12 @@
if (mbmi->motion_mode == OBMC_CAUSAL)
av1_build_obmc_inter_predictors_sb(cm, xd);
- av1_subtract_plane(x, bsize, 0);
+ av1_subtract_plane(x, bsize, 0
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ cm->width, cm->height
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ );
if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
!xd->lossless[mbmi->segment_id]) {
av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index b61f7f5..a6b3815 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -322,6 +322,74 @@
if (width) *width = txb_width;
}
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+// Reports how much of a cols x rows block of `plane` lies inside the frame.
+// frame_width/frame_height are pre-subsampling sizes. Each non-NULL output gets
+// the in-frame extent of the block. Returns nonzero if any part is outside.
+static AOM_INLINE int get_visible_dimensions(const MACROBLOCKD *xd, int plane,
+                                             int blk_col, int blk_row, int cols,
+                                             int rows, int frame_width,
+                                             int frame_height,
+                                             int *visible_cols,
+                                             int *visible_rows) {
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int ss_x = pd->subsampling_x;
+  const int ss_y = pd->subsampling_y;
+  const int luma_bw = xd->plane[0].width;
+  const int luma_bh = xd->plane[0].height;
+  // If plane 0 is 4 wide/high and this axis is subsampled, start one mi earlier.
+  const int col_start = (luma_bw == 4) && ss_x ? xd->mi_col - 1 : xd->mi_col;
+  const int row_start = (luma_bh == 4) && ss_y ? xd->mi_row - 1 : xd->mi_row;
+  const int x = (col_start << MI_SIZE_LOG2) >> ss_x;
+  const int y = (row_start << MI_SIZE_LOG2) >> ss_y;
+  const int mi_x = x + (blk_col << MI_SIZE_LOG2);
+  const int mi_y = y + (blk_row << MI_SIZE_LOG2);
+  // NOTE(review): >> truncates odd frame sizes on subsampled planes — confirm.
+  const int plane_frame_width = frame_width >> ss_x;
+  const int plane_frame_height = frame_height >> ss_y;
+  int valid_cols, valid_rows;
+  if (mi_x + cols <= plane_frame_width) {
+    valid_cols = cols;
+  } else {
+    valid_cols = clamp(plane_frame_width - mi_x, 0, cols);
+  }
+
+  if (mi_y + rows <= plane_frame_height) {
+    valid_rows = rows;
+  } else {
+    valid_rows = clamp(plane_frame_height - mi_y, 0, rows);
+  }
+  // Outputs are optional and independent: a caller may request only one.
+  if (visible_cols != NULL) *visible_cols = valid_cols;
+  if (visible_rows != NULL) *visible_rows = valid_rows;
+  return (valid_cols < cols || valid_rows < rows);
+}
+
+static AOM_INLINE unsigned pixel_dist_visible_only(  // SSE between src and dst over the visible part of a block
+ const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
+ const int src_stride, const uint8_t *dst, const int dst_stride,
+ const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
+ int visible_cols) {
+ unsigned sse;
+ // Whole block visible: call the tx_bsize `vf` kernel and keep only the SSE it writes.
+ if (txb_rows == visible_rows && txb_cols == visible_cols) {
+ cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
+ return sse;
+ }
+ // Partly visible: measure only the visible region (NOTE(review): it may be 0 wide/high — confirm helpers accept that).
+ const MACROBLOCKD *xd = &x->e_mbd;
+ if (is_cur_buf_hbd(xd)) {
+ uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
+ visible_cols, visible_rows);
+ return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);  // presumably rescales to an 8-bit range — confirm
+ }
+ // Reached only when is_cur_buf_hbd() was false.
+ sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
+ visible_rows);
+ return sse;
+}
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
static AOM_INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * MI_SIZE_LOG2);
return num_blk;
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index fd9c242..63ba81b 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -79,6 +79,22 @@
av1_fwd_txfm(src_diff, coeff, bw, &txfm_param);
}
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+static AOM_INLINE void tpl_subtract_block(  // subtract only; performs no outside-frame residue fill
+ const MACROBLOCKD *xd, int rows, int cols, int16_t *diff,
+ ptrdiff_t diff_stride, const uint8_t *src8, ptrdiff_t src_stride,
+ const uint8_t *pred8, ptrdiff_t pred_stride) {
+ assert(rows >= 4 && cols >= 4);  // blocks smaller than 4x4 are not expected here
+ if (is_cur_buf_hbd(xd)) {  // high-bitdepth buffers use the kernel that also takes xd->bd
+ aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
+ pred8, pred_stride, xd->bd);
+ return;
+ }
+ aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
+ pred_stride);
+}
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
static AOM_INLINE int64_t tpl_get_satd_cost(const MACROBLOCK *x,
int16_t *src_diff, int diff_stride,
const uint8_t *src, int src_stride,
@@ -88,8 +104,13 @@
const MACROBLOCKD *xd = &x->e_mbd;
const int pix_num = bw * bh;
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ tpl_subtract_block(xd, bh, bw, src_diff, diff_stride, src, src_stride, dst,
+ dst_stride);
+#else
av1_subtract_block(xd, bh, bw, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
tpl_fwd_txfm(src_diff, bw, coeff, tx_size, xd->bd, is_cur_buf_hbd(xd));
return aom_satd(coeff, pix_num);
}
@@ -116,8 +137,14 @@
int *rate_cost, int64_t *recon_error, int64_t *sse) {
const MACROBLOCKD *xd = &x->e_mbd;
uint16_t eob;
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ tpl_subtract_block(xd, bh, bw, src_diff, diff_stride, src, src_stride, dst,
+ dst_stride);
+#else
av1_subtract_block(xd, bh, bw, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
tpl_fwd_txfm(src_diff, diff_stride, coeff, tx_size, xd->bd,
is_cur_buf_hbd(xd));
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 97ed87a..4de0ce6 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -376,15 +376,25 @@
// Compute the pixel domain distortion from diff on all visible 4x4s in the
// transform block.
-static INLINE int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
- int blk_row, int blk_col,
- const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize,
- unsigned int *block_mse_q8) {
+static INLINE int64_t pixel_diff_dist(
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ const AV1_COMMON *cm,
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ const MACROBLOCK *x, int plane, int blk_row, int blk_col,
+ const BLOCK_SIZE plane_bsize, const BLOCK_SIZE tx_bsize,
+ unsigned int *block_mse_q8) {
int visible_rows, visible_cols;
const MACROBLOCKD *xd = &x->e_mbd;
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ const int txb_cols = block_size_wide[tx_bsize];
+ const int txb_rows = block_size_high[tx_bsize];
+
+ get_visible_dimensions(xd, plane, blk_col, blk_row, txb_cols, txb_rows,
+ cm->width, cm->height, &visible_cols, &visible_rows);
+#else
get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
NULL, &visible_cols, &visible_rows);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
const int diff_stride = block_size_wide[plane_bsize];
const int16_t *diff = x->plane[plane].src_diff;
@@ -396,7 +406,11 @@
*block_mse_q8 =
(unsigned int)((256 * sse) / (visible_cols * visible_rows));
else
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ *block_mse_q8 = 0;
+#else
*block_mse_q8 = UINT_MAX;
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
}
return sse;
}
@@ -451,7 +465,11 @@
const int16_t dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd);
#endif // CONFIG_EXTQUANT
- *dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize, NULL);
+ *dist = pixel_diff_dist(
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ cm,
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ x, 0, 0, 0, bsize, bsize, NULL);
const int64_t mse = *dist / bw / bh;
// Normalized quantizer takes the transform upscaling factor (8 for tx size
@@ -1181,6 +1199,11 @@
cpi->oxcf.q_cfg.quant_b_adapt, &quant_param_intra);
av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, best_tx_type,
&quant_param_intra);
+
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size,
+ cm->width, cm->height, best_tx_type);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
&txfm_param_intra, &quant_param_intra);
if (quant_param_intra.use_optimize_b) {
@@ -1203,6 +1226,7 @@
}
}
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
static unsigned pixel_dist_visible_only(
const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
const int src_stride, const uint8_t *dst, const int dst_stride,
@@ -1226,6 +1250,7 @@
visible_rows);
return sse;
}
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
// Compute the pixel domain distortion from src and dst on all visible 4x4s in
// the
@@ -1234,15 +1259,26 @@
int plane, const uint8_t *src, const int src_stride,
const uint8_t *dst, const int dst_stride,
int blk_row, int blk_col,
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
const BLOCK_SIZE plane_bsize,
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
const BLOCK_SIZE tx_bsize) {
int txb_rows, txb_cols, visible_rows, visible_cols;
const MACROBLOCKD *xd = &x->e_mbd;
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ const AV1_COMMON *const cm = &cpi->common;
+ txb_cols = block_size_wide[tx_bsize];
+ txb_rows = block_size_high[tx_bsize];
+
+ get_visible_dimensions(xd, plane, blk_col, blk_row, txb_cols, txb_rows,
+ cm->width, cm->height, &visible_cols, &visible_rows);
+#else
get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
&txb_cols, &txb_rows, &visible_cols, &visible_rows);
assert(visible_rows > 0);
assert(visible_cols > 0);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
dst_stride, tx_bsize, txb_rows,
@@ -1252,7 +1288,10 @@
}
static INLINE int64_t dist_block_px_domain(const AV1_COMP *cpi, MACROBLOCK *x,
- int plane, BLOCK_SIZE plane_bsize,
+ int plane,
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ BLOCK_SIZE plane_bsize,
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
int block, int blk_row, int blk_col,
TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1293,7 +1332,11 @@
cpi->common.features.reduced_tx_set_used);
return 16 * pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
- blk_row, blk_col, plane_bsize, tx_bsize);
+ blk_row, blk_col,
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ plane_bsize,
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ tx_bsize);
}
static uint32_t get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row,
@@ -2294,6 +2337,12 @@
const int use_intra_txb_hash =
cpi->sf.tx_sf.use_intra_txb_hash && frame_is_intra_only(cm) &&
!is_inter && plane == 0 && tx_size_wide[tx_size] == tx_size_high[tx_size];
+
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size, cm->width,
+ cm->height, best_tx_type);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
if (use_intra_txb_hash) {
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
@@ -2353,8 +2402,12 @@
&per_px_mean, &dc_only_blk);
if (best_rd_stats->skip_txfm == 1) return;
} else {
- block_sse = pixel_diff_dist(x, plane, blk_row, blk_col, plane_bsize,
- txsize_to_bsize[tx_size], &block_mse_q8);
+ block_sse = pixel_diff_dist(
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ cm,
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ x, plane, blk_row, blk_col, plane_bsize, txsize_to_bsize[tx_size],
+ &block_mse_q8);
assert(block_mse_q8 != UINT_MAX);
}
@@ -2420,6 +2473,11 @@
: AV1_XFORM_QUANT_FP,
cpi->oxcf.q_cfg.quant_b_adapt, &quant_param);
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ const int is_border_block = get_visible_dimensions(
+ xd, plane, blk_col, blk_row, txw, txh, cm->width, cm->height, NULL, NULL);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
// Iterate through all transform type candidates.
for (int idx = 0; idx < TX_TYPES; ++idx) {
const TX_TYPE tx_type = (TX_TYPE)txk_map[idx];
@@ -2433,6 +2491,12 @@
RD_STATS this_rd_stats;
av1_invalid_rd_stats(&this_rd_stats);
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ if (is_border_block)
+ av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size,
+ cm->width, cm->height, tx_type);
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+
if (!dc_only_blk)
av1_xform(x, plane, block, blk_row, blk_col, plane_bsize, &txfm_param);
else
@@ -2464,8 +2528,12 @@
this_rd_stats.dist = this_rd_stats.sse = block_sse;
} else if (dc_only_blk) {
this_rd_stats.sse = block_sse;
- this_rd_stats.dist = dist_block_px_domain(
- cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
+ this_rd_stats.dist =
+ dist_block_px_domain(cpi, x, plane,
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ plane_bsize,
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ block, blk_row, blk_col, tx_size);
} else if (use_transform_domain_distortion) {
dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
&this_rd_stats.sse);
@@ -2491,8 +2559,12 @@
if (tx_size != TX_64X64 || !is_high_energy ||
(sse_diff * 2) < this_rd_stats.sse) {
const int64_t tx_domain_dist = this_rd_stats.dist;
- this_rd_stats.dist = dist_block_px_domain(
- cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
+ this_rd_stats.dist =
+ dist_block_px_domain(cpi, x, plane,
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ plane_bsize,
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ block, blk_row, blk_col, tx_size);
// For high energy blocks, occasionally, the pixel domain distortion
// can be artificially low due to clamping at reconstruction stage
// even when inverse transform output is hugely different from the
@@ -2593,8 +2665,12 @@
p->dqcoeff = best_dqcoeff;
if (calc_pixel_domain_distortion_final && best_eob) {
- best_rd_stats->dist = dist_block_px_domain(
- cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
+ best_rd_stats->dist =
+ dist_block_px_domain(cpi, x, plane,
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ plane_bsize,
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ block, blk_row, blk_col, tx_size);
best_rd_stats->sse = block_sse;
}
@@ -3041,7 +3117,9 @@
if (!is_inter) {
av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
}
TXB_CTX txb_ctx;
@@ -3629,10 +3707,12 @@
const BLOCK_SIZE plane_bsize =
get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
+#if !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
if (is_inter) {
for (int plane = 1; plane < MAX_MB_PLANE; ++plane)
av1_subtract_plane(x, plane_bsize, plane);
}
+#endif // !CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
const int skip_trellis = 0;
const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
@@ -3744,7 +3824,12 @@
rd_stats->rate = mode_rate;
// cost and distortion
- av1_subtract_plane(x, bsize, 0);
+ av1_subtract_plane(x, bsize, 0
+#if CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ ,
+ cm->width, cm->height
+#endif // CONFIG_E191_PART2_OFS_PRED_RES_HANDLE
+ );
if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
!xd->lossless[mbmi->segment_id]) {
av1_pick_recursive_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, rd_thresh);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 9466391..f0c6f51 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -151,6 +151,9 @@
# This is an encode-only change.
set_aom_config_var(CONFIG_SCC_DETERMINATION 1
"Enable the screen content tools determination improvement.")
+# This is an encode-only change.
+set_aom_config_var(CONFIG_E191_PART2_OFS_PRED_RES_HANDLE 1
+ "Enable outside frame boundary block handling")
#
# Variables in this section control optional features of the build system.
#