Reduce predictor buffer size for tpl motion search
Change-Id: Ic191a0b8ba037b91375d2797671a2dcf7403e36e
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index c9f24ee..b93b5c3 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -159,7 +159,7 @@
src = xd->cur_buf->y_buffer + mb_y_offset;
src_stride = xd->cur_buf->y_stride;
- dst = &predictor[0];
+ dst = predictor;
dst_stride = bw;
av1_predict_intra_block(
@@ -180,10 +180,10 @@
int64_t sse;
if (is_cur_buf_hbd(xd)) {
sse = aom_highbd_sse(xd->cur_buf->y_buffer + mb_y_offset,
- xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+ xd->cur_buf->y_stride, predictor, bw, bw, bh);
} else {
sse = aom_sse(xd->cur_buf->y_buffer + mb_y_offset,
- xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+ xd->cur_buf->y_stride, predictor, bw, bw, bh);
}
intra_cost = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
}
@@ -240,13 +240,13 @@
mi_col * MI_SIZE, mi_row * MI_SIZE, xd, 0);
if (use_satd) {
if (is_cur_buf_hbd(xd)) {
- aom_highbd_subtract_block(
- bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
- xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
+ aom_highbd_subtract_block(bh, bw, src_diff, bw,
+ xd->cur_buf->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, predictor, bw, xd->bd);
} else {
aom_subtract_block(bh, bw, src_diff, bw,
xd->cur_buf->y_buffer + mb_y_offset,
- xd->cur_buf->y_stride, &predictor[0], bw);
+ xd->cur_buf->y_stride, predictor, bw);
}
wht_fwd_txfm(src_diff, bw, coeff, tx_size, is_cur_buf_hbd(xd));
@@ -255,10 +255,10 @@
int64_t sse;
if (is_cur_buf_hbd(xd)) {
sse = aom_highbd_sse(xd->cur_buf->y_buffer + mb_y_offset,
- xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+ xd->cur_buf->y_stride, predictor, bw, bw, bh);
} else {
sse = aom_sse(xd->cur_buf->y_buffer + mb_y_offset,
- xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+ xd->cur_buf->y_stride, predictor, bw, bw, bh);
}
inter_cost = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
}
@@ -471,9 +471,7 @@
const BLOCK_SIZE bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
av1_tile_init(&xd->tile, cm, 0, 0);
- DECLARE_ALIGNED(32, uint16_t, predictor16[MC_FLOW_NUM_PELS * 3]);
- DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 3]);
- uint8_t *predictor;
+ DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 2]);
DECLARE_ALIGNED(32, int16_t, src_diff[MC_FLOW_NUM_PELS]);
DECLARE_ALIGNED(32, tran_low_t, coeff[MC_FLOW_NUM_PELS]);
@@ -488,10 +486,8 @@
xd->cur_buf = this_frame;
- if (is_cur_buf_hbd(xd))
- predictor = CONVERT_TO_BYTEPTR(predictor16);
- else
- predictor = predictor8;
+ uint8_t *predictor =
+ is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8;
// TODO(jingning): remove the duplicate frames.
for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx)
@@ -734,16 +730,11 @@
const int pix_num = bw * bh;
const TX_SIZE tx_size = max_txsize_lookup[bsize];
- DECLARE_ALIGNED(32, uint16_t, predictor16[MC_FLOW_NUM_PELS * 3]);
- DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 3]);
- uint8_t *predictor;
+ DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 2]);
DECLARE_ALIGNED(32, int16_t, src_diff[MC_FLOW_NUM_PELS]);
DECLARE_ALIGNED(32, tran_low_t, coeff[MC_FLOW_NUM_PELS]);
-
- if (is_cur_buf_hbd(xd))
- predictor = CONVERT_TO_BYTEPTR(predictor16);
- else
- predictor = predictor8;
+ uint8_t *predictor =
+ is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8;
// Initialize advanced prediction parameters as default values
struct scale_factors sf;
@@ -785,7 +776,7 @@
src->y_buffer + mi_row * MI_SIZE * src->y_stride + mi_col * MI_SIZE;
const int src_stride = src->y_stride;
- uint8_t *dst_buf = &predictor[0];
+ uint8_t *dst_buf = predictor;
const int dst_stride = bw;
av1_predict_intra_block(cm, xd, bw, bh, tx_size, mode, 0, 0,
@@ -829,7 +820,7 @@
ref->y_stride, bsize, mi_row, mi_col);
av1_build_inter_predictor(
- ref->y_buffer + mb_y_offset_ref, ref->y_stride, &predictor[0], bw,
+ ref->y_buffer + mb_y_offset_ref, ref->y_stride, predictor, bw,
&x->best_mv.as_mv, &sf, bw, bh, &conv_params, kernel, &warp_types,
mi_col * MI_SIZE, mi_row * MI_SIZE, 0, 0, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE, xd, 0);
@@ -837,10 +828,10 @@
if (is_cur_buf_hbd(xd)) {
aom_highbd_subtract_block(bh, bw, src_diff, bw,
src->y_buffer + mb_y_offset, src->y_stride,
- &predictor[0], bw, xd->bd);
+ predictor, bw, xd->bd);
} else {
aom_subtract_block(bh, bw, src_diff, bw, src->y_buffer + mb_y_offset,
- src->y_stride, &predictor[0], bw);
+ src->y_stride, predictor, bw);
}
wht_fwd_txfm(src_diff, bw, coeff, tx_size, is_cur_buf_hbd(xd));
inter_cost = aom_satd(coeff, pix_num);
@@ -848,10 +839,10 @@
int64_t sse;
if (is_cur_buf_hbd(xd)) {
sse = aom_highbd_sse(src->y_buffer + mb_y_offset, src->y_stride,
- &predictor[0], bw, bw, bh);
+ predictor, bw, bw, bh);
} else {
- sse = aom_sse(src->y_buffer + mb_y_offset, src->y_stride,
- &predictor[0], bw, bw, bh);
+ sse = aom_sse(src->y_buffer + mb_y_offset, src->y_stride, predictor,
+ bw, bw, bh);
}
inter_cost = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
}