Merge "Minor refactor of decode_block for supertx." into nextgenv2
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index d7b63a3..866bda8 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -333,22 +333,15 @@
return master;
}
-static const uint8_t *get_wedge_mask(int wedge_index,
- int neg,
- BLOCK_SIZE bsize) {
- return wedge_params_lookup[bsize].masks[neg][wedge_index];
-}
-
const uint8_t *vp10_get_soft_mask(int wedge_index,
int wedge_sign,
BLOCK_SIZE sb_type,
int offset_x,
int offset_y) {
- const int bw = 4 * num_4x4_blocks_wide_lookup[sb_type];
const uint8_t *mask =
- get_wedge_mask(wedge_index, wedge_sign, sb_type);
+ get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
if (mask)
- mask -= (offset_x + offset_y * bw);
+ mask -= (offset_x + offset_y * MASK_MASTER_STRIDE);
return mask;
}
@@ -469,7 +462,7 @@
vpx_blend_mask6(dst, dst_stride,
src0, src0_stride,
src1, src1_stride,
- mask, 4 * num_4x4_blocks_wide_lookup[sb_type],
+ mask, MASK_MASTER_STRIDE,
h, w, subh, subw);
}
@@ -489,7 +482,7 @@
vpx_highbd_blend_mask6(dst_8, dst_stride,
src0_8, src0_stride,
src1_8, src1_stride,
- mask, 4 * num_4x4_blocks_wide_lookup[sb_type],
+ mask, MASK_MASTER_STRIDE,
h, w, subh, subw, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -506,8 +499,8 @@
// pass in subsampling factors directly.
const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
- sb_type, 0, 0);
+ const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index, wedge_sign,
+ sb_type);
vpx_blend_mask6(dst, dst_stride,
src0, src0_stride,
src1, src1_stride,
@@ -526,8 +519,8 @@
// pass in subsampling factors directly.
const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
- sb_type, 0, 0);
+ const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index, wedge_sign,
+ sb_type);
vpx_highbd_blend_mask6(dst_8, dst_stride,
src0_8, src0_stride,
src1_8, src1_stride,
@@ -713,66 +706,79 @@
#if CONFIG_DUAL_FILTER
if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0) {
- int blk_num = 1 << (pd->subsampling_x + pd->subsampling_y);
- int chr_idx;
- int x_base = x;
- int y_base = y;
- int x_step = w >> pd->subsampling_x;
- int y_step = h >> pd->subsampling_y;
+ // block size in log2
+ const int b4_wl = b_width_log2_lookup[mi->mbmi.sb_type];
+ const int b4_hl = b_height_log2_lookup[mi->mbmi.sb_type];
+ const int b8_sl = b_width_log2_lookup[BLOCK_8X8];
- for (chr_idx = 0; chr_idx < blk_num; ++chr_idx) {
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *dst = dst_buf->buf;
- const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv;
- const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
- pd->subsampling_x,
- pd->subsampling_y);
- uint8_t *pre;
- MV32 scaled_mv;
- int xs, ys, subpel_x, subpel_y;
- const int is_scaled = vp10_is_scaled(sf);
+ // block size
+ const int b4_w = 1 << b4_wl;
+ const int b4_h = 1 << b4_hl;
+ const int b8_s = 1 << b8_sl;
+ int idx, idy;
- x = x_base + (chr_idx & 0x01) * x_step;
- y = y_base + (chr_idx >> 1) * y_step;
+ const int x_base = x;
+ const int y_base = y;
- dst += dst_buf->stride * y + x;
+ // processing unit size
+ const int x_step = w >> (b8_sl - b4_wl);
+ const int y_step = h >> (b8_sl - b4_hl);
- if (is_scaled) {
- pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
- scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
- xs = sf->x_step_q4;
- ys = sf->y_step_q4;
- } else {
- pre = pre_buf->buf + y * pre_buf->stride + x;
- scaled_mv.row = mv_q4.row;
- scaled_mv.col = mv_q4.col;
- xs = ys = 16;
+ for (idy = 0; idy < b8_s; idy += b4_h) {
+ for (idx = 0; idx < b8_s; idx += b4_w) {
+ const int chr_idx = (idy * 2) + idx;
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ struct buf_2d *const dst_buf = &pd->dst;
+ uint8_t *dst = dst_buf->buf;
+ const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv;
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(
+ xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+ uint8_t *pre;
+ MV32 scaled_mv;
+ int xs, ys, subpel_x, subpel_y;
+ const int is_scaled = vp10_is_scaled(sf);
+
+ x = x_base + idx * x_step;
+ y = y_base + idy * y_step;
+
+ dst += dst_buf->stride * y + x;
+
+ if (is_scaled) {
+ pre =
+ pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+ scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+ xs = sf->x_step_q4;
+ ys = sf->y_step_q4;
+ } else {
+ pre = pre_buf->buf + y * pre_buf->stride + x;
+ scaled_mv.row = mv_q4.row;
+ scaled_mv.col = mv_q4.col;
+ xs = ys = 16;
+ }
+
+ subpel_x = scaled_mv.col & SUBPEL_MASK;
+ subpel_y = scaled_mv.row & SUBPEL_MASK;
+ pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+ (scaled_mv.col >> SUBPEL_BITS);
+
+#if CONFIG_EXT_INTER
+ if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
+ mi->mbmi.use_wedge_interinter)
+ vp10_make_masked_inter_predictor(
+ pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
+ sf, w, h, mi->mbmi.interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+ wedge_offset_x, wedge_offset_y,
+#endif // CONFIG_SUPERTX
+ xd);
+ else
+#endif // CONFIG_EXT_INTER
+ vp10_make_inter_predictor(
+ pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
+ sf, x_step, y_step, ref, mi->mbmi.interp_filter, xs, ys, xd);
}
-
- subpel_x = scaled_mv.col & SUBPEL_MASK;
- subpel_y = scaled_mv.row & SUBPEL_MASK;
- pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
- + (scaled_mv.col >> SUBPEL_BITS);
-
- #if CONFIG_EXT_INTER
- if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
- mi->mbmi.use_wedge_interinter)
- vp10_make_masked_inter_predictor(
- pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h,
- mi->mbmi.interp_filter, xs, ys,
- #if CONFIG_SUPERTX
- wedge_offset_x, wedge_offset_y,
- #endif // CONFIG_SUPERTX
- xd);
- else
- #endif // CONFIG_EXT_INTER
- vp10_make_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, x_step, y_step, ref,
- mi->mbmi.interp_filter, xs, ys, xd);
}
}
return;
@@ -1887,8 +1893,9 @@
if (use_wedge_interintra) {
if (is_interintra_wedge_used(bsize)) {
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
- bsize, 0, 0);
+ const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index,
+ wedge_sign,
+ bsize);
const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
vpx_blend_mask6(comppred, compstride,
@@ -2026,8 +2033,9 @@
if (use_wedge_interintra) {
if (is_interintra_wedge_used(bsize)) {
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
- bsize, 0, 0);
+ const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index,
+ wedge_sign,
+ bsize);
const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
vpx_highbd_blend_mask6(comppred8, compstride,
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index e84e20e..537d767 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -589,6 +589,12 @@
void vp10_init_wedge_masks();
+static INLINE const uint8_t *vp10_get_contiguous_soft_mask(int wedge_index,
+ int wedge_sign,
+ BLOCK_SIZE sb_type) {
+ return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index];
+}
+
const uint8_t *vp10_get_soft_mask(int wedge_index,
int wedge_sign,
BLOCK_SIZE sb_type,
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 9d0eb66..bd9dc55 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -60,8 +60,10 @@
tran_low_t qc;
} vp10_token_state;
-// TODO(jimbankoski): experiment to find optimal RD numbers.
-static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] ={ {9, 7}, {8, 5}, };
+// These numbers are empirically obtained.
+static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
+ {10, 6}, {8, 5},
+};
#define UPDATE_RD_COST()\
{\
@@ -97,17 +99,17 @@
const int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
const int default_eob = 16 << (tx_size << 1);
- int mul;
const int16_t *dequant_ptr = pd->dequant;
-#if CONFIG_NEW_QUANT
- const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq;
-#endif // CONFIG_NEW_QUANT
const uint8_t *const band_translate = get_band_translate(tx_size);
TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
const scan_order *const so =
get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
+#if CONFIG_NEW_QUANT
+ const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq;
+#endif // CONFIG_NEW_QUANT
+ int shift = get_tx_scale(xd, tx_type, tx_size);
int next = eob, sz = 0;
const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
const int64_t rddiv = mb->rddiv;
@@ -116,7 +118,6 @@
int16_t t0, t1;
EXTRABIT e0;
int best, band, pt, i, final_eob;
- int shift = get_tx_scale(xd, tx_type, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
@@ -125,7 +126,6 @@
assert((!type && !plane) || (type && plane));
assert(eob <= default_eob);
- mul = 1 << shift;
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
/* Initialize the sentinel node of the trellis. */
@@ -166,7 +166,8 @@
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = vp10_get_cost(t0, e0, cat6_high_cost);
- dx = mul * (dqcoeff[rc] - coeff[rc]);
+
+ dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx >>= xd->bd - 8;
@@ -188,14 +189,13 @@
shortcut = (
(dequant_abscoeff_nuq(
abs(x), dequant_ptr[rc != 0],
- dequant_val[band_translate[i]]) > abs(coeff[rc]) * mul) &&
+ dequant_val[band_translate[i]]) > (abs(coeff[rc]) << shift)) &&
(dequant_abscoeff_nuq(
abs(x) - 1, dequant_ptr[rc != 0],
- dequant_val[band_translate[i]]) < abs(coeff[rc]) * mul));
+ dequant_val[band_translate[i]]) < (abs(coeff[rc]) << shift)));
#else // CONFIG_NEW_QUANT
-
- if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
- (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
+ if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
+ (abs(x) * dequant_ptr[rc != 0] < (abs(coeff[rc]) << shift) +
dequant_ptr[rc != 0]))
shortcut = 1;
else
@@ -205,6 +205,11 @@
if (shortcut) {
sz = -(x < 0);
x -= 2 * sz + 1;
+ } else {
+ tokens[i][1] = tokens[i][0];
+ best_index[i][1] = best_index[i][0];
+ next = i;
+ continue;
}
/* Consider both possible successor states. */
@@ -242,7 +247,7 @@
#if CONFIG_NEW_QUANT
dx = dequant_coeff_nuq(
x, dequant_ptr[rc != 0],
- dequant_val[band_translate[i]]) - coeff[rc] * mul;
+ dequant_val[band_translate[i]]) - coeff[rc] * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx >>= xd->bd - 8;
@@ -320,7 +325,8 @@
if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift);
if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
#else
- dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
+ dqcoeff[rc] = (abs(x * dequant_ptr[rc != 0]) >> shift);
+ if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
#endif // CONFIG_NEW_QUANT
next = tokens[i][best].next;
@@ -894,7 +900,6 @@
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
- struct optimize_ctx *const ctx = args->ctx;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -906,8 +911,8 @@
const int bwl = b_width_log2_lookup[plane_bsize];
#endif
dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
- a = &ctx->ta[plane][blk_col];
- l = &ctx->tl[plane][blk_row];
+ a = &args->ta[blk_col];
+ l = &args->tl[blk_row];
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
@@ -989,7 +994,7 @@
}
#endif
- if (x->optimize) {
+ if (x->optimize && p->eobs[block]) {
int ctx;
#if CONFIG_VAR_TX
switch (tx_size) {
@@ -1149,7 +1154,7 @@
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip};
+ struct encode_b_args arg = {x, &ctx, &mbmi->skip, NULL, NULL};
int plane;
mbmi->skip = 1;
@@ -1173,6 +1178,9 @@
#endif
vp10_subtract_plane(x, bsize, plane);
+ arg.ta = ctx.ta[plane];
+ arg.tl = ctx.tl[plane];
+
if (x->optimize) {
#if CONFIG_VAR_TX
vp10_get_entropy_contexts(bsize, TX_4X4, pd,
@@ -1205,7 +1213,7 @@
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip};
+ struct encode_b_args arg = {x, &ctx, &mbmi->skip, NULL, NULL};
int plane;
mbmi->skip = 1;
@@ -1218,6 +1226,8 @@
vp10_subtract_plane(x, bsize, plane);
vp10_get_entropy_contexts(bsize, tx_size, pd,
ctx.ta[plane], ctx.tl[plane]);
+ arg.ta = ctx.ta[plane];
+ arg.tl = ctx.tl[plane];
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
&arg);
}
@@ -1246,8 +1256,8 @@
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const int tx1d_size = get_tx1d_size(tx_size);
-
INV_TXFM_PARAM inv_txfm_param;
+ ENTROPY_CONTEXT *a, *l;
dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
@@ -1274,21 +1284,16 @@
tx_size);
#else // CONFIG_NEW_QUANT
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- VP10_XFORM_QUANT_B);
+ VP10_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
-
- if (args->ctx != NULL) {
- struct optimize_ctx *const ctx = args->ctx;
- ENTROPY_CONTEXT *a, *l;
- a = &ctx->ta[plane][blk_col];
- l = &ctx->tl[plane][blk_row];
- if (x->optimize) {
- int ctx;
- ctx = combine_entropy_contexts(*a, *l);
- *a = *l = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
- } else {
- *a = *l = p->eobs[block] > 0;
- }
+ a = &args->ta[blk_col];
+ l = &args->tl[blk_row];
+ if (x->optimize && p->eobs[block]) {
+ int ctx;
+ ctx = combine_entropy_contexts(*a, *l);
+ *a = *l = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
+ } else {
+ *a = *l = p->eobs[block] > 0;
}
if (*eob) {
@@ -1315,18 +1320,18 @@
void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
int enable_optimize_b) {
const MACROBLOCKD *const xd = &x->e_mbd;
- struct optimize_ctx ctx;
- struct encode_b_args arg = {x, &ctx, &xd->mi[0]->mbmi.skip};
+ // Zeroed: vp10_encode_block_intra() may read ta/tl even if not fetched below.
+ ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = {0};
+ ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = {0};
+ struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip, ta, tl};
if (enable_optimize_b && x->optimize) {
const struct macroblockd_plane* const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(&xd->mi[0]->mbmi, pd) :
xd->mi[0]->mbmi.tx_size;
- vp10_get_entropy_contexts(bsize, tx_size, pd,
- ctx.ta[plane], ctx.tl[plane]);
- } else {
- arg.ctx = NULL;
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ta, tl);
}
+
vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
vp10_encode_block_intra, &arg);
}
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h
index cef6ccc..c241b00 100644
--- a/vp10/encoder/encodemb.h
+++ b/vp10/encoder/encodemb.h
@@ -22,6 +22,8 @@
MACROBLOCK *x;
struct optimize_ctx *ctx;
int8_t *skip;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
};
typedef enum VP10_XFORM_QUANT {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index b81c561..b000da8 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1213,18 +1213,17 @@
int rate;
int64_t dist;
int64_t sse;
-#if !CONFIG_NEW_QUANT
ENTROPY_CONTEXT coeff_ctx = combine_entropy_contexts(
*(args->t_above + blk_col), *(args->t_left + blk_row));
-#endif
if (args->exit_early)
return;
if (!is_inter_block(mbmi)) {
- struct encode_b_args arg = {x, NULL, &mbmi->skip};
+ struct encode_b_args intra_arg = {x, NULL, &mbmi->skip, args->t_above,
+ args->t_left};
vp10_encode_block_intra(plane, block, blk_row, blk_col,
- plane_bsize, tx_size, &arg);
+ plane_bsize, tx_size, &intra_arg);
if (args->cpi->sf.use_transform_domain_distortion) {
dist_block(args->cpi, x, plane, block, blk_row, blk_col,
@@ -1269,9 +1268,10 @@
#else
vp10_xform_quant(x, plane, block, blk_row, blk_col,
plane_bsize, tx_size, VP10_XFORM_QUANT_FP);
- vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
#endif // CONFIG_NEW_QUANT
- dist_block(args->cpi, x, plane, block, blk_row, blk_col,
+ if (x->plane[plane].eobs[block])
+ vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
+ dist_block(args->cpi, x, plane, block, blk_row, blk_col,
tx_size, &dist, &sse);
} else if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
SKIP_TXFM_AC_ONLY) {
@@ -1324,8 +1324,9 @@
#else
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
VP10_XFORM_QUANT_FP);
- vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
#endif // CONFIG_NEW_QUANT
+ if (x->plane[plane].eobs[block])
+ vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
dist_block(args->cpi, x, plane, block, blk_row, blk_col,
tx_size, &dist, &sse);
}
@@ -6512,7 +6513,7 @@
BLOCK_SIZE sb_type = mbmi->sb_type;
const uint8_t *mask;
const int mask_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
- mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type, 0, 0);
+ mask = vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
if (which == 0 || which == 2)
do_masked_motion_search(cpi, x, mask, mask_stride, bsize,
@@ -6521,7 +6522,7 @@
if (which == 1 || which == 2) {
// get the negative mask
- mask = vp10_get_soft_mask(wedge_index, !wedge_sign, sb_type, 0, 0);
+ mask = vp10_get_contiguous_soft_mask(wedge_index, !wedge_sign, sb_type);
do_masked_motion_search(cpi, x, mask, mask_stride, bsize,
mi_row, mi_col, &tmp_mv[1], &rate_mv[1],
1, mv_idx[1]);
@@ -6577,6 +6578,13 @@
uint32_t esq[2][4], var;
int64_t tl, br;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ pred0 = CONVERT_TO_BYTEPTR(pred0);
+ pred1 = CONVERT_TO_BYTEPTR(pred1);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
var = cpi->fn_ptr[f_index].vf(
src, src_stride,
pred0, stride0, &esq[0][0]);
@@ -7577,8 +7585,8 @@
// Refine motion vector.
if (have_newmv_in_inter_mode(this_mode) && best_wedge_index > -1) {
// get negative of mask
- const uint8_t* mask = vp10_get_soft_mask(
- best_wedge_index, 1, bsize, 0, 0);
+ const uint8_t* mask = vp10_get_contiguous_soft_mask(
+ best_wedge_index, 1, bsize);
mbmi->interintra_wedge_index = best_wedge_index;
mbmi->interintra_wedge_sign = 0;
do_masked_motion_search(cpi, x, mask, bw, bsize,
@@ -8307,6 +8315,189 @@
color_map[r * cols + c] = indices[r * cols + c];
}
+#if CONFIG_EXT_INTRA
+static void pick_ext_intra_iframe(VP10_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
+ int *rate_uv_intra, int *rate_uv_tokenonly,
+ int64_t *dist_uv, int *skip_uv,
+ PREDICTION_MODE *mode_uv,
+ EXT_INTRA_MODE_INFO *ext_intra_mode_info_uv,
+ PALETTE_MODE_INFO *pmi_uv,
+ int8_t *uv_angle_delta,
+ int palette_ctx, int skip_mask,
+ unsigned int *ref_costs_single,
+ int64_t *best_rd, int64_t *best_intra_rd,
+ PREDICTION_MODE *best_intra_mode,
+ int *best_mode_index, int *best_skip2,
+ int *best_mode_skippable,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ int64_t *best_pred_rd,
+ MB_MODE_INFO *best_mbmode, RD_COST *rd_cost) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
+ int dc_mode_index;
+ const int * const intra_mode_cost =
+ cpi->mbmode_cost[size_group_lookup[bsize]];
+ int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd, distortion_uv;
+ TX_SIZE uv_tx;
+
+ for (i = 0; i < MAX_MODES; ++i)
+ if (vp10_mode_order[i].mode == DC_PRED &&
+ vp10_mode_order[i].ref_frame[0] == INTRA_FRAME)
+ break;
+ dc_mode_index = i;
+ assert(i < MAX_MODES);
+
+ // TODO(huisu): use skip_mask for further speedup.
+ (void)skip_mask;
+ mbmi->mode = DC_PRED;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
+ if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize,
+ intra_mode_cost[mbmi->mode], &this_rd, 0))
+ return;
+ if (rate_y == INT_MAX)
+ return;
+
+ uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize,
+ xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y);
+ if (rate_uv_intra[uv_tx] == INT_MAX) {
+ choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
+ &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
+ &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
+ if (cm->allow_screen_content_tools)
+ pmi_uv[uv_tx] = *pmi;
+ ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+ uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
+ }
+
+ rate_uv = rate_uv_tokenonly[uv_tx];
+ distortion_uv = dist_uv[uv_tx];
+ skippable = skippable && skip_uv[uv_tx];
+ mbmi->uv_mode = mode_uv[uv_tx];
+ if (cm->allow_screen_content_tools) {
+ pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
+ memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
+ pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
+ 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
+ }
+ mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+ if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+ }
+
+ rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
+ cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
+ if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED)
+ rate2 +=
+ vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
+ [palette_ctx], 0);
+
+ if (!xd->lossless[mbmi->segment_id]) {
+ // super_block_yrd above includes the cost of the tx_size in the
+ // tokenonly rate, but for intra blocks, tx_size is always coded
+ // (prediction granularity), so we account for it in the full rate,
+ // not the tokenonly rate.
+ rate_y -=
+ cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
+ [mbmi->tx_size];
+ }
+
+ rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
+ rate2 += write_uniform_cost(FILTER_INTRA_MODES,
+ mbmi->ext_intra_mode_info.ext_intra_mode[0]);
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[1]);
+ }
+ if (ALLOW_FILTER_INTRA_MODES && mbmi->mode == DC_PRED) {
+ rate2 += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1])
+ rate2 +=
+ write_uniform_cost(FILTER_INTRA_MODES,
+ mbmi->ext_intra_mode_info.ext_intra_mode[1]);
+ }
+ distortion2 = distortion_y + distortion_uv;
+ vp10_encode_intra_block_plane(x, bsize, 0, 0);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ x->recon_variance =
+ vp10_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst,
+ bsize, xd->bd);
+ } else {
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+ }
+#else
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ rate2 += ref_costs_single[INTRA_FRAME];
+
+ if (skippable) {
+ rate2 -= (rate_y + rate_uv);
+ rate_y = 0;
+ rate_uv = 0;
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ } else {
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ }
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ rd_variance_adjustment(x, &this_rd, INTRA_FRAME,
+#if CONFIG_OBMC
+ is_inter_block(mbmi),
+#endif // CONFIG_OBMC
+ x->source_variance);
+
+ if (this_rd < *best_intra_rd) {
+ *best_intra_rd = this_rd;
+ *best_intra_mode = mbmi->mode;
+ }
+ for (i = 0; i < REFERENCE_MODES; ++i)
+ best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
+
+ if (this_rd < *best_rd) {
+ *best_mode_index = dc_mode_index;
+ mbmi->mv[0].as_int = 0;
+ rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ if (x->skip)
+ *returnrate_nocoef = rate2;
+ else
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd), skippable);
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
+ mbmi->ref_frame[0] != INTRA_FRAME);
+#endif // CONFIG_SUPERTX
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
+ *best_rd = this_rd;
+ *best_mbmode = *mbmi;
+ *best_skip2 = 0;
+ *best_mode_skippable = skippable;
+ memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
+ sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
TileDataEnc *tile_data,
MACROBLOCK *x,
@@ -8373,7 +8564,7 @@
PALETTE_MODE_INFO pmi_uv[TX_SIZES];
#if CONFIG_EXT_INTRA
EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
- int8_t uv_angle_delta[TX_SIZES];
+ int8_t uv_angle_delta[TX_SIZES], dc_skipped = 1;
int is_directional_mode, angle_stats_ready = 0;
int rate_overhead, rate_dummy;
uint8_t directional_mode_skip_mask[INTRA_MODES];
@@ -8936,28 +9127,6 @@
mbmi->angle_delta[0] = 0;
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
NULL, bsize, best_rd);
- if (rate_y == INT_MAX)
- continue;
- }
-
- // TODO(huisu): ext-intra is turned off in lossless mode for now to
- // avoid a unit test failure
- if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] &&
- ALLOW_FILTER_INTRA_MODES) {
- MB_MODE_INFO mbmi_copy = *mbmi;
-
- if (rate_y != INT_MAX) {
- int this_rate = rate_y + intra_mode_cost[mbmi->mode] +
- vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
- } else {
- this_rd = best_rd;
- }
-
- if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
- &skippable, bsize,
- intra_mode_cost[mbmi->mode], &this_rd, 0))
- *mbmi = mbmi_copy;
}
#else
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
@@ -8966,6 +9135,12 @@
if (rate_y == INT_MAX)
continue;
+
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode == DC_PRED)
+ dc_skipped = 0;
+#endif // CONFIG_EXT_INTRA
+
uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
pd->subsampling_y);
if (rate_uv_intra[uv_tx] == INT_MAX) {
@@ -9033,16 +9208,31 @@
rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
- EXT_INTRA_MODE ext_intra_mode =
- mbmi->ext_intra_mode_info.ext_intra_mode[0];
- rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
+ rate2 +=
+ write_uniform_cost(FILTER_INTRA_MODES,
+ mbmi->ext_intra_mode_info.ext_intra_mode[0]);
}
}
+
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[1]);
+ }
+
+ if (ALLOW_FILTER_INTRA_MODES && mbmi->mode == DC_PRED) {
+ rate2 += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1])
+ rate2 +=
+ write_uniform_cost(FILTER_INTRA_MODES,
+ mbmi->ext_intra_mode_info.ext_intra_mode[1]);
+ }
#endif // CONFIG_EXT_INTRA
if (this_mode != DC_PRED && this_mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
- vp10_encode_intra_block_plane(x, bsize, 0, 0);
+ vp10_encode_intra_block_plane(x, bsize, 0, 1);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
x->recon_variance =
@@ -9505,9 +9695,11 @@
break;
}
- if (sf->tx_type_search.fast_inter_tx_type_search == 1 &&
- xd->lossless[mbmi->segment_id] == 0 &&
- best_mode_index >= 0) {
+ if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
+ ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
+ is_inter_mode(best_mbmode.mode)) ||
+ (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
+ !is_inter_mode(best_mbmode.mode)))) {
int rate_y = 0, rate_uv = 0;
int64_t dist_y = 0, dist_uv = 0;
int skip_y = 0, skip_uv = 0, skip_blk = 0;
@@ -9706,6 +9898,26 @@
}
PALETTE_EXIT:
+#if CONFIG_EXT_INTRA
+ // TODO(huisu): ext-intra is turned off in lossless mode for now to
+ // avoid a unit test failure
+ if (!xd->lossless[mbmi->segment_id] &&
+ mbmi->palette_mode_info.palette_size[0] == 0 && !dc_skipped &&
+ best_mode_index >= 0 && (best_intra_rd >> 1) < best_rd) {
+ pick_ext_intra_iframe(cpi, x, ctx, bsize, rate_uv_intra,
+ rate_uv_tokenonly, dist_uv, skip_uv,
+ mode_uv, ext_intra_mode_info_uv,
+ pmi_uv, uv_angle_delta, palette_ctx, 0,
+ ref_costs_single, &best_rd, &best_intra_rd,
+ &best_intra_mode, &best_mode_index,
+ &best_skip2, &best_mode_skippable,
+#if CONFIG_SUPERTX
+ returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ best_pred_rd, &best_mbmode, rd_cost);
+ }
+#endif // CONFIG_EXT_INTRA
+
// The inter modes' rate costs are not calculated precisely in some cases.
// Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
// ZEROMV. Here, checks are added for those cases, and the mode decisions
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index bd0cb81..53b8bd7 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -132,6 +132,9 @@
sf->adaptive_rd_thresh = 1;
+ sf->tx_type_search.fast_intra_tx_type_search = 1;
+ sf->tx_type_search.fast_inter_tx_type_search = 1;
+
if (speed >= 1) {
if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
vp10_internal_image_edge(cpi)) {