Merge "Optimization of 8bit block error for high bitdepth"
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 6619b00..ad02c95 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -170,8 +170,12 @@
static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) {
int i;
for (i = 0; i < n; ++i)
+#if CONFIG_MISC_FIXES
+ vp10_diff_update_prob(r, &p[i]);
+#else
if (vpx_read(r, MV_UPDATE_PROB))
p[i] = (vpx_read_literal(r, 7) << 1) | 1;
+#endif
}
static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) {
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index 23851af..03a81f5 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -459,6 +459,9 @@
// an invalid bitstream and need to return an error.
uint8_t marker;
+#if CONFIG_MISC_FIXES
+ size_t frame_sz_sum = 0;
+#endif
assert(data_sz);
marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
@@ -467,7 +470,7 @@
if ((marker & 0xe0) == 0xc0) {
const uint32_t frames = (marker & 0x7) + 1;
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
- const size_t index_sz = 2 + mag * frames;
+ const size_t index_sz = 2 + mag * (frames - CONFIG_MISC_FIXES);
// This chunk is marked as having a superframe index but doesn't have
// enough data for it, thus it's an invalid superframe index.
@@ -498,13 +501,19 @@
x = clear_buffer;
}
- for (i = 0; i < frames; ++i) {
+ for (i = 0; i < frames - CONFIG_MISC_FIXES; ++i) {
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j)
this_sz |= (*x++) << (j * 8);
sizes[i] = this_sz;
+#if CONFIG_MISC_FIXES
+ frame_sz_sum += this_sz;
+#endif
}
+#if CONFIG_MISC_FIXES
+ sizes[i] = data_sz - index_sz - frame_sz_sum;
+#endif
*count = frames;
}
}
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c
index 2902ece..d39e3dc 100644
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -163,26 +163,33 @@
case CATEGORY5_TOKEN:
val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r);
break;
- case CATEGORY6_TOKEN:
+ case CATEGORY6_TOKEN: {
+#if CONFIG_MISC_FIXES
+ const int skip_bits = TX_SIZES - 1 - tx_size;
+#else
+ const int skip_bits = 0;
+#endif
+ const uint8_t *cat6p = cat6_prob + skip_bits;
#if CONFIG_VP9_HIGHBITDEPTH
switch (xd->bd) {
case VPX_BITS_8:
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r);
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, r);
break;
case VPX_BITS_10:
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 16, r);
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 16 - skip_bits, r);
break;
case VPX_BITS_12:
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 18, r);
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 18 - skip_bits, r);
break;
default:
assert(0);
return -1;
}
#else
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r);
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, r);
#endif
break;
+ }
}
}
v = (val * dqv) >> dq_shift;
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 940ae88..1661fbd 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -122,8 +122,11 @@
static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
- vpx_bit_depth_t bit_depth) {
+ vpx_bit_depth_t bit_depth, const TX_SIZE tx) {
TOKENEXTRA *p = *tp;
+#if !CONFIG_MISC_FIXES
+ (void) tx;
+#endif
while (p < stop && p->token != EOSB_TOKEN) {
const int t = p->token;
@@ -171,6 +174,12 @@
if (b->base_val) {
const int e = p->extra, l = b->len;
+#if CONFIG_MISC_FIXES
+ int skip_bits =
+ (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0;
+#else
+ int skip_bits = 0;
+#endif
if (l) {
const unsigned char *pb = b->prob;
@@ -180,7 +189,12 @@
do {
const int bb = (v >> --n) & 1;
- vpx_write(w, bb, pb[i >> 1]);
+ if (skip_bits) {
+ skip_bits--;
+ assert(!bb);
+ } else {
+ vpx_write(w, bb, pb[i >> 1]);
+ }
i = b->tree[i + bb];
} while (n);
}
@@ -190,7 +204,7 @@
++p;
}
- *tp = p + (p->token == EOSB_TOKEN);
+ *tp = p;
}
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
@@ -382,6 +396,7 @@
const VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
MODE_INFO *m;
+ int plane;
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
@@ -398,8 +413,16 @@
pack_inter_mode_mvs(cpi, m, w);
}
- assert(*tok < tok_end);
- pack_mb_tokens(w, tok, tok_end, cm->bit_depth);
+ if (!m->mbmi.skip) {
+ assert(*tok < tok_end);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
+ : m->mbmi.tx_size;
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+ assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
+ (*tok)++;
+ }
+ }
}
static void write_partition(const VP10_COMMON *const cm,
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 019e5b1..ce1530c 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1356,9 +1356,6 @@
if (output_enabled) {
update_stats(&cpi->common, td);
-
- (*tp)->token = EOSB_TOKEN;
- (*tp)++;
}
}
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c
index ca2de1f..0736c65 100644
--- a/vp10/encoder/encodemv.c
+++ b/vp10/encoder/encodemv.c
@@ -15,6 +15,7 @@
#include "vp10/encoder/cost.h"
#include "vp10/encoder/encodemv.h"
+#include "vp10/encoder/subexp.h"
#include "vpx_dsp/vpx_dsp_common.h"
@@ -134,8 +135,12 @@
}
}
-static int update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
- vpx_prob upd_p) {
+static void update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
+ vpx_prob upd_p) {
+#if CONFIG_MISC_FIXES
+ (void) upd_p;
+ vp10_cond_prob_diff_update(w, cur_p, ct);
+#else
const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
const int update = cost_branch256(ct, *cur_p) + vp10_cost_zero(upd_p) >
cost_branch256(ct, new_p) + vp10_cost_one(upd_p) + 7 * 256;
@@ -144,7 +149,7 @@
*cur_p = new_p;
vpx_write_literal(w, new_p >> 1, 7);
}
- return update;
+#endif
}
static void write_mv_update(const vpx_tree_index *tree,
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index af915fe..cbebd5a 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -628,8 +628,16 @@
}
if (!dry_run) {
+ int plane;
+
td->counts->skip[ctx][0] += skip_inc;
- vp10_foreach_transformed_block(xd, bsize, tokenize_b, &arg);
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
+ &arg);
+ (*t)->token = EOSB_TOKEN;
+ (*t)++;
+ }
} else {
vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
}
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index 304f74e..409ed1c 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -91,7 +91,9 @@
size_t pending_cx_data_sz;
int pending_frame_count;
size_t pending_frame_sizes[8];
+#if !CONFIG_MISC_FIXES
size_t pending_frame_magnitude;
+#endif
vpx_image_t preview_img;
vpx_enc_frame_flags_t next_frame_flags;
vp8_postproc_cfg_t preview_ppcfg;
@@ -781,24 +783,39 @@
uint8_t marker = 0xc0;
unsigned int mask;
int mag, index_sz;
+#if CONFIG_MISC_FIXES
+ int i;
+ size_t max_frame_sz = 0;
+#endif
assert(ctx->pending_frame_count);
assert(ctx->pending_frame_count <= 8);
// Add the number of frames to the marker byte
marker |= ctx->pending_frame_count - 1;
+#if CONFIG_MISC_FIXES
+ for (i = 0; i < ctx->pending_frame_count - 1; i++) {
+ const size_t frame_sz = (unsigned int) ctx->pending_frame_sizes[i];
+ max_frame_sz = frame_sz > max_frame_sz ? frame_sz : max_frame_sz;
+ }
+#endif
// Choose the magnitude
for (mag = 0, mask = 0xff; mag < 4; mag++) {
+#if CONFIG_MISC_FIXES
+ if (max_frame_sz <= mask)
+ break;
+#else
if (ctx->pending_frame_magnitude < mask)
break;
+#endif
mask <<= 8;
mask |= 0xff;
}
marker |= mag << 3;
// Write the index
- index_sz = 2 + (mag + 1) * ctx->pending_frame_count;
+ index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - CONFIG_MISC_FIXES);
if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) {
uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz;
int i, j;
@@ -818,7 +835,7 @@
#endif
*x++ = marker;
- for (i = 0; i < ctx->pending_frame_count; i++) {
+ for (i = 0; i < ctx->pending_frame_count - CONFIG_MISC_FIXES; i++) {
unsigned int this_sz = (unsigned int)ctx->pending_frame_sizes[i];
for (j = 0; j <= mag; j++) {
@@ -974,7 +991,9 @@
ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+#if !CONFIG_MISC_FIXES
ctx->pending_frame_magnitude |= size;
+#endif
cx_data += size;
cx_data_sz -= size;
@@ -991,7 +1010,9 @@
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
ctx->pending_frame_count = 0;
+#if !CONFIG_MISC_FIXES
ctx->pending_frame_magnitude = 0;
+#endif
ctx->output_cx_pkt_cb.output_cx_pkt(
&pkt, ctx->output_cx_pkt_cb.user_priv);
}
@@ -1008,7 +1029,9 @@
if (ctx->pending_cx_data) {
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+#if !CONFIG_MISC_FIXES
ctx->pending_frame_magnitude |= size;
+#endif
ctx->pending_cx_data_sz += size;
// write the superframe only for the case when
if (!ctx->output_cx_pkt_cb.output_cx_pkt)
@@ -1018,7 +1041,9 @@
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
ctx->pending_frame_count = 0;
+#if !CONFIG_MISC_FIXES
ctx->pending_frame_magnitude = 0;
+#endif
} else {
pkt.data.frame.buf = cx_data;
pkt.data.frame.sz = size;
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 4cac388..678e312 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -195,8 +195,8 @@
int mi_row,
int mi_col,
PICK_MODE_CONTEXT *ctx,
- int *motion_magnitude
- ) {
+ int *motion_magnitude,
+ int is_skin) {
int mv_col, mv_row;
int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
MV_REFERENCE_FRAME frame;
@@ -214,6 +214,9 @@
saved_mbmi = *mbmi;
+ if (is_skin && *motion_magnitude > 16)
+ return COPY_BLOCK;
+
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
if (frame != INTRA_FRAME &&
@@ -313,18 +316,37 @@
int mi_row, int mi_col, BLOCK_SIZE bs,
PICK_MODE_CONTEXT *ctx) {
int motion_magnitude = 0;
- VP9_DENOISER_DECISION decision = FILTER_BLOCK;
+ VP9_DENOISER_DECISION decision = COPY_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride,
mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
+ int is_skin = 0;
+
+ if (bs <= BLOCK_16X16) {
+ // Use the pixel at the center of the block to determine is_skin.
+ const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1;
+ const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1;
+ const int uv_width_shift = y_width_shift >> 1;
+ const int uv_height_shift = y_height_shift >> 1;
+ const int stride = mb->plane[0].src.stride;
+ const int strideuv = mb->plane[1].src.stride;
+ const uint8_t ysource =
+ mb->plane[0].src.buf[y_height_shift * stride + y_width_shift];
+ const uint8_t usource =
+ mb->plane[1].src.buf[uv_height_shift * strideuv + uv_width_shift];
+ const uint8_t vsource =
+ mb->plane[2].src.buf[uv_height_shift * strideuv + uv_width_shift];
+ is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ }
decision = perform_motion_compensation(denoiser, mb, bs,
denoiser->increase_denoising,
mi_row, mi_col, ctx,
- &motion_magnitude);
+ &motion_magnitude,
+ is_skin);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride,
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index c66fdf4..ec0b25e 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -12,6 +12,7 @@
#define VP9_ENCODER_DENOISER_H_
#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_skin_detection.h"
#include "vpx_scale/yv12config.h"
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c
index aaa8ea0..c2763b7 100644
--- a/vp9/encoder/vp9_skin_detection.c
+++ b/vp9/encoder/vp9_skin_detection.c
@@ -98,12 +98,13 @@
uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)];
uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)];
uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)];
+ int is_skin = 0;
if (mode_filter == 1) {
ysource = (ysource + ysource2 + ysource3 + ysource4) >> 2;
usource = (usource + usource2 + usource3 + usource4) >> 2;
vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2;
}
- const int is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ is_skin = vp9_skin_pixel(ysource, usource, vsource);
for (i = 0; i < y_bsize; i++) {
for (j = 0; j < y_bsize; j++) {
if (is_skin)
diff --git a/vp9/encoder/vp9_skin_detection.h b/vp9/encoder/vp9_skin_detection.h
index 3d4e737..0a87ef9 100644
--- a/vp9/encoder/vp9_skin_detection.h
+++ b/vp9/encoder/vp9_skin_detection.h
@@ -25,7 +25,8 @@
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
-void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file);
+void vp9_compute_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file);
+extern void vp9_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f);
#endif
#ifdef __cplusplus