Merge changes I7b9f40dc,I76e74f2e

* changes:
  vp9: correct context buffer resize check
  vp9: fail decode if block/frame refs are corrupt

diff --git a/vp8/common/arm/neon/bilinearpredict_neon.c b/vp8/common/arm/neon/bilinearpredict_neon.c index d77f2ba..9824a31 100644 --- a/vp8/common/arm/neon/bilinearpredict_neon.c +++ b/vp8/common/arm/neon/bilinearpredict_neon.c
@@ -10,7 +10,7 @@ #include <arm_neon.h> -static const uint16_t bifilter4_coeff[8][2] = { +static const uint8_t bifilter4_coeff[8][2] = { {128, 0}, {112, 16}, { 96, 32}, @@ -64,8 +64,8 @@ q1u8 = vcombine_u8(d2u8, d3u8); q2u8 = vcombine_u8(d4u8, d5u8); - d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]); + d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8); q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8); @@ -155,8 +155,8 @@ q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q5u8 = vld1q_u8(src_ptr); - d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]); + d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); @@ -245,8 +245,8 @@ q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]); + d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
diff --git a/vp8/common/arm/variance_arm.c b/vp8/common/arm/variance_arm.c index e3f7083..467a509 100644 --- a/vp8/common/arm/variance_arm.c +++ b/vp8/common/arm/variance_arm.c
@@ -95,7 +95,7 @@ #endif /* HAVE_MEDIA */ -#if HAVE_NEON_ASM +#if HAVE_NEON extern unsigned int vp8_sub_pixel_variance16x16_neon_func (
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index fc8c407..b1b079c 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c
@@ -635,7 +635,6 @@ } ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->sz = sizeof(*priv); ctx->priv->init_flags = ctx->init_flags; if (ctx->config.enc)
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index 6d9ecc0..3ab8ed0 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c
@@ -84,7 +84,6 @@ (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->sz = sizeof(*priv); ctx->priv->init_flags = ctx->init_flags; priv->si.sz = sizeof(priv->si);
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index f2a3eef..fa49568 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl
@@ -284,15 +284,6 @@ $vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; } -add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; -specialize qw/vp9_blend_mb_inner/; - -add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; -specialize qw/vp9_blend_mb_outer/; - -add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; -specialize qw/vp9_blend_b/; - # # Sub Pixel Filters # @@ -693,16 +684,16 @@ specialize qw/vp9_sad4x4x4d sse/; add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse16x16 sse2 avx2/; +specialize qw/vp9_mse16x16 avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse8x16 sse2/; +specialize qw/vp9_mse8x16/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse16x8 sse2/; +specialize qw/vp9_mse16x8/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse8x8 sse2/; +specialize qw/vp9_mse8x8/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; specialize qw/vp9_get_mb_ss sse2/;
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 6b89334..a9c03f0 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c
@@ -901,7 +901,7 @@ pbi->mb.corrupted |= tile_data->xd.corrupted; } // Loopfilter one row. - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !pbi->mb.corrupted) { const int lf_start = mi_row - MI_BLOCK_SIZE; LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; @@ -924,7 +924,7 @@ } // Loopfilter remaining rows in the frame. - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !pbi->mb.corrupted) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; winterface->sync(&pbi->lf_worker); lf_data->start = lf_data->stop; @@ -1451,9 +1451,11 @@ if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 && cm->frame_parallel_decoding_mode) { *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); - // If multiple threads are used to decode tiles, then we use those threads - // to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0); + if (!xd->corrupted) { + // If multiple threads are used to decode tiles, then we use those threads + // to do parallel loopfiltering. + vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0); + } } else { *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); }
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 12f6d3a..c4cf5ee 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c
@@ -78,7 +78,8 @@ int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, - BLOCK_SIZE bs) { + BLOCK_SIZE bs, + int motion_magnitude) { int r, c; const uint8_t *sig_start = sig; const uint8_t *mc_avg_start = mc_avg; @@ -86,6 +87,19 @@ int diff, adj, absdiff, delta; int adj_val[] = {3, 4, 6}; int total_adj = 0; + int shift_inc = 1; + + /* If motion_magnitude is small, making the denoiser more aggressive by + * increasing the adjustment for each level. Add another increment for + * blocks that are labeled for increase denoising. */ + if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) { + if (increase_denoising) { + shift_inc = 2; + } + adj_val[0] += shift_inc; + adj_val[1] += shift_inc; + adj_val[2] += shift_inc; + } // First attempt to apply a strong temporal denoising filter. for (r = 0; r < heights[bs]; ++r) { @@ -192,7 +206,8 @@ int increase_denoising, int mi_row, int mi_col, - PICK_MODE_CONTEXT *ctx + PICK_MODE_CONTEXT *ctx, + int *motion_magnitude ) { int mv_col, mv_row; int sse_diff = ctx->zeromv_sse - ctx->newmv_sse; @@ -217,6 +232,8 @@ mv_col = ctx->best_sse_mv.as_mv.col; mv_row = ctx->best_sse_mv.as_mv.row; + *motion_magnitude = mv_row * mv_row + mv_col * mv_col; + frame = ctx->best_reference_frame; // If the best reference frame uses inter-prediction and there is enough of a @@ -304,6 +321,7 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx) { + int motion_magnitude = 0; VP9_DENOISER_DECISION decision = FILTER_BLOCK; YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; @@ -314,13 +332,14 @@ decision = perform_motion_compensation(denoiser, mb, bs, denoiser->increase_denoising, - mi_row, mi_col, ctx); + mi_row, mi_col, ctx, + &motion_magnitude); if (decision == FILTER_BLOCK) { decision = denoiser_filter(src.buf, src.stride, mc_avg_start, mc_avg.y_stride, avg_start, avg.y_stride, - 0, bs); + 0, 
bs, motion_magnitude); } if (decision == FILTER_BLOCK) {
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h index 1c827b6..a913add 100644 --- a/vp9/encoder/vp9_denoiser.h +++ b/vp9/encoder/vp9_denoiser.h
@@ -18,6 +18,8 @@ extern "C" { #endif +#define MOTION_MAGNITUDE_THRESHOLD (8*3) + typedef enum vp9_denoiser_decision { COPY_BLOCK, FILTER_BLOCK
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 26db30c..0f0b7a5 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c
@@ -670,7 +670,6 @@ return VPX_CODEC_MEM_ERROR; ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->sz = sizeof(*priv); ctx->priv->init_flags = ctx->init_flags; ctx->priv->enc.total_encoders = 1; @@ -837,18 +836,19 @@ vpx_enc_frame_flags_t flags, unsigned long deadline) { vpx_codec_err_t res = VPX_CODEC_OK; + VP9_COMP *const cpi = ctx->cpi; const vpx_rational_t *const timebase = &ctx->cfg.g_timebase; if (img != NULL) { res = validate_img(ctx, img); // TODO(jzern) the checks related to cpi's validity should be treated as a // failure condition, encoder setup is done fully in init() currently. - if (res == VPX_CODEC_OK && ctx->cpi != NULL && ctx->cx_data == NULL) { + if (res == VPX_CODEC_OK && cpi != NULL && ctx->cx_data == NULL) { // There's no codec control for multiple alt-refs so check the encoder // instance for its status to determine the compressed data size. ctx->cx_data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 * - (ctx->cpi->multi_arf_allowed ? 8 : 2); + (cpi->multi_arf_allowed ? 8 : 2); if (ctx->cx_data_sz < 4096) ctx->cx_data_sz = 4096; ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz); @@ -868,7 +868,7 @@ return VPX_CODEC_INVALID_PARAM; } - vp9_apply_encoding_flags(ctx->cpi, flags); + vp9_apply_encoding_flags(cpi, flags); // Handle fixed keyframe intervals if (ctx->cfg.kf_mode == VPX_KF_AUTO && @@ -880,7 +880,7 @@ } // Initialize the encoder instance on the first frame. - if (res == VPX_CODEC_OK && ctx->cpi != NULL) { + if (res == VPX_CODEC_OK && cpi != NULL) { unsigned int lib_flags = 0; YV12_BUFFER_CONFIG sd; int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts); @@ -891,16 +891,15 @@ // Set up internal flags if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) - ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1; + cpi->b_calculate_psnr = 1; if (img != NULL) { res = image2yuvconfig(img, &sd); // Store the original flags in to the frame buffer. Will extract the // key frame flag when we actually encode this frame. 
- if (vp9_receive_raw_frame(ctx->cpi, flags, + if (vp9_receive_raw_frame(cpi, flags, &sd, dst_time_stamp, dst_end_time_stamp)) { - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; res = update_error_state(ctx, &cpi->common.error); } } @@ -925,11 +924,10 @@ } while (cx_data_sz >= ctx->cx_data_sz / 2 && - -1 != vp9_get_compressed_data(ctx->cpi, &lib_flags, &size, + -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data, &dst_time_stamp, &dst_end_time_stamp, !img)) { if (size) { - VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; vpx_codec_cx_pkt_t pkt; #if CONFIG_SPATIAL_SVC
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index b0fb282..393c66e 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c
@@ -63,7 +63,6 @@ return VPX_CODEC_MEM_ERROR; ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->sz = sizeof(*priv); ctx->priv->init_flags = ctx->init_flags; priv->si.sz = sizeof(priv->si);
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h index 02f2079..cbfffd0 100644 --- a/vpx/internal/vpx_codec_internal.h +++ b/vpx/internal/vpx_codec_internal.h
@@ -335,7 +335,6 @@ * and the pointer cast to the proper type. */ struct vpx_codec_priv { - unsigned int sz; const char *err_detail; vpx_codec_flags_t init_flags; struct {