Merge changes I7b9f40dc,I76e74f2e * changes: vp9: correct context buffer resize check vp9: fail decode if block/frame refs are corrupt

diff --git a/vp8/common/arm/neon/bilinearpredict_neon.c b/vp8/common/arm/neon/bilinearpredict_neon.c
index d77f2ba..9824a31 100644
--- a/vp8/common/arm/neon/bilinearpredict_neon.c
+++ b/vp8/common/arm/neon/bilinearpredict_neon.c

@@ -10,7 +10,7 @@
 
 #include <arm_neon.h>
 
-static const uint16_t bifilter4_coeff[8][2] = {
+static const uint8_t bifilter4_coeff[8][2] = {
     {128,   0},
     {112,  16},
     { 96,  32},
@@ -64,8 +64,8 @@
         q1u8 = vcombine_u8(d2u8, d3u8);
         q2u8 = vcombine_u8(d4u8, d5u8);
 
-        d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
-        d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
 
         q4u64  = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
         q5u64  = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
@@ -155,8 +155,8 @@
         q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
         q5u8 = vld1q_u8(src_ptr);
 
-        d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
-        d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
 
         q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
         q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
@@ -245,8 +245,8 @@
         q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
         q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
 
-        d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
-        d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
 
         q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
         q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);

diff --git a/vp8/common/arm/variance_arm.c b/vp8/common/arm/variance_arm.c
index e3f7083..467a509 100644
--- a/vp8/common/arm/variance_arm.c
+++ b/vp8/common/arm/variance_arm.c

@@ -95,7 +95,7 @@
 #endif /* HAVE_MEDIA */
 
 
-#if HAVE_NEON_ASM
+#if HAVE_NEON
 
 extern unsigned int vp8_sub_pixel_variance16x16_neon_func
 (

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index fc8c407..b1b079c 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c

@@ -635,7 +635,6 @@
         }
 
         ctx->priv = (vpx_codec_priv_t *)priv;
-        ctx->priv->sz = sizeof(*priv);
         ctx->priv->init_flags = ctx->init_flags;
 
         if (ctx->config.enc)

diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 6d9ecc0..3ab8ed0 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c

@@ -84,7 +84,6 @@
         (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv));
 
     ctx->priv = (vpx_codec_priv_t *)priv;
-    ctx->priv->sz = sizeof(*priv);
     ctx->priv->init_flags = ctx->init_flags;
 
     priv->si.sz = sizeof(priv->si);

diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index f2a3eef..fa49568 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -284,15 +284,6 @@
 $vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
 }
 
-add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
-specialize qw/vp9_blend_mb_inner/;
-
-add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
-specialize qw/vp9_blend_mb_outer/;
-
-add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
-specialize qw/vp9_blend_b/;
-
 #
 # Sub Pixel Filters
 #
@@ -693,16 +684,16 @@
 specialize qw/vp9_sad4x4x4d sse/;
 
 add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x16 sse2 avx2/;
+specialize qw/vp9_mse16x16 avx2/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
-specialize qw/vp9_mse8x16 sse2/;
+specialize qw/vp9_mse8x16/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x8 sse2/;
+specialize qw/vp9_mse16x8/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
-specialize qw/vp9_mse8x8 sse2/;
+specialize qw/vp9_mse8x8/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
 specialize qw/vp9_get_mb_ss sse2/;

diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 6b89334..a9c03f0 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c

@@ -901,7 +901,7 @@
         pbi->mb.corrupted |= tile_data->xd.corrupted;
       }
       // Loopfilter one row.
-      if (cm->lf.filter_level) {
+      if (cm->lf.filter_level && !pbi->mb.corrupted) {
         const int lf_start = mi_row - MI_BLOCK_SIZE;
         LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
 
@@ -924,7 +924,7 @@
   }
 
   // Loopfilter remaining rows in the frame.
-  if (cm->lf.filter_level) {
+  if (cm->lf.filter_level && !pbi->mb.corrupted) {
     LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
     winterface->sync(&pbi->lf_worker);
     lf_data->start = lf_data->stop;
@@ -1451,9 +1451,11 @@
   if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
       cm->frame_parallel_decoding_mode) {
     *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
-    // If multiple threads are used to decode tiles, then we use those threads
-    // to do parallel loopfiltering.
-    vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
+    if (!xd->corrupted) {
+      // If multiple threads are used to decode tiles, then we use those threads
+      // to do parallel loopfiltering.
+      vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
+    }
   } else {
     *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
   }

diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 12f6d3a..c4cf5ee 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c

@@ -78,7 +78,8 @@
                                              int mc_avg_stride,
                                              uint8_t *avg, int avg_stride,
                                              int increase_denoising,
-                                             BLOCK_SIZE bs) {
+                                             BLOCK_SIZE bs,
+                                             int motion_magnitude) {
   int r, c;
   const uint8_t *sig_start = sig;
   const uint8_t *mc_avg_start = mc_avg;
@@ -86,6 +87,19 @@
   int diff, adj, absdiff, delta;
   int adj_val[] = {3, 4, 6};
   int total_adj = 0;
+  int shift_inc = 1;
+
+  /* If motion_magnitude is small, making the denoiser more aggressive by
+   * increasing the adjustment for each level. Add another increment for
+   * blocks that are labeled for increase denoising. */
+  if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) {
+    if (increase_denoising) {
+      shift_inc = 2;
+    }
+    adj_val[0] += shift_inc;
+    adj_val[1] += shift_inc;
+    adj_val[2] += shift_inc;
+  }
 
   // First attempt to apply a strong temporal denoising filter.
   for (r = 0; r < heights[bs]; ++r) {
@@ -192,7 +206,8 @@
                                                          int increase_denoising,
                                                          int mi_row,
                                                          int mi_col,
-                                                         PICK_MODE_CONTEXT *ctx
+                                                         PICK_MODE_CONTEXT *ctx,
+                                                         int *motion_magnitude
                                                          ) {
   int mv_col, mv_row;
   int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
@@ -217,6 +232,8 @@
   mv_col = ctx->best_sse_mv.as_mv.col;
   mv_row = ctx->best_sse_mv.as_mv.row;
 
+  *motion_magnitude = mv_row * mv_row + mv_col * mv_col;
+
   frame = ctx->best_reference_frame;
 
   // If the best reference frame uses inter-prediction and there is enough of a
@@ -304,6 +321,7 @@
 void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
                           int mi_row, int mi_col, BLOCK_SIZE bs,
                           PICK_MODE_CONTEXT *ctx) {
+  int motion_magnitude = 0;
   VP9_DENOISER_DECISION decision = FILTER_BLOCK;
   YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
   YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
@@ -314,13 +332,14 @@
 
   decision = perform_motion_compensation(denoiser, mb, bs,
                                          denoiser->increase_denoising,
-                                         mi_row, mi_col, ctx);
+                                         mi_row, mi_col, ctx,
+                                         &motion_magnitude);
 
   if (decision == FILTER_BLOCK) {
     decision = denoiser_filter(src.buf, src.stride,
                                mc_avg_start, mc_avg.y_stride,
                                avg_start, avg.y_stride,
-                               0, bs);
+                               0, bs, motion_magnitude);
   }
 
   if (decision == FILTER_BLOCK) {

diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index 1c827b6..a913add 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h

@@ -18,6 +18,8 @@
 extern "C" {
 #endif
 
+#define MOTION_MAGNITUDE_THRESHOLD (8*3)
+
 typedef enum vp9_denoiser_decision {
   COPY_BLOCK,
   FILTER_BLOCK

diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 26db30c..0f0b7a5 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c

@@ -670,7 +670,6 @@
       return VPX_CODEC_MEM_ERROR;
 
     ctx->priv = (vpx_codec_priv_t *)priv;
-    ctx->priv->sz = sizeof(*priv);
     ctx->priv->init_flags = ctx->init_flags;
     ctx->priv->enc.total_encoders = 1;
 
@@ -837,18 +836,19 @@
                                       vpx_enc_frame_flags_t flags,
                                       unsigned long deadline) {
   vpx_codec_err_t res = VPX_CODEC_OK;
+  VP9_COMP *const cpi = ctx->cpi;
   const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
 
   if (img != NULL) {
     res = validate_img(ctx, img);
     // TODO(jzern) the checks related to cpi's validity should be treated as a
     // failure condition, encoder setup is done fully in init() currently.
-    if (res == VPX_CODEC_OK && ctx->cpi != NULL && ctx->cx_data == NULL) {
+    if (res == VPX_CODEC_OK && cpi != NULL && ctx->cx_data == NULL) {
       // There's no codec control for multiple alt-refs so check the encoder
       // instance for its status to determine the compressed data size.
       ctx->cx_data_sz = ctx->cfg.g_w * ctx->cfg.g_h *
                         get_image_bps(img) / 8 *
-                        (ctx->cpi->multi_arf_allowed ? 8 : 2);
+                        (cpi->multi_arf_allowed ? 8 : 2);
       if (ctx->cx_data_sz < 4096) ctx->cx_data_sz = 4096;
 
       ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz);
@@ -868,7 +868,7 @@
     return VPX_CODEC_INVALID_PARAM;
   }
 
-  vp9_apply_encoding_flags(ctx->cpi, flags);
+  vp9_apply_encoding_flags(cpi, flags);
 
   // Handle fixed keyframe intervals
   if (ctx->cfg.kf_mode == VPX_KF_AUTO &&
@@ -880,7 +880,7 @@
   }
 
   // Initialize the encoder instance on the first frame.
-  if (res == VPX_CODEC_OK && ctx->cpi != NULL) {
+  if (res == VPX_CODEC_OK && cpi != NULL) {
     unsigned int lib_flags = 0;
     YV12_BUFFER_CONFIG sd;
     int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
@@ -891,16 +891,15 @@
 
     // Set up internal flags
     if (ctx->base.init_flags & VPX_CODEC_USE_PSNR)
-      ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1;
+      cpi->b_calculate_psnr = 1;
 
     if (img != NULL) {
       res = image2yuvconfig(img, &sd);
 
       // Store the original flags in to the frame buffer. Will extract the
       // key frame flag when we actually encode this frame.
-      if (vp9_receive_raw_frame(ctx->cpi, flags,
+      if (vp9_receive_raw_frame(cpi, flags,
                                 &sd, dst_time_stamp, dst_end_time_stamp)) {
-        VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
         res = update_error_state(ctx, &cpi->common.error);
       }
     }
@@ -925,11 +924,10 @@
     }
 
     while (cx_data_sz >= ctx->cx_data_sz / 2 &&
-           -1 != vp9_get_compressed_data(ctx->cpi, &lib_flags, &size,
+           -1 != vp9_get_compressed_data(cpi, &lib_flags, &size,
                                          cx_data, &dst_time_stamp,
                                          &dst_end_time_stamp, !img)) {
       if (size) {
-        VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
         vpx_codec_cx_pkt_t pkt;
 
 #if CONFIG_SPATIAL_SVC

diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index b0fb282..393c66e 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c

@@ -63,7 +63,6 @@
       return VPX_CODEC_MEM_ERROR;
 
     ctx->priv = (vpx_codec_priv_t *)priv;
-    ctx->priv->sz = sizeof(*priv);
     ctx->priv->init_flags = ctx->init_flags;
 
     priv->si.sz = sizeof(priv->si);

diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 02f2079..cbfffd0 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h

@@ -335,7 +335,6 @@
  * and the pointer cast to the proper type.
  */
 struct vpx_codec_priv {
-  unsigned int                    sz;
   const char                     *err_detail;
   vpx_codec_flags_t               init_flags;
   struct {