Merge changes I660c1b7f,Id3cdf6b6

* changes:
  vp9_loop_filter_frame_mt: defer allocations
  vp9_loop_filter_alloc: reorder parameters
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index 50e7c23..3cfb292 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -119,6 +119,7 @@
   {1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"},
   {1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"},
   {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf"},
+  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
 };
 
 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 84b13f9..ee1997f 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -697,3 +697,5 @@
 741158f67c0d9d23726624d06bdc482ad368afc9  invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
 8b1f7bf7e86c0976d277f60e8fcd9539e75a079a  invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf
 fb79dcbbbb8c82d5a750e339acce66e39a32f15f  invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf.res
+552e372e9b78127389fb06b34545df2cec15ba6d  invalid-vp91-2-mixedrefcsp-444to420.ivf
+a61774cf03fc584bd9f0904fc145253bb8ea6c4c  invalid-vp91-2-mixedrefcsp-444to420.ivf.res
diff --git a/test/test.mk b/test/test.mk
index c839c92..6d2dbd6 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -819,6 +819,8 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res
 
 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # BBB VP9 streams
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index a90c876..0070c28 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -289,22 +289,19 @@
 $vp8_sub_pixel_variance16x16_neon_asm=vp8_sub_pixel_variance16x16_neon;
 
 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon_asm/;
+specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/;
 $vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt;
 $vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6;
-$vp8_variance_halfpixvar16x16_h_neon_asm=vp8_variance_halfpixvar16x16_h_neon;
 
 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon_asm/;
+specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/;
 $vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt;
 $vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6;
-$vp8_variance_halfpixvar16x16_v_neon_asm=vp8_variance_halfpixvar16x16_v_neon;
 
 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon_asm/;
+specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
 $vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
 $vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
-$vp8_variance_halfpixvar16x16_hv_neon_asm=vp8_variance_halfpixvar16x16_hv_neon;
 
 #
 # Single block SAD
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 0c98eb1..d4f68a9 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -412,12 +412,27 @@
     vp8_denoiser_set_parameters(denoiser, mode);
     denoiser->nmse_source_diff = 0;
     denoiser->nmse_source_diff_count = 0;
+    denoiser->qp_avg = 0;
+    // QP threshold below which we can go up to aggressive mode.
+    denoiser->qp_threshold_up = 80;
+    // QP threshold above which we can go back down to normal mode.
+    // For now keep this second threshold high, so not used currently.
+    denoiser->qp_threshold_down = 128;
+    // Bitrate thresholds and noise metric (nmse) thresholds for switching to
+    // aggressive mode.
     // TODO(marpan): Adjust thresholds, including effect on resolution.
+    denoiser->bitrate_threshold = 200000;  // (bits/sec).
     denoiser->threshold_aggressive_mode = 35;
-    if (width * height > 640 * 480)
+    if (width * height > 640 * 480) {
+      denoiser->bitrate_threshold = 500000;
+      denoiser->threshold_aggressive_mode = 100;
+    } else if (width * height > 960 * 540) {
+      denoiser->bitrate_threshold = 800000;
       denoiser->threshold_aggressive_mode = 150;
-    else if (width * height > 1280 * 720)
+    } else if (width * height > 1280 * 720) {
+      denoiser->bitrate_threshold = 2000000;
       denoiser->threshold_aggressive_mode = 1400;
+    }
     return 0;
 }
 
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index e1844f1..bb0c2ce 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -19,7 +19,7 @@
 #endif
 
 #define SUM_DIFF_THRESHOLD (16 * 16 * 2)
-#define SUM_DIFF_THRESHOLD_HIGH (16 * 16 * 3)
+#define SUM_DIFF_THRESHOLD_HIGH (600)
 #define MOTION_MAGNITUDE_THRESHOLD (8*3)
 
 #define SUM_DIFF_THRESHOLD_UV (96)   // (8 * 8 * 1.5)
@@ -81,6 +81,10 @@
     int threshold_aggressive_mode;
     int nmse_source_diff;
     int nmse_source_diff_count;
+    int qp_avg;
+    int qp_threshold_up;
+    int qp_threshold_down;
+    int bitrate_threshold;
     denoise_params denoise_pars;
 } VP8_DENOISER;
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 74e75c4..791a485 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3370,33 +3370,43 @@
   if (total > 0 &&
       (num_blocks > (tot_num_blocks >> 4))) {
     // Update the recursive mean square source_diff.
-    if (cpi->denoiser.nmse_source_diff_count == 0)
+    if (cpi->denoiser.nmse_source_diff_count == 0) {
       // First sample in new interval.
       cpi->denoiser.nmse_source_diff = total;
-    else
+      cpi->denoiser.qp_avg = cm->base_qindex;
+    } else {
       // For subsequent samples, use average with weight ~1/4 for new sample.
       cpi->denoiser.nmse_source_diff = (int)((total >> 2) +
           3 * (cpi->denoiser.nmse_source_diff >> 2));
+      cpi->denoiser.qp_avg = (int)((cm->base_qindex >> 2) +
+          3 * (cpi->denoiser.qp_avg >> 2));
+    }
     cpi->denoiser.nmse_source_diff_count++;
   }
   // Check for changing the denoiser mode, when we have obtained #samples =
-  // num_mode_change.
+  // num_mode_change. Condition the change also on the bitrate and QP.
   if (cpi->denoiser.nmse_source_diff_count == num_mode_change) {
     // Check for going up: from normal to aggressive mode.
     if ((cpi->denoiser.denoiser_mode == kDenoiserOnYUV) &&
         (cpi->denoiser.nmse_source_diff >
-        cpi->denoiser.threshold_aggressive_mode)) {
+        cpi->denoiser.threshold_aggressive_mode) &&
+        (cpi->denoiser.qp_avg < cpi->denoiser.qp_threshold_up &&
+         cpi->target_bandwidth > cpi->denoiser.bitrate_threshold)) {
       vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUVAggressive);
     } else {
       // Check for going down: from aggressive to normal mode.
-      if ((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) &&
+      if (((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) &&
           (cpi->denoiser.nmse_source_diff <
-          cpi->denoiser.threshold_aggressive_mode)) {
+          cpi->denoiser.threshold_aggressive_mode)) ||
+          ((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) &&
+          (cpi->denoiser.qp_avg > cpi->denoiser.qp_threshold_down ||
+           cpi->target_bandwidth < cpi->denoiser.bitrate_threshold))) {
         vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUV);
       }
     }
     // Reset metric and counter for next interval.
     cpi->denoiser.nmse_source_diff = 0;
+    cpi->denoiser.qp_avg = 0;
     cpi->denoiser.nmse_source_diff_count = 0;
   }
 }
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index b60f8a0..df60987 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -3573,6 +3573,7 @@
                                  int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<5);
+  const __m128i zero = _mm_setzero_si128();
 
   // idct constants for each stage
   const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
@@ -3635,7 +3636,6 @@
           stp2_30, stp2_31;
   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   int i, j, i32;
-  int zero_flag[2];
 
   for (i = 0; i < 4; i++) {
     i32 = (i << 5);
@@ -3710,13 +3710,7 @@
       zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]);
       zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]);
 
-      zero_idx[0] = _mm_unpackhi_epi64(zero_idx[14], zero_idx[14]);
-      zero_idx[1] = _mm_or_si128(zero_idx[0], zero_idx[14]);
-      zero_idx[2] = _mm_srli_epi64(zero_idx[1], 32);
-      zero_flag[0] = _mm_cvtsi128_si32(zero_idx[1]);
-      zero_flag[1] = _mm_cvtsi128_si32(zero_idx[2]);
-
-      if (!zero_flag[0] && !zero_flag[1]) {
+      if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) {
         col[i32 + 0] = _mm_setzero_si128();
         col[i32 + 1] = _mm_setzero_si128();
         col[i32 + 2] = _mm_setzero_si128();
@@ -3795,7 +3789,6 @@
       col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
     }
   for (i = 0; i < 4; i++) {
-      const __m128i zero = _mm_setzero_si128();
       // Second 1-D idct
       j = i << 3;
 
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index a9c03f0..aecd906 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -668,6 +668,15 @@
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate frame buffer");
   }
+  cm->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
+}
+
+static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth,
+                                          int ref_xss, int ref_yss,
+                                          vpx_bit_depth_t this_bit_depth,
+                                          int this_xss, int this_yss) {
+  return ref_bit_depth == this_bit_depth && ref_xss == this_xss &&
+         ref_yss == this_yss;
 }
 
 static void setup_frame_size_with_refs(VP9_COMMON *cm,
@@ -707,6 +716,18 @@
   if (!has_valid_ref_frame)
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                        "Referenced frame has invalid size");
+  for (i = 0; i < REFS_PER_FRAME; ++i) {
+    RefBuffer *const ref_frame = &cm->frame_refs[i];
+    if (!valid_ref_frame_img_fmt(
+            ref_frame->buf->bit_depth,
+            ref_frame->buf->uv_crop_width < ref_frame->buf->y_crop_width,
+            ref_frame->buf->uv_crop_height < ref_frame->buf->y_crop_height,
+            cm->bit_depth,
+            cm->subsampling_x,
+            cm->subsampling_y))
+      vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                         "Referenced frame has incompatible color space");
+  }
 
   resize_context_buffers(cm, width, height);
   setup_display_size(cm, rb);
@@ -723,6 +744,7 @@
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate frame buffer");
   }
+  cm->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
 }
 
 static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 72ced05..c62b52f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2466,6 +2466,9 @@
     vp9_zero(cpi->mb.pred_mv);
     cpi->pc_root->index = 0;
 
+    // TODO(yunqingwang): use_lastframe_partitioning is no longer used in good-
+    // quality encoding. Need to evaluate it in real-time encoding later to
+    // decide if it can be removed too. And then, do the code cleanup.
     if ((sf->partition_search_type == SEARCH_PARTITION &&
          sf->use_lastframe_partitioning) ||
          sf->partition_search_type == FIXED_PARTITION ||
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 506c9bc..22505fb 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2674,14 +2674,6 @@
     }
   }
 
-  if (bsize > cpi->sf.max_intra_bsize) {
-    const int all_intra_modes = (1 << THR_DC) | (1 << THR_TM) |
-        (1 << THR_H_PRED) | (1 << THR_V_PRED) | (1 << THR_D135_PRED) |
-        (1 << THR_D207_PRED) | (1 << THR_D153_PRED) | (1 << THR_D63_PRED) |
-        (1 << THR_D117_PRED) | (1 << THR_D45_PRED);
-    mode_skip_mask |= all_intra_modes;
-  }
-
   for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
     int mode_excluded = 0;
     int64_t this_rd = INT64_MAX;
@@ -2705,7 +2697,7 @@
     // Look at the reference frame of the best mode so far and set the
     // skip mask to look at a subset of the remaining modes.
     if (mode_index == mode_skip_start && best_mode_index >= 0) {
-      switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
+      switch (best_mbmode.ref_frame[0]) {
         case INTRA_FRAME:
           break;
         case LAST_FRAME:
@@ -2730,6 +2722,10 @@
         continue;
     }
 
+    if (bsize > cpi->sf.max_intra_bsize)
+      if (ref_frame == INTRA_FRAME)
+        continue;
+
     if (mode_skip_mask & (1 << mode_index))
       continue;
 
@@ -2794,7 +2790,7 @@
 
       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
           best_mode_index >=0 &&
-          vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
+          best_mbmode.ref_frame[0] == INTRA_FRAME)
         continue;
       if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
           ref_frame != best_inter_ref_frame &&
@@ -2826,7 +2822,7 @@
         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
           if (best_mode_index >= 0 &&
-              vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
+              best_mbmode.ref_frame[0] > INTRA_FRAME)
             continue;
         }
         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
@@ -3124,7 +3120,7 @@
   // If we used an estimate for the uv intra rd in the loop above...
   if (cpi->sf.use_uv_intra_rd_estimate) {
     // Do Intra UV best rd mode selection if best mode choice above was intra.
-    if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
+    if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
       TX_SIZE uv_tx_size;
       *mbmi = best_mbmode;
       uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index df0e0a1..04f3cc2 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -104,15 +104,11 @@
                                                       : USE_LARGESTALL;
 
     if (MIN(cm->width, cm->height) >= 720) {
-      sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
-      sf->last_partitioning_redo_frequency = 3;
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
                                               : DISABLE_ALL_INTER_SPLIT;
       sf->adaptive_pred_interp_filter = 0;
     } else {
       sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
-      sf->last_partitioning_redo_frequency = 2;
-      sf->lf_motion_threshold = NO_MOTION_THRESHOLD;
     }
 
     sf->reference_masking = 1;
@@ -123,7 +119,6 @@
     sf->disable_filter_search_var_thresh = 100;
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
     sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX;
-    sf->adjust_partitioning_from_last_frame = 1;
 
     if (MIN(cm->width, cm->height) >= 720)
       sf->partition_search_breakout_dist_thr = (1 << 24);
@@ -147,8 +142,6 @@
     sf->cb_pred_filter_search = 1;
     sf->alt_ref_search_fp = 1;
     sf->motion_field_mode_search = !boosted;
-    sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
-    sf->last_partitioning_redo_frequency = 2;
     sf->recode_loop = ALLOW_RECODE_KFMAXBW;
     sf->adaptive_rd_thresh = 3;
     sf->mode_skip_start = 6;
@@ -171,7 +164,6 @@
     sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH |
                                   FLAG_EARLY_TERMINATE;
     sf->disable_filter_search_var_thresh = 200;
-    sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
     sf->use_lp32x32fdct = 1;
     sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
     sf->use_fast_coef_costing = 1;
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index fa0e030..f03feff 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -10,7 +10,8 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include "arm.h"
+#include "vpx_ports/arm.h"
+#include "./vpx_config.h"
 
 #ifdef WINAPI_FAMILY
 #include <winapifamily.h>
@@ -54,9 +55,9 @@
 #if HAVE_MEDIA
   flags |= HAS_MEDIA;
 #endif /* HAVE_MEDIA */
-#if HAVE_NEON
+#if HAVE_NEON || HAVE_NEON_ASM
   flags |= HAS_NEON;
-#endif /* HAVE_NEON */
+#endif /* HAVE_NEON  || HAVE_NEON_ASM */
   return flags & mask;
 }
 
@@ -87,6 +88,7 @@
       /*Ignore exception.*/
     }
   }
+#endif /* HAVE_EDSP */
 #if HAVE_MEDIA
   if (mask & HAS_MEDIA)
     __try {
@@ -97,7 +99,8 @@
     /*Ignore exception.*/
   }
 }
-#if HAVE_NEON
+#endif /* HAVE_MEDIA */
+#if HAVE_NEON || HAVE_NEON_ASM
 if (mask &HAS_NEON) {
   __try {
     /*VORR q0,q0,q0*/
@@ -107,9 +110,7 @@
     /*Ignore exception.*/
   }
 }
-#endif /* HAVE_NEON */
-#endif /* HAVE_MEDIA */
-#endif /* HAVE_EDSP */
+#endif /* HAVE_NEON || HAVE_NEON_ASM */
 return flags & mask;
 }
 
@@ -132,10 +133,10 @@
 #if HAVE_MEDIA
   flags |= HAS_MEDIA;
 #endif /* HAVE_MEDIA */
-#if HAVE_NEON
+#if HAVE_NEON || HAVE_NEON_ASM
   if (features & ANDROID_CPU_ARM_FEATURE_NEON)
     flags |= HAS_NEON;
-#endif /* HAVE_NEON */
+#endif /* HAVE_NEON || HAVE_NEON_ASM */
   return flags & mask;
 }
 
@@ -162,7 +163,7 @@
      */
     char buf[512];
     while (fgets(buf, 511, fin) != NULL) {
-#if HAVE_EDSP || HAVE_NEON
+#if HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM
       if (memcmp(buf, "Features", 8) == 0) {
         char *p;
 #if HAVE_EDSP
@@ -170,15 +171,15 @@
         if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
           flags |= HAS_EDSP;
         }
-#if HAVE_NEON
+#endif /* HAVE_EDSP */
+#if HAVE_NEON || HAVE_NEON_ASM
         p = strstr(buf, " neon");
         if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
           flags |= HAS_NEON;
         }
-#endif /* HAVE_NEON */
-#endif /* HAVE_EDSP */
+#endif /* HAVE_NEON || HAVE_NEON_ASM */
       }
-#endif /* HAVE_EDSP || HAVE_NEON */
+#endif /* HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM */
 #if HAVE_MEDIA
       if (memcmp(buf, "CPU architecture:", 17) == 0) {
         int version;
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index eb0a8d6..9ff764c 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -15,6 +15,7 @@
 extern "C" {
 #endif
 
+#include "vpx/vpx_codec.h"
 #include "vpx/vpx_frame_buffer.h"
 #include "vpx/vpx_integer.h"
 
@@ -50,6 +51,7 @@
   int buffer_alloc_sz;
   int border;
   int frame_size;
+  unsigned int bit_depth;
 
   int corrupted;
   int flags;