Merge "Remove dependency of intra mode search skip check on mode order"
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index 50e7c23..3cfb292 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -119,6 +119,7 @@
   {1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"},
   {1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"},
   {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf"},
+  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
 };
 
 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 84b13f9..ee1997f 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -697,3 +697,5 @@
 741158f67c0d9d23726624d06bdc482ad368afc9  invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
 8b1f7bf7e86c0976d277f60e8fcd9539e75a079a  invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf
 fb79dcbbbb8c82d5a750e339acce66e39a32f15f  invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf.res
+552e372e9b78127389fb06b34545df2cec15ba6d  invalid-vp91-2-mixedrefcsp-444to420.ivf
+a61774cf03fc584bd9f0904fc145253bb8ea6c4c  invalid-vp91-2-mixedrefcsp-444to420.ivf.res
diff --git a/test/test.mk b/test/test.mk
index c839c92..6d2dbd6 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -819,6 +819,8 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res
 
 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # BBB VP9 streams
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index b60f8a0..df60987 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -3573,6 +3573,7 @@
                                  int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<5);
+  const __m128i zero = _mm_setzero_si128();
 
   // idct constants for each stage
   const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
@@ -3635,7 +3636,6 @@
           stp2_30, stp2_31;
   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   int i, j, i32;
-  int zero_flag[2];
 
   for (i = 0; i < 4; i++) {
     i32 = (i << 5);
@@ -3710,13 +3710,7 @@
       zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]);
       zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]);
 
-      zero_idx[0] = _mm_unpackhi_epi64(zero_idx[14], zero_idx[14]);
-      zero_idx[1] = _mm_or_si128(zero_idx[0], zero_idx[14]);
-      zero_idx[2] = _mm_srli_epi64(zero_idx[1], 32);
-      zero_flag[0] = _mm_cvtsi128_si32(zero_idx[1]);
-      zero_flag[1] = _mm_cvtsi128_si32(zero_idx[2]);
-
-      if (!zero_flag[0] && !zero_flag[1]) {
+      if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) {
         col[i32 + 0] = _mm_setzero_si128();
         col[i32 + 1] = _mm_setzero_si128();
         col[i32 + 2] = _mm_setzero_si128();
@@ -3795,7 +3789,6 @@
       col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
     }
   for (i = 0; i < 4; i++) {
-      const __m128i zero = _mm_setzero_si128();
       // Second 1-D idct
       j = i << 3;
 
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index a9c03f0..aecd906 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -668,6 +668,15 @@
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate frame buffer");
   }
+  cm->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
+}
+
+static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth,
+                                          int ref_xss, int ref_yss,
+                                          vpx_bit_depth_t this_bit_depth,
+                                          int this_xss, int this_yss) {
+  return ref_bit_depth == this_bit_depth && ref_xss == this_xss &&
+         ref_yss == this_yss;
 }
 
 static void setup_frame_size_with_refs(VP9_COMMON *cm,
@@ -707,6 +716,18 @@
   if (!has_valid_ref_frame)
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                        "Referenced frame has invalid size");
+  for (i = 0; i < REFS_PER_FRAME; ++i) {
+    RefBuffer *const ref_frame = &cm->frame_refs[i];
+    if (!valid_ref_frame_img_fmt(
+            ref_frame->buf->bit_depth,
+            ref_frame->buf->uv_crop_width < ref_frame->buf->y_crop_width,
+            ref_frame->buf->uv_crop_height < ref_frame->buf->y_crop_height,
+            cm->bit_depth,
+            cm->subsampling_x,
+            cm->subsampling_y))
+      vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                         "Referenced frame has incompatible color space");
+  }
 
   resize_context_buffers(cm, width, height);
   setup_display_size(cm, rb);
@@ -723,6 +744,7 @@
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate frame buffer");
   }
+  cm->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
 }
 
 static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 72ced05..c62b52f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2466,6 +2466,9 @@
     vp9_zero(cpi->mb.pred_mv);
     cpi->pc_root->index = 0;
 
+    // TODO(yunqingwang): use_lastframe_partitioning is no longer used in good-
+    // quality encoding. Need to evaluate it in real-time encoding later to
+    // decide if it can be removed too. And then, do the code cleanup.
     if ((sf->partition_search_type == SEARCH_PARTITION &&
          sf->use_lastframe_partitioning) ||
          sf->partition_search_type == FIXED_PARTITION ||
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index df0e0a1..04f3cc2 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -104,15 +104,11 @@
                                                       : USE_LARGESTALL;
 
     if (MIN(cm->width, cm->height) >= 720) {
-      sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
-      sf->last_partitioning_redo_frequency = 3;
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
                                               : DISABLE_ALL_INTER_SPLIT;
       sf->adaptive_pred_interp_filter = 0;
     } else {
       sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
-      sf->last_partitioning_redo_frequency = 2;
-      sf->lf_motion_threshold = NO_MOTION_THRESHOLD;
     }
 
     sf->reference_masking = 1;
@@ -123,7 +119,6 @@
     sf->disable_filter_search_var_thresh = 100;
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
     sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX;
-    sf->adjust_partitioning_from_last_frame = 1;
 
     if (MIN(cm->width, cm->height) >= 720)
       sf->partition_search_breakout_dist_thr = (1 << 24);
@@ -147,8 +142,6 @@
     sf->cb_pred_filter_search = 1;
     sf->alt_ref_search_fp = 1;
     sf->motion_field_mode_search = !boosted;
-    sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
-    sf->last_partitioning_redo_frequency = 2;
     sf->recode_loop = ALLOW_RECODE_KFMAXBW;
     sf->adaptive_rd_thresh = 3;
     sf->mode_skip_start = 6;
@@ -171,7 +164,6 @@
     sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH |
                                   FLAG_EARLY_TERMINATE;
     sf->disable_filter_search_var_thresh = 200;
-    sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
     sf->use_lp32x32fdct = 1;
     sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
     sf->use_fast_coef_costing = 1;
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index fa0e030..f03feff 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -10,7 +10,8 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include "arm.h"
+#include "vpx_ports/arm.h"
+#include "./vpx_config.h"
 
 #ifdef WINAPI_FAMILY
 #include <winapifamily.h>
@@ -54,9 +55,9 @@
 #if HAVE_MEDIA
   flags |= HAS_MEDIA;
 #endif /* HAVE_MEDIA */
-#if HAVE_NEON
+#if HAVE_NEON || HAVE_NEON_ASM
   flags |= HAS_NEON;
-#endif /* HAVE_NEON */
+#endif /* HAVE_NEON  || HAVE_NEON_ASM */
   return flags & mask;
 }
 
@@ -87,6 +88,7 @@
       /*Ignore exception.*/
     }
   }
+#endif /* HAVE_EDSP */
 #if HAVE_MEDIA
   if (mask & HAS_MEDIA)
     __try {
@@ -97,7 +99,8 @@
     /*Ignore exception.*/
   }
 }
-#if HAVE_NEON
+#endif /* HAVE_MEDIA */
+#if HAVE_NEON || HAVE_NEON_ASM
 if (mask &HAS_NEON) {
   __try {
     /*VORR q0,q0,q0*/
@@ -107,9 +110,7 @@
     /*Ignore exception.*/
   }
 }
-#endif /* HAVE_NEON */
-#endif /* HAVE_MEDIA */
-#endif /* HAVE_EDSP */
+#endif /* HAVE_NEON || HAVE_NEON_ASM */
 return flags & mask;
 }
 
@@ -132,10 +133,10 @@
 #if HAVE_MEDIA
   flags |= HAS_MEDIA;
 #endif /* HAVE_MEDIA */
-#if HAVE_NEON
+#if HAVE_NEON || HAVE_NEON_ASM
   if (features & ANDROID_CPU_ARM_FEATURE_NEON)
     flags |= HAS_NEON;
-#endif /* HAVE_NEON */
+#endif /* HAVE_NEON || HAVE_NEON_ASM */
   return flags & mask;
 }
 
@@ -162,7 +163,7 @@
      */
     char buf[512];
     while (fgets(buf, 511, fin) != NULL) {
-#if HAVE_EDSP || HAVE_NEON
+#if HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM
       if (memcmp(buf, "Features", 8) == 0) {
         char *p;
 #if HAVE_EDSP
@@ -170,15 +171,15 @@
         if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
           flags |= HAS_EDSP;
         }
-#if HAVE_NEON
+#endif /* HAVE_EDSP */
+#if HAVE_NEON || HAVE_NEON_ASM
         p = strstr(buf, " neon");
         if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
           flags |= HAS_NEON;
         }
-#endif /* HAVE_NEON */
-#endif /* HAVE_EDSP */
+#endif /* HAVE_NEON || HAVE_NEON_ASM */
       }
-#endif /* HAVE_EDSP || HAVE_NEON */
+#endif /* HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM */
 #if HAVE_MEDIA
       if (memcmp(buf, "CPU architecture:", 17) == 0) {
         int version;
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index eb0a8d6..9ff764c 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -15,6 +15,7 @@
 extern "C" {
 #endif
 
+#include "vpx/vpx_codec.h"
 #include "vpx/vpx_frame_buffer.h"
 #include "vpx/vpx_integer.h"
 
@@ -50,6 +51,7 @@
   int buffer_alloc_sz;
   int border;
   int frame_size;
+  unsigned int bit_depth;
 
   int corrupted;
   int flags;