Merge changes from topic 'vp9-intra-pred'

* changes:
  vp9_reconintra_neon: add DC 16x16 predictors
  vp9_reconintra_neon: add DC 8x8 predictors
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 07d306f..434a382 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -165,7 +165,10 @@
   "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
   "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
   "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
+#if !CONFIG_SIZE_LIMIT || \
+    (DECODE_WIDTH_LIMIT >= 20400 && DECODE_HEIGHT_LIMIT >= 120)
   "vp90-2-13-largescaling.webm",
+#endif
   "vp90-2-14-resize-fp-tiles-1-16.webm",
   "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
   "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index fcf480b..0e9b1c5 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -699,7 +699,8 @@
 #if CONFIG_SIZE_LIMIT
   if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
-                       "Width and height beyond allowed size.");
+                       "Dimensions of %dx%d beyond allowed size of %dx%d.",
+                       width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
 #endif
   if (cm->width != width || cm->height != height) {
     const int new_mi_rows =
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 4a75613..2fdf408 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -697,7 +697,9 @@
   int min_log2_tile_cols, max_log2_tile_cols;
   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
 
-  if (is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING) {
+  if (is_two_pass_svc(cpi) &&
+      (cpi->svc.encode_empty_frame_state == ENCODING ||
+      cpi->svc.number_spatial_layers > 1)) {
     cm->log2_tile_cols = 0;
     cm->log2_tile_rows = 0;
   } else {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index b0e255d..1af6094 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -297,13 +297,11 @@
     else
       tx_size = TX_8X8;
 
-    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
-      if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
-          cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
-        tx_size = TX_8X8;
-      else if (tx_size > TX_16X16)
-        tx_size = TX_16X16;
-    }
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
+      tx_size = TX_8X8;
+    else if (tx_size > TX_16X16)
+      tx_size = TX_16X16;
   } else {
     tx_size = MIN(max_txsize_lookup[bsize],
                   tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
@@ -481,13 +479,11 @@
     else
       xd->mi[0]->mbmi.tx_size = TX_8X8;
 
-    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
-      if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
-          cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
-        xd->mi[0]->mbmi.tx_size = TX_8X8;
-      else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
-        xd->mi[0]->mbmi.tx_size = TX_16X16;
-    }
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
+      xd->mi[0]->mbmi.tx_size = TX_8X8;
+    else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
+      xd->mi[0]->mbmi.tx_size = TX_16X16;
   } else {
     xd->mi[0]->mbmi.tx_size =
         MIN(max_txsize_lookup[bsize],
@@ -1081,9 +1077,8 @@
   unsigned int var_y = UINT_MAX;
   unsigned int sse_y = UINT_MAX;
   // Reduce the intra cost penalty for small blocks (<=16x16).
-  const int reduction_fac =
-      (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
-       bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
+  const int reduction_fac = (bsize <= BLOCK_16X16) ?
+      ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
   const int intra_cost_penalty = vp9_get_intra_cost_penalty(
       cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
   const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
diff --git a/vpx_dsp/arm/variance_neon.c b/vpx_dsp/arm/variance_neon.c
index 1a9792e..ede6e7b 100644
--- a/vpx_dsp/arm/variance_neon.c
+++ b/vpx_dsp/arm/variance_neon.c
@@ -14,6 +14,7 @@
 #include "./vpx_config.h"
 
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
 
 static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
   const int32x4_t a = vpaddlq_s16(v_16x8);