Merge "Initial AQ1 restructuring."
diff --git a/README b/README
index 6f864d8..584a344 100644
--- a/README
+++ b/README
@@ -47,10 +47,6 @@
   --help output of the configure script. As of this writing, the list of
   available targets is:
 
-    armv5te-android-gcc
-    armv5te-linux-rvct
-    armv5te-linux-gcc
-    armv5te-none-rvct
     armv6-darwin-gcc
     armv6-linux-rvct
     armv6-linux-gcc
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 6763688..7f4c6ba 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -639,9 +639,6 @@
                 tgt_isa=armv7
                 float_abi=softfp
                 ;;
-            armv5te*)
-                tgt_isa=armv5te
-                ;;
             *x86_64*|*amd64*)
                 tgt_isa=x86_64
                 ;;
diff --git a/configure b/configure
index b374fa0..c23b5fd 100755
--- a/configure
+++ b/configure
@@ -95,10 +95,6 @@
 
 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} armv5te-android-gcc"
-all_platforms="${all_platforms} armv5te-linux-rvct"
-all_platforms="${all_platforms} armv5te-linux-gcc"
-all_platforms="${all_platforms} armv5te-none-rvct"
 all_platforms="${all_platforms} armv6-darwin-gcc"
 all_platforms="${all_platforms} armv6-linux-rvct"
 all_platforms="${all_platforms} armv6-linux-gcc"
diff --git a/test/quantize_test.cc b/test/quantize_test.cc
index 756d0f6..329227b 100644
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -147,6 +147,14 @@
   RunComparison();
 }
 
+TEST_P(QuantizeTest, TestLargeNegativeInput) {
+  FillCoeffConstant(0);
+  // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
+  // like BUG=883 where the constant being compared was incorrectly initialized.
+  vp8_comp_->mb.coeff[0] = -8191;
+  RunComparison();
+}
+
 TEST_P(QuantizeTest, TestRandomInput) {
   FillCoeffRandom();
   RunComparison();
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index cc35476..902a6fc 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -207,7 +207,7 @@
 int Sync(VP9Worker *const worker) { return !worker->had_error; }
 
 void Execute(VP9Worker *const worker) {
-  worker->had_error |= worker->hook(worker->data1, worker->data2);
+  worker->had_error |= !worker->hook(worker->data1, worker->data2);
 }
 
 void Launch(VP9Worker *const worker) { Execute(worker); }
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.c b/vp8/encoder/arm/neon/fastquantizeb_neon.c
index caa7637..e5824bf 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.c
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.c
@@ -19,7 +19,7 @@
 };
 
 void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
-    const int16x8_t one_q = vdupq_n_s16(0xff),
+    const int16x8_t one_q = vdupq_n_s16(-1),
                     z0 = vld1q_s16(b->coeff),
                     z1 = vld1q_s16(b->coeff + 8),
                     round0 = vld1q_s16(b->round),
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 23575b0..3d1d1b4 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -460,6 +460,10 @@
   {THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA},
 };
 
+static const PREDICTION_MODE intra_mode_list[] = {
+  DC_PRED, V_PRED, H_PRED, TM_PRED
+};
+
 // TODO(jingning) placeholder for inter-frame non-RD mode decision.
 // this needs various further optimizations. to be continued..
 void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -796,11 +800,11 @@
   // threshold.
   if (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
       bsize <= cpi->sf.max_intra_bsize) {
-    PREDICTION_MODE this_mode;
     struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
     const TX_SIZE intra_tx_size =
         MIN(max_txsize_lookup[bsize],
             tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+    int i;
 
     if (reuse_inter_pred && best_pred != NULL) {
       if (best_pred->data == orig_dst.buf) {
@@ -813,17 +817,18 @@
     }
     pd->dst = orig_dst;
 
-    // Change the limit of this loop to add other intra prediction
-    // mode tests.
-    for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
+    for (i = 0; i < 4; ++i) {
       const TX_SIZE saved_tx_size = mbmi->tx_size;
+      const PREDICTION_MODE this_mode = intra_mode_list[i];
+      if (!((1 << this_mode) & cpi->sf.intra_y_mode_mask[intra_tx_size]))
+        continue;
+      skip_txfm = x->skip_txfm[0];
       args.mode = this_mode;
       args.rate = 0;
       args.dist = 0;
       mbmi->tx_size = intra_tx_size;
       vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                              estimate_block_intra, &args);
-      mbmi->tx_size = saved_tx_size;
       this_rdc.rate = args.rate;
       this_rdc.dist = args.dist;
       this_rdc.rate += cpi->mbmode_cost[this_mode];
@@ -840,6 +845,7 @@
         mbmi->mv[0].as_int = INVALID_MV;
       } else {
         x->skip_txfm[0] = skip_txfm;
+        mbmi->tx_size = saved_tx_size;
       }
     }
   }
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 48c4444..33670d3 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -107,7 +107,6 @@
   int tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
 
   int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
-  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
 
   int RDMULT;
   int RDDIV;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index d70c76c..98524c2 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2360,9 +2360,9 @@
                                  int (*single_skippable)[MAX_REF_FRAMES],
                                  int64_t *psse,
                                  const int64_t ref_best_rd,
-                                 int64_t *mask_filter) {
+                                 int64_t *mask_filter,
+                                 int64_t filter_cache[]) {
   VP9_COMMON *cm = &cpi->common;
-  RD_OPT *rd_opt = &cpi->rd;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
   const int is_comp_pred = has_second_ref(mbmi);
@@ -2502,7 +2502,7 @@
   // Search for best switchable filter by checking the variance of
   // pred error irrespective of whether the filter will be used
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-    rd_opt->filter_cache[i] = INT64_MAX;
+    filter_cache[i] = INT64_MAX;
 
   if (cm->interp_filter != BILINEAR) {
     if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
@@ -2524,9 +2524,9 @@
 
         if (i > 0 && intpel_mv) {
           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
-          rd_opt->filter_cache[i] = rd;
-          rd_opt->filter_cache[SWITCHABLE_FILTERS] =
-              MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+          filter_cache[i] = rd;
+          filter_cache[SWITCHABLE_FILTERS] =
+              MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
           if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
           *mask_filter = MAX(*mask_filter, rd);
@@ -2557,9 +2557,9 @@
                           &tmp_skip_sb, &tmp_skip_sse);
 
           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
-          rd_opt->filter_cache[i] = rd;
-          rd_opt->filter_cache[SWITCHABLE_FILTERS] =
-              MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+          filter_cache[i] = rd;
+          filter_cache[SWITCHABLE_FILTERS] =
+              MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
           if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
           *mask_filter = MAX(*mask_filter, rd);
@@ -2819,11 +2819,15 @@
   int *mode_map = tile_data->mode_map[bsize];
   const int mode_search_skip_flags = sf->mode_search_skip_flags;
   int64_t mask_filter = 0;
+  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
 
   vp9_zero(best_mbmode);
 
   x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
+  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+    filter_cache[i] = INT64_MAX;
+
   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                            &comp_mode_p);
 
@@ -3165,7 +3169,7 @@
                                   mi_row, mi_col,
                                   single_newmv, single_inter_filter,
                                   single_skippable, &total_sse, best_rd,
-                                  &mask_filter);
+                                  &mask_filter, filter_cache);
       if (this_rd == INT64_MAX)
         continue;
 
@@ -3306,21 +3310,21 @@
 
       /* keep record of best filter type */
       if (!mode_excluded && cm->interp_filter != BILINEAR) {
-        int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
+        int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
                               SWITCHABLE_FILTERS : cm->interp_filter];
 
         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
           int64_t adj_rd;
           if (ref == INT64_MAX)
             adj_rd = 0;
-          else if (rd_opt->filter_cache[i] == INT64_MAX)
+          else if (filter_cache[i] == INT64_MAX)
             // when early termination is triggered, the encoder does not have
             // access to the rate-distortion cost. it only knows that the cost
             // should be above the maximum valid value. hence it takes the known
             // maximum plus an arbitrary constant as the rate-distortion cost.
             adj_rd = mask_filter - ref + 10;
           else
-            adj_rd = rd_opt->filter_cache[i] - ref;
+            adj_rd = filter_cache[i] - ref;
 
           adj_rd += this_rd;
           best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
@@ -3469,7 +3473,6 @@
                                         PICK_MODE_CONTEXT *ctx,
                                         int64_t best_rd_so_far) {
   VP9_COMMON *const cm = &cpi->common;
-  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
   unsigned char segment_id = mbmi->segment_id;
@@ -3506,11 +3509,6 @@
   mbmi->mv[0].as_int = 0;
   x->skip = 1;
 
-  // Search for best switchable filter by checking the variance of
-  // pred error irrespective of whether the filter will be used
-  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-    rd_opt->filter_cache[i] = INT64_MAX;
-
   if (cm->interp_filter != BILINEAR) {
     best_filter = EIGHTTAP;
     if (cm->interp_filter == SWITCHABLE &&
@@ -3614,11 +3612,15 @@
   int best_skip2 = 0;
   int ref_frame_skip_mask[2] = { 0 };
   int64_t mask_filter = 0;
+  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
 
   x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
   vp9_zero(best_mbmode);
 
+  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+    filter_cache[i] = INT64_MAX;
+
   for (i = 0; i < 4; i++) {
     int j;
     for (j = 0; j < MAX_REF_FRAMES; j++)
@@ -3812,7 +3814,7 @@
       this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
       rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-        rd_opt->filter_cache[i] = INT64_MAX;
+        filter_cache[i] = INT64_MAX;
 
       if (cm->interp_filter != BILINEAR) {
         tmp_best_filter = EIGHTTAP;
@@ -3844,9 +3846,9 @@
               continue;
             rs = vp9_get_switchable_rate(cpi);
             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
-            rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
-            rd_opt->filter_cache[SWITCHABLE_FILTERS] =
-                MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
+            filter_cache[switchable_filter_index] = tmp_rd;
+            filter_cache[SWITCHABLE_FILTERS] =
+                MIN(filter_cache[SWITCHABLE_FILTERS],
                     tmp_rd + rs_rd);
             if (cm->interp_filter == SWITCHABLE)
               tmp_rd += rs_rd;
@@ -4075,20 +4077,20 @@
     /* keep record of best filter type */
     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
         cm->interp_filter != BILINEAR) {
-      int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
+      int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
                               SWITCHABLE_FILTERS : cm->interp_filter];
       int64_t adj_rd;
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
         if (ref == INT64_MAX)
           adj_rd = 0;
-        else if (rd_opt->filter_cache[i] == INT64_MAX)
+        else if (filter_cache[i] == INT64_MAX)
           // when early termination is triggered, the encoder does not have
           // access to the rate-distortion cost. it only knows that the cost
           // should be above the maximum valid value. hence it takes the known
           // maximum plus an arbitrary constant as the rate-distortion cost.
           adj_rd = mask_filter - ref + 10;
         else
-          adj_rd = rd_opt->filter_cache[i] - ref;
+          adj_rd = filter_cache[i] - ref;
 
         adj_rd += this_rd;
         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 6b9572f..27c8829 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -304,16 +304,19 @@
     // This feature is only enabled when partition search is disabled.
     sf->reuse_inter_pred_sby = 1;
     sf->partition_search_breakout_rate_thr = 200;
+    if (!is_keyframe) {
+      int i;
+      if (content == VP9E_CONTENT_SCREEN) {
+        for (i = 0; i < TX_SIZES; ++i)
+          sf->intra_y_mode_mask[i] = INTRA_DC_TM_H_V;
+      } else {
+        for (i = 0; i < TX_SIZES; i++)
+          sf->intra_y_mode_mask[i] = INTRA_DC;
+      }
+    }
   }
 
   if (speed >= 6) {
-    if (content == VP9E_CONTENT_SCREEN) {
-      int i;
-      // Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used
-      for (i = 0; i < BLOCK_SIZES; ++i)
-        sf->inter_mode_mask[i] = INTER_NEAREST_NEAR_NEW;
-    }
-
     // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
     sf->partition_search_type = VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;