Merge changes Id0a490bc,I1b4a0504

* changes:
  vp9/inverse_transform_block_inter: move eob check
  vp9/inverse_transform_block_intra: move eob check
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 3bd42ec..596427c 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -463,14 +463,13 @@
   }
 }
 
-static void dec_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist,
-                                  int_mv *best_mv, int refmv_count) {
+static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
+                                  int refmv_count) {
   int i;
 
   // Make sure all the candidates are properly clamped etc
   for (i = 0; i < refmv_count; ++i) {
     lower_mv_precision(&mvlist[i].as_mv, allow_hp);
-    clamp_mv2(&mvlist[i].as_mv, xd);
     *best_mv = mvlist[i];
   }
 }
@@ -778,7 +777,7 @@
                                        tmp_mvs, mi_row, mi_col, -1, 0,
                                        fpm_sync, (void *)pbi);
 
-        dec_find_best_ref_mvs(xd, allow_hp, tmp_mvs, &best_ref_mvs[ref],
+        dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
                               refmv_count);
       }
     }
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6454ad6..547c162 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2722,6 +2722,13 @@
   return scale;
 }
 
+static int big_rate_miss(VP9_COMP *cpi, int high_limit, int low_limit) {
+  const RATE_CONTROL *const rc = &cpi->rc;
+
+  return (rc->projected_frame_size > ((high_limit * 3) / 2)) ||
+         (rc->projected_frame_size < (low_limit / 2));
+}
+
 // Function to test for conditions that indicate we should loop
 // back and recode a frame.
 static int recode_loop_test(VP9_COMP *cpi,
@@ -2733,6 +2740,7 @@
   int force_recode = 0;
 
   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
+      big_rate_miss(cpi, high_limit, low_limit) ||
       (cpi->sf.recode_loop == ALLOW_RECODE) ||
       (frame_is_kfgfarf &&
        (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) {
@@ -3090,7 +3098,27 @@
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 
-  if (cpi->twopass.total_left_stats.coded_error != 0.0)
+  if (cpi->twopass.total_left_stats.coded_error != 0.0) {
+    double dc_quant_devisor;
+#if CONFIG_VP9_HIGHBITDEPTH
+    switch (cm->bit_depth) {
+      case VPX_BITS_8:
+        dc_quant_devisor = 4.0;
+        break;
+      case VPX_BITS_10:
+        dc_quant_devisor = 16.0;
+        break;
+      case VPX_BITS_12:
+        dc_quant_devisor = 64.0;
+        break;
+      default:
+        assert(0 && "bit_depth must be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+        break;
+    }
+#else
+    dc_quant_devisor = 4.0;
+#endif
+
     fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
        "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
        "%10"PRId64" %10"PRId64" %10d "
@@ -3116,7 +3144,8 @@
         (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
         cpi->rc.total_actual_bits, cm->base_qindex,
         vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
-        (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) / 4.0,
+        (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
+            dc_quant_devisor,
         vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
                                 cm->bit_depth),
         cpi->rc.avg_q,
@@ -3131,7 +3160,7 @@
         cpi->twopass.kf_zeromotion_pct,
         cpi->twopass.fr_content_type,
         cm->lf.filter_level);
-
+  }
   fclose(f);
 
   if (0) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 9d3b154..10fd6c0 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -490,7 +490,35 @@
   cpi->rc.frames_to_key = INT_MAX;
 }
 
+// This threshold is used to track blocks where to all intents and purposes
+// the intra prediction error 0. Though the metric we test against
+// is technically a sse we are mainly interested in blocks where all the pixels
+// int he 8 bit domain have an error of <= 1 (where error = sse) so a
+// linear scaling for 10 and 12 bit gives similar results.
 #define UL_INTRA_THRESH 50
+#if CONFIG_VP9_HIGHBITDEPTH
+static int get_ul_intra_threshold(VP9_COMMON *cm) {
+  int ret_val = UL_INTRA_THRESH;
+  if (cm->use_highbitdepth) {
+    switch (cm->bit_depth) {
+      case VPX_BITS_8:
+        ret_val = UL_INTRA_THRESH;
+        break;
+      case VPX_BITS_10:
+        ret_val = UL_INTRA_THRESH >> 2;
+        break;
+      case VPX_BITS_12:
+        ret_val = UL_INTRA_THRESH >> 4;
+        break;
+      default:
+        assert(0 && "cm->bit_depth should be VPX_BITS_8, "
+                    "VPX_BITS_10 or VPX_BITS_12");
+    }
+  }
+  return ret_val;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 #define INVALID_ROW -1
 void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
   int mb_row, mb_col;
@@ -681,7 +709,11 @@
       // domain). In natural videos this is uncommon, but it is much more
       // common in animations, graphics and screen content, so may be used
       // as a signal to detect these types of content.
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (this_error < get_ul_intra_threshold(cm)) {
+#else
       if (this_error < UL_INTRA_THRESH) {
+#endif
         ++intra_skip_count;
       } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
         image_data_start_row = mb_row;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index c875d02..5921636 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -487,8 +487,11 @@
 
   oxcf->tile_columns = extra_cfg->tile_columns;
 
-  // The dependencies between row tiles cause error in multi-threaded encoding.
-  // For now, it is forced to be 0 in this case.
+  // TODO(yunqing): The dependencies between row tiles cause error in multi-
+  // threaded encoding. For now, tile_rows is forced to be 0 in this case.
+  // The further fix can be done by adding synchronizations after a tile row
+  // is encoded. But this will hurt multi-threaded encoder performance. So,
+  // it is recommended to use tile-rows=0 while encoding with threads > 1.
   if (oxcf->max_threads > 1 && oxcf->tile_columns > 0)
     oxcf->tile_rows  = 0;
   else