Merge "Remove double counting for mv costs" into nextgenv2
diff --git a/vp10/common/blockd.c b/vp10/common/blockd.c
index ad30bc3..7339493 100644
--- a/vp10/common/blockd.c
+++ b/vp10/common/blockd.c
@@ -140,24 +140,90 @@
 }
 
 #if CONFIG_EXT_INTRA
+// Entry [angle] is {dx, dy} for angle in degrees, with t = |tan(angle)|:
+// (0, 90): {-(int)(256 / t), 1}; (90, 180): {(int)(256 / t), (int)(256 * t)};
+// (180, 270): {1, -(int)(256 * t)}; angles 0, 90 and 180 hold {1, 1}.
+const int16_t dr_intra_derivative[270][2] = {
+    {     1,     1 }, { -14666,    1 }, { -7330,     1 }, { -4884,     1 },
+    { -3660,     1 }, { -2926,     1 }, { -2435,     1 }, { -2084,     1 },
+    { -1821,     1 }, { -1616,     1 }, { -1451,     1 }, { -1317,     1 },
+    { -1204,     1 }, { -1108,     1 }, { -1026,     1 }, {  -955,     1 },
+    {  -892,     1 }, {  -837,     1 }, {  -787,     1 }, {  -743,     1 },
+    {  -703,     1 }, {  -666,     1 }, {  -633,     1 }, {  -603,     1 },
+    {  -574,     1 }, {  -548,     1 }, {  -524,     1 }, {  -502,     1 },
+    {  -481,     1 }, {  -461,     1 }, {  -443,     1 }, {  -426,     1 },
+    {  -409,     1 }, {  -394,     1 }, {  -379,     1 }, {  -365,     1 },
+    {  -352,     1 }, {  -339,     1 }, {  -327,     1 }, {  -316,     1 },
+    {  -305,     1 }, {  -294,     1 }, {  -284,     1 }, {  -274,     1 },
+    {  -265,     1 }, {  -256,     1 }, {  -247,     1 }, {  -238,     1 },
+    {  -230,     1 }, {  -222,     1 }, {  -214,     1 }, {  -207,     1 },
+    {  -200,     1 }, {  -192,     1 }, {  -185,     1 }, {  -179,     1 },
+    {  -172,     1 }, {  -166,     1 }, {  -159,     1 }, {  -153,     1 },
+    {  -147,     1 }, {  -141,     1 }, {  -136,     1 }, {  -130,     1 },
+    {  -124,     1 }, {  -119,     1 }, {  -113,     1 }, {  -108,     1 },
+    {  -103,     1 }, {   -98,     1 }, {   -93,     1 }, {   -88,     1 },
+    {   -83,     1 }, {   -78,     1 }, {   -73,     1 }, {   -68,     1 },
+    {   -63,     1 }, {   -59,     1 }, {   -54,     1 }, {   -49,     1 },
+    {   -45,     1 }, {   -40,     1 }, {   -35,     1 }, {   -31,     1 },
+    {   -26,     1 }, {   -22,     1 }, {   -17,     1 }, {   -13,     1 },
+    {    -8,     1 }, {    -4,     1 }, {     1,     1 }, {     4, 14666 },
+    {     8,  7330 }, {    13,  4884 }, {    17,  3660 }, {    22,  2926 },
+    {    26,  2435 }, {    31,  2084 }, {    35,  1821 }, {    40,  1616 },
+    {    45,  1451 }, {    49,  1317 }, {    54,  1204 }, {    59,  1108 },
+    {    63,  1026 }, {    68,   955 }, {    73,   892 }, {    78,   837 },
+    {    83,   787 }, {    88,   743 }, {    93,   703 }, {    98,   666 },
+    {   103,   633 }, {   108,   603 }, {   113,   574 }, {   119,   548 },
+    {   124,   524 }, {   130,   502 }, {   136,   481 }, {   141,   461 },
+    {   147,   443 }, {   153,   426 }, {   159,   409 }, {   166,   394 },
+    {   172,   379 }, {   179,   365 }, {   185,   352 }, {   192,   339 },
+    {   200,   327 }, {   207,   316 }, {   214,   305 }, {   222,   294 },
+    {   230,   284 }, {   238,   274 }, {   247,   265 }, {   255,   256 },
+    {   265,   247 }, {   274,   238 }, {   284,   230 }, {   294,   222 },
+    {   305,   214 }, {   316,   207 }, {   327,   200 }, {   339,   192 },
+    {   352,   185 }, {   365,   179 }, {   379,   172 }, {   394,   166 },
+    {   409,   159 }, {   426,   153 }, {   443,   147 }, {   461,   141 },
+    {   481,   136 }, {   502,   130 }, {   524,   124 }, {   548,   119 },
+    {   574,   113 }, {   603,   108 }, {   633,   103 }, {   666,    98 },
+    {   703,    93 }, {   743,    88 }, {   787,    83 }, {   837,    78 },
+    {   892,    73 }, {   955,    68 }, {  1026,    63 }, {  1108,    59 },
+    {  1204,    54 }, {  1317,    49 }, {  1451,    45 }, {  1616,    40 },
+    {  1821,    35 }, {  2084,    31 }, {  2435,    26 }, {  2926,    22 },
+    {  3660,    17 }, {  4884,    13 }, {  7330,     8 }, { 14666,     4 },
+    {     1,     1 }, {     1,    -4 }, {     1,    -8 }, {     1,   -13 },
+    {     1,   -17 }, {     1,   -22 }, {     1,   -26 }, {     1,   -31 },
+    {     1,   -35 }, {     1,   -40 }, {     1,   -45 }, {     1,   -49 },
+    {     1,   -54 }, {     1,   -59 }, {     1,   -63 }, {     1,   -68 },
+    {     1,   -73 }, {     1,   -78 }, {     1,   -83 }, {     1,   -88 },
+    {     1,   -93 }, {     1,   -98 }, {     1,  -103 }, {     1,  -108 },
+    {     1,  -113 }, {     1,  -119 }, {     1,  -124 }, {     1,  -130 },
+    {     1,  -136 }, {     1,  -141 }, {     1,  -147 }, {     1,  -153 },
+    {     1,  -159 }, {     1,  -166 }, {     1,  -172 }, {     1,  -179 },
+    {     1,  -185 }, {     1,  -192 }, {     1,  -200 }, {     1,  -207 },
+    {     1,  -214 }, {     1,  -222 }, {     1,  -230 }, {     1,  -238 },
+    {     1,  -247 }, {     1,  -255 }, {     1,  -265 }, {     1,  -274 },
+    {     1,  -284 }, {     1,  -294 }, {     1,  -305 }, {     1,  -316 },
+    {     1,  -327 }, {     1,  -339 }, {     1,  -352 }, {     1,  -365 },
+    {     1,  -379 }, {     1,  -394 }, {     1,  -409 }, {     1,  -426 },
+    {     1,  -443 }, {     1,  -461 }, {     1,  -481 }, {     1,  -502 },
+    {     1,  -524 }, {     1,  -548 }, {     1,  -574 }, {     1,  -603 },
+    {     1,  -633 }, {     1,  -666 }, {     1,  -703 }, {     1,  -743 },
+    {     1,  -787 }, {     1,  -837 }, {     1,  -892 }, {     1,  -955 },
+    {     1, -1026 }, {     1, -1108 }, {     1, -1204 }, {     1, -1317 },
+    {     1, -1451 }, {     1, -1616 }, {     1, -1821 }, {     1, -2084 },
+    {     1, -2435 }, {     1, -2926 }, {     1, -3660 }, {     1, -4884 },
+    {     1, -7330 }, {     1, -14666 },
+};
+
 // Returns whether filter selection is needed for a given
 // intra prediction angle.
 int pick_intra_filter(int angle) {
+  assert(angle > 0 && angle < 270);  // angle indexes dr_intra_derivative[270].
   if (angle % 45 == 0)
     return 0;
   if (angle > 90 && angle < 180) {
     return 1;
   } else {
-    double t;
-    double n;
-
-    vpx_clear_system_state();
-
-    t = tan(angle * PI / 180.0);
-    if (angle < 90)
-      t = 1 / t;
-    n = floor(t);
-    return (t - n) * 1024 > 1;
+    return ((-(dr_intra_derivative[angle][angle > 180])) & 0xFF) > 0;
   }
 }
 #endif  // CONFIG_EXT_INTRA
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index fbcba09..b6eeee5 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -483,6 +483,8 @@
 #define ANGLE_SKIP_THRESH 0.10
 #define FILTER_FAST_SEARCH 1
 
+extern const int16_t dr_intra_derivative[270][2];
+
 static uint8_t mode_to_angle_map[INTRA_MODES] = {
     0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
 };
diff --git a/vp10/common/entropymv.c b/vp10/common/entropymv.c
index 6e8c993..097d693 100644
--- a/vp10/common/entropymv.c
+++ b/vp10/common/entropymv.c
@@ -132,14 +132,11 @@
   return c;
 }
 
+// TODO(jingning): This stub ignores ref and always returns 1; it is
+// intentionally left as is for experimental purposes.
 int vp10_use_mv_hp(const MV *ref) {
-#if CONFIG_MISC_FIXES
   (void) ref;
   return 1;
-#else
-  return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
-         (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
-#endif
 }
 
 static void inc_mv_component(int v, nmv_component_counts *comp_counts,
@@ -160,14 +157,16 @@
   if (c == MV_CLASS_0) {
     comp_counts->class0[d] += incr;
     comp_counts->class0_fp[d][f] += incr;
-    comp_counts->class0_hp[e] += usehp * incr;
+    if (usehp)
+      comp_counts->class0_hp[e] += incr;
   } else {
     int i;
     int b = c + CLASS0_BITS - 1;  // number of bits
     for (i = 0; i < b; ++i)
       comp_counts->bits[i][((d >> i) & 1)] += incr;
     comp_counts->fp[f] += incr;
-    comp_counts->hp[e] += usehp * incr;
+    if (usehp)
+      comp_counts->hp[e] += incr;
   }
 }
 
@@ -182,15 +181,11 @@
 #endif
     ++counts->joints[j];
 
-    if (mv_joint_vertical(j)) {
-      inc_mv_component(mv->row, &counts->comps[0], 1,
-                       !CONFIG_MISC_FIXES || usehp);
-    }
+    if (mv_joint_vertical(j))
+      inc_mv_component(mv->row, &counts->comps[0], 1, usehp);
 
-    if (mv_joint_horizontal(j)) {
-      inc_mv_component(mv->col, &counts->comps[1], 1,
-                       !CONFIG_MISC_FIXES || usehp);
-    }
+    if (mv_joint_horizontal(j))
+      inc_mv_component(mv->col, &counts->comps[1], 1, usehp);
   }
 }
 
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index a7bd6c4..ef046e9 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -672,26 +672,16 @@
 static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                          const uint8_t *above, const uint8_t *left, int angle,
                          INTRA_FILTER filter_type) {
-  double t;
-  int dx, dy;
-  int bs = 4 << tx_size;
+  const int dx = (int)dr_intra_derivative[angle][0];
+  const int dy = (int)dr_intra_derivative[angle][1];
+  const int bs = 4 << tx_size;
+  assert(angle > 0 && angle < 270);
 
-  vpx_clear_system_state();
-  t = 0;
-  if (angle != 90 && angle != 180)
-    t = tan(angle * PI / 180.0);
   if (angle > 0 && angle < 90) {
-    dx = -((int)(256 / t));
-    dy = 1;
     dr_prediction_z1(dst, stride, bs, above, left, dx, dy, filter_type);
   } else if (angle > 90 && angle < 180) {
-    t = -t;
-    dx = (int)(256 / t);
-    dy = (int)(256 * t);
     dr_prediction_z2(dst, stride, bs, above, left, dx, dy, filter_type);
   } else if (angle > 180 && angle < 270) {
-    dx = 1;
-    dy = -((int)(256 * t));
     dr_prediction_z3(dst, stride, bs, above, left, dx, dy, filter_type);
   } else if (angle == 90) {
     pred[V_PRED][tx_size](dst, stride, above, left);
@@ -1010,25 +1000,15 @@
 static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
                                 const uint16_t *above, const uint16_t *left,
                                 int angle, int bd, INTRA_FILTER filter) {
-  double t;
-  int dx, dy;
+  const int dx = (int)dr_intra_derivative[angle][0];
+  const int dy = (int)dr_intra_derivative[angle][1];
+  assert(angle > 0 && angle < 270);
 
-  vpx_clear_system_state();
-  t = 0;
-  if (angle != 90 && angle != 180)
-    t = tan(angle * PI / 180.0);
   if (angle > 0 && angle < 90) {
-    dx = -((int)(256 / t));
-    dy = 1;
     highbd_dr_prediction_z1(dst, stride, bs, above, left, dx, dy, bd, filter);
   } else if (angle > 90 && angle < 180) {
-    t = -t;
-    dx = (int)(256 / t);
-    dy = (int)(256 * t);
     highbd_dr_prediction_z2(dst, stride, bs, above, left, dx, dy, bd, filter);
   } else if (angle > 180 && angle < 270) {
-    dx = 1;
-    dy = -((int)(256 * t));
     highbd_dr_prediction_z3(dst, stride, bs, above, left, dx, dy, bd, filter);
   } else if (angle == 90) {
     highbd_v_predictor(dst, stride, bs, above, left, bd);
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 40d4c36..437b366 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -243,10 +243,10 @@
 #else
   if (cpi->common.allow_high_precision_mv) {
     mb->mvcost = mb->nmvcost_hp;
-    mb->mvsadcost = mb->nmvsadcost_hp;
+    mb->mvsadcost = mb->nmvcost_hp;
   } else {
     mb->mvcost = mb->nmvcost;
-    mb->mvsadcost = mb->nmvsadcost;
+    mb->mvsadcost = mb->nmvcost;
   }
 #endif
 }
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 823095e..dd0c311 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -98,22 +98,12 @@
 
 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                           int sad_per_bit) {
-#if CONFIG_REF_MV
   const MV diff = { (mv->row - ref->row) * 8,
                     (mv->col - ref->col) * 8 };
   return ROUND_POWER_OF_TWO(
       (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) *
           sad_per_bit,
       VP9_PROB_COST_SHIFT);
-#else
-  const MV diff = { mv->row - ref->row,
-                    mv->col - ref->col };
-
-  return ROUND_POWER_OF_TWO(
-      (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *
-          sad_per_bit,
-      VP9_PROB_COST_SHIFT);
-#endif
 }
 
 void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride) {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index a546322..0d2dada 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1407,6 +1407,7 @@
   int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
   TX_SIZE best_tx = max_tx_size;
+  uint8_t zcoeff_blk[TX_SIZES][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
   const int tx_select = cm->tx_mode == TX_MODE_SELECT;
   const int is_inter = is_inter_block(mbmi);
 #if CONFIG_EXT_TX
@@ -1477,10 +1478,17 @@
       *rate       = r;
       *skip       = s;
       *psse       = sse;
+      memcpy(zcoeff_blk[mbmi->tx_size], x->zcoeff_blk[mbmi->tx_size],
+             sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+             MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
     }
   }
   mbmi->tx_size = best_tx;
 
+  memcpy(x->zcoeff_blk[mbmi->tx_size], zcoeff_blk[mbmi->tx_size],
+         sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+         MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
+
   return best_rd;
 }
 
@@ -1639,6 +1647,7 @@
                                    BLOCK_SIZE bs) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  uint8_t zcoeff_blk[TX_SIZES][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
   int r, s;
   int64_t d, sse;
   int64_t rd = INT64_MAX;
@@ -1671,6 +1680,9 @@
       *psse       = sse;
       best_tx_type = tx_type;
       best_tx = mbmi->tx_size;
+      memcpy(zcoeff_blk[mbmi->tx_size], x->zcoeff_blk[mbmi->tx_size],
+             sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+             MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
     }
   }
 
@@ -1681,6 +1693,10 @@
   if (mbmi->tx_size >= TX_32X32)
     assert(mbmi->tx_type == DCT_DCT);
 #endif
+
+  memcpy(x->zcoeff_blk[mbmi->tx_size], zcoeff_blk[mbmi->tx_size],
+         sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+         MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
 }
 
 static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -2517,7 +2533,7 @@
 
 static INLINE int get_angle_index(double angle) {
   const double step = 22.5, base = 45;
-  return (int)round((angle - base) / step);
+  return (int)lround((angle - base) / step);  // 45 -> 0, 67.5 -> 1, 90 -> 2.
 }
 
 static void angle_estimation(const uint8_t *src, int src_stride,