Rework intra_model_yrd()

Use SATD to estimate the intra prediction quality. The compression
performance change is within 0.05% range in all intra mode for
speed 1 and 6. Speed 6 encoding speed up by ~10%.

STATS_CHANGED

BUG=aomedia:2959

Change-Id: If8dbf73bc1238021bbdb86639cb1a67135f60d94
diff --git a/aom_dsp/avg.c b/aom_dsp/avg.c
index 574c423..0ec21fb 100644
--- a/aom_dsp/avg.c
+++ b/aom_dsp/avg.c
@@ -123,7 +123,7 @@
     ++tmp_buf;
   }
 
-  for (idx = 0; idx < 16; ++idx) coeff[idx] = (tran_low_t)(buffer2[idx] << 1);
+  for (idx = 0; idx < 16; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];
 }
 
 // src_diff: first pass, 9 bit, dynamic range [-255, 255]
diff --git a/av1/encoder/intra_mode_search_utils.h b/av1/encoder/intra_mode_search_utils.h
index 18ab41b..00f8c47 100644
--- a/av1/encoder/intra_mode_search_utils.h
+++ b/av1/encoder/intra_mode_search_utils.h
@@ -390,48 +390,47 @@
   const AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(!is_inter_block(mbmi));
-  RD_STATS this_rd_stats;
   int row, col;
-  int64_t temp_sse, this_rd;
-  const ModeCosts *mode_costs = &x->mode_costs;
-  const TxfmSearchParams *txfm_params = &x->txfm_search_params;
-  TX_SIZE tx_size =
-      tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type);
+  assert(!is_inter_block(mbmi));
+  (void)mode_cost;
+  TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
   const int stepr = tx_size_high_unit[tx_size];
   const int stepc = tx_size_wide_unit[tx_size];
+  const int txbw = tx_size_wide[tx_size];
+  const int txbh = tx_size_high[tx_size];
   const int max_blocks_wide = max_block_wide(xd, bsize, 0);
   const int max_blocks_high = max_block_high(xd, bsize, 0);
   mbmi->tx_size = tx_size;
+  int64_t satd_cost = 0;
+  struct macroblock_plane *p = &x->plane[0];
+  struct macroblockd_plane *pd = &xd->plane[0];
   // Prediction.
   for (row = 0; row < max_blocks_high; row += stepr) {
     for (col = 0; col < max_blocks_wide; col += stepc) {
       av1_predict_intra_block_facade(cm, xd, 0, col, row, tx_size);
+      av1_subtract_block(
+          xd, txbh, txbw, p->src_diff, block_size_wide[bsize],
+          p->src.buf + (((row * p->src.stride) + col) << 2), p->src.stride,
+          pd->dst.buf + (((row * p->src.stride) + col) << 2), pd->dst.stride);
+      switch (tx_size) {
+        case TX_4X4:
+          aom_hadamard_4x4(p->src_diff, block_size_wide[bsize], p->coeff);
+          break;
+        case TX_8X8:
+          aom_hadamard_8x8(p->src_diff, block_size_wide[bsize], p->coeff);
+          break;
+        case TX_16X16:
+          aom_hadamard_16x16(p->src_diff, block_size_wide[bsize], p->coeff);
+          break;
+        case TX_32X32:
+          aom_hadamard_32x32(p->src_diff, block_size_wide[bsize], p->coeff);
+          break;
+        default: assert(0);
+      }
+      satd_cost += aom_satd(p->coeff, tx_size_2d[tx_size]);
     }
   }
-  // RD estimation.
-  model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model ? MODELRD_LEGACY
-                                                   : MODELRD_TYPE_INTRA](
-      cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate, &this_rd_stats.dist,
-      &this_rd_stats.skip_txfm, &temp_sse, NULL, NULL, NULL);
-  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
-    mode_cost += mode_costs->angle_delta_cost[mbmi->mode - V_PRED]
-                                             [MAX_ANGLE_DELTA +
-                                              mbmi->angle_delta[PLANE_TYPE_Y]];
-  }
-  if (mbmi->mode == DC_PRED &&
-      av1_filter_intra_allowed_bsize(cm, mbmi->bsize)) {
-    if (mbmi->filter_intra_mode_info.use_filter_intra) {
-      const int mode = mbmi->filter_intra_mode_info.filter_intra_mode;
-      mode_cost += mode_costs->filter_intra_cost[mbmi->bsize][1] +
-                   mode_costs->filter_intra_mode_cost[mode];
-    } else {
-      mode_cost += mode_costs->filter_intra_cost[mbmi->bsize][0];
-    }
-  }
-  this_rd =
-      RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
-  return this_rd;
+  return satd_cost;
 }
 /*!\endcond */
 
@@ -451,7 +450,7 @@
                                                 int64_t *best_model_rd) {
   const int64_t this_model_rd = intra_model_yrd(cpi, x, bsize, mode_info_cost);
   if (*best_model_rd != INT64_MAX &&
-      this_model_rd > *best_model_rd + (*best_model_rd >> 1)) {
+      this_model_rd > *best_model_rd + (*best_model_rd >> 2)) {
     return 1;
   } else if (this_model_rd < *best_model_rd) {
     *best_model_rd = this_model_rd;
diff --git a/test/horz_superres_test.cc b/test/horz_superres_test.cc
index b8a268e..9fcf405 100644
--- a/test/horz_superres_test.cc
+++ b/test/horz_superres_test.cc
@@ -52,7 +52,7 @@
 }
 
 const TestVideoParam kTestVideoVectors[] = {
-  { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0, 25.7,
+  { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0, 25.5,
     45.0 },
 #if CONFIG_AV1_HIGHBITDEPTH
   { "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0, 27.0,