Account for partition rd cost in rd square partition

The partition rd cost is now accounted for when computing the best rd
cost so far in the square partition search.
For the speed=1 preset, encode time is reduced by 1.5% for 20 frames of
BasketballDrill_832x480_50 content encoded at 1 Mbps, and an average
BD-rate improvement of 0.03% is seen in AWCY tests.

STATS_CHANGED

Change-Id: I1754b922beec1db4f781bb866933b8510db5164c
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index cb226c5..76ac7af 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2527,6 +2527,9 @@
     subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
     int idx;
 
+    sum_rdc.rate = partition_cost[PARTITION_SPLIT];
+    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+
     for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) {
       const int x_idx = (idx & 1) * mi_step;
       const int y_idx = (idx >> 1) * mi_step;
@@ -2538,12 +2541,12 @@
 
       pc_tree->split[idx]->index = idx;
       int64_t *p_split_rd = &split_rd[idx];
-      // TODO(Cherma) : Account for partition cost while passing best rd to
-      // rd_pick_sqr_partition()
-      rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row + y_idx,
-                            mi_col + x_idx, subsize, &this_rdc,
-                            temp_best_rdcost - sum_rdc.rdcost,
-                            pc_tree->split[idx], p_split_rd);
+      int64_t best_remain_rdcost = (temp_best_rdcost == INT64_MAX)
+                                       ? INT64_MAX
+                                       : (temp_best_rdcost - sum_rdc.rdcost);
+      rd_pick_sqr_partition(
+          cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
+          &this_rdc, best_remain_rdcost, pc_tree->split[idx], p_split_rd);
 
       pc_tree->pc_tree_stats.sub_block_rdcost[idx] = this_rdc.rdcost;
       pc_tree->pc_tree_stats.sub_block_skip[idx] =
@@ -2561,7 +2564,6 @@
     reached_last_index = (idx == 4);
 
     if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
-      sum_rdc.rate += partition_cost[PARTITION_SPLIT];
       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
 
       if (sum_rdc.rdcost < best_rdc.rdcost) {