Correct rd cost computation using RDCOST_DBL in hbd encode

For high bit-depth (hbd) encoding, the SSE is now scaled down according to
the bit depth (right-shifted by 2 * (bit_depth - 8)) before it is passed to
RDCOST_DBL to compute the rd cost.

Results on 10-bit encodes show a quality improvement:

          Instruction Count       BD-Rate Impact(%)
cpu-used    Reduction(%)     avg.psnr  ovr.psnr   ssim
   0         -0.114          -0.7501   -0.7521   -0.8904
   1         -0.182          -0.6677   -0.6641   -0.7598
   2          0.145          -0.5546   -0.5576   -0.6530
   3          0.294          -0.3103   -0.3010   -0.3582
   4          0.521          -0.3058   -0.3095   -0.3835

STATS_CHANGED for hbd encoding

Change-Id: I66c145c0dde998792e91010911b44a27ff316327
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c46c500..d146829 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2770,7 +2770,8 @@
     const int64_t this_sse = superres_sses[this_index];
     const int64_t this_rate = superres_rates[this_index];
     const int this_largest_tile_id = superres_largest_tile_ids[this_index];
-    const double this_rdcost = RDCOST_DBL(rdmult, this_rate, this_sse);
+    const double this_rdcost = RDCOST_DBL(
+        rdmult, this_rate, this_sse >> (2 * (cm->seq_params.bit_depth - 8)));
     if (this_rdcost < proj_rdcost1) {
       sse1 = this_sse;
       rate1 = this_rate;
@@ -2780,9 +2781,11 @@
     }
   }
 #else
-  const double proj_rdcost1 = RDCOST_DBL(rdmult, rate1, sse1);
+  const double proj_rdcost1 =
+      RDCOST_DBL(rdmult, rate1, sse1 >> (2 * (cm->seq_params.bit_depth - 8)));
 #endif  // SUPERRES_RECODE_ALL_RATIOS
-  const double proj_rdcost2 = RDCOST_DBL(rdmult, rate2, sse2);
+  const double proj_rdcost2 =
+      RDCOST_DBL(rdmult, rate2, sse2 >> (2 * (cm->seq_params.bit_depth - 8)));
 
   // Re-encode with superres if it's better.
   if (proj_rdcost1 < proj_rdcost2) {
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index 2fa848e..594c070 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -195,8 +195,10 @@
 
   // Update best error
   best_err = ss_err[filt_best];
+  const int sse_shift = 2 * (cm->seq_params.bit_depth - 8);
 
-  if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err);
+  if (best_cost_ret)
+    *best_cost_ret = RDCOST_DBL(x->rdmult, 0, ((best_err >> sse_shift) << 4));
   return filt_best;
 }
 
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 0123520..6548926 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -922,11 +922,11 @@
   const int64_t bits_sgr = x->mode_costs.sgrproj_restore_cost[1] +
                            (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj)
                             << AV1_PROB_COST_SHIFT);
-
-  double cost_none =
-      RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
-  double cost_sgr =
-      RDCOST_DBL(x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ]);
+  const int sse_shift = 2 * (bit_depth - 8);
+  double cost_none = RDCOST_DBL(x->rdmult, bits_none >> 4,
+                                rusi->sse[RESTORE_NONE] >> sse_shift);
+  double cost_sgr = RDCOST_DBL(x->rdmult, bits_sgr >> 4,
+                               rusi->sse[RESTORE_SGRPROJ] >> sse_shift);
   if (rusi->sgrproj.ep < 10)
     cost_sgr *=
         (1 + DUAL_SGR_PENALTY_MULT * rsc->lpf_sf->dual_sgr_penalty_level);
@@ -1571,10 +1571,11 @@
       (count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener)
        << AV1_PROB_COST_SHIFT);
 
-  double cost_none =
-      RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
-  double cost_wiener =
-      RDCOST_DBL(x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER]);
+  const int sse_shift = 2 * (rsc->cm->seq_params.bit_depth - 8);
+  double cost_none = RDCOST_DBL(x->rdmult, bits_none >> 4,
+                                rusi->sse[RESTORE_NONE] >> sse_shift);
+  double cost_wiener = RDCOST_DBL(x->rdmult, bits_wiener >> 4,
+                                  rusi->sse[RESTORE_WIENER] >> sse_shift);
 
   RestorationType rtype =
       (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
@@ -1657,7 +1658,8 @@
     }
     const int64_t coeff_bits = coeff_pcost << AV1_PROB_COST_SHIFT;
     const int64_t bits = x->mode_costs.switchable_restore_cost[r] + coeff_bits;
-    double cost = RDCOST_DBL(x->rdmult, bits >> 4, sse);
+    const int sse_shift = 2 * (rsc->cm->seq_params.bit_depth - 8);
+    double cost = RDCOST_DBL(x->rdmult, bits >> 4, sse >> sse_shift);
     if (r == RESTORE_SGRPROJ && rusi->sgrproj.ep < 10)
       cost *= (1 + DUAL_SGR_PENALTY_MULT * rsc->lpf_sf->dual_sgr_penalty_level);
     if (r == 0 || cost < best_cost) {
@@ -1696,7 +1698,8 @@
 
   av1_foreach_rest_unit_in_plane(rsc->cm, rsc->plane, funs[rtype], rsc,
                                  &rsc->tile_rect, rsc->cm->rst_tmpbuf, NULL);
-  return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse);
+  return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4,
+                    rsc->sse >> (2 * (rsc->cm->seq_params.bit_depth - 8)));
 }
 
 static int rest_tiles_in_plane(const AV1_COMMON *cm, int plane) {