Modify RDO for use with Daala TX constant-depth coeffs

Modify the portions of RDO using TX-domain coeff calculations to deal
with TX_COEFF_DEPTH and constant-depth coefficient scaling.  At
present, this represents no functional change.

subset-1:
monty-rest-of-stack-quant-s1@2017-11-13T14:38:43.774Z ->
 monty-rest-of-stack-RDO-s1@2017-11-13T14:39:17.093Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

objective-1-fast --limit=4:
monty-rest-of-stack-quant-o1f4@2017-11-13T14:38:28.828Z ->
 monty-rest-of-stack-RDO-o1f4@2017-11-13T14:38:57.951Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: I0fbc45e018f565f48e1fc8fdeabfcd6cb6fa62fe
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 1a56a93..416171e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1588,7 +1588,12 @@
                                  int64_t *ssz, int bd) {
   int i;
   int64_t error = 0, sqcoeff = 0;
+#if CONFIG_DAALA_TX
+  (void)bd;
+  int shift = 2 * (TX_COEFF_DEPTH - 11);
+#else
   int shift = 2 * (bd - 8);
+#endif
   int rounding = shift > 0 ? 1 << (shift - 1) : 0;
 
   for (i = 0; i < block_size; i++) {
@@ -1926,7 +1931,13 @@
     // not involve an inverse transform, but it is less accurate.
     const int buffer_length = tx_size_2d[tx_size];
     int64_t this_sse;
+// TX-domain results need to shift down to Q2/D10 to match pixel
+// domain distortion values which are in Q2^2
+#if CONFIG_DAALA_TX
+    int shift = (TX_COEFF_DEPTH - 10) * 2;
+#else
     int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+#endif
     tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
     tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 
@@ -2106,7 +2117,13 @@
   av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                   coeff_ctx, AV1_XFORM_QUANT_FP);
 
+// TX-domain results need to shift down to Q2/D10 to match pixel
+// domain distortion values which are in Q2^2
+#if CONFIG_DAALA_TX
+  const int shift = (TX_COEFF_DEPTH - 10) * 2;
+#else
   const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+#endif
   tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
   const int buffer_length = tx_size_2d[tx_size];
@@ -3658,6 +3675,7 @@
   const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l);
   const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte;
 
+  // Note: tmp below is pixel distortion, not TX domain
   tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
                         plane_bsize, txm_bsize);
 
@@ -3714,7 +3732,13 @@
   av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                   coeff_ctx, AV1_XFORM_QUANT_FP);
 
+// TX-domain results need to shift down to Q2/D10 to match pixel
+// domain distortion values which are in Q2^2
+#if CONFIG_DAALA_TX
+  const int shift = (TX_COEFF_DEPTH - 10) * 2;
+#else
   const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+#endif
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   const int buffer_length = tx_size_2d[tx_size];
   int64_t tmp_dist, tmp_sse;