Use correct tx width/height with daala-dist

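In av1_xform_quant(), the loops that copy the source and prediction
blocks into int16 buffers used tx_size_wide[tx_size] as the bound for
both rows and columns, which is wrong for rectangular transforms. Use
tx_size_high[tx_size] for rows and tx_size_wide[tx_size] for columns.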

For subset1, high delay mode:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0565 | -0.0975 |  0.0317 |  -0.0937 | -0.0930 | -0.1261 |    -0.0994

Change-Id: I638c2a4c2ad359709069c1587b677a813fb589c2
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index db63049..cbcfad1 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -575,7 +575,8 @@
   uint8_t *dst;
   int16_t *pred;
   const int dst_stride = pd->dst.stride;
-  int tx_blk_size;
+  const int txw = tx_size_wide[tx_size];
+  const int txh = tx_size_high[tx_size];
   int i, j;
 #endif
 
@@ -615,18 +616,16 @@
   src_int16 =
       &p->src_int16[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
 
-  // transform block size in pixels
-  tx_blk_size = tx_size_wide[tx_size];
 #if CONFIG_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    for (j = 0; j < tx_blk_size; j++)
-      for (i = 0; i < tx_blk_size; i++)
+    for (j = 0; j < txh; j++)
+      for (i = 0; i < txw; i++)
         src_int16[diff_stride * j + i] =
             CONVERT_TO_SHORTPTR(src)[src_stride * j + i];
   } else {
 #endif  // CONFIG_HIGHBITDEPTH
-    for (j = 0; j < tx_blk_size; j++)
-      for (i = 0; i < tx_blk_size; i++)
+    for (j = 0; j < txh; j++)
+      for (i = 0; i < txw; i++)
         src_int16[diff_stride * j + i] = src[src_stride * j + i];
 #if CONFIG_HIGHBITDEPTH
   }
@@ -637,21 +636,18 @@
   dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
   pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
 
-  // transform block size in pixels
-  tx_blk_size = tx_size_wide[tx_size];
-
 // copy uint8 orig and predicted block to int16 buffer
 // in order to use existing VP10 transform functions
 #if CONFIG_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    for (j = 0; j < tx_blk_size; j++)
-      for (i = 0; i < tx_blk_size; i++)
+    for (j = 0; j < txh; j++)
+      for (i = 0; i < txw; i++)
         pred[diff_stride * j + i] =
             CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
   } else {
 #endif  // CONFIG_HIGHBITDEPTH
-    for (j = 0; j < tx_blk_size; j++)
-      for (i = 0; i < tx_blk_size; i++)
+    for (j = 0; j < txh; j++)
+      for (i = 0; i < txw; i++)
         pred[diff_stride * j + i] = dst[dst_stride * j + i];
 #if CONFIG_HIGHBITDEPTH
   }