Fix uninitialized blk_skip for VAR TX.

x->blk_skip was left uninitialized (holding leftover values from
encoding the previous block) when cm->tx_mode != TX_MODE_SELECT, a mode
that is used with higher --cpu-used or --rt options. This degraded
coding performance whenever cm->tx_mode != TX_MODE_SELECT.

This fixes the VP10/EndToEndTestLarge.EndtoEndPSNRTest/40 unit test.

Also fixed an indexing mismatch at the right edge: encode_block in
encodemb.c used the formal width of the block (without cropping at the
right edge) to look up blk_skip, while select_tx_block in rdopt.c used
the cropped width to set blk_skip. select_tx_block now indexes blk_skip
with the uncropped block width as well.

Change-Id: I76d0f49ac5ab3ab54203573e0d7fcfcc1c6aa10d
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 45505f1..d0c7a33 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -3339,6 +3339,15 @@
                                bsize >= BLOCK_8X8;
   (void) *tp_orig;
 
+#if CONFIG_VAR_TX
+#ifndef NDEBUG
+  // Nothing should rely on the default value of this array (which is just
+  // leftover from encoding the previous block). Set it to a magic number
+  // when debugging.
+  memset(x->blk_skip[0], 234, sizeof(x->blk_skip[0]));
+#endif  // NDEBUG
+#endif  // CONFIG_VAR_TX
+
   assert(num_8x8_blocks_wide_lookup[bsize] ==
              num_8x8_blocks_high_lookup[bsize]);
 
@@ -4433,7 +4442,6 @@
   x->quant_fp = cpi->sf.use_quant_fp;
   vp10_zero(x->skip_txfm);
 #if CONFIG_VAR_TX
-  vp10_zero(x->blk_skip);
 #if CONFIG_REF_MV
   vp10_zero(x->blk_skip_drl);
 #endif
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 9acf00c..c848e4b 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -435,6 +435,9 @@
 //    return;
 //  }
 
+  // Assert not magic number (uninitialised).
+  assert(x->blk_skip[plane][(blk_row << bwl) + blk_col] != 234);
+
 #if CONFIG_VAR_TX
   if (!x->skip_recode &&
       x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index e7762a5..50feb4f 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -2946,8 +2946,9 @@
   const int tx_col = blk_col >> (1 - pd->subsampling_x);
   TX_SIZE (*const inter_tx_size)[MAX_MIB_SIZE] =
     (TX_SIZE (*)[MAX_MIB_SIZE])&mbmi->inter_tx_size[tx_row][tx_col];
+  const int bw = num_4x4_blocks_wide_lookup[plane_bsize];
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
-  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  int max_blocks_wide = bw;
   int64_t this_rd = INT64_MAX;
   ENTROPY_CONTEXT *pta = ta + blk_col;
   ENTROPY_CONTEXT *ptl = tl + blk_row;
@@ -3018,10 +3019,10 @@
       *rate = zero_blk_rate;
       *dist = *bsse;
       *skip = 1;
-      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1;
+      x->blk_skip[plane][blk_row * bw + blk_col] = 1;
       p->eobs[block] = 0;
     } else {
-      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0;
+      x->blk_skip[plane][blk_row * bw + blk_col] = 0;
       *skip = 0;
     }
 
@@ -3078,7 +3079,7 @@
     mbmi->tx_size = tx_size;
     if (this_rd == INT64_MAX)
       *is_cost_valid = 0;
-    x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip;
+    x->blk_skip[plane][blk_row * bw + blk_col] = *skip;
   } else {
     *rate = sum_rate;
     *dist = sum_dist;
@@ -7166,6 +7167,8 @@
       for (idy = 0; idy < xd->n8_h; ++idy)
         for (idx = 0; idx < xd->n8_w; ++idx)
           mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+      memset(x->blk_skip[0], skippable_y,
+             sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
     }
 #else
     super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,