Merge "Various rect-tx fixes" into nextgenv2
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 6abc290..ee5ce0d 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -670,7 +670,7 @@
     return TX_4X4;
   } else {
     const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
-    return VPXMIN(y_tx_size, max_txsize_lookup[plane_bsize]);
+    return VPXMIN(txsize_sqr_map[y_tx_size], max_txsize_lookup[plane_bsize]);
   }
 }
 
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index dc6958b..5df080b 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -172,6 +172,7 @@
   unsigned int comp_ref[REF_CONTEXTS][COMP_REFS - 1][2];
 #endif  // CONFIG_EXT_REFS
   unsigned int tx_size_totals[TX_SIZES];
+  unsigned int tx_size_implied[TX_SIZES][TX_SIZES];
   unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
 #if CONFIG_VAR_TX
   unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index 17e6ac4..1c50c4e 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -710,7 +710,7 @@
   const BLOCK_SIZE block_size = mbmi->sb_type;
   // TODO(debargha): Check if masks can be setup correctly when
   // rectangular transfroms are used with the EXT_TX expt.
-  const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
+  const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
   const TX_SIZE tx_size_uv =
       get_uv_tx_size_impl(mbmi->tx_size, block_size, 1, 1);
   const int filter_level = get_filter_level(lfi_n, mbmi);
@@ -791,7 +791,7 @@
 #endif  // CONFIG_SUPERTX
                          LOOP_FILTER_MASK *lfm) {
   const MB_MODE_INFO *mbmi = &mi->mbmi;
-  const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
+  const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
 #if CONFIG_SUPERTX
   const BLOCK_SIZE block_size =
       supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 921ada6..8288dd8 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -389,7 +389,7 @@
                                          int n4_hl) {
   // get minimum log2 num4x4s dimension
   const int x = VPXMIN(n4_wl, n4_hl);
-  return VPXMIN(mbmi->tx_size, x);
+  return VPXMIN(txsize_sqr_map[mbmi->tx_size], x);
 }
 
 static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) {
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 26e8a11..2adb482 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -306,10 +306,17 @@
     if (allow_select && tx_mode == TX_MODE_SELECT) {
       return read_selected_tx_size(cm, xd, max_tx_size, r);
     } else {
-      return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
+      TX_SIZE tx_size =
+          VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      if (txsize_sqr_map[max_txsize_rect_lookup[bsize]] <= tx_size)
+        tx_size = max_txsize_rect_lookup[bsize];
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+      return tx_size;
     }
   } else {
 #if CONFIG_EXT_TX && CONFIG_RECT_TX && !CONFIG_VAR_TX
+    assert(IMPLIES(tx_mode == ONLY_4X4, bsize == BLOCK_4X4));
     return max_txsize_rect_lookup[bsize];
 #else
     return TX_4X4;
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c
index 5ce7c62..b827451 100644
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -186,7 +186,7 @@
           val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r);
           break;
         case CATEGORY6_TOKEN: {
-          const int skip_bits = TX_SIZES - 1 - tx_size;
+          const int skip_bits = TX_SIZES - 1 - txsize_sqr_up_map[tx_size];
           const uint8_t *cat6p = cat6_prob + skip_bits;
 #if CONFIG_VP9_HIGHBITDEPTH
           switch (xd->bd) {
@@ -369,7 +369,7 @@
           val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, ans);
           break;
         case CATEGORY6_TOKEN: {
-          const int skip_bits = TX_SIZES - 1 - tx_size;
+          const int skip_bits = TX_SIZES - 1 - txsize_sqr_up_map[tx_size];
           const uint8_t *cat6p = cat6_prob + skip_bits;
 #if CONFIG_VP9_HIGHBITDEPTH
           switch (xd->bd) {
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index a023417..60e54e4 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -332,7 +332,7 @@
 }
 
 #if CONFIG_VAR_TX
-static void write_tx_size_inter(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+static void write_tx_size_vartx(const VP10_COMMON *cm, const MACROBLOCKD *xd,
                                 const MB_MODE_INFO *mbmi, TX_SIZE tx_size,
                                 int blk_row, int blk_col, vp10_writer *w) {
   const int tx_row = blk_row >> 1;
@@ -368,7 +368,7 @@
     for (i = 0; i < 4; ++i) {
       int offsetr = blk_row + ((i >> 1) << bsl);
       int offsetc = blk_col + ((i & 0x01) << bsl);
-      write_tx_size_inter(cm, xd, mbmi, tx_size - 1, offsetr, offsetc, w);
+      write_tx_size_vartx(cm, xd, mbmi, tx_size - 1, offsetr, offsetc, w);
     }
   }
 }
@@ -608,7 +608,7 @@
   const TOKENEXTRA *p = *tp;
 #if CONFIG_VAR_TX
   int count = 0;
-  const int seg_eob = 16 << (tx << 1);
+  const int seg_eob = get_tx2d_size(tx);
 #endif
 
   while (p < stop && p->token != EOSB_TOKEN) {
@@ -652,7 +652,9 @@
 
     if (b->base_val) {
       const int e = p->extra, l = b->len;
-      int skip_bits = (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0;
+      int skip_bits = (b->base_val == CAT6_MIN_VAL)
+                          ? TX_SIZES - 1 - txsize_sqr_up_map[tx]
+                          : 0;
 
       if (l) {
         const unsigned char *pb = b->prob;
@@ -725,7 +727,9 @@
 
       if (b->base_val) {
         const int e = p->extra, l = b->len;
-        int skip_bits = (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0;
+        int skip_bits = (b->base_val == CAT6_MIN_VAL)
+                            ? TX_SIZES - 1 - txsize_sqr_up_map[tx]
+                            : 0;
 
         if (l) {
           const unsigned char *pb = b->prob;
@@ -771,10 +775,7 @@
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
   const int tx_row = blk_row >> (1 - pd->subsampling_y);
   const int tx_col = blk_col >> (1 - pd->subsampling_x);
-  const TX_SIZE plane_tx_size =
-      plane ? get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0,
-                                  0)
-            : mbmi->inter_tx_size[tx_row][tx_col];
+  TX_SIZE plane_tx_size;
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
 
@@ -785,6 +786,10 @@
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
+  plane_tx_size = plane ? get_uv_tx_size_impl(
+                              mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+                        : mbmi->inter_tx_size[tx_row][tx_col];
+
   if (tx_size == plane_tx_size) {
     pack_mb_tokens(w, tp, tok_end, bit_depth, tx_size);
   } else {
@@ -1111,7 +1116,7 @@
       int idx, idy;
       for (idy = 0; idy < height; idy += bs)
         for (idx = 0; idx < width; idx += bs)
-          write_tx_size_inter(cm, xd, mbmi, max_tx_size, idy, idx, w);
+          write_tx_size_vartx(cm, xd, mbmi, max_tx_size, idy, idx, w);
     } else {
       set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
       set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 032516d..24fc6e1 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -5025,28 +5025,34 @@
   if (output_enabled) {
     if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 &&
         !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
+      const int ctx = get_tx_size_context(xd);
+      const int tx_size_cat = max_txsize_lookup[bsize] - TX_8X8;
 #if CONFIG_VAR_TX
       if (is_inter_block(mbmi))
         tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
 #endif
-      ++td->counts->tx_size[max_txsize_lookup[bsize] -
-                            TX_8X8][get_tx_size_context(xd)][mbmi->tx_size];
+      ++td->counts->tx_size[tx_size_cat][ctx][mbmi->tx_size];
     } else {
       int x, y;
       TX_SIZE tx_size;
       // The new intra coding scheme requires no change of transform size
-      if (is_inter_block(&mi->mbmi))
+      if (is_inter_block(&mi->mbmi)) {
         tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
                          max_txsize_lookup[bsize]);
-      else
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+        if (txsize_sqr_map[max_txsize_rect_lookup[bsize]] <= tx_size)
+          tx_size = max_txsize_rect_lookup[bsize];
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+      } else {
         tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
+      }
 
       for (y = 0; y < mi_height; y++)
         for (x = 0; x < mi_width; x++)
           if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
             mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
     }
-    ++td->counts->tx_size_totals[mbmi->tx_size];
+    ++td->counts->tx_size_totals[txsize_sqr_map[mbmi->tx_size]];
     ++td->counts->tx_size_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
 #if CONFIG_EXT_TX
     if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 &&
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 96d05ba..1e825e3 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -863,10 +863,7 @@
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const int tx_row = blk_row >> (1 - pd->subsampling_y);
   const int tx_col = blk_col >> (1 - pd->subsampling_x);
-  const TX_SIZE plane_tx_size =
-      plane ? get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0,
-                                  0)
-            : mbmi->inter_tx_size[tx_row][tx_col];
+  TX_SIZE plane_tx_size;
 
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -878,6 +875,10 @@
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
+  plane_tx_size = plane ? get_uv_tx_size_impl(
+                              mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+                        : mbmi->inter_tx_size[tx_row][tx_col];
+
   if (tx_size == plane_tx_size) {
     encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
   } else {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index d34e4f6..b060878 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -3302,11 +3302,7 @@
   BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
   const int tx_row = blk_row >> (1 - pd->subsampling_y);
   const int tx_col = blk_col >> (1 - pd->subsampling_x);
-  const TX_SIZE plane_tx_size =
-      plane ? get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0,
-                                  0)
-            : mbmi->inter_tx_size[tx_row][tx_col];
-
+  TX_SIZE plane_tx_size;
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
 
@@ -3321,6 +3317,10 @@
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
+  plane_tx_size = plane ? get_uv_tx_size_impl(
+                              mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+                        : mbmi->inter_tx_size[tx_row][tx_col];
+
   if (tx_size == plane_tx_size) {
     int coeff_ctx, i;
     ENTROPY_CONTEXT *ta = above_ctx + blk_col;
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index c79d84e..257eba5 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -569,10 +569,7 @@
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
   const int tx_row = blk_row >> (1 - pd->subsampling_y);
   const int tx_col = blk_col >> (1 - pd->subsampling_x);
-  const TX_SIZE plane_tx_size =
-      plane ? get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0,
-                                  0)
-            : mbmi->inter_tx_size[tx_row][tx_col];
+  TX_SIZE plane_tx_size;
 
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -586,6 +583,10 @@
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
+  plane_tx_size = plane ? get_uv_tx_size_impl(
+                              mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+                        : mbmi->inter_tx_size[tx_row][tx_col];
+
   if (tx_size == plane_tx_size) {
     const struct macroblockd_plane *const pd = &xd->plane[plane];
     BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);