Various table updates to support 64x64 transforms

Includes:
Various table updates and fixes to support 64x64 transforms.
Entropymode updates to support tx_size expansion to 64x64.
tx_mode changes to support an ALLOW_64X64 transform mode.

Change-Id: Ib9098cfe27d0c015fe3be6ae13e7d09576771b9e
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 712c00f..2c64880 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3166,9 +3166,17 @@
 }
 #endif
 
-static void write_txfm_mode(TX_MODE mode, struct aom_write_bit_buffer *wb) {
+static void write_tx_mode(TX_MODE mode, struct aom_write_bit_buffer *wb) {
+#if CONFIG_TX64X64
+  aom_wb_write_bit(wb, mode == TX_MODE_SELECT);
+  if (mode != TX_MODE_SELECT) {
+    aom_wb_write_literal(wb, AOMMIN(mode, ALLOW_32X32), 2);
+    if (mode >= ALLOW_32X32) aom_wb_write_bit(wb, mode == ALLOW_64X64);
+  }
+#else
   aom_wb_write_bit(wb, mode == TX_MODE_SELECT);
   if (mode != TX_MODE_SELECT) aom_wb_write_literal(wb, mode, 2);
+#endif  // CONFIG_TX64X64
 }
 
 static void update_txfm_probs(AV1_COMMON *cm, aom_writer *w,
@@ -3949,7 +3957,7 @@
   if (!cm->seg.enabled && xd->lossless[0])
     cm->tx_mode = ONLY_4X4;
   else
-    write_txfm_mode(cm->tx_mode, wb);
+    write_tx_mode(cm->tx_mode, wb);
 
   if (cpi->allow_comp_inter_inter) {
     const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 4de60ef..10595a5 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4479,7 +4479,7 @@
 static TX_MODE select_tx_mode(const AV1_COMP *cpi, MACROBLOCKD *const xd) {
   if (xd->lossless[0]) return ONLY_4X4;
   if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
-    return ALLOW_32X32;
+    return ALLOW_32X32 + CONFIG_TX64X64;
   else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
            cpi->sf.tx_size_search_method == USE_TX_8X8)
     return TX_MODE_SELECT;
@@ -5061,9 +5061,97 @@
 
 #if CONFIG_VAR_TX
     if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
-      cm->tx_mode = ALLOW_32X32;
+      cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64;
 #else
     if (cm->tx_mode == TX_MODE_SELECT) {
+#if CONFIG_TX64X64
+      int count4x4 = 0;
+      int count8x8_8x8p = 0, count8x8_lp = 0;
+      int count16x16_16x16p = 0, count16x16_lp = 0;
+      int count32x32_32x32p = 0, count32x32_lp = 0;
+      int count64x64_64x64p = 0;
+      for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
+        // counts->tx_size[max_depth][context_idx][this_depth_level]
+        count4x4 += counts->tx_size[0][i][0];
+        count4x4 += counts->tx_size[1][i][0];
+        count4x4 += counts->tx_size[2][i][0];
+        count4x4 += counts->tx_size[3][i][0];
+
+        count8x8_8x8p += counts->tx_size[0][i][1];
+        count8x8_lp += counts->tx_size[1][i][1];
+        count8x8_lp += counts->tx_size[2][i][1];
+        count8x8_lp += counts->tx_size[3][i][1];
+
+        count16x16_16x16p += counts->tx_size[1][i][2];
+        count16x16_lp += counts->tx_size[2][i][2];
+        count16x16_lp += counts->tx_size[3][i][2];
+
+        count32x32_32x32p += counts->tx_size[2][i][3];
+        count32x32_lp += counts->tx_size[3][i][3];
+
+        count64x64_64x64p += counts->tx_size[3][i][4];
+      }
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      count4x4 += counts->tx_size_implied[0][TX_4X4];
+      count4x4 += counts->tx_size_implied[1][TX_4X4];
+      count4x4 += counts->tx_size_implied[2][TX_4X4];
+      count4x4 += counts->tx_size_implied[3][TX_4X4];
+      count8x8_8x8p += counts->tx_size_implied[1][TX_8X8];
+      count8x8_lp += counts->tx_size_implied[2][TX_8X8];
+      count8x8_lp += counts->tx_size_implied[3][TX_8X8];
+      count8x8_lp += counts->tx_size_implied[4][TX_8X8];
+      count16x16_16x16p += counts->tx_size_implied[2][TX_16X16];
+      count16x16_lp += counts->tx_size_implied[3][TX_16X16];
+      count16x16_lp += counts->tx_size_implied[4][TX_16X16];
+      count32x32_32x32p += counts->tx_size_implied[3][TX_32X32];
+      count32x32_lp += counts->tx_size_implied[4][TX_32X32];
+      count64x64_64x64p += counts->tx_size[4][TX_64X64];
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+      if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
+          count32x32_lp == 0 && count32x32_32x32p == 0 &&
+#if CONFIG_SUPERTX
+          cm->counts.supertx_size[TX_16X16] == 0 &&
+          cm->counts.supertx_size[TX_32X32] == 0 &&
+          cm->counts.supertx_size[TX_64X64] == 0 &&
+#endif
+          count64x64_64x64p == 0) {
+        cm->tx_mode = ALLOW_8X8;
+        reset_skip_tx_size(cm, TX_8X8);
+      } else if (count8x8_8x8p == 0 && count8x8_lp == 0 &&
+                 count16x16_16x16p == 0 && count16x16_lp == 0 &&
+                 count32x32_32x32p == 0 && count32x32_lp == 0 &&
+#if CONFIG_SUPERTX
+                 cm->counts.supertx_size[TX_8X8] == 0 &&
+                 cm->counts.supertx_size[TX_16X16] == 0 &&
+                 cm->counts.supertx_size[TX_32X32] == 0 &&
+                 cm->counts.supertx_size[TX_64X64] == 0 &&
+#endif
+                 count64x64_64x64p == 0) {
+        cm->tx_mode = ONLY_4X4;
+        reset_skip_tx_size(cm, TX_4X4);
+      } else if (count4x4 == 0 && count8x8_lp == 0 && count16x16_lp == 0 &&
+                 count32x32_lp == 0) {
+        cm->tx_mode = ALLOW_64X64;
+      } else if (count4x4 == 0 && count8x8_lp == 0 && count16x16_lp == 0 &&
+#if CONFIG_SUPERTX
+                 cm->counts.supertx_size[TX_64X64] == 0 &&
+#endif
+                 count64x64_64x64p == 0) {
+        cm->tx_mode = ALLOW_32X32;
+        reset_skip_tx_size(cm, TX_32X32);
+      } else if (count4x4 == 0 && count8x8_lp == 0 && count32x32_lp == 0 &&
+                 count32x32_32x32p == 0 &&
+#if CONFIG_SUPERTX
+                 cm->counts.supertx_size[TX_32X32] == 0 &&
+                 cm->counts.supertx_size[TX_64X64] == 0 &&
+#endif
+                 count64x64_64x64p == 0) {
+        cm->tx_mode = ALLOW_16X16;
+        reset_skip_tx_size(cm, TX_16X16);
+      }
+
+#else  // CONFIG_TX64X64
+
       int count4x4 = 0;
       int count8x8_lp = 0, count8x8_8x8p = 0;
       int count16x16_16x16p = 0, count16x16_lp = 0;
@@ -5074,9 +5162,9 @@
         count4x4 += counts->tx_size[1][i][0];
         count4x4 += counts->tx_size[2][i][0];
 
+        count8x8_8x8p += counts->tx_size[0][i][1];
         count8x8_lp += counts->tx_size[1][i][1];
         count8x8_lp += counts->tx_size[2][i][1];
-        count8x8_8x8p += counts->tx_size[0][i][1];
 
         count16x16_16x16p += counts->tx_size[1][i][2];
         count16x16_lp += counts->tx_size[2][i][2];
@@ -5087,9 +5175,9 @@
       count4x4 += counts->tx_size_implied[1][TX_4X4];
       count4x4 += counts->tx_size_implied[2][TX_4X4];
       count4x4 += counts->tx_size_implied[3][TX_4X4];
+      count8x8_8x8p += counts->tx_size_implied[1][TX_8X8];
       count8x8_lp += counts->tx_size_implied[2][TX_8X8];
       count8x8_lp += counts->tx_size_implied[3][TX_8X8];
-      count8x8_8x8p += counts->tx_size_implied[1][TX_8X8];
       count16x16_lp += counts->tx_size_implied[3][TX_16X16];
       count16x16_16x16p += counts->tx_size_implied[2][TX_16X16];
       count32x32 += counts->tx_size_implied[3][TX_32X32];
@@ -5122,6 +5210,7 @@
         cm->tx_mode = ALLOW_16X16;
         reset_skip_tx_size(cm, TX_16X16);
       }
+#endif  // CONFIG_TX64X64
     }
 #endif
   } else {