Use reduced transform set for 16x16

Speeds up ext-tx by 20% at the cost of a 0.26% BDRATE drop.
The ext-tx experiment becomes -2.66% BDRATE (reduced from -2.92%)
for the lowres set.

It turns out that reducing the set of transforms for intra from
12 to 5 makes very little difference in coding performance (~0.04%).
Most of the performance drop comes from the reduction in the
transform set for inter. Currently there is a provision to control
that with a macro (USE_REDUCED_TXSET_FOR_16X16).

Change-Id: I7de05527bf72f96acc1e0ab8a74a849da0a141e5
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 5cecf79..4ed7f81 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -391,15 +391,16 @@
 #endif  // CONFIG_SUPERTX
 
 #if CONFIG_EXT_TX
-#define ALLOW_INTRA_EXT_TX       1
+#define ALLOW_INTRA_EXT_TX          1
 // whether masked transforms are used for 32X32
-#define USE_MSKTX_FOR_32X32      0
+#define USE_MSKTX_FOR_32X32         0
+#define USE_REDUCED_TXSET_FOR_16X16 1
 
 static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
   1, 16, 12, 2
 };
 static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
-  1, 12, 10
+  1, 12, 5
 };
 
 #if EXT_TX_SIZES == 4
@@ -408,7 +409,11 @@
   if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
   if (tx_size == TX_32X32)
     return is_inter ? 3 - 2 * USE_MSKTX_FOR_32X32 : 0;
-  return ((is_inter || tx_size < TX_16X16) ? 1 : 2);
+#if USE_REDUCED_TXSET_FOR_16X16
+  return (tx_size == TX_16X16 ? 2 : 1);
+#else
+  return (tx_size == TX_16X16 && !is_inter ? 2 : 1);
+#endif  // USE_REDUCED_TXSET_FOR_16X16
 }
 
 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = {
@@ -444,7 +449,7 @@
   { 0, 0, 0, 0, },  // unused
   { 1, 1, 0, 0, },
   { 0, 0, 1, 0, },
-  { 0, 0, 0, 0, },
+  { 0, 0, 0, 1, },
 };
 #endif  // EXT_TX_SIZES == 4
 
@@ -452,14 +457,14 @@
 static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
   {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
   {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
-  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+  {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
 };
 
 // Transform types used in each inter set
 static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
   {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
   {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1},
+  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
   {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
 };
 
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 0c89a29..f1c8e30 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -982,17 +982,12 @@
     -FLIPADST_DCT, -DCT_FLIPADST,
     18, 20,
     -ADST_ADST, -FLIPADST_FLIPADST,
-    -ADST_FLIPADST, -FLIPADST_ADST
+    -ADST_FLIPADST, -FLIPADST_ADST,
   }, {
     -IDTX, 2,
     -DCT_DCT, 4,
-    6, 12,
-    8, 10,
+    -ADST_ADST, 6,
     -ADST_DCT, -DCT_ADST,
-    -FLIPADST_DCT, -DCT_FLIPADST,
-    14, 16,
-    -ADST_ADST, -FLIPADST_FLIPADST,
-    -ADST_FLIPADST, -FLIPADST_ADST
   }
 };
 
@@ -1094,50 +1089,50 @@
     },
   }, {
     {
-      {   8, 176, 128, 128, 128, 128, 128, 128, 128, },
-      {  10,  28, 176, 192, 208, 128, 128, 128, 128, },
-      {  10,  28, 176, 192,  48, 128, 128, 128, 128, },
-      {   9, 160, 128, 128, 128, 128, 128, 128, 128, },
-      {   8,  28,  96, 128, 128, 128, 160, 192, 128, },
-      {   7,  28, 160, 176, 192, 128, 128, 128, 128, },
-      {   7,  20, 160, 176,  64, 128, 128, 128, 128, },
-      {  10,  23, 160, 176,  64, 128, 128, 128, 128, },
-      {   8,  29, 160, 176, 192, 128, 128, 128, 128, },
-      {   3,  20,  96, 128, 128, 128, 160, 192, 128, },
+      {   8, 224,  64, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {   9, 200,  64, 128, },
+      {   8,   8, 224, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {  10,  23,  80, 176, },
+      {  10,  23,  80, 176, },
+      {  10,  32,  16,  64, },
     }, {
-      {   2, 176, 128, 128, 128, 128, 128, 128, 128, },
-      {   4,  28, 176, 192, 208, 128, 128, 128, 128, },
-      {   4,  28, 176, 192,  48, 128, 128, 128, 128, },
-      {   8, 160, 128, 128, 128, 128, 128, 128, 128, },
-      {   2,  28,  96, 128, 128, 128, 160, 192, 128, },
-      {   3,  28, 160, 176, 192, 128, 128, 128, 128, },
-      {   3,  26, 160, 176,  64, 128, 128, 128, 128, },
-      {   9,  24, 160, 176,  64, 128, 128, 128, 128, },
-      {   5,  24, 160, 176, 192, 128, 128, 128, 128, },
-      {   2,  25,  96, 128, 128, 128, 160, 192, 128, },
+      {   8, 224,  64, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {   9, 200,  64, 128, },
+      {   8,   8, 224, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {  10,  23,  80, 176, },
+      {  10,  23,  80, 176, },
+      {  10,  32,  16,  64, },
     }, {
-      {   2, 176, 128, 128, 128, 128, 128, 128, 128, },
-      {   1,  28, 176, 192, 208, 128, 128, 128, 128, },
-      {   1,  28, 176, 192,  48, 128, 128, 128, 128, },
-      {   4, 160, 128, 128, 128, 128, 128, 128, 128, },
-      {   2,  28,  96, 128, 128, 128, 160, 192, 128, },
-      {   2,  28, 160, 176, 192, 128, 128, 128, 128, },
-      {   3,  29, 160, 176,  64, 128, 128, 128, 128, },
-      {   4,  27, 160, 176,  64, 128, 128, 128, 128, },
-      {   2,  34, 160, 176, 192, 128, 128, 128, 128, },
-      {   1,  25,  96, 128, 128, 128, 160, 192, 128, },
+      {   8, 224,  64, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {   9, 200,  64, 128, },
+      {   8,   8, 224, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {  10,  23,  80, 176, },
+      {  10,  23,  80, 176, },
+      {  10,  32,  16,  64, },
 #if EXT_TX_SIZES == 4
     }, {
-      {   2, 176, 128, 128, 128, 128, 128, 128, 128, },
-      {   1,  12, 160, 176, 192, 128, 128, 128, 128, },
-      {   1,  17, 160, 176,  64, 128, 128, 128, 128, },
-      {   4,  41, 128, 128, 128, 128, 128, 128, 128, },
-      {   2,  17,  96, 128, 128, 128, 160, 192, 128, },
-      {   2,  14, 160, 176, 192, 128, 128, 128, 128, },
-      {   3,  19, 160, 176,  64, 128, 128, 128, 128, },
-      {   4,  27, 160, 176,  64, 128, 128, 128, 128, },
-      {   2,  34, 160, 176, 192, 128, 128, 128, 128, },
-      {   1,  15,  96, 128, 128, 128, 160, 192, 128, },
+      {   8, 224,  64, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {   9, 200,  64, 128, },
+      {   8,   8, 224, 128, },
+      {  10,  32,  16, 192, },
+      {  10,  32,  16,  64, },
+      {  10,  23,  80, 176, },
+      {  10,  23,  80, 176, },
+      {  10,  32,  16,  64, },
 #endif
     },
   },