Misc. ext_tx fixes/enhancements

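- Reorder ext_tx_to_txtype so the ADST/DCT and FLIPADST/DCT hybrid
  types come before ADST_ADST and FLIPADST_FLIPADST respectively.
- Lower the first default ext_tx probability for each transform size
  (216/192/168 -> 160/144/128).
- DST (forward and inverse, 4/8/16-point): fold the separate `mult`
  scale factor into higher-precision 32-bit sine tables and round the
  result with ROUND_POWER_OF_TWO instead of a truncating shift.
- rdopt: change ext_tx_th from 0.99 to 0.98 and apply the threshold
  only while the current best transform is NORM.

derflr: +1.732% (8-bit)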

Change-Id: I9c04c8249646ff96eacacfa1dcb0bd118c04e84a
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 597cf12..ce4bd39 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -231,14 +231,14 @@
 
 static TX_TYPE ext_tx_to_txtype[EXT_TX_TYPES] = {
   DCT_DCT,
+  ADST_DCT,
+  DCT_ADST,
   ADST_ADST,
+  FLIPADST_DCT,
+  DCT_FLIPADST,
   FLIPADST_FLIPADST,
   ADST_FLIPADST,
   FLIPADST_ADST,
-  ADST_DCT,
-  DCT_ADST,
-  FLIPADST_DCT,
-  DCT_FLIPADST,
   DST_DST,
   DST_DCT,
   DCT_DST,
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index c638423..b8c7350 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -334,9 +334,9 @@
 };
 
 static const vpx_prob default_ext_tx_prob[EXT_TX_SIZES][EXT_TX_TYPES - 1] = {
-  { 216, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-  { 192, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-  { 168, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+  { 160, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+  { 144, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+  { 128, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
 };
 #endif  // CONFIG_EXT_TX
 
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 9e1f870..5656b06 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -20,10 +20,10 @@
 #if CONFIG_EXT_TX
 void idst4_c(const tran_low_t *input, tran_low_t *output) {
   static const int N = 4;
-  static const int sinvalue_lookup_table[] = {
-    9630, 15582
+  // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+  static const int32_t sinvalue_lookup[] = {
+    141124871, 228344838,
   };
-  static const int mult = 14654;  // sqrt(4/5)
   int i, j;
   for (i = 0; i < N; i++) {
     int64_t sum = 0;
@@ -36,20 +36,19 @@
       }
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
-      idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
     }
-    sum = (sum * mult) >> (2 * DCT_CONST_BITS);
+    sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
     output[i] = WRAPLOW(sum, 8);
   }
 }
 
 void idst8_c(const tran_low_t *input, tran_low_t *output) {
   static const int N = 8;
-  static const int sinvalue_lookup_table[] = {
-    5604, 10531, 14189, 16135
+  // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+  static const int32_t sinvalue_lookup[] = {
+    86559612, 162678858, 219176632, 249238470
   };
-  static const int mult = 15447;  // 2*sqrt(2/9)
   int i, j;
   for (i = 0; i < N; i++) {
     int64_t sum = 0;
@@ -62,21 +61,20 @@
       }
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
-      idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
     }
-    sum = (sum * mult) >> (2 * DCT_CONST_BITS);
+    sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
     output[i] = WRAPLOW(sum, 8);
   }
 }
 
 void idst16_c(const tran_low_t *input, tran_low_t *output) {
   static const int N = 16;
-  static const int sinvalue_lookup_table[] = {
-    3011,  5919,  8625, 11038,
-    13075, 14666, 15759, 16314
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+  static const int32_t sinvalue_lookup[] = {
+    47852167, 94074787, 137093803, 175444254,
+    207820161, 233119001, 250479254, 259309736
   };
-  static const int mult = 15895;  // 2*sqrt(4/17)
   int i, j;
   for (i = 0; i < N; i++) {
     int64_t sum = 0;
@@ -89,10 +87,9 @@
       }
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
-      idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
     }
-    sum = (sum * mult) >> (2 * DCT_CONST_BITS);
+    sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
     output[i] = WRAPLOW(sum, 8);
   }
 }
@@ -100,10 +97,10 @@
 #if CONFIG_VP9_HIGHBITDEPTH
 void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
   static const int N = 4;
-  static const int sinvalue_lookup_table[] = {
-    9630, 15582
+  // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+  static const int32_t sinvalue_lookup[] = {
+    141124871, 228344838,
   };
-  static const int mult = 14654;  // sqrt(4/5)
   int i, j;
   (void) bd;
   for (i = 0; i < N; i++) {
@@ -117,20 +114,19 @@
       }
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
-      idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
     }
-    sum = (sum * mult) >> (2 * DCT_CONST_BITS);
+    sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
     output[i] = WRAPLOW(sum, bd);
   }
 }
 
 void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
   static const int N = 8;
-  static const int sinvalue_lookup_table[] = {
-    5604, 10531, 14189, 16135
+  // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+  static const int32_t sinvalue_lookup[] = {
+    86559612, 162678858, 219176632, 249238470
   };
-  static const int mult = 15447;  // 2*sqrt(2/9)
   int i, j;
   (void) bd;
   for (i = 0; i < N; i++) {
@@ -144,21 +140,20 @@
       }
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
-      idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
     }
-    sum = (sum * mult) >> (2 * DCT_CONST_BITS);
+    sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
     output[i] = WRAPLOW(sum, bd);
   }
 }
 
 void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
   static const int N = 16;
-  static const int sinvalue_lookup_table[] = {
-    3011,  5919,  8625, 11038,
-    13075, 14666, 15759, 16314
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+  static const int32_t sinvalue_lookup[] = {
+    47852167, 94074787, 137093803, 175444254,
+    207820161, 233119001, 250479254, 259309736
   };
-  static const int mult = 15895;  // 2*sqrt(4/17)
   int i, j;
   (void) bd;
   for (i = 0; i < N; i++) {
@@ -172,10 +167,9 @@
       }
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
-      idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
     }
-    sum = (sum * mult) >> (2 * DCT_CONST_BITS);
+    sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
     output[i] = WRAPLOW(sum, bd);
   }
 }
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index cb1ede2..f183167 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -23,10 +23,10 @@
 #if CONFIG_EXT_TX
 void fdst4(const tran_low_t *input, tran_low_t *output) {
   static const int N = 4;
-  static const int sinvalue_lookup_table[] = {
-    9630, 15582
+  // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
+  static const int32_t sinvalue_lookup[] = {
+    141124871, 228344838,
   };
-  static const int mult = 14654;  // sqrt(4/5)
   int i, j;
   for (i = 0; i < N; i++) {
     int64_t sum = 0;
@@ -40,18 +40,18 @@
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
       idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1);
     }
-    output[i] = (sum * mult) >> (2 * DCT_CONST_BITS);
+    output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
   }
 }
 
 void fdst8(const tran_low_t *input, tran_low_t *output) {
   static const int N = 8;
-  static const int sinvalue_lookup_table[] = {
-    5604, 10531, 14189, 16135
+  // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
+  static const int sinvalue_lookup[] = {
+    86559612, 162678858, 219176632, 249238470
   };
-  static const int mult = 15447;  // 2*sqrt(2/9)
   int i, j;
   for (i = 0; i < N; i++) {
     int64_t sum = 0;
@@ -65,19 +65,19 @@
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
       idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1);
     }
-    output[i] = (sum * mult) >> (2 * DCT_CONST_BITS);
+    output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
   }
 }
 
 void fdst16(const tran_low_t *input, tran_low_t *output) {
   static const int N = 16;
-  static const int sinvalue_lookup_table[] = {
-    3011,  5919,  8625, 11038,
-    13075, 14666, 15759, 16314
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
+  static const int sinvalue_lookup[] = {
+    47852167, 94074787, 137093803, 175444254,
+    207820161, 233119001, 250479254, 259309736
   };
-  static const int mult = 15895;  // 2*sqrt(4/17)
   int i, j;
   for (i = 0; i < N; i++) {
     int64_t sum = 0;
@@ -91,9 +91,9 @@
       idx = idx > N + 1 - idx ? N + 1 - idx : idx;
       if (idx == 0) continue;
       idx--;
-      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+      sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1);
     }
-    output[i] = (sum * mult) >> (2 * DCT_CONST_BITS);
+    output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
   }
 }
 #endif  // CONFIG_EXT_TX
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 9b27bc7..0c2f58f 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -54,7 +54,7 @@
 #define NEW_MV_DISCOUNT_FACTOR  8
 
 #if CONFIG_EXT_TX
-const double ext_tx_th = 0.99;
+const double ext_tx_th = 0.98;
 #endif
 
 typedef struct {
@@ -2736,7 +2736,7 @@
       int64_t distortion_y_tx;
       int dummy;
       int64_t best_rdcost_tx = INT64_MAX;
-      int best_ext_tx = NORM;
+      int best_ext_tx = -1;
 
       for (i = NORM; i < EXT_TX_TYPES; i++) {
         mbmi->ext_txfrm = i;
@@ -2747,7 +2747,8 @@
         rdcost_tx = RDCOST(x->rdmult, x->rddiv, rate_y_tx, distortion_y_tx);
         rdcost_tx = MIN(rdcost_tx, RDCOST(x->rdmult, x->rddiv, 0, *psse));
         assert(rdcost_tx >= 0);
-        if (rdcost_tx < best_rdcost_tx * ext_tx_th) {
+        if (rdcost_tx <
+            (best_ext_tx == NORM ? ext_tx_th : 1) * best_rdcost_tx) {
           best_ext_tx = i;
           best_rdcost_tx = rdcost_tx;
         }