Code cleanup in highbd inv_txfm module

Removed the redundant per-transform-size SSE4_1 and AVX2 wrapper functions (and their RTCD prototypes) from the highbd inv_txfm path.

There is no impact on speed numbers; this has been verified across multiple test cases.

Change-Id: If23a3a386e5f7ced1ae11136578b9ac4ff86c561
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index d9000c7..5b46865 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -116,23 +116,7 @@
 add_proto qw/void av1_highbd_inv_txfm_add_4x4/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
 specialize qw/av1_highbd_inv_txfm_add_4x4 sse4_1/;
 add_proto qw/void av1_highbd_inv_txfm_add_8x8/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x8 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x8/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x8 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_8x16/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x16 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x16/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x16 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_32x32/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_32x32 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x32/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x32 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_32x16/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_32x16 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_8x32/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x32 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_32x8/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_32x8 sse4_1 avx2/;
+specialize qw/av1_highbd_inv_txfm_add_8x8 sse4_1/;
 add_proto qw/void av1_highbd_inv_txfm_add_4x8/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
 specialize qw/av1_highbd_inv_txfm_add_4x8 sse4_1/;
 add_proto qw/void av1_highbd_inv_txfm_add_8x4/,  "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
diff --git a/av1/common/x86/highbd_inv_txfm_avx2.c b/av1/common/x86/highbd_inv_txfm_avx2.c
index cc9e90b..fe22465 100644
--- a/av1/common/x86/highbd_inv_txfm_avx2.c
+++ b/av1/common/x86/highbd_inv_txfm_avx2.c
@@ -4309,238 +4309,30 @@
       highbd_inv_txfm2d_add_no_identity_avx2(input, CONVERT_TO_SHORTPTR(output),
                                              stride, tx_type, tx_size, eob, bd);
       break;
+    case IDTX:
+    case H_DCT:
+    case H_ADST:
+    case H_FLIPADST:
+    case V_DCT:
+    case V_ADST:
+    case V_FLIPADST:
+      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, output, stride, tx_type,
+                                                tx_size, eob, bd);
+      break;
     default: assert(0); break;
   }
 }
-
-void av1_highbd_inv_txfm_add_16x16_avx2(const tran_low_t *input, uint8_t *dest,
-                                        int stride,
-                                        const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case IDTX:
-    case H_DCT:
-    case H_ADST:
-    case H_FLIPADST:
-    case V_DCT:
-    case V_ADST:
-    case V_FLIPADST:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-  }
-}
-
-void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *input, uint8_t *dest,
-                                        int stride,
-                                        const TxfmParam *txfm_param) {
-  const int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case DCT_DCT:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-    case IDTX:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-
-    default: assert(0);
-  }
-}
-
-void av1_highbd_inv_txfm_add_16x32_avx2(const tran_low_t *input, uint8_t *dest,
-                                        int stride,
-                                        const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case DCT_DCT:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-    case IDTX:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default: assert(0);
-  }
-}
-
-void av1_highbd_inv_txfm_add_32x16_avx2(const tran_low_t *input, uint8_t *dest,
-                                        int stride,
-                                        const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case DCT_DCT:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-    case IDTX:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default: assert(0);
-  }
-}
-void av1_highbd_inv_txfm_add_8x8_avx2(const tran_low_t *input, uint8_t *dest,
-                                      int stride, const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case IDTX:
-    case H_DCT:
-    case H_ADST:
-    case H_FLIPADST:
-    case V_DCT:
-    case V_ADST:
-    case V_FLIPADST:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-  }
-}
-void av1_highbd_inv_txfm_add_8x32_avx2(const tran_low_t *input, uint8_t *dest,
-                                       int stride,
-                                       const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case DCT_DCT:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-    case IDTX:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default: assert(0);
-  }
-}
-
-void av1_highbd_inv_txfm_add_32x8_avx2(const tran_low_t *input, uint8_t *dest,
-                                       int stride,
-                                       const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case DCT_DCT:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-    case IDTX:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default: assert(0);
-  }
-}
-void av1_highbd_inv_txfm_add_16x8_avx2(const tran_low_t *input, uint8_t *dest,
-                                       int stride,
-                                       const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case IDTX:
-    case H_DCT:
-    case H_ADST:
-    case H_FLIPADST:
-    case V_DCT:
-    case V_ADST:
-    case V_FLIPADST:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-  }
-}
-
-void av1_highbd_inv_txfm_add_8x16_avx2(const tran_low_t *input, uint8_t *dest,
-                                       int stride,
-                                       const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  switch (tx_type) {
-    case IDTX:
-    case H_DCT:
-    case H_ADST:
-    case H_FLIPADST:
-    case V_DCT:
-    case V_ADST:
-    case V_FLIPADST:
-      av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
-                                                txfm_param->tx_size,
-                                                txfm_param->eob, bd);
-      break;
-    default:
-      av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
-                                              txfm_param->tx_size,
-                                              txfm_param->eob, bd);
-      break;
-  }
-}
 void av1_highbd_inv_txfm_add_avx2(const tran_low_t *input, uint8_t *dest,
                                   int stride, const TxfmParam *txfm_param) {
   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
   const TX_SIZE tx_size = txfm_param->tx_size;
   switch (tx_size) {
-    case TX_32X32:
-      av1_highbd_inv_txfm_add_32x32_avx2(input, dest, stride, txfm_param);
-      break;
-    case TX_16X16:
-      av1_highbd_inv_txfm_add_16x16_avx2(input, dest, stride, txfm_param);
-      break;
-    case TX_8X8:
-      av1_highbd_inv_txfm_add_8x8_avx2(input, dest, stride, txfm_param);
-      break;
     case TX_4X8:
       av1_highbd_inv_txfm_add_4x8_sse4_1(input, dest, stride, txfm_param);
       break;
     case TX_8X4:
       av1_highbd_inv_txfm_add_8x4_sse4_1(input, dest, stride, txfm_param);
       break;
-    case TX_8X16:
-      av1_highbd_inv_txfm_add_8x16_avx2(input, dest, stride, txfm_param);
-      break;
-    case TX_16X8:
-      av1_highbd_inv_txfm_add_16x8_avx2(input, dest, stride, txfm_param);
-      break;
-    case TX_16X32:
-      av1_highbd_inv_txfm_add_16x32_avx2(input, dest, stride, txfm_param);
-      break;
-    case TX_32X16:
-      av1_highbd_inv_txfm_add_32x16_avx2(input, dest, stride, txfm_param);
-      break;
     case TX_4X4:
       av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param);
       break;
@@ -4550,21 +4342,10 @@
     case TX_4X16:
       av1_highbd_inv_txfm_add_4x16_sse4_1(input, dest, stride, txfm_param);
       break;
-    case TX_8X32:
-      av1_highbd_inv_txfm_add_8x32_avx2(input, dest, stride, txfm_param);
-      break;
-    case TX_32X8:
-      av1_highbd_inv_txfm_add_32x8_avx2(input, dest, stride, txfm_param);
-      break;
-    case TX_64X64:
-    case TX_32X64:
-    case TX_64X32:
-    case TX_16X64:
-    case TX_64X16:
+    default:
       av1_highbd_inv_txfm2d_add_universe_avx2(
           input, dest, stride, txfm_param->tx_type, txfm_param->tx_size,
           txfm_param->eob, txfm_param->bd);
       break;
-    default: assert(0 && "Invalid transform size"); break;
   }
 }
diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c
index 39f3548..2f0eecb 100644
--- a/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -5245,79 +5245,6 @@
       break;
   }
 }
-
-void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *input, uint8_t *dest,
-                                         int stride,
-                                         const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *input, uint8_t *dest,
-                                         int stride,
-                                         const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *input,
-                                          uint8_t *dest, int stride,
-                                          const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *input,
-                                          uint8_t *dest, int stride,
-                                          const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_16x32_sse4_1(const tran_low_t *input,
-                                          uint8_t *dest, int stride,
-                                          const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_32x16_sse4_1(const tran_low_t *input,
-                                          uint8_t *dest, int stride,
-                                          const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_8x32_sse4_1(const tran_low_t *input, uint8_t *dest,
-                                         int stride,
-                                         const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_32x8_sse4_1(const tran_low_t *input, uint8_t *dest,
-                                         int stride,
-                                         const TxfmParam *txfm_param) {
-  int bd = txfm_param->bd;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  av1_highbd_inv_txfm2d_add_universe_sse4_1(
-      input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
 void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *input, uint8_t *dest,
                                         int stride,
                                         const TxfmParam *txfm_param) {
@@ -5999,12 +5926,6 @@
   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
   const TX_SIZE tx_size = txfm_param->tx_size;
   switch (tx_size) {
-    case TX_32X32:
-      av1_highbd_inv_txfm_add_32x32_sse4_1(input, dest, stride, txfm_param);
-      break;
-    case TX_16X16:
-      av1_highbd_inv_txfm_add_16x16_sse4_1(input, dest, stride, txfm_param);
-      break;
     case TX_8X8:
       av1_highbd_inv_txfm_add_8x8_sse4_1(input, dest, stride, txfm_param);
       break;
@@ -6014,18 +5935,6 @@
     case TX_8X4:
       av1_highbd_inv_txfm_add_8x4_sse4_1(input, dest, stride, txfm_param);
       break;
-    case TX_8X16:
-      av1_highbd_inv_txfm_add_8x16_sse4_1(input, dest, stride, txfm_param);
-      break;
-    case TX_16X8:
-      av1_highbd_inv_txfm_add_16x8_sse4_1(input, dest, stride, txfm_param);
-      break;
-    case TX_16X32:
-      av1_highbd_inv_txfm_add_16x32_sse4_1(input, dest, stride, txfm_param);
-      break;
-    case TX_32X16:
-      av1_highbd_inv_txfm_add_32x16_sse4_1(input, dest, stride, txfm_param);
-      break;
     case TX_4X4:
       av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param);
       break;
@@ -6035,21 +5944,10 @@
     case TX_4X16:
       av1_highbd_inv_txfm_add_4x16_sse4_1(input, dest, stride, txfm_param);
       break;
-    case TX_8X32:
-      av1_highbd_inv_txfm_add_8x32_sse4_1(input, dest, stride, txfm_param);
-      break;
-    case TX_32X8:
-      av1_highbd_inv_txfm_add_32x8_sse4_1(input, dest, stride, txfm_param);
-      break;
-    case TX_64X64:
-    case TX_32X64:
-    case TX_64X32:
-    case TX_16X64:
-    case TX_64X16:
+    default:
       av1_highbd_inv_txfm2d_add_universe_sse4_1(
-          input, dest, stride, txfm_param->tx_type, txfm_param->tx_size,
-          txfm_param->eob, txfm_param->bd);
+          input, dest, stride, txfm_param->tx_type, tx_size, txfm_param->eob,
+          txfm_param->bd);
       break;
-    default: assert(0 && "Invalid transform size"); break;
   }
 }