daala_tx: Add SIMD version of 16-point identity TX

Change-Id: I1d1dc43c2b0403b614d0f1b1bb0cd3c5efe6740b
diff --git a/av1/common/x86/daala_inv_txfm_avx2.c b/av1/common/x86/daala_inv_txfm_avx2.c
index 3771a80..bdedfa5 100644
--- a/av1/common/x86/daala_inv_txfm_avx2.c
+++ b/av1/common/x86/daala_inv_txfm_avx2.c
@@ -1423,6 +1423,16 @@
                            od_flip_idst16_kernel16_epi16);
 }
 
+static void od_row_iidtx16_avx2(int16_t *out, int rows, const tran_low_t *in) {
+  od_row_iidtx_avx2(out, rows * 16, in);  // rows scaled by 16 (16-point TX)
+}
+
+static void od_col_iidtx16_add_hbd_avx2(unsigned char *output_pixels,
+                                        int output_stride, int cols,
+                                        const int16_t *in, int bd) {
+  od_col_iidtx_add_hbd_avx2(output_pixels, output_stride, 16, cols, in, bd);
+}  // fixes the helper's 3rd argument to 16 so callers pass only cols
+
 typedef void (*daala_row_itx)(int16_t *out, int rows, const tran_low_t *in);
 typedef void (*daala_col_itx_add)(unsigned char *output_pixels,
                                   int output_stride, int cols,
@@ -1436,7 +1446,8 @@
   { od_row_idct8_avx2, od_row_idst8_avx2, od_row_flip_idst8_avx2,
     od_row_iidtx8_avx2 },
   // 16-point transforms
-  { od_row_idct16_avx2, od_row_idst16_avx2, od_row_flip_idst16_avx2, NULL },
+  { od_row_idct16_avx2, od_row_idst16_avx2, od_row_flip_idst16_avx2,
+    od_row_iidtx16_avx2 },
   // 32-point transforms
   { NULL, NULL, NULL, NULL },
 #if CONFIG_TX64X64
@@ -1471,7 +1482,7 @@
         od_col_flip_idst8_add_hbd_avx2, od_col_iidtx8_add_hbd_avx2 },
       // 16-point transforms
       { od_col_idct16_add_hbd_avx2, od_col_idst16_add_hbd_avx2,
-        od_col_flip_idst16_add_hbd_avx2, NULL },
+        od_col_flip_idst16_add_hbd_avx2, od_col_iidtx16_add_hbd_avx2 },
       // 32-point transforms
       { NULL, NULL, NULL, NULL },
 #if CONFIG_TX64X64