Add 2x2 forward and inverse transform for high bd

Add 2x2 forward and inverse 2D-DCT for high bit-depth.

Change-Id: I3092a2587a0cdc6675a69cc9203499a530b65325
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 73ad3f0..948ac14 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -2509,6 +2509,38 @@
     aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
 }
 
+#if CONFIG_CB4X4
+// Inverse 2x2 transform for high bit-depth, accumulated into dest.
+// bd must follow eob (as in the 4x4 variant) to match the TX_2X2 dispatch call.
+void av1_highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
+                                 int stride, int eob, int bd, TX_TYPE tx_type,
+                                 int lossless) {
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
+  tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
+  tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
+  tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
+  tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;
+  // First pass: butterflies over coefficient pairs (0, 2) and (1, 3).
+  tran_high_t a2 = a1 + c1;
+  tran_high_t b2 = b1 + d1;
+  tran_high_t c2 = a1 - c1;
+  tran_high_t d2 = b1 - d1;
+  (void)tx_type;
+  (void)lossless;
+  (void)eob;
+  // Second pass: combine the pass-one sums/differences, then shift right by 2.
+  a1 = (a2 + b2) >> 2;
+  b1 = (a2 - b2) >> 2;
+  c1 = (c2 + d2) >> 2;
+  d1 = (c2 - d2) >> 2;
+
+  dst[0] = highbd_clip_pixel_add(dst[0], a1, bd);
+  dst[1] = highbd_clip_pixel_add(dst[1], b1, bd);
+  dst[stride] = highbd_clip_pixel_add(dst[stride], c1, bd);
+  dst[stride + 1] = highbd_clip_pixel_add(dst[stride + 1], d1, bd);
+}
+#endif
+
 void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
                                  int stride, int eob, int bd, TX_TYPE tx_type,
                                  int lossless) {
@@ -2864,6 +2896,13 @@
       av1_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
                                   lossless);
       break;
+#if CONFIG_CB4X4
+    case TX_2X2:
+      av1_highbd_inv_txfm_add_2x2(input, dest, stride, eob, bd, tx_type,
+                                  lossless);
+      break;
+
+#endif
     default: assert(0 && "Invalid transform size"); break;
   }
 }
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 1b3aaf9..b0f217c 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -274,6 +274,36 @@
 #endif  // CONFIG_TX64X64
 
 #if CONFIG_AOM_HIGHBITDEPTH
+#if CONFIG_CB4X4
+// Forward 2x2 transform for high bit-depth. Reads the 2x2 block whose rows
+// start at src_diff and src_diff + diff_stride, and writes the four results
+// to coeff[0..3]. All source samples are read before any output is stored.
+// tx_type, lossless and bd are accepted but unused here.
+static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
+                                int diff_stride, TX_TYPE tx_type, int lossless,
+                                const int bd) {
+  const int16_t *const top = src_diff;
+  const int16_t *const bot = src_diff + diff_stride;
+
+  // Vertical pass: per-column sum and difference of the two rows.
+  const tran_high_t sum_l = (tran_high_t)top[0] + bot[0];
+  const tran_high_t sum_r = (tran_high_t)top[1] + bot[1];
+  const tran_high_t dif_l = (tran_high_t)top[0] - bot[0];
+  const tran_high_t dif_r = (tran_high_t)top[1] - bot[1];
+
+  (void)tx_type;
+  (void)lossless;
+  (void)bd;
+
+  // Horizontal pass; every output carries a factor of 4, so coeff[0] is
+  // four times the sum of the block's samples.
+  coeff[0] = (tran_low_t)(4 * (sum_l + sum_r));
+  coeff[1] = (tran_low_t)(4 * (sum_l - sum_r));
+  coeff[2] = (tran_low_t)(4 * (dif_l + dif_r));
+  coeff[3] = (tran_low_t)(4 * (dif_l - dif_r));
+}
+#endif
+
 static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
                                 int diff_stride, TX_TYPE tx_type, int lossless,
                                 const int bd) {
@@ -604,6 +634,11 @@
     case TX_4X4:
       highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd);
       break;
+#if CONFIG_CB4X4
+    case TX_2X2:
+      highbd_fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless, bd);
+      break;
+#endif
     default: assert(0); break;
   }
 }