Remove 4:1 rectangular forward transforms from the realtime-only build

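When CONFIG_REALTIME_ONLY is enabled, the 4x16, 8x32, 32x8, 16x64 and
64x16 forward transforms are no longer compiled; av1_fwd_txfm2d_16x4 is
kept. This reduces binary size by 25K (Linux, -O2).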

Bug: aomedia:2865

Change-Id: I02db4283112d5df555622fcaa7483878215bc946
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index ec3df2e..e6bb482 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -320,14 +320,7 @@
   specialize qw/av1_fwd_txfm2d_16x32 sse4_1 neon/;
   add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
   specialize qw/av1_fwd_txfm2d_32x16 sse4_1 neon/;
-  add_proto qw/void av1_fwd_txfm2d_4x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
-  specialize qw/av1_fwd_txfm2d_4x16 sse4_1 neon/;
-  add_proto qw/void av1_fwd_txfm2d_16x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
-  specialize qw/av1_fwd_txfm2d_16x4 sse4_1 neon/;
-  add_proto qw/void av1_fwd_txfm2d_8x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
-  specialize qw/av1_fwd_txfm2d_8x32 sse4_1 neon/;
-  add_proto qw/void av1_fwd_txfm2d_32x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
-  specialize qw/av1_fwd_txfm2d_32x8 sse4_1 neon/;
+
   add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
   specialize qw/av1_fwd_txfm2d_4x4 sse4_1 neon/;
   add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
@@ -343,11 +336,21 @@
   specialize qw/av1_fwd_txfm2d_32x64 sse4_1 neon/;
   add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
   specialize qw/av1_fwd_txfm2d_64x32 sse4_1 neon/;
-  add_proto qw/void av1_fwd_txfm2d_16x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
-  specialize qw/av1_fwd_txfm2d_16x64 sse4_1 neon/;
-  add_proto qw/void av1_fwd_txfm2d_64x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
-  specialize qw/av1_fwd_txfm2d_64x16 sse4_1 neon/;
+  add_proto qw/void av1_fwd_txfm2d_16x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+  specialize qw/av1_fwd_txfm2d_16x4 sse4_1 neon/;
 
+  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # skipped for realtime-only builds
+    add_proto qw/void av1_fwd_txfm2d_4x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+    specialize qw/av1_fwd_txfm2d_4x16 sse4_1 neon/;
+    add_proto qw/void av1_fwd_txfm2d_8x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+    specialize qw/av1_fwd_txfm2d_8x32 sse4_1 neon/;
+    add_proto qw/void av1_fwd_txfm2d_32x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+    specialize qw/av1_fwd_txfm2d_32x8 sse4_1 neon/;
+    add_proto qw/void av1_fwd_txfm2d_16x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+    specialize qw/av1_fwd_txfm2d_16x64 sse4_1 neon/;
+    add_proto qw/void av1_fwd_txfm2d_64x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+    specialize qw/av1_fwd_txfm2d_64x16 sse4_1 neon/;
+  }  # CONFIG_REALTIME_ONLY ne "yes"
   #
   # Motion search
   #
diff --git a/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c b/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
index e17cd90..06e4356 100644
--- a/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
+++ b/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
@@ -797,6 +797,7 @@
     out[7 + 8 * i] = vshlq_n_s32(in[7 + 8 * i], 1);
   }
 }
+#if !CONFIG_REALTIME_ONLY
 static void idtx32x8_neon(int32x4_t *in, int32x4_t *out, int bit, int col_num) {
   (void)bit;
   (void)col_num;
@@ -811,6 +812,7 @@
     out[j + 8 * 7] = vshlq_n_s32(in[j + 8 * 7], 1);
   }
 }
+#endif
 void av1_fwd_txfm2d_8x8_neon(const int16_t *input, int32_t *coeff, int stride,
                              TX_TYPE tx_type, int bd) {
   int32x4_t in[16], out[16];
@@ -1793,6 +1795,7 @@
   fadst8x8_neon,  // V_FLIPADST
   idtx8x8_neon    // H_FLIPADST
 };
+#if !CONFIG_REALTIME_ONLY
 static const fwd_transform_1d_neon row_highbd_txfm32x8_arr[TX_TYPES] = {
   fdct8x8_neon,   // DCT_DCT
   NULL,           // ADST_DCT
@@ -1811,6 +1814,7 @@
   NULL,           // V_FLIPADST
   NULL,           // H_FLIPADST
 };
+#endif
 static const fwd_transform_1d_neon col_highbd_txfm4x8_arr[TX_TYPES] = {
   fdct4x8_neon,   // DCT_DCT
   fadst8x8_neon,  // ADST_DCT
@@ -3421,6 +3425,7 @@
   }
 }
 
+#if !CONFIG_REALTIME_ONLY
 void av1_fwd_txfm2d_4x16_neon(const int16_t *input, int32_t *coeff, int stride,
                               TX_TYPE tx_type, int bd) {
   (void)bd;
@@ -3452,6 +3457,7 @@
     row_txfm(in + i, outcoeff128 + i * txfm_size_col, bitrow, txfm_size_col);
   }
 }
+#endif
 
 void av1_fwd_txfm2d_16x4_neon(const int16_t *input, int32_t *coeff, int stride,
                               TX_TYPE tx_type, int bd) {
@@ -3637,6 +3643,7 @@
   (void)bd;
 }
 
+#if !CONFIG_REALTIME_ONLY
 void av1_fwd_txfm2d_8x32_neon(const int16_t *input, int32_t *coeff, int stride,
                               TX_TYPE tx_type, int bd) {
   int32x4_t in[64];
@@ -3707,6 +3714,7 @@
   transpose_8nx8n(in, outcoef128, txfm_size_row, txfm_size_col);
   (void)bd;
 }
+#endif
 
 void av1_fwd_txfm2d_4x8_neon(const int16_t *input, int32_t *coeff, int stride,
                              TX_TYPE tx_type, int bd) {
@@ -3771,6 +3779,7 @@
   (void)bd;
 }
 
+#if !CONFIG_REALTIME_ONLY
 void av1_fwd_txfm2d_16x64_neon(const int16_t *input, int32_t *coeff, int stride,
                                TX_TYPE tx_type, int bd) {
   int32x4_t in[256];
@@ -3855,6 +3864,7 @@
   transpose_8nx8n(in, outcoeff128, txfm_size_row, 32);
   (void)bd;
 }
+#endif
 
 static void fdct64_new_neon(int32x4_t *input, int32x4_t *output,
                             const int8_t cos_bit, const int8_t *stage_range) {
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 0699085..08c167a 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -134,6 +134,7 @@
                        txfm_param->bd);
 }
 
+#if !CONFIG_REALTIME_ONLY
 static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TxfmParam *txfm_param) {
   int32_t *dst_coeff = (int32_t *)coeff;
@@ -161,6 +162,7 @@
   av1_fwd_txfm2d_8x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
                       txfm_param->bd);
 }
+#endif
 
 static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
                                 int diff_stride, TxfmParam *txfm_param) {
@@ -204,6 +206,7 @@
                        bd);
 }
 
+#if !CONFIG_REALTIME_ONLY
 static void highbd_fwd_txfm_16x64(const int16_t *src_diff, tran_low_t *coeff,
                                   int diff_stride, TxfmParam *txfm_param) {
   assert(txfm_param->tx_type == DCT_DCT);
@@ -219,6 +222,7 @@
   const int bd = txfm_param->bd;
   av1_fwd_txfm2d_64x16(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
 }
+#endif
 
 static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
                                   int diff_stride, TxfmParam *txfm_param) {
@@ -255,12 +259,7 @@
     case TX_64X32:
       highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
       break;
-    case TX_16X64:
-      highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_64X16:
-      highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param);
-      break;
+
     case TX_32X32:
       highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
       break;
@@ -291,6 +290,7 @@
     case TX_4X4:
       highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param);
       break;
+#if !CONFIG_REALTIME_ONLY
     case TX_4X16:
       highbd_fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param);
       break;
@@ -303,6 +303,13 @@
     case TX_32X8:
       highbd_fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param);
       break;
+    case TX_16X64:
+      highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param);
+      break;
+    case TX_64X16:
+      highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param);
+      break;
+#endif
     default: assert(0); break;
   }
 }
diff --git a/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
index 73afc5d..9a0a36c1 100644
--- a/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -827,6 +827,7 @@
     out[7 + 8 * i] = _mm_add_epi32(in[7 + 8 * i], in[7 + 8 * i]);
   }
 }
+#if !CONFIG_REALTIME_ONLY
 static void idtx32x8_sse4_1(__m128i *in, __m128i *out, int bit, int col_num) {
   (void)bit;
   (void)col_num;
@@ -841,6 +842,7 @@
     out[j + 8 * 7] = _mm_add_epi32(in[j + 8 * 7], in[j + 8 * 7]);
   }
 }
+#endif
 void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
                                TX_TYPE tx_type, int bd) {
   __m128i in[16], out[16];
@@ -1146,6 +1148,7 @@
   load_buffer_4x4(botL, out + 4, stride, flipud, fliplr, shift);
 }
 
+#if !CONFIG_REALTIME_ONLY
 static INLINE void load_buffer_4x16(const int16_t *input, __m128i *out,
                                     const int stride, const int flipud,
                                     const int fliplr, const int shift) {
@@ -1162,6 +1165,7 @@
   load_buffer_4x8(topL, out, stride, flipud, fliplr, shift);
   load_buffer_4x8(botL, out + 8, stride, flipud, fliplr, shift);
 }
+#endif
 
 static INLINE void load_buffer_32x8n(const int16_t *input, __m128i *out,
                                      int stride, int flipud, int fliplr,
@@ -1943,6 +1947,7 @@
   fadst8x8_sse4_1,  // V_FLIPADST
   idtx8x8_sse4_1    // H_FLIPADST
 };
+#if !CONFIG_REALTIME_ONLY
 static const fwd_transform_1d_sse4_1 row_highbd_txfm32x8_arr[TX_TYPES] = {
   fdct8x8_sse4_1,   // DCT_DCT
   NULL,             // ADST_DCT
@@ -1961,6 +1966,7 @@
   NULL,             // V_FLIPADST
   NULL,             // H_FLIPADST
 };
+#endif
 static const fwd_transform_1d_sse4_1 col_highbd_txfm4x8_arr[TX_TYPES] = {
   fdct4x8_sse4_1,   // DCT_DCT
   fadst8x8_sse4_1,  // ADST_DCT
@@ -2194,6 +2200,7 @@
   (void)bd;
 }
 
+#if !CONFIG_REALTIME_ONLY
 void av1_fwd_txfm2d_4x16_sse4_1(const int16_t *input, int32_t *coeff,
                                 int stride, TX_TYPE tx_type, int bd) {
   __m128i in[16];
@@ -2222,6 +2229,7 @@
   }
   (void)bd;
 }
+#endif
 
 void av1_fwd_txfm2d_16x4_sse4_1(const int16_t *input, int32_t *coeff,
                                 int stride, TX_TYPE tx_type, int bd) {
@@ -2394,6 +2402,7 @@
   (void)bd;
 }
 
+#if !CONFIG_REALTIME_ONLY
 void av1_fwd_txfm2d_8x32_sse4_1(const int16_t *input, int32_t *coeff,
                                 int stride, TX_TYPE tx_type, int bd) {
   __m128i in[64];
@@ -2461,6 +2470,7 @@
   transpose_8nx8n(in, outcoef128, txfm_size_row, txfm_size_col);
   (void)bd;
 }
+#endif
 
 void av1_fwd_txfm2d_4x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
                                TX_TYPE tx_type, int bd) {
@@ -2522,6 +2532,7 @@
   (void)bd;
 }
 
+#if !CONFIG_REALTIME_ONLY
 void av1_fwd_txfm2d_16x64_sse4_1(const int16_t *input, int32_t *coeff,
                                  int stride, TX_TYPE tx_type, int bd) {
   __m128i in[256];
@@ -2602,3 +2613,4 @@
   transpose_8nx8n(in, outcoeff128, txfm_size_row, 32);
   (void)bd;
 }
+#endif
diff --git a/test/av1_txfm_test.h b/test/av1_txfm_test.h
index 5a56d28..13a7e8a 100644
--- a/test/av1_txfm_test.h
+++ b/test/av1_txfm_test.h
@@ -97,7 +97,7 @@
 }
 
 #if CONFIG_AV1_ENCODER
-
+#if !CONFIG_REALTIME_ONLY
 static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {
   av1_fwd_txfm2d_4x4_c,   av1_fwd_txfm2d_8x8_c,   av1_fwd_txfm2d_16x16_c,
   av1_fwd_txfm2d_32x32_c, av1_fwd_txfm2d_64x64_c, av1_fwd_txfm2d_4x8_c,
@@ -107,6 +107,29 @@
   av1_fwd_txfm2d_8x32_c,  av1_fwd_txfm2d_32x8_c,  av1_fwd_txfm2d_16x64_c,
   av1_fwd_txfm2d_64x16_c,
 };
+#else
+static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {
+  av1_fwd_txfm2d_4x4_c,
+  av1_fwd_txfm2d_8x8_c,
+  av1_fwd_txfm2d_16x16_c,
+  av1_fwd_txfm2d_32x32_c,
+  av1_fwd_txfm2d_64x64_c,
+  av1_fwd_txfm2d_4x8_c,
+  av1_fwd_txfm2d_8x4_c,
+  av1_fwd_txfm2d_8x16_c,
+  av1_fwd_txfm2d_16x8_c,
+  av1_fwd_txfm2d_16x32_c,
+  av1_fwd_txfm2d_32x16_c,
+  av1_fwd_txfm2d_32x64_c,
+  av1_fwd_txfm2d_64x32_c,
+  nullptr,  // 4x16 slot: not built when CONFIG_REALTIME_ONLY is set
+  av1_fwd_txfm2d_16x4_c,
+  nullptr,  // 8x32 slot: not built when CONFIG_REALTIME_ONLY is set
+  nullptr,  // 32x8 slot: not built when CONFIG_REALTIME_ONLY is set
+  nullptr,  // 16x64 slot: not built when CONFIG_REALTIME_ONLY is set
+  nullptr,  // 64x16 slot: not built when CONFIG_REALTIME_ONLY is set
+};
+#endif
 #endif
 
 static const InvTxfm2dFunc inv_txfm_func_ls[TX_SIZES_ALL] = {