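Remove 1:4 and 4:1 rectangular forward transforms from the realtime-only build
Guards the 4x16, 8x32, 32x8, 16x64 and 64x16 forward 2D transforms with CONFIG_REALTIME_ONLY; av1_fwd_txfm2d_16x4 stays declared for all builds. Reduces binary size by 25K (Linux, -O2).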
Bug: aomedia:2865
Change-Id: I02db4283112d5df555622fcaa7483878215bc946
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index ec3df2e..e6bb482 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -320,14 +320,7 @@
specialize qw/av1_fwd_txfm2d_16x32 sse4_1 neon/;
add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
specialize qw/av1_fwd_txfm2d_32x16 sse4_1 neon/;
- add_proto qw/void av1_fwd_txfm2d_4x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_4x16 sse4_1 neon/;
- add_proto qw/void av1_fwd_txfm2d_16x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_16x4 sse4_1 neon/;
- add_proto qw/void av1_fwd_txfm2d_8x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_8x32 sse4_1 neon/;
- add_proto qw/void av1_fwd_txfm2d_32x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_32x8 sse4_1 neon/;
+
add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
specialize qw/av1_fwd_txfm2d_4x4 sse4_1 neon/;
add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
@@ -343,11 +336,21 @@
specialize qw/av1_fwd_txfm2d_32x64 sse4_1 neon/;
add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
specialize qw/av1_fwd_txfm2d_64x32 sse4_1 neon/;
- add_proto qw/void av1_fwd_txfm2d_16x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_16x64 sse4_1 neon/;
- add_proto qw/void av1_fwd_txfm2d_64x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_64x16 sse4_1 neon/;
+ add_proto qw/void av1_fwd_txfm2d_16x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ specialize qw/av1_fwd_txfm2d_16x4 sse4_1 neon/;
+ if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # prototypes below are not declared when CONFIG_REALTIME_ONLY is "yes"
+ add_proto qw/void av1_fwd_txfm2d_4x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ specialize qw/av1_fwd_txfm2d_4x16 sse4_1 neon/;
+ add_proto qw/void av1_fwd_txfm2d_8x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ specialize qw/av1_fwd_txfm2d_8x32 sse4_1 neon/;
+ add_proto qw/void av1_fwd_txfm2d_32x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ specialize qw/av1_fwd_txfm2d_32x8 sse4_1 neon/;
+ add_proto qw/void av1_fwd_txfm2d_16x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ specialize qw/av1_fwd_txfm2d_16x64 sse4_1 neon/;
+ add_proto qw/void av1_fwd_txfm2d_64x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ specialize qw/av1_fwd_txfm2d_64x16 sse4_1 neon/;
+ }  # end: CONFIG_REALTIME_ONLY ne "yes"
#
# Motion search
#
diff --git a/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c b/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
index e17cd90..06e4356 100644
--- a/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
+++ b/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
@@ -797,6 +797,7 @@
out[7 + 8 * i] = vshlq_n_s32(in[7 + 8 * i], 1);
}
}
+#if !CONFIG_REALTIME_ONLY
static void idtx32x8_neon(int32x4_t *in, int32x4_t *out, int bit, int col_num) {
(void)bit;
(void)col_num;
@@ -811,6 +812,7 @@
out[j + 8 * 7] = vshlq_n_s32(in[j + 8 * 7], 1);
}
}
+#endif
void av1_fwd_txfm2d_8x8_neon(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
int32x4_t in[16], out[16];
@@ -1793,6 +1795,7 @@
fadst8x8_neon, // V_FLIPADST
idtx8x8_neon // H_FLIPADST
};
+#if !CONFIG_REALTIME_ONLY
static const fwd_transform_1d_neon row_highbd_txfm32x8_arr[TX_TYPES] = {
fdct8x8_neon, // DCT_DCT
NULL, // ADST_DCT
@@ -1811,6 +1814,7 @@
NULL, // V_FLIPADST
NULL, // H_FLIPADST
};
+#endif
static const fwd_transform_1d_neon col_highbd_txfm4x8_arr[TX_TYPES] = {
fdct4x8_neon, // DCT_DCT
fadst8x8_neon, // ADST_DCT
@@ -3421,6 +3425,7 @@
}
}
+#if !CONFIG_REALTIME_ONLY
void av1_fwd_txfm2d_4x16_neon(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
(void)bd;
@@ -3452,6 +3457,7 @@
row_txfm(in + i, outcoeff128 + i * txfm_size_col, bitrow, txfm_size_col);
}
}
+#endif
void av1_fwd_txfm2d_16x4_neon(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
@@ -3637,6 +3643,7 @@
(void)bd;
}
+#if !CONFIG_REALTIME_ONLY
void av1_fwd_txfm2d_8x32_neon(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
int32x4_t in[64];
@@ -3707,6 +3714,7 @@
transpose_8nx8n(in, outcoef128, txfm_size_row, txfm_size_col);
(void)bd;
}
+#endif
void av1_fwd_txfm2d_4x8_neon(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
@@ -3771,6 +3779,7 @@
(void)bd;
}
+#if !CONFIG_REALTIME_ONLY
void av1_fwd_txfm2d_16x64_neon(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
int32x4_t in[256];
@@ -3855,6 +3864,7 @@
transpose_8nx8n(in, outcoeff128, txfm_size_row, 32);
(void)bd;
}
+#endif
static void fdct64_new_neon(int32x4_t *input, int32x4_t *output,
const int8_t cos_bit, const int8_t *stage_range) {
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 0699085..08c167a 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -134,6 +134,7 @@
txfm_param->bd);
}
+#if !CONFIG_REALTIME_ONLY
static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
@@ -161,6 +162,7 @@
av1_fwd_txfm2d_8x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
txfm_param->bd);
}
+#endif
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
@@ -204,6 +206,7 @@
bd);
}
+#if !CONFIG_REALTIME_ONLY
static void highbd_fwd_txfm_16x64(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
assert(txfm_param->tx_type == DCT_DCT);
@@ -219,6 +222,7 @@
const int bd = txfm_param->bd;
av1_fwd_txfm2d_64x16(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
}
+#endif
static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
@@ -255,12 +259,7 @@
case TX_64X32:
highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
break;
- case TX_16X64:
- highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_64X16:
- highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param);
- break;
+
case TX_32X32:
highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
break;
@@ -291,6 +290,7 @@
case TX_4X4:
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param);
break;
+#if !CONFIG_REALTIME_ONLY
case TX_4X16:
highbd_fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param);
break;
@@ -303,6 +303,13 @@
case TX_32X8:
highbd_fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param);
break;
+ case TX_16X64:
+ highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param);
+ break;
+ case TX_64X16:
+ highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param);
+ break;
+#endif
default: assert(0); break;
}
}
diff --git a/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
index 73afc5d..9a0a36c1 100644
--- a/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -827,6 +827,7 @@
out[7 + 8 * i] = _mm_add_epi32(in[7 + 8 * i], in[7 + 8 * i]);
}
}
+#if !CONFIG_REALTIME_ONLY
static void idtx32x8_sse4_1(__m128i *in, __m128i *out, int bit, int col_num) {
(void)bit;
(void)col_num;
@@ -841,6 +842,7 @@
out[j + 8 * 7] = _mm_add_epi32(in[j + 8 * 7], in[j + 8 * 7]);
}
}
+#endif
void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
__m128i in[16], out[16];
@@ -1146,6 +1148,7 @@
load_buffer_4x4(botL, out + 4, stride, flipud, fliplr, shift);
}
+#if !CONFIG_REALTIME_ONLY
static INLINE void load_buffer_4x16(const int16_t *input, __m128i *out,
const int stride, const int flipud,
const int fliplr, const int shift) {
@@ -1162,6 +1165,7 @@
load_buffer_4x8(topL, out, stride, flipud, fliplr, shift);
load_buffer_4x8(botL, out + 8, stride, flipud, fliplr, shift);
}
+#endif
static INLINE void load_buffer_32x8n(const int16_t *input, __m128i *out,
int stride, int flipud, int fliplr,
@@ -1943,6 +1947,7 @@
fadst8x8_sse4_1, // V_FLIPADST
idtx8x8_sse4_1 // H_FLIPADST
};
+#if !CONFIG_REALTIME_ONLY
static const fwd_transform_1d_sse4_1 row_highbd_txfm32x8_arr[TX_TYPES] = {
fdct8x8_sse4_1, // DCT_DCT
NULL, // ADST_DCT
@@ -1961,6 +1966,7 @@
NULL, // V_FLIPADST
NULL, // H_FLIPADST
};
+#endif
static const fwd_transform_1d_sse4_1 col_highbd_txfm4x8_arr[TX_TYPES] = {
fdct4x8_sse4_1, // DCT_DCT
fadst8x8_sse4_1, // ADST_DCT
@@ -2194,6 +2200,7 @@
(void)bd;
}
+#if !CONFIG_REALTIME_ONLY
void av1_fwd_txfm2d_4x16_sse4_1(const int16_t *input, int32_t *coeff,
int stride, TX_TYPE tx_type, int bd) {
__m128i in[16];
@@ -2222,6 +2229,7 @@
}
(void)bd;
}
+#endif
void av1_fwd_txfm2d_16x4_sse4_1(const int16_t *input, int32_t *coeff,
int stride, TX_TYPE tx_type, int bd) {
@@ -2394,6 +2402,7 @@
(void)bd;
}
+#if !CONFIG_REALTIME_ONLY
void av1_fwd_txfm2d_8x32_sse4_1(const int16_t *input, int32_t *coeff,
int stride, TX_TYPE tx_type, int bd) {
__m128i in[64];
@@ -2461,6 +2470,7 @@
transpose_8nx8n(in, outcoef128, txfm_size_row, txfm_size_col);
(void)bd;
}
+#endif
void av1_fwd_txfm2d_4x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
TX_TYPE tx_type, int bd) {
@@ -2522,6 +2532,7 @@
(void)bd;
}
+#if !CONFIG_REALTIME_ONLY
void av1_fwd_txfm2d_16x64_sse4_1(const int16_t *input, int32_t *coeff,
int stride, TX_TYPE tx_type, int bd) {
__m128i in[256];
@@ -2602,3 +2613,4 @@
transpose_8nx8n(in, outcoeff128, txfm_size_row, 32);
(void)bd;
}
+#endif
diff --git a/test/av1_txfm_test.h b/test/av1_txfm_test.h
index 5a56d28..13a7e8a 100644
--- a/test/av1_txfm_test.h
+++ b/test/av1_txfm_test.h
@@ -97,7 +97,7 @@
}
#if CONFIG_AV1_ENCODER
-
+#if !CONFIG_REALTIME_ONLY
static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {
av1_fwd_txfm2d_4x4_c, av1_fwd_txfm2d_8x8_c, av1_fwd_txfm2d_16x16_c,
av1_fwd_txfm2d_32x32_c, av1_fwd_txfm2d_64x64_c, av1_fwd_txfm2d_4x8_c,
@@ -107,6 +107,29 @@
av1_fwd_txfm2d_8x32_c, av1_fwd_txfm2d_32x8_c, av1_fwd_txfm2d_16x64_c,
av1_fwd_txfm2d_64x16_c,
};
+#else
+static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {  // CONFIG_REALTIME_ONLY variant of the table
+ av1_fwd_txfm2d_4x4_c,
+ av1_fwd_txfm2d_8x8_c,
+ av1_fwd_txfm2d_16x16_c,
+ av1_fwd_txfm2d_32x32_c,
+ av1_fwd_txfm2d_64x64_c,
+ av1_fwd_txfm2d_4x8_c,
+ av1_fwd_txfm2d_8x4_c,
+ av1_fwd_txfm2d_8x16_c,
+ av1_fwd_txfm2d_16x8_c,
+ av1_fwd_txfm2d_16x32_c,
+ av1_fwd_txfm2d_32x16_c,
+ av1_fwd_txfm2d_32x64_c,
+ av1_fwd_txfm2d_64x32_c,
+ nullptr,  // 4x16 slot: transform not declared in realtime-only builds
+ av1_fwd_txfm2d_16x4_c,
+ nullptr,  // 8x32 slot: transform not declared in realtime-only builds
+ nullptr,  // 32x8 slot: transform not declared in realtime-only builds
+ nullptr,  // 16x64 slot: transform not declared in realtime-only builds
+ nullptr,  // 64x16 slot: transform not declared in realtime-only builds
+};
+#endif
#endif
static const InvTxfm2dFunc inv_txfm_func_ls[TX_SIZES_ALL] = {