Remove "_new" suffix from function naming Change-Id: I1b993e6ae335df0943e95acbd60eb92e1446ab9f

diff --git a/av1/common/arm/av1_inv_txfm_neon.c b/av1/common/arm/av1_inv_txfm_neon.c
index 29928d9..2f3567a 100644
--- a/av1/common/arm/av1_inv_txfm_neon.c
+++ b/av1/common/arm/av1_inv_txfm_neon.c

@@ -48,11 +48,11 @@
 
 // 1D functions
 static const transform_1d_neon lowbd_txfm_all_1d_arr[TX_SIZES][ITX_TYPES_1D] = {
-  { av1_idct4_new, av1_iadst4_new, av1_iidentity4_c },
-  { av1_idct8_new, av1_iadst8_new, av1_iidentity8_c },
-  { av1_idct16_new, av1_iadst16_new, av1_iidentity16_c },
-  { av1_idct32_new, NULL, NULL },
-  { av1_idct64_new, NULL, NULL },
+  { av1_idct4, av1_iadst4, av1_iidentity4_c },
+  { av1_idct8, av1_iadst8, av1_iidentity8_c },
+  { av1_idct16, av1_iadst16, av1_iidentity16_c },
+  { av1_idct32, NULL, NULL },
+  { av1_idct64, NULL, NULL },
 };
 
 static INLINE void lowbd_add_flip_buffer_8xn_neon(int16x8_t *in,
@@ -258,8 +258,8 @@
   return val;
 }
 
-static INLINE void iadst8_new_neon(int16x8_t *const in, int16x8_t *out,
-                                   int8_t cos_bit, int bit) {
+static INLINE void iadst8_neon(int16x8_t *const in, int16x8_t *out,
+                               int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
 
@@ -332,8 +332,8 @@
   out[7] = vqnegq_s16(x[1]);
 }
 
-static INLINE void iadst8_low1_new_neon(int16x8_t *const in, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
+static INLINE void iadst8_low1_neon(int16x8_t *const in, int16x8_t *out,
+                                    int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   const int16x4_t c2 = set_s16x4_neon((int16_t)cospi[32], (int16_t)cospi[32],
@@ -385,8 +385,8 @@
   out[7] = vqnegq_s16(x[1]);
 }
 
-static INLINE void idct8_new_neon(int16x8_t *in, int16x8_t *out, int8_t cos_bit,
-                                  int bit) {
+static INLINE void idct8_neon(int16x8_t *in, int16x8_t *out, int8_t cos_bit,
+                              int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1[8], step2[8];
@@ -425,8 +425,8 @@
   out[7] = vqsubq_s16(step1[0], step2[7]);
 }
 
-static INLINE void idct8_low1_new_neon(int16x8_t *in, int16x8_t *out,
-                                       int8_t cos_bit, int bit) {
+static INLINE void idct8_low1_neon(int16x8_t *in, int16x8_t *out,
+                                   int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1;
@@ -518,8 +518,8 @@
   }
 }
 
-static INLINE void idct16_low1_new_neon(int16x8_t *in, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
+static INLINE void idct16_low1_neon(int16x8_t *in, int16x8_t *out,
+                                    int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1;
@@ -552,8 +552,8 @@
   out[15] = step1;
 }
 
-static INLINE void idct16_new_neon(int16x8_t *in, int16x8_t *out,
-                                   int8_t cos_bit, int bit) {
+static INLINE void idct16_neon(int16x8_t *in, int16x8_t *out, int8_t cos_bit,
+                               int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1[16], step2[16];
@@ -675,8 +675,8 @@
   out[15] = vqsubq_s16(step2[0], step2[15]);
 }
 
-static INLINE void idct16_low8_new_neon(int16x8_t *in, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
+static INLINE void idct16_low8_neon(int16x8_t *in, int16x8_t *out,
+                                    int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1[16], step2[16];
@@ -786,8 +786,8 @@
   out[15] = vqsubq_s16(step2[0], step2[15]);
 }
 
-static INLINE void iadst16_new_neon(int16x8_t *const in, int16x8_t *out,
-                                    int8_t cos_bit, int bit) {
+static INLINE void iadst16_neon(int16x8_t *const in, int16x8_t *out,
+                                int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
 
@@ -944,8 +944,8 @@
   out[15] = vqnegq_s16(x[1]);
 }
 
-static INLINE void iadst16_low1_new_neon(int16x8_t *const in, int16x8_t *out,
-                                         int8_t cos_bit, int bit) {
+static INLINE void iadst16_low1_neon(int16x8_t *const in, int16x8_t *out,
+                                     int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   const int16x4_t c0 = set_s16x4_neon((int16_t)cospi[8], (int16_t)cospi[56],
@@ -1036,8 +1036,8 @@
   out[15] = vqnegq_s16(x[1]);
 }
 
-static INLINE void iadst16_low8_new_neon(int16x8_t *const in, int16x8_t *out,
-                                         int8_t cos_bit, int bit) {
+static INLINE void iadst16_low8_neon(int16x8_t *const in, int16x8_t *out,
+                                     int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
 
@@ -1179,8 +1179,8 @@
   out[15] = vqnegq_s16(x[1]);
 }
 
-static INLINE void idct32_new_neon(int16x8_t *in, int16x8_t *out,
-                                   int8_t cos_bit, int bit) {
+static INLINE void idct32_neon(int16x8_t *in, int16x8_t *out, int8_t cos_bit,
+                               int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1[32], step2[32];
@@ -1464,8 +1464,8 @@
   out[31] = vqsubq_s16(step2[0], step2[31]);
 }
 
-static INLINE void idct32_low1_new_neon(int16x8_t *in, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
+static INLINE void idct32_low1_neon(int16x8_t *in, int16x8_t *out,
+                                    int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1;
@@ -1521,8 +1521,8 @@
   out[31] = step1;
 }
 
-static INLINE void idct32_low8_new_neon(int16x8_t *in, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
+static INLINE void idct32_low8_neon(int16x8_t *in, int16x8_t *out,
+                                    int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1[32], step2[32];
@@ -1768,8 +1768,8 @@
   out[31] = vqsubq_s16(step2[0], step2[31]);
 }
 
-static INLINE void idct32_low16_new_neon(int16x8_t *in, int16x8_t *out,
-                                         int8_t cos_bit, int bit) {
+static INLINE void idct32_low16_neon(int16x8_t *in, int16x8_t *out,
+                                     int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1[32], step2[32];
@@ -2165,8 +2165,8 @@
   step2[63] = step1[63];
 }
 
-static INLINE void idct64_low32_new_neon(int16x8_t *in, int16x8_t *out,
-                                         int8_t cos_bit, int bit) {
+static INLINE void idct64_low32_neon(int16x8_t *in, int16x8_t *out,
+                                     int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step2[64], step1[64];
@@ -2646,8 +2646,8 @@
   out[63] = vqsubq_s16(step2[0], step2[63]);
 }
 
-static INLINE void idct64_low1_new_neon(int16x8_t *input, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
+static INLINE void idct64_low1_neon(int16x8_t *input, int16x8_t *out,
+                                    int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step1;
@@ -2736,8 +2736,8 @@
   out[63] = step1;
 }
 
-static INLINE void idct64_low8_new_neon(int16x8_t *in, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
+static INLINE void idct64_low8_neon(int16x8_t *in, int16x8_t *out,
+                                    int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step2[64], step1[64];
@@ -3114,8 +3114,8 @@
   out[63] = vqsubq_s16(step2[0], step2[63]);
 }
 
-static INLINE void idct64_low16_new_neon(int16x8_t *in, int16x8_t *out,
-                                         int8_t cos_bit, int bit) {
+static INLINE void idct64_low16_neon(int16x8_t *in, int16x8_t *out,
+                                     int8_t cos_bit, int bit) {
   (void)bit;
   const int32_t *cospi = cospi_arr(cos_bit);
   int16x8_t step2[64], step1[64];
@@ -3574,21 +3574,19 @@
           { NULL, NULL, NULL, NULL },
           { NULL, NULL, NULL, NULL },
       },
-      { { idct8_low1_new_neon, idct8_new_neon, NULL, NULL },
-        { iadst8_low1_new_neon, iadst8_new_neon, NULL, NULL },
+      { { idct8_low1_neon, idct8_neon, NULL, NULL },
+        { iadst8_low1_neon, iadst8_neon, NULL, NULL },
         { NULL, NULL, NULL, NULL } },
       {
-          { idct16_low1_new_neon, idct16_low8_new_neon, idct16_new_neon, NULL },
-          { iadst16_low1_new_neon, iadst16_low8_new_neon, iadst16_new_neon,
-            NULL },
+          { idct16_low1_neon, idct16_low8_neon, idct16_neon, NULL },
+          { iadst16_low1_neon, iadst16_low8_neon, iadst16_neon, NULL },
           { NULL, NULL, NULL, NULL },
       },
-      { { idct32_low1_new_neon, idct32_low8_new_neon, idct32_low16_new_neon,
-          idct32_new_neon },
+      { { idct32_low1_neon, idct32_low8_neon, idct32_low16_neon, idct32_neon },
         { NULL, NULL, NULL, NULL },
         { NULL, NULL, NULL, NULL } },
-      { { idct64_low1_new_neon, idct64_low8_new_neon, idct64_low16_new_neon,
-          idct64_low32_new_neon },
+      { { idct64_low1_neon, idct64_low8_neon, idct64_low16_neon,
+          idct64_low32_neon },
         { NULL, NULL, NULL, NULL },
         { NULL, NULL, NULL, NULL } }
     };

diff --git a/av1/common/av1_inv_txfm1d.c b/av1/common/av1_inv_txfm1d.c
index 3d4e800..c6b386f 100644
--- a/av1/common/av1_inv_txfm1d.c
+++ b/av1/common/av1_inv_txfm1d.c

@@ -13,8 +13,8 @@
 #include "av1/common/av1_inv_txfm1d.h"
 #include "av1/common/av1_txfm.h"
 
-void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range) {
+void av1_idct4(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range) {
   assert(output != input);
   const int32_t size = 4;
   const int32_t *cospi = cospi_arr(cos_bit);
@@ -54,8 +54,8 @@
   bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
 }
 
-void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range) {
+void av1_idct8(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range) {
   assert(output != input);
   const int32_t size = 8;
   const int32_t *cospi = cospi_arr(cos_bit);
@@ -135,8 +135,8 @@
   bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
 }
 
-void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_idct16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   assert(output != input);
   const int32_t size = 16;
   const int32_t *cospi = cospi_arr(cos_bit);
@@ -300,8 +300,8 @@
   bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
 }
 
-void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_idct32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   assert(output != input);
   const int32_t size = 32;
   const int32_t *cospi = cospi_arr(cos_bit);
@@ -653,8 +653,8 @@
   bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
 }
 
-void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_iadst4(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   int bit = cos_bit;
   const int32_t *sinpi = sinpi_arr(bit);
   int32_t s0, s1, s2, s3, s4, s5, s6, s7;
@@ -710,8 +710,8 @@
   output[3] = round_shift(x3, bit);
 }
 
-void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_iadst8(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   assert(output != input);
   const int32_t size = 8;
   const int32_t *cospi = cospi_arr(cos_bit);
@@ -819,8 +819,8 @@
   bf1[7] = -bf0[1];
 }
 
-void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                     const int8_t *stage_range) {
+void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
   assert(output != input);
   const int32_t size = 16;
   const int32_t *cospi = cospi_arr(cos_bit);
@@ -1061,8 +1061,8 @@
   for (int i = 0; i < 32; ++i) output[i] = (int32_t)((int64_t)input[i] * 4);
 }
 
-void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_idct64(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   assert(output != input);
   const int32_t size = 64;
   const int32_t *cospi = cospi_arr(cos_bit);

diff --git a/av1/common/av1_inv_txfm1d.h b/av1/common/av1_inv_txfm1d.h
index c31c019..e1d5d98 100644
--- a/av1/common/av1_inv_txfm1d.h
+++ b/av1/common/av1_inv_txfm1d.h

@@ -29,22 +29,22 @@
   for (int i = 0; i < size; ++i) buf[i] = clamp_value(buf[i], bit);
 }
 
-void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range);
-void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range);
-void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                     const int8_t *stage_range);
+void av1_idct4(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range);
+void av1_idct8(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range);
+void av1_idct16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_idct32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_idct64(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_iadst4(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_iadst8(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range);
 void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range);
 void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,

diff --git a/av1/common/av1_inv_txfm2d.c b/av1/common/av1_inv_txfm2d.c
index 92486aa..559d121 100644
--- a/av1/common/av1_inv_txfm2d.c
+++ b/av1/common/av1_inv_txfm2d.c

@@ -113,14 +113,14 @@
 
 static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
   switch (txfm_type) {
-    case TXFM_TYPE_DCT4: return av1_idct4_new;
-    case TXFM_TYPE_DCT8: return av1_idct8_new;
-    case TXFM_TYPE_DCT16: return av1_idct16_new;
-    case TXFM_TYPE_DCT32: return av1_idct32_new;
-    case TXFM_TYPE_DCT64: return av1_idct64_new;
-    case TXFM_TYPE_ADST4: return av1_iadst4_new;
-    case TXFM_TYPE_ADST8: return av1_iadst8_new;
-    case TXFM_TYPE_ADST16: return av1_iadst16_new;
+    case TXFM_TYPE_DCT4: return av1_idct4;
+    case TXFM_TYPE_DCT8: return av1_idct8;
+    case TXFM_TYPE_DCT16: return av1_idct16;
+    case TXFM_TYPE_DCT32: return av1_idct32;
+    case TXFM_TYPE_DCT64: return av1_idct64;
+    case TXFM_TYPE_ADST4: return av1_iadst4;
+    case TXFM_TYPE_ADST8: return av1_iadst8;
+    case TXFM_TYPE_ADST16: return av1_iadst16;
     case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c;
     case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
     case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;

diff --git a/av1/common/x86/av1_inv_txfm_avx2.c b/av1/common/x86/av1_inv_txfm_avx2.c
index cae7504..40278ea 100644
--- a/av1/common/x86/av1_inv_txfm_avx2.c
+++ b/av1/common/x86/av1_inv_txfm_avx2.c

@@ -61,8 +61,7 @@
   btf_16_adds_subs_out_avx2(&output[7], &output[8], x1[7], x1[8]);
 }
 
-static void idct16_new_avx2(const __m256i *input, __m256i *output,
-                            int8_t cos_bit) {
+static void idct16_avx2(const __m256i *input, __m256i *output, int8_t cos_bit) {
   (void)(cos_bit);
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -133,8 +132,8 @@
   idct16_stage7_avx2(output, x1);
 }
 
-static void idct16_low8_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
+static void idct16_low8_avx2(const __m256i *input, __m256i *output,
+                             int8_t cos_bit) {
   (void)(cos_bit);
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -181,8 +180,8 @@
   idct16_stage7_avx2(output, x1);
 }
 
-static void idct16_low1_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
+static void idct16_low1_avx2(const __m256i *input, __m256i *output,
+                             int8_t cos_bit) {
   (void)(cos_bit);
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -303,8 +302,8 @@
   output[15] = _mm256_subs_epi16(__zero, x1[1]);
 }
 
-static void iadst16_new_avx2(const __m256i *input, __m256i *output,
-                             int8_t cos_bit) {
+static void iadst16_avx2(const __m256i *input, __m256i *output,
+                         int8_t cos_bit) {
   (void)(cos_bit);
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -365,8 +364,8 @@
   iadst16_stage9_avx2(output, x1);
 }
 
-static void iadst16_low8_new_avx2(const __m256i *input, __m256i *output,
-                                  int8_t cos_bit) {
+static void iadst16_low8_avx2(const __m256i *input, __m256i *output,
+                              int8_t cos_bit) {
   (void)(cos_bit);
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -401,8 +400,8 @@
   iadst16_stage9_avx2(output, x1);
 }
 
-static void iadst16_low1_new_avx2(const __m256i *input, __m256i *output,
-                                  int8_t cos_bit) {
+static void iadst16_low1_avx2(const __m256i *input, __m256i *output,
+                              int8_t cos_bit) {
   (void)(cos_bit);
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -568,8 +567,8 @@
   btf_16_adds_subs_out_avx2(&output[15], &output[16], x[15], x[16]);
 }
 
-static void idct32_low1_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
+static void idct32_low1_avx2(const __m256i *input, __m256i *output,
+                             int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -621,8 +620,8 @@
   output[16] = x[0];
 }
 
-static void idct32_low8_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
+static void idct32_low8_avx2(const __m256i *input, __m256i *output,
+                             int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -679,8 +678,8 @@
   idct32_stage9_avx2(output, x);
 }
 
-static void idct32_low16_new_avx2(const __m256i *input, __m256i *output,
-                                  int8_t cos_bit) {
+static void idct32_low16_avx2(const __m256i *input, __m256i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -746,8 +745,7 @@
   idct32_stage9_avx2(output, x);
 }
 
-static void idct32_new_avx2(const __m256i *input, __m256i *output,
-                            int8_t cos_bit) {
+static void idct32_avx2(const __m256i *input, __m256i *output, int8_t cos_bit) {
   (void)(cos_bit);
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1104,8 +1102,8 @@
   btf_16_adds_subs_out_avx2(&output[31], &output[32], x[31], x[32]);
 }
 
-static void idct64_low1_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
+static void idct64_low1_avx2(const __m256i *input, __m256i *output,
+                             int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -1191,8 +1189,8 @@
   output[32] = x[0];
 }
 
-static void idct64_low8_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
+static void idct64_low8_avx2(const __m256i *input, __m256i *output,
+                             int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1312,8 +1310,8 @@
   idct64_stage11_avx2(output, x);
 }
 
-static void idct64_low16_new_avx2(const __m256i *input, __m256i *output,
-                                  int8_t cos_bit) {
+static void idct64_low16_avx2(const __m256i *input, __m256i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1428,8 +1426,8 @@
   idct64_stage11_avx2(output, x);
 }
 
-static void idct64_low32_new_avx2(const __m256i *input, __m256i *output,
-                                  int8_t cos_bit) {
+static void idct64_low32_avx2(const __m256i *input, __m256i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1592,17 +1590,15 @@
         { NULL, NULL, NULL, NULL },
         { NULL, NULL, NULL, NULL } },
       {
-          { idct16_low1_new_avx2, idct16_low8_new_avx2, idct16_new_avx2, NULL },
-          { iadst16_low1_new_avx2, iadst16_low8_new_avx2, iadst16_new_avx2,
-            NULL },
+          { idct16_low1_avx2, idct16_low8_avx2, idct16_avx2, NULL },
+          { iadst16_low1_avx2, iadst16_low8_avx2, iadst16_avx2, NULL },
           { NULL, NULL, NULL, NULL },
       },
-      { { idct32_low1_new_avx2, idct32_low8_new_avx2, idct32_low16_new_avx2,
-          idct32_new_avx2 },
+      { { idct32_low1_avx2, idct32_low8_avx2, idct32_low16_avx2, idct32_avx2 },
         { NULL, NULL, NULL, NULL },
         { NULL, NULL, NULL, NULL } },
-      { { idct64_low1_new_avx2, idct64_low8_new_avx2, idct64_low16_new_avx2,
-          idct64_low32_new_avx2 },
+      { { idct64_low1_avx2, idct64_low8_avx2, idct64_low16_avx2,
+          idct64_low32_avx2 },
         { NULL, NULL, NULL, NULL },
         { NULL, NULL, NULL, NULL } }
     };

diff --git a/av1/common/x86/av1_inv_txfm_ssse3.c b/av1/common/x86/av1_inv_txfm_ssse3.c
index 2303ed8..30488ca 100644
--- a/av1/common/x86/av1_inv_txfm_ssse3.c
+++ b/av1/common/x86/av1_inv_txfm_ssse3.c

@@ -24,8 +24,7 @@
 
 // TODO(binpengsmail@gmail.com): replace some for loop with do {} while
 
-static void idct4_new_sse2(const __m128i *input, __m128i *output,
-                           int8_t cos_bit) {
+static void idct4_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -51,8 +50,8 @@
   btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]);
 }
 
-static void idct4_w4_new_sse2(const __m128i *input, __m128i *output,
-                              int8_t cos_bit) {
+static void idct4_w4_sse2(const __m128i *input, __m128i *output,
+                          int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -78,8 +77,8 @@
   btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]);
 }
 
-static void idct8_low1_new_ssse3(const __m128i *input, __m128i *output,
-                                 int8_t cos_bit) {
+static void idct8_low1_ssse3(const __m128i *input, __m128i *output,
+                             int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -103,8 +102,7 @@
   output[4] = x[0];
 }
 
-static void idct8_new_sse2(const __m128i *input, __m128i *output,
-                           int8_t cos_bit) {
+static void idct8_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -152,8 +150,8 @@
   btf_16_adds_subs_out_sse2(output[3], output[4], x[3], x[4]);
 }
 
-static void idct8_w4_new_sse2(const __m128i *input, __m128i *output,
-                              int8_t cos_bit) {
+static void idct8_w4_sse2(const __m128i *input, __m128i *output,
+                          int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -239,8 +237,8 @@
   btf_16_adds_subs_out_sse2(output[7], output[8], x[7], x[8]);
 }
 
-static void idct16_low1_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void idct16_low1_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -274,8 +272,8 @@
   output[8] = x[0];
 }
 
-static void idct16_low8_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void idct16_low8_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -321,8 +319,7 @@
   idct16_stage7_sse2(output, x);
 }
 
-static void idct16_new_sse2(const __m128i *input, __m128i *output,
-                            int8_t cos_bit) {
+static void idct16_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -394,8 +391,8 @@
   idct16_stage7_sse2(output, x);
 }
 
-static void idct16_w4_new_sse2(const __m128i *input, __m128i *output,
-                               int8_t cos_bit) {
+static void idct16_w4_sse2(const __m128i *input, __m128i *output,
+                           int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -605,8 +602,8 @@
   btf_16_adds_subs_out_sse2(output[15], output[16], x[15], x[16]);
 }
 
-static void idct32_low1_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void idct32_low1_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -658,8 +655,8 @@
   output[16] = x[0];
 }
 
-static void idct32_low8_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void idct32_low8_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -716,8 +713,8 @@
   idct32_stage9_sse2(output, x);
 }
 
-static void idct32_low16_new_ssse3(const __m128i *input, __m128i *output,
-                                   int8_t cos_bit) {
+static void idct32_low16_ssse3(const __m128i *input, __m128i *output,
+                               int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -783,8 +780,7 @@
   idct32_stage9_sse2(output, x);
 }
 
-static void idct32_new_sse2(const __m128i *input, __m128i *output,
-                            int8_t cos_bit) {
+static void idct32_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1143,8 +1139,8 @@
   btf_16_adds_subs_out_sse2(output[31], output[32], x[31], x[32]);
 }
 
-static void idct64_low1_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void idct64_low1_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
 
@@ -1230,8 +1226,8 @@
   output[32] = x[0];
 }
 
-static void idct64_low8_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void idct64_low8_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1351,8 +1347,8 @@
   idct64_stage11_sse2(output, x);
 }
 
-static void idct64_low16_new_ssse3(const __m128i *input, __m128i *output,
-                                   int8_t cos_bit) {
+static void idct64_low16_ssse3(const __m128i *input, __m128i *output,
+                               int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1467,8 +1463,8 @@
   idct64_stage11_sse2(output, x);
 }
 
-static void idct64_low32_new_ssse3(const __m128i *input, __m128i *output,
-                                   int8_t cos_bit) {
+static void idct64_low32_ssse3(const __m128i *input, __m128i *output,
+                               int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -1616,8 +1612,7 @@
   idct64_stage11_sse2(output, x);
 }
 
-static void iadst4_new_sse2(const __m128i *input, __m128i *output,
-                            int8_t cos_bit) {
+static void iadst4_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *sinpi = sinpi_arr(INV_COS_BIT);
   const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]);
@@ -1678,8 +1673,8 @@
   }
 }
 
-static void iadst4_w4_new_sse2(const __m128i *input, __m128i *output,
-                               int8_t cos_bit) {
+static void iadst4_w4_sse2(const __m128i *input, __m128i *output,
+                           int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *sinpi = sinpi_arr(INV_COS_BIT);
   const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]);
@@ -1724,8 +1719,8 @@
   }
 }
 
-static void iadst8_low1_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void iadst8_low1_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __zero = _mm_setzero_si128();
@@ -1771,8 +1766,7 @@
   output[7] = _mm_subs_epi16(__zero, x[1]);
 }
 
-static void iadst8_new_sse2(const __m128i *input, __m128i *output,
-                            int8_t cos_bit) {
+static void iadst8_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __zero = _mm_setzero_si128();
@@ -1840,8 +1834,8 @@
   output[7] = _mm_subs_epi16(__zero, x[1]);
 }
 
-static void iadst8_w4_new_sse2(const __m128i *input, __m128i *output,
-                               int8_t cos_bit) {
+static void iadst8_w4_sse2(const __m128i *input, __m128i *output,
+                           int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __zero = _mm_setzero_si128();
@@ -2000,8 +1994,8 @@
   output[15] = _mm_subs_epi16(__zero, x[1]);
 }
 
-static void iadst16_low1_new_ssse3(const __m128i *input, __m128i *output,
-                                   int8_t cos_bit) {
+static void iadst16_low1_ssse3(const __m128i *input, __m128i *output,
+                               int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -2049,8 +2043,8 @@
   iadst16_stage9_ssse3(output, x);
 }
 
-static void iadst16_low8_new_ssse3(const __m128i *input, __m128i *output,
-                                   int8_t cos_bit) {
+static void iadst16_low8_ssse3(const __m128i *input, __m128i *output,
+                               int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -2085,8 +2079,8 @@
   iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit);
   iadst16_stage9_ssse3(output, x);
 }
-static void iadst16_new_sse2(const __m128i *input, __m128i *output,
-                             int8_t cos_bit) {
+static void iadst16_sse2(const __m128i *input, __m128i *output,
+                         int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -2146,8 +2140,8 @@
   iadst16_stage9_ssse3(output, x);
 }
 
-static void iadst16_w4_new_sse2(const __m128i *input, __m128i *output,
-                                int8_t cos_bit) {
+static void iadst16_w4_sse2(const __m128i *input, __m128i *output,
+                            int8_t cos_bit) {
   (void)cos_bit;
   const int32_t *cospi = cospi_arr(INV_COS_BIT);
   const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
@@ -2240,8 +2234,8 @@
   iadst16_stage9_ssse3(output, x);
 }
 
-static void iidentity4_new_ssse3(const __m128i *input, __m128i *output,
-                                 int8_t cos_bit) {
+static void iidentity4_ssse3(const __m128i *input, __m128i *output,
+                             int8_t cos_bit) {
   (void)cos_bit;
   const int16_t scale_fractional = (NewSqrt2 - (1 << NewSqrt2Bits));
   const __m128i scale = _mm_set1_epi16(scale_fractional << (15 - NewSqrt2Bits));
@@ -2251,16 +2245,16 @@
   }
 }
 
-static void iidentity8_new_sse2(const __m128i *input, __m128i *output,
-                                int8_t cos_bit) {
+static void iidentity8_sse2(const __m128i *input, __m128i *output,
+                            int8_t cos_bit) {
   (void)cos_bit;
   for (int i = 0; i < 8; ++i) {
     output[i] = _mm_adds_epi16(input[i], input[i]);
   }
 }
 
-static void iidentity16_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
+static void iidentity16_ssse3(const __m128i *input, __m128i *output,
+                              int8_t cos_bit) {
   (void)cos_bit;
   const int16_t scale_fractional = 2 * (NewSqrt2 - (1 << NewSqrt2Bits));
   const __m128i scale = _mm_set1_epi16(scale_fractional << (15 - NewSqrt2Bits));
@@ -2307,11 +2301,11 @@
 // 1D functions process process 8 pixels at one time.
 static const transform_1d_ssse3
     lowbd_txfm_all_1d_w8_arr[TX_SIZES][ITX_TYPES_1D] = {
-      { idct4_new_sse2, iadst4_new_sse2, iidentity4_new_ssse3 },
-      { idct8_new_sse2, iadst8_new_sse2, iidentity8_new_sse2 },
-      { idct16_new_sse2, iadst16_new_sse2, iidentity16_new_ssse3 },
-      { idct32_new_sse2, NULL, NULL },
-      { idct64_low32_new_ssse3, NULL, NULL },
+      { idct4_sse2, iadst4_sse2, iidentity4_ssse3 },
+      { idct8_sse2, iadst8_sse2, iidentity8_sse2 },
+      { idct16_sse2, iadst16_sse2, iidentity16_ssse3 },
+      { idct32_sse2, NULL, NULL },
+      { idct64_low32_ssse3, NULL, NULL },
     };
 
 // functions for blocks with eob at DC and within
@@ -2319,26 +2313,24 @@
 static const transform_1d_ssse3
     lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
       {
-          { idct4_new_sse2, idct4_new_sse2, NULL, NULL },
-          { iadst4_new_sse2, iadst4_new_sse2, NULL, NULL },
-          { iidentity4_new_ssse3, iidentity4_new_ssse3, NULL, NULL },
+          { idct4_sse2, idct4_sse2, NULL, NULL },
+          { iadst4_sse2, iadst4_sse2, NULL, NULL },
+          { iidentity4_ssse3, iidentity4_ssse3, NULL, NULL },
       },
-      { { idct8_low1_new_ssse3, idct8_new_sse2, NULL, NULL },
-        { iadst8_low1_new_ssse3, iadst8_new_sse2, NULL, NULL },
-        { iidentity8_new_sse2, iidentity8_new_sse2, NULL, NULL } },
+      { { idct8_low1_ssse3, idct8_sse2, NULL, NULL },
+        { iadst8_low1_ssse3, iadst8_sse2, NULL, NULL },
+        { iidentity8_sse2, iidentity8_sse2, NULL, NULL } },
       {
-          { idct16_low1_new_ssse3, idct16_low8_new_ssse3, idct16_new_sse2,
-            NULL },
-          { iadst16_low1_new_ssse3, iadst16_low8_new_ssse3, iadst16_new_sse2,
-            NULL },
+          { idct16_low1_ssse3, idct16_low8_ssse3, idct16_sse2, NULL },
+          { iadst16_low1_ssse3, iadst16_low8_ssse3, iadst16_sse2, NULL },
           { NULL, NULL, NULL, NULL },
       },
-      { { idct32_low1_new_ssse3, idct32_low8_new_ssse3, idct32_low16_new_ssse3,
-          idct32_new_sse2 },
+      { { idct32_low1_ssse3, idct32_low8_ssse3, idct32_low16_ssse3,
+          idct32_sse2 },
         { NULL, NULL, NULL, NULL },
         { NULL, NULL, NULL, NULL } },
-      { { idct64_low1_new_ssse3, idct64_low8_new_ssse3, idct64_low16_new_ssse3,
-          idct64_low32_new_ssse3 },
+      { { idct64_low1_ssse3, idct64_low8_ssse3, idct64_low16_ssse3,
+          idct64_low32_ssse3 },
         { NULL, NULL, NULL, NULL },
         { NULL, NULL, NULL, NULL } }
     };
@@ -2347,9 +2339,9 @@
 // used in 4x4, 4x8, 4x16, 8x4, 16x4
 static const transform_1d_ssse3
     lowbd_txfm_all_1d_w4_arr[TX_SIZES][ITX_TYPES_1D] = {
-      { idct4_w4_new_sse2, iadst4_w4_new_sse2, iidentity4_new_ssse3 },
-      { idct8_w4_new_sse2, iadst8_w4_new_sse2, iidentity8_new_sse2 },
-      { idct16_w4_new_sse2, iadst16_w4_new_sse2, iidentity16_new_ssse3 },
+      { idct4_w4_sse2, iadst4_w4_sse2, iidentity4_ssse3 },
+      { idct8_w4_sse2, iadst8_w4_sse2, iidentity8_sse2 },
+      { idct16_w4_sse2, iadst16_w4_sse2, iidentity16_ssse3 },
       { NULL, NULL, NULL },
       { NULL, NULL, NULL },
     };
@@ -2827,7 +2819,7 @@
     load_buffer_32bit_to_16bit_w4(input_cur, txfm_size_col, buf_cur,
                                   row_one_loop);
     transpose_16bit_4x8(buf_cur, buf_cur);
-    if (row_txfm == iidentity4_new_ssse3) {
+    if (row_txfm == iidentity4_ssse3) {
       const __m128i scale = pair_set_epi16(NewSqrt2, 3 << (NewSqrt2Bits - 1));
       const __m128i ones = _mm_set1_epi16(1);
       for (int j = 0; j < 4; ++j) {
@@ -2888,7 +2880,7 @@
                                txfm_size_row);
     transpose_16bit_8x4(buf_cur, buf_cur);
   }
-  if (row_txfm == iidentity16_new_ssse3) {
+  if (row_txfm == iidentity16_ssse3) {
     const __m128i scale = pair_set_epi16(2 * NewSqrt2, 3 << (NewSqrt2Bits - 1));
     const __m128i ones = _mm_set1_epi16(1);
     for (int j = 0; j < 16; ++j) {

diff --git a/av1/encoder/av1_fwd_txfm1d.c b/av1/encoder/av1_fwd_txfm1d.c
index 98505e0..6601c19 100644
--- a/av1/encoder/av1_fwd_txfm1d.c
+++ b/av1/encoder/av1_fwd_txfm1d.c

@@ -13,8 +13,8 @@
 #include "av1/encoder/av1_fwd_txfm1d.h"
 #include "av1/common/av1_txfm.h"
 
-void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range) {
+void av1_fdct4(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range) {
   const int32_t size = 4;
   const int32_t *cospi;
 
@@ -56,8 +56,8 @@
   av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
 }
 
-void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range) {
+void av1_fdct8(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range) {
   const int32_t size = 8;
   const int32_t *cospi;
 
@@ -141,8 +141,8 @@
   av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
 }
 
-void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_fdct16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   const int32_t size = 16;
   const int32_t *cospi;
 
@@ -312,8 +312,8 @@
   av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
 }
 
-void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_fdct32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   const int32_t size = 32;
   const int32_t *cospi;
 
@@ -673,8 +673,8 @@
   av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
 }
 
-void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_fadst4(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   int bit = cos_bit;
   const int32_t *sinpi = sinpi_arr(bit);
   int32_t x0, x1, x2, x3;
@@ -732,8 +732,8 @@
   av1_range_check_buf(6, input, output, 4, stage_range[6]);
 }
 
-void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_fadst8(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   const int32_t size = 8;
   const int32_t *cospi;
 
@@ -846,8 +846,8 @@
   av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
 }
 
-void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                     const int8_t *stage_range) {
+void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
   const int32_t size = 16;
   const int32_t *cospi;
 
@@ -1093,8 +1093,8 @@
   av1_range_check_buf(0, input, output, 32, stage_range[0]);
 }
 
-void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
+void av1_fdct64(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range) {
   const int32_t size = 64;
   const int32_t *cospi;
 

diff --git a/av1/encoder/av1_fwd_txfm1d.h b/av1/encoder/av1_fwd_txfm1d.h
index 9dcf165..9ef54fe 100644
--- a/av1/encoder/av1_fwd_txfm1d.h
+++ b/av1/encoder/av1_fwd_txfm1d.h

@@ -18,22 +18,22 @@
 extern "C" {
 #endif
 
-void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range);
-void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range);
-void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                     const int8_t *stage_range);
+void av1_fdct4(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range);
+void av1_fdct8(const int32_t *input, int32_t *output, int8_t cos_bit,
+               const int8_t *stage_range);
+void av1_fdct16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_fdct32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_fdct64(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_fadst4(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_fadst8(const int32_t *input, int32_t *output, int8_t cos_bit,
+                const int8_t *stage_range);
+void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range);
 void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range);
 void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,

diff --git a/av1/encoder/av1_fwd_txfm2d.c b/av1/encoder/av1_fwd_txfm2d.c
index deae95f..bcb829d 100644
--- a/av1/encoder/av1_fwd_txfm2d.c
+++ b/av1/encoder/av1_fwd_txfm2d.c

@@ -22,14 +22,14 @@
 
 static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
   switch (txfm_type) {
-    case TXFM_TYPE_DCT4: return av1_fdct4_new;
-    case TXFM_TYPE_DCT8: return av1_fdct8_new;
-    case TXFM_TYPE_DCT16: return av1_fdct16_new;
-    case TXFM_TYPE_DCT32: return av1_fdct32_new;
-    case TXFM_TYPE_DCT64: return av1_fdct64_new;
-    case TXFM_TYPE_ADST4: return av1_fadst4_new;
-    case TXFM_TYPE_ADST8: return av1_fadst8_new;
-    case TXFM_TYPE_ADST16: return av1_fadst16_new;
+    case TXFM_TYPE_DCT4: return av1_fdct4;
+    case TXFM_TYPE_DCT8: return av1_fdct8;
+    case TXFM_TYPE_DCT16: return av1_fdct16;
+    case TXFM_TYPE_DCT32: return av1_fdct32;
+    case TXFM_TYPE_DCT64: return av1_fdct64;
+    case TXFM_TYPE_ADST4: return av1_fadst4;
+    case TXFM_TYPE_ADST8: return av1_fadst8;
+    case TXFM_TYPE_ADST16: return av1_fadst16;
     case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
     case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
     case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;

diff --git a/av1/encoder/x86/av1_fwd_txfm1d_sse4.c b/av1/encoder/x86/av1_fwd_txfm1d_sse4.c
index 865ac31..62eaa30 100644
--- a/av1/encoder/x86/av1_fwd_txfm1d_sse4.c
+++ b/av1/encoder/x86/av1_fwd_txfm1d_sse4.c

@@ -11,8 +11,8 @@
 
 #include "av1/encoder/x86/av1_txfm1d_sse4.h"
 
-void av1_fdct32_new_sse4_1(__m128i *input, __m128i *output, int cos_bit,
-                           const int stride) {
+void av1_fdct32_sse4_1(__m128i *input, __m128i *output, int cos_bit,
+                       const int stride) {
   __m128i buf0[32];
   __m128i buf1[32];
   const int32_t *cospi;
@@ -396,8 +396,8 @@
   output[endidx] = buf0[1];
 }
 
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range) {
+void av1_fadst4_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range) {
   const int txfm_size = 4;
   const int num_per_128 = 4;
   const int32_t *cospi;
@@ -459,8 +459,8 @@
   }
 }
 
-void av1_fdct64_new_sse4_1(__m128i *input, __m128i *output, int8_t cos_bit,
-                           const int instride, const int outstride) {
+void av1_fdct64_sse4_1(__m128i *input, __m128i *output, int8_t cos_bit,
+                       const int instride, const int outstride) {
   const int32_t *cospi = cospi_arr(cos_bit);
   const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
 
@@ -1408,8 +1408,8 @@
   output[endidx] = x10[1];
 }
 
-void av1_idtx32_new_sse4_1(__m128i *input, __m128i *output, int cos_bit,
-                           const int col_num) {
+void av1_idtx32_sse4_1(__m128i *input, __m128i *output, int cos_bit,
+                       const int col_num) {
   (void)cos_bit;
   for (int i = 0; i < 32; i++) {
     output[i * col_num] = _mm_slli_epi32(input[i * col_num], 2);

diff --git a/av1/encoder/x86/av1_fwd_txfm2d_avx2.c b/av1/encoder/x86/av1_fwd_txfm2d_avx2.c
index f97cf71..634d50b 100644
--- a/av1/encoder/x86/av1_fwd_txfm2d_avx2.c
+++ b/av1/encoder/x86/av1_fwd_txfm2d_avx2.c

@@ -113,8 +113,8 @@
   output[15] = x1[15];
 }
 
-static INLINE void fdct16x32_new_avx2(const __m256i *input, __m256i *output,
-                                      int8_t cos_bit) {
+static INLINE void fdct16x32_avx2(const __m256i *input, __m256i *output,
+                                  int8_t cos_bit) {
   const int32_t *cospi = cospi_arr(cos_bit);
   const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
 
@@ -711,8 +711,8 @@
   output[63] = x1[63];
 }
 
-static INLINE void fdct32_new_avx2(const __m256i *input, __m256i *output,
-                                   int8_t cos_bit) {
+static INLINE void fdct32_avx2(const __m256i *input, __m256i *output,
+                               int8_t cos_bit) {
   __m256i x1[32];
   const int32_t *cospi = cospi_arr(cos_bit);
   const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
@@ -1422,8 +1422,8 @@
   }
 }
 
-static INLINE void fidentity16x32_new_avx2(const __m256i *input,
-                                           __m256i *output, int8_t cos_bit) {
+static INLINE void fidentity16x32_avx2(const __m256i *input, __m256i *output,
+                                       int8_t cos_bit) {
   (void)cos_bit;
   for (int i = 0; i < 32; ++i) {
     output[i] = _mm256_slli_epi16(input[i], 2);
@@ -1499,41 +1499,41 @@
                                   int8_t cos_bit);
 
 static const transform_1d_avx2 col_txfm16x32_arr[TX_TYPES] = {
-  fdct16x32_new_avx2,       // DCT_DCT
-  NULL,                     // ADST_DCT
-  NULL,                     // DCT_ADST
-  NULL,                     // ADST_ADST
-  NULL,                     // FLIPADST_DCT
-  NULL,                     // DCT_FLIPADST
-  NULL,                     // FLIPADST_FLIPADST
-  NULL,                     // ADST_FLIPADST
-  NULL,                     // FLIPADST_ADST
-  fidentity16x32_new_avx2,  // IDTX
-  fdct16x32_new_avx2,       // V_DCT
-  fidentity16x32_new_avx2,  // H_DCT
-  NULL,                     // V_ADST
-  NULL,                     // H_ADST
-  NULL,                     // V_FLIPADST
-  NULL                      // H_FLIPADST
+  fdct16x32_avx2,       // DCT_DCT
+  NULL,                 // ADST_DCT
+  NULL,                 // DCT_ADST
+  NULL,                 // ADST_ADST
+  NULL,                 // FLIPADST_DCT
+  NULL,                 // DCT_FLIPADST
+  NULL,                 // FLIPADST_FLIPADST
+  NULL,                 // ADST_FLIPADST
+  NULL,                 // FLIPADST_ADST
+  fidentity16x32_avx2,  // IDTX
+  fdct16x32_avx2,       // V_DCT
+  fidentity16x32_avx2,  // H_DCT
+  NULL,                 // V_ADST
+  NULL,                 // H_ADST
+  NULL,                 // V_FLIPADST
+  NULL                  // H_FLIPADST
 };
 
 static const transform_1d_avx2 row_txfm16x32_arr[TX_TYPES] = {
-  fdct16x32_new_avx2,       // DCT_DCT
-  NULL,                     // ADST_DCT
-  NULL,                     // DCT_ADST
-  NULL,                     // ADST_ADST
-  NULL,                     // FLIPADST_DCT
-  NULL,                     // DCT_FLIPADST
-  NULL,                     // FLIPADST_FLIPADST
-  NULL,                     // ADST_FLIPADST
-  NULL,                     // FLIPADST_ADST
-  fidentity16x32_new_avx2,  // IDTX
-  fidentity16x32_new_avx2,  // V_DCT
-  fdct16x32_new_avx2,       // H_DCT
-  NULL,                     // V_ADST
-  NULL,                     // H_ADST
-  NULL,                     // V_FLIPADST
-  NULL                      // H_FLIPADST
+  fdct16x32_avx2,       // DCT_DCT
+  NULL,                 // ADST_DCT
+  NULL,                 // DCT_ADST
+  NULL,                 // ADST_ADST
+  NULL,                 // FLIPADST_DCT
+  NULL,                 // DCT_FLIPADST
+  NULL,                 // FLIPADST_FLIPADST
+  NULL,                 // ADST_FLIPADST
+  NULL,                 // FLIPADST_ADST
+  fidentity16x32_avx2,  // IDTX
+  fidentity16x32_avx2,  // V_DCT
+  fdct16x32_avx2,       // H_DCT
+  NULL,                 // V_ADST
+  NULL,                 // H_ADST
+  NULL,                 // V_FLIPADST
+  NULL                  // H_FLIPADST
 };
 
 static const transform_1d_avx2 col_txfm16x16_arr[TX_TYPES] = {
@@ -1891,8 +1891,8 @@
       bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]);
       bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]);
     }
-    fdct32_new_avx2(bufA, bufA, cos_bit_row);
-    fdct32_new_avx2(bufB, bufB, cos_bit_row);
+    fdct32_avx2(bufA, bufA, cos_bit_row);
+    fdct32_avx2(bufB, bufB, cos_bit_row);
     av1_round_shift_rect_array_32_avx2(bufA, bufA, 32, -shift[2], NewSqrt2);
     av1_round_shift_rect_array_32_avx2(bufB, bufB, 32, -shift[2], NewSqrt2);
 

diff --git a/av1/encoder/x86/av1_fwd_txfm2d_sse4.c b/av1/encoder/x86/av1_fwd_txfm2d_sse4.c
index 84b7229..0bc3fbc 100644
--- a/av1/encoder/x86/av1_fwd_txfm2d_sse4.c
+++ b/av1/encoder/x86/av1_fwd_txfm2d_sse4.c

@@ -32,15 +32,15 @@
 typedef void (*TxfmFuncSSE2)(__m128i *input, __m128i *output,
                              const int8_t cos_bit, const int8_t *stage_range);
 
-static void fdct32_new_sse4_1(__m128i *input, __m128i *output,
-                              const int8_t cos_bit, const int8_t *stage_range) {
+static void fdct32_sse4_1(__m128i *input, __m128i *output, const int8_t cos_bit,
+                          const int8_t *stage_range) {
   const int txfm_size = 32;
   const int num_per_128 = 4;
   int col_num = txfm_size / num_per_128;
   int col;
   (void)stage_range;
   for (col = 0; col < col_num; col++) {
-    av1_fdct32_new_sse4_1((input + col), (output + col), cos_bit, col_num);
+    av1_fdct32_sse4_1((input + col), (output + col), cos_bit, col_num);
   }
 }
 
@@ -51,8 +51,7 @@
   int col_num = txfm_size / num_per_128;
   (void)stage_range;
   for (int col = 0; col < col_num; col++) {
-    av1_fdct64_new_sse4_1((input + col), (output + col), cos_bit, col_num,
-                          col_num);
+    av1_fdct64_sse4_1((input + col), (output + col), cos_bit, col_num, col_num);
   }
 }
 static void idtx32x32_sse4_1(__m128i *input, __m128i *output,
@@ -60,13 +59,13 @@
   (void)stage_range;
 
   for (int i = 0; i < 8; i++) {
-    av1_idtx32_new_sse4_1(&input[i * 32], &output[i * 32], cos_bit, 1);
+    av1_idtx32_sse4_1(&input[i * 32], &output[i * 32], cos_bit, 1);
   }
 }
 
 static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
   switch (txfm_type) {
-    case TXFM_TYPE_DCT32: return fdct32_new_sse4_1; break;
+    case TXFM_TYPE_DCT32: return fdct32_sse4_1; break;
     case TXFM_TYPE_DCT64: return fdct64_new_sse4_1; break;
     case TXFM_TYPE_IDENTITY32: return idtx32x32_sse4_1; break;
     default: assert(0);
@@ -136,8 +135,8 @@
 
   /*row wise transform*/
   for (int col = 0; col < (col_num >> 1); col++) {
-    av1_fdct64_new_sse4_1((buf_128 + col), (out_128 + col), cos_bit_row,
-                          col_num, (col_num >> 1));
+    av1_fdct64_sse4_1((buf_128 + col), (out_128 + col), cos_bit_row, col_num,
+                      (col_num >> 1));
   }
 
   txfm2d_size_128 = (col_num >> 1) * (txfm_size >> 1);
@@ -221,8 +220,8 @@
       bufA[j] = _mm_cvtepi16_epi32(buf[j]);
       bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
     }
-    av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
-    av1_fdct64_new_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
+    av1_fdct64_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
+    av1_fdct64_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
     av1_round_shift_array_32_sse4_1(bufA, bufA, 32, -shift[2]);
     av1_round_shift_array_32_sse4_1(bufB, bufB, 32, -shift[2]);
 
@@ -268,8 +267,8 @@
       bufA[j] = _mm_cvtepi16_epi32(buf[j]);
       bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
     }
-    av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
-    av1_fdct64_new_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
+    av1_fdct64_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
+    av1_fdct64_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
     av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2);
     av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], NewSqrt2);
 
@@ -317,8 +316,8 @@
       bufA[j] = _mm_cvtepi16_epi32(buf[j]);
       bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
     }
-    av1_fdct32_new_sse4_1(bufA, bufA, cos_bit_row, 1);
-    av1_fdct32_new_sse4_1(bufB, bufB, cos_bit_row, 1);
+    av1_fdct32_sse4_1(bufA, bufA, cos_bit_row, 1);
+    av1_fdct32_sse4_1(bufB, bufB, cos_bit_row, 1);
     av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2);
     av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], NewSqrt2);
 

diff --git a/av1/encoder/x86/av1_txfm1d_sse4.h b/av1/encoder/x86/av1_txfm1d_sse4.h
index b3d5b22..7a0f328 100644
--- a/av1/encoder/x86/av1_txfm1d_sse4.h
+++ b/av1/encoder/x86/av1_txfm1d_sse4.h

@@ -20,43 +20,43 @@
 extern "C" {
 #endif
 
-void av1_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct32_new_sse4_1(__m128i *input, __m128i *output, int cos_bit,
-                           const int stride);
-void av1_fdct64_new_sse4_1(__m128i *input, __m128i *output, int8_t cos_bit,
-                           const int instride, const int outstride);
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
-                            const int8_t cos_bit, const int8_t *stage_range);
+void av1_fdct4_sse4_1(const __m128i *input, __m128i *output,
+                      const int8_t cos_bit, const int8_t *stage_range);
+void av1_fdct8_sse4_1(const __m128i *input, __m128i *output,
+                      const int8_t cos_bit, const int8_t *stage_range);
+void av1_fdct16_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
+void av1_fdct32_sse4_1(__m128i *input, __m128i *output, int cos_bit,
+                       const int stride);
+void av1_fdct64_sse4_1(__m128i *input, __m128i *output, int8_t cos_bit,
+                       const int instride, const int outstride);
+void av1_fadst4_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
+void av1_fadst8_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
+void av1_fadst16_sse4_1(const __m128i *input, __m128i *output,
+                        const int8_t cos_bit, const int8_t *stage_range);
 
-void av1_idct4_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct8_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct16_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct32_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct64_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
+void av1_idct4_sse4_1(const __m128i *input, __m128i *output,
+                      const int8_t cos_bit, const int8_t *stage_range);
+void av1_idct8_sse4_1(const __m128i *input, __m128i *output,
+                      const int8_t cos_bit, const int8_t *stage_range);
+void av1_idct16_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
+void av1_idct32_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
+void av1_idct64_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
 
-void av1_iadst4_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_iadst8_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_iadst16_new_sse4_1(const __m128i *input, __m128i *output,
-                            const int8_t cos_bit, const int8_t *stage_range);
+void av1_iadst4_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
+void av1_iadst8_sse4_1(const __m128i *input, __m128i *output,
+                       const int8_t cos_bit, const int8_t *stage_range);
+void av1_iadst16_sse4_1(const __m128i *input, __m128i *output,
+                        const int8_t cos_bit, const int8_t *stage_range);
 
-void av1_idtx32_new_sse4_1(__m128i *input, __m128i *output, int cos_bit,
-                           const int col_num);
+void av1_idtx32_sse4_1(__m128i *input, __m128i *output, int cos_bit,
+                       const int col_num);
 
 static INLINE void transpose_32_4x4(int stride, const __m128i *input,
                                     __m128i *output) {

diff --git a/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
index d2ed007..73afc5d 100644
--- a/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/av1/encoder/x86/highbd_fwd_txfm_sse4.c

@@ -2094,22 +2094,22 @@
 };
 
 static const fwd_transform_1d_sse4_1 col_highbd_txfm8x32_arr[TX_TYPES] = {
-  av1_fdct32_new_sse4_1,  // DCT_DCT
-  NULL,                   // ADST_DCT
-  NULL,                   // DCT_ADST
-  NULL,                   // ADST_ADST
-  NULL,                   // FLIPADST_DCT
-  NULL,                   // DCT_FLIPADST
-  NULL,                   // FLIPADST_FLIPADST
-  NULL,                   // ADST_FLIPADST
-  NULL,                   // FLIPADST_ADST
-  av1_idtx32_new_sse4_1,  // IDTX
-  NULL,                   // V_DCT
-  NULL,                   // H_DCT
-  NULL,                   // V_ADST
-  NULL,                   // H_ADST
-  NULL,                   // V_FLIPADST
-  NULL                    // H_FLIPADST
+  av1_fdct32_sse4_1,  // DCT_DCT
+  NULL,               // ADST_DCT
+  NULL,               // DCT_ADST
+  NULL,               // ADST_ADST
+  NULL,               // FLIPADST_DCT
+  NULL,               // DCT_FLIPADST
+  NULL,               // FLIPADST_FLIPADST
+  NULL,               // ADST_FLIPADST
+  NULL,               // FLIPADST_ADST
+  av1_idtx32_sse4_1,  // IDTX
+  NULL,               // V_DCT
+  NULL,               // H_DCT
+  NULL,               // V_ADST
+  NULL,               // H_ADST
+  NULL,               // V_FLIPADST
+  NULL                // H_FLIPADST
 };
 
 static const fwd_transform_1d_sse4_1 row_highbd_txfm8x32_arr[TX_TYPES] = {
@@ -2303,7 +2303,7 @@
   // column transform
   load_buffer_32x8n(input, in, stride, 0, 0, shift[0], txfm_size_row);
   for (int i = 0; i < num_col; i++) {
-    av1_fdct64_new_sse4_1((in + i), (in + i), bitcol, num_col, num_col);
+    av1_fdct64_sse4_1((in + i), (in + i), bitcol, num_col, num_col);
   }
   for (int i = 0; i < num_col; i++) {
     col_txfm_16x16_rounding((in + i * txfm_size_row), -shift[1]);
@@ -2312,7 +2312,7 @@
 
   // row transform
   for (int i = 0; i < num_row; i++) {
-    av1_fdct32_new_sse4_1((outcoef128 + i), (in + i), bitrow, num_row);
+    av1_fdct32_sse4_1((outcoef128 + i), (in + i), bitrow, num_row);
   }
   transpose_8nx8n(in, outcoef128, txfm_size_row, txfm_size_col);
   av1_round_shift_rect_array_32_sse4_1(outcoef128, outcoef128, 512, -shift[2],
@@ -2347,7 +2347,7 @@
   }
 
   for (int i = 0; i < num_col; i++) {
-    av1_fdct32_new_sse4_1((in + i), (in + i), bitcol, num_col);
+    av1_fdct32_sse4_1((in + i), (in + i), bitcol, num_col);
   }
 
   for (int i = 0; i < num_row; i++) {
@@ -2357,7 +2357,7 @@
 
   // row transform
   for (int i = 0; i < num_row; i++) {
-    av1_fdct64_new_sse4_1((outcoef128 + i), (in + i), bitrow, num_row, num_row);
+    av1_fdct64_sse4_1((outcoef128 + i), (in + i), bitrow, num_row, num_row);
   }
   transpose_8nx8n(in, outcoef128, txfm_size_row, txfm_size_col >> 1);
   av1_round_shift_rect_array_32_sse4_1(outcoef128, outcoef128, 512 >> 1,
@@ -2549,7 +2549,7 @@
   }
 
   for (int i = 0; i < num_col; i++) {
-    av1_fdct64_new_sse4_1(in + i, outcoeff128 + i, bitcol, num_col, num_col);
+    av1_fdct64_sse4_1(in + i, outcoeff128 + i, bitcol, num_col, num_col);
   }
 
   col_txfm_16x16_rounding(outcoeff128, -shift[1]);
@@ -2597,7 +2597,7 @@
 
   transpose_8nx8n(outcoeff128, in, txfm_size_col, txfm_size_row);
   for (int i = 0; i < 4; i++) {
-    av1_fdct64_new_sse4_1(in + i, in + i, bitrow, 4, 4);
+    av1_fdct64_sse4_1(in + i, in + i, bitrow, 4, 4);
   }
   transpose_8nx8n(in, outcoeff128, txfm_size_row, 32);
   (void)bd;

diff --git a/test/av1_fwd_txfm1d_test.cc b/test/av1_fwd_txfm1d_test.cc
index 863cb39..abc46ed 100644
--- a/test/av1_fwd_txfm1d_test.cc
+++ b/test/av1_fwd_txfm1d_test.cc

@@ -30,11 +30,11 @@
 const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
 
 const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
-  { av1_fdct4_new, av1_fadst4_new, av1_fidentity4_c },
-  { av1_fdct8_new, av1_fadst8_new, av1_fidentity8_c },
-  { av1_fdct16_new, av1_fadst16_new, av1_fidentity16_c },
-  { av1_fdct32_new, NULL, av1_fidentity32_c },
-  { av1_fdct64_new, NULL, NULL },
+  { av1_fdct4, av1_fadst4, av1_fidentity4_c },
+  { av1_fdct8, av1_fadst8, av1_fidentity8_c },
+  { av1_fdct16, av1_fadst16, av1_fidentity16_c },
+  { av1_fdct32, NULL, av1_fidentity32_c },
+  { av1_fdct64, NULL, NULL },
 };
 
 // the maximum stage number of fwd/inv 1d dct/adst txfm is 12

diff --git a/test/av1_inv_txfm1d_test.cc b/test/av1_inv_txfm1d_test.cc
index 0fc2b03..01d4a4d 100644
--- a/test/av1_inv_txfm1d_test.cc
+++ b/test/av1_inv_txfm1d_test.cc

@@ -26,19 +26,15 @@
 const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
 
 const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
-  { av1_fdct4_new, av1_fadst4_new },
-  { av1_fdct8_new, av1_fadst8_new },
-  { av1_fdct16_new, av1_fadst16_new },
-  { av1_fdct32_new, NULL },
-  { av1_fdct64_new, NULL },
+  { av1_fdct4, av1_fadst4 },   { av1_fdct8, av1_fadst8 },
+  { av1_fdct16, av1_fadst16 }, { av1_fdct32, NULL },
+  { av1_fdct64, NULL },
 };
 
 const TxfmFunc inv_txfm_func_ls[][txfm_type_num] = {
-  { av1_idct4_new, av1_iadst4_new },
-  { av1_idct8_new, av1_iadst8_new },
-  { av1_idct16_new, av1_iadst16_new },
-  { av1_idct32_new, NULL },
-  { av1_idct64_new, NULL },
+  { av1_idct4, av1_iadst4 },   { av1_idct8, av1_iadst8 },
+  { av1_idct16, av1_iadst16 }, { av1_idct32, NULL },
+  { av1_idct64, NULL },
 };
 
 // the maximum stage number of fwd/inv 1d dct/adst txfm is 12

diff --git a/test/av1_txfm_test.cc b/test/av1_txfm_test.cc
index abbc475..aedd45d 100644
--- a/test/av1_txfm_test.cc
+++ b/test/av1_txfm_test.cc

@@ -119,7 +119,7 @@
   }
 }
 
-// TODO(any): Copied from the old 'fadst4' (same as the new 'av1_fadst4_new'
+// TODO(any): Copied from the old 'fadst4' (same as the new 'av1_fadst4'
 // function). Should be replaced by a proper reference function that takes
 // 'double' input & output.
 static void fadst4_new(const tran_low_t *input, tran_low_t *output) {