cosmetics: sm_weight_arrays -> smooth_weights

+ normalize SM_WEIGHT_LOG2_SCALE -> SMOOTH_WEIGHT_LOG2_SCALE

Bug: b/217461825
Change-Id: I094b891c654d99f9f57292ed2e161e48bee2314c

commit: 6be7f499612ba296976c516255e40b9e5e8e779c [log] [tgz]
author: James Zern <jzern@google.com> Wed Apr 27 12:18:20 2022 -0700
committer: James Zern <jzern@google.com> Thu Apr 28 20:41:44 2022 +0000
tree: 40161e2abc29b2723dc9460d6f036c29c5e0f203
parent: f02743e8df4a548841f0757c0e4b4001481c4b72 [diff]
diff --git a/aom_dsp/arm/highbd_intrapred_neon.c b/aom_dsp/arm/highbd_intrapred_neon.c
index b898e2a..7686e6b 100644
--- a/aom_dsp/arm/highbd_intrapred_neon.c
+++ b/aom_dsp/arm/highbd_intrapred_neon.c

@@ -396,11 +396,11 @@
                                           const int height) {
   const uint16_t top_right = top_row[3];
   const uint16_t bottom_left = left_column[height - 1];
-  const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+  const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
 
   const uint16x4_t top_v = vld1_u16(top_row);
   const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
-  const uint16x4_t weights_x_v = vld1_u16(sm_weight_arrays_u16);
+  const uint16x4_t weights_x_v = vld1_u16(smooth_weights_u16);
   const uint16x4_t scaled_weights_x = negate_s8(weights_x_v);
   const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
 
@@ -414,7 +414,8 @@
     const uint32x4_t weighted_bl =
         vmlal_n_u16(weighted_left, bottom_left_v, 256 - weights_y[y]);
 
-    const uint16x4_t pred = vrshrn_n_u32(weighted_bl, SM_WEIGHT_LOG2_SCALE + 1);
+    const uint16x4_t pred =
+        vrshrn_n_u32(weighted_bl, SMOOTH_WEIGHT_LOG2_SCALE + 1);
     vst1_u16(dst, pred);
     dst += stride;
   }
@@ -434,7 +435,7 @@
       vmlal_n_u16(weighted_top_low, weights_x.val[0], left_y);
 
   const uint16x4_t pred_low =
-      vrshrn_n_u32(weighted_edges_low, SM_WEIGHT_LOG2_SCALE + 1);
+      vrshrn_n_u32(weighted_edges_low, SMOOTH_WEIGHT_LOG2_SCALE + 1);
   vst1_u16(dst, pred_low);
 
   const uint32x4_t weighted_top_high =
@@ -443,7 +444,7 @@
       vmlal_n_u16(weighted_top_high, weights_x.val[1], left_y);
 
   const uint16x4_t pred_high =
-      vrshrn_n_u32(weighted_edges_high, SM_WEIGHT_LOG2_SCALE + 1);
+      vrshrn_n_u32(weighted_edges_high, SMOOTH_WEIGHT_LOG2_SCALE + 1);
   vst1_u16(dst + 4, pred_high);
 }
 
@@ -453,13 +454,13 @@
                                    const int height) {
   const uint16_t top_right = top_row[7];
   const uint16_t bottom_left = left_column[height - 1];
-  const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+  const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
 
   const uint16x4x2_t top_vals = { { vld1_u16(top_row),
                                     vld1_u16(top_row + 4) } };
   const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
-  const uint16x4x2_t weights_x = { { vld1_u16(sm_weight_arrays_u16 + 4),
-                                     vld1_u16(sm_weight_arrays_u16 + 8) } };
+  const uint16x4x2_t weights_x = { { vld1_u16(smooth_weights_u16 + 4),
+                                     vld1_u16(smooth_weights_u16 + 8) } };
   const uint32x4_t weighted_tr_low =
       vmull_n_u16(negate_s8(weights_x.val[0]), top_right);
   const uint32x4_t weighted_tr_high =
@@ -506,7 +507,7 @@
       const uint16_t *const left_column, const int height) {                   \
     const uint16_t top_right = top_row[(W)-1];                                 \
     const uint16_t bottom_left = left_column[height - 1];                      \
-    const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;       \
+    const uint16_t *const weights_y = smooth_weights_u16 + height - 4;         \
                                                                                \
     /* Precompute weighted values that don't vary with |y|. */                 \
     uint32x4_t weighted_tr_low[(W) >> 3];                                      \
@@ -514,10 +515,10 @@
     for (int i = 0; i<(W)>> 3; ++i) {                                          \
       const int x = i << 3;                                                    \
       const uint16x4_t weights_x_low =                                         \
-          vld1_u16(sm_weight_arrays_u16 + (W)-4 + x);                          \
+          vld1_u16(smooth_weights_u16 + (W)-4 + x);                            \
       weighted_tr_low[i] = vmull_n_u16(negate_s8(weights_x_low), top_right);   \
       const uint16x4_t weights_x_high =                                        \
-          vld1_u16(sm_weight_arrays_u16 + (W) + x);                            \
+          vld1_u16(smooth_weights_u16 + (W) + x);                              \
       weighted_tr_high[i] = vmull_n_u16(negate_s8(weights_x_high), top_right); \
     }                                                                          \
                                                                                \
@@ -536,8 +537,8 @@
             vaddq_u32(weighted_bl, weighted_tr_high[i]);                       \
         /* Accumulate weighted edge values and store. */                       \
         const uint16x4x2_t weights_x = {                                       \
-          { vld1_u16(sm_weight_arrays_u16 + (W)-4 + x),                        \
-            vld1_u16(sm_weight_arrays_u16 + (W) + x) }                         \
+          { vld1_u16(smooth_weights_u16 + (W)-4 + x),                          \
+            vld1_u16(smooth_weights_u16 + (W) + x) }                           \
         };                                                                     \
         highbd_calculate_pred8(dst_x, weighted_corners_low,                    \
                                weighted_corners_high, top_vals, weights_x,     \
@@ -585,7 +586,7 @@
                                      const uint16_t *const left_column,
                                      const int height) {
   const uint16_t bottom_left = left_column[height - 1];
-  const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+  const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
 
   const uint16x4_t top_v = vld1_u16(top_row);
   const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
@@ -595,7 +596,7 @@
         vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
     const uint32x4_t weighted_top =
         vmlal_n_u16(weighted_bl, top_v, weights_y[y]);
-    vst1_u16(dst, vrshrn_n_u32(weighted_top, SM_WEIGHT_LOG2_SCALE));
+    vst1_u16(dst, vrshrn_n_u32(weighted_top, SMOOTH_WEIGHT_LOG2_SCALE));
 
     dst += stride;
   }
@@ -606,7 +607,7 @@
                                      const uint16_t *const left_column,
                                      const int height) {
   const uint16_t bottom_left = left_column[height - 1];
-  const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+  const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
 
   const uint16x4_t top_low = vld1_u16(top_row);
   const uint16x4_t top_high = vld1_u16(top_row + 4);
@@ -618,11 +619,12 @@
 
     const uint32x4_t weighted_top_low =
         vmlal_n_u16(weighted_bl, top_low, weights_y[y]);
-    vst1_u16(dst, vrshrn_n_u32(weighted_top_low, SM_WEIGHT_LOG2_SCALE));
+    vst1_u16(dst, vrshrn_n_u32(weighted_top_low, SMOOTH_WEIGHT_LOG2_SCALE));
 
     const uint32x4_t weighted_top_high =
         vmlal_n_u16(weighted_bl, top_high, weights_y[y]);
-    vst1_u16(dst + 4, vrshrn_n_u32(weighted_top_high, SM_WEIGHT_LOG2_SCALE));
+    vst1_u16(dst + 4,
+             vrshrn_n_u32(weighted_top_high, SMOOTH_WEIGHT_LOG2_SCALE));
     dst += stride;
   }
 }
@@ -649,39 +651,40 @@
 #undef HIGHBD_SMOOTH_V_NXM
 
 // For width 16 and above.
-#define HIGHBD_SMOOTH_V_PREDICTOR(W)                                           \
-  static void highbd_smooth_v_##W##xh_neon(                                    \
-      uint16_t *dst, const ptrdiff_t stride, const uint16_t *const top_row,    \
-      const uint16_t *const left_column, const int height) {                   \
-    const uint16_t bottom_left = left_column[height - 1];                      \
-    const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;       \
-                                                                               \
-    uint16x4x2_t top_vals[(W) >> 3];                                           \
-    for (int i = 0; i<(W)>> 3; ++i) {                                          \
-      const int x = i << 3;                                                    \
-      top_vals[i].val[0] = vld1_u16(top_row + x);                              \
-      top_vals[i].val[1] = vld1_u16(top_row + x + 4);                          \
-    }                                                                          \
-                                                                               \
-    const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);                  \
-    for (int y = 0; y < height; ++y) {                                         \
-      const uint32x4_t weighted_bl =                                           \
-          vmull_n_u16(bottom_left_v, 256 - weights_y[y]);                      \
-                                                                               \
-      uint16_t *dst_x = dst;                                                   \
-      for (int i = 0; i<(W)>> 3; ++i) {                                        \
-        const uint32x4_t weighted_top_low =                                    \
-            vmlal_n_u16(weighted_bl, top_vals[i].val[0], weights_y[y]);        \
-        vst1_u16(dst_x, vrshrn_n_u32(weighted_top_low, SM_WEIGHT_LOG2_SCALE)); \
-                                                                               \
-        const uint32x4_t weighted_top_high =                                   \
-            vmlal_n_u16(weighted_bl, top_vals[i].val[1], weights_y[y]);        \
-        vst1_u16(dst_x + 4,                                                    \
-                 vrshrn_n_u32(weighted_top_high, SM_WEIGHT_LOG2_SCALE));       \
-        dst_x += 8;                                                            \
-      }                                                                        \
-      dst += stride;                                                           \
-    }                                                                          \
+#define HIGHBD_SMOOTH_V_PREDICTOR(W)                                         \
+  static void highbd_smooth_v_##W##xh_neon(                                  \
+      uint16_t *dst, const ptrdiff_t stride, const uint16_t *const top_row,  \
+      const uint16_t *const left_column, const int height) {                 \
+    const uint16_t bottom_left = left_column[height - 1];                    \
+    const uint16_t *const weights_y = smooth_weights_u16 + height - 4;       \
+                                                                             \
+    uint16x4x2_t top_vals[(W) >> 3];                                         \
+    for (int i = 0; i<(W)>> 3; ++i) {                                        \
+      const int x = i << 3;                                                  \
+      top_vals[i].val[0] = vld1_u16(top_row + x);                            \
+      top_vals[i].val[1] = vld1_u16(top_row + x + 4);                        \
+    }                                                                        \
+                                                                             \
+    const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);                \
+    for (int y = 0; y < height; ++y) {                                       \
+      const uint32x4_t weighted_bl =                                         \
+          vmull_n_u16(bottom_left_v, 256 - weights_y[y]);                    \
+                                                                             \
+      uint16_t *dst_x = dst;                                                 \
+      for (int i = 0; i<(W)>> 3; ++i) {                                      \
+        const uint32x4_t weighted_top_low =                                  \
+            vmlal_n_u16(weighted_bl, top_vals[i].val[0], weights_y[y]);      \
+        vst1_u16(dst_x,                                                      \
+                 vrshrn_n_u32(weighted_top_low, SMOOTH_WEIGHT_LOG2_SCALE));  \
+                                                                             \
+        const uint32x4_t weighted_top_high =                                 \
+            vmlal_n_u16(weighted_bl, top_vals[i].val[1], weights_y[y]);      \
+        vst1_u16(dst_x + 4,                                                  \
+                 vrshrn_n_u32(weighted_top_high, SMOOTH_WEIGHT_LOG2_SCALE)); \
+        dst_x += 8;                                                          \
+      }                                                                      \
+      dst += stride;                                                         \
+    }                                                                        \
   }
 
 HIGHBD_SMOOTH_V_PREDICTOR(16)
@@ -722,14 +725,14 @@
                                             const int height) {
   const uint16_t top_right = top_row[3];
 
-  const uint16x4_t weights_x = vld1_u16(sm_weight_arrays_u16);
+  const uint16x4_t weights_x = vld1_u16(smooth_weights_u16);
   const uint16x4_t scaled_weights_x = negate_s8(weights_x);
 
   const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
   for (int y = 0; y < height; ++y) {
     const uint32x4_t weighted_left =
         vmlal_n_u16(weighted_tr, weights_x, left_column[y]);
-    vst1_u16(dst, vrshrn_n_u32(weighted_left, SM_WEIGHT_LOG2_SCALE));
+    vst1_u16(dst, vrshrn_n_u32(weighted_left, SMOOTH_WEIGHT_LOG2_SCALE));
     dst += stride;
   }
 }
@@ -740,8 +743,8 @@
                                             const int height) {
   const uint16_t top_right = top_row[7];
 
-  const uint16x4x2_t weights_x = { { vld1_u16(sm_weight_arrays_u16 + 4),
-                                     vld1_u16(sm_weight_arrays_u16 + 8) } };
+  const uint16x4x2_t weights_x = { { vld1_u16(smooth_weights_u16 + 4),
+                                     vld1_u16(smooth_weights_u16 + 8) } };
 
   const uint32x4_t weighted_tr_low =
       vmull_n_u16(negate_s8(weights_x.val[0]), top_right);
@@ -752,11 +755,12 @@
     const uint16_t left_y = left_column[y];
     const uint32x4_t weighted_left_low =
         vmlal_n_u16(weighted_tr_low, weights_x.val[0], left_y);
-    vst1_u16(dst, vrshrn_n_u32(weighted_left_low, SM_WEIGHT_LOG2_SCALE));
+    vst1_u16(dst, vrshrn_n_u32(weighted_left_low, SMOOTH_WEIGHT_LOG2_SCALE));
 
     const uint32x4_t weighted_left_high =
         vmlal_n_u16(weighted_tr_high, weights_x.val[1], left_y);
-    vst1_u16(dst + 4, vrshrn_n_u32(weighted_left_high, SM_WEIGHT_LOG2_SCALE));
+    vst1_u16(dst + 4,
+             vrshrn_n_u32(weighted_left_high, SMOOTH_WEIGHT_LOG2_SCALE));
     dst += stride;
   }
 }
@@ -783,43 +787,43 @@
 #undef HIGHBD_SMOOTH_H_NXM
 
 // For width 16 and above.
-#define HIGHBD_SMOOTH_H_PREDICTOR(W)                                      \
-  void highbd_smooth_h_##W##xh_neon(                                      \
-      uint16_t *dst, ptrdiff_t stride, const uint16_t *const top_row,     \
-      const uint16_t *const left_column, const int height) {              \
-    const uint16_t top_right = top_row[(W)-1];                            \
-                                                                          \
-    uint16x4_t weights_x_low[(W) >> 3];                                   \
-    uint16x4_t weights_x_high[(W) >> 3];                                  \
-    uint32x4_t weighted_tr_low[(W) >> 3];                                 \
-    uint32x4_t weighted_tr_high[(W) >> 3];                                \
-    for (int i = 0; i<(W)>> 3; ++i) {                                     \
-      const int x = i << 3;                                               \
-      weights_x_low[i] = vld1_u16(sm_weight_arrays_u16 + (W)-4 + x);      \
-      weighted_tr_low[i] =                                                \
-          vmull_n_u16(negate_s8(weights_x_low[i]), top_right);            \
-      weights_x_high[i] = vld1_u16(sm_weight_arrays_u16 + (W) + x);       \
-      weighted_tr_high[i] =                                               \
-          vmull_n_u16(negate_s8(weights_x_high[i]), top_right);           \
-    }                                                                     \
-                                                                          \
-    for (int y = 0; y < height; ++y) {                                    \
-      uint16_t *dst_x = dst;                                              \
-      const uint16_t left_y = left_column[y];                             \
-      for (int i = 0; i<(W)>> 3; ++i) {                                   \
-        const uint32x4_t weighted_left_low =                              \
-            vmlal_n_u16(weighted_tr_low[i], weights_x_low[i], left_y);    \
-        vst1_u16(dst_x,                                                   \
-                 vrshrn_n_u32(weighted_left_low, SM_WEIGHT_LOG2_SCALE));  \
-                                                                          \
-        const uint32x4_t weighted_left_high =                             \
-            vmlal_n_u16(weighted_tr_high[i], weights_x_high[i], left_y);  \
-        vst1_u16(dst_x + 4,                                               \
-                 vrshrn_n_u32(weighted_left_high, SM_WEIGHT_LOG2_SCALE)); \
-        dst_x += 8;                                                       \
-      }                                                                   \
-      dst += stride;                                                      \
-    }                                                                     \
+#define HIGHBD_SMOOTH_H_PREDICTOR(W)                                          \
+  void highbd_smooth_h_##W##xh_neon(                                          \
+      uint16_t *dst, ptrdiff_t stride, const uint16_t *const top_row,         \
+      const uint16_t *const left_column, const int height) {                  \
+    const uint16_t top_right = top_row[(W)-1];                                \
+                                                                              \
+    uint16x4_t weights_x_low[(W) >> 3];                                       \
+    uint16x4_t weights_x_high[(W) >> 3];                                      \
+    uint32x4_t weighted_tr_low[(W) >> 3];                                     \
+    uint32x4_t weighted_tr_high[(W) >> 3];                                    \
+    for (int i = 0; i<(W)>> 3; ++i) {                                         \
+      const int x = i << 3;                                                   \
+      weights_x_low[i] = vld1_u16(smooth_weights_u16 + (W)-4 + x);            \
+      weighted_tr_low[i] =                                                    \
+          vmull_n_u16(negate_s8(weights_x_low[i]), top_right);                \
+      weights_x_high[i] = vld1_u16(smooth_weights_u16 + (W) + x);             \
+      weighted_tr_high[i] =                                                   \
+          vmull_n_u16(negate_s8(weights_x_high[i]), top_right);               \
+    }                                                                         \
+                                                                              \
+    for (int y = 0; y < height; ++y) {                                        \
+      uint16_t *dst_x = dst;                                                  \
+      const uint16_t left_y = left_column[y];                                 \
+      for (int i = 0; i<(W)>> 3; ++i) {                                       \
+        const uint32x4_t weighted_left_low =                                  \
+            vmlal_n_u16(weighted_tr_low[i], weights_x_low[i], left_y);        \
+        vst1_u16(dst_x,                                                       \
+                 vrshrn_n_u32(weighted_left_low, SMOOTH_WEIGHT_LOG2_SCALE));  \
+                                                                              \
+        const uint32x4_t weighted_left_high =                                 \
+            vmlal_n_u16(weighted_tr_high[i], weights_x_high[i], left_y);      \
+        vst1_u16(dst_x + 4,                                                   \
+                 vrshrn_n_u32(weighted_left_high, SMOOTH_WEIGHT_LOG2_SCALE)); \
+        dst_x += 8;                                                           \
+      }                                                                       \
+      dst += stride;                                                          \
+    }                                                                         \
   }
 
 HIGHBD_SMOOTH_H_PREDICTOR(16)

diff --git a/aom_dsp/arm/intrapred_neon.c b/aom_dsp/arm/intrapred_neon.c
index 8be9716..d439a47 100644
--- a/aom_dsp/arm/intrapred_neon.c
+++ b/aom_dsp/arm/intrapred_neon.c

@@ -2684,8 +2684,8 @@
 // weight_w[0]: weights_w and scale - weights_w interleave vector
 static INLINE void load_weight_w4(int height, uint16x8_t *weight_h,
                                   uint16x8_t *weight_w) {
-  const uint16x8_t d = vdupq_n_u16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
-  const uint8x8_t t = vcreate_u8(((const uint32_t *)(sm_weight_arrays))[0]);
+  const uint16x8_t d = vdupq_n_u16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+  const uint8x8_t t = vcreate_u8(((const uint32_t *)smooth_weights)[0]);
   weight_h[0] = vmovl_u8(t);
   weight_h[1] = vsubw_u8(d, t);
 #if defined(__aarch64__)
@@ -2695,12 +2695,12 @@
 #endif  // (__aarch64__)
 
   if (height == 8) {
-    const uint8x8_t weight = vld1_u8(&sm_weight_arrays[4]);
+    const uint8x8_t weight = vld1_u8(&smooth_weights[4]);
     weight_h[0] = vmovl_u8(weight);
     weight_h[1] = vsubw_u8(d, weight);
   } else if (height == 16) {
     const uint8x16_t zero = vdupq_n_u8(0);
-    const uint8x16_t weight = vld1q_u8(&sm_weight_arrays[12]);
+    const uint8x16_t weight = vld1q_u8(&smooth_weights[12]);
     const uint8x16x2_t weight_h_02 = vzipq_u8(weight, zero);
     weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
     weight_h[1] = vsubq_u16(d, vreinterpretq_u16_u8(weight_h_02.val[0]));
@@ -2853,7 +2853,7 @@
                                   uint16x8_t *weight_w) {
   const uint8x16_t zero = vdupq_n_u8(0);
   const int we_offset = height < 8 ? 0 : 4;
-  uint8x16_t we = vld1q_u8(&sm_weight_arrays[we_offset]);
+  uint8x16_t we = vld1q_u8(&smooth_weights[we_offset]);
 #if defined(__aarch64__)
   weight_h[0] = vreinterpretq_u16_u8(vzip1q_u8(we, zero));
 #else
@@ -2876,20 +2876,20 @@
   }
 
   if (height == 16) {
-    we = vld1q_u8(&sm_weight_arrays[12]);
+    we = vld1q_u8(&smooth_weights[12]);
     const uint8x16x2_t weight_h_02 = vzipq_u8(we, zero);
     weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
     weight_h[1] = vsubq_u16(d, weight_h[0]);
     weight_h[2] = vreinterpretq_u16_u8(weight_h_02.val[1]);
     weight_h[3] = vsubq_u16(d, weight_h[2]);
   } else if (height == 32) {
-    const uint8x16_t weight_lo = vld1q_u8(&sm_weight_arrays[28]);
+    const uint8x16_t weight_lo = vld1q_u8(&smooth_weights[28]);
     const uint8x16x2_t weight_h_02 = vzipq_u8(weight_lo, zero);
     weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
     weight_h[1] = vsubq_u16(d, weight_h[0]);
     weight_h[2] = vreinterpretq_u16_u8(weight_h_02.val[1]);
     weight_h[3] = vsubq_u16(d, weight_h[2]);
-    const uint8x16_t weight_hi = vld1q_u8(&sm_weight_arrays[28 + 16]);
+    const uint8x16_t weight_hi = vld1q_u8(&smooth_weights[28 + 16]);
     const uint8x16x2_t weight_h_46 = vzipq_u8(weight_hi, zero);
     weight_h[4] = vreinterpretq_u16_u8(weight_h_46.val[0]);
     weight_h[5] = vsubq_u16(d, weight_h[4]);
@@ -3020,8 +3020,8 @@
                                         const uint8_t *above,
                                         const uint8_t *left, uint32_t bw,
                                         uint32_t bh) {
-  const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
-  const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
+  const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+  const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
   const uint16x8_t scale_value = vdupq_n_u16(256);
 
   for (uint32_t y = 0; y < bh; ++y) {

diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index c2c03e6..00396c8 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c

@@ -86,11 +86,11 @@
                                     const uint8_t *left) {
   const uint8_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
   const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
-  const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
-  const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
-  // scale = 2 * 2^SM_WEIGHT_LOG2_SCALE
-  const int log2_scale = 1 + SM_WEIGHT_LOG2_SCALE;
-  const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+  const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+  const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
+  // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
+  const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
+  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
   sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
                            log2_scale + sizeof(*dst));
   int r;
@@ -116,10 +116,10 @@
                                       int bh, const uint8_t *above,
                                       const uint8_t *left) {
   const uint8_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
-  const uint8_t *const sm_weights = sm_weight_arrays + bh - 4;
-  // scale = 2^SM_WEIGHT_LOG2_SCALE
-  const int log2_scale = SM_WEIGHT_LOG2_SCALE;
-  const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+  const uint8_t *const sm_weights = smooth_weights + bh - 4;
+  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
                            log2_scale + sizeof(*dst));
 
@@ -145,10 +145,10 @@
                                       int bh, const uint8_t *above,
                                       const uint8_t *left) {
   const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
-  const uint8_t *const sm_weights = sm_weight_arrays + bw - 4;
-  // scale = 2^SM_WEIGHT_LOG2_SCALE
-  const int log2_scale = SM_WEIGHT_LOG2_SCALE;
-  const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+  const uint8_t *const sm_weights = smooth_weights + bw - 4;
+  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
                            log2_scale + sizeof(*dst));
 
@@ -405,11 +405,11 @@
   (void)bd;
   const uint16_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
   const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
-  const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
-  const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
-  // scale = 2 * 2^SM_WEIGHT_LOG2_SCALE
-  const int log2_scale = 1 + SM_WEIGHT_LOG2_SCALE;
-  const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+  const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+  const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
+  // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
+  const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
+  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
   sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
                            log2_scale + sizeof(*dst));
   int r;
@@ -437,10 +437,10 @@
                                              const uint16_t *left, int bd) {
   (void)bd;
   const uint16_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
-  const uint8_t *const sm_weights = sm_weight_arrays + bh - 4;
-  // scale = 2^SM_WEIGHT_LOG2_SCALE
-  const int log2_scale = SM_WEIGHT_LOG2_SCALE;
-  const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+  const uint8_t *const sm_weights = smooth_weights + bh - 4;
+  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
                            log2_scale + sizeof(*dst));
 
@@ -468,10 +468,10 @@
                                              const uint16_t *left, int bd) {
   (void)bd;
   const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
-  const uint8_t *const sm_weights = sm_weight_arrays + bw - 4;
-  // scale = 2^SM_WEIGHT_LOG2_SCALE
-  const int log2_scale = SM_WEIGHT_LOG2_SCALE;
-  const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+  const uint8_t *const sm_weights = smooth_weights + bw - 4;
+  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
                            log2_scale + sizeof(*dst));
 

diff --git a/aom_dsp/intrapred_common.h b/aom_dsp/intrapred_common.h
index a872e8d..ac836a5 100644
--- a/aom_dsp/intrapred_common.h
+++ b/aom_dsp/intrapred_common.h

@@ -15,10 +15,10 @@
 #include "config/aom_config.h"
 
 // Weights are quadratic from '1' to '1 / block_size', scaled by
-// 2^SM_WEIGHT_LOG2_SCALE.
-#define SM_WEIGHT_LOG2_SCALE 8
+// 2^SMOOTH_WEIGHT_LOG2_SCALE.
+#define SMOOTH_WEIGHT_LOG2_SCALE 8
 
-static const uint8_t sm_weight_arrays[] = {
+static const uint8_t smooth_weights[] = {
   // bs = 4
   255, 149, 85, 64,
   // bs = 8
@@ -35,7 +35,7 @@
   13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4
 };
 
-static const uint16_t sm_weight_arrays_u16[] = {
+static const uint16_t smooth_weights_u16[] = {
   // block dimension = 4
   255, 149, 85, 64,
   // block dimension = 8

diff --git a/aom_dsp/x86/intrapred_ssse3.c b/aom_dsp/x86/intrapred_ssse3.c
index 296a41a..f0bd040 100644
--- a/aom_dsp/x86/intrapred_ssse3.c
+++ b/aom_dsp/x86/intrapred_ssse3.c

@@ -610,20 +610,19 @@
 static INLINE void load_weight_w4(int height, __m128i *weight_h,
                                   __m128i *weight_w) {
   const __m128i zero = _mm_setzero_si128();
-  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
-  const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)sm_weight_arrays)[0]);
+  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+  const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)smooth_weights)[0]);
   weight_h[0] = _mm_unpacklo_epi8(t, zero);
   weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
   weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
 
   if (height == 8) {
-    const __m128i weight =
-        _mm_loadl_epi64((const __m128i *)&sm_weight_arrays[4]);
+    const __m128i weight = _mm_loadl_epi64((const __m128i *)&smooth_weights[4]);
     weight_h[0] = _mm_unpacklo_epi8(weight, zero);
     weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
   } else if (height == 16) {
     const __m128i weight =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
     weight_h[0] = _mm_unpacklo_epi8(weight, zero);
     weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
     weight_h[2] = _mm_unpackhi_epi8(weight, zero);
@@ -634,7 +633,7 @@
 static INLINE void smooth_pred_4xh(const __m128i *pixel, const __m128i *wh,
                                    const __m128i *ww, int h, uint8_t *dst,
                                    ptrdiff_t stride, int second_half) {
-  const __m128i round = _mm_set1_epi32((1 << SM_WEIGHT_LOG2_SCALE));
+  const __m128i round = _mm_set1_epi32((1 << SMOOTH_WEIGHT_LOG2_SCALE));
   const __m128i one = _mm_set1_epi16(1);
   const __m128i inc = _mm_set1_epi16(0x202);
   const __m128i gat = _mm_set1_epi32(0xc080400);
@@ -654,7 +653,7 @@
 
     sum = _mm_add_epi32(s, sum);
     sum = _mm_add_epi32(sum, round);
-    sum = _mm_srai_epi32(sum, 1 + SM_WEIGHT_LOG2_SCALE);
+    sum = _mm_srai_epi32(sum, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
 
     sum = _mm_shuffle_epi8(sum, gat);
     *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
@@ -749,9 +748,9 @@
                                   __m128i *weight_w) {
   const __m128i zero = _mm_setzero_si128();
   const int we_offset = height < 8 ? 0 : 4;
-  __m128i we = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[we_offset]);
+  __m128i we = _mm_loadu_si128((const __m128i *)&smooth_weights[we_offset]);
   weight_h[0] = _mm_unpacklo_epi8(we, zero);
-  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
   weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
 
   if (height == 4) {
@@ -766,20 +765,20 @@
   }
 
   if (height == 16) {
-    we = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+    we = _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
     weight_h[0] = _mm_unpacklo_epi8(we, zero);
     weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
     weight_h[2] = _mm_unpackhi_epi8(we, zero);
     weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
   } else if (height == 32) {
     const __m128i weight_lo =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[28]);
     weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
     weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
     weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
     weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
     const __m128i weight_hi =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28 + 16]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[28 + 16]);
     weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
     weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
     weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
@@ -790,7 +789,7 @@
 static INLINE void smooth_pred_8xh(const __m128i *pixels, const __m128i *wh,
                                    const __m128i *ww, int h, uint8_t *dst,
                                    ptrdiff_t stride, int second_half) {
-  const __m128i round = _mm_set1_epi32((1 << SM_WEIGHT_LOG2_SCALE));
+  const __m128i round = _mm_set1_epi32((1 << SMOOTH_WEIGHT_LOG2_SCALE));
   const __m128i one = _mm_set1_epi16(1);
   const __m128i inc = _mm_set1_epi16(0x202);
   const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
@@ -814,11 +813,11 @@
 
     s0 = _mm_add_epi32(s0, sum0);
     s0 = _mm_add_epi32(s0, round);
-    s0 = _mm_srai_epi32(s0, 1 + SM_WEIGHT_LOG2_SCALE);
+    s0 = _mm_srai_epi32(s0, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
 
     s1 = _mm_add_epi32(s1, sum1);
     s1 = _mm_add_epi32(s1, round);
-    s1 = _mm_srai_epi32(s1, 1 + SM_WEIGHT_LOG2_SCALE);
+    s1 = _mm_srai_epi32(s1, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
 
     sum0 = _mm_packus_epi16(s0, s1);
     sum0 = _mm_shuffle_epi8(sum0, gat);
@@ -888,17 +887,18 @@
                                         const uint8_t *above,
                                         const uint8_t *left, uint32_t bw,
                                         uint32_t bh) {
-  const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
-  const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
+  const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+  const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
   const __m128i zero = _mm_setzero_si128();
   const __m128i scale_value =
-      _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+      _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
   const __m128i bottom_left = _mm_cvtsi32_si128((uint32_t)left[bh - 1]);
   const __m128i dup16 = _mm_set1_epi32(0x01000100);
   const __m128i top_right =
       _mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)above[bw - 1]), dup16);
   const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
-  const __m128i round = _mm_set1_epi32((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+  const __m128i round =
+      _mm_set1_epi32((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
 
   for (uint32_t y = 0; y < bh; ++y) {
     const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
@@ -933,8 +933,8 @@
       pred_lo = _mm_add_epi32(pred_lo, swxtr_lo);
       pred_hi = _mm_add_epi32(pred_hi, swxtr_hi);
 
-      pred_lo = _mm_srai_epi32(pred_lo, (1 + SM_WEIGHT_LOG2_SCALE));
-      pred_hi = _mm_srai_epi32(pred_hi, (1 + SM_WEIGHT_LOG2_SCALE));
+      pred_lo = _mm_srai_epi32(pred_lo, (1 + SMOOTH_WEIGHT_LOG2_SCALE));
+      pred_hi = _mm_srai_epi32(pred_hi, (1 + SMOOTH_WEIGHT_LOG2_SCALE));
 
       __m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
       pred = _mm_shuffle_epi8(pred, gat);
@@ -1033,21 +1033,20 @@
 // weights[1]: scale - weights_h vector
 static INLINE void load_weight_v_w4(int height, __m128i *weights) {
   const __m128i zero = _mm_setzero_si128();
-  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
 
   if (height == 4) {
     const __m128i weight =
-        _mm_cvtsi32_si128(((const uint32_t *)sm_weight_arrays)[0]);
+        _mm_cvtsi32_si128(((const uint32_t *)smooth_weights)[0]);
     weights[0] = _mm_unpacklo_epi8(weight, zero);
     weights[1] = _mm_sub_epi16(d, weights[0]);
   } else if (height == 8) {
-    const __m128i weight =
-        _mm_loadl_epi64((const __m128i *)&sm_weight_arrays[4]);
+    const __m128i weight = _mm_loadl_epi64((const __m128i *)&smooth_weights[4]);
     weights[0] = _mm_unpacklo_epi8(weight, zero);
     weights[1] = _mm_sub_epi16(d, weights[0]);
   } else {
     const __m128i weight =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
     weights[0] = _mm_unpacklo_epi8(weight, zero);
     weights[1] = _mm_sub_epi16(d, weights[0]);
     weights[2] = _mm_unpackhi_epi8(weight, zero);
@@ -1058,7 +1057,8 @@
 static INLINE void smooth_v_pred_4xh(const __m128i *pixel,
                                      const __m128i *weight, int h, uint8_t *dst,
                                      ptrdiff_t stride) {
-  const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+  const __m128i pred_round =
+      _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
   const __m128i inc = _mm_set1_epi16(0x202);
   const __m128i gat = _mm_set1_epi32(0xc080400);
   __m128i d = _mm_set1_epi16(0x100);
@@ -1069,7 +1069,7 @@
     const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
     __m128i sum = _mm_madd_epi16(pixel[0], wh_sc);
     sum = _mm_add_epi32(sum, pred_round);
-    sum = _mm_srai_epi32(sum, SM_WEIGHT_LOG2_SCALE);
+    sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
     sum = _mm_shuffle_epi8(sum, gat);
     *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
     dst += stride;
@@ -1137,30 +1137,30 @@
 // weight_h[7]: same as [1], offset 24
 static INLINE void load_weight_v_w8(int height, __m128i *weight_h) {
   const __m128i zero = _mm_setzero_si128();
-  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
 
   if (height < 16) {
     const int offset = height < 8 ? 0 : 4;
     const __m128i weight =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[offset]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[offset]);
     weight_h[0] = _mm_unpacklo_epi8(weight, zero);
     weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
   } else if (height == 16) {
     const __m128i weight =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
     weight_h[0] = _mm_unpacklo_epi8(weight, zero);
     weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
     weight_h[2] = _mm_unpackhi_epi8(weight, zero);
     weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
   } else {
     const __m128i weight_lo =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[28]);
     weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
     weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
     weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
     weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
     const __m128i weight_hi =
-        _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28 + 16]);
+        _mm_loadu_si128((const __m128i *)&smooth_weights[28 + 16]);
     weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
     weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
     weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
@@ -1170,7 +1170,8 @@
 
 static INLINE void smooth_v_pred_8xh(const __m128i *pixels, const __m128i *wh,
                                      int h, uint8_t *dst, ptrdiff_t stride) {
-  const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+  const __m128i pred_round =
+      _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
   const __m128i inc = _mm_set1_epi16(0x202);
   const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
   __m128i d = _mm_set1_epi16(0x100);
@@ -1183,10 +1184,10 @@
     __m128i s1 = _mm_madd_epi16(pixels[1], wh_sc);
 
     s0 = _mm_add_epi32(s0, pred_round);
-    s0 = _mm_srai_epi32(s0, SM_WEIGHT_LOG2_SCALE);
+    s0 = _mm_srai_epi32(s0, SMOOTH_WEIGHT_LOG2_SCALE);
 
     s1 = _mm_add_epi32(s1, pred_round);
-    s1 = _mm_srai_epi32(s1, SM_WEIGHT_LOG2_SCALE);
+    s1 = _mm_srai_epi32(s1, SMOOTH_WEIGHT_LOG2_SCALE);
 
     __m128i sum01 = _mm_packus_epi16(s0, s1);
     sum01 = _mm_shuffle_epi8(sum01, gat);
@@ -1257,16 +1258,16 @@
                                           const uint8_t *above,
                                           const uint8_t *left, uint32_t bw,
                                           uint32_t bh) {
-  const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
+  const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
   const __m128i zero = _mm_setzero_si128();
   const __m128i scale_value =
-      _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+      _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
   const __m128i dup16 = _mm_set1_epi32(0x01000100);
   const __m128i bottom_left =
       _mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)left[bh - 1]), dup16);
   const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
   const __m128i round =
-      _mm_set1_epi32((uint16_t)(1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+      _mm_set1_epi32((uint16_t)(1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
 
   for (uint32_t y = 0; y < bh; ++y) {
     const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
@@ -1287,8 +1288,8 @@
 
       pred_lo = _mm_add_epi32(pred_lo, round);
       pred_hi = _mm_add_epi32(pred_hi, round);
-      pred_lo = _mm_srai_epi32(pred_lo, SM_WEIGHT_LOG2_SCALE);
-      pred_hi = _mm_srai_epi32(pred_hi, SM_WEIGHT_LOG2_SCALE);
+      pred_lo = _mm_srai_epi32(pred_lo, SMOOTH_WEIGHT_LOG2_SCALE);
+      pred_hi = _mm_srai_epi32(pred_hi, SMOOTH_WEIGHT_LOG2_SCALE);
 
       __m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
       pred = _mm_shuffle_epi8(pred, gat);
@@ -1389,11 +1390,11 @@
 // weights[0]: weights_w and scale - weights_w interleave vector
 static INLINE void load_weight_h_w4(int height, __m128i *weights) {
   (void)height;
-  const __m128i t = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[0]);
+  const __m128i t = _mm_loadu_si128((const __m128i *)&smooth_weights[0]);
   const __m128i zero = _mm_setzero_si128();
 
   const __m128i weights_0 = _mm_unpacklo_epi8(t, zero);
-  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
   const __m128i weights_1 = _mm_sub_epi16(d, weights_0);
   weights[0] = _mm_unpacklo_epi16(weights_0, weights_1);
 }
@@ -1401,7 +1402,8 @@
 static INLINE void smooth_h_pred_4xh(const __m128i *pixel,
                                      const __m128i *weight, int h, uint8_t *dst,
                                      ptrdiff_t stride) {
-  const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+  const __m128i pred_round =
+      _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
   const __m128i one = _mm_set1_epi16(1);
   const __m128i gat = _mm_set1_epi32(0xc080400);
   __m128i rep = _mm_set1_epi16((short)0x8000);
@@ -1412,7 +1414,7 @@
     __m128i sum = _mm_madd_epi16(b, weight[0]);
 
     sum = _mm_add_epi32(sum, pred_round);
-    sum = _mm_srai_epi32(sum, SM_WEIGHT_LOG2_SCALE);
+    sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
 
     sum = _mm_shuffle_epi8(sum, gat);
     *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
@@ -1488,8 +1490,8 @@
 static INLINE void load_weight_h_w8(int height, __m128i *weight_w) {
   (void)height;
   const __m128i zero = _mm_setzero_si128();
-  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
-  const __m128i we = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[4]);
+  const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+  const __m128i we = _mm_loadu_si128((const __m128i *)&smooth_weights[4]);
   const __m128i tmp1 = _mm_unpacklo_epi8(we, zero);
   const __m128i tmp2 = _mm_sub_epi16(d, tmp1);
   weight_w[0] = _mm_unpacklo_epi16(tmp1, tmp2);
@@ -1499,7 +1501,8 @@
 static INLINE void smooth_h_pred_8xh(const __m128i *pixels, const __m128i *ww,
                                      int h, uint8_t *dst, ptrdiff_t stride,
                                      int second_half) {
-  const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+  const __m128i pred_round =
+      _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
   const __m128i one = _mm_set1_epi16(1);
   const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
   __m128i rep = second_half ? _mm_set1_epi16((short)0x8008)
@@ -1512,10 +1515,10 @@
     __m128i sum1 = _mm_madd_epi16(b, ww[1]);
 
     sum0 = _mm_add_epi32(sum0, pred_round);
-    sum0 = _mm_srai_epi32(sum0, SM_WEIGHT_LOG2_SCALE);
+    sum0 = _mm_srai_epi32(sum0, SMOOTH_WEIGHT_LOG2_SCALE);
 
     sum1 = _mm_add_epi32(sum1, pred_round);
-    sum1 = _mm_srai_epi32(sum1, SM_WEIGHT_LOG2_SCALE);
+    sum1 = _mm_srai_epi32(sum1, SMOOTH_WEIGHT_LOG2_SCALE);
 
     sum0 = _mm_packus_epi16(sum0, sum1);
     sum0 = _mm_shuffle_epi8(sum0, gat);
@@ -1586,13 +1589,14 @@
                                           const uint8_t *above,
                                           const uint8_t *left, uint32_t bw,
                                           uint32_t bh) {
-  const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
+  const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
   const __m128i zero = _mm_setzero_si128();
   const __m128i scale_value =
-      _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+      _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
   const __m128i top_right = _mm_cvtsi32_si128((uint32_t)above[bw - 1]);
   const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
-  const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+  const __m128i pred_round =
+      _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
 
   for (uint32_t y = 0; y < bh; ++y) {
     const __m128i left_y = _mm_cvtsi32_si128((uint32_t)left[y]);
@@ -1612,8 +1616,8 @@
       pred_lo = _mm_add_epi32(pred_lo, pred_round);
       pred_hi = _mm_add_epi32(pred_hi, pred_round);
 
-      pred_lo = _mm_srai_epi32(pred_lo, SM_WEIGHT_LOG2_SCALE);
-      pred_hi = _mm_srai_epi32(pred_hi, SM_WEIGHT_LOG2_SCALE);
+      pred_lo = _mm_srai_epi32(pred_lo, SMOOTH_WEIGHT_LOG2_SCALE);
+      pred_hi = _mm_srai_epi32(pred_hi, SMOOTH_WEIGHT_LOG2_SCALE);
 
       __m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
       pred = _mm_shuffle_epi8(pred, gat);
commit	6be7f499612ba296976c516255e40b9e5e8e779c	[log] [tgz]
author	James Zern <jzern@google.com>	Wed Apr 27 12:18:20 2022 -0700
committer	James Zern <jzern@google.com>	Thu Apr 28 20:41:44 2022 +0000
tree	40161e2abc29b2723dc9460d6f036c29c5e0f203
parent	f02743e8df4a548841f0757c0e4b4001481c4b72 [diff]