cosmetics: rename sm_weight_arrays -> smooth_weights
+ rename sm_weight_arrays_u16 -> smooth_weights_u16 and normalize
SM_WEIGHT_LOG2_SCALE -> SMOOTH_WEIGHT_LOG2_SCALE
Bug: b/217461825
Change-Id: I094b891c654d99f9f57292ed2e161e48bee2314c
diff --git a/aom_dsp/arm/highbd_intrapred_neon.c b/aom_dsp/arm/highbd_intrapred_neon.c
index b898e2a..7686e6b 100644
--- a/aom_dsp/arm/highbd_intrapred_neon.c
+++ b/aom_dsp/arm/highbd_intrapred_neon.c
@@ -396,11 +396,11 @@
const int height) {
const uint16_t top_right = top_row[3];
const uint16_t bottom_left = left_column[height - 1];
- const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
const uint16x4_t top_v = vld1_u16(top_row);
const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
- const uint16x4_t weights_x_v = vld1_u16(sm_weight_arrays_u16);
+ const uint16x4_t weights_x_v = vld1_u16(smooth_weights_u16);
const uint16x4_t scaled_weights_x = negate_s8(weights_x_v);
const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
@@ -414,7 +414,8 @@
const uint32x4_t weighted_bl =
vmlal_n_u16(weighted_left, bottom_left_v, 256 - weights_y[y]);
- const uint16x4_t pred = vrshrn_n_u32(weighted_bl, SM_WEIGHT_LOG2_SCALE + 1);
+ const uint16x4_t pred =
+ vrshrn_n_u32(weighted_bl, SMOOTH_WEIGHT_LOG2_SCALE + 1);
vst1_u16(dst, pred);
dst += stride;
}
@@ -434,7 +435,7 @@
vmlal_n_u16(weighted_top_low, weights_x.val[0], left_y);
const uint16x4_t pred_low =
- vrshrn_n_u32(weighted_edges_low, SM_WEIGHT_LOG2_SCALE + 1);
+ vrshrn_n_u32(weighted_edges_low, SMOOTH_WEIGHT_LOG2_SCALE + 1);
vst1_u16(dst, pred_low);
const uint32x4_t weighted_top_high =
@@ -443,7 +444,7 @@
vmlal_n_u16(weighted_top_high, weights_x.val[1], left_y);
const uint16x4_t pred_high =
- vrshrn_n_u32(weighted_edges_high, SM_WEIGHT_LOG2_SCALE + 1);
+ vrshrn_n_u32(weighted_edges_high, SMOOTH_WEIGHT_LOG2_SCALE + 1);
vst1_u16(dst + 4, pred_high);
}
@@ -453,13 +454,13 @@
const int height) {
const uint16_t top_right = top_row[7];
const uint16_t bottom_left = left_column[height - 1];
- const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
const uint16x4x2_t top_vals = { { vld1_u16(top_row),
vld1_u16(top_row + 4) } };
const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
- const uint16x4x2_t weights_x = { { vld1_u16(sm_weight_arrays_u16 + 4),
- vld1_u16(sm_weight_arrays_u16 + 8) } };
+ const uint16x4x2_t weights_x = { { vld1_u16(smooth_weights_u16 + 4),
+ vld1_u16(smooth_weights_u16 + 8) } };
const uint32x4_t weighted_tr_low =
vmull_n_u16(negate_s8(weights_x.val[0]), top_right);
const uint32x4_t weighted_tr_high =
@@ -506,7 +507,7 @@
const uint16_t *const left_column, const int height) { \
const uint16_t top_right = top_row[(W)-1]; \
const uint16_t bottom_left = left_column[height - 1]; \
- const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4; \
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4; \
\
/* Precompute weighted values that don't vary with |y|. */ \
uint32x4_t weighted_tr_low[(W) >> 3]; \
@@ -514,10 +515,10 @@
for (int i = 0; i<(W)>> 3; ++i) { \
const int x = i << 3; \
const uint16x4_t weights_x_low = \
- vld1_u16(sm_weight_arrays_u16 + (W)-4 + x); \
+ vld1_u16(smooth_weights_u16 + (W)-4 + x); \
weighted_tr_low[i] = vmull_n_u16(negate_s8(weights_x_low), top_right); \
const uint16x4_t weights_x_high = \
- vld1_u16(sm_weight_arrays_u16 + (W) + x); \
+ vld1_u16(smooth_weights_u16 + (W) + x); \
weighted_tr_high[i] = vmull_n_u16(negate_s8(weights_x_high), top_right); \
} \
\
@@ -536,8 +537,8 @@
vaddq_u32(weighted_bl, weighted_tr_high[i]); \
/* Accumulate weighted edge values and store. */ \
const uint16x4x2_t weights_x = { \
- { vld1_u16(sm_weight_arrays_u16 + (W)-4 + x), \
- vld1_u16(sm_weight_arrays_u16 + (W) + x) } \
+ { vld1_u16(smooth_weights_u16 + (W)-4 + x), \
+ vld1_u16(smooth_weights_u16 + (W) + x) } \
}; \
highbd_calculate_pred8(dst_x, weighted_corners_low, \
weighted_corners_high, top_vals, weights_x, \
@@ -585,7 +586,7 @@
const uint16_t *const left_column,
const int height) {
const uint16_t bottom_left = left_column[height - 1];
- const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
const uint16x4_t top_v = vld1_u16(top_row);
const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
@@ -595,7 +596,7 @@
vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
const uint32x4_t weighted_top =
vmlal_n_u16(weighted_bl, top_v, weights_y[y]);
- vst1_u16(dst, vrshrn_n_u32(weighted_top, SM_WEIGHT_LOG2_SCALE));
+ vst1_u16(dst, vrshrn_n_u32(weighted_top, SMOOTH_WEIGHT_LOG2_SCALE));
dst += stride;
}
@@ -606,7 +607,7 @@
const uint16_t *const left_column,
const int height) {
const uint16_t bottom_left = left_column[height - 1];
- const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4;
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
const uint16x4_t top_low = vld1_u16(top_row);
const uint16x4_t top_high = vld1_u16(top_row + 4);
@@ -618,11 +619,12 @@
const uint32x4_t weighted_top_low =
vmlal_n_u16(weighted_bl, top_low, weights_y[y]);
- vst1_u16(dst, vrshrn_n_u32(weighted_top_low, SM_WEIGHT_LOG2_SCALE));
+ vst1_u16(dst, vrshrn_n_u32(weighted_top_low, SMOOTH_WEIGHT_LOG2_SCALE));
const uint32x4_t weighted_top_high =
vmlal_n_u16(weighted_bl, top_high, weights_y[y]);
- vst1_u16(dst + 4, vrshrn_n_u32(weighted_top_high, SM_WEIGHT_LOG2_SCALE));
+ vst1_u16(dst + 4,
+ vrshrn_n_u32(weighted_top_high, SMOOTH_WEIGHT_LOG2_SCALE));
dst += stride;
}
}
@@ -649,39 +651,40 @@
#undef HIGHBD_SMOOTH_V_NXM
// For width 16 and above.
-#define HIGHBD_SMOOTH_V_PREDICTOR(W) \
- static void highbd_smooth_v_##W##xh_neon( \
- uint16_t *dst, const ptrdiff_t stride, const uint16_t *const top_row, \
- const uint16_t *const left_column, const int height) { \
- const uint16_t bottom_left = left_column[height - 1]; \
- const uint16_t *const weights_y = sm_weight_arrays_u16 + height - 4; \
- \
- uint16x4x2_t top_vals[(W) >> 3]; \
- for (int i = 0; i<(W)>> 3; ++i) { \
- const int x = i << 3; \
- top_vals[i].val[0] = vld1_u16(top_row + x); \
- top_vals[i].val[1] = vld1_u16(top_row + x + 4); \
- } \
- \
- const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left); \
- for (int y = 0; y < height; ++y) { \
- const uint32x4_t weighted_bl = \
- vmull_n_u16(bottom_left_v, 256 - weights_y[y]); \
- \
- uint16_t *dst_x = dst; \
- for (int i = 0; i<(W)>> 3; ++i) { \
- const uint32x4_t weighted_top_low = \
- vmlal_n_u16(weighted_bl, top_vals[i].val[0], weights_y[y]); \
- vst1_u16(dst_x, vrshrn_n_u32(weighted_top_low, SM_WEIGHT_LOG2_SCALE)); \
- \
- const uint32x4_t weighted_top_high = \
- vmlal_n_u16(weighted_bl, top_vals[i].val[1], weights_y[y]); \
- vst1_u16(dst_x + 4, \
- vrshrn_n_u32(weighted_top_high, SM_WEIGHT_LOG2_SCALE)); \
- dst_x += 8; \
- } \
- dst += stride; \
- } \
+#define HIGHBD_SMOOTH_V_PREDICTOR(W) \
+ static void highbd_smooth_v_##W##xh_neon( \
+ uint16_t *dst, const ptrdiff_t stride, const uint16_t *const top_row, \
+ const uint16_t *const left_column, const int height) { \
+ const uint16_t bottom_left = left_column[height - 1]; \
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4; \
+ \
+ uint16x4x2_t top_vals[(W) >> 3]; \
+ for (int i = 0; i<(W)>> 3; ++i) { \
+ const int x = i << 3; \
+ top_vals[i].val[0] = vld1_u16(top_row + x); \
+ top_vals[i].val[1] = vld1_u16(top_row + x + 4); \
+ } \
+ \
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left); \
+ for (int y = 0; y < height; ++y) { \
+ const uint32x4_t weighted_bl = \
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]); \
+ \
+ uint16_t *dst_x = dst; \
+ for (int i = 0; i<(W)>> 3; ++i) { \
+ const uint32x4_t weighted_top_low = \
+ vmlal_n_u16(weighted_bl, top_vals[i].val[0], weights_y[y]); \
+ vst1_u16(dst_x, \
+ vrshrn_n_u32(weighted_top_low, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ \
+ const uint32x4_t weighted_top_high = \
+ vmlal_n_u16(weighted_bl, top_vals[i].val[1], weights_y[y]); \
+ vst1_u16(dst_x + 4, \
+ vrshrn_n_u32(weighted_top_high, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ dst_x += 8; \
+ } \
+ dst += stride; \
+ } \
}
HIGHBD_SMOOTH_V_PREDICTOR(16)
@@ -722,14 +725,14 @@
const int height) {
const uint16_t top_right = top_row[3];
- const uint16x4_t weights_x = vld1_u16(sm_weight_arrays_u16);
+ const uint16x4_t weights_x = vld1_u16(smooth_weights_u16);
const uint16x4_t scaled_weights_x = negate_s8(weights_x);
const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
for (int y = 0; y < height; ++y) {
const uint32x4_t weighted_left =
vmlal_n_u16(weighted_tr, weights_x, left_column[y]);
- vst1_u16(dst, vrshrn_n_u32(weighted_left, SM_WEIGHT_LOG2_SCALE));
+ vst1_u16(dst, vrshrn_n_u32(weighted_left, SMOOTH_WEIGHT_LOG2_SCALE));
dst += stride;
}
}
@@ -740,8 +743,8 @@
const int height) {
const uint16_t top_right = top_row[7];
- const uint16x4x2_t weights_x = { { vld1_u16(sm_weight_arrays_u16 + 4),
- vld1_u16(sm_weight_arrays_u16 + 8) } };
+ const uint16x4x2_t weights_x = { { vld1_u16(smooth_weights_u16 + 4),
+ vld1_u16(smooth_weights_u16 + 8) } };
const uint32x4_t weighted_tr_low =
vmull_n_u16(negate_s8(weights_x.val[0]), top_right);
@@ -752,11 +755,12 @@
const uint16_t left_y = left_column[y];
const uint32x4_t weighted_left_low =
vmlal_n_u16(weighted_tr_low, weights_x.val[0], left_y);
- vst1_u16(dst, vrshrn_n_u32(weighted_left_low, SM_WEIGHT_LOG2_SCALE));
+ vst1_u16(dst, vrshrn_n_u32(weighted_left_low, SMOOTH_WEIGHT_LOG2_SCALE));
const uint32x4_t weighted_left_high =
vmlal_n_u16(weighted_tr_high, weights_x.val[1], left_y);
- vst1_u16(dst + 4, vrshrn_n_u32(weighted_left_high, SM_WEIGHT_LOG2_SCALE));
+ vst1_u16(dst + 4,
+ vrshrn_n_u32(weighted_left_high, SMOOTH_WEIGHT_LOG2_SCALE));
dst += stride;
}
}
@@ -783,43 +787,43 @@
#undef HIGHBD_SMOOTH_H_NXM
// For width 16 and above.
-#define HIGHBD_SMOOTH_H_PREDICTOR(W) \
- void highbd_smooth_h_##W##xh_neon( \
- uint16_t *dst, ptrdiff_t stride, const uint16_t *const top_row, \
- const uint16_t *const left_column, const int height) { \
- const uint16_t top_right = top_row[(W)-1]; \
- \
- uint16x4_t weights_x_low[(W) >> 3]; \
- uint16x4_t weights_x_high[(W) >> 3]; \
- uint32x4_t weighted_tr_low[(W) >> 3]; \
- uint32x4_t weighted_tr_high[(W) >> 3]; \
- for (int i = 0; i<(W)>> 3; ++i) { \
- const int x = i << 3; \
- weights_x_low[i] = vld1_u16(sm_weight_arrays_u16 + (W)-4 + x); \
- weighted_tr_low[i] = \
- vmull_n_u16(negate_s8(weights_x_low[i]), top_right); \
- weights_x_high[i] = vld1_u16(sm_weight_arrays_u16 + (W) + x); \
- weighted_tr_high[i] = \
- vmull_n_u16(negate_s8(weights_x_high[i]), top_right); \
- } \
- \
- for (int y = 0; y < height; ++y) { \
- uint16_t *dst_x = dst; \
- const uint16_t left_y = left_column[y]; \
- for (int i = 0; i<(W)>> 3; ++i) { \
- const uint32x4_t weighted_left_low = \
- vmlal_n_u16(weighted_tr_low[i], weights_x_low[i], left_y); \
- vst1_u16(dst_x, \
- vrshrn_n_u32(weighted_left_low, SM_WEIGHT_LOG2_SCALE)); \
- \
- const uint32x4_t weighted_left_high = \
- vmlal_n_u16(weighted_tr_high[i], weights_x_high[i], left_y); \
- vst1_u16(dst_x + 4, \
- vrshrn_n_u32(weighted_left_high, SM_WEIGHT_LOG2_SCALE)); \
- dst_x += 8; \
- } \
- dst += stride; \
- } \
+#define HIGHBD_SMOOTH_H_PREDICTOR(W) \
+ void highbd_smooth_h_##W##xh_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *const top_row, \
+ const uint16_t *const left_column, const int height) { \
+ const uint16_t top_right = top_row[(W)-1]; \
+ \
+ uint16x4_t weights_x_low[(W) >> 3]; \
+ uint16x4_t weights_x_high[(W) >> 3]; \
+ uint32x4_t weighted_tr_low[(W) >> 3]; \
+ uint32x4_t weighted_tr_high[(W) >> 3]; \
+ for (int i = 0; i<(W)>> 3; ++i) { \
+ const int x = i << 3; \
+ weights_x_low[i] = vld1_u16(smooth_weights_u16 + (W)-4 + x); \
+ weighted_tr_low[i] = \
+ vmull_n_u16(negate_s8(weights_x_low[i]), top_right); \
+ weights_x_high[i] = vld1_u16(smooth_weights_u16 + (W) + x); \
+ weighted_tr_high[i] = \
+ vmull_n_u16(negate_s8(weights_x_high[i]), top_right); \
+ } \
+ \
+ for (int y = 0; y < height; ++y) { \
+ uint16_t *dst_x = dst; \
+ const uint16_t left_y = left_column[y]; \
+ for (int i = 0; i<(W)>> 3; ++i) { \
+ const uint32x4_t weighted_left_low = \
+ vmlal_n_u16(weighted_tr_low[i], weights_x_low[i], left_y); \
+ vst1_u16(dst_x, \
+ vrshrn_n_u32(weighted_left_low, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ \
+ const uint32x4_t weighted_left_high = \
+ vmlal_n_u16(weighted_tr_high[i], weights_x_high[i], left_y); \
+ vst1_u16(dst_x + 4, \
+ vrshrn_n_u32(weighted_left_high, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ dst_x += 8; \
+ } \
+ dst += stride; \
+ } \
}
HIGHBD_SMOOTH_H_PREDICTOR(16)
diff --git a/aom_dsp/arm/intrapred_neon.c b/aom_dsp/arm/intrapred_neon.c
index 8be9716..d439a47 100644
--- a/aom_dsp/arm/intrapred_neon.c
+++ b/aom_dsp/arm/intrapred_neon.c
@@ -2684,8 +2684,8 @@
// weight_w[0]: weights_w and scale - weights_w interleave vector
static INLINE void load_weight_w4(int height, uint16x8_t *weight_h,
uint16x8_t *weight_w) {
- const uint16x8_t d = vdupq_n_u16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
- const uint8x8_t t = vcreate_u8(((const uint32_t *)(sm_weight_arrays))[0]);
+ const uint16x8_t d = vdupq_n_u16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+ const uint8x8_t t = vcreate_u8(((const uint32_t *)smooth_weights)[0]);
weight_h[0] = vmovl_u8(t);
weight_h[1] = vsubw_u8(d, t);
#if defined(__aarch64__)
@@ -2695,12 +2695,12 @@
#endif // (__aarch64__)
if (height == 8) {
- const uint8x8_t weight = vld1_u8(&sm_weight_arrays[4]);
+ const uint8x8_t weight = vld1_u8(&smooth_weights[4]);
weight_h[0] = vmovl_u8(weight);
weight_h[1] = vsubw_u8(d, weight);
} else if (height == 16) {
const uint8x16_t zero = vdupq_n_u8(0);
- const uint8x16_t weight = vld1q_u8(&sm_weight_arrays[12]);
+ const uint8x16_t weight = vld1q_u8(&smooth_weights[12]);
const uint8x16x2_t weight_h_02 = vzipq_u8(weight, zero);
weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
weight_h[1] = vsubq_u16(d, vreinterpretq_u16_u8(weight_h_02.val[0]));
@@ -2853,7 +2853,7 @@
uint16x8_t *weight_w) {
const uint8x16_t zero = vdupq_n_u8(0);
const int we_offset = height < 8 ? 0 : 4;
- uint8x16_t we = vld1q_u8(&sm_weight_arrays[we_offset]);
+ uint8x16_t we = vld1q_u8(&smooth_weights[we_offset]);
#if defined(__aarch64__)
weight_h[0] = vreinterpretq_u16_u8(vzip1q_u8(we, zero));
#else
@@ -2876,20 +2876,20 @@
}
if (height == 16) {
- we = vld1q_u8(&sm_weight_arrays[12]);
+ we = vld1q_u8(&smooth_weights[12]);
const uint8x16x2_t weight_h_02 = vzipq_u8(we, zero);
weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
weight_h[1] = vsubq_u16(d, weight_h[0]);
weight_h[2] = vreinterpretq_u16_u8(weight_h_02.val[1]);
weight_h[3] = vsubq_u16(d, weight_h[2]);
} else if (height == 32) {
- const uint8x16_t weight_lo = vld1q_u8(&sm_weight_arrays[28]);
+ const uint8x16_t weight_lo = vld1q_u8(&smooth_weights[28]);
const uint8x16x2_t weight_h_02 = vzipq_u8(weight_lo, zero);
weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
weight_h[1] = vsubq_u16(d, weight_h[0]);
weight_h[2] = vreinterpretq_u16_u8(weight_h_02.val[1]);
weight_h[3] = vsubq_u16(d, weight_h[2]);
- const uint8x16_t weight_hi = vld1q_u8(&sm_weight_arrays[28 + 16]);
+ const uint8x16_t weight_hi = vld1q_u8(&smooth_weights[28 + 16]);
const uint8x16x2_t weight_h_46 = vzipq_u8(weight_hi, zero);
weight_h[4] = vreinterpretq_u16_u8(weight_h_46.val[0]);
weight_h[5] = vsubq_u16(d, weight_h[4]);
@@ -3020,8 +3020,8 @@
const uint8_t *above,
const uint8_t *left, uint32_t bw,
uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
const uint16x8_t scale_value = vdupq_n_u16(256);
for (uint32_t y = 0; y < bh; ++y) {
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index c2c03e6..00396c8 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -86,11 +86,11 @@
const uint8_t *left) {
const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
- // scale = 2 * 2^SM_WEIGHT_LOG2_SCALE
- const int log2_scale = 1 + SM_WEIGHT_LOG2_SCALE;
- const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
+ // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
log2_scale + sizeof(*dst));
int r;
@@ -116,10 +116,10 @@
int bh, const uint8_t *above,
const uint8_t *left) {
const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bh - 4;
- // scale = 2^SM_WEIGHT_LOG2_SCALE
- const int log2_scale = SM_WEIGHT_LOG2_SCALE;
- const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+ const uint8_t *const sm_weights = smooth_weights + bh - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
@@ -145,10 +145,10 @@
int bh, const uint8_t *above,
const uint8_t *left) {
const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bw - 4;
- // scale = 2^SM_WEIGHT_LOG2_SCALE
- const int log2_scale = SM_WEIGHT_LOG2_SCALE;
- const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+ const uint8_t *const sm_weights = smooth_weights + bw - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
@@ -405,11 +405,11 @@
(void)bd;
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
- // scale = 2 * 2^SM_WEIGHT_LOG2_SCALE
- const int log2_scale = 1 + SM_WEIGHT_LOG2_SCALE;
- const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
+ // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
log2_scale + sizeof(*dst));
int r;
@@ -437,10 +437,10 @@
const uint16_t *left, int bd) {
(void)bd;
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bh - 4;
- // scale = 2^SM_WEIGHT_LOG2_SCALE
- const int log2_scale = SM_WEIGHT_LOG2_SCALE;
- const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+ const uint8_t *const sm_weights = smooth_weights + bh - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
@@ -468,10 +468,10 @@
const uint16_t *left, int bd) {
(void)bd;
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bw - 4;
- // scale = 2^SM_WEIGHT_LOG2_SCALE
- const int log2_scale = SM_WEIGHT_LOG2_SCALE;
- const uint16_t scale = (1 << SM_WEIGHT_LOG2_SCALE);
+ const uint8_t *const sm_weights = smooth_weights + bw - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
diff --git a/aom_dsp/intrapred_common.h b/aom_dsp/intrapred_common.h
index a872e8d..ac836a5 100644
--- a/aom_dsp/intrapred_common.h
+++ b/aom_dsp/intrapred_common.h
@@ -15,10 +15,10 @@
#include "config/aom_config.h"
// Weights are quadratic from '1' to '1 / block_size', scaled by
-// 2^SM_WEIGHT_LOG2_SCALE.
-#define SM_WEIGHT_LOG2_SCALE 8
+// 2^SMOOTH_WEIGHT_LOG2_SCALE.
+#define SMOOTH_WEIGHT_LOG2_SCALE 8
-static const uint8_t sm_weight_arrays[] = {
+static const uint8_t smooth_weights[] = {
// bs = 4
255, 149, 85, 64,
// bs = 8
@@ -35,7 +35,7 @@
13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4
};
-static const uint16_t sm_weight_arrays_u16[] = {
+static const uint16_t smooth_weights_u16[] = {
// block dimension = 4
255, 149, 85, 64,
// block dimension = 8
diff --git a/aom_dsp/x86/intrapred_ssse3.c b/aom_dsp/x86/intrapred_ssse3.c
index 296a41a..f0bd040 100644
--- a/aom_dsp/x86/intrapred_ssse3.c
+++ b/aom_dsp/x86/intrapred_ssse3.c
@@ -610,20 +610,19 @@
static INLINE void load_weight_w4(int height, __m128i *weight_h,
__m128i *weight_w) {
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
- const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)sm_weight_arrays)[0]);
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+ const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)smooth_weights)[0]);
weight_h[0] = _mm_unpacklo_epi8(t, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
if (height == 8) {
- const __m128i weight =
- _mm_loadl_epi64((const __m128i *)&sm_weight_arrays[4]);
+ const __m128i weight = _mm_loadl_epi64((const __m128i *)&smooth_weights[4]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
} else if (height == 16) {
const __m128i weight =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight, zero);
@@ -634,7 +633,7 @@
static INLINE void smooth_pred_4xh(const __m128i *pixel, const __m128i *wh,
const __m128i *ww, int h, uint8_t *dst,
ptrdiff_t stride, int second_half) {
- const __m128i round = _mm_set1_epi32((1 << SM_WEIGHT_LOG2_SCALE));
+ const __m128i round = _mm_set1_epi32((1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i one = _mm_set1_epi16(1);
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set1_epi32(0xc080400);
@@ -654,7 +653,7 @@
sum = _mm_add_epi32(s, sum);
sum = _mm_add_epi32(sum, round);
- sum = _mm_srai_epi32(sum, 1 + SM_WEIGHT_LOG2_SCALE);
+ sum = _mm_srai_epi32(sum, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
*(uint32_t *)dst = _mm_cvtsi128_si32(sum);
@@ -749,9 +748,9 @@
__m128i *weight_w) {
const __m128i zero = _mm_setzero_si128();
const int we_offset = height < 8 ? 0 : 4;
- __m128i we = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[we_offset]);
+ __m128i we = _mm_loadu_si128((const __m128i *)&smooth_weights[we_offset]);
weight_h[0] = _mm_unpacklo_epi8(we, zero);
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
if (height == 4) {
@@ -766,20 +765,20 @@
}
if (height == 16) {
- we = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+ we = _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weight_h[0] = _mm_unpacklo_epi8(we, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(we, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
} else if (height == 32) {
const __m128i weight_lo =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28]);
weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
const __m128i weight_hi =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28 + 16]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28 + 16]);
weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
@@ -790,7 +789,7 @@
static INLINE void smooth_pred_8xh(const __m128i *pixels, const __m128i *wh,
const __m128i *ww, int h, uint8_t *dst,
ptrdiff_t stride, int second_half) {
- const __m128i round = _mm_set1_epi32((1 << SM_WEIGHT_LOG2_SCALE));
+ const __m128i round = _mm_set1_epi32((1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i one = _mm_set1_epi16(1);
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
@@ -814,11 +813,11 @@
s0 = _mm_add_epi32(s0, sum0);
s0 = _mm_add_epi32(s0, round);
- s0 = _mm_srai_epi32(s0, 1 + SM_WEIGHT_LOG2_SCALE);
+ s0 = _mm_srai_epi32(s0, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
s1 = _mm_add_epi32(s1, sum1);
s1 = _mm_add_epi32(s1, round);
- s1 = _mm_srai_epi32(s1, 1 + SM_WEIGHT_LOG2_SCALE);
+ s1 = _mm_srai_epi32(s1, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
sum0 = _mm_packus_epi16(s0, s1);
sum0 = _mm_shuffle_epi8(sum0, gat);
@@ -888,17 +887,18 @@
const uint8_t *above,
const uint8_t *left, uint32_t bw,
uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
const __m128i zero = _mm_setzero_si128();
const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i bottom_left = _mm_cvtsi32_si128((uint32_t)left[bh - 1]);
const __m128i dup16 = _mm_set1_epi32(0x01000100);
const __m128i top_right =
_mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)above[bw - 1]), dup16);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- const __m128i round = _mm_set1_epi32((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ const __m128i round =
+ _mm_set1_epi32((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
for (uint32_t y = 0; y < bh; ++y) {
const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
@@ -933,8 +933,8 @@
pred_lo = _mm_add_epi32(pred_lo, swxtr_lo);
pred_hi = _mm_add_epi32(pred_hi, swxtr_hi);
- pred_lo = _mm_srai_epi32(pred_lo, (1 + SM_WEIGHT_LOG2_SCALE));
- pred_hi = _mm_srai_epi32(pred_hi, (1 + SM_WEIGHT_LOG2_SCALE));
+ pred_lo = _mm_srai_epi32(pred_lo, (1 + SMOOTH_WEIGHT_LOG2_SCALE));
+ pred_hi = _mm_srai_epi32(pred_hi, (1 + SMOOTH_WEIGHT_LOG2_SCALE));
__m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
pred = _mm_shuffle_epi8(pred, gat);
@@ -1033,21 +1033,20 @@
// weights[1]: scale - weights_h vector
static INLINE void load_weight_v_w4(int height, __m128i *weights) {
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
if (height == 4) {
const __m128i weight =
- _mm_cvtsi32_si128(((const uint32_t *)sm_weight_arrays)[0]);
+ _mm_cvtsi32_si128(((const uint32_t *)smooth_weights)[0]);
weights[0] = _mm_unpacklo_epi8(weight, zero);
weights[1] = _mm_sub_epi16(d, weights[0]);
} else if (height == 8) {
- const __m128i weight =
- _mm_loadl_epi64((const __m128i *)&sm_weight_arrays[4]);
+ const __m128i weight = _mm_loadl_epi64((const __m128i *)&smooth_weights[4]);
weights[0] = _mm_unpacklo_epi8(weight, zero);
weights[1] = _mm_sub_epi16(d, weights[0]);
} else {
const __m128i weight =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weights[0] = _mm_unpacklo_epi8(weight, zero);
weights[1] = _mm_sub_epi16(d, weights[0]);
weights[2] = _mm_unpackhi_epi8(weight, zero);
@@ -1058,7 +1057,8 @@
static INLINE void smooth_v_pred_4xh(const __m128i *pixel,
const __m128i *weight, int h, uint8_t *dst,
ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set1_epi32(0xc080400);
__m128i d = _mm_set1_epi16(0x100);
@@ -1069,7 +1069,7 @@
const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
__m128i sum = _mm_madd_epi16(pixel[0], wh_sc);
sum = _mm_add_epi32(sum, pred_round);
- sum = _mm_srai_epi32(sum, SM_WEIGHT_LOG2_SCALE);
+ sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
*(uint32_t *)dst = _mm_cvtsi128_si32(sum);
dst += stride;
@@ -1137,30 +1137,30 @@
// weight_h[7]: same as [1], offset 24
static INLINE void load_weight_v_w8(int height, __m128i *weight_h) {
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
if (height < 16) {
const int offset = height < 8 ? 0 : 4;
const __m128i weight =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[offset]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[offset]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
} else if (height == 16) {
const __m128i weight =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[12]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
} else {
const __m128i weight_lo =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28]);
weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
const __m128i weight_hi =
- _mm_loadu_si128((const __m128i *)&sm_weight_arrays[28 + 16]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28 + 16]);
weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
@@ -1170,7 +1170,8 @@
static INLINE void smooth_v_pred_8xh(const __m128i *pixels, const __m128i *wh,
int h, uint8_t *dst, ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
__m128i d = _mm_set1_epi16(0x100);
@@ -1183,10 +1184,10 @@
__m128i s1 = _mm_madd_epi16(pixels[1], wh_sc);
s0 = _mm_add_epi32(s0, pred_round);
- s0 = _mm_srai_epi32(s0, SM_WEIGHT_LOG2_SCALE);
+ s0 = _mm_srai_epi32(s0, SMOOTH_WEIGHT_LOG2_SCALE);
s1 = _mm_add_epi32(s1, pred_round);
- s1 = _mm_srai_epi32(s1, SM_WEIGHT_LOG2_SCALE);
+ s1 = _mm_srai_epi32(s1, SMOOTH_WEIGHT_LOG2_SCALE);
__m128i sum01 = _mm_packus_epi16(s0, s1);
sum01 = _mm_shuffle_epi8(sum01, gat);
@@ -1257,16 +1258,16 @@
const uint8_t *above,
const uint8_t *left, uint32_t bw,
uint32_t bh) {
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
const __m128i zero = _mm_setzero_si128();
const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i dup16 = _mm_set1_epi32(0x01000100);
const __m128i bottom_left =
_mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)left[bh - 1]), dup16);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
const __m128i round =
- _mm_set1_epi32((uint16_t)(1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+ _mm_set1_epi32((uint16_t)(1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
for (uint32_t y = 0; y < bh; ++y) {
const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
@@ -1287,8 +1288,8 @@
pred_lo = _mm_add_epi32(pred_lo, round);
pred_hi = _mm_add_epi32(pred_hi, round);
- pred_lo = _mm_srai_epi32(pred_lo, SM_WEIGHT_LOG2_SCALE);
- pred_hi = _mm_srai_epi32(pred_hi, SM_WEIGHT_LOG2_SCALE);
+ pred_lo = _mm_srai_epi32(pred_lo, SMOOTH_WEIGHT_LOG2_SCALE);
+ pred_hi = _mm_srai_epi32(pred_hi, SMOOTH_WEIGHT_LOG2_SCALE);
__m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
pred = _mm_shuffle_epi8(pred, gat);
@@ -1389,11 +1390,11 @@
// weights[0]: weights_w and scale - weights_w interleave vector
static INLINE void load_weight_h_w4(int height, __m128i *weights) {
(void)height;
- const __m128i t = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[0]);
+ const __m128i t = _mm_loadu_si128((const __m128i *)&smooth_weights[0]);
const __m128i zero = _mm_setzero_si128();
const __m128i weights_0 = _mm_unpacklo_epi8(t, zero);
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i weights_1 = _mm_sub_epi16(d, weights_0);
weights[0] = _mm_unpacklo_epi16(weights_0, weights_1);
}
@@ -1401,7 +1402,8 @@
static INLINE void smooth_h_pred_4xh(const __m128i *pixel,
const __m128i *weight, int h, uint8_t *dst,
ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i one = _mm_set1_epi16(1);
const __m128i gat = _mm_set1_epi32(0xc080400);
__m128i rep = _mm_set1_epi16((short)0x8000);
@@ -1412,7 +1414,7 @@
__m128i sum = _mm_madd_epi16(b, weight[0]);
sum = _mm_add_epi32(sum, pred_round);
- sum = _mm_srai_epi32(sum, SM_WEIGHT_LOG2_SCALE);
+ sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
*(uint32_t *)dst = _mm_cvtsi128_si32(sum);
@@ -1488,8 +1490,8 @@
static INLINE void load_weight_h_w8(int height, __m128i *weight_w) {
(void)height;
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
- const __m128i we = _mm_loadu_si128((const __m128i *)&sm_weight_arrays[4]);
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+ const __m128i we = _mm_loadu_si128((const __m128i *)&smooth_weights[4]);
const __m128i tmp1 = _mm_unpacklo_epi8(we, zero);
const __m128i tmp2 = _mm_sub_epi16(d, tmp1);
weight_w[0] = _mm_unpacklo_epi16(tmp1, tmp2);
@@ -1499,7 +1501,8 @@
static INLINE void smooth_h_pred_8xh(const __m128i *pixels, const __m128i *ww,
int h, uint8_t *dst, ptrdiff_t stride,
int second_half) {
- const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i one = _mm_set1_epi16(1);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
__m128i rep = second_half ? _mm_set1_epi16((short)0x8008)
@@ -1512,10 +1515,10 @@
__m128i sum1 = _mm_madd_epi16(b, ww[1]);
sum0 = _mm_add_epi32(sum0, pred_round);
- sum0 = _mm_srai_epi32(sum0, SM_WEIGHT_LOG2_SCALE);
+ sum0 = _mm_srai_epi32(sum0, SMOOTH_WEIGHT_LOG2_SCALE);
sum1 = _mm_add_epi32(sum1, pred_round);
- sum1 = _mm_srai_epi32(sum1, SM_WEIGHT_LOG2_SCALE);
+ sum1 = _mm_srai_epi32(sum1, SMOOTH_WEIGHT_LOG2_SCALE);
sum0 = _mm_packus_epi16(sum0, sum1);
sum0 = _mm_shuffle_epi8(sum0, gat);
@@ -1586,13 +1589,14 @@
const uint8_t *above,
const uint8_t *left, uint32_t bw,
uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw - 4;
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
const __m128i zero = _mm_setzero_si128();
const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << SM_WEIGHT_LOG2_SCALE));
+ _mm_set1_epi16((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i top_right = _mm_cvtsi32_si128((uint32_t)above[bw - 1]);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- const __m128i pred_round = _mm_set1_epi32((1 << (SM_WEIGHT_LOG2_SCALE - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
for (uint32_t y = 0; y < bh; ++y) {
const __m128i left_y = _mm_cvtsi32_si128((uint32_t)left[y]);
@@ -1612,8 +1616,8 @@
pred_lo = _mm_add_epi32(pred_lo, pred_round);
pred_hi = _mm_add_epi32(pred_hi, pred_round);
- pred_lo = _mm_srai_epi32(pred_lo, SM_WEIGHT_LOG2_SCALE);
- pred_hi = _mm_srai_epi32(pred_hi, SM_WEIGHT_LOG2_SCALE);
+ pred_lo = _mm_srai_epi32(pred_lo, SMOOTH_WEIGHT_LOG2_SCALE);
+ pred_hi = _mm_srai_epi32(pred_hi, SMOOTH_WEIGHT_LOG2_SCALE);
__m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
pred = _mm_shuffle_epi8(pred, gat);