Optimize filter weight adjustment in YUV strategy.
Use the linear function `sum_square_diff * 3 / num_ref_pixels` to
replace the original lookup table. This improves readability and
also makes the code easier to maintain. Furthermore, along with this
optimization, function `av1_apply_temporal_filter_yonly()` is merged
into function `av1_apply_temporal_filter_yuv()`.
Because the linear function may differ slightly from the original
lookup table (due to integer truncation), the performance may change
slightly, but the difference is negligible. Under speed-4, no clip in
any test set shows a performance change.
STATS_CHANGED
Change-Id: I2e1aa3d5b161c53babad492b3ffca8778fb007d0
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 0a99f70..3dfc6b5 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -284,7 +284,7 @@
add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv";
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- add_proto qw/void av1_apply_temporal_filter_yuv/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int strength, const int use_subblock, const int *subblock_filter_weights, const uint8_t *pred, uint32_t *accum, uint16_t *count";
+ add_proto qw/void av1_apply_temporal_filter_yuv/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const int strength, const int use_subblock, const int *subblock_filter_weights, const uint8_t *pred, uint32_t *accum, uint16_t *count";
specialize qw/av1_apply_temporal_filter_yuv sse4_1/;
}
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 46679ed..1abaa13 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -468,41 +468,7 @@
}
}
-// Magic numbers used to adjust the pixel-wise weight used in YUV filtering.
-// For now, it only supports 3x3 window for filtering.
-// The adjustment is performed with following steps:
-// (1) For a particular pixel, compute the sum of squared difference between
-// input frame and prediction in a small window (i.e., 3x3). There are
-// three possible outcomes:
-// (a) If the pixel locates in the middle of the plane, it has 9
-// neighbours (self-included).
-// (b) If the pixel locates on the edge of the plane, it has 6
-// neighbours (self-included).
-// (c) If the pixel locates on the corner of the plane, it has 4
-// neighbours (self-included).
-// (2) For Y-plane, it will also consider the squared difference from U-plane
-// and V-plane at the corresponding position as reference. This leads to
-// 2 more neighbours.
-// (3) For U-plane and V-plane, it will consider the squared difference from
-// Y-plane at the corresponding position (after upsampling) as reference.
-// This leads to 1 more (subsampling = 0) or 4 more (subsampling = 1)
-// neighbours.
-// (4) Find the modifier for adjustment from the lookup table according to
-// number of reference pixels (neighbours) used. From above, the number
-// of neighbours can be 9+2 (11), 6+2 (8), 4+2 (6), 9+1 (10), 6+1 (7),
-// 4+1 (5), 9+4 (13), 6+4 (10), 4+4 (8).
-// TODO(any): Not sure what index 4 and index 9 are for.
-static const uint32_t filter_weight_adjustment_lookup_table_yuv[14] = {
- 0, 0, 0, 0, 49152, 39322, 32768, 28087, 24576, 21846, 19661, 17874, 0, 15124
-};
-// Lookup table for high bit-depth.
-static const uint64_t highbd_filter_weight_adjustment_lookup_table_yuv[14] = {
- 0U, 0U, 0U, 0U, 3221225472U,
- 2576980378U, 2147483648U, 1840700270U, 1610612736U, 1431655766U,
- 1288490189U, 1171354718U, 0U, 991146300U
-};
-
-// Function to adjust the filter weight when applying YUV filter.
+// Function to adjust the filter weight when using the YUV strategy.
// Inputs:
// filter_weight: Original filter weight.
// sum_square_diff: Sum of squared difference between input frame and
@@ -513,34 +479,21 @@
// `filter_weight_adjustment_lookup_table_yuv` and
// `highbd_filter_weight_adjustment_lookup_table_yuv`.
// strength: Strength for filter weight adjustment.
-// is_high_bitdepth: Whether apply temporal filter to high bie-depth video.
// Returns:
// Adjusted filter weight which will finally be used for filtering.
static INLINE int adjust_filter_weight_yuv(const int filter_weight,
const uint64_t sum_square_diff,
const int num_ref_pixels,
- const int strength,
- const int is_high_bitdepth) {
- assert(TF_YUV_FILTER_WINDOW_LENGTH == 3);
- assert(num_ref_pixels >= 0 && num_ref_pixels <= 13);
-
- const uint64_t multiplier =
- is_high_bitdepth
- ? highbd_filter_weight_adjustment_lookup_table_yuv[num_ref_pixels]
- : filter_weight_adjustment_lookup_table_yuv[num_ref_pixels];
- assert(multiplier != 0);
-
- const uint32_t max_value = is_high_bitdepth ? UINT32_MAX : UINT16_MAX;
- const int shift = is_high_bitdepth ? 32 : 16;
+ const int strength) {
int modifier =
- (int)((AOMMIN(sum_square_diff, max_value) * multiplier) >> shift);
-
+ (int)(AOMMIN(sum_square_diff * TF_YUV_FILTER_WEIGHT_SCALE, INT32_MAX)) /
+ num_ref_pixels;
const int rounding = (1 << strength) >> 1;
modifier = (modifier + rounding) >> strength;
return (modifier >= 16) ? 0 : (16 - modifier) * filter_weight;
}
-// Applies temporal filter to YUV planes.
+// Applies temporal filter with YUV strategy.
// Inputs:
// frame_to_filter: Pointer to the frame to be filtered, which is used as
// reference to compute squared differece from the predictor.
@@ -549,6 +502,7 @@
// block_size: Size of the block.
// mb_row: Row index of the block in the entire frame.
// mb_col: Column index of the block in the entire frame.
+// num_planes: Number of planes in the frame.
// strength: Strength for filter weight adjustment.
// use_subblock: Whether to use 4 sub-blocks to replace the original block.
// subblock_filter_weights: The filter weights for each sub-block (row-major
@@ -560,14 +514,14 @@
// Returns:
// Nothing will be returned. But the content to which `accum` and `pred`
// point will be modified.
-void av1_apply_temporal_filter_yuv_c(const YV12_BUFFER_CONFIG *frame_to_filter,
- const MACROBLOCKD *mbd,
- const BLOCK_SIZE block_size,
- const int mb_row, const int mb_col,
- const int strength, const int use_subblock,
- const int *subblock_filter_weights,
- const uint8_t *pred, uint32_t *accum,
- uint16_t *count) {
+void av1_apply_temporal_filter_yuv_c(
+ const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
+ const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
+ const int num_planes, const int strength, const int use_subblock,
+ const int *subblock_filter_weights, const uint8_t *pred, uint32_t *accum,
+ uint16_t *count) {
+ assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
+
// Block information.
const int mb_height = block_size_high[block_size];
const int mb_width = block_size_wide[block_size];
@@ -575,14 +529,14 @@
const int is_high_bitdepth = is_frame_high_bitdepth(frame_to_filter);
const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);
- // Allocate memory for pixel-wise squared differences for Y, U, V planes. All
- // planes, regardless of the subsampling, are assigned with memory of size
- // `mb_pels`.
- uint32_t *square_diff = aom_memalign(16, 3 * mb_pels * sizeof(uint32_t));
- memset(square_diff, 0, 3 * mb_pels * sizeof(square_diff[0]));
+ // Allocate memory for pixel-wise squared differences for all planes. They,
+ // regardless of the subsampling, are assigned with memory of size `mb_pels`.
+ uint32_t *square_diff =
+ aom_memalign(16, num_planes * mb_pels * sizeof(uint32_t));
+ memset(square_diff, 0, num_planes * mb_pels * sizeof(square_diff[0]));
int plane_offset = 0;
- for (int plane = 0; plane < 3; ++plane) {
+ for (int plane = 0; plane < num_planes; ++plane) {
// Locate pixel on reference frame.
const int plane_h = mb_height >> mbd->plane[plane].subsampling_y;
const int plane_w = mb_width >> mbd->plane[plane].subsampling_x;
@@ -599,14 +553,11 @@
assert(TF_YUV_FILTER_WINDOW_LENGTH % 2 == 1);
const int half_window = TF_YUV_FILTER_WINDOW_LENGTH >> 1;
- // Handle Y-plane, U-plane, V-plane in sequence.
+ // Handle planes in sequence.
plane_offset = 0;
- for (int plane = 0; plane < 3; ++plane) {
+ for (int plane = 0; plane < num_planes; ++plane) {
const int subsampling_y = mbd->plane[plane].subsampling_y;
const int subsampling_x = mbd->plane[plane].subsampling_x;
- // Only 0 and 1 are supported for filter weight adjustment.
- assert(subsampling_y == 0 || subsampling_y == 1);
- assert(subsampling_x == 0 || subsampling_x == 1);
const int h = mb_height >> subsampling_y; // Plane height.
const int w = mb_width >> subsampling_x; // Plane width.
@@ -614,10 +565,6 @@
int pred_idx = 0;
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
- const int subblock_idx =
- use_subblock ? (i >= h / 2) * 2 + (j >= w / 2) : 0;
- const int filter_weight = subblock_filter_weights[subblock_idx];
-
// non-local mean approach
uint64_t sum_square_diff = 0;
int num_ref_pixels = 0;
@@ -634,7 +581,7 @@
}
if (plane == 0) { // Filter Y-plane using both U-plane and V-plane.
- for (int p = 1; p < 3; ++p) {
+ for (int p = 1; p < num_planes; ++p) {
const int ss_y_shift = mbd->plane[p].subsampling_y - subsampling_y;
const int ss_x_shift = mbd->plane[p].subsampling_x - subsampling_x;
const int yy = i >> ss_y_shift; // Y-coord on UV-plane.
@@ -657,11 +604,15 @@
}
}
+ // Base filter weight estimated by motion search error.
+ const int subblock_idx =
+ use_subblock ? (i >= h / 2) * 2 + (j >= w / 2) : 0;
+ const int filter_weight = subblock_filter_weights[subblock_idx];
+
const int idx = plane_offset + pred_idx; // Index with plane shift.
const int pred_value = is_high_bitdepth ? pred16[idx] : pred[idx];
const int adjusted_weight = adjust_filter_weight_yuv(
- filter_weight, sum_square_diff, num_ref_pixels, strength,
- is_high_bitdepth);
+ filter_weight, sum_square_diff, num_ref_pixels, strength);
accum[idx] += adjusted_weight * pred_value;
count[idx] += adjusted_weight;
@@ -674,128 +625,9 @@
aom_free(square_diff);
}
-// Function to adjust the filter weight when applying filter to Y-plane only.
-// Inputs:
-// filter_weight: Original filter weight.
-// sum_square_diff: Sum of squared difference between input frame and
-// prediction. This field is computed pixel by pixel, and
-// is used as a reference for the filter weight adjustment.
-// num_ref_pixels: Number of pixels used to compute the `sum_square_diff`.
-// strength: Strength for filter weight adjustment.
-// Returns:
-// Adjusted filter weight which will finally be used for filtering.
-static INLINE int adjust_filter_weight_yonly(const int filter_weight,
- const uint64_t sum_square_diff,
- const int num_ref_pixels,
- const int strength) {
- assert(TF_YONLY_FILTER_WINDOW_LENGTH == 3);
-
- int modifier = (int)(AOMMIN(sum_square_diff * 3, INT32_MAX));
- modifier /= num_ref_pixels;
-
- const int rounding = (1 << strength) >> 1;
- modifier = (modifier + rounding) >> strength;
- return (modifier >= 16) ? 0 : (16 - modifier) * filter_weight;
-}
-
-// Applies temporal filter to Y-plane ONLY.
-// Different from the function `av1_apply_temporal_filter_yuv_c()`, this
-// function only applies temporal filter to Y-plane. This should be used when
-// the input video frame only has one plane.
-// Inputs:
-// frame_to_filter: Pointer to the frame to be filtered, which is used as
-// reference to compute squared differece from the predictor.
-// mbd: Pointer to the block for filtering, which is ONLY used to get
-// subsampling information of Y-plane.
-// block_size: Size of the block.
-// mb_row: Row index of the block in the entire frame.
-// mb_col: Column index of the block in the entire frame.
-// strength: Strength for filter weight adjustment.
-// use_subblock: Whether to use 4 sub-blocks to replace the original block.
-// subblock_filter_weights: The filter weights for each sub-block (row-major
-// order). If `use_subblock` is set as 0, the first
-// weight will be applied to the entire block.
-// pred: Pointer to the well-built predictors.
-// accum: Pointer to the pixel-wise accumulator for filtering.
-// count: Pointer to the pixel-wise counter fot filtering.
-// Returns:
-// Nothing will be returned. But the content to which `accum` and `pred`
-// point will be modified.
-void av1_apply_temporal_filter_yonly(const YV12_BUFFER_CONFIG *frame_to_filter,
- const MACROBLOCKD *mbd,
- const BLOCK_SIZE block_size,
- const int mb_row, const int mb_col,
- const int strength, const int use_subblock,
- const int *subblock_filter_weights,
- const uint8_t *pred, uint32_t *accum,
- uint16_t *count) {
- // Block information.
- const int mb_height = block_size_high[block_size];
- const int mb_width = block_size_wide[block_size];
- const int mb_pels = mb_height * mb_width;
- const int is_high_bitdepth = is_frame_high_bitdepth(frame_to_filter);
- const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);
-
- // Y-plane information.
- const int subsampling_y = mbd->plane[0].subsampling_y;
- const int subsampling_x = mbd->plane[0].subsampling_x;
- const int h = mb_height >> subsampling_y;
- const int w = mb_width >> subsampling_x;
-
- // Pre-compute squared difference before filtering.
- const int frame_stride = frame_to_filter->y_stride;
- const int frame_offset = mb_row * h * frame_stride + mb_col * w;
- const uint8_t *ref = frame_to_filter->y_buffer;
- uint32_t *square_diff = aom_memalign(16, mb_pels * sizeof(uint32_t));
- memset(square_diff, 0, mb_pels * sizeof(square_diff[0]));
- compute_square_diff(ref, frame_offset, frame_stride, pred, 0, w, h, w,
- is_high_bitdepth, square_diff);
-
- // Get window size for pixel-wise filtering.
- assert(TF_YONLY_FILTER_WINDOW_LENGTH % 2 == 1);
- const int half_window = TF_YONLY_FILTER_WINDOW_LENGTH >> 1;
-
- // Perform filtering.
- int idx = 0;
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- const int subblock_idx =
- use_subblock ? (i >= h / 2) * 2 + (j >= w / 2) : 0;
- const int filter_weight = subblock_filter_weights[subblock_idx];
-
- // non-local mean approach
- uint64_t sum_square_diff = 0;
- int num_ref_pixels = 0;
-
- for (int wi = -half_window; wi <= half_window; ++wi) {
- for (int wj = -half_window; wj <= half_window; ++wj) {
- const int y = i + wi; // Y-coord on the current plane.
- const int x = j + wj; // X-coord on the current plane.
- if (y >= 0 && y < h && x >= 0 && x < w) {
- sum_square_diff += square_diff[y * w + x];
- ++num_ref_pixels;
- }
- }
- }
-
- const int pred_value = is_high_bitdepth ? pred16[idx] : pred[idx];
- const int adjusted_weight = adjust_filter_weight_yonly(
- filter_weight, sum_square_diff, num_ref_pixels, strength);
- accum[idx] += adjusted_weight * pred_value;
- count[idx] += adjusted_weight;
-
- ++idx;
- }
- }
-
- aom_free(square_diff);
-}
-
-// Applies temporal filter plane by plane.
-// Different from the function `av1_apply_temporal_filter_yuv_c()` and the
-// function `av1_apply_temporal_filter_yonly()`, this function applies temporal
-// filter to each plane independently. Besides, the strategy of filter weight
-// adjustment is different from the other two functions.
+// Applies temporal filter with plane-wise strategy.
+// The strategy of filter weight adjustment is different from the function
+// `av1_apply_temporal_filter_yuv_c()`.
// Inputs:
// frame_to_filter: Pointer to the frame to be filtered, which is used as
// reference to compute squared differece from the predictor.
@@ -933,20 +765,17 @@
// mb_row: Row index of the block in the entire frame.
// mb_col: Column index of the block in the entire frame.
// num_planes: Number of planes in the frame.
-// use_planewise_strategy: Whether to use plane-wise temporal filtering
-// strategy. If set as 0, YUV or YONLY filtering will
-// be used (depending on number of planes).
-// strength: Strength for filter weight adjustment. (Used in YUV filtering and
-// YONLY filtering)
+// use_planewise_strategy: Whether to use plane-wise strategy or YUV strategy.
+// strength: Strength for filter weight adjustment. (Used in YUV strategy)
// use_subblock: Whether to use 4 sub-blocks to replace the original block.
-// (Used in YUV filtering and YONLY filtering)
+// (Used in YUV strategy)
// subblock_filter_weights: The filter weights for each sub-block (row-major
// order). If `use_subblock` is set as 0, the first
// weight will be applied to the entire block. (Used
-// in YUV filtering and YONLY filtering)
+// in YUV strategy)
// noise_levels: Pointer to the noise levels of the to-filter frame, estimated
// with each plane (in Y, U, V order). (Used in plane-wise
-// filtering)
+// strategy)
// pred: Pointer to the well-built predictors.
// accum: Pointer to the pixel-wise accumulator for filtering.
// count: Pointer to the pixel-wise counter fot filtering.
@@ -975,16 +804,18 @@
}
} else { // Commonly used for low-resolution video.
const int adj_strength = strength + 2 * (mbd->bd - 8);
- if (num_planes == 1) {
- av1_apply_temporal_filter_yonly(
- frame_to_filter, mbd, block_size, mb_row, mb_col, adj_strength,
- use_subblock, subblock_filter_weights, pred, accum, count);
- } else if (num_planes == 3) {
- av1_apply_temporal_filter_yuv(
- frame_to_filter, mbd, block_size, mb_row, mb_col, adj_strength,
- use_subblock, subblock_filter_weights, pred, accum, count);
+ if (num_planes == 3 && TF_YUV_FILTER_WEIGHT_SCALE == 3) {
+ av1_apply_temporal_filter_yuv(frame_to_filter, mbd, block_size, mb_row,
+ mb_col, num_planes, adj_strength,
+ use_subblock, subblock_filter_weights, pred,
+ accum, count);
} else {
- assert(0 && "Only support Y-plane and YUV-plane modes.");
+ // TODO(any): sse4 version should be changed to align with C function
+ // before using.
+ av1_apply_temporal_filter_yuv_c(frame_to_filter, mbd, block_size, mb_row,
+ mb_col, num_planes, adj_strength,
+ use_subblock, subblock_filter_weights,
+ pred, accum, count);
}
}
}
@@ -1284,8 +1115,8 @@
}
// Estimates the strength for filter weight adjustment, which is used in YUV
-// filtering and YONLY filtering. This estimation is based on the pre-estimated
-// noise level of the to-filter frame.
+// strategy. This estimation is based on the pre-estimated noise level of the
+// to-filter frame.
// Inputs:
// cpi: Pointer to the composed information of input video.
// noise_level: Noise level of the to-filter frame, estimated with Y-plane.
diff --git a/av1/encoder/temporal_filter.h b/av1/encoder/temporal_filter.h
index eb49ce0..ef86496 100644
--- a/av1/encoder/temporal_filter.h
+++ b/av1/encoder/temporal_filter.h
@@ -22,13 +22,11 @@
#define BH 32
#define BW 32
-// Window size for temporal filtering on YUV planes.
+// Window size for YUV temporal filtering.
// This is particually used for function `av1_apply_temporal_filter_yuv()`.
#define TF_YUV_FILTER_WINDOW_LENGTH 3
-
-// Window size for temporal filtering on Y planes.
-// This is particually used for function `av1_apply_temporal_filter_yonly()`.
-#define TF_YONLY_FILTER_WINDOW_LENGTH 3
+// A scale factor used in YUV temporal filtering for weight adjustment.
+#define TF_YUV_FILTER_WEIGHT_SCALE 3
#define TF_ENABLE_PLANEWISE_STRATEGY 1
// Window size for plane-wise temporal filtering.
diff --git a/av1/encoder/x86/temporal_filter_sse4.c b/av1/encoder/x86/temporal_filter_sse4.c
index 5e43e43..e3f9f5f 100644
--- a/av1/encoder/x86/temporal_filter_sse4.c
+++ b/av1/encoder/x86/temporal_filter_sse4.c
@@ -2025,10 +2025,13 @@
void av1_apply_temporal_filter_yuv_sse4_1(
const YV12_BUFFER_CONFIG *ref_frame, const MACROBLOCKD *mbd,
const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
- const int strength, const int use_subblock,
+ const int num_planes, const int strength, const int use_subblock,
const int *subblock_filter_weights, const uint8_t *pred, uint32_t *accum,
uint16_t *count) {
const int is_high_bitdepth = ref_frame->flags & YV12_FLAG_HIGHBITDEPTH;
+ // TODO(any): Need to support when `num_planes != 3`, like C implementation.
+ assert(num_planes == 3);
+ (void)num_planes;
if (is_high_bitdepth) {
highbd_apply_temporal_filter_yuv(
ref_frame, mbd, block_size, mb_row, mb_col, strength, use_subblock,
diff --git a/test/temporal_filter_yuv_test.cc b/test/temporal_filter_yuv_test.cc
index 15ffec3..dc17aaa 100644
--- a/test/temporal_filter_yuv_test.cc
+++ b/test/temporal_filter_yuv_test.cc
@@ -29,8 +29,8 @@
typedef void (*TemporalFilterYUVFunc)(
const YV12_BUFFER_CONFIG *ref_frame, const MACROBLOCKD *mbd,
const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
- const int strength, const int use_subblock, const int *blk_fw,
- const uint8_t *pred, uint32_t *accum, uint16_t *count);
+ const int num_planes, const int strength, const int use_subblock,
+ const int *blk_fw, const uint8_t *pred, uint32_t *accum, uint16_t *count);
struct TemporalFilterWithBd {
TemporalFilterWithBd(TemporalFilterYUVFunc func, int bitdepth)
@@ -414,6 +414,7 @@
assert(block_width == MAX_WIDTH && MAX_WIDTH == 32);
assert(block_height == MAX_HEIGHT && MAX_HEIGHT == 32);
const BLOCK_SIZE block_size = BLOCK_32X32;
+ const int num_planes = 3;
const int mb_pels = MAX_WIDTH * MAX_HEIGHT;
const int mb_row = 0;
const int mb_col = 0;
@@ -455,9 +456,9 @@
memcpy(count + 1 * mb_pels, u_count, mb_pels * sizeof(uint16_t));
memcpy(count + 2 * mb_pels, v_count, mb_pels * sizeof(uint16_t));
- ASM_REGISTER_STATE_CHECK(filter_func_(ref_frame, mbd, block_size, mb_row,
- mb_col, strength, use_subblock, blk_fw,
- pred, accum, count));
+ ASM_REGISTER_STATE_CHECK(
+ filter_func_(ref_frame, mbd, block_size, mb_row, mb_col, num_planes,
+ strength, use_subblock, blk_fw, pred, accum, count));
memcpy(y_accum, accum + 0 * mb_pels, mb_pels * sizeof(uint32_t));
memcpy(u_accum, accum + 1 * mb_pels, mb_pels * sizeof(uint32_t));
@@ -487,6 +488,7 @@
assert(block_width == MAX_WIDTH && MAX_WIDTH == 32);
assert(block_height == MAX_HEIGHT && MAX_HEIGHT == 32);
const BLOCK_SIZE block_size = BLOCK_32X32;
+ const int num_planes = 3;
const int mb_pels = MAX_WIDTH * MAX_HEIGHT;
const int mb_row = 0;
const int mb_col = 0;
@@ -529,9 +531,9 @@
memcpy(count + 2 * mb_pels, v_count, mb_pels * sizeof(uint16_t));
const uint8_t *pred = CONVERT_TO_BYTEPTR(pred16);
- ASM_REGISTER_STATE_CHECK(filter_func_(ref_frame, mbd, block_size, mb_row,
- mb_col, strength, use_subblock, blk_fw,
- pred, accum, count));
+ ASM_REGISTER_STATE_CHECK(
+ filter_func_(ref_frame, mbd, block_size, mb_row, mb_col, num_planes,
+ strength, use_subblock, blk_fw, pred, accum, count));
memcpy(y_accum, accum + 0 * mb_pels, mb_pels * sizeof(uint32_t));
memcpy(u_accum, accum + 1 * mb_pels, mb_pels * sizeof(uint32_t));