Add 3/4 scaling Neon optimizations
Ported from vp9
Change-Id: I10ef122842f8a489fba6b8b5953b325679ce36e7
diff --git a/av1/common/arm/resize_neon.c b/av1/common/arm/resize_neon.c
index 7169cd4ec..b570e15 100644
--- a/av1/common/arm/resize_neon.c
+++ b/av1/common/arm/resize_neon.c
@@ -428,6 +428,312 @@
} while (x);
}
+static INLINE uint8x8_t scale_filter_bilinear(const uint8x8_t *const s,
+ const uint8x8_t *const coef) {
+ const uint16x8_t h0 = vmull_u8(s[0], coef[0]);
+ const uint16x8_t h1 = vmlal_u8(h0, s[1], coef[1]);
+
+ return vrshrn_n_u16(h1, 7);
+}
+
+// Notes for 4 to 3 scaling:
+//
+// 1. 6 rows are calculated in each horizontal inner loop, so width_hor must be
+// multiple of 6, and no less than w.
+//
+// 2. 8 rows are calculated in each vertical inner loop, so width_ver must be
+// multiple of 8, and no less than w.
+//
+// 3. 8 columns are calculated in each horizontal inner loop for further
+// vertical scaling, so height_hor must be multiple of 8, and no less than
+// 4 * h / 3.
+//
+// 4. 6 columns are calculated in each vertical inner loop, so height_ver must
+// be multiple of 6, and no less than h.
+//
+// 5. The physical location of the last row of the 4 to 3 scaled frame is
+// decided by phase_scaler, and are always less than 1 pixel below the last row
+// of the original image.
+static void scale_plane_4_to_3_bilinear(const uint8_t *src,
+ const int src_stride, uint8_t *dst,
+ const int dst_stride, const int w,
+ const int h, const int phase_scaler,
+ uint8_t *const temp_buffer) {
+ static const int step_q4 = 16 * 4 / 3;
+ const int width_hor = (w + 5) - ((w + 5) % 6);
+ const int stride_hor = width_hor + 2; // store 2 extra pixels
+ const int width_ver = (w + 7) & ~7;
+ // We only need 1 extra row below because there are only 2 bilinear
+ // coefficients.
+ const int height_hor = (4 * h / 3 + 1 + 7) & ~7;
+ const int height_ver = (h + 5) - ((h + 5) % 6);
+ int x, y = height_hor;
+ uint8_t *t = temp_buffer;
+ uint8x8_t s[9], d[8], c[6];
+ const InterpKernel *interp_kernel =
+ (const InterpKernel *)av1_interp_filter_params_list[BILINEAR].filter_ptr;
+ assert(w && h);
+
+ c[0] = vdup_n_u8((uint8_t)interp_kernel[phase_scaler][3]);
+ c[1] = vdup_n_u8((uint8_t)interp_kernel[phase_scaler][4]);
+ c[2] = vdup_n_u8(
+ (uint8_t)interp_kernel[(phase_scaler + 1 * step_q4) & SUBPEL_MASK][3]);
+ c[3] = vdup_n_u8(
+ (uint8_t)interp_kernel[(phase_scaler + 1 * step_q4) & SUBPEL_MASK][4]);
+ c[4] = vdup_n_u8(
+ (uint8_t)interp_kernel[(phase_scaler + 2 * step_q4) & SUBPEL_MASK][3]);
+ c[5] = vdup_n_u8(
+ (uint8_t)interp_kernel[(phase_scaler + 2 * step_q4) & SUBPEL_MASK][4]);
+
+ d[6] = vdup_n_u8(0);
+ d[7] = vdup_n_u8(0);
+
+ // horizontal 6x8
+ do {
+ load_u8_8x8(src, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5],
+ &s[6], &s[7]);
+ src += 1;
+ transpose_u8_8x8(&s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]);
+ x = width_hor;
+
+ do {
+ load_u8_8x8(src, src_stride, &s[1], &s[2], &s[3], &s[4], &s[5], &s[6],
+ &s[7], &s[8]);
+ src += 8;
+ transpose_u8_8x8(&s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7], &s[8]);
+
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ d[0] = scale_filter_bilinear(&s[0], &c[0]);
+ d[1] =
+ scale_filter_bilinear(&s[(phase_scaler + 1 * step_q4) >> 4], &c[2]);
+ d[2] =
+ scale_filter_bilinear(&s[(phase_scaler + 2 * step_q4) >> 4], &c[4]);
+ d[3] = scale_filter_bilinear(&s[4], &c[0]);
+ d[4] = scale_filter_bilinear(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)],
+ &c[2]);
+ d[5] = scale_filter_bilinear(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)],
+ &c[4]);
+
+ // 00 01 02 03 04 05 xx xx
+ // 10 11 12 13 14 15 xx xx
+ // 20 21 22 23 24 25 xx xx
+ // 30 31 32 33 34 35 xx xx
+ // 40 41 42 43 44 45 xx xx
+ // 50 51 52 53 54 55 xx xx
+ // 60 61 62 63 64 65 xx xx
+ // 70 71 72 73 74 75 xx xx
+ transpose_u8_8x8(&d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
+ // store 2 extra pixels
+ vst1_u8(t + 0 * stride_hor, d[0]);
+ vst1_u8(t + 1 * stride_hor, d[1]);
+ vst1_u8(t + 2 * stride_hor, d[2]);
+ vst1_u8(t + 3 * stride_hor, d[3]);
+ vst1_u8(t + 4 * stride_hor, d[4]);
+ vst1_u8(t + 5 * stride_hor, d[5]);
+ vst1_u8(t + 6 * stride_hor, d[6]);
+ vst1_u8(t + 7 * stride_hor, d[7]);
+
+ s[0] = s[8];
+
+ t += 6;
+ x -= 6;
+ } while (x);
+ src += 8 * src_stride - 4 * width_hor / 3 - 1;
+ t += 7 * stride_hor + 2;
+ y -= 8;
+ } while (y);
+
+ // vertical 8x6
+ x = width_ver;
+ t = temp_buffer;
+ do {
+ load_u8_8x8(t, stride_hor, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6],
+ &s[7]);
+ t += stride_hor;
+ y = height_ver;
+
+ do {
+ load_u8_8x8(t, stride_hor, &s[1], &s[2], &s[3], &s[4], &s[5], &s[6],
+ &s[7], &s[8]);
+ t += 8 * stride_hor;
+
+ d[0] = scale_filter_bilinear(&s[0], &c[0]);
+ d[1] =
+ scale_filter_bilinear(&s[(phase_scaler + 1 * step_q4) >> 4], &c[2]);
+ d[2] =
+ scale_filter_bilinear(&s[(phase_scaler + 2 * step_q4) >> 4], &c[4]);
+ d[3] = scale_filter_bilinear(&s[4], &c[0]);
+ d[4] = scale_filter_bilinear(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)],
+ &c[2]);
+ d[5] = scale_filter_bilinear(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)],
+ &c[4]);
+ vst1_u8(dst + 0 * dst_stride, d[0]);
+ vst1_u8(dst + 1 * dst_stride, d[1]);
+ vst1_u8(dst + 2 * dst_stride, d[2]);
+ vst1_u8(dst + 3 * dst_stride, d[3]);
+ vst1_u8(dst + 4 * dst_stride, d[4]);
+ vst1_u8(dst + 5 * dst_stride, d[5]);
+
+ s[0] = s[8];
+
+ dst += 6 * dst_stride;
+ y -= 6;
+ } while (y);
+ t -= stride_hor * (4 * height_ver / 3 + 1);
+ t += 8;
+ dst -= height_ver * dst_stride;
+ dst += 8;
+ x -= 8;
+ } while (x);
+}
+
+static void scale_plane_4_to_3_general(const uint8_t *src, const int src_stride,
+                                       uint8_t *dst, const int dst_stride,
+                                       const int w, const int h,
+                                       const InterpKernel *const coef,
+                                       const int phase_scaler,
+                                       uint8_t *const temp_buffer) {
+  static const int step_q4 = 16 * 4 / 3;
+  const int width_hor = (w + 5) - ((w + 5) % 6);
+  const int stride_hor = width_hor + 2;  // store 2 extra pixels
+  const int width_ver = (w + 7) & ~7;
+  // We need (SUBPEL_TAPS - 1) extra rows: (SUBPEL_TAPS / 2 - 1) extra rows
+  // above and (SUBPEL_TAPS / 2) extra rows below.
+  const int height_hor = (4 * h / 3 + SUBPEL_TAPS - 1 + 7) & ~7;
+  const int height_ver = (h + 5) - ((h + 5) % 6);
+  const int16x8_t filters0 = vld1q_s16(
+      (const int16_t *)&coef[(phase_scaler + 0 * step_q4) & SUBPEL_MASK]);
+  const int16x8_t filters1 = vld1q_s16(
+      (const int16_t *)&coef[(phase_scaler + 1 * step_q4) & SUBPEL_MASK]);
+  const int16x8_t filters2 = vld1q_s16(
+      (const int16_t *)&coef[(phase_scaler + 2 * step_q4) & SUBPEL_MASK]);
+ int x, y = height_hor;
+ uint8_t *t = temp_buffer;
+ uint8x8_t s[15], d[8];
+
+ assert(w && h);
+
+ src -= (SUBPEL_TAPS / 2 - 1) * src_stride + SUBPEL_TAPS / 2;
+ d[6] = vdup_n_u8(0);
+ d[7] = vdup_n_u8(0);
+
+ // horizontal 6x8
+ do {
+ load_u8_8x8(src + 1, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5],
+ &s[6], &s[7]);
+ transpose_u8_8x8(&s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]);
+ x = width_hor;
+
+ do {
+ src += 8;
+ load_u8_8x8(src, src_stride, &s[7], &s[8], &s[9], &s[10], &s[11], &s[12],
+ &s[13], &s[14]);
+ transpose_u8_8x8(&s[7], &s[8], &s[9], &s[10], &s[11], &s[12], &s[13],
+ &s[14]);
+
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ d[0] = scale_filter_8(&s[0], filters0);
+ d[1] = scale_filter_8(&s[(phase_scaler + 1 * step_q4) >> 4], filters1);
+ d[2] = scale_filter_8(&s[(phase_scaler + 2 * step_q4) >> 4], filters2);
+ d[3] = scale_filter_8(&s[4], filters0);
+ d[4] =
+ scale_filter_8(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)], filters1);
+ d[5] =
+ scale_filter_8(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)], filters2);
+
+ // 00 01 02 03 04 05 xx xx
+ // 10 11 12 13 14 15 xx xx
+ // 20 21 22 23 24 25 xx xx
+ // 30 31 32 33 34 35 xx xx
+ // 40 41 42 43 44 45 xx xx
+ // 50 51 52 53 54 55 xx xx
+ // 60 61 62 63 64 65 xx xx
+ // 70 71 72 73 74 75 xx xx
+ transpose_u8_8x8(&d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
+ // store 2 extra pixels
+ vst1_u8(t + 0 * stride_hor, d[0]);
+ vst1_u8(t + 1 * stride_hor, d[1]);
+ vst1_u8(t + 2 * stride_hor, d[2]);
+ vst1_u8(t + 3 * stride_hor, d[3]);
+ vst1_u8(t + 4 * stride_hor, d[4]);
+ vst1_u8(t + 5 * stride_hor, d[5]);
+ vst1_u8(t + 6 * stride_hor, d[6]);
+ vst1_u8(t + 7 * stride_hor, d[7]);
+
+ s[0] = s[8];
+ s[1] = s[9];
+ s[2] = s[10];
+ s[3] = s[11];
+ s[4] = s[12];
+ s[5] = s[13];
+ s[6] = s[14];
+
+ t += 6;
+ x -= 6;
+ } while (x);
+ src += 8 * src_stride - 4 * width_hor / 3;
+ t += 7 * stride_hor + 2;
+ y -= 8;
+ } while (y);
+
+ // vertical 8x6
+ x = width_ver;
+ t = temp_buffer;
+ do {
+ load_u8_8x8(t, stride_hor, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6],
+ &s[7]);
+ t += 7 * stride_hor;
+ y = height_ver;
+
+ do {
+ load_u8_8x8(t, stride_hor, &s[7], &s[8], &s[9], &s[10], &s[11], &s[12],
+ &s[13], &s[14]);
+ t += 8 * stride_hor;
+
+ d[0] = scale_filter_8(&s[0], filters0);
+ d[1] = scale_filter_8(&s[(phase_scaler + 1 * step_q4) >> 4], filters1);
+ d[2] = scale_filter_8(&s[(phase_scaler + 2 * step_q4) >> 4], filters2);
+ d[3] = scale_filter_8(&s[4], filters0);
+ d[4] =
+ scale_filter_8(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)], filters1);
+ d[5] =
+ scale_filter_8(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)], filters2);
+ vst1_u8(dst + 0 * dst_stride, d[0]);
+ vst1_u8(dst + 1 * dst_stride, d[1]);
+ vst1_u8(dst + 2 * dst_stride, d[2]);
+ vst1_u8(dst + 3 * dst_stride, d[3]);
+ vst1_u8(dst + 4 * dst_stride, d[4]);
+ vst1_u8(dst + 5 * dst_stride, d[5]);
+
+ s[0] = s[8];
+ s[1] = s[9];
+ s[2] = s[10];
+ s[3] = s[11];
+ s[4] = s[12];
+ s[5] = s[13];
+ s[6] = s[14];
+
+ dst += 6 * dst_stride;
+ y -= 6;
+ } while (y);
+ t -= stride_hor * (4 * height_ver / 3 + 7);
+ t += 8;
+ dst -= height_ver * dst_stride;
+ dst += 8;
+ x -= 8;
+ } while (x);
+}
+
void av1_resize_and_extend_frame_neon(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
const InterpFilter filter,
@@ -495,6 +801,27 @@
free(temp_buffer);
}
}
+ } else if (4 * dst_w == 3 * src_w && 4 * dst_h == 3 * src_h) {
+ // 4 to 3
+ const int buffer_stride = (dst_w + 5) - ((dst_w + 5) % 6) + 2;
+ const int buffer_height = (4 * dst_h / 3 + SUBPEL_TAPS - 1 + 7) & ~7;
+ uint8_t *const temp_buffer =
+ (uint8_t *)malloc(buffer_stride * buffer_height);
+ if (temp_buffer) {
+ if (filter == BILINEAR) {
+ scale_plane_4_to_3_bilinear(src->buffers[i], src->strides[is_uv],
+ dst->buffers[i], dst->strides[is_uv],
+ dst_w, dst_h, phase, temp_buffer);
+ } else {
+        const InterpKernel *interp_kernel =
+            (const InterpKernel *)av1_interp_filter_params_list[filter]
+                .filter_ptr;
+        scale_plane_4_to_3_general(src->buffers[i], src->strides[is_uv],
+                                   dst->buffers[i], dst->strides[is_uv],
+                                   dst_w, dst_h, interp_kernel, phase,
+                                   temp_buffer);
+ }
+ }
} else {
av1_resize_plane(src->buffers[i], src_h, src_w, src->strides[is_uv],
dst->buffers[i], dst_h, dst_w, dst->strides[is_uv]);
diff --git a/av1/common/resize.c b/av1/common/resize.c
index f39ca31..74847ba 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -1341,22 +1341,20 @@
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled,
const InterpFilter filter,
- const int phase) {
+ const int phase,
+ const int use_optimized_scaler) {
const int num_planes = av1_num_planes(cm);
if (cm->width != unscaled->y_crop_width ||
cm->height != unscaled->y_crop_height) {
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->width <= (unscaled->y_crop_width >> 1) &&
- cm->height <= (unscaled->y_crop_height >> 1) &&
- cm->seq_params.bit_depth == AOM_BITS_8) {
+ if (use_optimized_scaler && cm->seq_params.bit_depth == AOM_BITS_8) {
av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes);
} else {
av1_resize_and_extend_frame_nonnormative(
unscaled, scaled, (int)cm->seq_params.bit_depth, num_planes);
}
#else
- if (cm->width <= (unscaled->y_crop_width >> 1) &&
- cm->height <= (unscaled->y_crop_height >> 1)) {
+ if (use_optimized_scaler) {
av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes);
} else {
av1_resize_and_extend_frame_nonnormative(
diff --git a/av1/common/resize.h b/av1/common/resize.h
index f5c84b0..af6eeb7 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -71,11 +71,9 @@
const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
-YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled,
- const InterpFilter filter,
- const int phase);
+YV12_BUFFER_CONFIG *av1_scale_if_required(
+ AV1_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
+ const InterpFilter filter, const int phase, const int use_optimized_scaler);
void av1_resize_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst, int bd,
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index b9fbd23..63b55bf 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -948,7 +948,7 @@
if (apply_filtering && is_psnr_calc_enabled(cpi)) {
cpi->source =
av1_scale_if_required(cm, source_kf_buffer, &cpi->scaled_source,
- cm->features.interp_filter, 0);
+ cm->features.interp_filter, 0, 0);
cpi->unscaled_source = source_kf_buffer;
}
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 7ae8079..328faeb 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2058,7 +2058,7 @@
aom_clear_system_state();
cpi->source = av1_scale_if_required(cm, unscaled, &cpi->scaled_source,
- filter_scaler, phase_scaler);
+ filter_scaler, phase_scaler, 1);
if (frame_is_intra_only(cm) || resize_pending != 0) {
memset(cpi->consec_zero_mv, 0,
((cm->mi_params.mi_rows * cm->mi_params.mi_cols) >> 2) *
@@ -2068,7 +2068,7 @@
if (cpi->unscaled_last_source != NULL) {
cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
&cpi->scaled_last_source,
- filter_scaler, phase_scaler);
+ filter_scaler, phase_scaler, 1);
}
if (cpi->sf.rt_sf.use_temporal_noise_estimate) {
@@ -2080,7 +2080,7 @@
// use for newmv search, we can avoid scaling here.
if (!frame_is_intra_only(cm) &&
!(cpi->use_svc && cpi->svc.force_zero_mode_spatial_ref))
- av1_scale_references(cpi, filter_scaler, phase_scaler);
+ av1_scale_references(cpi, filter_scaler, phase_scaler, 1);
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
q_cfg->enable_chroma_deltaq);
@@ -2239,19 +2239,19 @@
}
}
cpi->source = av1_scale_if_required(
- cm, cpi->unscaled_source, &cpi->scaled_source, EIGHTTAP_REGULAR, 0);
+ cm, cpi->unscaled_source, &cpi->scaled_source, EIGHTTAP_REGULAR, 0, 0);
if (cpi->unscaled_last_source != NULL) {
- cpi->last_source =
- av1_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source, EIGHTTAP_REGULAR, 0);
+ cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source,
+ EIGHTTAP_REGULAR, 0, 0);
}
if (!frame_is_intra_only(cm)) {
if (loop_count > 0) {
release_scaled_references(cpi);
}
- av1_scale_references(cpi, EIGHTTAP_REGULAR, 0);
+ av1_scale_references(cpi, EIGHTTAP_REGULAR, 0, 0);
}
#if CONFIG_TUNE_VMAF
if (oxcf->tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 72cc146..b55acb6 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -633,7 +633,7 @@
}
void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
- const int phase) {
+ const int phase, const int use_optimized_scaler) {
AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
MV_REFERENCE_FRAME ref_frame;
@@ -692,17 +692,14 @@
"Failed to allocate frame buffer");
}
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->width <= (ref->y_crop_width >> 1) &&
- cm->height <= (ref->y_crop_height >> 1) &&
- cm->seq_params.bit_depth == AOM_BITS_8)
+ if (use_optimized_scaler && cm->seq_params.bit_depth == AOM_BITS_8)
av1_resize_and_extend_frame(ref, &new_fb->buf, filter, phase,
num_planes);
else
av1_resize_and_extend_frame_nonnormative(
ref, &new_fb->buf, (int)cm->seq_params.bit_depth, num_planes);
#else
- if (cm->width <= (ref->y_crop_width >> 1) &&
- cm->height <= (ref->y_crop_height >> 1))
+ if (use_optimized_scaler)
av1_resize_and_extend_frame(ref, &new_fb->buf, filter, phase,
num_planes);
else
@@ -942,11 +939,11 @@
cpi->source =
av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source,
- cm->features.interp_filter, 0);
+ cm->features.interp_filter, 0, 0);
if (cpi->unscaled_last_source != NULL) {
cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
&cpi->scaled_last_source,
- cm->features.interp_filter, 0);
+ cm->features.interp_filter, 0, 0);
}
av1_setup_frame(cpi);
diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h
index 55347ba..4cb480a3 100644
--- a/av1/encoder/encoder_utils.h
+++ b/av1/encoder/encoder_utils.h
@@ -787,7 +787,7 @@
const AV1EncoderConfig *oxcf);
void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
- const int phase);
+ const int phase, const int use_optimized_scaler);
void av1_setup_frame(AV1_COMP *cpi);