Upgrade resampler for scaled output from aomdec
When a valid p/q scaling ratio can be derived for the Lanczos filters, aomdec
now scales its output with the 2D Lanczos resampler instead of libyuv.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f5bba8b..02e54ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,6 +166,8 @@
list(
APPEND
AOM_DECODER_APP_UTIL_SOURCES
+ "${AOM_ROOT}/av1/common/lanczos_resample.h"
+ "${AOM_ROOT}/av1/common/lanczos_resample.c"
"${AOM_ROOT}/common/ivfdec.c"
"${AOM_ROOT}/common/ivfdec.h"
"${AOM_ROOT}/common/obudec.c"
diff --git a/apps/aomdec.c b/apps/aomdec.c
index b9f3981..25f1ccd 100644
--- a/apps/aomdec.c
+++ b/apps/aomdec.c
@@ -31,6 +31,7 @@
#include "aom/aomdx.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/mem_ops.h"
+#include "av1/common/lanczos_resample.h"
#include "common/args.h"
#include "common/ivfdec.h"
#include "common/md5_utils.h"
@@ -120,6 +121,53 @@
&skipfilmgrain, NULL
};
+// Returns the length of one dimension of an I420 plane: chroma planes
+// (is_uv != 0) are subsampled by two with rounding up, luma keeps `size`.
+static INLINE int get_plane_size_i420(int size, int is_uv) {
+  return is_uv ? (size + 1) >> 1 : size;
+}
+
+// Resamples the three planes of `src` into `dst` with the 2D Lanczos
+// resampler. Returns 0 on success. Returns -1, before anything is written to
+// `dst`, when the two images have different formats, the format is neither
+// AOM_IMG_FMT_I420 nor AOM_IMG_FMT_I42016, or no common p/q scale factor is
+// found for both the width and the height.
+static INLINE int lanczos_scale(aom_image_t *src, aom_image_t *dst, int bd) {
+ if (src->fmt != dst->fmt ||
+ (src->fmt != AOM_IMG_FMT_I42016 && src->fmt != AOM_IMG_FMT_I420))
+ return -1;
+
+ // Scale factor derived from the display widths; both outputs stay at -1
+ // when no usable factor exists.
+ int scale_q = -1;
+ int scale_p = -1;
+ av1_derive_scale_factor(src->d_w, dst->d_w, &scale_p, &scale_q);
+ if (scale_p <= 0 || scale_q <= 0) return -1;
+
+ // The heights must yield exactly the same factor, since a single p/q pair
+ // is passed for both directions below.
+ int scale_q_h = -1;
+ int scale_p_h = -1;
+ av1_derive_scale_factor(src->d_h, dst->d_h, &scale_p_h, &scale_q_h);
+ if (scale_p != scale_p_h || scale_q != scale_q_h) return -1;
+
+ for (int i = 0; i < 3; ++i) {
+ // Plane 0 is treated as luma, planes 1 and 2 as chroma.
+ const int is_uv = (i > 0);
+ const int lanczos_a_hor =
+ is_uv ? LANCZOS_A_NORMATIVE_HOR_C : LANCZOS_A_NORMATIVE_HOR_Y;
+ const int lanczos_a_ver =
+ is_uv ? LANCZOS_A_NORMATIVE_VER_C : LANCZOS_A_NORMATIVE_VER_Y;
+ const int src_h = get_plane_size_i420(src->d_h, is_uv);
+ const int src_w = get_plane_size_i420(src->d_w, is_uv);
+ const int dst_h = get_plane_size_i420(dst->d_h, is_uv);
+ const int dst_w = get_plane_size_i420(dst->d_w, is_uv);
+
+ // Note the argument order of the resamplers: height before width, and
+ // scale_q (denom) before scale_p (num).
+ if (src->fmt == AOM_IMG_FMT_I420) {
+ av1_resample_plane_2d_8b_lanczos(
+ src->planes[i], src_h, src_w, src->stride[i], dst->planes[i], dst_h,
+ dst_w, dst->stride[i], is_uv ? 1 : 0, is_uv ? 1 : 0, bd, scale_q,
+ scale_p, lanczos_a_hor, lanczos_a_ver);
+ } else {
+ // I42016: planes are accessed as uint16_t samples and the strides are
+ // halved to match (presumably byte strides -> sample strides; confirm
+ // against aom_image_t).
+ av1_resample_plane_2d_lanczos(
+ (uint16_t *)src->planes[i], src_h, src_w, src->stride[i] / 2,
+ (uint16_t *)dst->planes[i], dst_h, dst_w, dst->stride[i] / 2,
+ is_uv ? 1 : 0, is_uv ? 1 : 0, bd, scale_q, scale_p, lanczos_a_hor,
+ lanczos_a_ver);
+ }
+ }
+ return 0;
+}
+
#if CONFIG_LIBYUV
static INLINE int libyuv_scale(aom_image_t *src, aom_image_t *dst,
FilterModeEnum mode) {
@@ -940,18 +988,22 @@
}
if (img->d_w != scaled_img->d_w || img->d_h != scaled_img->d_h) {
+ if (!lanczos_scale(img, scaled_img, img->bit_depth)) {
+ img = scaled_img;
+ } else {
#if CONFIG_LIBYUV
- libyuv_scale(img, scaled_img, kFilterBox);
- img = scaled_img;
+ libyuv_scale(img, scaled_img, kFilterBox);
+ img = scaled_img;
#else
- fprintf(
- stderr,
- "Failed to scale output frame: %s.\n"
- "libyuv is required for scaling but is currently disabled.\n"
- "Be sure to specify -DCONFIG_LIBYUV=1 when running cmake.\n",
- aom_codec_error(&decoder));
- goto fail;
+ fprintf(
+ stderr,
+ "Failed to scale output frame: %s.\n"
+ "libyuv is required for scaling but is currently disabled.\n"
+ "Be sure to specify -DCONFIG_LIBYUV=1 when running cmake.\n",
+ aom_codec_error(&decoder));
+ goto fail;
#endif
+ }
}
}
// Default to codec bit depth if output bit depth not set
diff --git a/av1/common/lanczos_resample.c b/av1/common/lanczos_resample.c
index fa79e45..46cf074 100644
--- a/av1/common/lanczos_resample.c
+++ b/av1/common/lanczos_resample.c
@@ -16,8 +16,9 @@
#include <stdbool.h>
#include <string.h>
#include <assert.h>
+#include <limits.h>
-#include "tools/lanczos/lanczos_resample.h"
+#include "av1/common/lanczos_resample.h"
/* Shift down with rounding for use when n >= 0, value >= 0 */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (((1 << (n)) >> 1))) >> (n))
@@ -166,6 +167,8 @@
}
static void integerize_array(double *x, int len, int bits, int16_t *y) {
+ assert(len <= MAX_FILTER_LEN);
+
int sumy = 0;
for (int i = 0; i < len; ++i) {
y[i] = (int16_t)rint(x[i] * (1 << bits));
@@ -173,7 +176,7 @@
}
while (sumy > (1 << bits)) {
double mx = -65536.0;
- int imx = -1;
+ int imx = 0;
for (int i = 0; i < len; ++i) {
const double v = (double)y[i] - (x[i] * (1 << bits));
if (v > mx) {
@@ -181,12 +184,13 @@
imx = i;
}
}
+ assert(imx >= 0 && imx < len);
y[imx] -= 1;
sumy -= 1;
}
while (sumy < (1 << bits)) {
double mx = 65536.0;
- int imx = -1;
+ int imx = 0;
for (int i = 0; i < len; ++i) {
const double v = (double)y[i] - (x[i] * (1 << bits));
if (v < mx) {
@@ -194,6 +198,7 @@
imx = i;
}
}
+ assert(imx >= 0 && imx < len);
y[imx] += 1;
sumy += 1;
}
@@ -257,6 +262,13 @@
int get_resample_filter(int p, int q, int a, double x0, EXT_TYPE ext_type,
WIN_TYPE win_type, int subsampled, int bits,
RationalResampleFilter *rf) {
+ // Initialization to silence static analysis warnings.
+ for (int phase = 0; phase < MAX_RATIONAL_FACTOR; ++phase) {
+ for (int i = 0; i < MAX_FILTER_LEN; ++i) {
+ rf->filter[phase][i] = 0;
+ }
+ }
+
double offset[MAX_RATIONAL_FACTOR + 1];
int intpel[MAX_RATIONAL_FACTOR];
if (p <= 0 || q <= 0) {
@@ -340,11 +352,18 @@
ClipProfile *clip, int16_t *y, int outlen) {
(void)inlen;
const int tapsby2 = rf->length / 2;
+ assert(tapsby2 * 2 <= MAX_FILTER_LEN);
+ assert(rf->p <= MAX_RATIONAL_FACTOR);
+
const int16_t *xext = x;
xext += rf->start;
for (int i = 0, p = 0; i < outlen; ++i, p = (p + 1) % rf->p) {
int64_t sum = 0;
+
+ assert(p >= 0 && p < MAX_RATIONAL_FACTOR);
for (int j = -tapsby2 + 1; j <= tapsby2; ++j) {
+ assert(j + tapsby2 - 1 >= 0);
+ assert(j + tapsby2 - 1 < MAX_FILTER_LEN);
sum += (int)rf->filter[p][j + tapsby2 - 1] * (int)xext[j];
}
sum = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
@@ -527,6 +546,256 @@
free(tmparr_);
}
+// Assume x buffer is already extended on both sides with x pointing to the
+// leftmost pixel, and the extension values are already filled up.
+// Filters the 8-bit input x into outlen 16-bit outputs in y. Each output
+// uses the filter phase p (cycling modulo rf->p), sums rf->length taps around
+// the current input position, rounds the sum down by `downshift` bits and
+// clips it. With a non-NULL clip the result is limited to clip->bits
+// (signed or unsigned per clip->issigned); otherwise to the int16_t range.
+static void resample_1d_core_in8b(const uint8_t *x, int inlen,
+ RationalResampleFilter *rf, int downshift,
+ ClipProfile *clip, int16_t *y, int outlen) {
+ // inlen is not needed here: the caller guarantees the extended buffer.
+ (void)inlen;
+ const int tapsby2 = rf->length / 2;
+ assert(tapsby2 * 2 <= MAX_FILTER_LEN);
+ assert(rf->p <= MAX_RATIONAL_FACTOR);
+
+ // Start at the input position of the first output sample.
+ const uint8_t *xext = x;
+ xext += rf->start;
+ for (int i = 0, p = 0; i < outlen; ++i, p = (p + 1) % rf->p) {
+ int64_t sum = 0;
+
+ assert(p >= 0 && p < MAX_RATIONAL_FACTOR);
+ // Taps cover xext[-tapsby2 + 1 .. tapsby2]; filter index is rebased to 0.
+ for (int j = -tapsby2 + 1; j <= tapsby2; ++j) {
+ assert(j + tapsby2 - 1 >= 0);
+ assert(j + tapsby2 - 1 < MAX_FILTER_LEN);
+ sum += (int)rf->filter[p][j + tapsby2 - 1] * (int)xext[j];
+ }
+ sum = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
+ if (clip) {
+ y[i] = (int16_t)(clip->issigned
+ ? doclip((int)sum, -(1 << (clip->bits - 1)),
+ (1 << (clip->bits - 1)) - 1)
+ : doclip((int)sum, 0, (1 << clip->bits) - 1));
+ } else {
+ // No clip profile: saturate to what the int16_t output can hold.
+ y[i] = (int16_t)doclip((int)sum, -(1 << 15), (1 << 15) - 1);
+ }
+ // Advance the input position by the step associated with this phase.
+ xext += rf->steps[p];
+ }
+}
+
+// Assume x buffer is already extended on both sides with x pointing to the
+// leftmost pixel, and the extension values are already filled up.
+// Filters the 8-bit input x into outlen 8-bit outputs in y. Each output uses
+// the filter phase p (cycling modulo rf->p), sums rf->length taps around the
+// current input position and rounds the sum down by `downshift` bits. With a
+// non-NULL clip the result is limited as described by the profile; without
+// one it is saturated to 0..255 so that it cannot wrap when stored.
+static void resample_1d_core_8b(const uint8_t *x, int inlen,
+                                RationalResampleFilter *rf, int downshift,
+                                ClipProfile *clip, uint8_t *y, int outlen) {
+  // inlen is not needed here: the caller guarantees the extended buffer.
+  (void)inlen;
+  const int tapsby2 = rf->length / 2;
+  // Same bounds checks as the 16-bit-output sibling resample_1d_core_in8b().
+  assert(tapsby2 * 2 <= MAX_FILTER_LEN);
+  assert(rf->p <= MAX_RATIONAL_FACTOR);
+
+  const uint8_t *xext = x + rf->start;
+  for (int i = 0, p = 0; i < outlen; ++i, p = (p + 1) % rf->p) {
+    int64_t sum = 0;
+
+    assert(p >= 0 && p < MAX_RATIONAL_FACTOR);
+    // Taps cover xext[-tapsby2 + 1 .. tapsby2]; filter index is rebased to 0.
+    for (int j = -tapsby2 + 1; j <= tapsby2; ++j) {
+      assert(j + tapsby2 - 1 >= 0);
+      assert(j + tapsby2 - 1 < MAX_FILTER_LEN);
+      sum += (int)rf->filter[p][j + tapsby2 - 1] * (int)xext[j];
+    }
+    sum = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
+    if (clip) {
+      y[i] = (uint8_t)(clip->issigned
+                           ? doclip((int)sum, -(1 << (clip->bits - 1)),
+                                    (1 << (clip->bits - 1)) - 1)
+                           : doclip((int)sum, 0, (1 << clip->bits) - 1));
+    } else {
+      // Filter overshoot can leave the 0..255 range; saturate instead of
+      // letting the narrowing cast wrap around.
+      y[i] = (uint8_t)doclip((int)sum, 0, 255);
+    }
+    // Advance the input position by the step associated with this phase.
+    xext += rf->steps[p];
+  }
+}
+
+// Fills `border` samples before x[0] and `border` samples after x[inlen - 1]
+// in place, using the extension rule selected by ext_type. The caller must
+// provide a buffer with that much writable room on both sides of x.
+static void extend_border_8b(uint8_t *x, int inlen, EXT_TYPE ext_type,
+ int border) {
+ switch (ext_type) {
+ // Replicate the first / last sample.
+ case EXT_REPEAT:
+ for (int i = -border; i < 0; ++i) x[i] = x[0];
+ for (int i = 0; i < border; ++i) x[i + inlen] = x[inlen - 1];
+ break;
+ // Mirror around the edge, repeating the edge sample (x[-1] = x[0]).
+ case EXT_SYMMETRIC:
+ if (inlen >= border) {
+ for (int i = -border; i < 0; ++i) x[i] = x[-i - 1];
+ for (int i = 0; i < border; ++i) x[i + inlen] = x[inlen - 1 - i];
+ } else {
+ // Input shorter than the border: clamp the mirrored index to the
+ // valid range [0, inlen - 1].
+ for (int i = -border; i < 0; ++i)
+ x[i] = x[(-i - 1 > inlen - 1 ? inlen - 1 : -i - 1)];
+ for (int i = 0; i < border; ++i)
+ x[i + inlen] = x[(inlen - 1 - i < 0 ? 0 : inlen - 1 - i)];
+ }
+ break;
+ // Mirror around the edge sample without repeating it (x[-1] = x[1]).
+ case EXT_REFLECT:
+ if (inlen > border) {
+ for (int i = -border; i < 0; ++i) x[i] = x[-i];
+ for (int i = 0; i < border; ++i) x[i + inlen] = x[inlen - 2 - i];
+ } else {
+ // Input too short: clamp the mirrored index to [0, inlen - 1].
+ for (int i = -border; i < 0; ++i)
+ x[i] = x[(-i > inlen - 1 ? inlen - 1 : -i)];
+ for (int i = 0; i < border; ++i)
+ x[i + inlen] = x[(inlen - 2 - i < 0 ? 0 : inlen - 2 - i)];
+ }
+ break;
+ // Extrapolate as 2 * edge - mirrored sample, clipped to 0..255.
+ case EXT_GRADIENT:
+ if (inlen > border) {
+ for (int i = -border; i < 0; ++i) {
+ const int t = 2 * x[0] - x[-i];
+ x[i] = (uint8_t)doclip(t, 0, 255);
+ }
+ for (int i = 0; i < border; ++i) {
+ const int t = 2 * x[inlen - 1] - x[inlen - 2 - i];
+ x[i + inlen] = (uint8_t)doclip(t, 0, 255);
+ }
+ } else {
+ // Input too short: clamp the mirrored index to [0, inlen - 1].
+ for (int i = -border; i < 0; ++i) {
+ const int t = 2 * x[0] - x[(-i > inlen - 1 ? inlen - 1 : -i)];
+ x[i] = (uint8_t)doclip(t, 0, 255);
+ }
+ for (int i = 0; i < border; ++i) {
+ const int t =
+ 2 * x[inlen - 1] - x[(inlen - 2 - i < 0 ? 0 : inlen - 2 - i)];
+ x[i + inlen] = (uint8_t)doclip(t, 0, 255);
+ }
+ }
+ break;
+ }
+}
+
+// Extends the borders of x in place (half the filter length on each side,
+// using rf->ext_type) and then resamples it into the 8-bit output y.
+static void resample_1d_xt_8b(uint8_t *x, int inlen, RationalResampleFilter *rf,
+                              int downshift, ClipProfile *clip, uint8_t *y,
+                              int outlen) {
+  const int border = rf->length / 2;
+  extend_border_8b(x, inlen, rf->ext_type, border);
+  resample_1d_core_8b(x, inlen, rf, downshift, clip, y, outlen);
+}
+
+// Resamples the read-only input x into the 8-bit output y. The input is
+// first copied into the caller-provided scratch buffer xext, which must have
+// room for the border extension on both sides; the extension and filtering
+// then run on that copy.
+static void resample_1d_xc_8b(const uint8_t *x, int inlen,
+                              RationalResampleFilter *rf, int downshift,
+                              ClipProfile *clip, uint8_t *y, int outlen,
+                              uint8_t *xext) {
+  memcpy(xext, x, inlen * sizeof(*x));
+
+  // Same two steps as resample_1d_xt_8b(), applied to the scratch copy.
+  const int border = rf->length / 2;
+  extend_border_8b(xext, inlen, rf->ext_type, border);
+  resample_1d_core_8b(xext, inlen, rf, downshift, clip, y, outlen);
+}
+
+// Extends the borders of the 8-bit input x in place (half the filter length
+// on each side, using rf->ext_type) and then resamples it into the 16-bit
+// output y.
+static void resample_1d_xt_in8b(uint8_t *x, int inlen,
+                                RationalResampleFilter *rf, int downshift,
+                                ClipProfile *clip, int16_t *y, int outlen) {
+  const int border = rf->length / 2;
+  extend_border_8b(x, inlen, rf->ext_type, border);
+  resample_1d_core_in8b(x, inlen, rf, downshift, clip, y, outlen);
+}
+
+// Resamples the read-only 8-bit input x into the 16-bit output y. The input
+// is first copied into the caller-provided scratch buffer xext, which must
+// have room for the border extension on both sides; the extension and
+// filtering then run on that copy.
+static void resample_1d_xc_in8b(const uint8_t *x, int inlen,
+                                RationalResampleFilter *rf, int downshift,
+                                ClipProfile *clip, int16_t *y, int outlen,
+                                uint8_t *xext) {
+  memcpy(xext, x, inlen * sizeof(*x));
+
+  // Same two steps as resample_1d_xt_in8b(), applied to the scratch copy.
+  const int border = rf->length / 2;
+  extend_border_8b(xext, inlen, rf->ext_type, border);
+  resample_1d_core_in8b(xext, inlen, rf, downshift, clip, y, outlen);
+}
+
+// Gathers one column of an 8-bit image into a contiguous 16-bit array:
+// arr[k] receives the sample found k * stride elements after img.
+static void fill_col_to_arr_in8b(const uint8_t *img, int stride, int len,
+                                 int16_t *arr) {
+  for (int k = 0; k < len; ++k) {
+    arr[k] = (int16_t)img[k * stride];
+  }
+}
+
+// Scatters a contiguous 16-bit array into one column of an 8-bit image:
+// the sample k * stride elements after img receives arr[k] narrowed to
+// uint8_t (no clamping is done here).
+static void fill_arr_to_col_out8b(uint8_t *img, int stride, int len,
+                                  const int16_t *arr) {
+  for (int k = 0; k < len; ++k) {
+    img[k * stride] = (uint8_t)arr[k];
+  }
+}
+
+// Resamples the 8-bit line x (inlen samples, no border extension required
+// from the caller) into outlen 8-bit samples in y. A temporary buffer holding
+// the input plus rf->length extra samples is allocated for the border
+// extension and released before returning. If that allocation fails, a
+// message is printed and the process exits, as the other fatal errors in
+// this file do.
+void resample_1d_8b(const uint8_t *x, int inlen, RationalResampleFilter *rf,
+                    int downshift, ClipProfile *clip, uint8_t *y, int outlen) {
+  const int tapsby2 = rf->length / 2;
+  uint8_t *xext_ = (uint8_t *)malloc((inlen + rf->length) * sizeof(*x));
+  if (xext_ == NULL) {
+    fprintf(stderr, "Cannot allocate resampling buffer, exiting!\n");
+    exit(1);
+  }
+  // Leave half a filter length of room in front of the copied input.
+  uint8_t *xext = xext_ + tapsby2;
+
+  resample_1d_xc_8b(x, inlen, rf, downshift, clip, y, outlen, xext);
+
+  free(xext_);
+}
+
+// Resamples the 8-bit plane x (inwidth x inheight, stride instride) into the
+// 8-bit plane y (outwidth x outheight, stride outstride).
+//
+// If the vertical filter is NULL or a no-op only the horizontal pass is run,
+// and vice versa. Otherwise rows are filtered horizontally into a 16-bit
+// intermediate buffer that keeps int_extra_bits of extra precision, and each
+// column of that buffer is then filtered vertically, clipped with `clip` and
+// stored to y. `clip` must not be NULL on the two-pass path.
+//
+// If a temporary buffer cannot be allocated, a message is printed and the
+// process exits, as the other fatal errors in this file do.
+void av1_resample_2d_8b(const uint8_t *x, int inwidth, int inheight,
+                        int instride, RationalResampleFilter *rfh,
+                        RationalResampleFilter *rfv, int int_extra_bits,
+                        ClipProfile *clip, uint8_t *y, int outwidth,
+                        int outheight, int outstride) {
+  if (rfv == NULL || is_resampler_noop(rfv)) {
+    resample_horz_8b(x, inwidth, inheight, instride, rfh, clip, y, outwidth,
+                     outstride);
+    return;
+  }
+  if (rfh == NULL || is_resampler_noop(rfh)) {
+    resample_vert_8b(x, inwidth, inheight, instride, rfv, clip, y, outheight,
+                     outstride);
+    return;
+  }
+  // Horizontally filtered rows: outwidth x inheight samples.
+  int16_t *tmpbuf = (int16_t *)malloc(sizeof(int16_t) * outwidth * inheight);
+  // Scratch line layout: [outheight output samples][extended input line],
+  // where the input line area fits the larger of a row or a column plus its
+  // filter-length border.
+  const int arrsize =
+      outheight + ((inheight + rfv->length > inwidth + rfh->length)
+                       ? (inheight + rfv->length)
+                       : (inwidth + rfh->length));
+  int16_t *tmparr_ = (int16_t *)calloc(arrsize, sizeof(int16_t));
+  if (tmpbuf == NULL || tmparr_ == NULL) {
+    free(tmpbuf);
+    free(tmparr_);
+    fprintf(stderr, "Cannot allocate resampling buffers, exiting!\n");
+    exit(1);
+  }
+  int16_t *tmparrh = tmparr_ + outheight + rfh->length / 2;
+  int16_t *tmparrv = tmparr_ + outheight + rfv->length / 2;
+  int16_t *tmparro = tmparr_;
+  int tmpstride = outwidth;
+  // intermediate data is stored in 16 bit buffers, so limit int_extra_bits
+  int_extra_bits = MIN(int_extra_bits, 14 - clip->bits);
+  const int downshifth = rfh->filter_bits - int_extra_bits;
+  // The vertical pass removes the vertical filter's own gain plus the extra
+  // precision kept by the horizontal pass.
+  const int downshiftv = rfv->filter_bits + int_extra_bits;
+  for (int i = 0; i < inheight; ++i) {
+    // The horizontal scratch area is reused as a byte buffer for the 8-bit
+    // input row.
+    resample_1d_xc_in8b(x + instride * i, inwidth, rfh, downshifth, NULL,
+                        tmpbuf + i * tmpstride, outwidth, (uint8_t *)tmparrh);
+  }
+  for (int i = 0; i < outwidth; ++i) {
+    fill_col_to_arr(tmpbuf + i, outwidth, inheight, tmparrv);
+    resample_1d_xt(tmparrv, inheight, rfv, downshiftv, clip, tmparro,
+                   outheight);
+    fill_arr_to_col_out8b(y + i, outstride, outheight, tmparro);
+  }
+  free(tmpbuf);
+  free(tmparr_);
+}
+
+// Resamples every row of the 8-bit plane x horizontally with rfh and writes
+// outwidth samples per row to y; the number of rows is unchanged. One scratch
+// line with room for the border extension is allocated and reused for all
+// rows. If that allocation fails, a message is printed and the process exits,
+// as the other fatal errors in this file do.
+void resample_horz_8b(const uint8_t *x, int inwidth, int inheight, int instride,
+                      RationalResampleFilter *rfh, ClipProfile *clip,
+                      uint8_t *y, int outwidth, int outstride) {
+  const int arrsize = inwidth + rfh->length;
+  uint8_t *tmparr_ = (uint8_t *)calloc(arrsize, sizeof(*tmparr_));
+  if (tmparr_ == NULL) {
+    fprintf(stderr, "Cannot allocate resampling buffer, exiting!\n");
+    exit(1);
+  }
+  // Leave half a filter length of room in front of the copied row.
+  uint8_t *tmparrh = tmparr_ + rfh->length / 2;
+  for (int i = 0; i < inheight; ++i) {
+    resample_1d_xc_8b(x + instride * i, inwidth, rfh, rfh->filter_bits, clip,
+                      y + i * outstride, outwidth, tmparrh);
+  }
+  free(tmparr_);
+}
+
+// Resamples every column of the 8-bit plane x vertically with rfv and writes
+// outheight samples per column to y; the number of columns is unchanged.
+// Columns are gathered into a 16-bit scratch line, filtered with the 16-bit
+// 1-D resampler and scattered back as 8-bit samples. If the scratch
+// allocation fails, a message is printed and the process exits, as the other
+// fatal errors in this file do.
+void resample_vert_8b(const uint8_t *x, int inwidth, int inheight, int instride,
+                      RationalResampleFilter *rfv, ClipProfile *clip,
+                      uint8_t *y, int outheight, int outstride) {
+  // Scratch layout: [outheight output samples][extended input column].
+  const int arrsize = outheight + inheight + rfv->length;
+  int16_t *tmparr_ = (int16_t *)calloc(arrsize, sizeof(int16_t));
+  if (tmparr_ == NULL) {
+    fprintf(stderr, "Cannot allocate resampling buffer, exiting!\n");
+    exit(1);
+  }
+  int16_t *tmparrv = tmparr_ + outheight + rfv->length / 2;
+  int16_t *tmparro = tmparr_;
+  for (int i = 0; i < inwidth; ++i) {
+    fill_col_to_arr_in8b(x + i, instride, inheight, tmparrv);
+    resample_1d_xt(tmparrv, inheight, rfv, rfv->filter_bits, clip, tmparro,
+                   outheight);
+    fill_arr_to_col_out8b(y + i, outstride, outheight, tmparro);
+  }
+  free(tmparr_);
+}
+
int get_resampled_output_length(int inlen, int p, int q, int force_even) {
if (!force_even) {
// round
@@ -539,3 +808,106 @@
else
return outlen_floor;
}
+
+// Searches for integers num, denom in [1, 16] such that num / denom best
+// approximates width_scaled / width, and returns them in *p (num) and *q
+// (denom). Both outputs are set to -1 when the ratio is beyond 16x in either
+// direction, when no candidate is strictly better than treating the sizes as
+// unscaled, or when the best candidate's error exceeds the acceptance
+// threshold (err / denom > width / 32, evaluated in integer arithmetic).
+void av1_derive_scale_factor(int width, int width_scaled, int *p, int *q) {
+  assert(width > 0);
+  assert(width_scaled > 0);
+
+  *p = -1;
+  *q = -1;
+
+  // Lanczos library supports a scaling factor p/q with both p and q <= 16.
+  const int too_small = width > (width_scaled << 4);
+  const int too_large = width_scaled > (width << 4);
+  if (too_small || too_large) return;
+
+  // The running minimum is the fraction min_err / min_err_denom; it starts
+  // at the error of the identity ratio. Fractions are compared by
+  // cross-multiplication, so no division is needed.
+  int min_err = abs(width - width_scaled);
+  int min_err_denom = 1;
+  int found_num = -1;
+  int found_denom = -1;
+
+  for (int d = 1; d <= 16; ++d) {
+    for (int n = 1; n <= 16; ++n) {
+      const int cand_err = abs(n * width - d * width_scaled);
+
+      // Only a strictly smaller error replaces the current best.
+      if (cand_err * min_err_denom >= min_err * d) continue;
+
+      found_num = n;
+      found_denom = d;
+      min_err = cand_err;
+      min_err_denom = d;
+    }
+  }
+
+  // Reject approximations that are too far from the requested ratio.
+  const int within_tolerance = min_err <= ((min_err_denom * width) >> 5);
+  *p = within_tolerance ? found_num : -1;
+  *q = within_tolerance ? found_denom : -1;
+}
+
+// Resamples one 16-bit plane (width x height, stride in_stride) into output
+// (width2 x height2, stride out_stride) with separate horizontal and vertical
+// filters built by get_resample_filter() for the ratio num / denom, using
+// lanczos_a_hor / lanczos_a_ver as the respective Lanczos `a` parameters.
+// subx / suby select the filter offset and the `subsampled` flag for each
+// direction. Prints a message and exits the process if a filter cannot be
+// generated.
+void av1_resample_plane_2d_lanczos(const uint16_t *const input, int height,
+ int width, int in_stride, uint16_t *output,
+ int height2, int width2, int out_stride,
+ int subx, int suby, int bd, int denom,
+ int num, int lanczos_a_hor,
+ int lanczos_a_ver) {
+ // Filter coefficient precision and extra bits kept between the two passes.
+ int coeff_prec_bits = 14;
+ int extra_prec_bits = 2;
+ WIN_TYPE win = WIN_LANCZOS;
+ EXT_TYPE ext = EXT_REPEAT;
+ // Presumably { bits = bd, issigned = 0 }; confirm against the ClipProfile
+ // field order.
+ ClipProfile clip = { bd, 0 };
+ int horz_a = lanczos_a_hor;
+ int vert_a = lanczos_a_ver;
+ // The offset is passed as a character code: 'd' for a subsampled direction,
+ // 'c' otherwise (see get_resample_filter() for how these are interpreted).
+ double horz_x0 = subx ? (double)('d') : (double)('c');
+ double vert_x0 = suby ? (double)('d') : (double)('c');
+
+ RationalResampleFilter horz_rf;
+ RationalResampleFilter vert_rf;
+
+ if (!get_resample_filter(num, denom, horz_a, horz_x0, ext, win, subx,
+ coeff_prec_bits, &horz_rf)) {
+ fprintf(stderr, "Cannot generate filter, exiting!\n");
+ exit(1);
+ }
+ if (!get_resample_filter(num, denom, vert_a, vert_x0, ext, win, suby,
+ coeff_prec_bits, &vert_rf)) {
+ fprintf(stderr, "Cannot generate filter, exiting!\n");
+ exit(1);
+ }
+
+ // The generic resampler works on int16_t samples, hence the pointer casts.
+ av1_resample_2d((const int16_t *)input, width, height, in_stride, &horz_rf,
+ &vert_rf, extra_prec_bits, &clip, (int16_t *)output, width2,
+ height2, out_stride);
+}
+
+// 8-bit counterpart of av1_resample_plane_2d_lanczos(): resamples one 8-bit
+// plane (width x height, stride in_stride) into output (width2 x height2,
+// stride out_stride) with horizontal and vertical filters built by
+// get_resample_filter() for the ratio num / denom, using lanczos_a_hor /
+// lanczos_a_ver as the respective Lanczos `a` parameters. subx / suby select
+// the filter offset and the `subsampled` flag for each direction. Prints a
+// message and exits the process if a filter cannot be generated.
+void av1_resample_plane_2d_8b_lanczos(const uint8_t *const input, int height,
+ int width, int in_stride, uint8_t *output,
+ int height2, int width2, int out_stride,
+ int subx, int suby, int bd, int denom,
+ int num, int lanczos_a_hor,
+ int lanczos_a_ver) {
+ // Filter coefficient precision and extra bits kept between the two passes.
+ int coeff_prec_bits = 14;
+ int extra_prec_bits = 2;
+ WIN_TYPE win = WIN_LANCZOS;
+ EXT_TYPE ext = EXT_REPEAT;
+ // Presumably { bits = bd, issigned = 0 }; confirm against the ClipProfile
+ // field order.
+ ClipProfile clip = { bd, 0 };
+ int horz_a = lanczos_a_hor;
+ int vert_a = lanczos_a_ver;
+ // The offset is passed as a character code: 'd' for a subsampled direction,
+ // 'c' otherwise (see get_resample_filter() for how these are interpreted).
+ double horz_x0 = subx ? (double)('d') : (double)('c');
+ double vert_x0 = suby ? (double)('d') : (double)('c');
+
+ RationalResampleFilter horz_rf;
+ RationalResampleFilter vert_rf;
+
+ if (!get_resample_filter(num, denom, horz_a, horz_x0, ext, win, subx,
+ coeff_prec_bits, &horz_rf)) {
+ fprintf(stderr, "Cannot generate filter, exiting!\n");
+ exit(1);
+ }
+ if (!get_resample_filter(num, denom, vert_a, vert_x0, ext, win, suby,
+ coeff_prec_bits, &vert_rf)) {
+ fprintf(stderr, "Cannot generate filter, exiting!\n");
+ exit(1);
+ }
+
+ av1_resample_2d_8b(input, width, height, in_stride, &horz_rf, &vert_rf,
+ extra_prec_bits, &clip, output, width2, height2,
+ out_stride);
+}
diff --git a/av1/common/lanczos_resample.h b/av1/common/lanczos_resample.h
index 08d9d97..f40d7a3 100644
--- a/av1/common/lanczos_resample.h
+++ b/av1/common/lanczos_resample.h
@@ -20,6 +20,32 @@
#define MAX_RATIONAL_FACTOR 16
#define MAX_FILTER_LEN 320
+void av1_derive_scale_factor(int width, int width_scaled, int *p, int *q);
+
+#define LANCZOS_A_NORMATIVE_HOR_Y 6 // Normative hor Lanczos a Luma
+#define LANCZOS_A_NORMATIVE_HOR_C 4 // Normative hor Lanczos a Chroma
+#define LANCZOS_A_NORMATIVE_VER_Y 4 // Normative ver Lanczos a Luma
+#define LANCZOS_A_NORMATIVE_VER_C 4 // Normative ver Lanczos a Chroma
+
+#define LANCZOS_A_NONNORMATIVE_HOR_Y 6 // Non-normative hor Lanczos a Luma
+#define LANCZOS_A_NONNORMATIVE_HOR_C 4 // Non-normative hor Lanczos a Chroma
+#define LANCZOS_A_NONNORMATIVE_VER_Y 6 // Non-normative ver Lanczos a Luma
+#define LANCZOS_A_NONNORMATIVE_VER_C \
+ 4 // Non-normative ver Lanczos a Chroma
+ // Chroma
+void av1_resample_plane_2d_lanczos(const uint16_t *const input, int height,
+ int width, int in_stride, uint16_t *output,
+ int height2, int width2, int out_stride,
+ int subx, int suby, int bd, int denom,
+ int num, int lanczos_a_hor,
+ int lanczos_a_ver);
+void av1_resample_plane_2d_8b_lanczos(const uint8_t *const input, int height,
+ int width, int in_stride, uint8_t *output,
+ int height2, int width2, int out_stride,
+ int subx, int suby, int bd, int denom,
+ int num, int lanczos_a_hor,
+ int lanczos_a_ver);
+
// Note: check window() function implementation for values of any
// other params used by these windowing functions.
typedef enum {
@@ -96,6 +122,26 @@
RationalResampleFilter *rfv, ClipProfile *clip, int16_t *y,
int outheight, int outstride);
+// 8-bit versions of high-level resampling functions
+
+// Assume no extension of the input x buffer
+void resample_1d_8b(const uint8_t *x, int inlen, RationalResampleFilter *rf,
+ int downshift, ClipProfile *clip, uint8_t *y, int outlen);
+
+void av1_resample_2d_8b(const uint8_t *x, int inwidth, int inheight,
+ int instride, RationalResampleFilter *rfh,
+ RationalResampleFilter *rfv, int int_extra_bits,
+ ClipProfile *clip, uint8_t *y, int outwidth,
+ int outheight, int outstride);
+
+void resample_horz_8b(const uint8_t *x, int inwidth, int inheight, int instride,
+ RationalResampleFilter *rfh, ClipProfile *clip,
+ uint8_t *y, int outwidth, int outstride);
+
+void resample_vert_8b(const uint8_t *x, int inwidth, int inheight, int instride,
+ RationalResampleFilter *rfv, ClipProfile *clip,
+ uint8_t *y, int outheight, int outstride);
+
void show_resample_filter(RationalResampleFilter *rf);
int get_resampled_output_length(int inlen, int p, int q, int force_even);
diff --git a/av1/common/resize.c b/av1/common/resize.c
index 0b2ab30..53b040a 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -17,8 +17,6 @@
#include <stdlib.h>
#include <string.h>
-#include "aom_ports/system_state.h"
-
#include "config/aom_config.h"
#include "aom_dsp/aom_dsp_common.h"
@@ -989,54 +987,6 @@
aom_free(arrbuf2);
}
-#define LANCZOS_A_NORMATIVE_HOR_Y 6 // Normative hor Lanczos a Luma
-#define LANCZOS_A_NORMATIVE_HOR_C 4 // Normative hor Lanczos a Chroma
-#define LANCZOS_A_NORMATIVE_VER_Y 4 // Normative ver Lanczos a Luma
-#define LANCZOS_A_NORMATIVE_VER_C 4 // Normative ver Lanczos a Chroma
-
-#define LANCZOS_A_NONNORMATIVE_HOR_Y 6 // Non-normative hor Lanczos a Luma
-#define LANCZOS_A_NONNORMATIVE_HOR_C 4 // Non-normative hor Lanczos a Chroma
-#define LANCZOS_A_NONNORMATIVE_VER_Y 6 // Non-normative ver Lanczos a Luma
-#define LANCZOS_A_NONNORMATIVE_VER_C \
- 4 // Non-normative ver Lanczos a Chroma
- // Chroma
-void av1_resample_plane_2d_lanczos(const uint16_t *const input, int height,
- int width, int in_stride, uint16_t *output,
- int height2, int width2, int out_stride,
- int subx, int suby, int bd, int denom,
- int num, int lanczos_a_hor,
- int lanczos_a_ver) {
- (void)suby;
-
- int coeff_prec_bits = 14;
- int extra_prec_bits = 2;
- WIN_TYPE win = WIN_LANCZOS;
- EXT_TYPE ext = EXT_REPEAT;
- ClipProfile clip = { bd, 0 };
- int horz_a = lanczos_a_hor;
- int vert_a = lanczos_a_ver;
- double horz_x0 = subx ? (double)('d') : (double)('c');
- double vert_x0 = (double)('c');
-
- RationalResampleFilter horz_rf;
- RationalResampleFilter vert_rf;
-
- if (!get_resample_filter(num, denom, horz_a, horz_x0, ext, win, subx,
- coeff_prec_bits, &horz_rf)) {
- fprintf(stderr, "Cannot generate filter, exiting!\n");
- exit(1);
- }
- if (!get_resample_filter(num, denom, vert_a, vert_x0, ext, win, 0,
- coeff_prec_bits, &vert_rf)) {
- fprintf(stderr, "Cannot generate filter, exiting!\n");
- exit(1);
- }
-
- av1_resample_2d((const int16_t *)input, width, height, in_stride, &horz_rf,
- &vert_rf, extra_prec_bits, &clip, (int16_t *)output, width2,
- height2, out_stride);
-}
-
void av1_resize_lanczos_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst, int bd,
const int num_planes, const int subx,
@@ -1057,41 +1007,6 @@
aom_extend_frame_borders(dst, num_planes);
}
-static void derive_scale_factor(int width, int width_scaled, int *p, int *q) {
- assert(width > 0);
- assert(width_scaled > 0);
-
- *p = -1;
- *q = -1;
-
- // Lanczos library supports a scaling factor p/q with both p and q <= 16.
- if ((width > (width_scaled << 4)) || (width_scaled > (width << 4))) return;
-
- aom_clear_system_state();
-
- const float scale_factor = (float)width_scaled / (float)width;
- const float error_thresh = 0.05f;
- float error_min = 1.0f;
-
- for (int denom = 1; denom <= 16; ++denom) {
- for (int num = 1; num <= 16; ++num) {
- float error = fabsf((float)num / (float)denom - scale_factor);
-
- if (error < error_min) {
- *p = num;
- *q = denom;
- error_min = error;
- }
- }
- }
-
- if (error_min > error_thresh) {
- *p = -1;
- *q = -1;
- }
- return;
-}
-
#if CONFIG_EXT_SUPERRES
int64_t av1_downup_lanczos_sse(const YV12_BUFFER_CONFIG *src, int bd, int denom,
int num) {
@@ -1449,8 +1364,8 @@
// TODO(yuec): implement 1D superres based on lanczos resampling
if (cm->superres_scale_denominator == SCALE_NUMERATOR)
#endif
- derive_scale_factor(unscaled->y_crop_width, scaled->y_crop_width,
- &scale_num, &scale_denom);
+ av1_derive_scale_factor(unscaled->y_crop_width, scaled->y_crop_width,
+ &scale_num, &scale_denom);
if (scale_denom > 0 && scale_num > 0) {
av1_resize_lanczos_and_extend_frame(