Improve temporal filter prediction
In temporal filtering, apply a high-precision 12-tap filter in the
prediction process. This gives a good coding gain.
Borg test results at speed 1:
          avg_psnr  ovr_psnr  ssim
lowres2:  -0.526    -0.517    -0.337
midres2:  -0.359    -0.349    -0.161
hdres2:   -0.168    -0.162    -0.083
TODO: Add 12-tap SIMD code so that most of the encoder slowdown
introduced by this change can be recovered.
STATS_CHANGED
Change-Id: I88c7dcd2c9afe00d52e5299b1330f42c6e1e01cb
(cherry picked from commit 144a941ec51cb998ea7923445e183bbaf048a777)
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 0a25396..716886f 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -561,15 +561,35 @@
if (!need_x && !need_y) {
aom_convolve_copy(src, src_stride, dst, dst_stride, w, h);
} else if (need_x && !need_y) {
- av1_convolve_x_sr(src, src_stride, dst, dst_stride, w, h, filter_params_x,
- subpel_x_qn, conv_params);
+ // Filters with taps > 8 are only for encoder side use.
+ // TODO(any): need SIMD for > 8 taps filters
+ if (filter_params_x->taps > 8 || filter_params_y->taps > 8) {
+ av1_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, subpel_x_qn, conv_params);
+ } else {
+ av1_convolve_x_sr(src, src_stride, dst, dst_stride, w, h, filter_params_x,
+ subpel_x_qn, conv_params);
+ }
} else if (!need_x && need_y) {
- av1_convolve_y_sr(src, src_stride, dst, dst_stride, w, h, filter_params_y,
- subpel_y_qn);
+ if (filter_params_x->taps > 8 || filter_params_y->taps > 8) {
+ av1_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params_y, subpel_y_qn);
+ } else {
+ av1_convolve_y_sr(src, src_stride, dst, dst_stride, w, h, filter_params_y,
+ subpel_y_qn);
+ }
} else {
assert(need_x && need_y);
- av1_convolve_2d_sr(src, src_stride, dst, dst_stride, w, h, filter_params_x,
- filter_params_y, subpel_x_qn, subpel_y_qn, conv_params);
+
+ if (filter_params_x->taps > 8 || filter_params_y->taps > 8) {
+ av1_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, filter_params_y, subpel_x_qn,
+ subpel_y_qn, conv_params);
+ } else {
+ av1_convolve_2d_sr(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, filter_params_y, subpel_x_qn,
+ subpel_y_qn, conv_params);
+ }
}
}
@@ -1064,19 +1084,43 @@
const int subpel_y_qn, ConvolveParams *conv_params, int bd) {
const bool need_x = subpel_x_qn != 0;
const bool need_y = subpel_y_qn != 0;
+ // Filters with taps > 8 are only for encoder side use.
+ const int filter_x_taps_gt8 =
+ (filter_params_x == NULL) ? 0 : ((filter_params_x->taps > 8) ? 1 : 0);
+ const int filter_y_taps_gt8 =
+ (filter_params_y == NULL) ? 0 : ((filter_params_y->taps > 8) ? 1 : 0);
+
if (!need_x && !need_y) {
aom_highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h);
} else if (need_x && !need_y) {
- av1_highbd_convolve_x_sr(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, subpel_x_qn, conv_params, bd);
+ // TODO(any): need SIMD for > 8 taps filters
+ if (filter_x_taps_gt8 || filter_y_taps_gt8) {
+ av1_highbd_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, subpel_x_qn, conv_params, bd);
+
+ } else {
+ av1_highbd_convolve_x_sr(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, subpel_x_qn, conv_params, bd);
+ }
} else if (!need_x && need_y) {
- av1_highbd_convolve_y_sr(src, src_stride, dst, dst_stride, w, h,
- filter_params_y, subpel_y_qn, bd);
+ if (filter_x_taps_gt8 || filter_y_taps_gt8) {
+ av1_highbd_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params_y, subpel_y_qn, bd);
+ } else {
+ av1_highbd_convolve_y_sr(src, src_stride, dst, dst_stride, w, h,
+ filter_params_y, subpel_y_qn, bd);
+ }
} else {
assert(need_x && need_y);
- av1_highbd_convolve_2d_sr(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, subpel_x_qn,
- subpel_y_qn, conv_params, bd);
+ if (filter_x_taps_gt8 || filter_y_taps_gt8) {
+ av1_highbd_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, filter_params_y, subpel_x_qn,
+ subpel_y_qn, conv_params, bd);
+ } else {
+ av1_highbd_convolve_2d_sr(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, filter_params_y, subpel_x_qn,
+ subpel_y_qn, conv_params, bd);
+ }
}
}
diff --git a/av1/common/filter.h b/av1/common/filter.h
index 787e699..16a9450 100644
--- a/av1/common/filter.h
+++ b/av1/common/filter.h
@@ -32,6 +32,9 @@
EIGHTTAP_SMOOTH,
MULTITAP_SHARP,
BILINEAR,
+ // Encoder side only filters
+ MULTITAP_SHARP2,
+
INTERP_FILTERS_ALL,
SWITCHABLE_FILTERS = BILINEAR,
SWITCHABLE = SWITCHABLE_FILTERS + 1, /* the last switchable one */
@@ -166,14 +169,38 @@
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
};
+DECLARE_ALIGNED(256, static const int16_t,
+ av1_sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = {
+ { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
+ { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0 },
+ { -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1 },
+ { -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1 },
+ { -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1 },
+ { -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2 },
+ { -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2 },
+ { -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2 },
+ { -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2 },
+ { -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2 },
+ { -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2 },
+ { -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2 },
+ { -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1 },
+ { -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1 },
+ { -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1 },
+ { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0 }
+};
+
static const InterpFilterParams
- av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
+ av1_interp_filter_params_list[INTERP_FILTERS_ALL] = {
{ (const int16_t *)av1_sub_pel_filters_8, SUBPEL_TAPS, EIGHTTAP_REGULAR },
{ (const int16_t *)av1_sub_pel_filters_8smooth, SUBPEL_TAPS,
EIGHTTAP_SMOOTH },
{ (const int16_t *)av1_sub_pel_filters_8sharp, SUBPEL_TAPS,
MULTITAP_SHARP },
- { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, BILINEAR }
+ { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, BILINEAR },
+
+ // The following filters are for encoder only, and now they are used in
+ // temporal filtering. The predictor block size >= 16 in temporal filter.
+ { (const int16_t *)av1_sub_pel_filters_12sharp, 12, MULTITAP_SHARP2 },
};
// A special 2-tap bilinear filter for IntraBC chroma. IntraBC uses full pixel
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 77d4a23..922b9ae 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -325,10 +325,10 @@
// Default interpolation filters.
#if CONFIG_REMOVE_DUAL_FILTER
- const InterpFilter interp_filters = MULTITAP_SHARP;
+ const InterpFilter interp_filters = MULTITAP_SHARP2;
#else
const int_interpfilters interp_filters =
- av1_broadcast_interp_filter(MULTITAP_SHARP);
+ av1_broadcast_interp_filter(MULTITAP_SHARP2);
#endif // !CONFIG_REMOVE_DUAL_FILTER
// Handle Y-plane, U-plane and V-plane (if needed) in sequence.
diff --git a/test/av1_convolve_test.cc b/test/av1_convolve_test.cc
index e87f914..ee4352b 100644
--- a/test/av1_convolve_test.cc
+++ b/test/av1_convolve_test.cc
@@ -20,6 +20,11 @@
namespace {
+// TODO(any): Remove the following INTERP_FILTERS_ALL redefinition once 12-tap
+// filter SIMD is done, so that the 12-tap filter is also tested.
+#undef INTERP_FILTERS_ALL
+#define INTERP_FILTERS_ALL 4
+
// All single reference convolve tests are parameterized on block size,
// bit-depth, and function to test.
//