Adds support for enhanced interpolation for subpel motion
using an 8-tap filter.
The results with 3 different 8-tap filters on the derf set are in:
http://www.corp.google.com/~debargha/vp8_results/enhinterp.html
The best-performing filter achieves an overall gain of about
0.6%. The results for a set of 12 HD (720p) videos are in:
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd.html
where the maximum gain, 0.55%, is achieved by the same filter. The best
filter apparently achieves the best trade-off between passband ripple and
stopband attenuation.
Change-Id: I919e28ae245c0493147fa0864f8c9d048a9dd530
diff --git a/configure b/configure
index a955d14..c6bce04 100755
--- a/configure
+++ b/configure
@@ -227,6 +227,7 @@
uvintra
newnear
newlpf
+ enhanced_interp
"
CONFIG_LIST="
external_build
diff --git a/vp8/common/filter.c b/vp8/common/filter.c
index 256ba47..91022c1 100644
--- a/vp8/common/filter.c
+++ b/vp8/common/filter.c
@@ -25,6 +25,50 @@
{ 16, 112 }
};
+#if CONFIG_ENHANCED_INTERP
+#define FILTER_ALPHA 75
+DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][2*INTERP_EXTEND]) =
+{
+ /* Generated using MATLAB:
+ * alpha = 0.75;
+ * b=intfilt(8,4,alpha);
+ * bi=round(128*b);
+ * ba=flipud(reshape([bi 0], 8, 8));
+ * disp(num2str(ba, '%d,'))
+ */
+#if FILTER_ALPHA == 75
+ /* alpha = 0.75 */
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 4, -11, 123, 18, -7, 3, -1},
+ {-2, 7, -19, 113, 38, -14, 6, -2},
+ {-3, 9, -22, 98, 59, -19, 8, -3},
+ {-3, 9, -22, 80, 80, -22, 9, -3},
+ {-3, 8, -19, 59, 98, -22, 9, -3},
+ {-2, 6, -14, 38, 113, -19, 7, -2},
+ {-1, 3, -7, 18, 123, -11, 4, -1}
+#elif FILTER_ALPHA == 625
+ /* alpha = 0.625 */
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 3, -10, 123, 18, -6, 2, -1},
+ {-1, 5, -17, 112, 38, -12, 4, -1},
+ {-1, 6, -20, 97, 58, -17, 6, -1},
+ {-2, 7, -20, 79, 79, -20, 7, -2},
+ {-1, 6, -17, 58, 97, -20, 6, -1},
+ {-1, 4, -12, 38, 112, -17, 5, -1},
+ {-1, 2, -6, 18, 123, -10, 3, -1}
+#elif FILTER_ALPHA == 50
+ /* alpha = 0.5 */
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ { 0, 2, -10, 122, 18, -6, 2, 0},
+ {-1, 4, -16, 112, 37, -11, 3, -1},
+ {-1, 5, -18, 96, 58, -16, 4, -1},
+ {-1, 5, -18, 78, 78, -18, 5, -1},
+ {-1, 4, -16, 58, 96, -18, 5, -1},
+ {-1, 3, -11, 37, 112, -16, 4, -1},
+ { 0, 2, -6, 18, 122, -10, 2, 0}
+#endif
+};
+#else
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{
@@ -37,6 +81,7 @@
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
+#endif
static void filter_block2d_first_pass
(
@@ -56,13 +101,37 @@
{
for (j = 0; j < output_width; j++)
{
+#if INTERP_EXTEND == 3
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
- ((int)src_ptr[0] * vp8_filter[2]) +
- ((int)src_ptr[pixel_step] * vp8_filter[3]) +
- ((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
- ((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[0] * vp8_filter[2]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[3]) +
+ ((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
+ ((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#elif INTERP_EXTEND == 4
+ Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
+ ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
+ ((int)src_ptr[0] * vp8_filter[3]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[4]) +
+ ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) +
+ ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) +
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#elif INTERP_EXTEND == 5
+ Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
+ ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
+ ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
+ ((int)src_ptr[0] * vp8_filter[4]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) +
+ ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) +
+ ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) +
+ ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) +
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#endif
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
@@ -102,13 +171,37 @@
for (j = 0; j < output_width; j++)
{
/* Apply filter */
+#if INTERP_EXTEND == 3
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
- ((int)src_ptr[0] * vp8_filter[2]) +
- ((int)src_ptr[pixel_step] * vp8_filter[3]) +
- ((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
- ((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[0] * vp8_filter[2]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[3]) +
+ ((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
+ ((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#elif INTERP_EXTEND == 4
+ Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
+ ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
+ ((int)src_ptr[0] * vp8_filter[3]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[4]) +
+ ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) +
+ ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) +
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#elif INTERP_EXTEND == 5
+ Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
+ ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
+ ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
+ ((int)src_ptr[0] * vp8_filter[4]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) +
+ ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) +
+ ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) +
+ ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) +
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#endif
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
@@ -157,13 +250,37 @@
for (j = 0; j < output_width; j++)
{
/* Apply filter */
+#if INTERP_EXTEND == 3
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
- ((int)src_ptr[0] * vp8_filter[2]) +
- ((int)src_ptr[pixel_step] * vp8_filter[3]) +
- ((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
- ((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[0] * vp8_filter[2]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[3]) +
+ ((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
+ ((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#elif INTERP_EXTEND == 4
+ Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
+ ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
+ ((int)src_ptr[0] * vp8_filter[3]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[4]) +
+ ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) +
+ ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) +
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#elif INTERP_EXTEND == 5
+ Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
+ ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
+ ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
+ ((int)src_ptr[0] * vp8_filter[4]) +
+ ((int)src_ptr[pixel_step] * vp8_filter[5]) +
+ ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) +
+ ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) +
+ ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) +
+ ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) +
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
+#endif
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
@@ -194,13 +311,14 @@
const short *VFilter
)
{
- int FData[9*4]; /* Temp data buffer used in filtering */
+ int FData[(3+INTERP_EXTEND*2)*4]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
- filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
+ filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 3+INTERP_EXTEND*2, 4, HFilter);
/* then filter verticaly... */
- filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
+ filter_block2d_second_pass(FData + 4*(INTERP_EXTEND-1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
@@ -234,17 +352,19 @@
{
const short *HFilter;
const short *VFilter;
- int FData[13*16]; /* Temp data buffer used in filtering */
+ // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
+ int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
- filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
+ filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 7+INTERP_EXTEND*2, 8, HFilter);
/* then filter verticaly... */
- filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+ filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
@@ -261,16 +381,18 @@
{
const short *HFilter;
const short *VFilter;
- int FData[13*16]; /* Temp data buffer used in filtering */
+ // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
+ int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
- filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
+ filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 7+INTERP_EXTEND*2, 8, HFilter);
/* then filter verticaly... */
- filter_block2d_second_pass_avg(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+ filter_block2d_second_pass_avg(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
#endif /* CONFIG_DUALPRED */
@@ -286,17 +408,19 @@
{
const short *HFilter;
const short *VFilter;
- int FData[13*16]; /* Temp data buffer used in filtering */
+ // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
+ int FData[(3+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
- filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
+ filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 3+INTERP_EXTEND*2, 8, HFilter);
/* then filter verticaly... */
- filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
+ filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
}
@@ -312,17 +436,19 @@
{
const short *HFilter;
const short *VFilter;
- int FData[21*24]; /* Temp data buffer used in filtering */
+ // int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */
+ int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
- filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
+ filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 15+INTERP_EXTEND*2, 16, HFilter);
/* then filter verticaly... */
- filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
+ filter_block2d_second_pass(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
}
@@ -339,17 +465,18 @@
{
const short *HFilter;
const short *VFilter;
- int FData[21*24]; /* Temp data buffer used in filtering */
+ // int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */
+ int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
- filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 21, 16, HFilter);
+ filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData,
+ src_pixels_per_line, 1, 15+INTERP_EXTEND*2, 16, HFilter);
/* then filter verticaly... */
- filter_block2d_second_pass_avg(FData + 32, dst_ptr, dst_pitch,
+ filter_block2d_second_pass_avg(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch,
16, 16, 16, 16, VFilter);
}
#endif /* CONFIG_DUALPRED */
diff --git a/vp8/common/filter.h b/vp8/common/filter.h
index 0f225c2..1a47389 100644
--- a/vp8/common/filter.h
+++ b/vp8/common/filter.h
@@ -8,15 +8,17 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
#ifndef FILTER_H
#define FILTER_H
+#include "vpx_config.h"
+#include "vpx_scale/yv12config.h"
+
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
extern const short vp8_bilinear_filters[8][2];
-extern const short vp8_sub_pel_filters[8][6];
+extern const short vp8_sub_pel_filters[8][INTERP_EXTEND*2];
#endif //FILTER_H
diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c
index 79b7fd5..18c30a4 100644
--- a/vp8/encoder/mbgraph.c
+++ b/vp8/encoder/mbgraph.c
@@ -335,8 +335,8 @@
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
gld_top_mv.as_int = 0;
- x->mv_row_min = -(VP8BORDERINPIXELS - 19);
- x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 19;
+ x->mv_row_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND);
+ x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND;
xd->up_available = 0;
xd->dst.y_stride = buf->y_stride;
xd->pre.y_stride = buf->y_stride;
@@ -353,8 +353,8 @@
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_left_mv.as_int = arf_top_mv.as_int;
gld_left_mv.as_int = gld_top_mv.as_int;
- x->mv_col_min = -(VP8BORDERINPIXELS - 19);
- x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 19;
+ x->mv_col_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND);
+ x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND;
xd->left_available = 0;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 6715ac2..639d53d 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -268,6 +268,7 @@
{
static int init_done = 0;
+
if (!init_done)
{
vp8_scale_machine_specific_config();
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 3cc4746..e780467 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -17,7 +17,14 @@
#endif
#define VP7BORDERINPIXELS 48
+
+#if CONFIG_ENHANCED_INTERP
+#define VP8BORDERINPIXELS 64
+#define INTERP_EXTEND 4
+#else
#define VP8BORDERINPIXELS 32
+#define INTERP_EXTEND 3
+#endif
/*************************************
For INT_YUV: