/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 |  | 
 |  | 
 | #include "filter.h" | 
 |  | 
 | DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = | 
 | { | 
 |     { 128,   0 }, | 
 |     { 112,  16 }, | 
 |     {  96,  32 }, | 
 |     {  80,  48 }, | 
 |     {  64,  64 }, | 
 |     {  48,  80 }, | 
 |     {  32,  96 }, | 
 |     {  16, 112 } | 
 | }; | 
 |  | 
 | DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = | 
 | { | 
 |  | 
 |     { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ | 
 |     { 0, -6,  123,   12,  -1,  0 }, | 
 |     { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */ | 
 |     { 0, -9,   93,   50,  -6,  0 }, | 
 |     { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */ | 
 |     { 0, -6,   50,   93,  -9,  0 }, | 
 |     { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */ | 
 |     { 0, -1,   12,  123,  -6,  0 }, | 
 | }; | 
 |  | 
 | static void filter_block2d_first_pass | 
 | ( | 
 |     unsigned char *src_ptr, | 
 |     int *output_ptr, | 
 |     unsigned int src_pixels_per_line, | 
 |     unsigned int pixel_step, | 
 |     unsigned int output_height, | 
 |     unsigned int output_width, | 
 |     const short *vp8_filter | 
 | ) | 
 | { | 
 |     unsigned int i, j; | 
 |     int  Temp; | 
 |  | 
 |     for (i = 0; i < output_height; i++) | 
 |     { | 
 |         for (j = 0; j < output_width; j++) | 
 |         { | 
 |             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + | 
 |                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + | 
 |                    ((int)src_ptr[0]                 * vp8_filter[2]) + | 
 |                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) + | 
 |                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) + | 
 |                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) + | 
 |                    (VP8_FILTER_WEIGHT >> 1);      /* Rounding */ | 
 |  | 
 |             /* Normalize back to 0-255 */ | 
 |             Temp = Temp >> VP8_FILTER_SHIFT; | 
 |  | 
 |             if (Temp < 0) | 
 |                 Temp = 0; | 
 |             else if (Temp > 255) | 
 |                 Temp = 255; | 
 |  | 
 |             output_ptr[j] = Temp; | 
 |             src_ptr++; | 
 |         } | 
 |  | 
 |         /* Next row... */ | 
 |         src_ptr    += src_pixels_per_line - output_width; | 
 |         output_ptr += output_width; | 
 |     } | 
 | } | 
 |  | 
 | static void filter_block2d_second_pass | 
 | ( | 
 |     int *src_ptr, | 
 |     unsigned char *output_ptr, | 
 |     int output_pitch, | 
 |     unsigned int src_pixels_per_line, | 
 |     unsigned int pixel_step, | 
 |     unsigned int output_height, | 
 |     unsigned int output_width, | 
 |     const short *vp8_filter | 
 | ) | 
 | { | 
 |     unsigned int i, j; | 
 |     int  Temp; | 
 |  | 
 |     for (i = 0; i < output_height; i++) | 
 |     { | 
 |         for (j = 0; j < output_width; j++) | 
 |         { | 
 |             /* Apply filter */ | 
 |             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + | 
 |                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + | 
 |                    ((int)src_ptr[0]                 * vp8_filter[2]) + | 
 |                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) + | 
 |                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) + | 
 |                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) + | 
 |                    (VP8_FILTER_WEIGHT >> 1);   /* Rounding */ | 
 |  | 
 |             /* Normalize back to 0-255 */ | 
 |             Temp = Temp >> VP8_FILTER_SHIFT; | 
 |  | 
 |             if (Temp < 0) | 
 |                 Temp = 0; | 
 |             else if (Temp > 255) | 
 |                 Temp = 255; | 
 |  | 
 |             output_ptr[j] = (unsigned char)Temp; | 
 |             src_ptr++; | 
 |         } | 
 |  | 
 |         /* Start next row */ | 
 |         src_ptr    += src_pixels_per_line - output_width; | 
 |         output_ptr += output_pitch; | 
 |     } | 
 | } | 
 |  | 
 |  | 
 | static void filter_block2d | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     unsigned char  *output_ptr, | 
 |     unsigned int src_pixels_per_line, | 
 |     int output_pitch, | 
 |     const short  *HFilter, | 
 |     const short  *VFilter | 
 | ) | 
 | { | 
 |     int FData[9*4]; /* Temp data buffer used in filtering */ | 
 |  | 
 |     /* First filter 1-D horizontally... */ | 
 |     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); | 
 |  | 
 |     /* then filter verticaly... */ | 
 |     filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); | 
 | } | 
 |  | 
 |  | 
 | void vp8_sixtap_predict4x4_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int   src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int dst_pitch | 
 | ) | 
 | { | 
 |     const short  *HFilter; | 
 |     const short  *VFilter; | 
 |  | 
 |     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
 |     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
 |  | 
 |     filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); | 
 | } | 
 | void vp8_sixtap_predict8x8_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int  src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int  dst_pitch | 
 | ) | 
 | { | 
 |     const short  *HFilter; | 
 |     const short  *VFilter; | 
 |     int FData[13*16];   /* Temp data buffer used in filtering */ | 
 |  | 
 |     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
 |     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
 |  | 
 |     /* First filter 1-D horizontally... */ | 
 |     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); | 
 |  | 
 |  | 
 |     /* then filter verticaly... */ | 
 |     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); | 
 |  | 
 | } | 
 |  | 
 | void vp8_sixtap_predict8x4_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int  src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int  dst_pitch | 
 | ) | 
 | { | 
 |     const short  *HFilter; | 
 |     const short  *VFilter; | 
 |     int FData[13*16];   /* Temp data buffer used in filtering */ | 
 |  | 
 |     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
 |     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
 |  | 
 |     /* First filter 1-D horizontally... */ | 
 |     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); | 
 |  | 
 |  | 
 |     /* then filter verticaly... */ | 
 |     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); | 
 |  | 
 | } | 
 |  | 
 | void vp8_sixtap_predict16x16_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int  src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int  dst_pitch | 
 | ) | 
 | { | 
 |     const short  *HFilter; | 
 |     const short  *VFilter; | 
 |     int FData[21*24];   /* Temp data buffer used in filtering */ | 
 |  | 
 |  | 
 |     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
 |     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
 |  | 
 |     /* First filter 1-D horizontally... */ | 
 |     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); | 
 |  | 
 |     /* then filter verticaly... */ | 
 |     filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); | 
 |  | 
 | } | 
 |  | 
 |  | 
 | /**************************************************************************** | 
 |  * | 
 |  *  ROUTINE       : filter_block2d_bil_first_pass | 
 |  * | 
 |  *  INPUTS        : UINT8  *src_ptr    : Pointer to source block. | 
 |  *                  UINT32  src_stride : Stride of source block. | 
 |  *                  UINT32  height     : Block height. | 
 |  *                  UINT32  width      : Block width. | 
 |  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps. | 
 |  * | 
 |  *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block. | 
 |  * | 
 |  *  RETURNS       : void | 
 |  * | 
 |  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block | 
 |  *                  in the horizontal direction to produce the filtered output | 
 |  *                  block. Used to implement first-pass of 2-D separable filter. | 
 |  * | 
 |  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass. | 
 |  *                  Two filter taps should sum to VP8_FILTER_WEIGHT. | 
 |  * | 
 |  ****************************************************************************/ | 
 | static void filter_block2d_bil_first_pass | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     unsigned short *dst_ptr, | 
 |     unsigned int    src_stride, | 
 |     unsigned int    height, | 
 |     unsigned int    width, | 
 |     const short    *vp8_filter | 
 | ) | 
 | { | 
 |     unsigned int i, j; | 
 |  | 
 |     for (i = 0; i < height; i++) | 
 |     { | 
 |         for (j = 0; j < width; j++) | 
 |         { | 
 |             /* Apply bilinear filter */ | 
 |             dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + | 
 |                           ((int)src_ptr[1] * vp8_filter[1]) + | 
 |                           (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; | 
 |             src_ptr++; | 
 |         } | 
 |  | 
 |         /* Next row... */ | 
 |         src_ptr += src_stride - width; | 
 |         dst_ptr += width; | 
 |     } | 
 | } | 
 |  | 
 | /**************************************************************************** | 
 |  * | 
 |  *  ROUTINE       : filter_block2d_bil_second_pass | 
 |  * | 
 |  *  INPUTS        : INT32  *src_ptr    : Pointer to source block. | 
 |  *                  UINT32  dst_pitch  : Destination block pitch. | 
 |  *                  UINT32  height     : Block height. | 
 |  *                  UINT32  width      : Block width. | 
 |  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps. | 
 |  * | 
 |  *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block. | 
 |  * | 
 |  *  RETURNS       : void | 
 |  * | 
 |  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block | 
 |  *                  in the vertical direction to produce the filtered output | 
 |  *                  block. Used to implement second-pass of 2-D separable filter. | 
 |  * | 
 |  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. | 
 |  *                  Two filter taps should sum to VP8_FILTER_WEIGHT. | 
 |  * | 
 |  ****************************************************************************/ | 
 | static void filter_block2d_bil_second_pass | 
 | ( | 
 |     unsigned short *src_ptr, | 
 |     unsigned char  *dst_ptr, | 
 |     int             dst_pitch, | 
 |     unsigned int    height, | 
 |     unsigned int    width, | 
 |     const short    *vp8_filter | 
 | ) | 
 | { | 
 |     unsigned int  i, j; | 
 |     int  Temp; | 
 |  | 
 |     for (i = 0; i < height; i++) | 
 |     { | 
 |         for (j = 0; j < width; j++) | 
 |         { | 
 |             /* Apply filter */ | 
 |             Temp = ((int)src_ptr[0]     * vp8_filter[0]) + | 
 |                    ((int)src_ptr[width] * vp8_filter[1]) + | 
 |                    (VP8_FILTER_WEIGHT / 2); | 
 |             dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); | 
 |             src_ptr++; | 
 |         } | 
 |  | 
 |         /* Next row... */ | 
 |         dst_ptr += dst_pitch; | 
 |     } | 
 | } | 
 |  | 
 |  | 
 | /**************************************************************************** | 
 |  * | 
 |  *  ROUTINE       : filter_block2d_bil | 
 |  * | 
 |  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block. | 
 |  *                  UINT32  src_pitch        : Stride of source block. | 
 |  *                  UINT32  dst_pitch        : Stride of destination block. | 
 |  *                  INT32  *HFilter          : Array of 2 horizontal filter taps. | 
 |  *                  INT32  *VFilter          : Array of 2 vertical filter taps. | 
 |  *                  INT32  Width             : Block width | 
 |  *                  INT32  Height            : Block height | 
 |  * | 
 |  *  OUTPUTS       : UINT16 *dst_ptr       : Pointer to filtered block. | 
 |  * | 
 |  *  RETURNS       : void | 
 |  * | 
 |  *  FUNCTION      : 2-D filters an input block by applying a 2-tap | 
 |  *                  bi-linear filter horizontally followed by a 2-tap | 
 |  *                  bi-linear filter vertically on the result. | 
 |  * | 
 |  *  SPECIAL NOTES : The largest block size can be handled here is 16x16 | 
 |  * | 
 |  ****************************************************************************/ | 
 | static void filter_block2d_bil | 
 | ( | 
 |     unsigned char *src_ptr, | 
 |     unsigned char *dst_ptr, | 
 |     unsigned int   src_pitch, | 
 |     unsigned int   dst_pitch, | 
 |     const short   *HFilter, | 
 |     const short   *VFilter, | 
 |     int            Width, | 
 |     int            Height | 
 | ) | 
 | { | 
 |  | 
 |     unsigned short FData[17*16];    /* Temp data buffer used in filtering */ | 
 |  | 
 |     /* First filter 1-D horizontally... */ | 
 |     filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); | 
 |  | 
 |     /* then 1-D vertically... */ | 
 |     filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); | 
 | } | 
 |  | 
 |  | 
 | void vp8_bilinear_predict4x4_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int   src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int dst_pitch | 
 | ) | 
 | { | 
 |     const short *HFilter; | 
 |     const short *VFilter; | 
 |  | 
 |     HFilter = vp8_bilinear_filters[xoffset]; | 
 |     VFilter = vp8_bilinear_filters[yoffset]; | 
 | #if 0 | 
 |     { | 
 |         int i; | 
 |         unsigned char temp1[16]; | 
 |         unsigned char temp2[16]; | 
 |  | 
 |         bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); | 
 |         filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); | 
 |  | 
 |         for (i = 0; i < 16; i++) | 
 |         { | 
 |             if (temp1[i] != temp2[i]) | 
 |             { | 
 |                 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); | 
 |                 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); | 
 |             } | 
 |         } | 
 |     } | 
 | #endif | 
 |     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); | 
 |  | 
 | } | 
 |  | 
 | void vp8_bilinear_predict8x8_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int  src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int  dst_pitch | 
 | ) | 
 | { | 
 |     const short *HFilter; | 
 |     const short *VFilter; | 
 |  | 
 |     HFilter = vp8_bilinear_filters[xoffset]; | 
 |     VFilter = vp8_bilinear_filters[yoffset]; | 
 |  | 
 |     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); | 
 |  | 
 | } | 
 |  | 
 | void vp8_bilinear_predict8x4_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int  src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int  dst_pitch | 
 | ) | 
 | { | 
 |     const short *HFilter; | 
 |     const short *VFilter; | 
 |  | 
 |     HFilter = vp8_bilinear_filters[xoffset]; | 
 |     VFilter = vp8_bilinear_filters[yoffset]; | 
 |  | 
 |     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); | 
 |  | 
 | } | 
 |  | 
 | void vp8_bilinear_predict16x16_c | 
 | ( | 
 |     unsigned char  *src_ptr, | 
 |     int  src_pixels_per_line, | 
 |     int  xoffset, | 
 |     int  yoffset, | 
 |     unsigned char *dst_ptr, | 
 |     int  dst_pitch | 
 | ) | 
 | { | 
 |     const short *HFilter; | 
 |     const short *VFilter; | 
 |  | 
 |     HFilter = vp8_bilinear_filters[xoffset]; | 
 |     VFilter = vp8_bilinear_filters[yoffset]; | 
 |  | 
 |     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); | 
 | } |