|  | /* | 
|  | *  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
|  | * | 
|  | *  Use of this source code is governed by a BSD-style license | 
|  | *  that can be found in the LICENSE file in the root of the source | 
|  | *  tree. An additional intellectual property rights grant can be found | 
|  | *  in the file PATENTS.  All contributing project authors may | 
|  | *  be found in the AUTHORS file in the root of the source tree. | 
|  | */ | 
|  |  | 
|  |  | 
|  | #include "filter.h" | 
|  |  | 
|  | DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = | 
|  | { | 
|  | { 128,   0 }, | 
|  | { 112,  16 }, | 
|  | {  96,  32 }, | 
|  | {  80,  48 }, | 
|  | {  64,  64 }, | 
|  | {  48,  80 }, | 
|  | {  32,  96 }, | 
|  | {  16, 112 } | 
|  | }; | 
|  |  | 
|  | DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = | 
|  | { | 
|  |  | 
|  | { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ | 
|  | { 0, -6,  123,   12,  -1,  0 }, | 
|  | { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */ | 
|  | { 0, -9,   93,   50,  -6,  0 }, | 
|  | { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */ | 
|  | { 0, -6,   50,   93,  -9,  0 }, | 
|  | { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */ | 
|  | { 0, -1,   12,  123,  -6,  0 }, | 
|  | }; | 
|  |  | 
|  | static void filter_block2d_first_pass | 
|  | ( | 
|  | unsigned char *src_ptr, | 
|  | int *output_ptr, | 
|  | unsigned int src_pixels_per_line, | 
|  | unsigned int pixel_step, | 
|  | unsigned int output_height, | 
|  | unsigned int output_width, | 
|  | const short *vp8_filter | 
|  | ) | 
|  | { | 
|  | unsigned int i, j; | 
|  | int  Temp; | 
|  |  | 
|  | for (i = 0; i < output_height; i++) | 
|  | { | 
|  | for (j = 0; j < output_width; j++) | 
|  | { | 
|  | Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + | 
|  | ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + | 
|  | ((int)src_ptr[0]                 * vp8_filter[2]) + | 
|  | ((int)src_ptr[pixel_step]         * vp8_filter[3]) + | 
|  | ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) + | 
|  | ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) + | 
|  | (VP8_FILTER_WEIGHT >> 1);      /* Rounding */ | 
|  |  | 
|  | /* Normalize back to 0-255 */ | 
|  | Temp = Temp >> VP8_FILTER_SHIFT; | 
|  |  | 
|  | if (Temp < 0) | 
|  | Temp = 0; | 
|  | else if (Temp > 255) | 
|  | Temp = 255; | 
|  |  | 
|  | output_ptr[j] = Temp; | 
|  | src_ptr++; | 
|  | } | 
|  |  | 
|  | /* Next row... */ | 
|  | src_ptr    += src_pixels_per_line - output_width; | 
|  | output_ptr += output_width; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void filter_block2d_second_pass | 
|  | ( | 
|  | int *src_ptr, | 
|  | unsigned char *output_ptr, | 
|  | int output_pitch, | 
|  | unsigned int src_pixels_per_line, | 
|  | unsigned int pixel_step, | 
|  | unsigned int output_height, | 
|  | unsigned int output_width, | 
|  | const short *vp8_filter | 
|  | ) | 
|  | { | 
|  | unsigned int i, j; | 
|  | int  Temp; | 
|  |  | 
|  | for (i = 0; i < output_height; i++) | 
|  | { | 
|  | for (j = 0; j < output_width; j++) | 
|  | { | 
|  | /* Apply filter */ | 
|  | Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + | 
|  | ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + | 
|  | ((int)src_ptr[0]                 * vp8_filter[2]) + | 
|  | ((int)src_ptr[pixel_step]         * vp8_filter[3]) + | 
|  | ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) + | 
|  | ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) + | 
|  | (VP8_FILTER_WEIGHT >> 1);   /* Rounding */ | 
|  |  | 
|  | /* Normalize back to 0-255 */ | 
|  | Temp = Temp >> VP8_FILTER_SHIFT; | 
|  |  | 
|  | if (Temp < 0) | 
|  | Temp = 0; | 
|  | else if (Temp > 255) | 
|  | Temp = 255; | 
|  |  | 
|  | output_ptr[j] = (unsigned char)Temp; | 
|  | src_ptr++; | 
|  | } | 
|  |  | 
|  | /* Start next row */ | 
|  | src_ptr    += src_pixels_per_line - output_width; | 
|  | output_ptr += output_pitch; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
/*
 * Six-tap 2-D filter for a 4x4 block.
 *
 * The horizontal pass starts two rows above src_ptr and produces 9 rows of
 * 4 intermediate values.  The vertical pass then starts at the third
 * intermediate row (offset 2 * 4) and writes 4 rows of 4 pixels to
 * output_ptr.
 *
 *   src_ptr             : top-left pixel of the source block.
 *   output_ptr          : top-left pixel of the destination block.
 *   src_pixels_per_line : distance between the starts of two source rows.
 *   output_pitch        : distance between the starts of two output rows.
 *   HFilter             : six taps for the horizontal pass.
 *   VFilter             : six taps for the vertical pass.
 */
static void filter_block2d
(
    unsigned char  *src_ptr,
    unsigned char  *output_ptr,
    unsigned int src_pixels_per_line,
    int output_pitch,
    const short  *HFilter,
    const short  *VFilter
)
{
    /* Output of the horizontal pass: 9 rows of 4 values. */
    int intermediate[9 * 4];

    /* Horizontal pass over rows -2 .. +6 of the source block. */
    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line),
                              intermediate, src_pixels_per_line,
                              1, 9, 4, HFilter);

    /* Vertical pass, skipping the two leading intermediate rows. */
    filter_block2d_second_pass(intermediate + 2 * 4, output_ptr, output_pitch,
                               4, 4, 4, 4, VFilter);
}
|  |  | 
|  |  | 
|  | void vp8_sixtap_predict4x4_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int   src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int dst_pitch | 
|  | ) | 
|  | { | 
|  | const short  *HFilter; | 
|  | const short  *VFilter; | 
|  |  | 
|  | HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
|  | VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
|  |  | 
|  | filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); | 
|  | } | 
|  | void vp8_sixtap_predict8x8_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int  src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int  dst_pitch | 
|  | ) | 
|  | { | 
|  | const short  *HFilter; | 
|  | const short  *VFilter; | 
|  | int FData[13*16];   /* Temp data buffer used in filtering */ | 
|  |  | 
|  | HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
|  | VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
|  |  | 
|  | /* First filter 1-D horizontally... */ | 
|  | filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); | 
|  |  | 
|  |  | 
|  | /* then filter verticaly... */ | 
|  | filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); | 
|  |  | 
|  | } | 
|  |  | 
|  | void vp8_sixtap_predict8x4_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int  src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int  dst_pitch | 
|  | ) | 
|  | { | 
|  | const short  *HFilter; | 
|  | const short  *VFilter; | 
|  | int FData[13*16];   /* Temp data buffer used in filtering */ | 
|  |  | 
|  | HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
|  | VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
|  |  | 
|  | /* First filter 1-D horizontally... */ | 
|  | filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); | 
|  |  | 
|  |  | 
|  | /* then filter verticaly... */ | 
|  | filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); | 
|  |  | 
|  | } | 
|  |  | 
|  | void vp8_sixtap_predict16x16_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int  src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int  dst_pitch | 
|  | ) | 
|  | { | 
|  | const short  *HFilter; | 
|  | const short  *VFilter; | 
|  | int FData[21*24];   /* Temp data buffer used in filtering */ | 
|  |  | 
|  |  | 
|  | HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */ | 
|  | VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */ | 
|  |  | 
|  | /* First filter 1-D horizontally... */ | 
|  | filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); | 
|  |  | 
|  | /* then filter verticaly... */ | 
|  | filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); | 
|  |  | 
|  | } | 
|  |  | 
|  |  | 
|  | /**************************************************************************** | 
|  | * | 
|  | *  ROUTINE       : filter_block2d_bil_first_pass | 
|  | * | 
|  | *  INPUTS        : UINT8  *src_ptr    : Pointer to source block. | 
|  | *                  UINT32  src_stride : Stride of source block. | 
|  | *                  UINT32  height     : Block height. | 
|  | *                  UINT32  width      : Block width. | 
|  | *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps. | 
|  | * | 
|  | *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block. | 
|  | * | 
|  | *  RETURNS       : void | 
|  | * | 
|  | *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block | 
|  | *                  in the horizontal direction to produce the filtered output | 
|  | *                  block. Used to implement first-pass of 2-D separable filter. | 
|  | * | 
|  | *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass. | 
|  | *                  Two filter taps should sum to VP8_FILTER_WEIGHT. | 
|  | * | 
|  | ****************************************************************************/ | 
|  | static void filter_block2d_bil_first_pass | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | unsigned short *dst_ptr, | 
|  | unsigned int    src_stride, | 
|  | unsigned int    height, | 
|  | unsigned int    width, | 
|  | const short    *vp8_filter | 
|  | ) | 
|  | { | 
|  | unsigned int i, j; | 
|  |  | 
|  | for (i = 0; i < height; i++) | 
|  | { | 
|  | for (j = 0; j < width; j++) | 
|  | { | 
|  | /* Apply bilinear filter */ | 
|  | dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + | 
|  | ((int)src_ptr[1] * vp8_filter[1]) + | 
|  | (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; | 
|  | src_ptr++; | 
|  | } | 
|  |  | 
|  | /* Next row... */ | 
|  | src_ptr += src_stride - width; | 
|  | dst_ptr += width; | 
|  | } | 
|  | } | 
|  |  | 
|  | /**************************************************************************** | 
|  | * | 
|  | *  ROUTINE       : filter_block2d_bil_second_pass | 
|  | * | 
|  | *  INPUTS        : INT32  *src_ptr    : Pointer to source block. | 
|  | *                  UINT32  dst_pitch  : Destination block pitch. | 
|  | *                  UINT32  height     : Block height. | 
|  | *                  UINT32  width      : Block width. | 
|  | *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps. | 
|  | * | 
|  | *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block. | 
|  | * | 
|  | *  RETURNS       : void | 
|  | * | 
|  | *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block | 
|  | *                  in the vertical direction to produce the filtered output | 
|  | *                  block. Used to implement second-pass of 2-D separable filter. | 
|  | * | 
|  | *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. | 
|  | *                  Two filter taps should sum to VP8_FILTER_WEIGHT. | 
|  | * | 
|  | ****************************************************************************/ | 
|  | static void filter_block2d_bil_second_pass | 
|  | ( | 
|  | unsigned short *src_ptr, | 
|  | unsigned char  *dst_ptr, | 
|  | int             dst_pitch, | 
|  | unsigned int    height, | 
|  | unsigned int    width, | 
|  | const short    *vp8_filter | 
|  | ) | 
|  | { | 
|  | unsigned int  i, j; | 
|  | int  Temp; | 
|  |  | 
|  | for (i = 0; i < height; i++) | 
|  | { | 
|  | for (j = 0; j < width; j++) | 
|  | { | 
|  | /* Apply filter */ | 
|  | Temp = ((int)src_ptr[0]     * vp8_filter[0]) + | 
|  | ((int)src_ptr[width] * vp8_filter[1]) + | 
|  | (VP8_FILTER_WEIGHT / 2); | 
|  | dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); | 
|  | src_ptr++; | 
|  | } | 
|  |  | 
|  | /* Next row... */ | 
|  | dst_ptr += dst_pitch; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : unsigned char *src_ptr   : Pointer to source block.
 *                  unsigned int   src_pitch : Stride of source block.
 *                  unsigned int   dst_pitch : Stride of destination block.
 *                  const short   *HFilter   : Array of 2 horizontal filter taps.
 *                  const short   *VFilter   : Array of 2 vertical filter taps.
 *                  int            Width     : Block width
 *                  int            Height    : Block height
 *
 *  OUTPUTS       : unsigned char *dst_ptr   : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The horizontal pass produces Height + 1 rows of Width
 *                  values in a buffer of 17 * 16 entries, so the largest
 *                  block size that can be handled here is 16x16.  The
 *                  sizes are not checked.
 *
 ****************************************************************************/
static void filter_block2d_bil
(
    unsigned char *src_ptr,
    unsigned char *dst_ptr,
    unsigned int   src_pitch,
    unsigned int   dst_pitch,
    const short   *HFilter,
    const short   *VFilter,
    int            Width,
    int            Height
)
{
    /* Output of the horizontal pass: up to 17 rows of up to 16 values. */
    unsigned short intermediate[17 * 16];

    /* Horizontal pass; one extra row feeds the vertical filter's second tap. */
    filter_block2d_bil_first_pass(src_ptr, intermediate, src_pitch,
                                  Height + 1, Width, HFilter);

    /* Vertical pass over the intermediate rows. */
    filter_block2d_bil_second_pass(intermediate, dst_ptr, dst_pitch,
                                   Height, Width, VFilter);
}
|  |  | 
|  |  | 
|  | void vp8_bilinear_predict4x4_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int   src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int dst_pitch | 
|  | ) | 
|  | { | 
|  | const short *HFilter; | 
|  | const short *VFilter; | 
|  |  | 
|  | HFilter = vp8_bilinear_filters[xoffset]; | 
|  | VFilter = vp8_bilinear_filters[yoffset]; | 
|  | #if 0 | 
|  | { | 
|  | int i; | 
|  | unsigned char temp1[16]; | 
|  | unsigned char temp2[16]; | 
|  |  | 
|  | bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); | 
|  | filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); | 
|  |  | 
|  | for (i = 0; i < 16; i++) | 
|  | { | 
|  | if (temp1[i] != temp2[i]) | 
|  | { | 
|  | bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); | 
|  | filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); | 
|  | } | 
|  | } | 
|  | } | 
|  | #endif | 
|  | filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); | 
|  |  | 
|  | } | 
|  |  | 
|  | void vp8_bilinear_predict8x8_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int  src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int  dst_pitch | 
|  | ) | 
|  | { | 
|  | const short *HFilter; | 
|  | const short *VFilter; | 
|  |  | 
|  | HFilter = vp8_bilinear_filters[xoffset]; | 
|  | VFilter = vp8_bilinear_filters[yoffset]; | 
|  |  | 
|  | filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); | 
|  |  | 
|  | } | 
|  |  | 
|  | void vp8_bilinear_predict8x4_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int  src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int  dst_pitch | 
|  | ) | 
|  | { | 
|  | const short *HFilter; | 
|  | const short *VFilter; | 
|  |  | 
|  | HFilter = vp8_bilinear_filters[xoffset]; | 
|  | VFilter = vp8_bilinear_filters[yoffset]; | 
|  |  | 
|  | filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); | 
|  |  | 
|  | } | 
|  |  | 
|  | void vp8_bilinear_predict16x16_c | 
|  | ( | 
|  | unsigned char  *src_ptr, | 
|  | int  src_pixels_per_line, | 
|  | int  xoffset, | 
|  | int  yoffset, | 
|  | unsigned char *dst_ptr, | 
|  | int  dst_pitch | 
|  | ) | 
|  | { | 
|  | const short *HFilter; | 
|  | const short *VFilter; | 
|  |  | 
|  | HFilter = vp8_bilinear_filters[xoffset]; | 
|  | VFilter = vp8_bilinear_filters[yoffset]; | 
|  |  | 
|  | filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); | 
|  | } |