| /* |
| * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| |
| /**************************************************************************** |
| * |
| * Module Title : gen_scalers.c |
| * |
| * Description : Generic image scaling functions. |
| * |
| ***************************************************************************/ |
| |
| /**************************************************************************** |
| * Header Files |
| ****************************************************************************/ |
| #include "vpx_scale/vpxscale.h" |
| |
| /**************************************************************************** |
| * Imports |
| ****************************************************************************/ |
| |
/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_4_5_scale_c64
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : Number of source pixels in
 *                                                the line; consumed four at
 *                                                a time.
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : Stride of destination (NOT USED).
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies horizontal line of pixels from source to
 *                  destination scaling up by 4 to 5.
 *
 *  SPECIAL NOTES : Each group of four source pixels a, b, c, d (plus e, the
 *                  first pixel of the following group) produces five output
 *                  pixels: a, then blends of (a,b), (b,c), (c,d) and (d,e)
 *                  weighted 51:205, 102:154, 154:102 and 205:51 out of 256.
 *                  The final group has no following pixel, so its fifth
 *                  output repeats d.
 *                  The low byte of each loaded word is treated as the first
 *                  pixel of the group.
 *
 ****************************************************************************/
static
void horizontal_line_4_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    unsigned int i;
    unsigned int limit;
    unsigned int ba, cb, dc, ed;
    unsigned char *restrict des = dest;
    unsigned int *restrict src = (unsigned int *)source;
    unsigned int const_51_205, const_102_154,
             const_205_51, const_154_102;

    unsigned int src_current, src_next;

    (void) dest_width;

    // Constants that are to be used for the filtering.  For
    // best speed we are going to want to right shift by 16.
    // In the generic version they were shift by 8, so put
    // an extra 8 in now so that 16 will come out later.
    const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Every group except the last is handled in the loop.  Clamp the
    // bound so that a source_width below 4 cannot wrap the unsigned
    // subtraction and send the loop far past the end of the line.
    limit = (source_width > 4) ? (source_width - 4) : 0;

    // 5 points are needed to filter to give 5 output points.
    // A load can pull up 4 at a time, and one needs to be
    // "borrowed" from the next set of data.  So instead of
    // loading those 5 points each time, "steal" a point from
    // the next set and only load up 4 each time through.
    src_current = _mem4(src);

    for (i = 0; i < limit; i += 4)
    {
        // Step to the following group BEFORE loading it.  A post-increment
        // here would reload the group already held in src_current, so the
        // first group would be emitted twice and the last one never.
        src_next = _mem4(++src);

        // Reorder the data so that it is ready for the
        // dot product.
        ba = _unpklu4(src_current);
        cb = _unpkhu4(_rotl(src_current, 8));
        dc = _unpkhu4(src_current);
        ed = _unpkhu4(_shrmb(src_next, src_current));

        // Use the dot product with round and shift.
        des [0] = src_current & 0xff;
        des [1] = _dotprsu2(ba, const_205_51);
        des [2] = _dotprsu2(cb, const_154_102);
        des [3] = _dotprsu2(dc, const_102_154);
        des [4] = _dotprsu2(ed, const_51_205);

        des += 5;

        // Reuse the loaded values next time around.
        src_current = src_next;
    }

    // Filter the last set of points.  Normally a point from the next set
    // would be used, but there is no next set, so just fill.
    ba = _unpklu4(src_current);
    cb = _unpkhu4(_rotl(src_current, 8));
    dc = _unpkhu4(src_current);

    des [0] = src_current & 0xff;
    des [1] = _dotprsu2(ba, const_205_51);
    des [2] = _dotprsu2(cb, const_154_102);
    des [3] = _dotprsu2(dc, const_102_154);
    // Fill with the LAST pixel of the group (top byte of the word), the
    // same edge replication the last-band vertical routine applies.
    des [4] = src_current >> 24;

}
/****************************************************************************
 *
 *  ROUTINE       : vertical_band_4_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales vertical band of pixels by scale 4 to 5. The
 *                  height of the band scaled is 4-pixels.
 *
 *  SPECIAL NOTES : Works in place, one column at a time: rows 0-3 and row 5
 *                  (the first line of the band below) are read, rows 1-4
 *                  are rewritten.  Row 0 is left untouched.
 *                  The values for column i+1 are fetched before column i is
 *                  stored, so the final pass also reads one column beyond
 *                  dest_width; those values are never used.
 *                  NOTE(review): the read and write pointers are both
 *                  restrict-qualified yet both start at dest.
 *
 ****************************************************************************/
static
void vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    // Filter taps, each already shifted left by 8.
    const unsigned int taps_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int taps_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int taps_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int taps_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Byte offsets of the rows touched, relative to row 0.
    const unsigned int row1 = dest_pitch;
    const unsigned int row2 = dest_pitch * 2;
    const unsigned int row3 = dest_pitch * 3;
    const unsigned int row4 = dest_pitch * 4;
    const unsigned int row5 = dest_pitch * 5;

    unsigned char *restrict rd = dest;
    unsigned char *restrict wr = dest;
    unsigned int p0, p1, p2, p3, p5;
    unsigned int pair10, pair21, pair32, pair53;
    unsigned int col;

    // Prime the pipeline with column 0 so that each pass through the
    // loop can fetch the next column without waiting on its own stores.
    p0 = rd [0];
    p1 = rd [row1];
    p2 = rd [row2];
    p3 = rd [row3];
    p5 = rd [row5];
    ++rd;

    for (col = 0; col != dest_width; ++col)
    {
        // Pair up vertically adjacent pixels of the current column.
        pair10 = _pack2(p1, p0);
        pair21 = _pack2(p2, p1);
        pair32 = _pack2(p3, p2);
        pair53 = _pack2(p5, p3);

        // Fetch the next column before the current one is written back.
        p0 = rd [0];
        p1 = rd [row1];
        p2 = rd [row2];
        p3 = rd [row3];
        p5 = rd [row5];
        ++rd;

        wr [row1] = _dotprsu2(pair10, taps_205_51);
        wr [row2] = _dotprsu2(pair21, taps_154_102);
        wr [row3] = _dotprsu2(pair32, taps_102_154);
        wr [row4] = _dotprsu2(pair53, taps_51_205);

        ++wr;
    }
}
| |
/****************************************************************************
 *
 *  ROUTINE       : last_vertical_band_4_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales last vertical band of pixels by scale 4 to 5. The
 *                  height of the band scaled is 4-pixels.
 *
 *  SPECIAL NOTES : The routine does not have available the first line of
 *                  the band below the current band, since this is the
 *                  last band.  Row 4 is therefore a copy of the original
 *                  row 3 of the same column.
 *                  Works in place: rows 0-3 are read, rows 1-4 are written.
 *                  The values for column i+1 are fetched before column i is
 *                  stored, so the final pass also reads one column beyond
 *                  dest_width; those values are never used.
 *
 ****************************************************************************/
static
void last_vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c, d;
    unsigned int ba, cb, dc;
    unsigned int fill;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;
    unsigned int const_102_154, const_205_51, const_154_102;

    const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Pre-load column 0 so the loop can fetch one column ahead.
    a = src [0];
    b = src [dest_pitch];
    c = src [dest_pitch*2];
    d = src [dest_pitch*3];
    src ++;

    for (i = 0; i < dest_width; ++i)
    {
        ba = _pack2(b, a);
        cb = _pack2(c, b);
        dc = _pack2(d, c);

        // Hold on to this column's row-3 pixel: d is about to be replaced
        // by the next column's value, and writing that to row 4 would
        // shift the replicated line by one pixel.
        fill = d;

        a = src [0];
        b = src [dest_pitch];
        c = src [dest_pitch*2];
        d = src [dest_pitch*3];
        src ++;

        des [dest_pitch]   = _dotprsu2(ba, const_205_51);
        des [dest_pitch*2] = _dotprsu2(cb, const_154_102);
        des [dest_pitch*3] = _dotprsu2(dc, const_102_154);
        des [dest_pitch*4] = (unsigned char) fill;

        des++;
    }
}
| |
/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_3_5_scale_c64
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : Number of source pixels in
 *                                                the line; consumed three
 *                                                at a time.
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : Stride of destination (NOT USED).
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies horizontal line of pixels from source to
 *                  destination scaling up by 3 to 5.
 *
 *  SPECIAL NOTES : Each group of three source pixels a, b, c (plus d, the
 *                  first pixel of the following group) produces five output
 *                  pixels: a, then blends of (a,b), (b,c), (b,c) and (c,d)
 *                  weighted 102:154, 205:51, 51:205 and 154:102 out of 256.
 *                  The final group has no following pixel, so its fifth
 *                  output repeats c.
 *
 ****************************************************************************/
static
void horizontal_line_3_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    unsigned int i;
    unsigned int limit;
    unsigned int a, b, c;
    unsigned int ba, cb, dc;
    unsigned int src_current;
    unsigned char *restrict des = dest;
    unsigned char *restrict src = (unsigned char *)source;
    unsigned int const_51_205, const_102_154,
             const_205_51, const_154_102;

    (void) dest_width;

    const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Every group except the last is handled in the loop.  Clamp the
    // bound so that a source_width below 3 cannot wrap the unsigned
    // subtraction and send the loop far past the end of the line.
    limit = (source_width > 3) ? (source_width - 3) : 0;

    for (i = 0; i < limit; i += 3)
    {
        // One unaligned load fetches the three pixels of this group
        // plus the first pixel of the next one.
        src_current = _mem4(src);

        // Reorder the data so that it is ready for the
        // dot product.
        ba = _unpklu4(src_current);
        cb = _unpkhu4(_rotl(src_current, 8));
        dc = _unpkhu4(src_current);

        des [0] = src_current & 0xff;
        des [1] = _dotprsu2(ba, const_154_102);
        des [2] = _dotprsu2(cb, const_51_205);
        des [3] = _dotprsu2(cb, const_205_51);
        des [4] = _dotprsu2(dc, const_102_154);

        src += 3;
        des += 5;
    }

    // Last group: only three pixels remain, so fetch them one byte at a
    // time.  A 4-byte load here would read one byte beyond the line for
    // a value that is never used.
    a = src [0];
    b = src [1];
    c = src [2];

    ba = _pack2(b, a);
    cb = _pack2(c, b);

    des [0] = (unsigned char) a;
    des [1] = _dotprsu2(ba, const_154_102);
    des [2] = _dotprsu2(cb, const_51_205);
    des [3] = _dotprsu2(cb, const_205_51);
    // No next group to blend with, so repeat the last pixel.
    des [4] = (unsigned char) c;

}
| |
/****************************************************************************
 *
 *  ROUTINE       : vertical_band_3_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales vertical band of pixels by scale 3 to 5. The
 *                  height of the band scaled is 3-pixels.
 *
 *  SPECIAL NOTES : Works in place, one column at a time: rows 0-2 and row 5
 *                  (the first line of the band below) are read, rows 1-4
 *                  are rewritten.  Row 0 is left untouched.
 *                  The values for column i+1 are fetched before column i is
 *                  stored, so the final pass also reads one column beyond
 *                  dest_width; those values are never used.
 *                  NOTE(review): the read and write pointers are both
 *                  restrict-qualified yet both start at dest.
 *
 ****************************************************************************/
static
void vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    // Filter taps, each already shifted left by 8.
    const unsigned int taps_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int taps_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int taps_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int taps_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Byte offsets of the rows touched, relative to row 0.
    const unsigned int row1 = dest_pitch;
    const unsigned int row2 = dest_pitch * 2;
    const unsigned int row3 = dest_pitch * 3;
    const unsigned int row4 = dest_pitch * 4;
    const unsigned int row5 = dest_pitch * 5;

    unsigned char *restrict rd = dest;
    unsigned char *restrict wr = dest;
    unsigned int p0, p1, p2, p5;
    unsigned int pair10, pair21, pair52;
    unsigned int col;

    // Prime the pipeline with column 0 so that each pass through the
    // loop can fetch the next column without waiting on its own stores.
    p0 = rd [0];
    p1 = rd [row1];
    p2 = rd [row2];
    p5 = rd [row5];
    ++rd;

    for (col = 0; col != dest_width; ++col)
    {
        // Pair up vertically adjacent pixels of the current column.
        pair10 = _pack2(p1, p0);
        pair21 = _pack2(p2, p1);
        pair52 = _pack2(p5, p2);

        // Fetch the next column before the current one is written back.
        p0 = rd [0];
        p1 = rd [row1];
        p2 = rd [row2];
        p5 = rd [row5];
        ++rd;

        // Rows 2 and 3 both blend the same pixel pair, with mirrored taps.
        wr [row1] = _dotprsu2(pair10, taps_154_102);
        wr [row2] = _dotprsu2(pair21, taps_51_205);
        wr [row3] = _dotprsu2(pair21, taps_205_51);
        wr [row4] = _dotprsu2(pair52, taps_102_154);

        ++wr;
    }
}
| |
/****************************************************************************
 *
 *  ROUTINE       : last_vertical_band_3_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales last vertical band of pixels by scale 3 to 5. The
 *                  height of the band scaled is 3-pixels.
 *
 *  SPECIAL NOTES : The routine does not have available the first line of
 *                  the band below the current band, since this is the
 *                  last band.  Row 4 is therefore a copy of the original
 *                  row 2 of the same column.
 *                  Works in place: rows 0-2 are read, rows 1-4 are written.
 *                  The values for column i+1 are fetched before column i is
 *                  stored, so the final pass also reads one column beyond
 *                  dest_width; those values are never used.
 *
 ****************************************************************************/
static
void last_vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c;
    unsigned int ba, cb;
    unsigned int fill;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;
    unsigned int const_51_205, const_205_51, const_154_102;

    const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Pre-load column 0 so the loop can fetch one column ahead.
    a = src [0];
    b = src [dest_pitch];
    c = src [dest_pitch*2];
    src ++;

    for (i = 0; i < dest_width; ++i)
    {
        ba = _pack2(b, a);
        cb = _pack2(c, b);

        // Hold on to this column's row-2 pixel: c is about to be replaced
        // by the next column's value, and writing that to row 4 would
        // shift the replicated line by one pixel.
        fill = c;

        a = src [0];
        b = src [dest_pitch];
        c = src [dest_pitch*2];
        src ++;

        des [dest_pitch]   = _dotprsu2(ba, const_154_102);
        des [dest_pitch*2] = _dotprsu2(cb, const_51_205);
        des [dest_pitch*3] = _dotprsu2(cb, const_205_51);
        des [dest_pitch*4] = (unsigned char) fill;

        des++;
    }
}
| |
| /**************************************************************************** |
| * |
| * ROUTINE : horizontal_line_1_2_scale_c64 |
| * |
| * INPUTS : const unsigned char *source : Pointer to source data. |
| * unsigned int source_width : Stride of source. |
| * unsigned char *dest : Pointer to destination data. |
| * unsigned int dest_width : Stride of destination (NOT USED). |
| * |
| * OUTPUTS : None. |
| * |
| * RETURNS : void |
| * |
| * FUNCTION : Copies horizontal line of pixels from source to |
| * destination scaling up by 1 to 2. |
| * |
| * SPECIAL NOTES : source width must be a multiple of 4. |
| * |
| ****************************************************************************/ |
| void horizontal_line_1_2_scale_c64 |
| ( |
| const unsigned char *source, |
| unsigned int source_width, |
| unsigned char *dest, |
| unsigned int dest_width |
| ) |
| { |
| unsigned int i; |
| unsigned char *restrict des = dest; |
| unsigned char *restrict src = (unsigned char *)source; |
| unsigned int src7_4i, src4_1i, src3_0i; |
| unsigned int a4_0i, ahi, alo; |
| double src7_0d, src3_0d; |
| const unsigned int k01 = 0x01010101; |
| |
| for (i = 0; i < source_width / 4; i += 1) |
| { |
| // Load up the data from src. Here a wide load is |
| // used to get 8 bytes at once, only 5 will be used |
| // for the actual computation. |
| src7_0d = _memd8(src); |
| src3_0i = _lo(src7_0d); |
| src7_4i = _hi(src7_0d); |
| |
| // Need to average between points. Shift byte 5 into |
| // the lower word. This will result in bytes 5-1 |
| // averaged with 4-0. |
| src4_1i = _shrmb(src7_4i, src3_0i); |
| a4_0i = _avgu4(src4_1i, src3_0i); |
| |
| // Expand the data out. Could do an unpack, however |
| // all but the multiply units are getting pretty hard |
| // here the multiply unit can take some of the computations. |
| src3_0d = _mpyu4(src3_0i, k01); |
| |
| // The averages need to be unpacked so that they are in 16 |
| // bit form and will be able to be interleaved with the |
| // original data |
| ahi = _unpkhu4(a4_0i); |
| alo = _unpklu4(a4_0i); |
| |
| ahi = _swap4(ahi); |
| alo = _swap4(alo); |
| |
| // Mix the average result in with the orginal data. |
| ahi = _hi(src3_0d) | ahi; |
| alo = _lo(src3_0d) | alo; |
| |
| _memd8(des) = _itod(ahi, alo); |
| |
| des += 8; |
| src += 4; |
| } |
| } |
| |
| |
| /**************************************************************************** |
| * |
| * ROUTINE : vertical_band_1_2_scale_c64 |
| * |
| * INPUTS : unsigned char *dest : Pointer to destination data. |
| * unsigned int dest_pitch : Stride of destination data. |
| * unsigned int dest_width : Width of destination data. |
| * |
| * OUTPUTS : None. |
| * |
| * RETURNS : void |
| * |
| * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The |
| * height of the band scaled is 1-pixel. |
| * |
| * SPECIAL NOTES : The routine uses the first line of the band below |
| * the current band. |
| * Destination width must be a multiple of 4. Because the |
| * intput must be, therefore the output must be. |
| * |
| ****************************************************************************/ |
| static |
| void vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) |
| { |
| unsigned int i; |
| unsigned int a, b; |
| unsigned int *restrict line_a = (unsigned int *)dest; |
| unsigned int *restrict line_b = (unsigned int *)(dest + (dest_pitch * 2)); |
| unsigned int *restrict des = (unsigned int *)(dest + dest_pitch); |
| |
| for (i = 0; i < dest_width / 4; i++) |
| { |
| a = _mem4(line_a++); |
| b = _mem4(line_b++); |
| |
| _mem4(des++) = _avgu4(a, b); |
| } |
| } |
| |
| /**************************************************************************** |
| * |
| * ROUTINE : last_vertical_band_1_2_scale_c64 |
| * |
| * INPUTS : unsigned char *dest : Pointer to destination data. |
| * unsigned int dest_pitch : Stride of destination data. |
| * unsigned int dest_width : Width of destination data. |
| * |
| * OUTPUTS : None. |
| * |
| * RETURNS : void |
| * |
| * FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The |
| * height of the band scaled is 1-pixel. |
| * |
| * SPECIAL NOTES : The routine does not have available the first line of |
| * the band below the current band, since this is the |
| * last band. Again, width must be a multiple of 4. |
| * |
| ****************************************************************************/ |
| static |
| void last_vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) |
| { |
| unsigned int i; |
| unsigned int *restrict src = (unsigned int *)dest; |
| unsigned int *restrict des = (unsigned int *)(dest + dest_pitch); |
| |
| for (i = 0; i < dest_width / 4; ++i) |
| { |
| _mem4(des++) = _mem4(src++); |
| } |
| } |
| |
| void |
| register_generic_scalers(void) |
| { |
| vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_c64; |
| vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_c64; |
| vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_c64; |
| vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_c64; |
| vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_c64; |
| vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_c64; |
| vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_c64; |
| vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_c64; |
| vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_c64; |
| } |