|  | /* | 
|  | *  Copyright 2013 The LibYuv Project Authors. All rights reserved. | 
|  | * | 
|  | *  Use of this source code is governed by a BSD-style license | 
|  | *  that can be found in the LICENSE file in the root of the source | 
|  | *  tree. An additional intellectual property rights grant can be found | 
|  | *  in the file PATENTS. All contributing project authors may | 
|  | *  be found in the AUTHORS file in the root of the source tree. | 
|  | */ | 
|  |  | 
|  | #include "libyuv/scale.h" | 
|  |  | 
|  | #include <assert.h> | 
|  | #include <string.h> | 
|  |  | 
|  | #include "libyuv/cpu_id.h" | 
|  | #include "libyuv/planar_functions.h"  // For CopyARGB | 
|  | #include "libyuv/row.h" | 
|  | #include "libyuv/scale_row.h" | 
|  |  | 
|  | #ifdef __cplusplus | 
|  | namespace libyuv { | 
|  | extern "C" { | 
|  | #endif | 
|  |  | 
// Returns the magnitude of v. Negating INT_MIN overflows, so that input is
// not supported.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
|  |  | 
|  | // CPU agnostic row functions | 
// Point-samples a row to half width by keeping the odd-indexed source pixels
// (src_ptr[1], src_ptr[3], ...). src_stride is unused.
void ScaleRowDown2_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int remaining;
  (void)src_stride;
  // Two output pixels per pass; each pass consumes four source pixels.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *dst = src_ptr[1];
  }
}
|  |  | 
// 16 bit variant of the half-width point sampler: keeps the odd-indexed
// source samples. src_stride is unused.
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining;
  (void)src_stride;
  // Two output samples per pass; each pass consumes four source samples.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    *dst = src_ptr[1];
  }
}
|  |  | 
// Halves a row horizontally by averaging each pair of adjacent source pixels
// with rounding. src_stride is unused.
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width) {
  const uint8_t* in = src_ptr;
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = (in[0] + in[1] + 1) >> 1;
    *dst++ = (in[2] + in[3] + 1) >> 1;
    in += 4;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *dst = (in[0] + in[1] + 1) >> 1;
  }
}
|  |  | 
// 16 bit variant: halves a row by averaging each pair of adjacent source
// samples with rounding. src_stride is unused.
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  const uint16_t* in = src_ptr;
  int remaining;
  (void)src_stride;
  // Two output samples per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = (in[0] + in[1] + 1) >> 1;
    *dst++ = (in[2] + in[3] + 1) >> 1;
    in += 4;
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    *dst = (in[0] + in[1] + 1) >> 1;
  }
}
|  |  | 
// Halves a row by averaging each 2x2 block with rounding. The second source
// row is read at src_ptr + src_stride.
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bot = src_ptr + src_stride;
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    *dst++ = (top[2] + top[3] + bot[2] + bot[3] + 2) >> 2;
    top += 4;
    bot += 4;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *dst = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
  }
}
|  |  | 
// 2x2 box filter for an odd source width: the first dst_width - 1 outputs are
// rounded 2x2 averages, and the final output averages only the last source
// column of the two rows. The second row is read at src_ptr + src_stride.
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
                            int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bot = src_ptr + src_stride;
  // Number of outputs that have a full 2x2 source block.
  int remaining = dst_width - 1;
  for (; remaining > 1; remaining -= 2) {
    *dst++ = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    *dst++ = (top[2] + top[3] + bot[2] + bot[3] + 2) >> 2;
    top += 4;
    bot += 4;
  }
  if (remaining & 1) {
    *dst++ = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    top += 2;
    bot += 2;
  }
  // Last output: only one source column is left, so average it vertically.
  *dst = (top[0] + bot[0] + 1) >> 1;
}
|  |  | 
// 16 bit variant: halves a row by averaging each 2x2 block with rounding.
// The second source row is read at src_ptr + src_stride (in samples).
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bot = src_ptr + src_stride;
  int remaining;
  // Two output samples per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    *dst++ = (top[2] + top[3] + bot[2] + bot[3] + 2) >> 2;
    top += 4;
    bot += 4;
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    *dst = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
  }
}
|  |  | 
// Point-samples a row to quarter width by keeping source pixel 2 of every
// group of four. src_stride is unused.
void ScaleRowDown4_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int remaining;
  (void)src_stride;
  // Two output pixels per pass; each pass consumes eight source pixels.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[2];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *dst = src_ptr[2];
  }
}
|  |  | 
// 16 bit variant: point-samples a row to quarter width by keeping source
// sample 2 of every group of four. src_stride is unused.
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining;
  (void)src_stride;
  // Two output samples per pass; each pass consumes eight source samples.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[2];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    *dst = src_ptr[2];
  }
}
|  |  | 
// Reduces a row to quarter width by averaging each 4x4 block of source
// pixels with rounding. Rows 1..3 are read at multiples of src_stride from
// src_ptr.
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const intptr_t stride = src_stride;
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int half;
    for (half = 0; half < 2; ++half) {
      int sum = 8;  // Rounding bias for the divide by 16.
      int row;
      for (row = 0; row < 4; ++row) {
        const uint8_t* p = src_ptr + stride * row;
        sum += p[0] + p[1] + p[2] + p[3];
      }
      *dst++ = (uint8_t)(sum >> 4);
      src_ptr += 4;
    }
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    int sum = 8;
    int row;
    for (row = 0; row < 4; ++row) {
      const uint8_t* p = src_ptr + stride * row;
      sum += p[0] + p[1] + p[2] + p[3];
    }
    *dst = (uint8_t)(sum >> 4);
  }
}
|  |  | 
// 16 bit variant: reduces a row to quarter width by averaging each 4x4 block
// of source samples with rounding. Rows 1..3 are read at multiples of
// src_stride (in samples) from src_ptr.
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const intptr_t stride = src_stride;
  int remaining;
  // Two output samples per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int half;
    for (half = 0; half < 2; ++half) {
      int sum = 8;  // Rounding bias for the divide by 16.
      int row;
      for (row = 0; row < 4; ++row) {
        const uint16_t* p = src_ptr + stride * row;
        sum += p[0] + p[1] + p[2] + p[3];
      }
      *dst++ = (uint16_t)(sum >> 4);
      src_ptr += 4;
    }
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    int sum = 8;
    int row;
    for (row = 0; row < 4; ++row) {
      const uint16_t* p = src_ptr + stride * row;
      sum += p[0] + p[1] + p[2] + p[3];
    }
    *dst = (uint16_t)(sum >> 4);
  }
}
|  |  | 
// Point-samples a row to 3/4 width: from every four source pixels it keeps
// pixels 0, 1 and 3. dst_width must be a positive multiple of 3.
// src_stride is unused.
void ScaleRowDown34_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
}
|  |  | 
// 16 bit variant: point-samples a row to 3/4 width by keeping samples 0, 1
// and 3 of every four. dst_width must be a positive multiple of 3.
// src_stride is unused.
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
}
|  |  | 
// 3/4 horizontal scale with vertical blending of two rows weighted 3 : 1
// (row at src_ptr : row at src_ptr + src_stride).
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass: 4 source pixels -> 3 filtered values per row, using
    // weights 3:1, 1:1 and 1:3.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical pass: 3 parts first row, 1 part second row.
    *d++ = (top0 * 3 + bot0 + 2) >> 2;
    *d++ = (top1 * 3 + bot1 + 2) >> 2;
    *d++ = (top2 * 3 + bot2 + 2) >> 2;
    row0 += 4;
    row1 += 4;
  }
}
|  |  | 
// 16 bit variant: 3/4 horizontal scale with vertical blending of two rows
// weighted 3 : 1 (row at src_ptr : row at src_ptr + src_stride).
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass: 4 source samples -> 3 filtered values per row, using
    // weights 3:1, 1:1 and 1:3.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical pass: 3 parts first row, 1 part second row.
    *d++ = (top0 * 3 + bot0 + 2) >> 2;
    *d++ = (top1 * 3 + bot1 + 2) >> 2;
    *d++ = (top2 * 3 + bot2 + 2) >> 2;
    row0 += 4;
    row1 += 4;
  }
}
|  |  | 
// 3/4 horizontal scale with vertical blending of two rows weighted 1 : 1
// (row at src_ptr and row at src_ptr + src_stride).
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass: 4 source pixels -> 3 filtered values per row, using
    // weights 3:1, 1:1 and 1:3.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical pass: rounded mean of the two rows.
    *d++ = (top0 + bot0 + 1) >> 1;
    *d++ = (top1 + bot1 + 1) >> 1;
    *d++ = (top2 + bot2 + 1) >> 1;
    row0 += 4;
    row1 += 4;
  }
}
|  |  | 
// 16 bit variant: 3/4 horizontal scale with vertical blending of two rows
// weighted 1 : 1 (row at src_ptr and row at src_ptr + src_stride).
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass: 4 source samples -> 3 filtered values per row, using
    // weights 3:1, 1:1 and 1:3.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical pass: rounded mean of the two rows.
    *d++ = (top0 + bot0 + 1) >> 1;
    *d++ = (top1 + bot1 + 1) >> 1;
    *d++ = (top2 + bot2 + 1) >> 1;
    row0 += 4;
    row1 += 4;
  }
}
|  |  | 
// Point-samples one row horizontally. x is the starting source position and
// dx the per-output step, both in 16.16 fixed point; the integer part
// (x >> 16) selects the source pixel.
void ScaleCols_C(uint8_t* dst_ptr,
                 const uint8_t* src_ptr,
                 int dst_width,
                 int x,
                 int dx) {
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
|  |  | 
// 16 bit variant of the horizontal point sampler. x and dx are 16.16 fixed
// point; the integer part (x >> 16) selects the source sample.
void ScaleCols_16_C(uint16_t* dst_ptr,
                    const uint16_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining;
  // Two output samples per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
    *dst_ptr++ = src_ptr[x >> 16];
    x += dx;
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
|  |  | 
// Doubles a row horizontally by writing every source pixel twice. x and dx
// are accepted for signature compatibility and ignored.
void ScaleColsUp2_C(uint8_t* dst_ptr,
                    const uint8_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining;
  (void)x;
  (void)dx;
  // One source pixel produces two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint8_t pixel = *src_ptr++;
    dst_ptr[0] = pixel;
    dst_ptr[1] = pixel;
    dst_ptr += 2;
  }
  // Odd width: the last source pixel is written only once.
  if (remaining & 1) {
    *dst_ptr = *src_ptr;
  }
}
|  |  | 
// 16 bit variant: doubles a row horizontally by writing every source sample
// twice. x and dx are accepted for signature compatibility and ignored.
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
                       const uint16_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining;
  (void)x;
  (void)dx;
  // One source sample produces two output samples per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16_t sample = *src_ptr++;
    dst_ptr[0] = sample;
    dst_ptr[1] = sample;
    dst_ptr += 2;
  }
  // Odd width: the last source sample is written only once.
  if (remaining & 1) {
    *dst_ptr = *src_ptr;
  }
}
|  |  | 
// Linear interpolation (1 - f) * a + f * b, evaluated as a + f * (b - a).
// f is the 16 bit fractional part of the 16.16 source position.
#if defined(__arm__) || defined(__aarch64__)
// ARM path: uses all 16 fraction bits, rounding with 0x8000.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif

// Scales one row horizontally with linear filtering. x is the starting
// source position and dx the per-output step, both 16.16 fixed point. Each
// output blends src_ptr[x >> 16] with its right-hand neighbour, so the pixel
// after the last sampled position is read as well.
void ScaleFilterCols_C(uint8_t* dst_ptr,
                       const uint8_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint8_t* p = src_ptr + (x >> 16);
    int left = p[0];
    int right = p[1];
    *dst_ptr++ = BLENDER(left, right, x & 0xffff);
    x += dx;
    p = src_ptr + (x >> 16);
    left = p[0];
    right = p[1];
    *dst_ptr++ = BLENDER(left, right, x & 0xffff);
    x += dx;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    const uint8_t* p = src_ptr + (x >> 16);
    const int left = p[0];
    const int right = p[1];
    *dst_ptr = BLENDER(left, right, x & 0xffff);
  }
}

// Same as ScaleFilterCols_C but tracks the source position in 64 bits, so
// the accumulated position is not limited to the range of int.
void ScaleFilterCols64_C(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x32,
                         int dx) {
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint8_t* p = src_ptr + (pos >> 16);
    int left = p[0];
    int right = p[1];
    *dst_ptr++ = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    p = src_ptr + (pos >> 16);
    left = p[0];
    right = p[1];
    *dst_ptr++ = BLENDER(left, right, pos & 0xffff);
    pos += dx;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    const uint8_t* p = src_ptr + (pos >> 16);
    const int left = p[0];
    const int right = p[1];
    *dst_ptr = BLENDER(left, right, pos & 0xffff);
  }
}
#undef BLENDER
|  |  | 
// 16 bit blender: a + f * (b - a) with a 16 bit fraction f, rounded with
// 0x8000. The product is formed in 64 bits and the result is cast to
// uint16_t.
#define BLENDER(a, b, f) \
  (uint16_t)(            \
      (int)(a) +         \
      (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))

// Scales one row of 16 bit samples horizontally with linear filtering. x is
// the starting source position and dx the per-output step, both 16.16 fixed
// point. Each output blends src_ptr[x >> 16] with its right-hand neighbour.
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
                          const uint16_t* src_ptr,
                          int dst_width,
                          int x,
                          int dx) {
  int remaining;
  // Two output samples per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16_t* p = src_ptr + (x >> 16);
    int left = p[0];
    int right = p[1];
    *dst_ptr++ = BLENDER(left, right, x & 0xffff);
    x += dx;
    p = src_ptr + (x >> 16);
    left = p[0];
    right = p[1];
    *dst_ptr++ = BLENDER(left, right, x & 0xffff);
    x += dx;
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    const uint16_t* p = src_ptr + (x >> 16);
    const int left = p[0];
    const int right = p[1];
    *dst_ptr = BLENDER(left, right, x & 0xffff);
  }
}

// Same as ScaleFilterCols_16_C but tracks the source position in 64 bits.
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            int dst_width,
                            int x32,
                            int dx) {
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two output samples per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16_t* p = src_ptr + (pos >> 16);
    int left = p[0];
    int right = p[1];
    *dst_ptr++ = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    p = src_ptr + (pos >> 16);
    left = p[0];
    right = p[1];
    *dst_ptr++ = BLENDER(left, right, pos & 0xffff);
    pos += dx;
  }
  // Odd width leaves one trailing output sample.
  if (remaining & 1) {
    const uint16_t* p = src_ptr + (pos >> 16);
    const int left = p[0];
    const int right = p[1];
    *dst_ptr = BLENDER(left, right, pos & 0xffff);
  }
}
#undef BLENDER
|  |  | 
// Point-samples a row to 3/8 width: from every eight source pixels it keeps
// pixels 0, 3 and 6. dst_width must be a multiple of 3. src_stride is unused.
void ScaleRowDown38_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[3];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
}
|  |  | 
// 16 bit variant: point-samples a row to 3/8 width by keeping samples 0, 3
// and 6 of every eight. dst_width must be a multiple of 3.
// src_stride is unused.
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[3];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
}
|  |  | 
// 8x3 -> 3x1 box filter. Every group of 8 source columns over 3 rows yields
// three outputs: two averages of 3x3 blocks and one average of the final
// 2x3 block. Division is done by multiplying with a 16 bit reciprocal
// (65536 / 9 or 65536 / 6) and shifting right by 16, which truncates.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  const intptr_t stride = src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + stride;
    const uint8_t* r2 = src_ptr + stride * 2;
    const int sum0 = r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2] + r2[0] +
                     r2[1] + r2[2];
    const int sum1 = r0[3] + r0[4] + r0[5] + r1[3] + r1[4] + r1[5] + r2[3] +
                     r2[4] + r2[5];
    const int sum2 = r0[6] + r0[7] + r1[6] + r1[7] + r2[6] + r2[7];
    *dst_ptr++ = (sum0 * (65536 / 9)) >> 16;
    *dst_ptr++ = (sum1 * (65536 / 9)) >> 16;
    *dst_ptr++ = (sum2 * (65536 / 6)) >> 16;
    src_ptr += 8;
  }
}
|  |  | 
// 16 bit 8x3 -> 3x1 box filter. Every group of 8 source columns over 3 rows
// yields three outputs: two averages of 3x3 blocks and one average of the
// final 2x3 block. Division is done by multiplying with a 16 bit reciprocal
// (65536 / 9 or 65536 / 6) and shifting right by 16, which truncates.
// dst_width must be a positive multiple of 3.
//
// The sums and products are computed in uint32_t: with full range 16 bit
// samples, sum * reciprocal can reach about 4.29e9, which overflows a signed
// int (undefined behaviour) but still fits in 32 unsigned bits.
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    const uint16_t* r0 = src_ptr;
    const uint16_t* r1 = src_ptr + stride;
    const uint16_t* r2 = src_ptr + stride * 2;
    uint32_t sum0 = r0[0];
    uint32_t sum1 = r0[3];
    uint32_t sum2 = r0[6];
    sum0 += r0[1];
    sum0 += r0[2];
    sum0 += r1[0];
    sum0 += r1[1];
    sum0 += r1[2];
    sum0 += r2[0];
    sum0 += r2[1];
    sum0 += r2[2];
    sum1 += r0[4];
    sum1 += r0[5];
    sum1 += r1[3];
    sum1 += r1[4];
    sum1 += r1[5];
    sum1 += r2[3];
    sum1 += r2[4];
    sum1 += r2[5];
    sum2 += r0[7];
    sum2 += r1[6];
    sum2 += r1[7];
    sum2 += r2[6];
    sum2 += r2[7];
    dst_ptr[0] = (uint16_t)((sum0 * (uint32_t)(65536 / 9)) >> 16);
    dst_ptr[1] = (uint16_t)((sum1 * (uint32_t)(65536 / 9)) >> 16);
    dst_ptr[2] = (uint16_t)((sum2 * (uint32_t)(65536 / 6)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
|  |  | 
// 8x2 -> 3x1 box filter. Every group of 8 source columns over 2 rows yields
// three outputs: two averages of 3x2 blocks and one average of the final
// 2x2 block. Division is done by multiplying with a 16 bit reciprocal
// (65536 / 6 or 65536 / 4) and shifting right by 16, which truncates.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  const intptr_t stride = src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + stride;
    const int sum0 = r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2];
    const int sum1 = r0[3] + r0[4] + r0[5] + r1[3] + r1[4] + r1[5];
    const int sum2 = r0[6] + r0[7] + r1[6] + r1[7];
    *dst_ptr++ = (sum0 * (65536 / 6)) >> 16;
    *dst_ptr++ = (sum1 * (65536 / 6)) >> 16;
    *dst_ptr++ = (sum2 * (65536 / 4)) >> 16;
    src_ptr += 8;
  }
}
|  |  | 
// 16 bit 8x2 -> 3x1 box filter. Every group of 8 source columns over 2 rows
// yields three outputs: two averages of 3x2 blocks and one average of the
// final 2x2 block. Division is done by multiplying with a 16 bit reciprocal
// (65536 / 6 or 65536 / 4) and shifting right by 16, which truncates.
// dst_width must be a positive multiple of 3.
//
// The sums and products are computed in uint32_t: with full range 16 bit
// samples, sum * reciprocal can reach about 4.29e9, which overflows a signed
// int (undefined behaviour) but still fits in 32 unsigned bits.
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    const uint16_t* r0 = src_ptr;
    const uint16_t* r1 = src_ptr + stride;
    uint32_t sum0 = r0[0];
    uint32_t sum1 = r0[3];
    uint32_t sum2 = r0[6];
    sum0 += r0[1];
    sum0 += r0[2];
    sum0 += r1[0];
    sum0 += r1[1];
    sum0 += r1[2];
    sum1 += r0[4];
    sum1 += r0[5];
    sum1 += r1[3];
    sum1 += r1[4];
    sum1 += r1[5];
    sum2 += r0[7];
    sum2 += r1[6];
    sum2 += r1[7];
    dst_ptr[0] = (uint16_t)((sum0 * (uint32_t)(65536 / 6)) >> 16);
    dst_ptr[1] = (uint16_t)((sum1 * (uint32_t)(65536 / 6)) >> 16);
    dst_ptr[2] = (uint16_t)((sum2 * (uint32_t)(65536 / 4)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
|  |  | 
// Adds a row of 8 bit pixels into a row of 16 bit accumulators
// (dst_ptr[i] += src_ptr[i]). src_width must be positive.
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int remaining;
  assert(src_width > 0);
  // Two accumulators per pass.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
  }
  // Odd width leaves one trailing element.
  if (remaining & 1) {
    *dst_ptr += *src_ptr;
  }
}
|  |  | 
// Adds a row of 16 bit samples into a row of 32 bit accumulators
// (dst_ptr[i] += src_ptr[i]). src_width must be positive.
void ScaleAddRow_16_C(const uint16_t* src_ptr,
                      uint32_t* dst_ptr,
                      int src_width) {
  int remaining;
  assert(src_width > 0);
  // Two accumulators per pass.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
  }
  // Odd width leaves one trailing element.
  if (remaining & 1) {
    *dst_ptr += *src_ptr;
  }
}
|  |  | 
|  | // ARGB scale row functions | 
|  |  | 
// Point-samples a row of 4 byte pixels to half width by keeping the
// odd-indexed source pixels. Pixels are moved as whole uint32_t words.
// src_stride is unused.
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                         ptrdiff_t src_stride,
                         uint8_t* dst_argb,
                         int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass; each pass consumes four source pixels.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[1];
    *out++ = in[3];
    in += 4;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *out = in[1];
  }
}
|  |  | 
// Halves a row of 4 byte pixels by averaging each horizontal pair of pixels
// channel by channel, with rounding. src_stride is unused.
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int ch;
    // The same channel of the neighbouring pixel sits 4 bytes further on.
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (src_argb[ch] + src_argb[ch + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
|  |  | 
// Halves a row of 4 byte pixels by averaging each 2x2 block of pixels
// channel by channel, with rounding. The second source row starts
// src_stride bytes after src_argb.
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
                            int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (src_argb[ch] + src_argb[ch + 4] +
                      src_argb[src_stride + ch] +
                      src_argb[src_stride + ch + 4] + 2) >>
                     2;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
|  |  | 
// Point-samples a row of 4 byte pixels, taking every src_stepx-th source
// pixel starting with the first. Pixels are moved as whole uint32_t words.
// src_stride is unused.
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8_t* dst_argb,
                            int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[0];
    *out++ = in[src_stepx];
    in += src_stepx * 2;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *out = in[0];
  }
}
|  |  | 
// Downsamples a row of 4 byte pixels with a 2x2 box filter, stepping
// src_stepx pixels between outputs. Each output channel is the rounded mean
// of the 2x2 block at the current position; the second source row starts
// src_stride bytes after src_argb.
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (src_argb[ch] + src_argb[ch + 4] +
                      src_argb[src_stride + ch] +
                      src_argb[src_stride + ch + 4] + 2) >>
                     2;
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
|  |  | 
// Point-samples one row of 4 byte pixels horizontally. x is the starting
// source position and dx the per-output step, both 16.16 fixed point; the
// integer part (x >> 16) selects the source pixel.
void ScaleARGBCols_C(uint8_t* dst_argb,
                     const uint8_t* src_argb,
                     int dst_width,
                     int x,
                     int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[x >> 16];
    x += dx;
    *out++ = in[x >> 16];
    x += dx;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *out = in[x >> 16];
  }
}
|  |  | 
// Same as the 32 bit ARGB point sampler but tracks the source position in
// 64 bits. x32 and dx are 16.16 fixed point; the integer part of the
// position selects the source pixel.
void ScaleARGBCols64_C(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x32,
                       int dx) {
  int64_t pos = (int64_t)(x32);
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[pos >> 16];
    pos += dx;
    *out++ = in[pos >> 16];
    pos += dx;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    *out = in[pos >> 16];
  }
}
|  |  | 
// Doubles a row of 4 byte pixels horizontally by writing every source pixel
// twice. x and dx are accepted for signature compatibility and ignored.
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
                        const uint8_t* src_argb,
                        int dst_width,
                        int x,
                        int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)x;
  (void)dx;
  // One source pixel produces two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint32_t pixel = *in++;
    out[0] = pixel;
    out[1] = pixel;
    out += 2;
  }
  // Odd width: the last source pixel is written only once.
  if (remaining & 1) {
    *out = *in;
  }
}
|  |  | 
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
// Mimics SSSE3 blender
// BLENDER1 blends one channel with a 7 bit fraction f: weight (0x7f ^ f) for
// a and f for b, then shifts right by 7. BLENDERC applies it to the channel
// at bit offset s, and BLENDER combines all four channels of a 32 bit pixel.
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
#define BLENDERC(a, b, f, s) \
  (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f)                                                 \
  BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
      BLENDERC(a, b, f, 0)

// Scales one row of 4 byte pixels horizontally with linear filtering. x is
// the starting source position and dx the per-output step, both 16.16 fixed
// point. Each output blends the pixel at x >> 16 with its right-hand
// neighbour using the top 7 bits of the fraction.
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
                           const uint8_t* src_argb,
                           int dst_width,
                           int x,
                           int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint32_t* p = in + (x >> 16);
    int frac = (x >> 9) & 0x7f;
    uint32_t left = p[0];
    uint32_t right = p[1];
    *out++ = BLENDER(left, right, frac);
    x += dx;
    p = in + (x >> 16);
    frac = (x >> 9) & 0x7f;
    left = p[0];
    right = p[1];
    *out++ = BLENDER(left, right, frac);
    x += dx;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    const uint32_t* p = in + (x >> 16);
    const int frac = (x >> 9) & 0x7f;
    const uint32_t left = p[0];
    const uint32_t right = p[1];
    *out = BLENDER(left, right, frac);
  }
}

// Same as ScaleARGBFilterCols_C but tracks the source position in 64 bits.
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x32,
                             int dx) {
  int64_t pos = (int64_t)(x32);
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint32_t* p = in + (pos >> 16);
    int frac = (pos >> 9) & 0x7f;
    uint32_t left = p[0];
    uint32_t right = p[1];
    *out++ = BLENDER(left, right, frac);
    pos += dx;
    p = in + (pos >> 16);
    frac = (pos >> 9) & 0x7f;
    left = p[0];
    right = p[1];
    *out++ = BLENDER(left, right, frac);
    pos += dx;
  }
  // Odd width leaves one trailing output pixel.
  if (remaining & 1) {
    const uint32_t* p = in + (pos >> 16);
    const int frac = (pos >> 9) & 0x7f;
    const uint32_t left = p[0];
    const uint32_t right = p[1];
    *out = BLENDER(left, right, frac);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER
|  |  | 
|  | // UV scale row functions | 
|  | // same as ARGB but 2 channels | 
|  |  | 
// Point-samples a row of interleaved 2 byte UV pixels to half width by
// keeping the second pixel of every source pair. src_stride is unused.
//
// Each output pixel consumes two source pixels (4 bytes). The previous
// implementation produced two outputs per pass but advanced the source by
// only two pixels instead of four, so every pass after the first re-read
// an already-used pixel and the row was sampled at the wrong positions.
// Pixels are copied byte by byte, so no 16 bit alignment of the buffers is
// required.
void ScaleUVRowDown2_C(const uint8_t* src_uv,
                       ptrdiff_t src_stride,
                       uint8_t* dst_uv,
                       int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = src_uv[2];  // U of the second pixel in the pair.
    dst_uv[1] = src_uv[3];  // V of the second pixel in the pair.
    src_uv += 4;
    dst_uv += 2;
  }
}
|  |  | 
// Halves a row of interleaved UV pairs horizontally by averaging each two
// neighbouring source pixels (rounding up on ties), per channel.
// src_stride is unused because only one source row is read.
void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
                             ptrdiff_t src_stride,
                             uint8_t* dst_uv,
                             int dst_width) {
  const uint8_t* in = src_uv;
  uint8_t* out = dst_uv;
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const int u_sum = in[0] + in[2];
    const int v_sum = in[1] + in[3];
    out[0] = (uint8_t)((u_sum + 1) >> 1);
    out[1] = (uint8_t)((v_sum + 1) >> 1);
    in += 4;   // two source pixels consumed
    out += 2;  // one destination pixel produced
  }
}
|  |  | 
// Halves a row of interleaved UV pairs using a 2x2 box filter: each output
// channel is the rounded average of two horizontally adjacent pixels taken
// from the current row and from the row src_stride bytes further on.
void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
                          ptrdiff_t src_stride,
                          uint8_t* dst_uv,
                          int dst_width) {
  const uint8_t* row0 = src_uv;
  uint8_t* out = dst_uv;
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* row1 = row0 + src_stride;
    const int u_sum = row0[0] + row0[2] + row1[0] + row1[2];
    const int v_sum = row0[1] + row0[3] + row1[1] + row1[3];
    out[0] = (uint8_t)((u_sum + 2) >> 2);
    out[1] = (uint8_t)((v_sum + 2) >> 2);
    row0 += 4;
    out += 2;
  }
}
|  |  | 
// Point-samples a row of UV pairs, copying every src_stepx-th source pixel.
// Pixels are moved as 16 bit units, so both pointers are accessed through
// uint16_t. src_stride is unused because only one source row is read.
void ScaleUVRowDownEven_C(const uint8_t* src_uv,
                          ptrdiff_t src_stride,
                          int src_stepx,
                          uint8_t* dst_uv,
                          int dst_width) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = in[src_stepx];
    in += src_stepx * 2;
    out += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
|  |  | 
// Down-samples a row of interleaved UV pairs by an integer step with a 2x2
// box filter: each output channel is the rounded average of two horizontally
// adjacent pixels from the current row and from the row src_stride bytes
// further on. The source then advances by src_stepx pixels.
void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
                             ptrdiff_t src_stride,
                             int src_stepx,
                             uint8_t* dst_uv,
                             int dst_width) {
  const uint8_t* row0 = src_uv;
  uint8_t* out = dst_uv;
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* row1 = row0 + src_stride;
    const int u_sum = row0[0] + row0[2] + row1[0] + row1[2];
    const int v_sum = row0[1] + row0[3] + row1[1] + row1[3];
    out[0] = (uint8_t)((u_sum + 2) >> 2);
    out[1] = (uint8_t)((v_sum + 2) >> 2);
    row0 += src_stepx * 2;  // 2 bytes per UV pixel
    out += 2;
  }
}
|  |  | 
// Scales a single row of UV pixels using point sampling.
//   x:  start position in the source row, 16.16 fixed point.
//   dx: step per output pixel, 16.16 fixed point.
// Each output pixel is the 16 bit source pixel at the integer part of the
// running position.
void ScaleUVCols_C(uint8_t* dst_uv,
                   const uint8_t* src_uv,
                   int dst_width,
                   int x,
                   int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    out[0] = in[x >> 16];
  }
}
|  |  | 
// Point-sampling UV column scaler that tracks the source position in 64 bits.
//   x32: start position, 16.16 fixed point.
//   dx:  step per output pixel, 16.16 fixed point.
// Each output pixel is the 16 bit source pixel at the integer part of the
// running position.
void ScaleUVCols64_C(uint8_t* dst_uv,
                     const uint8_t* src_uv,
                     int dst_width,
                     int x32,
                     int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[pos >> 16];
    pos += dx;
    out[1] = in[pos >> 16];
    pos += dx;
    out += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    out[0] = in[pos >> 16];
  }
}
|  |  | 
// Scales a single row of UV pixels up by 2x using point sampling: every
// source pixel is written twice. x and dx are accepted for signature
// compatibility with the other column scalers and are ignored.
void ScaleUVColsUp2_C(uint8_t* dst_uv,
                      const uint8_t* src_uv,
                      int dst_width,
                      int x,
                      int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16_t uv = in[0];
    out[0] = uv;
    out[1] = uv;
    ++in;
    out += 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
|  |  | 
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
// Mimics SSSE3 blender
// BLENDER1 mixes two 8 bit channel values with a 7 bit fraction f:
//   (a * (0x7f ^ f) + b * f) >> 7
// BLENDERC applies BLENDER1 to the channel located at bit offset s.
// BLENDER blends both 8 bit channels of a 16 bit UV pixel.
// Every argument and each full expansion is parenthesized so the macros stay
// correct when given expression arguments or used inside larger expressions.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  (uint16_t)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f) (BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
|  |  | 
// Horizontally scales one row of UV pixels with filtering.
//   x:  start position in 16.16 fixed point.
//   dx: step per output pixel in 16.16 fixed point.
// Each output is BLENDER() of the source pixel at the integer position and
// its right-hand neighbour, weighted by the top 7 bits of the fraction.
void ScaleUVFilterCols_C(uint8_t* dst_uv,
                         const uint8_t* src_uv,
                         int dst_width,
                         int x,
                         int dx) {
  const uint16_t* src_pixels = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int remaining;
  // Produce two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int index = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint16_t left = src_pixels[index];
    uint16_t right = src_pixels[index + 1];
    out[0] = BLENDER(left, right, frac);
    x += dx;
    index = x >> 16;
    frac = (x >> 9) & 0x7f;
    left = src_pixels[index];
    right = src_pixels[index + 1];
    out[1] = BLENDER(left, right, frac);
    x += dx;
    out += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    int index = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint16_t left = src_pixels[index];
    uint16_t right = src_pixels[index + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
|  |  | 
// Horizontally scales one row of UV pixels with filtering, tracking the
// source position in 64 bits.
//   x32: start position in 16.16 fixed point.
//   dx:  step per output pixel in 16.16 fixed point.
// Each output is BLENDER() of the source pixel at the integer position and
// its right-hand neighbour, weighted by the top 7 bits of the fraction.
void ScaleUVFilterCols64_C(uint8_t* dst_uv,
                           const uint8_t* src_uv,
                           int dst_width,
                           int x32,
                           int dx) {
  const uint16_t* src_pixels = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Produce two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int64_t index = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint16_t left = src_pixels[index];
    uint16_t right = src_pixels[index + 1];
    out[0] = BLENDER(left, right, frac);
    pos += dx;
    index = pos >> 16;
    frac = (pos >> 9) & 0x7f;
    left = src_pixels[index];
    right = src_pixels[index + 1];
    out[1] = BLENDER(left, right, frac);
    pos += dx;
    out += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    int64_t index = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint16_t left = src_pixels[index];
    uint16_t right = src_pixels[index + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
|  | #undef BLENDER1 | 
|  | #undef BLENDERC | 
|  | #undef BLENDER | 
|  |  | 
|  | // Scale plane vertically with bilinear interpolation. | 
|  | void ScalePlaneVertical(int src_height, | 
|  | int dst_width, | 
|  | int dst_height, | 
|  | int src_stride, | 
|  | int dst_stride, | 
|  | const uint8_t* src_argb, | 
|  | uint8_t* dst_argb, | 
|  | int x, | 
|  | int y, | 
|  | int dy, | 
|  | int bpp, | 
|  | enum FilterMode filtering) { | 
|  | // TODO(fbarchard): Allow higher bpp. | 
|  | int dst_width_bytes = dst_width * bpp; | 
|  | void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, | 
|  | ptrdiff_t src_stride, int dst_width, | 
|  | int source_y_fraction) = InterpolateRow_C; | 
|  | const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; | 
|  | int j; | 
|  | assert(bpp >= 1 && bpp <= 4); | 
|  | assert(src_height != 0); | 
|  | assert(dst_width > 0); | 
|  | assert(dst_height > 0); | 
|  | src_argb += (x >> 16) * bpp; | 
|  | #if defined(HAS_INTERPOLATEROW_SSSE3) | 
|  | if (TestCpuFlag(kCpuHasSSSE3)) { | 
|  | InterpolateRow = InterpolateRow_Any_SSSE3; | 
|  | if (IS_ALIGNED(dst_width_bytes, 16)) { | 
|  | InterpolateRow = InterpolateRow_SSSE3; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | #if defined(HAS_INTERPOLATEROW_AVX2) | 
|  | if (TestCpuFlag(kCpuHasAVX2)) { | 
|  | InterpolateRow = InterpolateRow_Any_AVX2; | 
|  | if (IS_ALIGNED(dst_width_bytes, 32)) { | 
|  | InterpolateRow = InterpolateRow_AVX2; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | #if defined(HAS_INTERPOLATEROW_NEON) | 
|  | if (TestCpuFlag(kCpuHasNEON)) { | 
|  | InterpolateRow = InterpolateRow_Any_NEON; | 
|  | if (IS_ALIGNED(dst_width_bytes, 16)) { | 
|  | InterpolateRow = InterpolateRow_NEON; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | #if defined(HAS_INTERPOLATEROW_MMI) | 
|  | if (TestCpuFlag(kCpuHasMMI)) { | 
|  | InterpolateRow = InterpolateRow_Any_MMI; | 
|  | if (IS_ALIGNED(dst_width_bytes, 8)) { | 
|  | InterpolateRow = InterpolateRow_MMI; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | #if defined(HAS_INTERPOLATEROW_MSA) | 
|  | if (TestCpuFlag(kCpuHasMSA)) { | 
|  | InterpolateRow = InterpolateRow_Any_MSA; | 
|  | if (IS_ALIGNED(dst_width_bytes, 32)) { | 
|  | InterpolateRow = InterpolateRow_MSA; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | for (j = 0; j < dst_height; ++j) { | 
|  | int yi; | 
|  | int yf; | 
|  | if (y > max_y) { | 
|  | y = max_y; | 
|  | } | 
|  | yi = y >> 16; | 
|  | yf = filtering ? ((y >> 8) & 255) : 0; | 
|  | InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride, | 
|  | dst_width_bytes, yf); | 
|  | dst_argb += dst_stride; | 
|  | y += dy; | 
|  | } | 
|  | } | 
|  | void ScalePlaneVertical_16(int src_height, | 
|  | int dst_width, | 
|  | int dst_height, | 
|  | int src_stride, | 
|  | int dst_stride, | 
|  | const uint16_t* src_argb, | 
|  | uint16_t* dst_argb, | 
|  | int x, | 
|  | int y, | 
|  | int dy, | 
|  | int wpp, | 
|  | enum FilterMode filtering) { | 
|  | // TODO(fbarchard): Allow higher wpp. | 
|  | int dst_width_words = dst_width * wpp; | 
|  | void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb, | 
|  | ptrdiff_t src_stride, int dst_width, | 
|  | int source_y_fraction) = InterpolateRow_16_C; | 
|  | const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; | 
|  | int j; | 
|  | assert(wpp >= 1 && wpp <= 2); | 
|  | assert(src_height != 0); | 
|  | assert(dst_width > 0); | 
|  | assert(dst_height > 0); | 
|  | src_argb += (x >> 16) * wpp; | 
|  | #if defined(HAS_INTERPOLATEROW_16_SSE2) | 
|  | if (TestCpuFlag(kCpuHasSSE2)) { | 
|  | InterpolateRow = InterpolateRow_Any_16_SSE2; | 
|  | if (IS_ALIGNED(dst_width_bytes, 16)) { | 
|  | InterpolateRow = InterpolateRow_16_SSE2; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | #if defined(HAS_INTERPOLATEROW_16_SSSE3) | 
|  | if (TestCpuFlag(kCpuHasSSSE3)) { | 
|  | InterpolateRow = InterpolateRow_Any_16_SSSE3; | 
|  | if (IS_ALIGNED(dst_width_bytes, 16)) { | 
|  | InterpolateRow = InterpolateRow_16_SSSE3; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | #if defined(HAS_INTERPOLATEROW_16_AVX2) | 
|  | if (TestCpuFlag(kCpuHasAVX2)) { | 
|  | InterpolateRow = InterpolateRow_Any_16_AVX2; | 
|  | if (IS_ALIGNED(dst_width_bytes, 32)) { | 
|  | InterpolateRow = InterpolateRow_16_AVX2; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | #if defined(HAS_INTERPOLATEROW_16_NEON) | 
|  | if (TestCpuFlag(kCpuHasNEON)) { | 
|  | InterpolateRow = InterpolateRow_Any_16_NEON; | 
|  | if (IS_ALIGNED(dst_width_bytes, 16)) { | 
|  | InterpolateRow = InterpolateRow_16_NEON; | 
|  | } | 
|  | } | 
|  | #endif | 
|  | for (j = 0; j < dst_height; ++j) { | 
|  | int yi; | 
|  | int yf; | 
|  | if (y > max_y) { | 
|  | y = max_y; | 
|  | } | 
|  | yi = y >> 16; | 
|  | yf = filtering ? ((y >> 8) & 255) : 0; | 
|  | InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride, | 
|  | dst_width_words, yf); | 
|  | dst_argb += dst_stride; | 
|  | y += dy; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Simplify the filtering based on scale factors. | 
|  | enum FilterMode ScaleFilterReduce(int src_width, | 
|  | int src_height, | 
|  | int dst_width, | 
|  | int dst_height, | 
|  | enum FilterMode filtering) { | 
|  | if (src_width < 0) { | 
|  | src_width = -src_width; | 
|  | } | 
|  | if (src_height < 0) { | 
|  | src_height = -src_height; | 
|  | } | 
|  | if (filtering == kFilterBox) { | 
|  | // If scaling both axis to 0.5 or larger, switch from Box to Bilinear. | 
|  | if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) { | 
|  | filtering = kFilterBilinear; | 
|  | } | 
|  | } | 
|  | if (filtering == kFilterBilinear) { | 
|  | if (src_height == 1) { | 
|  | filtering = kFilterLinear; | 
|  | } | 
|  | // TODO(fbarchard): Detect any odd scale factor and reduce to Linear. | 
|  | if (dst_height == src_height || dst_height * 3 == src_height) { | 
|  | filtering = kFilterLinear; | 
|  | } | 
|  | // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to | 
|  | // avoid reading 2 pixels horizontally that causes memory exception. | 
|  | if (src_width == 1) { | 
|  | filtering = kFilterNone; | 
|  | } | 
|  | } | 
|  | if (filtering == kFilterLinear) { | 
|  | if (src_width == 1) { | 
|  | filtering = kFilterNone; | 
|  | } | 
|  | // TODO(fbarchard): Detect any odd scale factor and reduce to None. | 
|  | if (dst_width == src_width || dst_width * 3 == src_width) { | 
|  | filtering = kFilterNone; | 
|  | } | 
|  | } | 
|  | return filtering; | 
|  | } | 
|  |  | 
// Divide num by div and return as 16.16 fixed point result.
// The numerator is scaled by 65536 in 64 bits before dividing; a multiply is
// used instead of a left shift so a negative num is well defined.
// div must be non-zero. The quotient is truncated to int.
int FixedDiv_C(int num, int div) {
  return (int)(((int64_t)(num) * 65536) / div);
}
|  |  | 
// Returns ((num << 16) - 0x00010001) / (div - 1) as a 16.16 fixed point
// value, i.e. roughly (num - 1) / (div - 1), slightly reduced.
// The numerator is scaled by 65536 in 64 bits; a multiply is used instead of
// a left shift so a negative num is well defined.
// div must not be 1. The quotient is truncated to int.
int FixedDiv1_C(int num, int div) {
  return (int)((((int64_t)(num) * 65536) - 0x00010001) / (div - 1));
}
|  |  | 
// Start offset for a step dx: half of |dx| plus s, carrying the sign of dx.
// Arguments and the whole expansion are parenthesized so the macro is safe
// with expression arguments and inside larger expressions.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
|  |  | 
|  | // Compute slope values for stepping. | 
|  | void ScaleSlope(int src_width, | 
|  | int src_height, | 
|  | int dst_width, | 
|  | int dst_height, | 
|  | enum FilterMode filtering, | 
|  | int* x, | 
|  | int* y, | 
|  | int* dx, | 
|  | int* dy) { | 
|  | assert(x != NULL); | 
|  | assert(y != NULL); | 
|  | assert(dx != NULL); | 
|  | assert(dy != NULL); | 
|  | assert(src_width != 0); | 
|  | assert(src_height != 0); | 
|  | assert(dst_width > 0); | 
|  | assert(dst_height > 0); | 
|  | // Check for 1 pixel and avoid FixedDiv overflow. | 
|  | if (dst_width == 1 && src_width >= 32768) { | 
|  | dst_width = src_width; | 
|  | } | 
|  | if (dst_height == 1 && src_height >= 32768) { | 
|  | dst_height = src_height; | 
|  | } | 
|  | if (filtering == kFilterBox) { | 
|  | // Scale step for point sampling duplicates all pixels equally. | 
|  | *dx = FixedDiv(Abs(src_width), dst_width); | 
|  | *dy = FixedDiv(src_height, dst_height); | 
|  | *x = 0; | 
|  | *y = 0; | 
|  | } else if (filtering == kFilterBilinear) { | 
|  | // Scale step for bilinear sampling renders last pixel once for upsample. | 
|  | if (dst_width <= Abs(src_width)) { | 
|  | *dx = FixedDiv(Abs(src_width), dst_width); | 
|  | *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter. | 
|  | } else if (dst_width > 1) { | 
|  | *dx = FixedDiv1(Abs(src_width), dst_width); | 
|  | *x = 0; | 
|  | } | 
|  | if (dst_height <= src_height) { | 
|  | *dy = FixedDiv(src_height, dst_height); | 
|  | *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter. | 
|  | } else if (dst_height > 1) { | 
|  | *dy = FixedDiv1(src_height, dst_height); | 
|  | *y = 0; | 
|  | } | 
|  | } else if (filtering == kFilterLinear) { | 
|  | // Scale step for bilinear sampling renders last pixel once for upsample. | 
|  | if (dst_width <= Abs(src_width)) { | 
|  | *dx = FixedDiv(Abs(src_width), dst_width); | 
|  | *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter. | 
|  | } else if (dst_width > 1) { | 
|  | *dx = FixedDiv1(Abs(src_width), dst_width); | 
|  | *x = 0; | 
|  | } | 
|  | *dy = FixedDiv(src_height, dst_height); | 
|  | *y = *dy >> 1; | 
|  | } else { | 
|  | // Scale step for point sampling duplicates all pixels equally. | 
|  | *dx = FixedDiv(Abs(src_width), dst_width); | 
|  | *dy = FixedDiv(src_height, dst_height); | 
|  | *x = CENTERSTART(*dx, 0); | 
|  | *y = CENTERSTART(*dy, 0); | 
|  | } | 
|  | // Negative src_width means horizontally mirror. | 
|  | if (src_width < 0) { | 
|  | *x += (dst_width - 1) * *dx; | 
|  | *dx = -*dx; | 
|  | // src_width = -src_width;   // Caller must do this. | 
|  | } | 
|  | } | 
|  | #undef CENTERSTART | 
|  |  | 
// Upsamples two rows of 16 bit samples by 2x horizontally with filtering,
// writing a single output row. For every source column the function reads a
// 2x2 neighbourhood (this column and the next, on src_ptr and on the row
// src_stride elements further on) and emits two outputs weighted 9:3:3:1
// towards the nearer samples, so one sample beyond the last column is read.
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bottom = src_ptr + src_stride;
  int remaining;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const int tl = top[0];
    const int tr = top[1];
    const int bl = bottom[0];
    const int br = bottom[1];
    dst[0] = (uint16_t)((9 * tl + 3 * (tr + bl) + br + 8) >> 4);
    dst[1] = (uint16_t)((9 * tr + 3 * (tl + br) + bl + 8) >> 4);
    ++top;
    ++bottom;
    dst += 2;
  }
  // Odd width: only the left-weighted output of the final column.
  if (dst_width & 1) {
    const int tl = top[0];
    const int tr = top[1];
    const int bl = bottom[0];
    const int br = bottom[1];
    dst[0] = (uint16_t)((9 * tl + 3 * (tr + bl) + br + 8) >> 4);
  }
}
|  |  | 
|  | #ifdef __cplusplus | 
|  | }  // extern "C" | 
|  | }  // namespace libyuv | 
|  | #endif |