blob: 79aed5c7877d820d9987586aa1be30dfdef7b04e [file] [log] [blame]
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include <stdio.h>
#include <string.h> // For memcpy and memset.
#include "libyuv/basic_types.h"
#include "libyuv/convert_argb.h" // For kYuvI601Constants
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// The following ifdef from row_win makes the C code match the row_win code,
// which is 7 bit fixed point.
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
(defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
#define LIBYUV_RGB7 1
#endif
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86)
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
return -(v >= 0) & v;
}
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
return (-(v >= 255) | v) & 255;
}
static __inline int32_t clamp1023(int32_t v) {
return (-(v >= 1023) | v) & 1023;
}
static __inline uint32_t Abs(int32_t v) {
int m = -(v < 0);
return (v + m) ^ m;
}
#else // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
return (v < 0) ? 0 : v;
}
static __inline int32_t clamp255(int32_t v) {
return (v > 255) ? 255 : v;
}
static __inline int32_t clamp1023(int32_t v) {
return (v > 1023) ? 1023 : v;
}
static __inline uint32_t Abs(int32_t v) {
return (v < 0) ? -v : v;
}
#endif // USE_BRANCHLESS
static __inline uint32_t Clamp(int32_t val) {
int v = clamp0(val);
return (uint32_t)(clamp255(v));
}
static __inline uint32_t Clamp10(int32_t val) {
int v = clamp0(val);
return (uint32_t)(clamp1023(v));
}
// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
p[0] = (uint8_t)(v & 255);
p[1] = (uint8_t)((v >> 8) & 255);
p[2] = (uint8_t)((v >> 16) & 255);
p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_rgb24[0];
uint8_t g = src_rgb24[1];
uint8_t r = src_rgb24[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = 255u;
dst_argb += 4;
src_rgb24 += 3;
}
}
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t r = src_raw[0];
uint8_t g = src_raw[1];
uint8_t b = src_raw[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = 255u;
dst_argb += 4;
src_raw += 3;
}
}
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t r = src_raw[0];
uint8_t g = src_raw[1];
uint8_t b = src_raw[2];
dst_rgba[0] = 255u;
dst_rgba[1] = b;
dst_rgba[2] = g;
dst_rgba[3] = r;
dst_rgba += 4;
src_raw += 3;
}
}
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t r = src_raw[0];
uint8_t g = src_raw[1];
uint8_t b = src_raw[2];
dst_rgb24[0] = b;
dst_rgb24[1] = g;
dst_rgb24[2] = r;
dst_rgb24 += 3;
src_raw += 3;
}
}
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_rgb565[0] & 0x1f;
uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
uint8_t r = src_rgb565[1] >> 3;
dst_argb[0] = (b << 3) | (b >> 2);
dst_argb[1] = (g << 2) | (g >> 4);
dst_argb[2] = (r << 3) | (r >> 2);
dst_argb[3] = 255u;
dst_argb += 4;
src_rgb565 += 2;
}
}
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb1555[0] & 0x1f;
uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
uint8_t a = src_argb1555[1] >> 7;
dst_argb[0] = (b << 3) | (b >> 2);
dst_argb[1] = (g << 3) | (g >> 2);
dst_argb[2] = (r << 3) | (r >> 2);
dst_argb[3] = -a;
dst_argb += 4;
src_argb1555 += 2;
}
}
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb4444[0] & 0x0f;
uint8_t g = src_argb4444[0] >> 4;
uint8_t r = src_argb4444[1] & 0x0f;
uint8_t a = src_argb4444[1] >> 4;
dst_argb[0] = (b << 4) | b;
dst_argb[1] = (g << 4) | g;
dst_argb[2] = (r << 4) | r;
dst_argb[3] = (a << 4) | a;
dst_argb += 4;
src_argb4444 += 2;
}
}
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
uint32_t ar30;
memcpy(&ar30, src_ar30, sizeof ar30);
uint32_t b = (ar30 >> 2) & 0xff;
uint32_t g = (ar30 >> 12) & 0xff;
uint32_t r = (ar30 >> 22) & 0xff;
uint32_t a = (ar30 >> 30) * 0x55; // Replicate 2 bits to 8 bits.
*(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
dst_argb += 4;
src_ar30 += 4;
}
}
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
int x;
for (x = 0; x < width; ++x) {
uint32_t ar30;
memcpy(&ar30, src_ar30, sizeof ar30);
uint32_t b = (ar30 >> 2) & 0xff;
uint32_t g = (ar30 >> 12) & 0xff;
uint32_t r = (ar30 >> 22) & 0xff;
uint32_t a = (ar30 >> 30) * 0x55; // Replicate 2 bits to 8 bits.
*(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
dst_abgr += 4;
src_ar30 += 4;
}
}
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
int x;
for (x = 0; x < width; ++x) {
uint32_t ar30;
memcpy(&ar30, src_ar30, sizeof ar30);
uint32_t b = ar30 & 0x3ff;
uint32_t ga = ar30 & 0xc00ffc00;
uint32_t r = (ar30 >> 20) & 0x3ff;
*(uint32_t*)(dst_ab30) = r | ga | (b << 20);
dst_ab30 += 4;
src_ar30 += 4;
}
}
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb[0];
uint8_t g = src_argb[1];
uint8_t r = src_argb[2];
dst_rgb[0] = b;
dst_rgb[1] = g;
dst_rgb[2] = r;
dst_rgb += 3;
src_argb += 4;
}
}
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb[0];
uint8_t g = src_argb[1];
uint8_t r = src_argb[2];
dst_rgb[0] = r;
dst_rgb[1] = g;
dst_rgb[2] = b;
dst_rgb += 3;
src_argb += 4;
}
}
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 = src_argb[0] >> 3;
uint8_t g0 = src_argb[1] >> 2;
uint8_t r0 = src_argb[2] >> 3;
uint8_t b1 = src_argb[4] >> 3;
uint8_t g1 = src_argb[5] >> 2;
uint8_t r1 = src_argb[6] >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
uint8_t b0 = src_argb[0] >> 3;
uint8_t g0 = src_argb[1] >> 2;
uint8_t r0 = src_argb[2] >> 3;
*(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
}
}
// dither4 is a row of 4 values from 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The order in the 4x4 matrix in first byte is upper left.
// The 4 values are passed as an int, then referenced as an array, so
// endian will not affect order of the original matrix. But the dither4
// will containing the first pixel in the lower byte for little endian
// or the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t* dst_rgb,
const uint32_t dither4,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
int dither0 = ((const unsigned char*)(&dither4))[x & 3];
int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
*(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
}
}
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 = src_argb[0] >> 3;
uint8_t g0 = src_argb[1] >> 3;
uint8_t r0 = src_argb[2] >> 3;
uint8_t a0 = src_argb[3] >> 7;
uint8_t b1 = src_argb[4] >> 3;
uint8_t g1 = src_argb[5] >> 3;
uint8_t r1 = src_argb[6] >> 3;
uint8_t a1 = src_argb[7] >> 7;
*(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
(b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
uint8_t b0 = src_argb[0] >> 3;
uint8_t g0 = src_argb[1] >> 3;
uint8_t r0 = src_argb[2] >> 3;
uint8_t a0 = src_argb[3] >> 7;
*(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
}
}
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 = src_argb[0] >> 4;
uint8_t g0 = src_argb[1] >> 4;
uint8_t r0 = src_argb[2] >> 4;
uint8_t a0 = src_argb[3] >> 4;
uint8_t b1 = src_argb[4] >> 4;
uint8_t g1 = src_argb[5] >> 4;
uint8_t r1 = src_argb[6] >> 4;
uint8_t a1 = src_argb[7] >> 4;
*(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
(b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
uint8_t b0 = src_argb[0] >> 4;
uint8_t g0 = src_argb[1] >> 4;
uint8_t r0 = src_argb[2] >> 4;
uint8_t a0 = src_argb[3] >> 4;
*(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
}
}
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
int x;
for (x = 0; x < width; ++x) {
uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
uint32_t a0 = (src_abgr[3] >> 6);
*(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
dst_ar30 += 4;
src_abgr += 4;
}
}
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
int x;
for (x = 0; x < width; ++x) {
uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
uint32_t a0 = (src_argb[3] >> 6);
*(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
dst_ar30 += 4;
src_argb += 4;
}
}
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
}
#else
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
// 0x7e80) >> 8;
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}
#endif
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
#ifdef LIBYUV_RGBTOU_TRUNCATE
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
}
#else
// TODO(fbarchard): Add rounding to SIMD and use this
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}
#endif
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
}
#endif
// ARGBToY_C and ARGBToUV_C
// Intel version mimic SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = src_rgb0[B] + src_rgb1[B]; \
uint16_t ag = src_rgb0[G] + src_rgb1[G]; \
uint16_t ar = src_rgb0[R] + src_rgb1[R]; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
} \
}
#endif
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
// JPeg uses a variation on BT.601-1 full range
// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 8 bit U:
// b 0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r 0.50000 * 255 = 127.5 = 127
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif
#if defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif
// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimic SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \
uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \
uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
} \
}
#endif
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_rgb565[0] & 0x1f;
uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
uint8_t r = src_rgb565[1] >> 3;
b = (b << 3) | (b >> 2);
g = (g << 2) | (g >> 4);
r = (r << 3) | (r >> 2);
dst_y[0] = RGBToY(r, g, b);
src_rgb565 += 2;
dst_y += 1;
}
}
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb1555[0] & 0x1f;
uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
b = (b << 3) | (b >> 2);
g = (g << 3) | (g >> 2);
r = (r << 3) | (r >> 2);
dst_y[0] = RGBToY(r, g, b);
src_argb1555 += 2;
dst_y += 1;
}
}
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb4444[0] & 0x0f;
uint8_t g = src_argb4444[0] >> 4;
uint8_t r = src_argb4444[1] & 0x0f;
b = (b << 4) | b;
g = (g << 4) | g;
r = (r << 4) | r;
dst_y[0] = RGBToY(r, g, b);
src_argb4444 += 2;
dst_y += 1;
}
}
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
int src_stride_rgb565,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 = src_rgb565[0] & 0x1f;
uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
uint8_t r0 = src_rgb565[1] >> 3;
uint8_t b1 = src_rgb565[2] & 0x1f;
uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
uint8_t r1 = src_rgb565[3] >> 3;
uint8_t b2 = next_rgb565[0] & 0x1f;
uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
uint8_t r2 = next_rgb565[1] >> 3;
uint8_t b3 = next_rgb565[2] & 0x1f;
uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
uint8_t r3 = next_rgb565[3] >> 3;
b0 = (b0 << 3) | (b0 >> 2);
g0 = (g0 << 2) | (g0 >> 4);
r0 = (r0 << 3) | (r0 >> 2);
b1 = (b1 << 3) | (b1 >> 2);
g1 = (g1 << 2) | (g1 >> 4);
r1 = (r1 << 3) | (r1 >> 2);
b2 = (b2 << 3) | (b2 >> 2);
g2 = (g2 << 2) | (g2 >> 4);
r2 = (r2 << 3) | (r2 >> 2);
b3 = (b3 << 3) | (b3 >> 2);
g3 = (g3 << 2) | (g3 >> 4);
r3 = (r3 << 3) | (r3 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_rgb565 += 4;
next_rgb565 += 4;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8_t b0 = src_rgb565[0] & 0x1f;
uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
uint8_t r0 = src_rgb565[1] >> 3;
uint8_t b2 = next_rgb565[0] & 0x1f;
uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
uint8_t r2 = next_rgb565[1] >> 3;
b0 = (b0 << 3) | (b0 >> 2);
g0 = (g0 << 2) | (g0 >> 4);
r0 = (r0 << 3) | (r0 >> 2);
b2 = (b2 << 3) | (b2 >> 2);
g2 = (g2 << 2) | (g2 >> 4);
r2 = (r2 << 3) | (r2 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
}
}
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
int src_stride_argb1555,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 = src_argb1555[0] & 0x1f;
uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
uint8_t b1 = src_argb1555[2] & 0x1f;
uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
uint8_t b2 = next_argb1555[0] & 0x1f;
uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
uint8_t b3 = next_argb1555[2] & 0x1f;
uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
b0 = (b0 << 3) | (b0 >> 2);
g0 = (g0 << 3) | (g0 >> 2);
r0 = (r0 << 3) | (r0 >> 2);
b1 = (b1 << 3) | (b1 >> 2);
g1 = (g1 << 3) | (g1 >> 2);
r1 = (r1 << 3) | (r1 >> 2);
b2 = (b2 << 3) | (b2 >> 2);
g2 = (g2 << 3) | (g2 >> 2);
r2 = (r2 << 3) | (r2 >> 2);
b3 = (b3 << 3) | (b3 >> 2);
g3 = (g3 << 3) | (g3 >> 2);
r3 = (r3 << 3) | (r3 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_argb1555 += 4;
next_argb1555 += 4;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8_t b0 = src_argb1555[0] & 0x1f;
uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
uint8_t b2 = next_argb1555[0] & 0x1f;
uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
uint8_t r2 = next_argb1555[1] >> 3;
b0 = (b0 << 3) | (b0 >> 2);
g0 = (g0 << 3) | (g0 >> 2);
r0 = (r0 << 3) | (r0 >> 2);
b2 = (b2 << 3) | (b2 >> 2);
g2 = (g2 << 3) | (g2 >> 2);
r2 = (r2 << 3) | (r2 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
}
}
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
int src_stride_argb4444,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 = src_argb4444[0] & 0x0f;
uint8_t g0 = src_argb4444[0] >> 4;
uint8_t r0 = src_argb4444[1] & 0x0f;
uint8_t b1 = src_argb4444[2] & 0x0f;
uint8_t g1 = src_argb4444[2] >> 4;
uint8_t r1 = src_argb4444[3] & 0x0f;
uint8_t b2 = next_argb4444[0] & 0x0f;
uint8_t g2 = next_argb4444[0] >> 4;
uint8_t r2 = next_argb4444[1] & 0x0f;
uint8_t b3 = next_argb4444[2] & 0x0f;
uint8_t g3 = next_argb4444[2] >> 4;
uint8_t r3 = next_argb4444[3] & 0x0f;
b0 = (b0 << 4) | b0;
g0 = (g0 << 4) | g0;
r0 = (r0 << 4) | r0;
b1 = (b1 << 4) | b1;
g1 = (g1 << 4) | g1;
r1 = (r1 << 4) | r1;
b2 = (b2 << 4) | b2;
g2 = (g2 << 4) | g2;
r2 = (r2 << 4) | r2;
b3 = (b3 << 4) | b3;
g3 = (g3 << 4) | g3;
r3 = (r3 << 4) | r3;
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_argb4444 += 4;
next_argb4444 += 4;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8_t b0 = src_argb4444[0] & 0x0f;
uint8_t g0 = src_argb4444[0] >> 4;
uint8_t r0 = src_argb4444[1] & 0x0f;
uint8_t b2 = next_argb4444[0] & 0x0f;
uint8_t g2 = next_argb4444[0] >> 4;
uint8_t r2 = next_argb4444[1] & 0x0f;
b0 = (b0 << 4) | b0;
g0 = (g0 << 4) | g0;
r0 = (r0 << 4) | r0;
b2 = (b2 << 4) | b2;
g2 = (g2 << 4) | g2;
r2 = (r2 << 4) | r2;
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
}
}
void ARGBToUV444Row_C(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t ab = src_argb[0];
uint8_t ag = src_argb[1];
uint8_t ar = src_argb[2];
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
src_argb += 4;
dst_u += 1;
dst_v += 1;
}
}
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
dst_argb[3] = src_argb[3];
dst_argb += 4;
src_argb += 4;
}
}
// Convert a row of image to Sepia tone.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
int sb = (b * 17 + g * 68 + r * 35) >> 7;
int sg = (b * 22 + g * 88 + r * 45) >> 7;
int sr = (b * 24 + g * 98 + r * 50) >> 7;
// b does not over flow. a is preserved from original.
dst_argb[0] = sb;
dst_argb[1] = clamp255(sg);
dst_argb[2] = clamp255(sr);
dst_argb += 4;
}
}
// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_argb,
const int8_t* matrix_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
int b = src_argb[0];
int g = src_argb[1];
int r = src_argb[2];
int a = src_argb[3];
int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
a * matrix_argb[3]) >>
6;
int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
a * matrix_argb[7]) >>
6;
int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
a * matrix_argb[11]) >>
6;
int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
a * matrix_argb[15]) >>
6;
dst_argb[0] = Clamp(sb);
dst_argb[1] = Clamp(sg);
dst_argb[2] = Clamp(sr);
dst_argb[3] = Clamp(sa);
src_argb += 4;
dst_argb += 4;
}
}
// Apply color table to a row of image.
void ARGBColorTableRow_C(uint8_t* dst_argb,
const uint8_t* table_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
int a = dst_argb[3];
dst_argb[0] = table_argb[b * 4 + 0];
dst_argb[1] = table_argb[g * 4 + 1];
dst_argb[2] = table_argb[r * 4 + 2];
dst_argb[3] = table_argb[a * 4 + 3];
dst_argb += 4;
}
}
// Apply color table to a row of image.
void RGBColorTableRow_C(uint8_t* dst_argb,
const uint8_t* table_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
dst_argb[0] = table_argb[b * 4 + 0];
dst_argb[1] = table_argb[g * 4 + 1];
dst_argb[2] = table_argb[r * 4 + 2];
dst_argb += 4;
}
}
void ARGBQuantizeRow_C(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
int width) {
int x;
for (x = 0; x < width; ++x) {
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
dst_argb += 4;
}
}
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 24
void ARGBShadeRow_C(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
uint32_t value) {
const uint32_t b_scale = REPEAT8(value & 0xff);
const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
const uint32_t a_scale = REPEAT8(value >> 24);
int i;
for (i = 0; i < width; ++i) {
const uint32_t b = REPEAT8(src_argb[0]);
const uint32_t g = REPEAT8(src_argb[1]);
const uint32_t r = REPEAT8(src_argb[2]);
const uint32_t a = REPEAT8(src_argb[3]);
dst_argb[0] = SHADE(b, b_scale);
dst_argb[1] = SHADE(g, g_scale);
dst_argb[2] = SHADE(r, r_scale);
dst_argb[3] = SHADE(a, a_scale);
src_argb += 4;
dst_argb += 4;
}
}
#undef REPEAT8
#undef SHADE
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16
void ARGBMultiplyRow_C(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
const uint32_t b = REPEAT8(src_argb0[0]);
const uint32_t g = REPEAT8(src_argb0[1]);
const uint32_t r = REPEAT8(src_argb0[2]);
const uint32_t a = REPEAT8(src_argb0[3]);
const uint32_t b_scale = src_argb1[0];
const uint32_t g_scale = src_argb1[1];
const uint32_t r_scale = src_argb1[2];
const uint32_t a_scale = src_argb1[3];
dst_argb[0] = SHADE(b, b_scale);
dst_argb[1] = SHADE(g, g_scale);
dst_argb[2] = SHADE(r, r_scale);
dst_argb[3] = SHADE(a, a_scale);
src_argb0 += 4;
src_argb1 += 4;
dst_argb += 4;
}
}
#undef REPEAT8
#undef SHADE
#define SHADE(f, v) clamp255(v + f)
void ARGBAddRow_C(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
const int b = src_argb0[0];
const int g = src_argb0[1];
const int r = src_argb0[2];
const int a = src_argb0[3];
const int b_add = src_argb1[0];
const int g_add = src_argb1[1];
const int r_add = src_argb1[2];
const int a_add = src_argb1[3];
dst_argb[0] = SHADE(b, b_add);
dst_argb[1] = SHADE(g, g_add);
dst_argb[2] = SHADE(r, r_add);
dst_argb[3] = SHADE(a, a_add);
src_argb0 += 4;
src_argb1 += 4;
dst_argb += 4;
}
}
#undef SHADE
#define SHADE(f, v) clamp0(f - v)
void ARGBSubtractRow_C(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
const int b = src_argb0[0];
const int g = src_argb0[1];
const int r = src_argb0[2];
const int a = src_argb0[3];
const int b_sub = src_argb1[0];
const int g_sub = src_argb1[1];
const int r_sub = src_argb1[2];
const int a_sub = src_argb1[3];
dst_argb[0] = SHADE(b, b_sub);
dst_argb[1] = SHADE(g, g_sub);
dst_argb[2] = SHADE(r, r_sub);
dst_argb[3] = SHADE(a, a_sub);
src_argb0 += 4;
src_argb1 += 4;
dst_argb += 4;
}
}
#undef SHADE
// Sobel functions which mimics SSSE3.
void SobelXRow_C(const uint8_t* src_y0,
const uint8_t* src_y1,
const uint8_t* src_y2,
uint8_t* dst_sobelx,
int width) {
int i;
for (i = 0; i < width; ++i) {
int a = src_y0[i];
int b = src_y1[i];
int c = src_y2[i];
int a_sub = src_y0[i + 2];
int b_sub = src_y1[i + 2];
int c_sub = src_y2[i + 2];
int a_diff = a - a_sub;
int b_diff = b - b_sub;
int c_diff = c - c_sub;
int sobel = Abs(a_diff + b_diff * 2 + c_diff);
dst_sobelx[i] = (uint8_t)(clamp255(sobel));
}
}
void SobelYRow_C(const uint8_t* src_y0,
const uint8_t* src_y1,
uint8_t* dst_sobely,
int width) {
int i;
for (i = 0; i < width; ++i) {
int a = src_y0[i + 0];
int b = src_y0[i + 1];
int c = src_y0[i + 2];
int a_sub = src_y1[i + 0];
int b_sub = src_y1[i + 1];
int c_sub = src_y1[i + 2];
int a_diff = a - a_sub;
int b_diff = b - b_sub;
int c_diff = c - c_sub;
int sobel = Abs(a_diff + b_diff * 2 + c_diff);
dst_sobely[i] = (uint8_t)(clamp255(sobel));
}
}
void SobelRow_C(const uint8_t* src_sobelx,
const uint8_t* src_sobely,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
int r = src_sobelx[i];
int b = src_sobely[i];
int s = clamp255(r + b);
dst_argb[0] = (uint8_t)(s);
dst_argb[1] = (uint8_t)(s);
dst_argb[2] = (uint8_t)(s);
dst_argb[3] = (uint8_t)(255u);
dst_argb += 4;
}
}
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
const uint8_t* src_sobely,
uint8_t* dst_y,
int width) {
int i;
for (i = 0; i < width; ++i) {
int r = src_sobelx[i];
int b = src_sobely[i];
int s = clamp255(r + b);
dst_y[i] = (uint8_t)(s);
}
}
void SobelXYRow_C(const uint8_t* src_sobelx,
const uint8_t* src_sobely,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
int r = src_sobelx[i];
int b = src_sobely[i];
int g = clamp255(r + b);
dst_argb[0] = (uint8_t)(b);
dst_argb[1] = (uint8_t)(g);
dst_argb[2] = (uint8_t)(r);
dst_argb[3] = (uint8_t)(255u);
dst_argb += 4;
}
}
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// Copy a Y to RGB.
int x;
for (x = 0; x < width; ++x) {
uint8_t y = src_y[0];
dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
dst_argb[3] = 255u;
dst_argb += 4;
++src_y;
}
}
// TODO(fbarchard): Unify these structures to be platform independent.
// TODO(fbarchard): Generate SIMD structures from float matrix.
// BT.601 YUV to RGB reference
// R = (Y - 16) * 1.164 - V * -1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
// B = (Y - 16) * 1.164 - U * -2.018
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.018 * 64)) */
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR -102 /* round(-1.596 * 64) */
// Bias values to subtract 16 from Y and 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)
#if defined(__aarch64__) // 64 bit arm
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__) // 32 bit arm
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
{0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
{VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
{0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// JPEG YUV to RGB reference
// * R = Y - V * -1.40200
// * G = Y - U * 0.34414 - V * 0.71414
// * B = Y - U * -1.77200
// Y contribution to R,G,B. Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32 /* 64 / 2 */
// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR -90 /* round(-1.40200 * 64) */
// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)
#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
{0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
{VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
{0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// BT.709 YUV to RGB reference
// R = (Y - 16) * 1.164 - V * -1.793
// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
// B = (Y - 16) * 1.164 - U * -2.112
// See also http://www.equasys.de/colorconversion.html
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.112 * 64)) */
#define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */
#define VR -115 /* round(-1.793 * 64) */
// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)
#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
{0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
{VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
{0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// BT.2020 YUV to RGB reference
// R = (Y - 16) * 1.164384 - V * -1.67867
// G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
// B = (Y - 16) * 1.164384 - U * -2.14177
// Y contribution to R,G,B. Scale and bias.
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
// TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.142 * 64)) */
#define UG 12 /* round(0.187326 * 64) */
#define VG 42 /* round(0.65042 * 64) */
#define VR -107 /* round(-1.67867 * 64) */
// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)
#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
{0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
{VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
{0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel(uint8_t y,
uint8_t u,
uint8_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
*g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
*r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
}
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel8_16(uint8_t y,
uint8_t u,
uint8_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = (int)(-(u * ub) + y1 + bb);
*g = (int)(-(u * ug + v * vg) + y1 + bg);
*r = (int)(-(v * vr) + y1 + br);
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel16(int16_t y,
int16_t u,
int16_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
u = clamp255(u >> 2);
v = clamp255(v >> 2);
*b = (int)(-(u * ub) + y1 + bb);
*g = (int)(-(u * ug + v * vg) + y1 + bg);
*r = (int)(-(v * vr) + y1 + br);
}
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
uint16_t u,
uint16_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
int b16;
int g16;
int r16;
YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
*b = Clamp(b16 >> 6);
*g = Clamp(g16 >> 6);
*r = Clamp(r16 >> 6);
}
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YPixel(uint8_t y,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) || defined(__arm__)
int ygb = yuvconstants->kUVBiasBGR[3];
int yg = yuvconstants->kYToRgb[1];
#else
int ygb = yuvconstants->kYBiasToRgb[0];
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = Clamp(((int32_t)(y1) + ygb) >> 6);
*g = Clamp(((int32_t)(y1) + ygb) >> 6);
*r = Clamp(((int32_t)(y1) + ygb) >> 6);
}
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
void I444ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
src_v += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
#else
void I444ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
src_y += 1;
src_u += 1;
src_v += 1;
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
// 10 bit YUV to ARGB
void I210ToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
uint32_t ar30;
b = b >> 4; // convert 10.6 to 10 bit.
g = g >> 4;
r = r >> 4;
b = Clamp10(b);
g = Clamp10(g);
r = Clamp10(r);
ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
(*(uint32_t*)rgb_buf) = ar30;
}
// 10 bit YUV to 10 bit AR30
void I210ToAR30Row_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
int b;
int g;
int r;
for (x = 0; x < width - 1; x += 2) {
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf + 4, b, g, r);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
}
}
// 8 bit YUV to 10 bit AR30
// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
void I422ToAR30Row_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
int b;
int g;
int r;
for (x = 0; x < width - 1; x += 2) {
YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf + 4, b, g, r);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
}
}
void I422AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = src_a[1];
src_y += 2;
src_u += 1;
src_v += 1;
src_a += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
}
}
void I422ToRGB24Row_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
}
}
void I422ToARGB4444Row_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
uint8_t b0;
uint8_t g0;
uint8_t r0;
uint8_t b1;
uint8_t g1;
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
b1 = b1 >> 4;
g1 = g1 >> 4;
r1 = r1 >> 4;
*(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
(g1 << 20) | (r1 << 24) | 0xf000f000;
src_y += 2;
src_u += 1;
src_v += 1;
dst_argb4444 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
*(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
}