Merge "Removing last_source field from VP9_COMP."
diff --git a/examples/set_maps.c b/examples/set_maps.c
index ff60d51..2ee5bca 100644
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -42,6 +42,7 @@
// Use the `simple_decoder` example to decode this sample, and observe
// the change in the image at frames 22, 33, and 44.
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -177,9 +178,10 @@
memset(&info, 0, sizeof(info));
encoder = get_vpx_encoder_by_name(argv[1]);
- if (!encoder)
+ if (encoder == NULL) {
die("Unsupported codec.");
-
+ }
+ assert(encoder != NULL);
info.codec_fourcc = encoder->fourcc;
info.frame_width = strtol(argv[2], NULL, 0);
info.frame_height = strtol(argv[3], NULL, 0);
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index 4ec1848..5eac92c 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -12,6 +12,7 @@
// encoding scheme based on temporal scalability for video applications
// that benefit from a scalable bitstream.
+#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -438,7 +439,7 @@
}
int main(int argc, char **argv) {
- VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS];
+ VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t cfg;
int frame_cnt = 0;
@@ -456,7 +457,6 @@
int layering_mode = 0;
int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
int flag_periodicity = 1;
- int max_intra_size_pct;
vpx_svc_layer_id_t layer_id = {0, 0};
const VpxInterface *encoder = NULL;
FILE *infile = NULL;
@@ -570,6 +570,8 @@
outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info);
if (!outfile[i])
die("Failed to open %s for writing", file_name);
+
+ assert(outfile[i] != NULL);
}
// No spatial layers in this encoder.
cfg.ss_number_layers = 1;
@@ -595,11 +597,11 @@
// This controls the maximum target size of the key frame.
// For generating smaller key frames, use a smaller max_intra_size_pct
// value, like 100 or 200.
- max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
- * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0);
- // For low-quality key frame.
- max_intra_size_pct = 200;
- vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct);
+ {
+ const int max_intra_size_pct = 200;
+ vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+ max_intra_size_pct);
+ }
frame_avail = 1;
while (frame_avail || got_data) {
diff --git a/third_party/libyuv/README.libvpx b/third_party/libyuv/README.libvpx
index fa5b498..fb2b9d1 100644
--- a/third_party/libyuv/README.libvpx
+++ b/third_party/libyuv/README.libvpx
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1041
+Version: 1060
License: BSD
License File: LICENSE
diff --git a/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/third_party/libyuv/include/libyuv/mjpeg_decoder.h
index 82fd95d..8423121 100644
--- a/third_party/libyuv/include/libyuv/mjpeg_decoder.h
+++ b/third_party/libyuv/include/libyuv/mjpeg_decoder.h
@@ -153,7 +153,6 @@
int* subsample_x, int* subsample_y, int number_of_components);
private:
-
void AllocOutputBuffers(int num_outbufs);
void DestroyOutputBuffers();
diff --git a/third_party/libyuv/include/libyuv/row.h b/third_party/libyuv/include/libyuv/row.h
index fdfe1ae..4b3c870 100644
--- a/third_party/libyuv/include/libyuv/row.h
+++ b/third_party/libyuv/include/libyuv/row.h
@@ -252,6 +252,94 @@
// The following are available on arm64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+// #define HAS_I444TOARGBROW_NEON
+// #define HAS_I422TOARGBROW_NEON
+// #define HAS_I411TOARGBROW_NEON
+// #define HAS_I422TOBGRAROW_NEON
+// #define HAS_I422TOABGRROW_NEON
+// #define HAS_I422TORGBAROW_NEON
+// #define HAS_I422TORGB24ROW_NEON
+// #define HAS_I422TORAWROW_NEON
+// #define HAS_I422TORGB565ROW_NEON
+// #define HAS_I422TOARGB1555ROW_NEON
+// #define HAS_I422TOARGB4444ROW_NEON
+// #define HAS_YTOARGBROW_NEON
+// #define HAS_I400TOARGBROW_NEON
+// #define HAS_NV12TOARGBROW_NEON
+// #define HAS_NV21TOARGBROW_NEON
+// #define HAS_NV12TORGB565ROW_NEON
+// #define HAS_NV21TORGB565ROW_NEON
+// #define HAS_YUY2TOARGBROW_NEON
+// #define HAS_UYVYTOARGBROW_NEON
+#define HAS_SPLITUVROW_NEON
+#define HAS_MERGEUVROW_NEON
+#define HAS_COPYROW_NEON
+#define HAS_SETROW_NEON
+#define HAS_ARGBSETROWS_NEON
+#define HAS_MIRRORROW_NEON
+#define HAS_MIRRORUVROW_NEON
+#define HAS_ARGBMIRRORROW_NEON
+#define HAS_RGB24TOARGBROW_NEON
+#define HAS_RAWTOARGBROW_NEON
+// #define HAS_RGB565TOARGBROW_NEON
+// #define HAS_ARGB1555TOARGBROW_NEON
+// #define HAS_ARGB4444TOARGBROW_NEON
+#define HAS_ARGBTORGB24ROW_NEON
+#define HAS_ARGBTORAWROW_NEON
+#define HAS_YUY2TOYROW_NEON
+#define HAS_UYVYTOYROW_NEON
+#define HAS_YUY2TOUV422ROW_NEON
+#define HAS_UYVYTOUV422ROW_NEON
+#define HAS_YUY2TOUVROW_NEON
+#define HAS_UYVYTOUVROW_NEON
+#define HAS_HALFROW_NEON
+#define HAS_ARGBTOBAYERROW_NEON
+#define HAS_ARGBTOBAYERGGROW_NEON
+#define HAS_ARGBSHUFFLEROW_NEON
+#define HAS_I422TOYUY2ROW_NEON
+#define HAS_I422TOUYVYROW_NEON
+// #define HAS_ARGBTORGB565ROW_NEON
+// #define HAS_ARGBTOARGB1555ROW_NEON
+// #define HAS_ARGBTOARGB4444ROW_NEON
+#define HAS_ARGBTOYROW_NEON
+#define HAS_ARGBTOYJROW_NEON
+// #define HAS_ARGBTOUV444ROW_NEON
+// #define HAS_ARGBTOUV422ROW_NEON
+// #define HAS_ARGBTOUV411ROW_NEON
+// #define HAS_ARGBTOUVROW_NEON
+// #define HAS_ARGBTOUVJROW_NEON
+// #define HAS_BGRATOUVROW_NEON
+// #define HAS_ABGRTOUVROW_NEON
+// #define HAS_RGBATOUVROW_NEON
+// #define HAS_RGB24TOUVROW_NEON
+// #define HAS_RAWTOUVROW_NEON
+// #define HAS_RGB565TOUVROW_NEON
+// #define HAS_ARGB1555TOUVROW_NEON
+// #define HAS_ARGB4444TOUVROW_NEON
+// #define HAS_RGB565TOYROW_NEON
+// #define HAS_ARGB1555TOYROW_NEON
+// #define HAS_ARGB4444TOYROW_NEON
+// #define HAS_BGRATOYROW_NEON
+// #define HAS_ABGRTOYROW_NEON
+// #define HAS_RGBATOYROW_NEON
+// #define HAS_RGB24TOYROW_NEON
+// #define HAS_RAWTOYROW_NEON
+// #define HAS_INTERPOLATEROW_NEON
+// #define HAS_ARGBBLENDROW_NEON
+// #define HAS_ARGBATTENUATEROW_NEON
+// #define HAS_ARGBQUANTIZEROW_NEON
+// #define HAS_ARGBSHADEROW_NEON
+// #define HAS_ARGBGRAYROW_NEON
+// #define HAS_ARGBSEPIAROW_NEON
+// #define HAS_ARGBCOLORMATRIXROW_NEON
+#define HAS_ARGBMULTIPLYROW_NEON
+#define HAS_ARGBADDROW_NEON
+#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_SOBELROW_NEON
+#define HAS_SOBELTOPLANEROW_NEON
+#define HAS_SOBELXYROW_NEON
+#define HAS_SOBELXROW_NEON
+#define HAS_SOBELYROW_NEON
#endif
// The following are available on Neon platforms:
@@ -465,7 +553,7 @@
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
#endif // defined(__native_client__) && defined(__x86_64__)
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
#undef MEMACCESS
#if defined(__native_client__)
#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
diff --git a/third_party/libyuv/include/libyuv/scale_row.h b/third_party/libyuv/include/libyuv/scale_row.h
index 8dc0762..3c49542 100644
--- a/third_party/libyuv/include/libyuv/scale_row.h
+++ b/third_party/libyuv/include/libyuv/scale_row.h
@@ -51,6 +51,14 @@
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
+#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+ (defined(__aarch64__) || defined(LIBYUV_NEON))
+/* #define HAS_SCALEROWDOWN2_NEON */
+/* #define HAS_SCALEROWDOWN4_NEON */
+/* #define HAS_SCALEROWDOWN34_NEON */
+/* #define HAS_SCALEROWDOWN38_NEON */
+/* #define HAS_SCALEARGBROWDOWNEVEN_NEON */
+/* #define HAS_SCALEARGBROWDOWN2_NEON */
#endif
// The following are available on Mips platforms:
diff --git a/third_party/libyuv/include/libyuv/version.h b/third_party/libyuv/include/libyuv/version.h
index 912c4c9..73a7f1b 100644
--- a/third_party/libyuv/include/libyuv/version.h
+++ b/third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1041
+#define LIBYUV_VERSION 1059
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/third_party/libyuv/source/compare.cc b/third_party/libyuv/source/compare.cc
index 9ea81b4..dc715e0 100644
--- a/third_party/libyuv/source/compare.cc
+++ b/third_party/libyuv/source/compare.cc
@@ -80,7 +80,7 @@
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
diff --git a/third_party/libyuv/source/compare_neon.cc b/third_party/libyuv/source/compare_neon.cc
index 5e7b8e4..55052c0 100644
--- a/third_party/libyuv/source/compare_neon.cc
+++ b/third_party/libyuv/source/compare_neon.cc
@@ -56,6 +56,45 @@
return sse;
}
+#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
+ volatile uint32 sse;
+ asm volatile (
+ "eor v16.16b, v16.16b, v16.16b \n"
+ "eor v18.16b, v18.16b, v18.16b \n"
+ "eor v17.16b, v17.16b, v17.16b \n"
+ "eor v19.16b, v19.16b, v19.16b \n"
+
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "ld1 {v0.16b}, [%0], #16 \n"
+ MEMACCESS(1)
+ "ld1 {v1.16b}, [%1], #16 \n"
+ "subs %2, %2, #16 \n"
+ "usubl v2.8h, v0.8b, v1.8b \n"
+ "usubl2 v3.8h, v0.16b, v1.16b \n"
+ "smlal v16.4s, v2.4h, v2.4h \n"
+ "smlal v17.4s, v3.4h, v3.4h \n"
+ "smlal2 v18.4s, v2.8h, v2.8h \n"
+ "smlal2 v19.4s, v3.8h, v3.8h \n"
+ "bgt 1b \n"
+
+ "add v16.4s, v16.4s, v17.4s \n"
+ "add v18.4s, v18.4s, v19.4s \n"
+ "add v19.4s, v16.4s, v18.4s \n"
+ "addv s0, v19.4s \n"
+ "fmov %w3, s0 \n"
+ : "+r"(src_a),
+ "+r"(src_b),
+ "+r"(count),
+ "=r"(sse)
+ :
+ : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
+ return sse;
+}
+
#endif // __ARM_NEON__
#ifdef __cplusplus
diff --git a/third_party/libyuv/source/convert.cc b/third_party/libyuv/source/convert.cc
index 874a6cb..a8e294f 100644
--- a/third_party/libyuv/source/convert.cc
+++ b/third_party/libyuv/source/convert.cc
@@ -401,7 +401,7 @@
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
- int halfheight = (height + 1) >> 1;
+ int halfheight;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
int pix) = YUY2ToUV422Row_C;
@@ -711,11 +711,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
@@ -963,9 +965,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1022,36 +1021,44 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RGB24TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
- RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb24 += src_stride_rgb24 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
+ {
#if !defined(HAS_RGB24TOYROW_NEON)
- free_aligned_buffer_64(row);
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_RGB24TOYROW_NEON)
+ RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
+ RGB24ToYRow(src_rgb24, dst_y, width);
+ RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_rgb24 += src_stride_rgb24 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_RGB24TOYROW_NEON)
+ RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
+ RGB24ToYRow(src_rgb24, dst_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !defined(HAS_RGB24TOYROW_NEON)
+ free_aligned_buffer_64(row);
+#endif
+ }
return 0;
}
@@ -1075,9 +1082,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1134,36 +1138,42 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RAWTOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
- RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_raw += src_stride_raw * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
+ {
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
+
+ for (y = 0; y < height - 1; y += 2) {
+ #if defined(HAS_RAWTOYROW_NEON)
+ RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
+ RAWToYRow(src_raw, dst_y, width);
+ RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+ #else
+ RAWToARGBRow(src_raw, row, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ #endif
+ src_raw += src_stride_raw * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ #if defined(HAS_RAWTOYROW_NEON)
+ RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
+ RAWToYRow(src_raw, dst_y, width);
+ #else
+ RAWToARGBRow(src_raw, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ #endif
+ }
+ #if !defined(HAS_RAWTOYROW_NEON)
+ free_aligned_buffer_64(row);
+ #endif
}
- if (height & 1) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RAWTOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
return 0;
}
@@ -1187,9 +1197,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1246,36 +1253,44 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RGB565TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
- RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb565 += src_stride_rgb565 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
+ {
#if !defined(HAS_RGB565TOYROW_NEON)
- free_aligned_buffer_64(row);
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_RGB565TOYROW_NEON)
+ RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
+ RGB565ToYRow(src_rgb565, dst_y, width);
+ RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
+#else
+ RGB565ToARGBRow(src_rgb565, row, width);
+ RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_rgb565 += src_stride_rgb565 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_RGB565TOYROW_NEON)
+ RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
+ RGB565ToYRow(src_rgb565, dst_y, width);
+#else
+ RGB565ToARGBRow(src_rgb565, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !defined(HAS_RGB565TOYROW_NEON)
+ free_aligned_buffer_64(row);
+#endif
+ }
return 0;
}
@@ -1299,9 +1314,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1358,38 +1370,45 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_ARGB1555TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
- ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
- width);
-#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ {
+#if !defined(HAS_ARGB1555TOYROW_NEON)
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
- src_argb1555 += src_stride_argb1555 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
+ for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
+ ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
+ ARGB1555ToYRow(src_argb1555, dst_y, width);
+ ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
+ width);
#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
+ ARGB1555ToARGBRow(src_argb1555, row, width);
+ ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
+ width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
- }
+ src_argb1555 += src_stride_argb1555 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_ARGB1555TOYROW_NEON)
+ ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
+ ARGB1555ToYRow(src_argb1555, dst_y, width);
+#else
+ ARGB1555ToARGBRow(src_argb1555, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
#if !defined(HAS_ARGB1555TOYROW_NEON)
free_aligned_buffer_64(row);
#endif
+ }
return 0;
}
@@ -1413,9 +1432,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1472,38 +1488,46 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_ARGB4444TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
- ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
- width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_argb4444 += src_stride_argb4444 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
+ {
#if !defined(HAS_ARGB4444TOYROW_NEON)
- free_aligned_buffer_64(row);
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
+ ARGB4444ToYRow(src_argb4444, dst_y, width);
+ ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
+ width);
+#else
+ ARGB4444ToARGBRow(src_argb4444, row, width);
+ ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
+ width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_argb4444 += src_stride_argb4444 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
+ ARGB4444ToYRow(src_argb4444, dst_y, width);
+#else
+ ARGB4444ToARGBRow(src_argb4444, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !defined(HAS_ARGB4444TOYROW_NEON)
+ free_aligned_buffer_64(row);
+#endif
+ }
return 0;
}
diff --git a/third_party/libyuv/source/convert_from_argb.cc b/third_party/libyuv/source/convert_from_argb.cc
index 121a416..de461dd 100644
--- a/third_party/libyuv/source/convert_from_argb.cc
+++ b/third_party/libyuv/source/convert_from_argb.cc
@@ -60,6 +60,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToUV444Row = ARGBToUV444Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
@@ -76,10 +83,8 @@
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
- ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
- ARGBToUV444Row = ARGBToUV444Row_NEON;
}
}
#endif
@@ -134,6 +139,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV422Row = ARGBToUV422Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
@@ -153,12 +165,6 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
}
#endif
@@ -228,11 +234,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 32) {
- ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUV411Row = ARGBToUV411Row_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUV411ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 32) {
+ ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUV411Row = ARGBToUV411Row_NEON;
}
}
#endif
@@ -261,9 +269,6 @@
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
if (!src_argb ||
!dst_y || !dst_uv ||
width <= 0 || height == 0) {
@@ -296,11 +301,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
@@ -331,22 +338,27 @@
}
}
#endif
+ {
+ // Allocate a rows of uv.
+ align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
+ uint8* row_v = row_u + ((halfwidth + 15) & ~15);
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+ MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ free_aligned_buffer_64(row_u);
}
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
return 0;
}
@@ -364,9 +376,6 @@
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
if (!src_argb ||
!dst_y || !dst_uv ||
width <= 0 || height == 0) {
@@ -399,11 +408,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
@@ -434,22 +445,27 @@
}
}
#endif
+ {
+ // Allocate a rows of uv.
+ align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
+ uint8* row_v = row_u + ((halfwidth + 15) & ~15);
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+ MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ free_aligned_buffer_64(row_u);
}
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
return 0;
}
@@ -493,6 +509,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV422Row = ARGBToUV422Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
@@ -510,12 +533,6 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
}
#endif
@@ -594,6 +611,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV422Row = ARGBToUV422Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
@@ -611,12 +635,6 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
}
#endif
@@ -1022,11 +1040,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
}
- if (width >= 16) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
}
}
#endif
diff --git a/third_party/libyuv/source/cpu_id.cc b/third_party/libyuv/source/cpu_id.cc
index 2e0d61d..deb4c44 100644
--- a/third_party/libyuv/source/cpu_id.cc
+++ b/third_party/libyuv/source/cpu_id.cc
@@ -14,7 +14,7 @@
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- !defined(__native_client__) && defined(_M_X64) && \
+ !defined(__native_client__) && \
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv()
#endif
@@ -97,7 +97,7 @@
uint32 xcr0 = 0u;
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
-#elif defined(_M_IX86)
+#elif defined(_M_IX86) && defined(_MSC_VER)
__asm {
xor ecx, ecx // xcr 0
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
@@ -256,12 +256,17 @@
if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
cpu_info_ &= ~kCpuHasMIPS_DSPR2;
}
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
cpu_info_ = kCpuHasNEON;
+// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
+// flag in it.
+// So for aarch64, neon enabling is hard coded here.
+#elif defined(__aarch64__)
+ cpu_info_ = kCpuHasNEON;
#else
// Linux arm parse text file for neon detect.
cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
diff --git a/third_party/libyuv/source/format_conversion.cc b/third_party/libyuv/source/format_conversion.cc
index a3daf96..3c17371 100644
--- a/third_party/libyuv/source/format_conversion.cc
+++ b/third_party/libyuv/source/format_conversion.cc
@@ -332,11 +332,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
diff --git a/third_party/libyuv/source/mjpeg_decoder.cc b/third_party/libyuv/source/mjpeg_decoder.cc
index 15b0ed8..36028c3 100644
--- a/third_party/libyuv/source/mjpeg_decoder.cc
+++ b/third_party/libyuv/source/mjpeg_decoder.cc
@@ -13,8 +13,8 @@
#ifdef HAVE_JPEG
#include <assert.h>
-#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
- !defined(TARGET_IPHONE_SIMULATOR)
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+ !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP
@@ -101,7 +101,7 @@
}
buf_.data = src;
- buf_.len = (int)(src_len);
+ buf_.len = static_cast<int>(src_len);
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
@@ -411,7 +411,7 @@
}
boolean fill_input_buffer(j_decompress_ptr cinfo) {
- BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
+ BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
if (buf_vec->pos >= buf_vec->len) {
assert(0 && "No more data");
// ERROR: No more data
@@ -447,7 +447,7 @@
// ERROR: Error in jpeglib: buf
#endif
- SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
+ SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
// This rewinds the call stack to the point of the corresponding setjmp()
// and causes it to return (for a second time) with value 1.
longjmp(mgr->setjmp_buffer, 1);
diff --git a/third_party/libyuv/source/row_any.cc b/third_party/libyuv/source/row_any.cc
index 97ef844..ce8b3da 100644
--- a/third_party/libyuv/source/row_any.cc
+++ b/third_party/libyuv/source/row_any.cc
@@ -79,9 +79,13 @@
YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
1, 2, 7)
YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
-YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
-YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_NEON
+#ifdef HAS_I422TOYUY2ROW_NEON
+YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
+#endif // HAS_I422TOYUY2ROW_NEON
+#ifdef HAS_I422TOUYVYROW_NEON
+YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
+#endif // HAS_I422TOUYVYROW_NEON
#undef YANY
// Wrappers to handle odd width
@@ -250,12 +254,26 @@
YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
+#endif
+#ifdef HAS_YUY2TOYROW_NEON
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
+#endif
+#ifdef HAS_UYVYTOYROW_NEON
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
+#endif
+#ifdef HAS_RGB24TOARGBROW_NEON
YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
+#endif
+#ifdef HAS_RAWTOARGBROW_NEON
YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
+#endif
+#ifdef HAS_RGB565TOARGBROW_NEON
YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
+#endif
+#ifdef HAS_ARGB1555TOARGBROW_NEON
YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
+#endif
+#ifdef HAS_ARGB4444TOARGBROW_NEON
YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
#endif
#undef YANY
@@ -333,7 +351,11 @@
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_NEON
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_UYVYTOUVROW_NEON
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
#endif
#undef UVANY
diff --git a/third_party/libyuv/source/row_neon64.cc b/third_party/libyuv/source/row_neon64.cc
index 46e9ceb..21111cf 100644
--- a/third_party/libyuv/source/row_neon64.cc
+++ b/third_party/libyuv/source/row_neon64.cc
@@ -824,19 +824,19 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
+ "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store U
+ "st1 {v0.16b}, [%1], #16 \n" // store U
MEMACCESS(2)
- "vst1.8 {q1}, [%2]! \n" // store V
+ "st1 {v1.16b}, [%2], #16 \n" // store V
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3 // Output registers
: // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_SPLITUVROW_NEON
@@ -849,12 +849,12 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load U
+ "ld1 {v0.16b}, [%0], #16 \n" // load U
MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load V
+ "ld1 {v1.16b}, [%1], #16 \n" // load V
"subs %3, %3, #16 \n" // 16 processed per loop
MEMACCESS(2)
- "vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV
+ "st2 {v0.16b, v1.16b}, [%2], #32 \n" // store 16 pairs of UV
"bgt 1b \n"
:
"+r"(src_u), // %0
@@ -862,7 +862,7 @@
"+r"(dst_uv), // %2
"+r"(width) // %3 // Output registers
: // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_MERGEUVROW_NEON
@@ -874,16 +874,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
+ "ld1 {v0.8b-v3.8b}, [%0], #32 \n" // load 32
"subs %2, %2, #32 \n" // 32 processed per loop
MEMACCESS(1)
- "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32
+ "st1 {v0.8b-v3.8b}, [%1], #32 \n" // store 32
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(count) // %2 // Output registers
: // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_COPYROW_NEON
@@ -892,16 +892,16 @@
#ifdef HAS_SETROW_NEON
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
asm volatile (
- "vdup.u32 q0, %2 \n" // duplicate 4 ints
+ "dup v0.4s, %w2 \n" // duplicate 4 ints
"1: \n"
"subs %1, %1, #16 \n" // 16 bytes per loop
MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n" // store
+ "st1 {v0.16b}, [%0], #16 \n" // store
"bgt 1b \n"
: "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
- : "cc", "memory", "q0"
+ : "cc", "memory", "v0"
);
}
#endif // HAS_SETROW_NEON
@@ -922,26 +922,25 @@
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
asm volatile (
// Start at end of source row.
- "mov r3, #-16 \n"
"add %0, %0, %2 \n"
- "sub %0, #16 \n"
+ "sub %0, %0, #16 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #16 \n" // 16 pixels per loop.
- "vrev64.8 q0, q0 \n"
+ "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
+ "subs %2, %2, #16 \n" // 16 pixels per loop.
+ "rev64 v0.16b, v0.16b \n"
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // dst += 16
+ "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n"
+ "st1 {v0.D}[0], [%1], #8 \n"
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
+ : "r"((ptrdiff_t)-16) // %3
+ : "cc", "memory", "v0"
);
}
#endif // HAS_MIRRORROW_NEON
@@ -951,27 +950,27 @@
int width) {
asm volatile (
// Start at end of source row.
- "mov r12, #-16 \n"
"add %0, %0, %3, lsl #1 \n"
- "sub %0, #16 \n"
+ "sub %0, %0, #16 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
- "subs %3, #8 \n" // 8 pixels per loop.
- "vrev64.8 q0, q0 \n"
+ "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
+ "subs %3, %3, #8 \n" // 8 pixels per loop.
+ "rev64 v0.8b, v0.8b \n"
+ "rev64 v1.8b, v1.8b \n"
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // dst += 8
+ "st1 {v0.8b}, [%1], #8 \n" // dst += 8
MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n"
+ "st1 {v1.8b}, [%2], #8 \n"
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3
- :
- : "cc", "memory", "r12", "q0"
+ : "r"((ptrdiff_t)-16) // %4
+ : "cc", "memory", "v0", "v1"
);
}
#endif // HAS_MIRRORUVROW_NEON
@@ -980,26 +979,25 @@
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
asm volatile (
// Start at end of source row.
- "mov r3, #-16 \n"
"add %0, %0, %2, lsl #2 \n"
- "sub %0, #16 \n"
+ "sub %0, %0, #16 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #4 \n" // 4 pixels per loop.
- "vrev64.32 q0, q0 \n"
+ "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
+ "subs %2, %2, #4 \n" // 4 pixels per loop.
+ "rev64 v0.4s, v0.4s \n"
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // dst += 16
+ "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n"
+ "st1 {v0.D}[0], [%1], #8 \n"
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
+ : "r"((ptrdiff_t)-16) // %3
+ : "cc", "memory", "v0"
);
}
#endif // HAS_ARGBMIRRORROW_NEON
@@ -1007,20 +1005,20 @@
#ifdef HAS_RGB24TOARGBROW_NEON
void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
+ "movi v4.8b, #255 \n" // Alpha
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24.
+ "ld3 {v1.8b-v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
"subs %2, %2, #8 \n" // 8 processed per loop.
MEMACCESS(1)
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
+ "st4 {v1.8b-v4.8b}, [%1], #32 \n" // store 8 pixels of ARGB.
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
}
#endif // HAS_RGB24TOARGBROW_NEON
@@ -1028,21 +1026,22 @@
#ifdef HAS_RAWTOARGBROW_NEON
void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
+ "movi v5.8b, #255 \n" // Alpha
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
+ "ld3 {v0.8b-v2.8b}, [%0], #24 \n" // read r g b
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
+ "mov v3.8b, v1.8b \n" // move g
+ "mov v4.8b, v0.8b \n" // move r
MEMACCESS(1)
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
+ "st4 {v2.8b-v5.8b}, [%1], #32 \n" // store b g r a
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
}
#endif // HAS_RAWTOARGBROW_NEON
@@ -1170,16 +1169,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
+ "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
MEMACCESS(1)
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24.
+ "st3 {v1.8b-v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb24), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
}
#endif // HAS_ARGBTORGB24ROW_NEON
@@ -1190,17 +1189,18 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
+ "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load b g r a
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
+ "mov v4.8b, v2.8b \n" // mov g
+ "mov v5.8b, v1.8b \n" // mov b
MEMACCESS(1)
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW.
+ "st3 {v3.8b-v5.8b}, [%1], #24 \n" // store r g b
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_raw), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
}
#endif // HAS_ARGBTORAWROW_NEON
@@ -1211,16 +1211,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
+ "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
"subs %2, %2, #16 \n" // 16 processed per loop.
MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y.
+ "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y.
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_YUY2TOYROW_NEON
@@ -1231,16 +1231,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
+ "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
"subs %2, %2, #16 \n" // 16 processed per loop.
MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y.
+ "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y.
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_UYVYTOYROW_NEON
@@ -1252,19 +1252,19 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
"subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // store 8 U.
+ "st1 {v1.8b}, [%1], #8 \n" // store 8 U.
MEMACCESS(2)
- "vst1.8 {d3}, [%2]! \n" // store 8 V.
+ "st1 {v3.8b}, [%2], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_YUY2TOUV422ROW_NEON
@@ -1276,19 +1276,19 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
"subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 U.
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 U.
MEMACCESS(2)
- "vst1.8 {d2}, [%2]! \n" // store 8 V.
+ "st1 {v2.8b}, [%2], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_UYVYTOUV422ROW_NEON
@@ -1297,20 +1297,20 @@
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
- "add %1, %0, %1 \n" // stride + src_yuy2
+ "add %x1, %x0, %w1, sxtw \n" // stride + src_yuy2
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
"subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2.
- "vrhadd.u8 d1, d1, d5 \n" // average rows of U
- "vrhadd.u8 d3, d3, d7 \n" // average rows of V
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row YUY2.
+ "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U
+ "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V
MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n" // store 8 U.
+ "st1 {v1.8b}, [%2], #8 \n" // store 8 U.
MEMACCESS(3)
- "vst1.8 {d3}, [%3]! \n" // store 8 V.
+ "st1 {v3.8b}, [%3], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(stride_yuy2), // %1
@@ -1318,7 +1318,7 @@
"+r"(dst_v), // %3
"+r"(pix) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
);
}
#endif // HAS_YUY2TOUVROW_NEON
@@ -1327,20 +1327,20 @@
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
- "add %1, %0, %1 \n" // stride + src_uyvy
+ "add %x1, %x0, %w1, sxtw \n" // stride + src_uyvy
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
"subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY.
- "vrhadd.u8 d0, d0, d4 \n" // average rows of U
- "vrhadd.u8 d2, d2, d6 \n" // average rows of V
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row UYVY.
+ "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U
+ "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 U.
MEMACCESS(3)
- "vst1.8 {d2}, [%3]! \n" // store 8 V.
+ "st1 {v2.8b}, [%3], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(stride_uyvy), // %1
@@ -1348,7 +1348,7 @@
"+r"(dst_v), // %3
"+r"(pix) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
);
}
#endif // HAS_UYVYTOUVROW_NEON
@@ -1358,23 +1358,23 @@
uint8* dst_uv, int pix) {
asm volatile (
// change the stride to row 2 pointer
- "add %1, %0 \n"
+ "add %x1, %x0, %w1, sxtw \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // load row 1 16 pixels.
"subs %3, %3, #16 \n" // 16 processed per loop
MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels.
- "vrhadd.u8 q0, q1 \n" // average row 1 and 2
+ "ld1 {v1.16b}, [%1], #16 \n" // load row 2 16 pixels.
+ "urhadd v0.16b, v0.16b, v1.16b \n" // average row 1 and 2
MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n"
+ "st1 {v0.16b}, [%2], #16 \n"
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(src_uv_stride), // %1
"+r"(dst_uv), // %2
"+r"(pix) // %3
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_HALFROW_NEON
@@ -1384,22 +1384,22 @@
void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) {
asm volatile (
- "vmov.u32 d6[0], %3 \n" // selector
+ "mov v2.s[0], %w3 \n" // selector
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels.
+ "ld1 {v0.16b, v1.16b}, [%0], 32 \n" // load row 8 pixels.
"subs %2, %2, #8 \n" // 8 processed per loop
- "vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels
- "vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels
- "vtrn.u32 d4, d5 \n" // combine 8 pixels
+ "tbl v4.8b, {v0.16b}, v2.8b \n" // look up 4 pixels
+ "tbl v5.8b, {v1.16b}, v2.8b \n" // look up 4 pixels
+ "trn1 v4.4s, v4.4s, v5.4s \n" // combine 8 pixels
MEMACCESS(1)
- "vst1.8 {d4}, [%1]! \n" // store 8.
+ "st1 {v4.8b}, [%1], #8 \n" // store 8.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_bayer), // %1
"+r"(pix) // %2
: "r"(selector) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v4", "v5" // Clobber List
);
}
#endif // HAS_ARGBTOBAYERROW_NEON
@@ -1411,16 +1411,16 @@
asm volatile (
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load row 8 pixels.
"subs %2, %2, #8 \n" // 8 processed per loop
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // store 8 G's.
+ "st1 {v1.8b}, [%1], #8 \n" // store 8 G's.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_bayer), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_ARGBTOBAYERGGROW_NEON
@@ -1431,21 +1431,20 @@
const uint8* shuffler, int pix) {
asm volatile (
MEMACCESS(3)
- "vld1.8 {q2}, [%3] \n" // shuffler
+ "ld1 {v2.16b}, [%3] \n" // shuffler
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 4 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels.
"subs %2, %2, #4 \n" // 4 processed per loop
- "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels
- "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels
+ "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels
MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store 4.
+ "st1 {v1.16b}, [%1], #16 \n" // store 4.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
: "r"(shuffler) // %3
- : "cc", "memory", "q0", "q1", "q2" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2" // Clobber List
);
}
#endif // HAS_ARGBSHUFFLEROW_NEON
@@ -1459,14 +1458,15 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys
+ "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
+ "mov v2.8b, v1.8b \n"
MEMACCESS(1)
- "vld1.8 {d1}, [%1]! \n" // load 8 Us
+ "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
MEMACCESS(2)
- "vld1.8 {d3}, [%2]! \n" // load 8 Vs
+ "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs
"subs %4, %4, #16 \n" // 16 pixels
MEMACCESS(3)
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels.
+ "st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 YUY2/16 pixels.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
@@ -1474,7 +1474,7 @@
"+r"(dst_yuy2), // %3
"+r"(width) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_I422TOYUY2ROW_NEON
@@ -1488,14 +1488,15 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys
+ "ld2 {v1.8b, v2.8b}, [%0], #16 \n" // load 16 Ys
+ "mov v3.8b, v2.8b \n"
MEMACCESS(1)
- "vld1.8 {d0}, [%1]! \n" // load 8 Us
+ "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us
MEMACCESS(2)
- "vld1.8 {d2}, [%2]! \n" // load 8 Vs
+ "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs
"subs %4, %4, #16 \n" // 16 pixels
MEMACCESS(3)
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels.
+ "st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 UYVY/16 pixels.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
@@ -1503,7 +1504,7 @@
"+r"(dst_uyvy), // %3
"+r"(width) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_I422TOUYVYROW_NEON
@@ -1577,28 +1578,28 @@
#ifdef HAS_ARGBTOYROW_NEON
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
+ "movi v4.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBTOYROW_NEON
@@ -1606,26 +1607,26 @@
#ifdef HAS_ARGBTOYJROW_NEON
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
- "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
- "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
- "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
+ "movi v4.8b, #15 \n" // B * 0.11400 coefficient
+ "movi v5.8b, #75 \n" // G * 0.58700 coefficient
+ "movi v6.8b, #38 \n" // R * 0.29900 coefficient
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
);
}
#endif // HAS_ARGBTOYJROW_NEON
@@ -3048,20 +3049,20 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels.
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q0, d0, d1 \n" // multiply B
- "vmull.u8 q1, d2, d3 \n" // multiply G
- "vmull.u8 q2, d4, d5 \n" // multiply R
- "vmull.u8 q3, d6, d7 \n" // multiply A
- "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B
- "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G
- "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R
- "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
+ "umull v0.8h, v0.8b, v4.8b \n" // multiply B
+ "umull v1.8h, v1.8b, v5.8b \n" // multiply G
+ "umull v2.8h, v2.8b, v6.8b \n" // multiply R
+ "umull v3.8h, v3.8b, v7.8b \n" // multiply A
+ "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B
+ "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G
+ "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R
+ "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
@@ -3069,7 +3070,7 @@
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1", "q2", "q3"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBMULTIPLYROW_NEON
@@ -3083,14 +3084,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 q0, q0, q2 \n" // add B, G
- "vqadd.u8 q1, q1, q3 \n" // add R, A
+ "uqadd v0.8b, v0.8b, v4.8b \n"
+ "uqadd v1.8b, v1.8b, v5.8b \n"
+ "uqadd v2.8b, v2.8b, v6.8b \n"
+ "uqadd v3.8b, v3.8b, v7.8b \n"
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
@@ -3098,7 +3101,7 @@
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1", "q2", "q3"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBADDROW_NEON
@@ -3112,14 +3115,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqsub.u8 q0, q0, q2 \n" // subtract B, G
- "vqsub.u8 q1, q1, q3 \n" // subtract R, A
+ "uqsub v0.8b, v0.8b, v4.8b \n"
+ "uqsub v1.8b, v1.8b, v5.8b \n"
+ "uqsub v2.8b, v2.8b, v6.8b \n"
+ "uqsub v3.8b, v3.8b, v7.8b \n"
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
@@ -3127,7 +3132,7 @@
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1", "q2", "q3"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBSUBTRACTROW_NEON
@@ -3141,27 +3146,27 @@
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
+ "movi v3.8b, #255 \n" // alpha
// 8 pixel loop.
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
+ "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx.
MEMACCESS(1)
- "vld1.8 {d1}, [%1]! \n" // load 8 sobely.
+ "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d0, d0, d1 \n" // add
- "vmov.u8 d1, d0 \n"
- "vmov.u8 d2, d0 \n"
+ "uqadd v0.8b, v0.8b, v1.8b \n" // add
+ "mov v1.8b, v0.8b \n"
+ "mov v2.8b, v0.8b \n"
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_SOBELROW_NEON
@@ -3175,20 +3180,20 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx.
MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load 16 sobely.
+ "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely.
"subs %3, %3, #16 \n" // 16 processed per loop.
- "vqadd.u8 q0, q0, q1 \n" // add
+ "uqadd v0.16b, v0.16b, v1.16b \n" // add
MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 16 pixels.
+ "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels.
"bgt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_y), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1"
+ : "cc", "memory", "v0", "v1"
);
}
#endif // HAS_SOBELTOPLANEROW_NEON
@@ -3202,25 +3207,25 @@
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
+ "movi v3.8b, #255 \n" // alpha
// 8 pixel loop.
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
+ "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx.
MEMACCESS(1)
- "vld1.8 {d0}, [%1]! \n" // load 8 sobely.
+ "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d1, d0, d2 \n" // add
+ "uqadd v1.8b, v0.8b, v2.8b \n" // add
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_SOBELXYROW_NEON
@@ -3236,28 +3241,28 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0}, [%0],%5 \n" // top
+ "ld1 {v0.8b}, [%0],%5 \n" // top
MEMACCESS(0)
- "vld1.8 {d1}, [%0],%6 \n"
- "vsubl.u8 q0, d0, d1 \n"
+ "ld1 {v1.8b}, [%0],%6 \n"
+ "usubl v0.8h, v0.8b, v1.8b \n"
MEMACCESS(1)
- "vld1.8 {d2}, [%1],%5 \n" // center * 2
+ "ld1 {v2.8b}, [%1],%5 \n" // center * 2
MEMACCESS(1)
- "vld1.8 {d3}, [%1],%6 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
+ "ld1 {v3.8b}, [%1],%6 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "add v0.8h, v0.8h, v1.8h \n"
MEMACCESS(2)
- "vld1.8 {d2}, [%2],%5 \n" // bottom
+ "ld1 {v2.8b}, [%2],%5 \n" // bottom
MEMACCESS(2)
- "vld1.8 {d3}, [%2],%6 \n"
+ "ld1 {v3.8b}, [%2],%6 \n"
"subs %4, %4, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "abs v0.8h, v0.8h \n"
+ "uqxtn v0.8b, v0.8h \n"
MEMACCESS(3)
- "vst1.8 {d0}, [%3]! \n" // store 8 sobelx
+ "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx
"bgt 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
@@ -3266,7 +3271,7 @@
"+r"(width) // %4
: "r"(2), // %5
"r"(6) // %6
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_SOBELXROW_NEON
@@ -3282,28 +3287,28 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0}, [%0],%4 \n" // left
+ "ld1 {v0.8b}, [%0],%4 \n" // left
MEMACCESS(1)
- "vld1.8 {d1}, [%1],%4 \n"
- "vsubl.u8 q0, d0, d1 \n"
+ "ld1 {v1.8b}, [%1],%4 \n"
+ "usubl v0.8h, v0.8b, v1.8b \n"
MEMACCESS(0)
- "vld1.8 {d2}, [%0],%4 \n" // center * 2
+ "ld1 {v2.8b}, [%0],%4 \n" // center * 2
MEMACCESS(1)
- "vld1.8 {d3}, [%1],%4 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
+ "ld1 {v3.8b}, [%1],%4 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "add v0.8h, v0.8h, v1.8h \n"
MEMACCESS(0)
- "vld1.8 {d2}, [%0],%5 \n" // right
+ "ld1 {v2.8b}, [%0],%5 \n" // right
MEMACCESS(1)
- "vld1.8 {d3}, [%1],%5 \n"
+ "ld1 {v3.8b}, [%1],%5 \n"
"subs %3, %3, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "abs v0.8h, v0.8h \n"
+ "uqxtn v0.8b, v0.8h \n"
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 sobely
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely
"bgt 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
@@ -3311,7 +3316,7 @@
"+r"(width) // %3
: "r"(1), // %4
"r"(6) // %5
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_SOBELYROW_NEON
diff --git a/third_party/libyuv/source/row_win.cc b/third_party/libyuv/source/row_win.cc
index 8eb8889..f58fc51 100644
--- a/third_party/libyuv/source/row_win.cc
+++ b/third_party/libyuv/source/row_win.cc
@@ -10,7 +10,7 @@
#include "libyuv/row.h"
-#if defined (_M_X64)
+#if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
#include <emmintrin.h>
#include <tmmintrin.h> // For _mm_maddubs_epi16
#endif
@@ -78,7 +78,6 @@
const uint8* v_buf,
uint8* dst_argb,
int width) {
-
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const __m128i xmm4 = _mm_setzero_si128();
@@ -132,7 +131,6 @@
const uint8* v_buf,
uint8* dst_argb,
int width) {
-
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const __m128i xmm4 = _mm_setzero_si128();
diff --git a/tools_common.h b/tools_common.h
index 558413e..c1f466b 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -103,17 +103,25 @@
extern "C" {
#endif
+#if defined(__GNUC__)
+#define VPX_NO_RETURN __attribute__((noreturn))
+#else
+#define VPX_NO_RETURN
+#endif
+
/* Sets a stdio stream into binary mode */
FILE *set_binary_mode(FILE *stream);
-void die(const char *fmt, ...);
-void fatal(const char *fmt, ...);
+void die(const char *fmt, ...) VPX_NO_RETURN;
+void fatal(const char *fmt, ...) VPX_NO_RETURN;
void warn(const char *fmt, ...);
-void die_codec(vpx_codec_ctx_t *ctx, const char *s);
+void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;
/* The tool including this file must define usage_exit() */
-void usage_exit();
+void usage_exit() VPX_NO_RETURN;
+
+#undef VPX_NO_RETURN
int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame);
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index b05ad14..d48c4fe 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -224,7 +224,7 @@
int arnr_strength;
int arnr_type;
- struct vpx_fixed_buf two_pass_stats_in;
+ vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vp8e_tuning tuning;
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 2baf693..bdd12f3 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -208,11 +208,11 @@
int tile_columns;
int tile_rows;
- struct vpx_fixed_buf two_pass_stats_in;
- struct vpx_codec_pkt_list *output_pkt_list;
+ vpx_fixed_buf_t two_pass_stats_in;
+ struct vpx_codec_pkt_list *output_pkt_list;
#if CONFIG_FP_MB_STATS
- struct vpx_fixed_buf firstpass_mb_stats_in;
+ vpx_fixed_buf_t firstpass_mb_stats_in;
#endif
vp8e_tuning tuning;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 801449b..0857e39 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -25,7 +25,7 @@
double framerate;
int avg_frame_size;
TWO_PASS twopass;
- struct vpx_fixed_buf rc_twopass_stats_in;
+ vpx_fixed_buf_t rc_twopass_stats_in;
unsigned int current_video_frame_in_layer;
int is_key_frame;
vpx_svc_parameters_t svc_params_received;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index d6b5972..efde63f 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -978,8 +978,9 @@
cx_data_sz -= size;
#if CONFIG_SPATIAL_SVC
if (is_spatial_svc(cpi)) {
- vpx_codec_cx_pkt_t pkt = {0};
+ vpx_codec_cx_pkt_t pkt;
int i;
+ vp9_zero(pkt);
pkt.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
pkt.data.layer_sizes[i] = cpi->svc.layer_context[i].layer_size;
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 82bef55..d7c5800 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -344,7 +344,7 @@
vpx_codec_priv_cb_pair_t put_slice_cb;
} dec;
struct {
- struct vpx_fixed_buf cx_data_dst_buf;
+ vpx_fixed_buf_t cx_data_dst_buf;
unsigned int cx_data_pad_before;
unsigned int cx_data_pad_after;
vpx_codec_cx_pkt_t cx_data_pkt;
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 7dbbf2f..58acf18 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -188,14 +188,14 @@
has id 0.*/
} frame; /**< data for compressed frame packet */
- struct vpx_fixed_buf twopass_stats; /**< data for two-pass packet */
- struct vpx_fixed_buf firstpass_mb_stats; /**< first pass mb packet */
+ vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */
+ vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
struct vpx_psnr_pkt {
unsigned int samples[4]; /**< Number of samples, total/y/u/v */
uint64_t sse[4]; /**< sum squared error, total/y/u/v */
double psnr[4]; /**< PSNR, total/y/u/v */
} psnr; /**< data for PSNR packet */
- struct vpx_fixed_buf raw; /**< data for arbitrary packets */
+ vpx_fixed_buf_t raw; /**< data for arbitrary packets */
#if CONFIG_SPATIAL_SVC
size_t layer_sizes[VPX_SS_MAX_LAYERS];
#endif
@@ -452,14 +452,14 @@
* A buffer containing all of the stats packets produced in the first
* pass, concatenated.
*/
- struct vpx_fixed_buf rc_twopass_stats_in;
+ vpx_fixed_buf_t rc_twopass_stats_in;
/*!\brief first pass mb stats buffer.
*
* A buffer containing all of the first pass mb stats packets produced
* in the first pass, concatenated.
*/
- struct vpx_fixed_buf rc_firstpass_mb_stats_in;
+ vpx_fixed_buf_t rc_firstpass_mb_stats_in;
/*!\brief Target data rate
*
diff --git a/vpxenc.c b/vpxenc.c
index b99e61a..d8fc553 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -740,8 +740,9 @@
#if CONFIG_VP9_ENCODER
// Make default VP9 passes = 2 until there is a better quality 1-pass
// encoder
- global->passes = (strcmp(global->codec->name, "vp9") == 0 &&
- global->deadline != VPX_DL_REALTIME) ? 2 : 1;
+ if (global->codec != NULL && global->codec->name != NULL)
+ global->passes = (strcmp(global->codec->name, "vp9") == 0 &&
+ global->deadline != VPX_DL_REALTIME) ? 2 : 1;
#else
global->passes = 1;
#endif
@@ -809,8 +810,10 @@
struct stream_state *stream;
stream = calloc(1, sizeof(*stream));
- if (!stream)
+ if (stream == NULL) {
fatal("Failed to allocate new stream.");
+ }
+
if (prev) {
memcpy(stream, prev, sizeof(*stream));
stream->index++;
@@ -996,12 +999,13 @@
* instance of this control.
*/
for (j = 0; j < config->arg_ctrl_cnt; j++)
- if (config->arg_ctrls[j][0] == ctrl_args_map[i])
+ if (ctrl_args_map != NULL &&
+ config->arg_ctrls[j][0] == ctrl_args_map[i])
break;
/* Update/insert */
assert(j < (int)ARG_CTRL_CNT_MAX);
- if (j < (int)ARG_CTRL_CNT_MAX) {
+ if (ctrl_args_map != NULL && j < (int)ARG_CTRL_CNT_MAX) {
config->arg_ctrls[j][0] = ctrl_args_map[i];
config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg);
if (j == config->arg_ctrl_cnt)
@@ -1788,7 +1792,8 @@
got_data = 0;
FOREACH_STREAM(get_cx_data(stream, &global, &got_data));
- if (!got_data && input.length && !streams->frames_out) {
+ if (!got_data && input.length && streams != NULL &&
+ !streams->frames_out) {
lagged_count = global.limit ? seen_frames : ftello(input.file);
} else if (input.length) {
int64_t remaining;
diff --git a/y4minput.c b/y4minput.c
index 520c332..bcc742a 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -700,7 +700,7 @@
int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
int only_420) {
- char buffer[80];
+ char buffer[80] = {0};
int ret;
int i;
/*Read until newline, or 80 cols, whichever happens first.*/
@@ -978,7 +978,9 @@
_y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
else
_y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz);
- _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
+
+ if (_y4m->aux_buf_sz > 0)
+ _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
return 0;
}