Merge "Re-use switchable rate value in handle_inter_mode"
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 3653309..7907225 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -245,13 +245,13 @@
case "$target" in
x86_64*)
platforms[0]="x64"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
- asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
+ asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} "\$(InputPath)""
+ asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} "\$(InputPath)""
;;
x86*)
platforms[0]="Win32"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
- asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
+ asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} "\$(InputPath)""
+ asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "\$(InputPath)""
;;
*) die "Unsupported target $target!"
;;
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index 23ef6a3..56b9a3b 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -253,13 +253,13 @@
case "$target" in
x86_64*)
platforms[0]="x64"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "%(FullPath)""
- asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "%(FullPath)""
+ asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} "%(FullPath)""
+ asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} "%(FullPath)""
;;
x86*)
platforms[0]="Win32"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "%(FullPath)""
- asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "%(FullPath)""
+ asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} "%(FullPath)""
+ asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "%(FullPath)""
;;
arm*)
asm_Debug_cmdline="armasm -nologo "%(FullPath)""
diff --git a/examples.mk b/examples.mk
index 91bd45a..537d8ff 100644
--- a/examples.mk
+++ b/examples.mk
@@ -31,6 +31,7 @@
third_party/libyuv/source/scale_common.cc \
third_party/libyuv/source/scale_mips.cc \
third_party/libyuv/source/scale_neon.cc \
+ third_party/libyuv/source/scale_neon64.cc \
third_party/libyuv/source/scale_posix.cc \
third_party/libyuv/source/scale_win.cc \
diff --git a/examples/set_maps.c b/examples/set_maps.c
index ff60d51..2ee5bca 100644
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -42,6 +42,7 @@
// Use the `simple_decoder` example to decode this sample, and observe
// the change in the image at frames 22, 33, and 44.
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -177,9 +178,10 @@
memset(&info, 0, sizeof(info));
encoder = get_vpx_encoder_by_name(argv[1]);
- if (!encoder)
+ if (encoder == NULL) {
die("Unsupported codec.");
-
+ }
+ assert(encoder != NULL);
info.codec_fourcc = encoder->fourcc;
info.frame_width = strtol(argv[2], NULL, 0);
info.frame_height = strtol(argv[3], NULL, 0);
diff --git a/examples/twopass_encoder.c b/examples/twopass_encoder.c
index 369b1d8..76d5a28 100644
--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -66,13 +66,14 @@
exit(EXIT_FAILURE);
}
-static void get_frame_stats(vpx_codec_ctx_t *ctx,
- const vpx_image_t *img,
- vpx_codec_pts_t pts,
- unsigned int duration,
- vpx_enc_frame_flags_t flags,
- unsigned int deadline,
- vpx_fixed_buf_t *stats) {
+static int get_frame_stats(vpx_codec_ctx_t *ctx,
+ const vpx_image_t *img,
+ vpx_codec_pts_t pts,
+ unsigned int duration,
+ vpx_enc_frame_flags_t flags,
+ unsigned int deadline,
+ vpx_fixed_buf_t *stats) {
+ int got_pkts = 0;
vpx_codec_iter_t iter = NULL;
const vpx_codec_cx_pkt_t *pkt = NULL;
const vpx_codec_err_t res = vpx_codec_encode(ctx, img, pts, duration, flags,
@@ -81,6 +82,8 @@
die_codec(ctx, "Failed to get frame stats.");
while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) {
+ got_pkts = 1;
+
if (pkt->kind == VPX_CODEC_STATS_PKT) {
const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf;
const size_t pkt_size = pkt->data.twopass_stats.sz;
@@ -89,15 +92,18 @@
stats->sz += pkt_size;
}
}
+
+ return got_pkts;
}
-static void encode_frame(vpx_codec_ctx_t *ctx,
- const vpx_image_t *img,
- vpx_codec_pts_t pts,
- unsigned int duration,
- vpx_enc_frame_flags_t flags,
- unsigned int deadline,
- VpxVideoWriter *writer) {
+static int encode_frame(vpx_codec_ctx_t *ctx,
+ const vpx_image_t *img,
+ vpx_codec_pts_t pts,
+ unsigned int duration,
+ vpx_enc_frame_flags_t flags,
+ unsigned int deadline,
+ VpxVideoWriter *writer) {
+ int got_pkts = 0;
vpx_codec_iter_t iter = NULL;
const vpx_codec_cx_pkt_t *pkt = NULL;
const vpx_codec_err_t res = vpx_codec_encode(ctx, img, pts, duration, flags,
@@ -106,6 +112,7 @@
die_codec(ctx, "Failed to encode frame.");
while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) {
+ got_pkts = 1;
if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
@@ -117,19 +124,90 @@
fflush(stdout);
}
}
+
+ return got_pkts;
+}
+
+static vpx_fixed_buf_t pass0(vpx_image_t *raw,
+ FILE *infile,
+ const VpxInterface *encoder,
+ const vpx_codec_enc_cfg_t *cfg) {
+ vpx_codec_ctx_t codec;
+ int frame_count = 0;
+ vpx_fixed_buf_t stats = {NULL, 0};
+
+ if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
+ die_codec(&codec, "Failed to initialize encoder");
+
+ // Calculate frame statistics.
+ while (vpx_img_read(raw, infile)) {
+ ++frame_count;
+ get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_BEST_QUALITY,
+ &stats);
+ }
+
+ // Flush encoder.
+ while (get_frame_stats(&codec, NULL, frame_count, 1, 0,
+ VPX_DL_BEST_QUALITY, &stats)) {}
+
+ printf("Pass 0 complete. Processed %d frames.\n", frame_count);
+ if (vpx_codec_destroy(&codec))
+ die_codec(&codec, "Failed to destroy codec.");
+
+ return stats;
+}
+
+static void pass1(vpx_image_t *raw,
+ FILE *infile,
+ const char *outfile_name,
+ const VpxInterface *encoder,
+ const vpx_codec_enc_cfg_t *cfg) {
+ VpxVideoInfo info = {
+ encoder->fourcc,
+ cfg->g_w,
+ cfg->g_h,
+ {cfg->g_timebase.num, cfg->g_timebase.den}
+ };
+ VpxVideoWriter *writer = NULL;
+ vpx_codec_ctx_t codec;
+ int frame_count = 0;
+
+ writer = vpx_video_writer_open(outfile_name, kContainerIVF, &info);
+ if (!writer)
+ die("Failed to open %s for writing", outfile_name);
+
+ if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
+ die_codec(&codec, "Failed to initialize encoder");
+
+ // Encode frames.
+ while (vpx_img_read(raw, infile)) {
+ ++frame_count;
+ encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_BEST_QUALITY, writer);
+ }
+
+ // Flush encoder.
+ while (encode_frame(&codec, NULL, -1, 1, 0, VPX_DL_BEST_QUALITY, writer)) {}
+
+ printf("\n");
+
+ if (vpx_codec_destroy(&codec))
+ die_codec(&codec, "Failed to destroy codec.");
+
+ vpx_video_writer_close(writer);
+
+ printf("Pass 1 complete. Processed %d frames.\n", frame_count);
}
int main(int argc, char **argv) {
FILE *infile = NULL;
- VpxVideoWriter *writer = NULL;
+ int w, h;
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t cfg;
vpx_image_t raw;
vpx_codec_err_t res;
- vpx_fixed_buf_t stats = {0};
- VpxVideoInfo info = {0};
+ vpx_fixed_buf_t stats;
+
const VpxInterface *encoder = NULL;
- int pass;
const int fps = 30; // TODO(dkovalev) add command line argument
const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument
const char *const codec_arg = argv[1];
@@ -146,85 +224,44 @@
if (!encoder)
die("Unsupported codec.");
- info.codec_fourcc = encoder->fourcc;
- info.time_base.numerator = 1;
- info.time_base.denominator = fps;
- info.frame_width = strtol(width_arg, NULL, 0);
- info.frame_height = strtol(height_arg, NULL, 0);
+ w = strtol(width_arg, NULL, 0);
+ h = strtol(height_arg, NULL, 0);
- if (info.frame_width <= 0 ||
- info.frame_height <= 0 ||
- (info.frame_width % 2) != 0 ||
- (info.frame_height % 2) != 0) {
- die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
- }
+ if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0)
+ die("Invalid frame size: %dx%d", w, h);
- if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
- info.frame_height, 1)) {
- die("Failed to allocate image", info.frame_width, info.frame_height);
- }
-
- writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
- if (!writer)
- die("Failed to open %s for writing", outfile_arg);
+ if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, w, h, 1))
+ die("Failed to allocate image", w, h);
printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
+ // Configuration
res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
if (res)
die_codec(&codec, "Failed to get default codec config.");
- cfg.g_w = info.frame_width;
- cfg.g_h = info.frame_height;
- cfg.g_timebase.num = info.time_base.numerator;
- cfg.g_timebase.den = info.time_base.denominator;
+ cfg.g_w = w;
+ cfg.g_h = h;
+ cfg.g_timebase.num = 1;
+ cfg.g_timebase.den = fps;
cfg.rc_target_bitrate = bitrate;
- for (pass = 0; pass < 2; ++pass) {
- int frame_count = 0;
+ if (!(infile = fopen(infile_arg, "rb")))
+ die("Failed to open %s for reading", infile_arg);
- if (pass == 0) {
- cfg.g_pass = VPX_RC_FIRST_PASS;
- } else {
- cfg.g_pass = VPX_RC_LAST_PASS;
- cfg.rc_twopass_stats_in = stats;
- }
+ // Pass 0
+ cfg.g_pass = VPX_RC_FIRST_PASS;
+ stats = pass0(&raw, infile, encoder, &cfg);
- if (!(infile = fopen(infile_arg, "rb")))
- die("Failed to open %s for reading", infile_arg);
-
- if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
- die_codec(&codec, "Failed to initialize encoder");
-
- while (vpx_img_read(&raw, infile)) {
- ++frame_count;
-
- if (pass == 0) {
- get_frame_stats(&codec, &raw, frame_count, 1, 0, VPX_DL_BEST_QUALITY,
- &stats);
- } else {
- encode_frame(&codec, &raw, frame_count, 1, 0, VPX_DL_BEST_QUALITY,
- writer);
- }
- }
-
- if (pass == 0) {
- get_frame_stats(&codec, NULL, frame_count, 1, 0, VPX_DL_BEST_QUALITY,
- &stats);
- } else {
- printf("\n");
- }
-
- fclose(infile);
- printf("Pass %d complete. Processed %d frames.\n", pass + 1, frame_count);
- if (vpx_codec_destroy(&codec))
- die_codec(&codec, "Failed to destroy codec.");
- }
-
- vpx_img_free(&raw);
+ // Pass 1
+ rewind(infile);
+ cfg.g_pass = VPX_RC_LAST_PASS;
+ cfg.rc_twopass_stats_in = stats;
+ pass1(&raw, infile, outfile_arg, encoder, &cfg);
free(stats.buf);
- vpx_video_writer_close(writer);
+ vpx_img_free(&raw);
+ fclose(infile);
return EXIT_SUCCESS;
}
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index 4ec1848..5eac92c 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -12,6 +12,7 @@
// encoding scheme based on temporal scalability for video applications
// that benefit from a scalable bitstream.
+#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -438,7 +439,7 @@
}
int main(int argc, char **argv) {
- VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS];
+ VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t cfg;
int frame_cnt = 0;
@@ -456,7 +457,6 @@
int layering_mode = 0;
int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
int flag_periodicity = 1;
- int max_intra_size_pct;
vpx_svc_layer_id_t layer_id = {0, 0};
const VpxInterface *encoder = NULL;
FILE *infile = NULL;
@@ -570,6 +570,8 @@
outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info);
if (!outfile[i])
die("Failed to open %s for writing", file_name);
+
+ assert(outfile[i] != NULL);
}
// No spatial layers in this encoder.
cfg.ss_number_layers = 1;
@@ -595,11 +597,11 @@
// This controls the maximum target size of the key frame.
// For generating smaller key frames, use a smaller max_intra_size_pct
// value, like 100 or 200.
- max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
- * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0);
- // For low-quality key frame.
- max_intra_size_pct = 200;
- vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct);
+ {
+ const int max_intra_size_pct = 200;
+ vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+ max_intra_size_pct);
+ }
frame_avail = 1;
while (frame_avail || got_data) {
diff --git a/libs.mk b/libs.mk
index 25fbc2c..c7c2748 100644
--- a/libs.mk
+++ b/libs.mk
@@ -133,6 +133,8 @@
CODEC_DOC_SECTIONS += vp9 vp9_decoder
endif
+VP9_PREFIX=vp9/
+$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
ifeq ($(CONFIG_ENCODERS),yes)
CODEC_DOC_SECTIONS += encoder
diff --git a/test/active_map_test.cc b/test/active_map_test.cc
index a9bb540..0221995 100644
--- a/test/active_map_test.cc
+++ b/test/active_map_test.cc
@@ -38,7 +38,7 @@
if (video->frame() == 1) {
encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
} else if (video->frame() == 3) {
- vpx_active_map_t map = {0};
+ vpx_active_map_t map = vpx_active_map_t();
uint8_t active_map[9 * 13] = {
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
@@ -57,7 +57,7 @@
map.active_map = active_map;
encoder->Control(VP8E_SET_ACTIVEMAP, &map);
} else if (video->frame() == 15) {
- vpx_active_map_t map = {0};
+ vpx_active_map_t map = vpx_active_map_t();
map.cols = (kWidth + 15) / 16;
map.rows = (kHeight + 15) / 16;
map.active_map = NULL;
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 5b4a20e..1724db3 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -646,26 +646,6 @@
#endif
#if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-}
-
const ConvolveFunctions convolve8_avx2(
vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
@@ -676,9 +656,7 @@
make_tuple(8, 4, &convolve8_avx2),
make_tuple(4, 8, &convolve8_avx2),
make_tuple(8, 8, &convolve8_avx2),
- make_tuple(8, 16, &convolve8_avx2)));
-
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values(
+ make_tuple(8, 16, &convolve8_avx2),
make_tuple(16, 8, &convolve8_avx2),
make_tuple(16, 16, &convolve8_avx2),
make_tuple(32, 16, &convolve8_avx2),
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 8dcf26c..a3d730a 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -42,6 +42,9 @@
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
+ if (video->frame() == 1) {
+ encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
+ }
const vpx_rational_t tb = video->timebase();
timebase_ = static_cast<double>(tb.num) / tb.den;
duration_ = 0;
@@ -120,9 +123,40 @@
double file_datarate_;
double effective_datarate_;
size_t bits_in_last_frame_;
+ int denoiser_on_;
};
+// Check basic datarate targeting, for a single bitrate, but loop over the
+// various denoiser settings.
+TEST_P(DatarateTestLarge, DenoiserLevels) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_dropframe_thresh = 1;
+ cfg_.rc_max_quantizer = 56;
+ cfg_.rc_end_usage = VPX_CBR;
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 140);
+ for (int j = 1; j < 5; ++j) {
+ // Run over the denoiser levels.
+ // For the temporal denoiser (#if CONFIG_TEMPORAL_DENOISING) the level j
+ // refers to the 4 denoiser modes: denoiserYonly, denoiserOnYUV,
+ // denoiserOnAggressive, and denoiserOnAdaptive.
+ // For the spatial denoiser (if !CONFIG_TEMPORAL_DENOISING), the level j
+ // refers to the blur thresholds: 20, 40, 60 80.
+ // The j = 0 case (denoiser off) is covered in the tests below.
+ denoiser_on_ = j;
+ cfg_.rc_target_bitrate = 300;
+ ResetModel();
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+ << " The datarate for the file exceeds the target!";
+
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+ << " The datarate for the file missed the target!";
+ }
+}
+
TEST_P(DatarateTestLarge, BasicBufferModel) {
+ denoiser_on_ = 0;
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_dropframe_thresh = 1;
cfg_.rc_max_quantizer = 56;
@@ -154,6 +188,7 @@
}
TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
+ denoiser_on_ = 0;
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_max_quantizer = 36;
cfg_.rc_end_usage = VPX_CBR;
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index ee417ce..c38cc2e 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -268,11 +268,13 @@
typedef std::tr1::tuple<FdctFunc, IdctFunc, int> Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int> Ht16x16Param;
-void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+void fdct16x16_ref(const int16_t *in, int16_t *out, int stride,
+ int /*tx_type*/) {
vp9_fdct16x16_c(in, out, stride);
}
-void idct16x16_ref(const int16_t *in, uint8_t *dest, int stride, int tx_type) {
+void idct16x16_ref(const int16_t *in, uint8_t *dest, int stride,
+ int /*tx_type*/) {
vp9_idct16x16_256_add_c(in, dest, stride);
}
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index 4f34a44..d2d437c 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -37,7 +37,7 @@
const int kNumCoeffs = 1024;
const double kPi = 3.141592653589793238462643383279502884;
-void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
+void reference_32x32_dct_1d(const double in[32], double out[32]) {
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < 32; k++) {
out[k] = 0.0;
@@ -55,7 +55,7 @@
double temp_in[32], temp_out[32];
for (int j = 0; j < 32; ++j)
temp_in[j] = input[j*32 + i];
- reference_32x32_dct_1d(temp_in, temp_out, 1);
+ reference_32x32_dct_1d(temp_in, temp_out);
for (int j = 0; j < 32; ++j)
output[j * 32 + i] = temp_out[j];
}
@@ -64,7 +64,7 @@
double temp_in[32], temp_out[32];
for (int j = 0; j < 32; ++j)
temp_in[j] = output[j + i*32];
- reference_32x32_dct_1d(temp_in, temp_out, 1);
+ reference_32x32_dct_1d(temp_in, temp_out);
// Scale by some magic number
for (int j = 0; j < 32; ++j)
output[j + i * 32] = temp_out[j] / 4;
diff --git a/test/decode_perf_test.cc b/test/decode_perf_test.cc
index 11529b3..5a71140 100644
--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@@ -74,7 +74,7 @@
libvpx_test::WebMVideoSource video(video_name);
video.Init();
- vpx_codec_dec_cfg_t cfg = {0};
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
cfg.threads = threads;
libvpx_test::VP9Decoder decoder(cfg, 0);
diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc
index 99610eb..0ef4f7b 100644
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -106,7 +106,7 @@
}
void DecoderTest::RunLoop(CompressedVideoSource *video) {
- vpx_codec_dec_cfg_t dec_cfg = {0};
+ vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
RunLoop(video, dec_cfg);
}
diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h
index 1f73c7d..a757b59 100644
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -125,20 +125,20 @@
const vpx_codec_dec_cfg_t &dec_cfg);
// Hook to be called before decompressing every frame.
- virtual void PreDecodeFrameHook(const CompressedVideoSource& video,
- Decoder *decoder) {}
+ virtual void PreDecodeFrameHook(const CompressedVideoSource& /*video*/,
+ Decoder* /*decoder*/) {}
// Hook to be called to handle decode result. Return true to continue.
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
- const CompressedVideoSource& /* video */,
+ const CompressedVideoSource& /*video*/,
Decoder *decoder) {
EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
return VPX_CODEC_OK == res_dec;
}
// Hook to be called on every decompressed frame.
- virtual void DecompressedFrameHook(const vpx_image_t& img,
- const unsigned int frame_number) {}
+ virtual void DecompressedFrameHook(const vpx_image_t& /*img*/,
+ const unsigned int /*frame_number*/) {}
// Hook to be called on peek result
virtual void HandlePeekResult(Decoder* const decoder,
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc
index 6d4281d..9702ddf 100644
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -133,13 +133,13 @@
return match;
}
-void EncoderTest::MismatchHook(const vpx_image_t *img1,
- const vpx_image_t *img2) {
+void EncoderTest::MismatchHook(const vpx_image_t* /*img1*/,
+ const vpx_image_t* /*img2*/) {
ASSERT_TRUE(0) << "Encode/Decode mismatch found";
}
void EncoderTest::RunLoop(VideoSource *video) {
- vpx_codec_dec_cfg_t dec_cfg = {0};
+ vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
stats_.Reset();
diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h
index 2270ce2..a77bd64 100644
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -189,20 +189,21 @@
virtual void RunLoop(VideoSource *video);
// Hook to be called at the beginning of a pass.
- virtual void BeginPassHook(unsigned int pass) {}
+ virtual void BeginPassHook(unsigned int /*pass*/) {}
// Hook to be called at the end of a pass.
virtual void EndPassHook() {}
// Hook to be called before encoding a frame.
- virtual void PreEncodeFrameHook(VideoSource *video) {}
- virtual void PreEncodeFrameHook(VideoSource *video, Encoder *encoder) {}
+ virtual void PreEncodeFrameHook(VideoSource* /*video*/) {}
+ virtual void PreEncodeFrameHook(VideoSource* /*video*/,
+ Encoder* /*encoder*/) {}
// Hook to be called on every compressed data packet.
- virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {}
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {}
// Hook to be called on every PSNR packet.
- virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {}
+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {}
// Hook to determine whether the encode loop should continue.
virtual bool Continue() const {
@@ -218,19 +219,19 @@
const vpx_image_t *img2);
// Hook to be called on every decompressed frame.
- virtual void DecompressedFrameHook(const vpx_image_t& img,
- vpx_codec_pts_t pts) {}
+ virtual void DecompressedFrameHook(const vpx_image_t& /*img*/,
+ vpx_codec_pts_t /*pts*/) {}
// Hook to be called to handle decode result. Return true to continue.
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
- const VideoSource& /* video */,
+ const VideoSource& /*video*/,
Decoder *decoder) {
EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
return VPX_CODEC_OK == res_dec;
}
// Hook that can modify the encoder's output data
- virtual const vpx_codec_cx_pkt_t * MutateEncoderOutputHook(
+ virtual const vpx_codec_cx_pkt_t *MutateEncoderOutputHook(
const vpx_codec_cx_pkt_t *pkt) {
return pkt;
}
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
index fb0449d..44eba33 100644
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -285,7 +285,7 @@
video_->Init();
video_->Begin();
- vpx_codec_dec_cfg_t cfg = {0};
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
ASSERT_TRUE(decoder_ != NULL);
}
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 7c48260..08a69ab 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -40,7 +40,7 @@
typedef std::tr1::tuple<FdctFunc, IdctFunc, int> Dct4x4Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int> Ht4x4Param;
-void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int /*tx_type*/) {
vp9_fdct4x4_c(in, out, stride);
}
@@ -48,7 +48,7 @@
vp9_fht4x4_c(in, out, stride, tx_type);
}
-void fwht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+void fwht4x4_ref(const int16_t *in, int16_t *out, int stride, int /*tx_type*/) {
vp9_fwht4x4_c(in, out, stride);
}
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 567e5f6..a694f0c 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -39,7 +39,7 @@
typedef std::tr1::tuple<FdctFunc, IdctFunc, int> Dct8x8Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int> Ht8x8Param;
-void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int /*tx_type*/) {
vp9_fdct8x8_c(in, out, stride);
}
diff --git a/test/frame_size_tests.cc b/test/frame_size_tests.cc
index db27975..1c9a522 100644
--- a/test/frame_size_tests.cc
+++ b/test/frame_size_tests.cc
@@ -27,7 +27,7 @@
}
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
- const libvpx_test::VideoSource &video,
+ const libvpx_test::VideoSource& /*video*/,
libvpx_test::Decoder *decoder) {
EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError();
return !::testing::Test::HasFailure();
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index 0a1c17c..34d6236 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -73,7 +73,7 @@
void RunTest() {
const DecodeParam input = GET_PARAM(1);
libvpx_test::CompressedVideoSource *video = NULL;
- vpx_codec_dec_cfg_t cfg = {0};
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
cfg.threads = input.threads;
const std::string filename = input.filename;
@@ -126,9 +126,9 @@
class InvalidFileInvalidPeekTest : public InvalidFileTest {
protected:
InvalidFileInvalidPeekTest() : InvalidFileTest() {}
- virtual void HandlePeekResult(libvpx_test::Decoder *const decoder,
- libvpx_test::CompressedVideoSource *video,
- const vpx_codec_err_t res_peek) {}
+ virtual void HandlePeekResult(libvpx_test::Decoder *const /*decoder*/,
+ libvpx_test::CompressedVideoSource* /*video*/,
+ const vpx_codec_err_t /*res_peek*/) {}
};
TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
diff --git a/test/resize_test.cc b/test/resize_test.cc
index 8d08f1e..9d0c570 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -211,8 +211,8 @@
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
}
- virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
#if WRITE_COMPRESSED_STREAM
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
++out_frames_;
// Write initial file header if first frame.
@@ -222,8 +222,8 @@
// Write frame header and data.
write_ivf_frame_header(pkt, outfile_);
(void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
-#endif
}
+#endif
double frame0_psnr_;
#if WRITE_COMPRESSED_STREAM
diff --git a/test/set_maps.sh b/test/set_maps.sh
new file mode 100755
index 0000000..e7c8d43
--- /dev/null
+++ b/test/set_maps.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx set_maps example. To add new tests to this file,
+## do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to set_maps_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required, and set_maps must exist in
+# $LIBVPX_BIN_PATH.
+set_maps_verify_environment() {
+ if [ ! -e "${YUV_RAW_INPUT}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+ if [ -z "$(vpx_tool_path set_maps)" ]; then
+ elog "set_maps not found. It must exist in LIBVPX_BIN_PATH or its parent."
+ return 1
+ fi
+}
+
+# Runs set_maps using the codec specified by $1.
+set_maps() {
+ local encoder="$(vpx_tool_path set_maps)"
+ local codec="$1"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf"
+
+ eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+ ${devnull}
+
+ [ -e "${output_file}" ] || return 1
+}
+
+set_maps_vp8() {
+ if [ "$(vp8_encode_available)" = "yes" ]; then
+ set_maps vp8 || return 1
+ fi
+}
+
+set_maps_vp9() {
+ if [ "$(vp9_encode_available)" = "yes" ]; then
+ set_maps vp9 || return 1
+ fi
+}
+
+set_maps_tests="set_maps_vp8
+ set_maps_vp9"
+
+run_tests set_maps_verify_environment "${set_maps_tests}"
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index 6619fb1..ff42725 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -105,7 +105,7 @@
INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
::testing::Values(vp8_subtract_b_c));
-#if HAVE_NEON_ASM
+#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
::testing::Values(vp8_subtract_b_neon));
#endif
diff --git a/test/svc_test.cc b/test/svc_test.cc
index e9cf38d..e219488 100644
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -13,6 +13,9 @@
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/i420_video_source.h"
+
+#include "vp9/decoder/vp9_decoder.h"
+
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
@@ -21,6 +24,7 @@
using libvpx_test::CodecFactory;
using libvpx_test::Decoder;
+using libvpx_test::DxDataIterator;
using libvpx_test::VP9CodecFactory;
class SvcTest : public ::testing::Test {
@@ -56,15 +60,252 @@
codec_enc_.kf_min_dist = 100;
codec_enc_.kf_max_dist = 100;
- vpx_codec_dec_cfg_t dec_cfg = {0};
+ vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
VP9CodecFactory codec_factory;
decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
}
virtual void TearDown() {
- vpx_svc_release(&svc_);
+ ReleaseEncoder();
delete(decoder_);
+ }
+
+ void InitializeEncoder() {
+ const vpx_codec_err_t res =
+ vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+ EXPECT_EQ(VPX_CODEC_OK, res);
+ codec_initialized_ = true;
+ }
+
+ void ReleaseEncoder() {
+ vpx_svc_release(&svc_);
if (codec_initialized_) vpx_codec_destroy(&codec_);
+ codec_initialized_ = false;
+ }
+
+ void Pass1EncodeNFrames(const int n, const int layers,
+ std::string *const stats_buf) {
+ vpx_codec_err_t res;
+ size_t stats_size = 0;
+ const char *stats_data = NULL;
+
+ ASSERT_GT(n, 0);
+ ASSERT_GT(layers, 0);
+ svc_.spatial_layers = layers;
+ codec_enc_.g_pass = VPX_RC_FIRST_PASS;
+ InitializeEncoder();
+
+ libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+ codec_enc_.g_timebase.den,
+ codec_enc_.g_timebase.num, 0, 30);
+ video.Begin();
+
+ for (int i = 0; i < n; ++i) {
+ res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+ video.duration(), VPX_DL_GOOD_QUALITY);
+ ASSERT_EQ(VPX_CODEC_OK, res);
+ stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
+ EXPECT_GT(stats_size, 0U);
+ stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
+ ASSERT_TRUE(stats_data != NULL);
+ stats_buf->append(stats_data, stats_size);
+ video.Next();
+ }
+
+ // Flush encoder and test EOS packet.
+ res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),
+ video.duration(), VPX_DL_GOOD_QUALITY);
+ stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
+ EXPECT_GT(stats_size, 0U);
+ stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
+ ASSERT_TRUE(stats_data != NULL);
+ stats_buf->append(stats_data, stats_size);
+
+ ReleaseEncoder();
+ }
+
+ void StoreFrames(const size_t max_frame_received,
+ struct vpx_fixed_buf *const outputs,
+ size_t *const frame_received) {
+ size_t frame_size;
+ while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ ASSERT_LT(*frame_received, max_frame_received);
+
+ if (*frame_received == 0) {
+ EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
+ }
+
+ outputs[*frame_received].buf = malloc(frame_size + 16);
+ ASSERT_TRUE(outputs[*frame_received].buf != NULL);
+ memcpy(outputs[*frame_received].buf, vpx_svc_get_buffer(&svc_),
+ frame_size);
+ outputs[*frame_received].sz = frame_size;
+ ++(*frame_received);
+ }
+ }
+
+ void Pass2EncodeNFrames(std::string *const stats_buf,
+ const int n, const int layers,
+ struct vpx_fixed_buf *const outputs) {
+ vpx_codec_err_t res;
+ size_t frame_received = 0;
+
+ ASSERT_TRUE(outputs != NULL);
+ ASSERT_GT(n, 0);
+ ASSERT_GT(layers, 0);
+ svc_.spatial_layers = layers;
+ codec_enc_.rc_target_bitrate = 500;
+ if (codec_enc_.g_pass == VPX_RC_LAST_PASS) {
+ ASSERT_TRUE(stats_buf != NULL);
+ ASSERT_GT(stats_buf->size(), 0U);
+ codec_enc_.rc_twopass_stats_in.buf = &(*stats_buf)[0];
+ codec_enc_.rc_twopass_stats_in.sz = stats_buf->size();
+ }
+ InitializeEncoder();
+
+ libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+ codec_enc_.g_timebase.den,
+ codec_enc_.g_timebase.num, 0, 30);
+ video.Begin();
+
+ for (int i = 0; i < n; ++i) {
+ res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+ video.duration(), VPX_DL_GOOD_QUALITY);
+ ASSERT_EQ(VPX_CODEC_OK, res);
+ StoreFrames(n, outputs, &frame_received);
+ video.Next();
+ }
+
+ // Flush encoder.
+ res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
+ video.duration(), VPX_DL_GOOD_QUALITY);
+ EXPECT_EQ(VPX_CODEC_OK, res);
+ StoreFrames(n, outputs, &frame_received);
+
+ EXPECT_EQ(frame_received, static_cast<size_t>(n));
+
+ ReleaseEncoder();
+ }
+
+ void DecodeNFrames(const struct vpx_fixed_buf *const inputs, const int n) {
+ int decoded_frames = 0;
+ int received_frames = 0;
+
+ ASSERT_TRUE(inputs != NULL);
+ ASSERT_GT(n, 0);
+
+ for (int i = 0; i < n; ++i) {
+ ASSERT_TRUE(inputs[i].buf != NULL);
+ ASSERT_GT(inputs[i].sz, 0U);
+ const vpx_codec_err_t res_dec =
+ decoder_->DecodeFrame(static_cast<const uint8_t *>(inputs[i].buf),
+ inputs[i].sz);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+
+ DxDataIterator dec_iter = decoder_->GetDxData();
+ while (dec_iter.Next() != NULL) {
+ ++received_frames;
+ }
+ }
+ EXPECT_EQ(decoded_frames, n);
+ EXPECT_EQ(received_frames, n);
+ }
+
+ void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
+ const int num_super_frames,
+ const int remained_layers,
+ const bool is_multiple_frame_context) {
+ ASSERT_TRUE(inputs != NULL);
+ ASSERT_GT(num_super_frames, 0);
+ ASSERT_GT(remained_layers, 0);
+
+ for (int i = 0; i < num_super_frames; ++i) {
+ uint32_t frame_sizes[8] = {0};
+ int frame_count = 0;
+ int frames_found = 0;
+ int frame;
+ ASSERT_TRUE(inputs[i].buf != NULL);
+ ASSERT_GT(inputs[i].sz, 0U);
+
+ vpx_codec_err_t res =
+ vp9_parse_superframe_index(static_cast<const uint8_t*>(inputs[i].buf),
+ inputs[i].sz, frame_sizes, &frame_count,
+ NULL, NULL);
+ ASSERT_EQ(VPX_CODEC_OK, res);
+
+ uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf);
+ uint8_t *frame_start = frame_data;
+ for (frame = 0; frame < frame_count; ++frame) {
+ // Looking for a visible frame.
+ if (frame_data[0] & 0x02) {
+ ++frames_found;
+ if (frames_found == remained_layers)
+ break;
+ }
+ frame_data += frame_sizes[frame];
+ }
+ ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "
+ << "remaining_layers: " << remained_layers
+ << " super_frame: " << i
+ << " is_multiple_frame_context: " << is_multiple_frame_context;
+ if (frame == frame_count - 1 && !is_multiple_frame_context)
+ continue;
+
+ frame_data += frame_sizes[frame];
+ // We need to add one more frame for multiple frame context.
+ if (is_multiple_frame_context)
+ ++frame;
+ uint8_t marker =
+ static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1];
+ const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+ const size_t index_sz = 2 + mag * frame_count;
+ const size_t new_index_sz = 2 + mag * (frame + 1);
+ marker &= 0x0f8;
+ marker |= frame;
+
+ // Copy existing frame sizes.
+ memmove(frame_data + (is_multiple_frame_context ? 2 : 1),
+ frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);
+ if (is_multiple_frame_context) {
+ // Add a one byte frame with flag show_existing frame.
+ *frame_data++ = 0x88 | (remained_layers - 1);
+ }
+ // New marker.
+ frame_data[0] = marker;
+ frame_data += (mag * (frame + 1) + 1);
+
+ if (is_multiple_frame_context) {
+ // Write the frame size for the one byte frame.
+ frame_data -= mag;
+ *frame_data++ = 1;
+ for (uint32_t j = 1; j < mag; ++j) {
+ *frame_data++ = 0;
+ }
+ }
+
+ *frame_data++ = marker;
+ inputs[i].sz = frame_data - frame_start;
+
+ if (is_multiple_frame_context) {
+ // Change the show frame flag to 0 for all frames.
+ for (int j = 0; j < frame; ++j) {
+ frame_start[0] &= ~2;
+ frame_start += frame_sizes[j];
+ }
+ }
+ }
+ }
+
+ void FreeBitstreamBuffers(struct vpx_fixed_buf *const inputs, const int n) {
+ ASSERT_TRUE(inputs != NULL);
+ ASSERT_GT(n, 0);
+
+ for (int i = 0; i < n; ++i) {
+ free(inputs[i].buf);
+ inputs[i].buf = NULL;
+ inputs[i].sz = 0;
+ }
}
SvcContext svc_;
@@ -93,9 +334,7 @@
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
svc_.spatial_layers = 0; // use default layers
- res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
EXPECT_EQ(VPX_SS_DEFAULT_LAYERS, svc_.spatial_layers);
}
@@ -106,9 +345,7 @@
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); // valid scale values
- res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
}
TEST_F(SvcTest, InvalidOptions) {
@@ -124,18 +361,15 @@
TEST_F(SvcTest, SetLayersOption) {
vpx_codec_err_t res = vpx_svc_set_options(&svc_, "layers=3");
EXPECT_EQ(VPX_CODEC_OK, res);
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
EXPECT_EQ(3, svc_.spatial_layers);
}
TEST_F(SvcTest, SetMultipleOptions) {
vpx_codec_err_t res =
vpx_svc_set_options(&svc_, "layers=2 scale-factors=1/3,2/3");
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
EXPECT_EQ(2, svc_.spatial_layers);
}
@@ -149,9 +383,7 @@
res = vpx_svc_set_options(&svc_, "scale-factors=1/3,2/3");
EXPECT_EQ(VPX_CODEC_OK, res);
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
}
TEST_F(SvcTest, SetQuantizersOption) {
@@ -162,9 +394,7 @@
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
vpx_svc_set_options(&svc_, "quantizers=40,45");
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
}
TEST_F(SvcTest, SetAutoAltRefOption) {
@@ -180,9 +410,7 @@
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0");
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
}
TEST_F(SvcTest, SetQuantizers) {
@@ -200,9 +428,7 @@
res = vpx_svc_set_quantizers(&svc_, "40,30");
EXPECT_EQ(VPX_CODEC_OK, res);
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
}
TEST_F(SvcTest, SetScaleFactors) {
@@ -220,121 +446,25 @@
res = vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
EXPECT_EQ(VPX_CODEC_OK, res);
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
}
// Test that decoder can handle an SVC frame as the first frame in a sequence.
-TEST_F(SvcTest, FirstFrameHasLayers) {
- svc_.spatial_layers = 2;
- vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
- vpx_svc_set_quantizers(&svc_, "40,30");
-
- vpx_codec_err_t res =
- vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
-
- libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
- codec_enc_.g_timebase.den,
- codec_enc_.g_timebase.num, 0, 30);
- video.Begin();
-
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- EXPECT_EQ(VPX_CODEC_OK, res);
-
- if (vpx_svc_get_frame_size(&svc_) == 0) {
- // Flush encoder
- res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
- video.duration(), VPX_DL_GOOD_QUALITY);
- EXPECT_EQ(VPX_CODEC_OK, res);
- }
-
- int frame_size = vpx_svc_get_frame_size(&svc_);
- EXPECT_GT(frame_size, 0);
- const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-
- // this test fails with a decoder error
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+TEST_F(SvcTest, OnePassEncodeOneFrame) {
+ codec_enc_.g_pass = VPX_RC_ONE_PASS;
+ vpx_fixed_buf output = {0};
+ Pass2EncodeNFrames(NULL, 1, 2, &output);
+ DecodeNFrames(&output, 1);
+ FreeBitstreamBuffers(&output, 1);
}
-TEST_F(SvcTest, EncodeThreeFrames) {
- svc_.spatial_layers = 2;
- vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
- vpx_svc_set_quantizers(&svc_, "40,30");
- int decoded_frames = 0;
- vpx_codec_err_t res_dec;
- int frame_size;
-
- vpx_codec_err_t res =
- vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- ASSERT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
-
- libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
- codec_enc_.g_timebase.den,
- codec_enc_.g_timebase.num, 0, 30);
- // FRAME 0
- video.Begin();
- // This frame is a keyframe.
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
-
- if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
- }
-
- // FRAME 1
- video.Next();
- // This is a P-frame.
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- ASSERT_EQ(VPX_CODEC_OK, res);
-
- if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
- }
-
- // FRAME 2
- video.Next();
- // This is a P-frame.
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- ASSERT_EQ(VPX_CODEC_OK, res);
-
- if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
- }
-
- // Flush encoder
- res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
- video.duration(), VPX_DL_GOOD_QUALITY);
- EXPECT_EQ(VPX_CODEC_OK, res);
-
- while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
- }
-
- EXPECT_EQ(decoded_frames, 3);
+TEST_F(SvcTest, OnePassEncodeThreeFrames) {
+ codec_enc_.g_pass = VPX_RC_ONE_PASS;
+ vpx_fixed_buf outputs[3];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]);
+ DecodeNFrames(&outputs[0], 3);
+ FreeBitstreamBuffers(&outputs[0], 3);
}
TEST_F(SvcTest, GetLayerResolution) {
@@ -342,14 +472,11 @@
vpx_svc_set_scale_factors(&svc_, "4/16,8/16");
vpx_svc_set_quantizers(&svc_, "40,30");
- vpx_codec_err_t res =
- vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+ InitializeEncoder();
// ensure that requested layer is a valid layer
uint32_t layer_width, layer_height;
- res = vpx_svc_get_layer_resolution(&svc_, svc_.spatial_layers,
+ vpx_codec_err_t res = vpx_svc_get_layer_resolution(&svc_, svc_.spatial_layers,
&layer_width, &layer_height);
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
@@ -373,131 +500,222 @@
EXPECT_EQ(kHeight * 8 / 16, layer_height);
}
-TEST_F(SvcTest, TwoPassEncode) {
+TEST_F(SvcTest, TwoPassEncode10Frames) {
// First pass encode
std::string stats_buf;
- svc_.spatial_layers = 2;
- codec_enc_.g_pass = VPX_RC_FIRST_PASS;
- vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
- vpx_svc_set_quantizers(&svc_, "40,30");
- vpx_svc_set_options(&svc_, "auto-alt-refs=1,1");
-
- vpx_codec_err_t res =
- vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- ASSERT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
-
- libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
- codec_enc_.g_timebase.den,
- codec_enc_.g_timebase.num, 0, 30);
- // FRAME 0
- video.Begin();
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- ASSERT_EQ(VPX_CODEC_OK, res);
- size_t stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
- EXPECT_GT(stats_size, 0U);
- const char *stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
- ASSERT_TRUE(stats_data != NULL);
- stats_buf.append(stats_data, stats_size);
-
- // FRAME 1
- video.Next();
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
- EXPECT_GT(stats_size, 0U);
- stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
- ASSERT_TRUE(stats_data != NULL);
- stats_buf.append(stats_data, stats_size);
-
- // Flush encoder and test EOS packet
- res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
- EXPECT_GT(stats_size, 0U);
- stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
- ASSERT_TRUE(stats_data != NULL);
- stats_buf.append(stats_data, stats_size);
-
- // Tear down encoder
- vpx_svc_release(&svc_);
- vpx_codec_destroy(&codec_);
+ Pass1EncodeNFrames(10, 2, &stats_buf);
// Second pass encode
- int decoded_frames = 0;
- vpx_codec_err_t res_dec;
- int frame_size;
codec_enc_.g_pass = VPX_RC_LAST_PASS;
- vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
- vpx_svc_set_quantizers(&svc_, "40,30");
+ vpx_fixed_buf outputs[10];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+ DecodeNFrames(&outputs[0], 10);
+ FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest, TwoPassEncode20FramesWithAltRef) {
+ // First pass encode
+ std::string stats_buf;
+ Pass1EncodeNFrames(20, 2, &stats_buf);
+
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
vpx_svc_set_options(&svc_, "auto-alt-refs=1,1");
- codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0];
- codec_enc_.rc_twopass_stats_in.sz = stats_buf.size();
+ vpx_fixed_buf outputs[20];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 20, 2, &outputs[0]);
+ DecodeNFrames(&outputs[0], 20);
+ FreeBitstreamBuffers(&outputs[0], 20);
+}
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- ASSERT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
+TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) {
+ // First pass encode
+ std::string stats_buf;
+ Pass1EncodeNFrames(10, 2, &stats_buf);
- // FRAME 0
- video.Begin();
- // This frame is a keyframe.
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- ASSERT_EQ(VPX_CODEC_OK, res);
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ vpx_svc_set_options(&svc_, "auto-alt-refs=1,1");
+ vpx_fixed_buf outputs[10];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+ DropEnhancementLayers(&outputs[0], 10, 1, false);
+ DecodeNFrames(&outputs[0], 10);
+ FreeBitstreamBuffers(&outputs[0], 10);
+}
- if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
- }
+TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) {
+ // First pass encode
+ std::string stats_buf;
+ Pass1EncodeNFrames(10, 5, &stats_buf);
- // FRAME 1
- video.Next();
- // This is a P-frame.
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- ASSERT_EQ(VPX_CODEC_OK, res);
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0");
+ vpx_fixed_buf outputs[10];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);
- if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
- }
+ DecodeNFrames(&outputs[0], 10);
+ DropEnhancementLayers(&outputs[0], 10, 4, false);
+ DecodeNFrames(&outputs[0], 10);
+ DropEnhancementLayers(&outputs[0], 10, 3, false);
+ DecodeNFrames(&outputs[0], 10);
+ DropEnhancementLayers(&outputs[0], 10, 2, false);
+ DecodeNFrames(&outputs[0], 10);
+ DropEnhancementLayers(&outputs[0], 10, 1, false);
+ DecodeNFrames(&outputs[0], 10);
- // FRAME 2
- video.Next();
- // This is a P-frame.
- res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_GOOD_QUALITY);
- ASSERT_EQ(VPX_CODEC_OK, res);
+ FreeBitstreamBuffers(&outputs[0], 10);
+}
- if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
- }
+TEST_F(SvcTest, TwoPassEncode2SNRLayers) {
+ // First pass encode
+ std::string stats_buf;
+ vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");
+ Pass1EncodeNFrames(20, 2, &stats_buf);
- // Flush encoder
- res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
- video.duration(), VPX_DL_GOOD_QUALITY);
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ vpx_svc_set_options(&svc_,
+ "auto-alt-refs=1,1 scale-factors=1/1,1/1");
+ vpx_fixed_buf outputs[20];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 20, 2, &outputs[0]);
+ DecodeNFrames(&outputs[0], 20);
+ FreeBitstreamBuffers(&outputs[0], 20);
+}
+
+TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) {
+ // First pass encode
+ std::string stats_buf;
+ vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");
+ Pass1EncodeNFrames(20, 3, &stats_buf);
+
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ vpx_svc_set_options(&svc_,
+ "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1");
+ vpx_fixed_buf outputs[20];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);
+ DecodeNFrames(&outputs[0], 20);
+ DropEnhancementLayers(&outputs[0], 20, 2, false);
+ DecodeNFrames(&outputs[0], 20);
+ DropEnhancementLayers(&outputs[0], 20, 1, false);
+ DecodeNFrames(&outputs[0], 20);
+
+ FreeBitstreamBuffers(&outputs[0], 20);
+}
+
+TEST_F(SvcTest, SetMultipleFrameContextOption) {
+ svc_.spatial_layers = 5;
+ vpx_codec_err_t res =
+ vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
EXPECT_EQ(VPX_CODEC_OK, res);
+ res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
- while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
- EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
- ++decoded_frames;
+ svc_.spatial_layers = 2;
+ res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
+ InitializeEncoder();
+}
+
+TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContext) {
+ // First pass encode
+ std::string stats_buf;
+ Pass1EncodeNFrames(10, 2, &stats_buf);
+
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ codec_enc_.g_error_resilient = 0;
+ vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");
+ vpx_fixed_buf outputs[10];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+ DropEnhancementLayers(&outputs[0], 10, 2, true);
+ DecodeNFrames(&outputs[0], 10);
+ FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) {
+ // First pass encode
+ std::string stats_buf;
+ Pass1EncodeNFrames(10, 2, &stats_buf);
+
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ codec_enc_.g_error_resilient = 0;
+ vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");
+ vpx_fixed_buf outputs[10];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+ DropEnhancementLayers(&outputs[0], 10, 1, true);
+ DecodeNFrames(&outputs[0], 10);
+ FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) {
+ // First pass encode
+ std::string stats_buf;
+ vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");
+ Pass1EncodeNFrames(10, 2, &stats_buf);
+
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ codec_enc_.g_error_resilient = 0;
+ vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 scale-factors=1/1,1/1 "
+ "multi-frame-contexts=1");
+ vpx_fixed_buf outputs[10];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+ DropEnhancementLayers(&outputs[0], 10, 2, true);
+ DecodeNFrames(&outputs[0], 10);
+ FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest, TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) {
+ // First pass encode
+ std::string stats_buf;
+ vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");
+ Pass1EncodeNFrames(10, 3, &stats_buf);
+
+ // Second pass encode
+ codec_enc_.g_pass = VPX_RC_LAST_PASS;
+ codec_enc_.g_error_resilient = 0;
+ vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1 "
+ "multi-frame-contexts=1");
+ vpx_fixed_buf outputs[10];
+ memset(&outputs[0], 0, sizeof(outputs));
+ Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]);
+
+ vpx_fixed_buf outputs_new[10];
+ for (int i = 0; i < 10; ++i) {
+ outputs_new[i].buf = malloc(outputs[i].sz + 16);
+ ASSERT_TRUE(outputs_new[i].buf != NULL);
+ memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+ outputs_new[i].sz = outputs[i].sz;
}
+ DropEnhancementLayers(&outputs_new[0], 10, 3, true);
+ DecodeNFrames(&outputs_new[0], 10);
- EXPECT_EQ(decoded_frames, 3);
+ for (int i = 0; i < 10; ++i) {
+ memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+ outputs_new[i].sz = outputs[i].sz;
+ }
+ DropEnhancementLayers(&outputs_new[0], 10, 2, true);
+ DecodeNFrames(&outputs_new[0], 10);
+
+ for (int i = 0; i < 10; ++i) {
+ memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+ outputs_new[i].sz = outputs[i].sz;
+ }
+ DropEnhancementLayers(&outputs_new[0], 10, 1, true);
+ DecodeNFrames(&outputs_new[0], 10);
+
+ FreeBitstreamBuffers(&outputs[0], 10);
+ FreeBitstreamBuffers(&outputs_new[0], 10);
}
} // namespace
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc
index d714452..b9f879d 100644
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -29,7 +29,7 @@
md5_inv_order_(),
n_tiles_(GET_PARAM(1)) {
init_flags_ = VPX_CODEC_USE_PSNR;
- vpx_codec_dec_cfg_t cfg;
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
cfg.w = 704;
cfg.h = 144;
cfg.threads = 1;
diff --git a/test/user_priv_test.cc b/test/user_priv_test.cc
index 22fce85..8512d88 100644
--- a/test/user_priv_test.cc
+++ b/test/user_priv_test.cc
@@ -47,7 +47,7 @@
libvpx_test::WebMVideoSource video(filename);
video.Init();
- vpx_codec_dec_cfg_t cfg = {0};
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
libvpx_test::VP9Decoder decoder(cfg, 0);
libvpx_test::MD5 md5;
diff --git a/test/vp8_decrypt_test.cc b/test/vp8_decrypt_test.cc
index 470fdf1..972a1d9 100644
--- a/test/vp8_decrypt_test.cc
+++ b/test/vp8_decrypt_test.cc
@@ -47,7 +47,7 @@
libvpx_test::IVFVideoSource video("vp80-00-comprehensive-001.ivf");
video.Init();
- vpx_codec_dec_cfg_t dec_cfg = {0};
+ vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
VP8Decoder decoder(dec_cfg, 0);
video.Begin();
diff --git a/test/vp8_multi_resolution_encoder.sh b/test/vp8_multi_resolution_encoder.sh
new file mode 100755
index 0000000..ed66bf8
--- /dev/null
+++ b/test/vp8_multi_resolution_encoder.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx vp8_multi_resolution_encoder example. To add new
+## tests to this file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to vp8_mre_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+vp8_multi_resolution_encoder_verify_environment() {
+ if [ "$(vpx_config_option_enabled CONFIG_MULTI_RES_ENCODING)" = "yes" ]; then
+ if [ ! -e "${YUV_RAW_INPUT}" ]; then
+ elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+ local readonly app="vp8_multi_resolution_encoder"
+ if [ -z "$(vpx_tool_path "${app}")" ]; then
+ elog "${app} not found. It must exist in LIBVPX_BIN_PATH or its parent."
+ return 1
+ fi
+ fi
+}
+
+# Runs vp8_multi_resolution_encoder. Simply forwards all arguments to
+# vp8_multi_resolution_encoder after building path to the executable.
+vp8_mre() {
+ local readonly encoder="$(vpx_tool_path vp8_multi_resolution_encoder)"
+ if [ ! -x "${encoder}" ]; then
+ elog "${encoder} does not exist or is not executable."
+ return 1
+ fi
+
+ eval "${VPX_TEST_PREFIX}" "${encoder}" "$@"
+}
+
+vp8_multi_resolution_encoder_three_formats() {
+ local readonly output_files="${VPX_TEST_OUTPUT_DIR}/vp8_mre_0.ivf
+ ${VPX_TEST_OUTPUT_DIR}/vp8_mre_1.ivf
+ ${VPX_TEST_OUTPUT_DIR}/vp8_mre_2.ivf"
+
+ if [ "$(vpx_config_option_enabled CONFIG_MULTI_RES_ENCODING)" = "yes" ]; then
+ if [ "$(vp8_encode_available)" = "yes" ]; then
+ # Param order:
+ # Input width
+ # Input height
+ # Input file path
+ # Output file names
+ # Output PSNR
+ vp8_mre "${YUV_RAW_INPUT_WIDTH}" \
+ "${YUV_RAW_INPUT_HEIGHT}" \
+ "${YUV_RAW_INPUT}" \
+ ${output_files} \
+ 0
+
+ for output_file in ${output_files}; do
+ if [ ! -e "${output_file}" ]; then
+ elog "Missing output file: ${output_file}"
+ return 1
+ fi
+ done
+ fi
+ fi
+}
+
+vp8_mre_tests="vp8_multi_resolution_encoder_three_formats"
+run_tests vp8_multi_resolution_encoder_verify_environment "${vp8_mre_tests}"
diff --git a/test/vp9_decrypt_test.cc b/test/vp9_decrypt_test.cc
index 88a3c14..d988612 100644
--- a/test/vp9_decrypt_test.cc
+++ b/test/vp9_decrypt_test.cc
@@ -47,7 +47,7 @@
libvpx_test::IVFVideoSource video("vp90-2-05-resize.ivf");
video.Init();
- vpx_codec_dec_cfg_t dec_cfg = {0};
+ vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
VP9Decoder decoder(dec_cfg, 0);
video.Begin();
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index d7fc4ee..cc35476 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -163,7 +163,7 @@
libvpx_test::WebMVideoSource video(filename);
video.Init();
- vpx_codec_dec_cfg_t cfg = {0};
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
cfg.threads = num_threads;
libvpx_test::VP9Decoder decoder(cfg, 0);
diff --git a/test/vpxenc.sh b/test/vpxenc.sh
index b6482c6..9674bdc 100755
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -41,6 +41,40 @@
fi
}
+# Echo vpxenc command line parameters allowing use of
+# hantro_collage_w352h288.yuv as input.
+yuv_input_hantro_collage() {
+ echo ""${YUV_RAW_INPUT}"
+ --width="${YUV_RAW_INPUT_WIDTH}"
+ --height="${YUV_RAW_INPUT_HEIGHT}""
+}
+
+# Echo default vpxenc real time encoding params. $1 is the codec, which defaults
+# to vp8 if unspecified.
+vpxenc_rt_params() {
+ local readonly codec="${1:-vp8}"
+ echo "--codec=${codec}
+ --buf-initial-sz=500
+ --buf-optimal-sz=600
+ --buf-sz=1000
+ --cpu-used=-5
+ --end-usage=cbr
+ --error-resilient=1
+ --kf-max-dist=90000
+ --lag-in-frames=0
+ --max-intra-rate=300
+ --max-q=56
+ --min-q=2
+ --noise-sensitivity=0
+ --overshoot-pct=50
+ --passes=1
+ --profile=0
+ --resize-allowed=0
+ --rt
+ --static-thresh=0
+ --undershoot-pct=50"
+}
+
# Wrapper function for running vpxenc with pipe input. Requires that
# LIBVPX_BIN_PATH points to the directory containing vpxenc. $1 is used as the
# input file path and shifted away. All remaining parameters are passed through
@@ -59,9 +93,9 @@
# shifted away. All remaining parameters are passed through to vpxenc.
vpxenc() {
local readonly encoder="$(vpx_tool_path vpxenc)"
- local readonly input="${1}"
+ local readonly input="$1"
shift
- eval "${VPX_TEST_PREFIX}" "${encoder}" "$input" \
+ eval "${VPX_TEST_PREFIX}" "${encoder}" "${input}" \
--test-decode=fatal \
"$@" ${devnull}
}
@@ -69,13 +103,11 @@
vpxenc_vp8_ivf() {
if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.ivf"
- vpxenc --codec=vp8 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp8 \
--limit="${TEST_FRAMES}" \
--ivf \
- --output="${output}" \
- "${YUV_RAW_INPUT}"
+ --output="${output}"
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -88,12 +120,10 @@
if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm"
- vpxenc --codec=vp8 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp8 \
--limit="${TEST_FRAMES}" \
- --output="${output}" \
- "${YUV_RAW_INPUT}"
+ --output="${output}"
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -102,17 +132,29 @@
fi
}
+vpxenc_vp8_webm_rt() {
+ if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \
+ [ "$(webm_io_available)" = "yes" ]; then
+ local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_rt.webm"
+ vpxenc $(yuv_input_hantro_collage) \
+ $(vpxenc_rt_params vp8) \
+ --output="${output}"
+ if [ ! -e "${output}" ]; then
+ elog "Output file does not exist."
+ return 1
+ fi
+ fi
+}
+
vpxenc_vp8_webm_2pass() {
if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm"
- vpxenc --codec=vp8 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp8 \
--limit="${TEST_FRAMES}" \
--output="${output}" \
- --passes=2 \
- "${YUV_RAW_INPUT}"
+ --passes=2
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -127,15 +169,13 @@
local readonly lag_total_frames=20
local readonly lag_frames=10
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_lag10_frames20.webm"
- vpxenc --codec=vp8 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp8 \
--limit="${lag_total_frames}" \
--lag-in-frames="${lag_frames}" \
--output="${output}" \
--auto-alt-ref=1 \
- --passes=2 \
- "${YUV_RAW_INPUT}"
+ --passes=2
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -147,14 +187,11 @@
vpxenc_vp8_ivf_piped_input() {
if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_piped_input.ivf"
- cat "${YUV_RAW_INPUT}" \
- | vpxenc --codec=vp8 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
- --limit="${TEST_FRAMES}" \
- --ivf \
- --output="${output}" \
- -
+ vpxenc_pipe $(yuv_input_hantro_collage) \
+ --codec=vp8 \
+ --limit="${TEST_FRAMES}" \
+ --ivf \
+ --output="${output}"
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -166,13 +203,11 @@
vpxenc_vp9_ivf() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.ivf"
- vpxenc --codec=vp9 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp9 \
--limit="${TEST_FRAMES}" \
--ivf \
- --output="${output}" \
- "${YUV_RAW_INPUT}"
+ --output="${output}"
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -185,12 +220,25 @@
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.webm"
- vpxenc --codec=vp9 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp9 \
--limit="${TEST_FRAMES}" \
- --output="${output}" \
- "${YUV_RAW_INPUT}"
+ --output="${output}"
+
+ if [ ! -e "${output}" ]; then
+ elog "Output file does not exist."
+ return 1
+ fi
+ fi
+}
+
+vpxenc_vp9_webm_rt() {
+ if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
+ [ "$(webm_io_available)" = "yes" ]; then
+ local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt.webm"
+ vpxenc $(yuv_input_hantro_collage) \
+ $(vpxenc_rt_params vp9) \
+ --output="${output}"
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -203,14 +251,11 @@
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.webm"
- vpxenc --codec=vp9 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp9 \
--limit="${TEST_FRAMES}" \
- --test-decode=fatal \
--output="${output}" \
- --passes=2 \
- "${YUV_RAW_INPUT}"
+ --passes=2
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -222,14 +267,12 @@
vpxenc_vp9_ivf_lossless() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless.ivf"
- vpxenc --codec=vp9 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp9 \
--limit="${TEST_FRAMES}" \
--ivf \
--output="${output}" \
- --lossless=1 \
- "${YUV_RAW_INPUT}"
+ --lossless=1
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -241,15 +284,13 @@
vpxenc_vp9_ivf_minq0_maxq0() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless_minq0_maxq0.ivf"
- vpxenc --codec=vp9 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp9 \
--limit="${TEST_FRAMES}" \
--ivf \
--output="${output}" \
--min-q=0 \
- --max-q=0 \
- "${YUV_RAW_INPUT}"
+ --max-q=0
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -264,16 +305,13 @@
local readonly lag_total_frames=20
local readonly lag_frames=10
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lag10_frames20.webm"
- vpxenc --codec=vp9 \
- --width="${YUV_RAW_INPUT_WIDTH}" \
- --height="${YUV_RAW_INPUT_HEIGHT}" \
+ vpxenc $(yuv_input_hantro_collage) \
+ --codec=vp9 \
--limit="${lag_total_frames}" \
--lag-in-frames="${lag_frames}" \
--output="${output}" \
- --test-decode=fatal \
--passes=2 \
- --auto-alt-ref=1 \
- "${YUV_RAW_INPUT}"
+ --auto-alt-ref=1
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
@@ -284,11 +322,13 @@
vpxenc_tests="vpxenc_vp8_ivf
vpxenc_vp8_webm
+ vpxenc_vp8_webm_rt
vpxenc_vp8_webm_2pass
vpxenc_vp8_webm_lag10_frames20
vpxenc_vp8_ivf_piped_input
vpxenc_vp9_ivf
vpxenc_vp9_webm
+ vpxenc_vp9_webm_rt
vpxenc_vp9_webm_2pass
vpxenc_vp9_ivf_lossless
vpxenc_vp9_ivf_minq0_maxq0
diff --git a/third_party/libyuv/README.libvpx b/third_party/libyuv/README.libvpx
index fa5b498..fb2b9d1 100644
--- a/third_party/libyuv/README.libvpx
+++ b/third_party/libyuv/README.libvpx
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1041
+Version: 1060
License: BSD
License File: LICENSE
diff --git a/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/third_party/libyuv/include/libyuv/mjpeg_decoder.h
index 82fd95d..8423121 100644
--- a/third_party/libyuv/include/libyuv/mjpeg_decoder.h
+++ b/third_party/libyuv/include/libyuv/mjpeg_decoder.h
@@ -153,7 +153,6 @@
int* subsample_x, int* subsample_y, int number_of_components);
private:
-
void AllocOutputBuffers(int num_outbufs);
void DestroyOutputBuffers();
diff --git a/third_party/libyuv/include/libyuv/row.h b/third_party/libyuv/include/libyuv/row.h
index fdfe1ae..4b3c870 100644
--- a/third_party/libyuv/include/libyuv/row.h
+++ b/third_party/libyuv/include/libyuv/row.h
@@ -252,6 +252,94 @@
// The following are available on arm64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+// #define HAS_I444TOARGBROW_NEON
+// #define HAS_I422TOARGBROW_NEON
+// #define HAS_I411TOARGBROW_NEON
+// #define HAS_I422TOBGRAROW_NEON
+// #define HAS_I422TOABGRROW_NEON
+// #define HAS_I422TORGBAROW_NEON
+// #define HAS_I422TORGB24ROW_NEON
+// #define HAS_I422TORAWROW_NEON
+// #define HAS_I422TORGB565ROW_NEON
+// #define HAS_I422TOARGB1555ROW_NEON
+// #define HAS_I422TOARGB4444ROW_NEON
+// #define HAS_YTOARGBROW_NEON
+// #define HAS_I400TOARGBROW_NEON
+// #define HAS_NV12TOARGBROW_NEON
+// #define HAS_NV21TOARGBROW_NEON
+// #define HAS_NV12TORGB565ROW_NEON
+// #define HAS_NV21TORGB565ROW_NEON
+// #define HAS_YUY2TOARGBROW_NEON
+// #define HAS_UYVYTOARGBROW_NEON
+#define HAS_SPLITUVROW_NEON
+#define HAS_MERGEUVROW_NEON
+#define HAS_COPYROW_NEON
+#define HAS_SETROW_NEON
+#define HAS_ARGBSETROWS_NEON
+#define HAS_MIRRORROW_NEON
+#define HAS_MIRRORUVROW_NEON
+#define HAS_ARGBMIRRORROW_NEON
+#define HAS_RGB24TOARGBROW_NEON
+#define HAS_RAWTOARGBROW_NEON
+// #define HAS_RGB565TOARGBROW_NEON
+// #define HAS_ARGB1555TOARGBROW_NEON
+// #define HAS_ARGB4444TOARGBROW_NEON
+#define HAS_ARGBTORGB24ROW_NEON
+#define HAS_ARGBTORAWROW_NEON
+#define HAS_YUY2TOYROW_NEON
+#define HAS_UYVYTOYROW_NEON
+#define HAS_YUY2TOUV422ROW_NEON
+#define HAS_UYVYTOUV422ROW_NEON
+#define HAS_YUY2TOUVROW_NEON
+#define HAS_UYVYTOUVROW_NEON
+#define HAS_HALFROW_NEON
+#define HAS_ARGBTOBAYERROW_NEON
+#define HAS_ARGBTOBAYERGGROW_NEON
+#define HAS_ARGBSHUFFLEROW_NEON
+#define HAS_I422TOYUY2ROW_NEON
+#define HAS_I422TOUYVYROW_NEON
+// #define HAS_ARGBTORGB565ROW_NEON
+// #define HAS_ARGBTOARGB1555ROW_NEON
+// #define HAS_ARGBTOARGB4444ROW_NEON
+#define HAS_ARGBTOYROW_NEON
+#define HAS_ARGBTOYJROW_NEON
+// #define HAS_ARGBTOUV444ROW_NEON
+// #define HAS_ARGBTOUV422ROW_NEON
+// #define HAS_ARGBTOUV411ROW_NEON
+// #define HAS_ARGBTOUVROW_NEON
+// #define HAS_ARGBTOUVJROW_NEON
+// #define HAS_BGRATOUVROW_NEON
+// #define HAS_ABGRTOUVROW_NEON
+// #define HAS_RGBATOUVROW_NEON
+// #define HAS_RGB24TOUVROW_NEON
+// #define HAS_RAWTOUVROW_NEON
+// #define HAS_RGB565TOUVROW_NEON
+// #define HAS_ARGB1555TOUVROW_NEON
+// #define HAS_ARGB4444TOUVROW_NEON
+// #define HAS_RGB565TOYROW_NEON
+// #define HAS_ARGB1555TOYROW_NEON
+// #define HAS_ARGB4444TOYROW_NEON
+// #define HAS_BGRATOYROW_NEON
+// #define HAS_ABGRTOYROW_NEON
+// #define HAS_RGBATOYROW_NEON
+// #define HAS_RGB24TOYROW_NEON
+// #define HAS_RAWTOYROW_NEON
+// #define HAS_INTERPOLATEROW_NEON
+// #define HAS_ARGBBLENDROW_NEON
+// #define HAS_ARGBATTENUATEROW_NEON
+// #define HAS_ARGBQUANTIZEROW_NEON
+// #define HAS_ARGBSHADEROW_NEON
+// #define HAS_ARGBGRAYROW_NEON
+// #define HAS_ARGBSEPIAROW_NEON
+// #define HAS_ARGBCOLORMATRIXROW_NEON
+#define HAS_ARGBMULTIPLYROW_NEON
+#define HAS_ARGBADDROW_NEON
+#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_SOBELROW_NEON
+#define HAS_SOBELTOPLANEROW_NEON
+#define HAS_SOBELXYROW_NEON
+#define HAS_SOBELXROW_NEON
+#define HAS_SOBELYROW_NEON
#endif
// The following are available on Neon platforms:
@@ -465,7 +553,7 @@
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
#endif // defined(__native_client__) && defined(__x86_64__)
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
#undef MEMACCESS
#if defined(__native_client__)
#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
diff --git a/third_party/libyuv/include/libyuv/scale_row.h b/third_party/libyuv/include/libyuv/scale_row.h
index 8dc0762..3c49542 100644
--- a/third_party/libyuv/include/libyuv/scale_row.h
+++ b/third_party/libyuv/include/libyuv/scale_row.h
@@ -51,6 +51,14 @@
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
+#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+ (defined(__aarch64__) || defined(LIBYUV_NEON))
+/* #define HAS_SCALEROWDOWN2_NEON */
+/* #define HAS_SCALEROWDOWN4_NEON */
+/* #define HAS_SCALEROWDOWN34_NEON */
+/* #define HAS_SCALEROWDOWN38_NEON */
+/* #define HAS_SCALEARGBROWDOWNEVEN_NEON */
+/* #define HAS_SCALEARGBROWDOWN2_NEON */
#endif
// The following are available on Mips platforms:
diff --git a/third_party/libyuv/include/libyuv/version.h b/third_party/libyuv/include/libyuv/version.h
index 912c4c9..73a7f1b 100644
--- a/third_party/libyuv/include/libyuv/version.h
+++ b/third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1041
+#define LIBYUV_VERSION 1059
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/third_party/libyuv/source/compare.cc b/third_party/libyuv/source/compare.cc
index 9ea81b4..dc715e0 100644
--- a/third_party/libyuv/source/compare.cc
+++ b/third_party/libyuv/source/compare.cc
@@ -80,7 +80,7 @@
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
diff --git a/third_party/libyuv/source/compare_neon.cc b/third_party/libyuv/source/compare_neon.cc
index 5e7b8e4..55052c0 100644
--- a/third_party/libyuv/source/compare_neon.cc
+++ b/third_party/libyuv/source/compare_neon.cc
@@ -56,6 +56,45 @@
return sse;
}
+#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
+ volatile uint32 sse;
+ asm volatile (
+ "eor v16.16b, v16.16b, v16.16b \n"
+ "eor v18.16b, v18.16b, v18.16b \n"
+ "eor v17.16b, v17.16b, v17.16b \n"
+ "eor v19.16b, v19.16b, v19.16b \n"
+
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "ld1 {v0.16b}, [%0], #16 \n"
+ MEMACCESS(1)
+ "ld1 {v1.16b}, [%1], #16 \n"
+ "subs %2, %2, #16 \n"
+ "usubl v2.8h, v0.8b, v1.8b \n"
+ "usubl2 v3.8h, v0.16b, v1.16b \n"
+ "smlal v16.4s, v2.4h, v2.4h \n"
+ "smlal v17.4s, v3.4h, v3.4h \n"
+ "smlal2 v18.4s, v2.8h, v2.8h \n"
+ "smlal2 v19.4s, v3.8h, v3.8h \n"
+ "bgt 1b \n"
+
+ "add v16.4s, v16.4s, v17.4s \n"
+ "add v18.4s, v18.4s, v19.4s \n"
+ "add v19.4s, v16.4s, v18.4s \n"
+ "addv s0, v19.4s \n"
+ "fmov %w3, s0 \n"
+ : "+r"(src_a),
+ "+r"(src_b),
+ "+r"(count),
+ "=r"(sse)
+ :
+ : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
+ return sse;
+}
+
#endif // __ARM_NEON__
#ifdef __cplusplus
diff --git a/third_party/libyuv/source/convert.cc b/third_party/libyuv/source/convert.cc
index 874a6cb..a8e294f 100644
--- a/third_party/libyuv/source/convert.cc
+++ b/third_party/libyuv/source/convert.cc
@@ -401,7 +401,7 @@
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
- int halfheight = (height + 1) >> 1;
+ int halfheight;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
int pix) = YUY2ToUV422Row_C;
@@ -711,11 +711,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
@@ -963,9 +965,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1022,36 +1021,44 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RGB24TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
- RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb24 += src_stride_rgb24 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
+ {
#if !defined(HAS_RGB24TOYROW_NEON)
- free_aligned_buffer_64(row);
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_RGB24TOYROW_NEON)
+ RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
+ RGB24ToYRow(src_rgb24, dst_y, width);
+ RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_rgb24 += src_stride_rgb24 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_RGB24TOYROW_NEON)
+ RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
+ RGB24ToYRow(src_rgb24, dst_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !defined(HAS_RGB24TOYROW_NEON)
+ free_aligned_buffer_64(row);
+#endif
+ }
return 0;
}
@@ -1075,9 +1082,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1134,36 +1138,42 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RAWTOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
- RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_raw += src_stride_raw * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
+ {
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
+
+ for (y = 0; y < height - 1; y += 2) {
+ #if defined(HAS_RAWTOYROW_NEON)
+ RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
+ RAWToYRow(src_raw, dst_y, width);
+ RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+ #else
+ RAWToARGBRow(src_raw, row, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ #endif
+ src_raw += src_stride_raw * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ #if defined(HAS_RAWTOYROW_NEON)
+ RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
+ RAWToYRow(src_raw, dst_y, width);
+ #else
+ RAWToARGBRow(src_raw, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ #endif
+ }
+ #if !defined(HAS_RAWTOYROW_NEON)
+ free_aligned_buffer_64(row);
+ #endif
}
- if (height & 1) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RAWTOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
return 0;
}
@@ -1187,9 +1197,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1246,36 +1253,44 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RGB565TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
- RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb565 += src_stride_rgb565 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
+ {
#if !defined(HAS_RGB565TOYROW_NEON)
- free_aligned_buffer_64(row);
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_RGB565TOYROW_NEON)
+ RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
+ RGB565ToYRow(src_rgb565, dst_y, width);
+ RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
+#else
+ RGB565ToARGBRow(src_rgb565, row, width);
+ RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_rgb565 += src_stride_rgb565 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_RGB565TOYROW_NEON)
+ RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
+ RGB565ToYRow(src_rgb565, dst_y, width);
+#else
+ RGB565ToARGBRow(src_rgb565, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !defined(HAS_RGB565TOYROW_NEON)
+ free_aligned_buffer_64(row);
+#endif
+ }
return 0;
}
@@ -1299,9 +1314,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1358,38 +1370,45 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_ARGB1555TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
- ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
- width);
-#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ {
+#if !defined(HAS_ARGB1555TOYROW_NEON)
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
- src_argb1555 += src_stride_argb1555 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
+ for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
+ ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
+ ARGB1555ToYRow(src_argb1555, dst_y, width);
+ ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
+ width);
#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
+ ARGB1555ToARGBRow(src_argb1555, row, width);
+ ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
+ width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
- }
+ src_argb1555 += src_stride_argb1555 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_ARGB1555TOYROW_NEON)
+ ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
+ ARGB1555ToYRow(src_argb1555, dst_y, width);
+#else
+ ARGB1555ToARGBRow(src_argb1555, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
#if !defined(HAS_ARGB1555TOYROW_NEON)
free_aligned_buffer_64(row);
#endif
+ }
return 0;
}
@@ -1413,9 +1432,6 @@
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
#endif
if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -1472,38 +1488,46 @@
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_ARGB4444TOYROW_NEON
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
- ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
- width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_argb4444 += src_stride_argb4444 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
+ {
#if !defined(HAS_ARGB4444TOYROW_NEON)
- free_aligned_buffer_64(row);
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 15) & ~15;
+ align_buffer_64(row, kRowSize * 2);
#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
+ ARGB4444ToYRow(src_argb4444, dst_y, width);
+ ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
+ width);
+#else
+ ARGB4444ToARGBRow(src_argb4444, row, width);
+ ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
+ width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_argb4444 += src_stride_argb4444 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
+ ARGB4444ToYRow(src_argb4444, dst_y, width);
+#else
+ ARGB4444ToARGBRow(src_argb4444, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !defined(HAS_ARGB4444TOYROW_NEON)
+ free_aligned_buffer_64(row);
+#endif
+ }
return 0;
}
diff --git a/third_party/libyuv/source/convert_from_argb.cc b/third_party/libyuv/source/convert_from_argb.cc
index 121a416..de461dd 100644
--- a/third_party/libyuv/source/convert_from_argb.cc
+++ b/third_party/libyuv/source/convert_from_argb.cc
@@ -60,6 +60,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToUV444Row = ARGBToUV444Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
@@ -76,10 +83,8 @@
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
- ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
- ARGBToUV444Row = ARGBToUV444Row_NEON;
}
}
#endif
@@ -134,6 +139,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV422Row = ARGBToUV422Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
@@ -153,12 +165,6 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
}
#endif
@@ -228,11 +234,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 32) {
- ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUV411Row = ARGBToUV411Row_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUV411ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 32) {
+ ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUV411Row = ARGBToUV411Row_NEON;
}
}
#endif
@@ -261,9 +269,6 @@
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
if (!src_argb ||
!dst_y || !dst_uv ||
width <= 0 || height == 0) {
@@ -296,11 +301,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
@@ -331,22 +338,27 @@
}
}
#endif
+ {
+ // Allocate a rows of uv.
+ align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
+ uint8* row_v = row_u + ((halfwidth + 15) & ~15);
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+ MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ free_aligned_buffer_64(row_u);
}
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
return 0;
}
@@ -364,9 +376,6 @@
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
if (!src_argb ||
!dst_y || !dst_uv ||
width <= 0 || height == 0) {
@@ -399,11 +408,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
@@ -434,22 +445,27 @@
}
}
#endif
+ {
+ // Allocate a rows of uv.
+ align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
+ uint8* row_v = row_u + ((halfwidth + 15) & ~15);
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+ MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ free_aligned_buffer_64(row_u);
}
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
return 0;
}
@@ -493,6 +509,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV422Row = ARGBToUV422Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
@@ -510,12 +533,6 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
}
#endif
@@ -594,6 +611,13 @@
}
}
}
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV422Row = ARGBToUV422Row_NEON;
+ }
+ }
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
@@ -611,12 +635,6 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
}
#endif
@@ -1022,11 +1040,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
}
- if (width >= 16) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
}
}
#endif
diff --git a/third_party/libyuv/source/cpu_id.cc b/third_party/libyuv/source/cpu_id.cc
index 2e0d61d..deb4c44 100644
--- a/third_party/libyuv/source/cpu_id.cc
+++ b/third_party/libyuv/source/cpu_id.cc
@@ -14,7 +14,7 @@
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- !defined(__native_client__) && defined(_M_X64) && \
+ !defined(__native_client__) && \
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv()
#endif
@@ -97,7 +97,7 @@
uint32 xcr0 = 0u;
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
-#elif defined(_M_IX86)
+#elif defined(_M_IX86) && defined(_MSC_VER)
__asm {
xor ecx, ecx // xcr 0
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
@@ -256,12 +256,17 @@
if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
cpu_info_ &= ~kCpuHasMIPS_DSPR2;
}
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
cpu_info_ = kCpuHasNEON;
+// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
+// flag in it.
+// So for aarch64, neon enabling is hard coded here.
+#elif defined(__aarch64__)
+ cpu_info_ = kCpuHasNEON;
#else
// Linux arm parse text file for neon detect.
cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
diff --git a/third_party/libyuv/source/format_conversion.cc b/third_party/libyuv/source/format_conversion.cc
index a3daf96..3c17371 100644
--- a/third_party/libyuv/source/format_conversion.cc
+++ b/third_party/libyuv/source/format_conversion.cc
@@ -332,11 +332,13 @@
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
diff --git a/third_party/libyuv/source/mjpeg_decoder.cc b/third_party/libyuv/source/mjpeg_decoder.cc
index 15b0ed8..36028c3 100644
--- a/third_party/libyuv/source/mjpeg_decoder.cc
+++ b/third_party/libyuv/source/mjpeg_decoder.cc
@@ -13,8 +13,8 @@
#ifdef HAVE_JPEG
#include <assert.h>
-#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
- !defined(TARGET_IPHONE_SIMULATOR)
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+ !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP
@@ -101,7 +101,7 @@
}
buf_.data = src;
- buf_.len = (int)(src_len);
+ buf_.len = static_cast<int>(src_len);
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
@@ -411,7 +411,7 @@
}
boolean fill_input_buffer(j_decompress_ptr cinfo) {
- BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
+ BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
if (buf_vec->pos >= buf_vec->len) {
assert(0 && "No more data");
// ERROR: No more data
@@ -447,7 +447,7 @@
// ERROR: Error in jpeglib: buf
#endif
- SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
+ SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
// This rewinds the call stack to the point of the corresponding setjmp()
// and causes it to return (for a second time) with value 1.
longjmp(mgr->setjmp_buffer, 1);
diff --git a/third_party/libyuv/source/row_any.cc b/third_party/libyuv/source/row_any.cc
index 97ef844..ce8b3da 100644
--- a/third_party/libyuv/source/row_any.cc
+++ b/third_party/libyuv/source/row_any.cc
@@ -79,9 +79,13 @@
YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
1, 2, 7)
YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
-YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
-YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_NEON
+#ifdef HAS_I422TOYUY2ROW_NEON
+YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
+#endif // HAS_I422TOYUY2ROW_NEON
+#ifdef HAS_I422TOUYVYROW_NEON
+YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
+#endif // HAS_I422TOUYVYROW_NEON
#undef YANY
// Wrappers to handle odd width
@@ -250,12 +254,26 @@
YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
+#endif
+#ifdef HAS_YUY2TOYROW_NEON
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
+#endif
+#ifdef HAS_UYVYTOYROW_NEON
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
+#endif
+#ifdef HAS_RGB24TOARGBROW_NEON
YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
+#endif
+#ifdef HAS_RAWTOARGBROW_NEON
YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
+#endif
+#ifdef HAS_RGB565TOARGBROW_NEON
YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
+#endif
+#ifdef HAS_ARGB1555TOARGBROW_NEON
YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
+#endif
+#ifdef HAS_ARGB4444TOARGBROW_NEON
YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
#endif
#undef YANY
@@ -333,7 +351,11 @@
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_NEON
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_UYVYTOUVROW_NEON
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
#endif
#undef UVANY
diff --git a/third_party/libyuv/source/row_neon64.cc b/third_party/libyuv/source/row_neon64.cc
index 46e9ceb..21111cf 100644
--- a/third_party/libyuv/source/row_neon64.cc
+++ b/third_party/libyuv/source/row_neon64.cc
@@ -824,19 +824,19 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
+ "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store U
+ "st1 {v0.16b}, [%1], #16 \n" // store U
MEMACCESS(2)
- "vst1.8 {q1}, [%2]! \n" // store V
+ "st1 {v1.16b}, [%2], #16 \n" // store V
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3 // Output registers
: // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_SPLITUVROW_NEON
@@ -849,12 +849,12 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load U
+ "ld1 {v0.16b}, [%0], #16 \n" // load U
MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load V
+ "ld1 {v1.16b}, [%1], #16 \n" // load V
"subs %3, %3, #16 \n" // 16 processed per loop
MEMACCESS(2)
- "vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV
+ "st2 {v0.16b, v1.16b}, [%2], #32 \n" // store 16 pairs of UV
"bgt 1b \n"
:
"+r"(src_u), // %0
@@ -862,7 +862,7 @@
"+r"(dst_uv), // %2
"+r"(width) // %3 // Output registers
: // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_MERGEUVROW_NEON
@@ -874,16 +874,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
+ "ld1 {v0.8b-v3.8b}, [%0], #32 \n" // load 32
"subs %2, %2, #32 \n" // 32 processed per loop
MEMACCESS(1)
- "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32
+ "st1 {v0.8b-v3.8b}, [%1], #32 \n" // store 32
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(count) // %2 // Output registers
: // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_COPYROW_NEON
@@ -892,16 +892,16 @@
#ifdef HAS_SETROW_NEON
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
asm volatile (
- "vdup.u32 q0, %2 \n" // duplicate 4 ints
+ "dup v0.4s, %w2 \n" // duplicate 4 ints
"1: \n"
"subs %1, %1, #16 \n" // 16 bytes per loop
MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n" // store
+ "st1 {v0.16b}, [%0], #16 \n" // store
"bgt 1b \n"
: "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
- : "cc", "memory", "q0"
+ : "cc", "memory", "v0"
);
}
#endif // HAS_SETROW_NEON
@@ -922,26 +922,25 @@
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
asm volatile (
// Start at end of source row.
- "mov r3, #-16 \n"
"add %0, %0, %2 \n"
- "sub %0, #16 \n"
+ "sub %0, %0, #16 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #16 \n" // 16 pixels per loop.
- "vrev64.8 q0, q0 \n"
+ "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
+ "subs %2, %2, #16 \n" // 16 pixels per loop.
+ "rev64 v0.16b, v0.16b \n"
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // dst += 16
+ "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n"
+ "st1 {v0.D}[0], [%1], #8 \n"
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
+ : "r"((ptrdiff_t)-16) // %3
+ : "cc", "memory", "v0"
);
}
#endif // HAS_MIRRORROW_NEON
@@ -951,27 +950,27 @@
int width) {
asm volatile (
// Start at end of source row.
- "mov r12, #-16 \n"
"add %0, %0, %3, lsl #1 \n"
- "sub %0, #16 \n"
+ "sub %0, %0, #16 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
- "subs %3, #8 \n" // 8 pixels per loop.
- "vrev64.8 q0, q0 \n"
+ "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
+ "subs %3, %3, #8 \n" // 8 pixels per loop.
+ "rev64 v0.8b, v0.8b \n"
+ "rev64 v1.8b, v1.8b \n"
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // dst += 8
+ "st1 {v0.8b}, [%1], #8 \n" // dst += 8
MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n"
+ "st1 {v1.8b}, [%2], #8 \n"
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3
- :
- : "cc", "memory", "r12", "q0"
+ : "r"((ptrdiff_t)-16) // %4
+ : "cc", "memory", "v0", "v1"
);
}
#endif // HAS_MIRRORUVROW_NEON
@@ -980,26 +979,25 @@
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
asm volatile (
// Start at end of source row.
- "mov r3, #-16 \n"
"add %0, %0, %2, lsl #2 \n"
- "sub %0, #16 \n"
+ "sub %0, %0, #16 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #4 \n" // 4 pixels per loop.
- "vrev64.32 q0, q0 \n"
+ "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
+ "subs %2, %2, #4 \n" // 4 pixels per loop.
+ "rev64 v0.4s, v0.4s \n"
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // dst += 16
+ "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n"
+ "st1 {v0.D}[0], [%1], #8 \n"
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
+ : "r"((ptrdiff_t)-16) // %3
+ : "cc", "memory", "v0"
);
}
#endif // HAS_ARGBMIRRORROW_NEON
@@ -1007,20 +1005,20 @@
#ifdef HAS_RGB24TOARGBROW_NEON
void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
+ "movi v4.8b, #255 \n" // Alpha
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24.
+ "ld3 {v1.8b-v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
"subs %2, %2, #8 \n" // 8 processed per loop.
MEMACCESS(1)
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
+ "st4 {v1.8b-v4.8b}, [%1], #32 \n" // store 8 pixels of ARGB.
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
}
#endif // HAS_RGB24TOARGBROW_NEON
@@ -1028,21 +1026,22 @@
#ifdef HAS_RAWTOARGBROW_NEON
void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
+ "movi v5.8b, #255 \n" // Alpha
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
+ "ld3 {v0.8b-v2.8b}, [%0], #24 \n" // read r g b
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
+ "mov v3.8b, v1.8b \n" // move g
+ "mov v4.8b, v0.8b \n" // move r
MEMACCESS(1)
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
+ "st4 {v2.8b-v5.8b}, [%1], #32 \n" // store b g r a
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
}
#endif // HAS_RAWTOARGBROW_NEON
@@ -1170,16 +1169,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
+ "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
MEMACCESS(1)
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24.
+ "st3 {v1.8b-v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb24), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
}
#endif // HAS_ARGBTORGB24ROW_NEON
@@ -1190,17 +1189,18 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
+ "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load b g r a
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
+ "mov v4.8b, v2.8b \n" // mov g
+ "mov v5.8b, v1.8b \n" // mov b
MEMACCESS(1)
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW.
+ "st3 {v3.8b-v5.8b}, [%1], #24 \n" // store r g b
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_raw), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
}
#endif // HAS_ARGBTORAWROW_NEON
@@ -1211,16 +1211,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
+ "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
"subs %2, %2, #16 \n" // 16 processed per loop.
MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y.
+ "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y.
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_YUY2TOYROW_NEON
@@ -1231,16 +1231,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
+ "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
"subs %2, %2, #16 \n" // 16 processed per loop.
MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y.
+ "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y.
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_UYVYTOYROW_NEON
@@ -1252,19 +1252,19 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
"subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // store 8 U.
+ "st1 {v1.8b}, [%1], #8 \n" // store 8 U.
MEMACCESS(2)
- "vst1.8 {d3}, [%2]! \n" // store 8 V.
+ "st1 {v3.8b}, [%2], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_YUY2TOUV422ROW_NEON
@@ -1276,19 +1276,19 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
"subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 U.
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 U.
MEMACCESS(2)
- "vst1.8 {d2}, [%2]! \n" // store 8 V.
+ "st1 {v2.8b}, [%2], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_UYVYTOUV422ROW_NEON
@@ -1297,20 +1297,20 @@
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
- "add %1, %0, %1 \n" // stride + src_yuy2
+ "add %x1, %x0, %w1, sxtw \n" // stride + src_yuy2
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
"subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2.
- "vrhadd.u8 d1, d1, d5 \n" // average rows of U
- "vrhadd.u8 d3, d3, d7 \n" // average rows of V
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row YUY2.
+ "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U
+ "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V
MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n" // store 8 U.
+ "st1 {v1.8b}, [%2], #8 \n" // store 8 U.
MEMACCESS(3)
- "vst1.8 {d3}, [%3]! \n" // store 8 V.
+ "st1 {v3.8b}, [%3], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(stride_yuy2), // %1
@@ -1318,7 +1318,7 @@
"+r"(dst_v), // %3
"+r"(pix) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
);
}
#endif // HAS_YUY2TOUVROW_NEON
@@ -1327,20 +1327,20 @@
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
- "add %1, %0, %1 \n" // stride + src_uyvy
+ "add %x1, %x0, %w1, sxtw \n" // stride + src_uyvy
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
"subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY.
- "vrhadd.u8 d0, d0, d4 \n" // average rows of U
- "vrhadd.u8 d2, d2, d6 \n" // average rows of V
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row UYVY.
+ "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U
+ "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 U.
MEMACCESS(3)
- "vst1.8 {d2}, [%3]! \n" // store 8 V.
+ "st1 {v2.8b}, [%3], #8 \n" // store 8 V.
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(stride_uyvy), // %1
@@ -1348,7 +1348,7 @@
"+r"(dst_v), // %3
"+r"(pix) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
);
}
#endif // HAS_UYVYTOUVROW_NEON
@@ -1358,23 +1358,23 @@
uint8* dst_uv, int pix) {
asm volatile (
// change the stride to row 2 pointer
- "add %1, %0 \n"
+ "add %x1, %x0, %w1, sxtw \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // load row 1 16 pixels.
"subs %3, %3, #16 \n" // 16 processed per loop
MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels.
- "vrhadd.u8 q0, q1 \n" // average row 1 and 2
+ "ld1 {v1.16b}, [%1], #16 \n" // load row 2 16 pixels.
+ "urhadd v0.16b, v0.16b, v1.16b \n" // average row 1 and 2
MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n"
+ "st1 {v0.16b}, [%2], #16 \n"
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(src_uv_stride), // %1
"+r"(dst_uv), // %2
"+r"(pix) // %3
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
#endif // HAS_HALFROW_NEON
@@ -1384,22 +1384,22 @@
void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) {
asm volatile (
- "vmov.u32 d6[0], %3 \n" // selector
+ "mov v2.s[0], %w3 \n" // selector
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels.
+ "ld1 {v0.16b, v1.16b}, [%0], 32 \n" // load row 8 pixels.
"subs %2, %2, #8 \n" // 8 processed per loop
- "vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels
- "vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels
- "vtrn.u32 d4, d5 \n" // combine 8 pixels
+ "tbl v4.8b, {v0.16b}, v2.8b \n" // look up 4 pixels
+ "tbl v5.8b, {v1.16b}, v2.8b \n" // look up 4 pixels
+ "trn1 v4.4s, v4.4s, v5.4s \n" // combine 8 pixels
MEMACCESS(1)
- "vst1.8 {d4}, [%1]! \n" // store 8.
+ "st1 {v4.8b}, [%1], #8 \n" // store 8.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_bayer), // %1
"+r"(pix) // %2
: "r"(selector) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v4", "v5" // Clobber List
);
}
#endif // HAS_ARGBTOBAYERROW_NEON
@@ -1411,16 +1411,16 @@
asm volatile (
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load row 8 pixels.
"subs %2, %2, #8 \n" // 8 processed per loop
MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // store 8 G's.
+ "st1 {v1.8b}, [%1], #8 \n" // store 8 G's.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_bayer), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_ARGBTOBAYERGGROW_NEON
@@ -1431,21 +1431,20 @@
const uint8* shuffler, int pix) {
asm volatile (
MEMACCESS(3)
- "vld1.8 {q2}, [%3] \n" // shuffler
+ "ld1 {v2.16b}, [%3] \n" // shuffler
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 4 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels.
"subs %2, %2, #4 \n" // 4 processed per loop
- "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels
- "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels
+ "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels
MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store 4.
+ "st1 {v1.16b}, [%1], #16 \n" // store 4.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
: "r"(shuffler) // %3
- : "cc", "memory", "q0", "q1", "q2" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2" // Clobber List
);
}
#endif // HAS_ARGBSHUFFLEROW_NEON
@@ -1459,14 +1458,15 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys
+ "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
+ "mov v2.8b, v1.8b \n"
MEMACCESS(1)
- "vld1.8 {d1}, [%1]! \n" // load 8 Us
+ "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
MEMACCESS(2)
- "vld1.8 {d3}, [%2]! \n" // load 8 Vs
+ "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs
"subs %4, %4, #16 \n" // 16 pixels
MEMACCESS(3)
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels.
+ "st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 YUY2/16 pixels.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
@@ -1474,7 +1474,7 @@
"+r"(dst_yuy2), // %3
"+r"(width) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_I422TOYUY2ROW_NEON
@@ -1488,14 +1488,15 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys
+ "ld2 {v1.8b, v2.8b}, [%0], #16 \n" // load 16 Ys
+ "mov v3.8b, v2.8b \n"
MEMACCESS(1)
- "vld1.8 {d0}, [%1]! \n" // load 8 Us
+ "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us
MEMACCESS(2)
- "vld1.8 {d2}, [%2]! \n" // load 8 Vs
+ "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs
"subs %4, %4, #16 \n" // 16 pixels
MEMACCESS(3)
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels.
+ "st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 UYVY/16 pixels.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
@@ -1503,7 +1504,7 @@
"+r"(dst_uyvy), // %3
"+r"(width) // %4
:
- : "cc", "memory", "d0", "d1", "d2", "d3"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_I422TOUYVYROW_NEON
@@ -1577,28 +1578,28 @@
#ifdef HAS_ARGBTOYROW_NEON
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
+ "movi v4.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBTOYROW_NEON
@@ -1606,26 +1607,26 @@
#ifdef HAS_ARGBTOYJROW_NEON
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
- "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
- "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
- "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
+ "movi v4.8b, #15 \n" // B * 0.11400 coefficient
+ "movi v5.8b, #75 \n" // G * 0.58700 coefficient
+ "movi v6.8b, #38 \n" // R * 0.29900 coefficient
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
"subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
);
}
#endif // HAS_ARGBTOYJROW_NEON
@@ -3048,20 +3049,20 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels.
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q0, d0, d1 \n" // multiply B
- "vmull.u8 q1, d2, d3 \n" // multiply G
- "vmull.u8 q2, d4, d5 \n" // multiply R
- "vmull.u8 q3, d6, d7 \n" // multiply A
- "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B
- "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G
- "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R
- "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
+ "umull v0.8h, v0.8b, v4.8b \n" // multiply B
+ "umull v1.8h, v1.8b, v5.8b \n" // multiply G
+ "umull v2.8h, v2.8b, v6.8b \n" // multiply R
+ "umull v3.8h, v3.8b, v7.8b \n" // multiply A
+ "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B
+ "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G
+ "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R
+ "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
@@ -3069,7 +3070,7 @@
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1", "q2", "q3"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBMULTIPLYROW_NEON
@@ -3083,14 +3084,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 q0, q0, q2 \n" // add B, G
- "vqadd.u8 q1, q1, q3 \n" // add R, A
+ "uqadd v0.8b, v0.8b, v4.8b \n"
+ "uqadd v1.8b, v1.8b, v5.8b \n"
+ "uqadd v2.8b, v2.8b, v6.8b \n"
+ "uqadd v3.8b, v3.8b, v7.8b \n"
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
@@ -3098,7 +3101,7 @@
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1", "q2", "q3"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBADDROW_NEON
@@ -3112,14 +3115,16 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
+ "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqsub.u8 q0, q0, q2 \n" // subtract B, G
- "vqsub.u8 q1, q1, q3 \n" // subtract R, A
+ "uqsub v0.8b, v0.8b, v4.8b \n"
+ "uqsub v1.8b, v1.8b, v5.8b \n"
+ "uqsub v2.8b, v2.8b, v6.8b \n"
+ "uqsub v3.8b, v3.8b, v7.8b \n"
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
@@ -3127,7 +3132,7 @@
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1", "q2", "q3"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGBSUBTRACTROW_NEON
@@ -3141,27 +3146,27 @@
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
+ "movi v3.8b, #255 \n" // alpha
// 8 pixel loop.
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
+ "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx.
MEMACCESS(1)
- "vld1.8 {d1}, [%1]! \n" // load 8 sobely.
+ "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d0, d0, d1 \n" // add
- "vmov.u8 d1, d0 \n"
- "vmov.u8 d2, d0 \n"
+ "uqadd v0.8b, v0.8b, v1.8b \n" // add
+ "mov v1.8b, v0.8b \n"
+ "mov v2.8b, v0.8b \n"
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_SOBELROW_NEON
@@ -3175,20 +3180,20 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx.
MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load 16 sobely.
+ "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely.
"subs %3, %3, #16 \n" // 16 processed per loop.
- "vqadd.u8 q0, q0, q1 \n" // add
+ "uqadd v0.16b, v0.16b, v1.16b \n" // add
MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 16 pixels.
+ "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels.
"bgt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_y), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1"
+ : "cc", "memory", "v0", "v1"
);
}
#endif // HAS_SOBELTOPLANEROW_NEON
@@ -3202,25 +3207,25 @@
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
+ "movi v3.8b, #255 \n" // alpha
// 8 pixel loop.
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
+ "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx.
MEMACCESS(1)
- "vld1.8 {d0}, [%1]! \n" // load 8 sobely.
+ "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely.
"subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d1, d0, d2 \n" // add
+ "uqadd v1.8b, v0.8b, v2.8b \n" // add
MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "cc", "memory", "q0", "q1"
+ : "cc", "memory", "v0", "v1", "v2", "v3"
);
}
#endif // HAS_SOBELXYROW_NEON
@@ -3236,28 +3241,28 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0}, [%0],%5 \n" // top
+ "ld1 {v0.8b}, [%0],%5 \n" // top
MEMACCESS(0)
- "vld1.8 {d1}, [%0],%6 \n"
- "vsubl.u8 q0, d0, d1 \n"
+ "ld1 {v1.8b}, [%0],%6 \n"
+ "usubl v0.8h, v0.8b, v1.8b \n"
MEMACCESS(1)
- "vld1.8 {d2}, [%1],%5 \n" // center * 2
+ "ld1 {v2.8b}, [%1],%5 \n" // center * 2
MEMACCESS(1)
- "vld1.8 {d3}, [%1],%6 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
+ "ld1 {v3.8b}, [%1],%6 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "add v0.8h, v0.8h, v1.8h \n"
MEMACCESS(2)
- "vld1.8 {d2}, [%2],%5 \n" // bottom
+ "ld1 {v2.8b}, [%2],%5 \n" // bottom
MEMACCESS(2)
- "vld1.8 {d3}, [%2],%6 \n"
+ "ld1 {v3.8b}, [%2],%6 \n"
"subs %4, %4, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "abs v0.8h, v0.8h \n"
+ "uqxtn v0.8b, v0.8h \n"
MEMACCESS(3)
- "vst1.8 {d0}, [%3]! \n" // store 8 sobelx
+ "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx
"bgt 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
@@ -3266,7 +3271,7 @@
"+r"(width) // %4
: "r"(2), // %5
"r"(6) // %6
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_SOBELXROW_NEON
@@ -3282,28 +3287,28 @@
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d0}, [%0],%4 \n" // left
+ "ld1 {v0.8b}, [%0],%4 \n" // left
MEMACCESS(1)
- "vld1.8 {d1}, [%1],%4 \n"
- "vsubl.u8 q0, d0, d1 \n"
+ "ld1 {v1.8b}, [%1],%4 \n"
+ "usubl v0.8h, v0.8b, v1.8b \n"
MEMACCESS(0)
- "vld1.8 {d2}, [%0],%4 \n" // center * 2
+ "ld1 {v2.8b}, [%0],%4 \n" // center * 2
MEMACCESS(1)
- "vld1.8 {d3}, [%1],%4 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
+ "ld1 {v3.8b}, [%1],%4 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "add v0.8h, v0.8h, v1.8h \n"
MEMACCESS(0)
- "vld1.8 {d2}, [%0],%5 \n" // right
+ "ld1 {v2.8b}, [%0],%5 \n" // right
MEMACCESS(1)
- "vld1.8 {d3}, [%1],%5 \n"
+ "ld1 {v3.8b}, [%1],%5 \n"
"subs %3, %3, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "abs v0.8h, v0.8h \n"
+ "uqxtn v0.8b, v0.8h \n"
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 sobely
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely
"bgt 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
@@ -3311,7 +3316,7 @@
"+r"(width) // %3
: "r"(1), // %4
"r"(6) // %5
- : "cc", "memory", "q0", "q1" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_SOBELYROW_NEON
diff --git a/third_party/libyuv/source/row_win.cc b/third_party/libyuv/source/row_win.cc
index 8eb8889..f58fc51 100644
--- a/third_party/libyuv/source/row_win.cc
+++ b/third_party/libyuv/source/row_win.cc
@@ -10,7 +10,7 @@
#include "libyuv/row.h"
-#if defined (_M_X64)
+#if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
#include <emmintrin.h>
#include <tmmintrin.h> // For _mm_maddubs_epi16
#endif
@@ -78,7 +78,6 @@
const uint8* v_buf,
uint8* dst_argb,
int width) {
-
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const __m128i xmm4 = _mm_setzero_si128();
@@ -132,7 +131,6 @@
const uint8* v_buf,
uint8* dst_argb,
int width) {
-
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const __m128i xmm4 = _mm_setzero_si128();
diff --git a/third_party/libyuv/source/scale_neon64.cc b/third_party/libyuv/source/scale_neon64.cc
new file mode 100644
index 0000000..64c7d10
--- /dev/null
+++ b/third_party/libyuv/source/scale_neon64.cc
@@ -0,0 +1,790 @@
+/*
+ * Copyright 2014 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon.
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#ifdef HAS_SCALEROWDOWN2_NEON
+// Read 32x1 throw away even pixels, and write 16x1.
+void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width) {
+ asm volatile (
+ ".p2align 2 \n"
+ "1: \n"
+ // load even pixels into q0, odd into q1
+ MEMACCESS(0)
+ "vld2.8 {q0, q1}, [%0]! \n"
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ MEMACCESS(1)
+ "vst1.8 {q1}, [%1]! \n" // store odd pixels
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "q0", "q1" // Clobber List
+ );
+}
+#endif //HAS_SCALEROWDOWN2_NEON
+
+#ifdef HAS_SCALEROWDOWN2_NEON
+// Read 32x2 average down and write 16x1.
+void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width) {
+ asm volatile (
+ // change the stride to row 2 pointer
+ "add %1, %0 \n"
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
+ MEMACCESS(1)
+ "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc
+ "subs %3, %3, #16 \n" // 16 processed per loop
+ "vpaddl.u8 q0, q0 \n" // row 1 add adjacent
+ "vpaddl.u8 q1, q1 \n"
+ "vpadal.u8 q0, q2 \n" // row 2 add adjacent + row1
+ "vpadal.u8 q1, q3 \n"
+ "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
+ "vrshrn.u16 d1, q1, #2 \n"
+ MEMACCESS(2)
+ "vst1.8 {q0}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ :
+ : "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+#endif //HAS_SCALEROWDOWN2_NEON
+
+#ifdef HAS_SCALEROWDOWN4_NEON
+void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ MEMACCESS(1)
+ "vst1.8 {d2}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :
+ : "q0", "q1", "memory", "cc"
+ );
+}
+#endif //HAS_SCALEROWDOWN4_NEON
+
+#ifdef HAS_SCALEROWDOWN4_NEON
+void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ const uint8* src_ptr1 = src_ptr + src_stride;
+ const uint8* src_ptr2 = src_ptr + src_stride * 2;
+ const uint8* src_ptr3 = src_ptr + src_stride * 3;
+asm volatile (
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld1.8 {q0}, [%0]! \n" // load up 16x4
+ MEMACCESS(3)
+ "vld1.8 {q1}, [%3]! \n"
+ MEMACCESS(4)
+ "vld1.8 {q2}, [%4]! \n"
+ MEMACCESS(5)
+ "vld1.8 {q3}, [%5]! \n"
+ "subs %2, %2, #4 \n"
+ "vpaddl.u8 q0, q0 \n"
+ "vpadal.u8 q0, q1 \n"
+ "vpadal.u8 q0, q2 \n"
+ "vpadal.u8 q0, q3 \n"
+ "vpaddl.u16 q0, q0 \n"
+ "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding
+ "vmovn.u16 d0, q0 \n"
+ MEMACCESS(1)
+ "vst1.32 {d0[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_ptr1), // %3
+ "+r"(src_ptr2), // %4
+ "+r"(src_ptr3) // %5
+ :
+ : "q0", "q1", "q2", "q3", "memory", "cc"
+ );
+}
+#endif //HAS_SCALEROWDOWN4_NEON
+
+#ifdef HAS_SCALEROWDOWN34_NEON
+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
+// to load up the every 4th pixel into a 4 different registers.
+// Point samples 32 pixels to 24 pixels.
+void ScaleRowDown34_NEON(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ "subs %2, %2, #24 \n"
+ "vmov d2, d3 \n" // order d0, d1, d2
+ MEMACCESS(1)
+ "vst3.8 {d0, d1, d2}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :
+ : "d0", "d1", "d2", "d3", "memory", "cc"
+ );
+}
+#endif //HAS_SCALEROWDOWN34_NEON
+
+#ifdef HAS_SCALEROWDOWN34_NEON
+void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ "vmov.u8 d24, #3 \n"
+ "add %3, %0 \n"
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ MEMACCESS(3)
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
+ "subs %2, %2, #24 \n"
+
+ // filter src line 0 with src line 1
+ // expand chars to shorts to allow for room
+ // when adding lines together
+ "vmovl.u8 q8, d4 \n"
+ "vmovl.u8 q9, d5 \n"
+ "vmovl.u8 q10, d6 \n"
+ "vmovl.u8 q11, d7 \n"
+
+ // 3 * line_0 + line_1
+ "vmlal.u8 q8, d0, d24 \n"
+ "vmlal.u8 q9, d1, d24 \n"
+ "vmlal.u8 q10, d2, d24 \n"
+ "vmlal.u8 q11, d3, d24 \n"
+
+ // (3 * line_0 + line_1) >> 2
+ "vqrshrn.u16 d0, q8, #2 \n"
+ "vqrshrn.u16 d1, q9, #2 \n"
+ "vqrshrn.u16 d2, q10, #2 \n"
+ "vqrshrn.u16 d3, q11, #2 \n"
+
+ // a0 = (src[0] * 3 + s[1] * 1) >> 2
+ "vmovl.u8 q8, d1 \n"
+ "vmlal.u8 q8, d0, d24 \n"
+ "vqrshrn.u16 d0, q8, #2 \n"
+
+ // a1 = (src[1] * 1 + s[2] * 1) >> 1
+ "vrhadd.u8 d1, d1, d2 \n"
+
+ // a2 = (src[2] * 1 + s[3] * 3) >> 2
+ "vmovl.u8 q8, d2 \n"
+ "vmlal.u8 q8, d3, d24 \n"
+ "vqrshrn.u16 d2, q8, #2 \n"
+
+ MEMACCESS(1)
+ "vst3.8 {d0, d1, d2}, [%1]! \n"
+
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ :
+ : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc"
+ );
+}
+#endif //ScaleRowDown34_0_Box_NEON
+
+#ifdef HAS_SCALEROWDOWN34_NEON
+void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ "vmov.u8 d24, #3 \n"
+ "add %3, %0 \n"
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ MEMACCESS(3)
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
+ "subs %2, %2, #24 \n"
+ // average src line 0 with src line 1
+ "vrhadd.u8 q0, q0, q2 \n"
+ "vrhadd.u8 q1, q1, q3 \n"
+
+ // a0 = (src[0] * 3 + s[1] * 1) >> 2
+ "vmovl.u8 q3, d1 \n"
+ "vmlal.u8 q3, d0, d24 \n"
+ "vqrshrn.u16 d0, q3, #2 \n"
+
+ // a1 = (src[1] * 1 + s[2] * 1) >> 1
+ "vrhadd.u8 d1, d1, d2 \n"
+
+ // a2 = (src[2] * 1 + s[3] * 3) >> 2
+ "vmovl.u8 q3, d2 \n"
+ "vmlal.u8 q3, d3, d24 \n"
+ "vqrshrn.u16 d2, q3, #2 \n"
+
+ MEMACCESS(1)
+ "vst3.8 {d0, d1, d2}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ :
+ : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"
+ );
+}
+#endif //HAS_SCALEROWDOWN34_NEON
+
+#ifdef HAS_SCALEROWDOWN38_NEON
+#define HAS_SCALEROWDOWN38_NEON
+static uvec8 kShuf38 =
+ { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
+static uvec8 kShuf38_2 =
+ { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
+static vec16 kMult38_Div6 =
+ { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
+ 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
+static vec16 kMult38_Div9 =
+ { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
+ 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
+
+// 32 -> 12
+void ScaleRowDown38_NEON(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ MEMACCESS(3)
+ "vld1.8 {q3}, [%3] \n"
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld1.8 {d0, d1, d2, d3}, [%0]! \n"
+ "subs %2, %2, #12 \n"
+ "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n"
+ "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n"
+ MEMACCESS(1)
+ "vst1.8 {d4}, [%1]! \n"
+ MEMACCESS(1)
+ "vst1.32 {d5[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"(&kShuf38) // %3
+ : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"
+ );
+}
+
+#endif //HAS_SCALEROWDOWN38_NEON
+
+#ifdef HAS_SCALEROWDOWN38_NEON
+// 32x3 -> 12x1
+void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ const uint8* src_ptr1 = src_ptr + src_stride * 2;
+
+ asm volatile (
+ MEMACCESS(5)
+ "vld1.16 {q13}, [%5] \n"
+ MEMACCESS(6)
+ "vld1.8 {q14}, [%6] \n"
+ MEMACCESS(7)
+ "vld1.8 {q15}, [%7] \n"
+ "add %3, %0 \n"
+ ".p2align 2 \n"
+ "1: \n"
+
+ // d0 = 00 40 01 41 02 42 03 43
+ // d1 = 10 50 11 51 12 52 13 53
+ // d2 = 20 60 21 61 22 62 23 63
+ // d3 = 30 70 31 71 32 72 33 73
+ MEMACCESS(0)
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
+ MEMACCESS(3)
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
+ MEMACCESS(4)
+ "vld4.8 {d16, d17, d18, d19}, [%4]! \n"
+ "subs %2, %2, #12 \n"
+
+ // Shuffle the input data around to get align the data
+ // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+ // d0 = 00 10 01 11 02 12 03 13
+ // d1 = 40 50 41 51 42 52 43 53
+ "vtrn.u8 d0, d1 \n"
+ "vtrn.u8 d4, d5 \n"
+ "vtrn.u8 d16, d17 \n"
+
+ // d2 = 20 30 21 31 22 32 23 33
+ // d3 = 60 70 61 71 62 72 63 73
+ "vtrn.u8 d2, d3 \n"
+ "vtrn.u8 d6, d7 \n"
+ "vtrn.u8 d18, d19 \n"
+
+ // d0 = 00+10 01+11 02+12 03+13
+ // d2 = 40+50 41+51 42+52 43+53
+ "vpaddl.u8 q0, q0 \n"
+ "vpaddl.u8 q2, q2 \n"
+ "vpaddl.u8 q8, q8 \n"
+
+ // d3 = 60+70 61+71 62+72 63+73
+ "vpaddl.u8 d3, d3 \n"
+ "vpaddl.u8 d7, d7 \n"
+ "vpaddl.u8 d19, d19 \n"
+
+ // combine source lines
+ "vadd.u16 q0, q2 \n"
+ "vadd.u16 q0, q8 \n"
+ "vadd.u16 d4, d3, d7 \n"
+ "vadd.u16 d4, d19 \n"
+
+ // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
+ // + s[6 + st * 1] + s[7 + st * 1]
+ // + s[6 + st * 2] + s[7 + st * 2]) / 6
+ "vqrdmulh.s16 q2, q2, q13 \n"
+ "vmovn.u16 d4, q2 \n"
+
+ // Shuffle 2,3 reg around so that 2 can be added to the
+ // 0,1 reg and 3 can be added to the 4,5 reg. This
+ // requires expanding from u8 to u16 as the 0,1 and 4,5
+ // registers are already expanded. Then do transposes
+ // to get aligned.
+ // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+ "vmovl.u8 q1, d2 \n"
+ "vmovl.u8 q3, d6 \n"
+ "vmovl.u8 q9, d18 \n"
+
+ // combine source lines
+ "vadd.u16 q1, q3 \n"
+ "vadd.u16 q1, q9 \n"
+
+ // d4 = xx 20 xx 30 xx 22 xx 32
+ // d5 = xx 21 xx 31 xx 23 xx 33
+ "vtrn.u32 d2, d3 \n"
+
+ // d4 = xx 20 xx 21 xx 22 xx 23
+ // d5 = xx 30 xx 31 xx 32 xx 33
+ "vtrn.u16 d2, d3 \n"
+
+ // 0+1+2, 3+4+5
+ "vadd.u16 q0, q1 \n"
+
+ // Need to divide, but can't downshift as the the value
+ // isn't a power of 2. So multiply by 65536 / n
+ // and take the upper 16 bits.
+ "vqrdmulh.s16 q0, q0, q15 \n"
+
+ // Align for table lookup, vtbl requires registers to
+ // be adjacent
+ "vmov.u8 d2, d4 \n"
+
+ "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
+ "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
+
+ MEMACCESS(1)
+ "vst1.8 {d3}, [%1]! \n"
+ MEMACCESS(1)
+ "vst1.32 {d4[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride), // %3
+ "+r"(src_ptr1) // %4
+ : "r"(&kMult38_Div6), // %5
+ "r"(&kShuf38_2), // %6
+ "r"(&kMult38_Div9) // %7
+ : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"
+ );
+}
+#endif //HAS_SCALEROWDOWN38_NEON
+
+#ifdef HAS_SCALEROWDOWN38_NEON
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ MEMACCESS(4)
+ "vld1.16 {q13}, [%4] \n"
+ MEMACCESS(5)
+ "vld1.8 {q14}, [%5] \n"
+ "add %3, %0 \n"
+ ".p2align 2 \n"
+ "1: \n"
+
+ // d0 = 00 40 01 41 02 42 03 43
+ // d1 = 10 50 11 51 12 52 13 53
+ // d2 = 20 60 21 61 22 62 23 63
+ // d3 = 30 70 31 71 32 72 33 73
+ MEMACCESS(0)
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
+ MEMACCESS(3)
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
+ "subs %2, %2, #12 \n"
+
+ // Shuffle the input data around to get align the data
+ // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+ // d0 = 00 10 01 11 02 12 03 13
+ // d1 = 40 50 41 51 42 52 43 53
+ "vtrn.u8 d0, d1 \n"
+ "vtrn.u8 d4, d5 \n"
+
+ // d2 = 20 30 21 31 22 32 23 33
+ // d3 = 60 70 61 71 62 72 63 73
+ "vtrn.u8 d2, d3 \n"
+ "vtrn.u8 d6, d7 \n"
+
+ // d0 = 00+10 01+11 02+12 03+13
+ // d2 = 40+50 41+51 42+52 43+53
+ "vpaddl.u8 q0, q0 \n"
+ "vpaddl.u8 q2, q2 \n"
+
+ // d3 = 60+70 61+71 62+72 63+73
+ "vpaddl.u8 d3, d3 \n"
+ "vpaddl.u8 d7, d7 \n"
+
+ // combine source lines
+ "vadd.u16 q0, q2 \n"
+ "vadd.u16 d4, d3, d7 \n"
+
+ // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
+ "vqrshrn.u16 d4, q2, #2 \n"
+
+ // Shuffle 2,3 reg around so that 2 can be added to the
+ // 0,1 reg and 3 can be added to the 4,5 reg. This
+ // requires expanding from u8 to u16 as the 0,1 and 4,5
+ // registers are already expanded. Then do transposes
+ // to get aligned.
+ // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+ "vmovl.u8 q1, d2 \n"
+ "vmovl.u8 q3, d6 \n"
+
+ // combine source lines
+ "vadd.u16 q1, q3 \n"
+
+ // d4 = xx 20 xx 30 xx 22 xx 32
+ // d5 = xx 21 xx 31 xx 23 xx 33
+ "vtrn.u32 d2, d3 \n"
+
+ // d4 = xx 20 xx 21 xx 22 xx 23
+ // d5 = xx 30 xx 31 xx 32 xx 33
+ "vtrn.u16 d2, d3 \n"
+
+ // 0+1+2, 3+4+5
+ "vadd.u16 q0, q1 \n"
+
+ // Need to divide, but can't downshift as the the value
+ // isn't a power of 2. So multiply by 65536 / n
+ // and take the upper 16 bits.
+ "vqrdmulh.s16 q0, q0, q13 \n"
+
+ // Align for table lookup, vtbl requires registers to
+ // be adjacent
+ "vmov.u8 d2, d4 \n"
+
+ "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
+ "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
+
+ MEMACCESS(1)
+ "vst1.8 {d3}, [%1]! \n"
+ MEMACCESS(1)
+ "vst1.32 {d4[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ : "r"(&kMult38_Div6), // %4
+ "r"(&kShuf38_2) // %5
+ : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"
+ );
+}
+#endif //HAS_SCALEROWDOWN38_NEON
+
+#if 0
+// 16x2 -> 16x1
+void ScaleFilterRows_NEON(uint8* dst_ptr,
+ const uint8* src_ptr, ptrdiff_t src_stride,
+ int dst_width, int source_y_fraction) {
+ asm volatile (
+ "cmp %4, #0 \n"
+ "beq 100f \n"
+ "add %2, %1 \n"
+ "cmp %4, #64 \n"
+ "beq 75f \n"
+ "cmp %4, #128 \n"
+ "beq 50f \n"
+ "cmp %4, #192 \n"
+ "beq 25f \n"
+
+ "vdup.8 d5, %4 \n"
+ "rsb %4, #256 \n"
+ "vdup.8 d4, %4 \n"
+ // General purpose row blend.
+ "1: \n"
+ MEMACCESS(1)
+ "vld1.8 {q0}, [%1]! \n"
+ MEMACCESS(2)
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vmull.u8 q13, d0, d4 \n"
+ "vmull.u8 q14, d1, d4 \n"
+ "vmlal.u8 q13, d2, d5 \n"
+ "vmlal.u8 q14, d3, d5 \n"
+ "vrshrn.u16 d0, q13, #8 \n"
+ "vrshrn.u16 d1, q14, #8 \n"
+ MEMACCESS(0)
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 1b \n"
+ "b 99f \n"
+
+ // Blend 25 / 75.
+ "25: \n"
+ MEMACCESS(1)
+ "vld1.8 {q0}, [%1]! \n"
+ MEMACCESS(2)
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vrhadd.u8 q0, q1 \n"
+ MEMACCESS(0)
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 25b \n"
+ "b 99f \n"
+
+ // Blend 50 / 50.
+ "50: \n"
+ MEMACCESS(1)
+ "vld1.8 {q0}, [%1]! \n"
+ MEMACCESS(2)
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ MEMACCESS(0)
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 50b \n"
+ "b 99f \n"
+
+ // Blend 75 / 25.
+ "75: \n"
+ MEMACCESS(1)
+ "vld1.8 {q1}, [%1]! \n"
+ MEMACCESS(2)
+ "vld1.8 {q0}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vrhadd.u8 q0, q1 \n"
+ MEMACCESS(0)
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 75b \n"
+ "b 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ "100: \n"
+ MEMACCESS(1)
+ "vld1.8 {q0}, [%1]! \n"
+ "subs %3, %3, #16 \n"
+ MEMACCESS(0)
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 100b \n"
+
+ "99: \n"
+ MEMACCESS(0)
+ "vst1.8 {d1[7]}, [%0] \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(src_stride), // %2
+ "+r"(dst_width), // %3
+ "+r"(source_y_fraction) // %4
+ :
+ : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
+ );
+}
+#endif //0
+
+#ifdef HAS_SCALEARGBROWDOWN2_NEON
+void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width) {
+ asm volatile (
+ ".p2align 2 \n"
+ "1: \n"
+ // load even pixels into q0, odd into q1
+ MEMACCESS(0)
+ "vld2.32 {q0, q1}, [%0]! \n"
+ MEMACCESS(0)
+ "vld2.32 {q2, q3}, [%0]! \n"
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ MEMACCESS(1)
+ "vst1.8 {q1}, [%1]! \n" // store odd pixels
+ MEMACCESS(1)
+ "vst1.8 {q3}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+#endif //HAS_SCALEARGBROWDOWN2_NEON
+
+#ifdef HAS_SCALEARGBROWDOWN2_NEON
+void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width) {
+ asm volatile (
+ // change the stride to row 2 pointer
+ "add %1, %1, %0 \n"
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ MEMACCESS(0)
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
+ MEMACCESS(1)
+ "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
+ MEMACCESS(1)
+ "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
+ "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts.
+ "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts.
+ "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
+ "vrshrn.u16 d1, q1, #2 \n"
+ "vrshrn.u16 d2, q2, #2 \n"
+ "vrshrn.u16 d3, q3, #2 \n"
+ MEMACCESS(2)
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
+ );
+}
+#endif //HAS_SCALEARGBROWDOWN2_NEON
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+ int src_stepx, uint8* dst_argb, int dst_width) {
+ asm volatile (
+ "mov r12, %3, lsl #2 \n"
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld1.32 {d0[0]}, [%0], r12 \n"
+ MEMACCESS(0)
+ "vld1.32 {d0[1]}, [%0], r12 \n"
+ MEMACCESS(0)
+ "vld1.32 {d1[0]}, [%0], r12 \n"
+ MEMACCESS(0)
+ "vld1.32 {d1[1]}, [%0], r12 \n"
+ "subs %2, %2, #4 \n" // 4 pixels per loop.
+ MEMACCESS(1)
+ "vst1.8 {q0}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ : "r"(src_stepx) // %3
+ : "memory", "cc", "r12", "q0"
+ );
+}
+#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+ int src_stepx,
+ uint8* dst_argb, int dst_width) {
+ asm volatile (
+ "mov r12, %4, lsl #2 \n"
+ "add %1, %1, %0 \n"
+ ".p2align 2 \n"
+ "1: \n"
+ MEMACCESS(0)
+ "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
+ MEMACCESS(1)
+ "vld1.8 {d1}, [%1], r12 \n"
+ MEMACCESS(0)
+ "vld1.8 {d2}, [%0], r12 \n"
+ MEMACCESS(1)
+ "vld1.8 {d3}, [%1], r12 \n"
+ MEMACCESS(0)
+ "vld1.8 {d4}, [%0], r12 \n"
+ MEMACCESS(1)
+ "vld1.8 {d5}, [%1], r12 \n"
+ MEMACCESS(0)
+ "vld1.8 {d6}, [%0], r12 \n"
+ MEMACCESS(1)
+ "vld1.8 {d7}, [%1], r12 \n"
+ "vaddl.u8 q0, d0, d1 \n"
+ "vaddl.u8 q1, d2, d3 \n"
+ "vaddl.u8 q2, d4, d5 \n"
+ "vaddl.u8 q3, d6, d7 \n"
+ "vswp.8 d1, d2 \n" // ab_cd -> ac_bd
+ "vswp.8 d5, d6 \n" // ef_gh -> eg_fh
+ "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d)
+ "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h)
+ "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
+ "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
+ "subs %3, %3, #4 \n" // 4 pixels per loop.
+ MEMACCESS(2)
+ "vst1.8 {q0}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst_argb), // %2
+ "+r"(dst_width) // %3
+ : "r"(src_stepx) // %4
+ : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
+ );
+}
+#endif // HAS_SCALEARGBROWDOWNEVEN_NEON
+#endif // __aarch64__
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/tools_common.h b/tools_common.h
index 558413e..c1f466b 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -103,17 +103,25 @@
extern "C" {
#endif
+#if defined(__GNUC__)
+#define VPX_NO_RETURN __attribute__((noreturn))
+#else
+#define VPX_NO_RETURN
+#endif
+
/* Sets a stdio stream into binary mode */
FILE *set_binary_mode(FILE *stream);
-void die(const char *fmt, ...);
-void fatal(const char *fmt, ...);
+void die(const char *fmt, ...) VPX_NO_RETURN;
+void fatal(const char *fmt, ...) VPX_NO_RETURN;
void warn(const char *fmt, ...);
-void die_codec(vpx_codec_ctx_t *ctx, const char *s);
+void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;
/* The tool including this file must define usage_exit() */
-void usage_exit();
+void usage_exit() VPX_NO_RETURN;
+
+#undef VPX_NO_RETURN
int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame);
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index b05ad14..d48c4fe 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -224,7 +224,7 @@
int arnr_strength;
int arnr_type;
- struct vpx_fixed_buf two_pass_stats_in;
+ vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vp8e_tuning tuning;
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index fd9afd2..902a83a 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -446,19 +446,16 @@
# Forward DCT
#
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_fdct4x4 mmx sse2 media neon_asm/;
+specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/;
$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
-$vp8_short_fdct4x4_neon_asm=vp8_short_fdct4x4_neon;
add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_fdct8x4 mmx sse2 media neon_asm/;
+specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/;
$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
-$vp8_short_fdct8x4_neon_asm=vp8_short_fdct8x4_neon;
add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_walsh4x4 sse2 media neon_asm/;
+specialize qw/vp8_short_walsh4x4 sse2 media neon/;
$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
-$vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon;
#
# Quantizer
@@ -503,19 +500,16 @@
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
-specialize qw/vp8_subtract_b mmx sse2 media neon_asm/;
+specialize qw/vp8_subtract_b mmx sse2 media neon/;
$vp8_subtract_b_media=vp8_subtract_b_armv6;
-$vp8_subtract_b_neon_asm=vp8_subtract_b_neon;
add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
-specialize qw/vp8_subtract_mby mmx sse2 media neon_asm/;
+specialize qw/vp8_subtract_mby mmx sse2 media neon/;
$vp8_subtract_mby_media=vp8_subtract_mby_armv6;
-$vp8_subtract_mby_neon_asm=vp8_subtract_mby_neon;
add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
-specialize qw/vp8_subtract_mbuv mmx sse2 media neon_asm/;
+specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
-$vp8_subtract_mbuv_neon_asm=vp8_subtract_mbuv_neon;
#
# Motion search
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm
deleted file mode 100644
index 5ea8dd8..0000000
--- a/vp8/encoder/arm/neon/shortfdct_neon.asm
+++ /dev/null
@@ -1,221 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_short_fdct4x4_neon|
- EXPORT |vp8_short_fdct8x4_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=4
-
-
- ALIGN 16 ; enable use of @128 bit aligned loads
-coeff
- DCW 5352, 5352, 5352, 5352
- DCW 2217, 2217, 2217, 2217
- DCD 14500, 14500, 14500, 14500
- DCD 7500, 7500, 7500, 7500
- DCD 12000, 12000, 12000, 12000
- DCD 51000, 51000, 51000, 51000
-
-;void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
-|vp8_short_fdct4x4_neon| PROC
-
- ; Part one
- vld1.16 {d0}, [r0@64], r2
- adr r12, coeff
- vld1.16 {d1}, [r0@64], r2
- vld1.16 {q8}, [r12@128]! ; d16=5352, d17=2217
- vld1.16 {d2}, [r0@64], r2
- vld1.32 {q9, q10}, [r12@128]! ; q9=14500, q10=7500
- vld1.16 {d3}, [r0@64], r2
-
- ; transpose d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3]
- vtrn.32 d0, d2
- vtrn.32 d1, d3
- vld1.32 {q11,q12}, [r12@128] ; q11=12000, q12=51000
- vtrn.16 d0, d1
- vtrn.16 d2, d3
-
- vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[3]
- vadd.s16 d5, d1, d2 ; b1 = ip[1] + ip[2]
- vsub.s16 d6, d1, d2 ; c1 = ip[1] - ip[2]
- vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[3]
-
- vshl.s16 q2, q2, #3 ; (a1, b1) << 3
- vshl.s16 q3, q3, #3 ; (c1, d1) << 3
-
- vadd.s16 d0, d4, d5 ; op[0] = a1 + b1
- vsub.s16 d2, d4, d5 ; op[2] = a1 - b1
-
- vmlal.s16 q9, d7, d16 ; d1*5352 + 14500
- vmlal.s16 q10, d7, d17 ; d1*2217 + 7500
- vmlal.s16 q9, d6, d17 ; c1*2217 + d1*5352 + 14500
- vmlsl.s16 q10, d6, d16 ; d1*2217 - c1*5352 + 7500
-
- vshrn.s32 d1, q9, #12 ; op[1] = (c1*2217 + d1*5352 + 14500)>>12
- vshrn.s32 d3, q10, #12 ; op[3] = (d1*2217 - c1*5352 + 7500)>>12
-
-
- ; Part two
-
- ; transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
- vtrn.32 d0, d2
- vtrn.32 d1, d3
- vtrn.16 d0, d1
- vtrn.16 d2, d3
-
- vmov.s16 d26, #7
-
- vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[12]
- vadd.s16 d5, d1, d2 ; b1 = ip[4] + ip[8]
- vsub.s16 d6, d1, d2 ; c1 = ip[4] - ip[8]
- vadd.s16 d4, d4, d26 ; a1 + 7
- vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[12]
-
- vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 + 7
- vsub.s16 d2, d4, d5 ; op[8] = a1 - b1 + 7
-
- vmlal.s16 q11, d7, d16 ; d1*5352 + 12000
- vmlal.s16 q12, d7, d17 ; d1*2217 + 51000
-
- vceq.s16 d4, d7, #0
-
- vshr.s16 d0, d0, #4
- vshr.s16 d2, d2, #4
-
- vmlal.s16 q11, d6, d17 ; c1*2217 + d1*5352 + 12000
- vmlsl.s16 q12, d6, d16 ; d1*2217 - c1*5352 + 51000
-
- vmvn d4, d4
- vshrn.s32 d1, q11, #16 ; op[4] = (c1*2217 + d1*5352 + 12000)>>16
- vsub.s16 d1, d1, d4 ; op[4] += (d1!=0)
- vshrn.s32 d3, q12, #16 ; op[12]= (d1*2217 - c1*5352 + 51000)>>16
-
- vst1.16 {q0, q1}, [r1@128]
-
- bx lr
-
- ENDP
-
-;void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
-|vp8_short_fdct8x4_neon| PROC
-
- ; Part one
-
- vld1.16 {q0}, [r0@128], r2
- adr r12, coeff
- vld1.16 {q1}, [r0@128], r2
- vld1.16 {q8}, [r12@128]! ; d16=5352, d17=2217
- vld1.16 {q2}, [r0@128], r2
- vld1.32 {q9, q10}, [r12@128]! ; q9=14500, q10=7500
- vld1.16 {q3}, [r0@128], r2
-
- ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3]
- vtrn.32 q0, q2 ; [A0|B0]
- vtrn.32 q1, q3 ; [A1|B1]
- vtrn.16 q0, q1 ; [A2|B2]
- vtrn.16 q2, q3 ; [A3|B3]
-
- vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[3]
- vadd.s16 q12, q1, q2 ; b1 = ip[1] + ip[2]
- vsub.s16 q13, q1, q2 ; c1 = ip[1] - ip[2]
- vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[3]
-
- vshl.s16 q11, q11, #3 ; a1 << 3
- vshl.s16 q12, q12, #3 ; b1 << 3
- vshl.s16 q13, q13, #3 ; c1 << 3
- vshl.s16 q14, q14, #3 ; d1 << 3
-
- vadd.s16 q0, q11, q12 ; [A0 | B0] = a1 + b1
- vsub.s16 q2, q11, q12 ; [A2 | B2] = a1 - b1
-
- vmov.s16 q11, q9 ; 14500
- vmov.s16 q12, q10 ; 7500
-
- vmlal.s16 q9, d28, d16 ; A[1] = d1*5352 + 14500
- vmlal.s16 q10, d28, d17 ; A[3] = d1*2217 + 7500
- vmlal.s16 q11, d29, d16 ; B[1] = d1*5352 + 14500
- vmlal.s16 q12, d29, d17 ; B[3] = d1*2217 + 7500
-
- vmlal.s16 q9, d26, d17 ; A[1] = c1*2217 + d1*5352 + 14500
- vmlsl.s16 q10, d26, d16 ; A[3] = d1*2217 - c1*5352 + 7500
- vmlal.s16 q11, d27, d17 ; B[1] = c1*2217 + d1*5352 + 14500
- vmlsl.s16 q12, d27, d16 ; B[3] = d1*2217 - c1*5352 + 7500
-
- vshrn.s32 d2, q9, #12 ; A[1] = (c1*2217 + d1*5352 + 14500)>>12
- vshrn.s32 d6, q10, #12 ; A[3] = (d1*2217 - c1*5352 + 7500)>>12
- vshrn.s32 d3, q11, #12 ; B[1] = (c1*2217 + d1*5352 + 14500)>>12
- vshrn.s32 d7, q12, #12 ; B[3] = (d1*2217 - c1*5352 + 7500)>>12
-
-
- ; Part two
- vld1.32 {q9,q10}, [r12@128] ; q9=12000, q10=51000
-
- ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12]
- vtrn.32 q0, q2 ; q0=[A0 | B0]
- vtrn.32 q1, q3 ; q1=[A4 | B4]
- vtrn.16 q0, q1 ; q2=[A8 | B8]
- vtrn.16 q2, q3 ; q3=[A12|B12]
-
- vmov.s16 q15, #7
-
- vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[12]
- vadd.s16 q12, q1, q2 ; b1 = ip[4] + ip[8]
- vadd.s16 q11, q11, q15 ; a1 + 7
- vsub.s16 q13, q1, q2 ; c1 = ip[4] - ip[8]
- vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[12]
-
- vadd.s16 q0, q11, q12 ; a1 + b1 + 7
- vsub.s16 q1, q11, q12 ; a1 - b1 + 7
-
- vmov.s16 q11, q9 ; 12000
- vmov.s16 q12, q10 ; 51000
-
- vshr.s16 d0, d0, #4 ; A[0] = (a1 + b1 + 7)>>4
- vshr.s16 d4, d1, #4 ; B[0] = (a1 + b1 + 7)>>4
- vshr.s16 d2, d2, #4 ; A[8] = (a1 + b1 + 7)>>4
- vshr.s16 d6, d3, #4 ; B[8] = (a1 + b1 + 7)>>4
-
-
- vmlal.s16 q9, d28, d16 ; A[4] = d1*5352 + 12000
- vmlal.s16 q10, d28, d17 ; A[12] = d1*2217 + 51000
- vmlal.s16 q11, d29, d16 ; B[4] = d1*5352 + 12000
- vmlal.s16 q12, d29, d17 ; B[12] = d1*2217 + 51000
-
- vceq.s16 q14, q14, #0
-
- vmlal.s16 q9, d26, d17 ; A[4] = c1*2217 + d1*5352 + 12000
- vmlsl.s16 q10, d26, d16 ; A[12] = d1*2217 - c1*5352 + 51000
- vmlal.s16 q11, d27, d17 ; B[4] = c1*2217 + d1*5352 + 12000
- vmlsl.s16 q12, d27, d16 ; B[12] = d1*2217 - c1*5352 + 51000
-
- vmvn q14, q14
-
- vshrn.s32 d1, q9, #16 ; A[4] = (c1*2217 + d1*5352 + 12000)>>16
- vshrn.s32 d3, q10, #16 ; A[12]= (d1*2217 - c1*5352 + 51000)>>16
- vsub.s16 d1, d1, d28 ; A[4] += (d1!=0)
-
- vshrn.s32 d5, q11, #16 ; B[4] = (c1*2217 + d1*5352 + 12000)>>16
- vshrn.s32 d7, q12, #16 ; B[12]= (d1*2217 - c1*5352 + 51000)>>16
- vsub.s16 d5, d5, d29 ; B[4] += (d1!=0)
-
- vst1.16 {q0, q1}, [r1@128]! ; block A
- vst1.16 {q2, q3}, [r1@128]! ; block B
-
- bx lr
-
- ENDP
-
- END
-
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.c b/vp8/encoder/arm/neon/shortfdct_neon.c
new file mode 100644
index 0000000..391e5f9
--- /dev/null
+++ b/vp8/encoder/arm/neon/shortfdct_neon.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_short_fdct4x4_neon(
+ int16_t *input,
+ int16_t *output,
+ int pitch) {
+ int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
+ int16x4_t d16s16, d17s16, d26s16, dEmptys16;
+ uint16x4_t d4u16;
+ int16x8_t q0s16, q1s16;
+ int32x4_t q9s32, q10s32, q11s32, q12s32;
+ int16x4x2_t v2tmp0, v2tmp1;
+ int32x2x2_t v2tmp2, v2tmp3;
+
+ d16s16 = vdup_n_s16(5352);
+ d17s16 = vdup_n_s16(2217);
+ q9s32 = vdupq_n_s32(14500);
+ q10s32 = vdupq_n_s32(7500);
+ q11s32 = vdupq_n_s32(12000);
+ q12s32 = vdupq_n_s32(51000);
+
+ // Part one
+ pitch >>= 1;
+ d0s16 = vld1_s16(input);
+ input += pitch;
+ d1s16 = vld1_s16(input);
+ input += pitch;
+ d2s16 = vld1_s16(input);
+ input += pitch;
+ d3s16 = vld1_s16(input);
+
+ v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16),
+ vreinterpret_s32_s16(d2s16));
+ v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16),
+ vreinterpret_s32_s16(d3s16));
+ v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0
+ vreinterpret_s16_s32(v2tmp3.val[0])); // d1
+ v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2
+ vreinterpret_s16_s32(v2tmp3.val[1])); // d3
+
+ d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
+ d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
+
+ d4s16 = vshl_n_s16(d4s16, 3);
+ d5s16 = vshl_n_s16(d5s16, 3);
+ d6s16 = vshl_n_s16(d6s16, 3);
+ d7s16 = vshl_n_s16(d7s16, 3);
+
+ d0s16 = vadd_s16(d4s16, d5s16);
+ d2s16 = vsub_s16(d4s16, d5s16);
+
+ q9s32 = vmlal_s16(q9s32, d7s16, d16s16);
+ q10s32 = vmlal_s16(q10s32, d7s16, d17s16);
+ q9s32 = vmlal_s16(q9s32, d6s16, d17s16);
+ q10s32 = vmlsl_s16(q10s32, d6s16, d16s16);
+
+ d1s16 = vshrn_n_s32(q9s32, 12);
+ d3s16 = vshrn_n_s32(q10s32, 12);
+
+ // Part two
+ v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16),
+ vreinterpret_s32_s16(d2s16));
+ v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16),
+ vreinterpret_s32_s16(d3s16));
+ v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0
+ vreinterpret_s16_s32(v2tmp3.val[0])); // d1
+ v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2
+ vreinterpret_s16_s32(v2tmp3.val[1])); // d3
+
+ d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
+ d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
+
+ d26s16 = vdup_n_s16(7);
+ d4s16 = vadd_s16(d4s16, d26s16);
+
+ d0s16 = vadd_s16(d4s16, d5s16);
+ d2s16 = vsub_s16(d4s16, d5s16);
+
+ q11s32 = vmlal_s16(q11s32, d7s16, d16s16);
+ q12s32 = vmlal_s16(q12s32, d7s16, d17s16);
+
+ dEmptys16 = vdup_n_s16(0);
+ d4u16 = vceq_s16(d7s16, dEmptys16);
+
+ d0s16 = vshr_n_s16(d0s16, 4);
+ d2s16 = vshr_n_s16(d2s16, 4);
+
+ q11s32 = vmlal_s16(q11s32, d6s16, d17s16);
+ q12s32 = vmlsl_s16(q12s32, d6s16, d16s16);
+
+ d4u16 = vmvn_u16(d4u16);
+ d1s16 = vshrn_n_s32(q11s32, 16);
+ d1s16 = vsub_s16(d1s16, vreinterpret_s16_u16(d4u16));
+ d3s16 = vshrn_n_s32(q12s32, 16);
+
+ q0s16 = vcombine_s16(d0s16, d1s16);
+ q1s16 = vcombine_s16(d2s16, d3s16);
+
+ vst1q_s16(output, q0s16);
+ vst1q_s16(output + 8, q1s16);
+ return;
+}
+
+void vp8_short_fdct8x4_neon(
+ int16_t *input,
+ int16_t *output,
+ int pitch) {
+ int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
+ int16x4_t d16s16, d17s16, d26s16, d27s16, d28s16, d29s16;
+ uint16x4_t d28u16, d29u16;
+ uint16x8_t q14u16;
+ int16x8_t q0s16, q1s16, q2s16, q3s16;
+ int16x8_t q11s16, q12s16, q13s16, q14s16, q15s16, qEmptys16;
+ int32x4_t q9s32, q10s32, q11s32, q12s32;
+ int16x8x2_t v2tmp0, v2tmp1;
+ int32x4x2_t v2tmp2, v2tmp3;
+
+ d16s16 = vdup_n_s16(5352);
+ d17s16 = vdup_n_s16(2217);
+ q9s32 = vdupq_n_s32(14500);
+ q10s32 = vdupq_n_s32(7500);
+
+ // Part one
+ pitch >>= 1;
+ q0s16 = vld1q_s16(input);
+ input += pitch;
+ q1s16 = vld1q_s16(input);
+ input += pitch;
+ q2s16 = vld1q_s16(input);
+ input += pitch;
+ q3s16 = vld1q_s16(input);
+
+ v2tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q0s16),
+ vreinterpretq_s32_s16(q2s16));
+ v2tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q1s16),
+ vreinterpretq_s32_s16(q3s16));
+ v2tmp0 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[0]), // q0
+ vreinterpretq_s16_s32(v2tmp3.val[0])); // q1
+ v2tmp1 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[1]), // q2
+ vreinterpretq_s16_s32(v2tmp3.val[1])); // q3
+
+ q11s16 = vaddq_s16(v2tmp0.val[0], v2tmp1.val[1]);
+ q12s16 = vaddq_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ q13s16 = vsubq_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ q14s16 = vsubq_s16(v2tmp0.val[0], v2tmp1.val[1]);
+
+ q11s16 = vshlq_n_s16(q11s16, 3);
+ q12s16 = vshlq_n_s16(q12s16, 3);
+ q13s16 = vshlq_n_s16(q13s16, 3);
+ q14s16 = vshlq_n_s16(q14s16, 3);
+
+ q0s16 = vaddq_s16(q11s16, q12s16);
+ q2s16 = vsubq_s16(q11s16, q12s16);
+
+ q11s32 = q9s32;
+ q12s32 = q10s32;
+
+ d26s16 = vget_low_s16(q13s16);
+ d27s16 = vget_high_s16(q13s16);
+ d28s16 = vget_low_s16(q14s16);
+ d29s16 = vget_high_s16(q14s16);
+
+ q9s32 = vmlal_s16(q9s32, d28s16, d16s16);
+ q10s32 = vmlal_s16(q10s32, d28s16, d17s16);
+ q11s32 = vmlal_s16(q11s32, d29s16, d16s16);
+ q12s32 = vmlal_s16(q12s32, d29s16, d17s16);
+
+ q9s32 = vmlal_s16(q9s32, d26s16, d17s16);
+ q10s32 = vmlsl_s16(q10s32, d26s16, d16s16);
+ q11s32 = vmlal_s16(q11s32, d27s16, d17s16);
+ q12s32 = vmlsl_s16(q12s32, d27s16, d16s16);
+
+ d2s16 = vshrn_n_s32(q9s32, 12);
+ d6s16 = vshrn_n_s32(q10s32, 12);
+ d3s16 = vshrn_n_s32(q11s32, 12);
+ d7s16 = vshrn_n_s32(q12s32, 12);
+ q1s16 = vcombine_s16(d2s16, d3s16);
+ q3s16 = vcombine_s16(d6s16, d7s16);
+
+ // Part two
+ q9s32 = vdupq_n_s32(12000);
+ q10s32 = vdupq_n_s32(51000);
+
+ v2tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q0s16),
+ vreinterpretq_s32_s16(q2s16));
+ v2tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q1s16),
+ vreinterpretq_s32_s16(q3s16));
+ v2tmp0 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[0]), // q0
+ vreinterpretq_s16_s32(v2tmp3.val[0])); // q1
+ v2tmp1 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[1]), // q2
+ vreinterpretq_s16_s32(v2tmp3.val[1])); // q3
+
+ q11s16 = vaddq_s16(v2tmp0.val[0], v2tmp1.val[1]);
+ q12s16 = vaddq_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ q13s16 = vsubq_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ q14s16 = vsubq_s16(v2tmp0.val[0], v2tmp1.val[1]);
+
+ q15s16 = vdupq_n_s16(7);
+ q11s16 = vaddq_s16(q11s16, q15s16);
+ q0s16 = vaddq_s16(q11s16, q12s16);
+ q1s16 = vsubq_s16(q11s16, q12s16);
+
+ q11s32 = q9s32;
+ q12s32 = q10s32;
+
+ d0s16 = vget_low_s16(q0s16);
+ d1s16 = vget_high_s16(q0s16);
+ d2s16 = vget_low_s16(q1s16);
+ d3s16 = vget_high_s16(q1s16);
+
+ d0s16 = vshr_n_s16(d0s16, 4);
+ d4s16 = vshr_n_s16(d1s16, 4);
+ d2s16 = vshr_n_s16(d2s16, 4);
+ d6s16 = vshr_n_s16(d3s16, 4);
+
+ d26s16 = vget_low_s16(q13s16);
+ d27s16 = vget_high_s16(q13s16);
+ d28s16 = vget_low_s16(q14s16);
+ d29s16 = vget_high_s16(q14s16);
+
+ q9s32 = vmlal_s16(q9s32, d28s16, d16s16);
+ q10s32 = vmlal_s16(q10s32, d28s16, d17s16);
+ q11s32 = vmlal_s16(q11s32, d29s16, d16s16);
+ q12s32 = vmlal_s16(q12s32, d29s16, d17s16);
+
+ q9s32 = vmlal_s16(q9s32, d26s16, d17s16);
+ q10s32 = vmlsl_s16(q10s32, d26s16, d16s16);
+ q11s32 = vmlal_s16(q11s32, d27s16, d17s16);
+ q12s32 = vmlsl_s16(q12s32, d27s16, d16s16);
+
+ d1s16 = vshrn_n_s32(q9s32, 16);
+ d3s16 = vshrn_n_s32(q10s32, 16);
+ d5s16 = vshrn_n_s32(q11s32, 16);
+ d7s16 = vshrn_n_s32(q12s32, 16);
+
+ qEmptys16 = vdupq_n_s16(0);
+ q14u16 = vceqq_s16(q14s16, qEmptys16);
+ q14u16 = vmvnq_u16(q14u16);
+
+ d28u16 = vget_low_u16(q14u16);
+ d29u16 = vget_high_u16(q14u16);
+ d1s16 = vsub_s16(d1s16, vreinterpret_s16_u16(d28u16));
+ d5s16 = vsub_s16(d5s16, vreinterpret_s16_u16(d29u16));
+
+ q0s16 = vcombine_s16(d0s16, d1s16);
+ q1s16 = vcombine_s16(d2s16, d3s16);
+ q2s16 = vcombine_s16(d4s16, d5s16);
+ q3s16 = vcombine_s16(d6s16, d7s16);
+
+ vst1q_s16(output, q0s16);
+ vst1q_s16(output + 8, q1s16);
+ vst1q_s16(output + 16, q2s16);
+ vst1q_s16(output + 24, q3s16);
+ return;
+}
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
deleted file mode 100644
index 840cb33..0000000
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ /dev/null
@@ -1,205 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
- EXPORT |vp8_subtract_b_neon|
- EXPORT |vp8_subtract_mby_neon|
- EXPORT |vp8_subtract_mbuv_neon|
-
- INCLUDE vp8_asm_enc_offsets.asm
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
-|vp8_subtract_b_neon| PROC
-
- stmfd sp!, {r4-r7}
-
- ldr r3, [r0, #vp8_block_base_src]
- ldr r4, [r0, #vp8_block_src]
- ldr r5, [r0, #vp8_block_src_diff]
- ldr r3, [r3]
- ldr r6, [r0, #vp8_block_src_stride]
- add r3, r3, r4 ; src = *base_src + src
- ldr r7, [r1, #vp8_blockd_predictor]
-
- vld1.8 {d0}, [r3], r6 ;load src
- vld1.8 {d1}, [r7], r2 ;load pred
- vld1.8 {d2}, [r3], r6
- vld1.8 {d3}, [r7], r2
- vld1.8 {d4}, [r3], r6
- vld1.8 {d5}, [r7], r2
- vld1.8 {d6}, [r3], r6
- vld1.8 {d7}, [r7], r2
-
- vsubl.u8 q10, d0, d1
- vsubl.u8 q11, d2, d3
- vsubl.u8 q12, d4, d5
- vsubl.u8 q13, d6, d7
-
- mov r2, r2, lsl #1
-
- vst1.16 {d20}, [r5], r2 ;store diff
- vst1.16 {d22}, [r5], r2
- vst1.16 {d24}, [r5], r2
- vst1.16 {d26}, [r5], r2
-
- ldmfd sp!, {r4-r7}
- bx lr
-
- ENDP
-
-
-;==========================================
-;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride
-; unsigned char *pred, int pred_stride)
-|vp8_subtract_mby_neon| PROC
- push {r4-r7}
- vpush {d8-d15}
-
- mov r12, #4
- ldr r4, [sp, #80] ; pred_stride
- mov r6, #32 ; "diff" stride x2
- add r5, r0, #16 ; second diff pointer
-
-subtract_mby_loop
- vld1.8 {q0}, [r1], r2 ;load src
- vld1.8 {q1}, [r3], r4 ;load pred
- vld1.8 {q2}, [r1], r2
- vld1.8 {q3}, [r3], r4
- vld1.8 {q4}, [r1], r2
- vld1.8 {q5}, [r3], r4
- vld1.8 {q6}, [r1], r2
- vld1.8 {q7}, [r3], r4
-
- vsubl.u8 q8, d0, d2
- vsubl.u8 q9, d1, d3
- vsubl.u8 q10, d4, d6
- vsubl.u8 q11, d5, d7
- vsubl.u8 q12, d8, d10
- vsubl.u8 q13, d9, d11
- vsubl.u8 q14, d12, d14
- vsubl.u8 q15, d13, d15
-
- vst1.16 {q8}, [r0], r6 ;store diff
- vst1.16 {q9}, [r5], r6
- vst1.16 {q10}, [r0], r6
- vst1.16 {q11}, [r5], r6
- vst1.16 {q12}, [r0], r6
- vst1.16 {q13}, [r5], r6
- vst1.16 {q14}, [r0], r6
- vst1.16 {q15}, [r5], r6
-
- subs r12, r12, #1
- bne subtract_mby_loop
-
- vpop {d8-d15}
- pop {r4-r7}
- bx lr
- ENDP
-
-;=================================
-;void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
-; int src_stride, unsigned char *upred,
-; unsigned char *vpred, int pred_stride)
-
-|vp8_subtract_mbuv_neon| PROC
- push {r4-r7}
- vpush {d8-d15}
-
- ldr r4, [sp, #80] ; upred
- ldr r5, [sp, #84] ; vpred
- ldr r6, [sp, #88] ; pred_stride
- add r0, r0, #512 ; short *udiff = diff + 256;
- mov r12, #32 ; "diff" stride x2
- add r7, r0, #16 ; second diff pointer
-
-;u
- vld1.8 {d0}, [r1], r3 ;load usrc
- vld1.8 {d1}, [r4], r6 ;load upred
- vld1.8 {d2}, [r1], r3
- vld1.8 {d3}, [r4], r6
- vld1.8 {d4}, [r1], r3
- vld1.8 {d5}, [r4], r6
- vld1.8 {d6}, [r1], r3
- vld1.8 {d7}, [r4], r6
- vld1.8 {d8}, [r1], r3
- vld1.8 {d9}, [r4], r6
- vld1.8 {d10}, [r1], r3
- vld1.8 {d11}, [r4], r6
- vld1.8 {d12}, [r1], r3
- vld1.8 {d13}, [r4], r6
- vld1.8 {d14}, [r1], r3
- vld1.8 {d15}, [r4], r6
-
- vsubl.u8 q8, d0, d1
- vsubl.u8 q9, d2, d3
- vsubl.u8 q10, d4, d5
- vsubl.u8 q11, d6, d7
- vsubl.u8 q12, d8, d9
- vsubl.u8 q13, d10, d11
- vsubl.u8 q14, d12, d13
- vsubl.u8 q15, d14, d15
-
- vst1.16 {q8}, [r0], r12 ;store diff
- vst1.16 {q9}, [r7], r12
- vst1.16 {q10}, [r0], r12
- vst1.16 {q11}, [r7], r12
- vst1.16 {q12}, [r0], r12
- vst1.16 {q13}, [r7], r12
- vst1.16 {q14}, [r0], r12
- vst1.16 {q15}, [r7], r12
-
-;v
- vld1.8 {d0}, [r2], r3 ;load vsrc
- vld1.8 {d1}, [r5], r6 ;load vpred
- vld1.8 {d2}, [r2], r3
- vld1.8 {d3}, [r5], r6
- vld1.8 {d4}, [r2], r3
- vld1.8 {d5}, [r5], r6
- vld1.8 {d6}, [r2], r3
- vld1.8 {d7}, [r5], r6
- vld1.8 {d8}, [r2], r3
- vld1.8 {d9}, [r5], r6
- vld1.8 {d10}, [r2], r3
- vld1.8 {d11}, [r5], r6
- vld1.8 {d12}, [r2], r3
- vld1.8 {d13}, [r5], r6
- vld1.8 {d14}, [r2], r3
- vld1.8 {d15}, [r5], r6
-
- vsubl.u8 q8, d0, d1
- vsubl.u8 q9, d2, d3
- vsubl.u8 q10, d4, d5
- vsubl.u8 q11, d6, d7
- vsubl.u8 q12, d8, d9
- vsubl.u8 q13, d10, d11
- vsubl.u8 q14, d12, d13
- vsubl.u8 q15, d14, d15
-
- vst1.16 {q8}, [r0], r12 ;store diff
- vst1.16 {q9}, [r7], r12
- vst1.16 {q10}, [r0], r12
- vst1.16 {q11}, [r7], r12
- vst1.16 {q12}, [r0], r12
- vst1.16 {q13}, [r7], r12
- vst1.16 {q14}, [r0], r12
- vst1.16 {q15}, [r7], r12
-
- vpop {d8-d15}
- pop {r4-r7}
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/encoder/arm/neon/subtract_neon.c b/vp8/encoder/arm/neon/subtract_neon.c
new file mode 100644
index 0000000..d3ab7b1
--- /dev/null
+++ b/vp8/encoder/arm/neon/subtract_neon.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "vp8/encoder/block.h"
+
+void vp8_subtract_b_neon(
+ BLOCK *be,
+ BLOCKD *bd,
+ int pitch) {
+ unsigned char *src_ptr, *predictor;
+ int src_stride;
+ int16_t *src_diff;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
+ uint16x8_t q10u16, q11u16, q12u16, q13u16;
+
+ src_ptr = *be->base_src + be->src;
+ src_stride = be->src_stride;
+ predictor = bd->predictor;
+
+ d0u8 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d2u8 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d4u8 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d6u8 = vld1_u8(src_ptr);
+
+ d1u8 = vld1_u8(predictor);
+ predictor += pitch;
+ d3u8 = vld1_u8(predictor);
+ predictor += pitch;
+ d5u8 = vld1_u8(predictor);
+ predictor += pitch;
+ d7u8 = vld1_u8(predictor);
+
+ q10u16 = vsubl_u8(d0u8, d1u8);
+ q11u16 = vsubl_u8(d2u8, d3u8);
+ q12u16 = vsubl_u8(d4u8, d5u8);
+ q13u16 = vsubl_u8(d6u8, d7u8);
+
+ src_diff = be->src_diff;
+ vst1_u16((uint16_t *)src_diff, vget_low_u16(q10u16));
+ src_diff += pitch;
+ vst1_u16((uint16_t *)src_diff, vget_low_u16(q11u16));
+ src_diff += pitch;
+ vst1_u16((uint16_t *)src_diff, vget_low_u16(q12u16));
+ src_diff += pitch;
+ vst1_u16((uint16_t *)src_diff, vget_low_u16(q13u16));
+ return;
+}
+
+void vp8_subtract_mby_neon(
+ int16_t *diff,
+ unsigned char *src,
+ int src_stride,
+ unsigned char *pred,
+ int pred_stride) {
+ int i;
+ uint8x16_t q0u8, q1u8, q2u8, q3u8;
+ uint16x8_t q8u16, q9u16, q10u16, q11u16;
+
+ for (i = 0; i < 8; i++) { // subtract_mby_loop
+ q0u8 = vld1q_u8(src);
+ src += src_stride;
+ q2u8 = vld1q_u8(src);
+ src += src_stride;
+ q1u8 = vld1q_u8(pred);
+ pred += pred_stride;
+ q3u8 = vld1q_u8(pred);
+ pred += pred_stride;
+
+ q8u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q1u8));
+ q9u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q1u8));
+ q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q3u8));
+ q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q3u8));
+
+ vst1q_u16((uint16_t *)diff, q8u16);
+ diff += 8;
+ vst1q_u16((uint16_t *)diff, q9u16);
+ diff += 8;
+ vst1q_u16((uint16_t *)diff, q10u16);
+ diff += 8;
+ vst1q_u16((uint16_t *)diff, q11u16);
+ diff += 8;
+ }
+ return;
+}
+
+void vp8_subtract_mbuv_neon(
+ int16_t *diff,
+ unsigned char *usrc,
+ unsigned char *vsrc,
+ int src_stride,
+ unsigned char *upred,
+ unsigned char *vpred,
+ int pred_stride) {
+ int i, j;
+ unsigned char *src_ptr, *pred_ptr;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
+ uint16x8_t q8u16, q9u16, q10u16, q11u16;
+
+ diff += 256;
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ src_ptr = usrc;
+ pred_ptr = upred;
+ } else if (i == 1) {
+ src_ptr = vsrc;
+ pred_ptr = vpred;
+ }
+
+ for (j = 0; j < 2; j++) {
+ d0u8 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d1u8 = vld1_u8(pred_ptr);
+ pred_ptr += pred_stride;
+ d2u8 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d3u8 = vld1_u8(pred_ptr);
+ pred_ptr += pred_stride;
+ d4u8 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d5u8 = vld1_u8(pred_ptr);
+ pred_ptr += pred_stride;
+ d6u8 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d7u8 = vld1_u8(pred_ptr);
+ pred_ptr += pred_stride;
+
+ q8u16 = vsubl_u8(d0u8, d1u8);
+ q9u16 = vsubl_u8(d2u8, d3u8);
+ q10u16 = vsubl_u8(d4u8, d5u8);
+ q11u16 = vsubl_u8(d6u8, d7u8);
+
+ vst1q_u16((uint16_t *)diff, q8u16);
+ diff += 8;
+ vst1q_u16((uint16_t *)diff, q9u16);
+ diff += 8;
+ vst1q_u16((uint16_t *)diff, q10u16);
+ diff += 8;
+ vst1q_u16((uint16_t *)diff, q11u16);
+ diff += 8;
+ }
+ }
+ return;
+}
diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
deleted file mode 100644
index 22266297..0000000
--- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
+++ /dev/null
@@ -1,103 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_short_walsh4x4_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_short_walsh4x4_neon(short *input, short *output, int pitch)
-; r0 short *input,
-; r1 short *output,
-; r2 int pitch
-|vp8_short_walsh4x4_neon| PROC
-
- vld1.16 {d0}, [r0@64], r2 ; load input
- vld1.16 {d1}, [r0@64], r2
- vld1.16 {d2}, [r0@64], r2
- vld1.16 {d3}, [r0@64]
-
- ;First for-loop
- ;transpose d0, d1, d2, d3. Then, d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3]
- vtrn.32 d0, d2
- vtrn.32 d1, d3
-
- vmov.s32 q15, #3 ; add 3 to all values
-
- vtrn.16 d0, d1
- vtrn.16 d2, d3
-
- vadd.s16 d4, d0, d2 ; ip[0] + ip[2]
- vadd.s16 d5, d1, d3 ; ip[1] + ip[3]
- vsub.s16 d6, d1, d3 ; ip[1] - ip[3]
- vsub.s16 d7, d0, d2 ; ip[0] - ip[2]
-
- vshl.s16 d4, d4, #2 ; a1 = (ip[0] + ip[2]) << 2
- vshl.s16 d5, d5, #2 ; d1 = (ip[1] + ip[3]) << 2
- vshl.s16 d6, d6, #2 ; c1 = (ip[1] - ip[3]) << 2
- vceq.s16 d16, d4, #0 ; a1 == 0
- vshl.s16 d7, d7, #2 ; b1 = (ip[0] - ip[2]) << 2
-
- vadd.s16 d0, d4, d5 ; a1 + d1
- vmvn d16, d16 ; a1 != 0
- vsub.s16 d3, d4, d5 ; op[3] = a1 - d1
- vadd.s16 d1, d7, d6 ; op[1] = b1 + c1
- vsub.s16 d2, d7, d6 ; op[2] = b1 - c1
- vsub.s16 d0, d0, d16 ; op[0] = a1 + d1 + (a1 != 0)
-
- ;Second for-loop
- ;transpose d0, d1, d2, d3, Then, d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
- vtrn.32 d1, d3
- vtrn.32 d0, d2
- vtrn.16 d2, d3
- vtrn.16 d0, d1
-
- vaddl.s16 q8, d0, d2 ; a1 = ip[0]+ip[8]
- vaddl.s16 q9, d1, d3 ; d1 = ip[4]+ip[12]
- vsubl.s16 q10, d1, d3 ; c1 = ip[4]-ip[12]
- vsubl.s16 q11, d0, d2 ; b1 = ip[0]-ip[8]
-
- vadd.s32 q0, q8, q9 ; a2 = a1 + d1
- vadd.s32 q1, q11, q10 ; b2 = b1 + c1
- vsub.s32 q2, q11, q10 ; c2 = b1 - c1
- vsub.s32 q3, q8, q9 ; d2 = a1 - d1
-
- vclt.s32 q8, q0, #0
- vclt.s32 q9, q1, #0
- vclt.s32 q10, q2, #0
- vclt.s32 q11, q3, #0
-
- ; subtract -1 (or 0)
- vsub.s32 q0, q0, q8 ; a2 += a2 < 0
- vsub.s32 q1, q1, q9 ; b2 += b2 < 0
- vsub.s32 q2, q2, q10 ; c2 += c2 < 0
- vsub.s32 q3, q3, q11 ; d2 += d2 < 0
-
- vadd.s32 q8, q0, q15 ; a2 + 3
- vadd.s32 q9, q1, q15 ; b2 + 3
- vadd.s32 q10, q2, q15 ; c2 + 3
- vadd.s32 q11, q3, q15 ; d2 + 3
-
- ; vrshrn? would add 1 << 3-1 = 2
- vshrn.s32 d0, q8, #3
- vshrn.s32 d1, q9, #3
- vshrn.s32 d2, q10, #3
- vshrn.s32 d3, q11, #3
-
- vst1.16 {q0, q1}, [r1@128]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
new file mode 100644
index 0000000..d6b67f8
--- /dev/null
+++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_short_walsh4x4_neon(
+ int16_t *input,
+ int16_t *output,
+ int pitch) {
+ uint16x4_t d16u16;
+ int16x8_t q0s16, q1s16;
+ int16x4_t dEmptys16, d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
+ int32x4_t qEmptys32, q0s32, q1s32, q2s32, q3s32, q8s32;
+ int32x4_t q9s32, q10s32, q11s32, q15s32;
+ uint32x4_t q8u32, q9u32, q10u32, q11u32;
+ int16x4x2_t v2tmp0, v2tmp1;
+ int32x2x2_t v2tmp2, v2tmp3;
+
+ dEmptys16 = vdup_n_s16(0);
+ qEmptys32 = vdupq_n_s32(0);
+ q15s32 = vdupq_n_s32(3);
+
+ d0s16 = vld1_s16(input);
+ input += pitch/2;
+ d1s16 = vld1_s16(input);
+ input += pitch/2;
+ d2s16 = vld1_s16(input);
+ input += pitch/2;
+ d3s16 = vld1_s16(input);
+
+ v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16),
+ vreinterpret_s32_s16(d2s16));
+ v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16),
+ vreinterpret_s32_s16(d3s16));
+ v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0
+ vreinterpret_s16_s32(v2tmp3.val[0])); // d1
+ v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2
+ vreinterpret_s16_s32(v2tmp3.val[1])); // d3
+
+ d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[0]);
+ d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[1]);
+ d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[1]);
+ d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[0]);
+
+ d4s16 = vshl_n_s16(d4s16, 2);
+ d5s16 = vshl_n_s16(d5s16, 2);
+ d6s16 = vshl_n_s16(d6s16, 2);
+ d7s16 = vshl_n_s16(d7s16, 2);
+
+ d16u16 = vceq_s16(d4s16, dEmptys16);
+ d16u16 = vmvn_u16(d16u16);
+
+ d0s16 = vadd_s16(d4s16, d5s16);
+ d3s16 = vsub_s16(d4s16, d5s16);
+ d1s16 = vadd_s16(d7s16, d6s16);
+ d2s16 = vsub_s16(d7s16, d6s16);
+
+ d0s16 = vsub_s16(d0s16, vreinterpret_s16_u16(d16u16));
+
+ // Second for-loop
+ v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d1s16),
+ vreinterpret_s32_s16(d3s16));
+ v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d0s16),
+ vreinterpret_s32_s16(d2s16));
+ v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[1]), // d2
+ vreinterpret_s16_s32(v2tmp2.val[1])); // d3
+ v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[0]), // d0
+ vreinterpret_s16_s32(v2tmp2.val[0])); // d1
+
+ q8s32 = vaddl_s16(v2tmp1.val[0], v2tmp0.val[0]);
+ q9s32 = vaddl_s16(v2tmp1.val[1], v2tmp0.val[1]);
+ q10s32 = vsubl_s16(v2tmp1.val[1], v2tmp0.val[1]);
+ q11s32 = vsubl_s16(v2tmp1.val[0], v2tmp0.val[0]);
+
+ q0s32 = vaddq_s32(q8s32, q9s32);
+ q1s32 = vaddq_s32(q11s32, q10s32);
+ q2s32 = vsubq_s32(q11s32, q10s32);
+ q3s32 = vsubq_s32(q8s32, q9s32);
+
+ q8u32 = vcltq_s32(q0s32, qEmptys32);
+ q9u32 = vcltq_s32(q1s32, qEmptys32);
+ q10u32 = vcltq_s32(q2s32, qEmptys32);
+ q11u32 = vcltq_s32(q3s32, qEmptys32);
+
+ q8s32 = vreinterpretq_s32_u32(q8u32);
+ q9s32 = vreinterpretq_s32_u32(q9u32);
+ q10s32 = vreinterpretq_s32_u32(q10u32);
+ q11s32 = vreinterpretq_s32_u32(q11u32);
+
+ q0s32 = vsubq_s32(q0s32, q8s32);
+ q1s32 = vsubq_s32(q1s32, q9s32);
+ q2s32 = vsubq_s32(q2s32, q10s32);
+ q3s32 = vsubq_s32(q3s32, q11s32);
+
+ q8s32 = vaddq_s32(q0s32, q15s32);
+ q9s32 = vaddq_s32(q1s32, q15s32);
+ q10s32 = vaddq_s32(q2s32, q15s32);
+ q11s32 = vaddq_s32(q3s32, q15s32);
+
+ d0s16 = vshrn_n_s32(q8s32, 3);
+ d1s16 = vshrn_n_s32(q9s32, 3);
+ d2s16 = vshrn_n_s32(q10s32, 3);
+ d3s16 = vshrn_n_s32(q11s32, 3);
+
+ q0s16 = vcombine_s16(d0s16, d1s16);
+ q1s16 = vcombine_s16(d2s16, d3s16);
+
+ vst1q_s16(output, q0s16);
+ vst1q_s16(output + 8, q1s16);
+ return;
+}
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 4621d13..2da0d8c 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -430,6 +430,7 @@
vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]);
}
vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_last_source);
vpx_free(denoiser->denoise_state);
}
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index d29dd39..38b8999 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3303,15 +3303,17 @@
int skip = 2;
// Only select blocks for computing nmse that have been encoded
// as ZERO LAST min_consec_zero_last frames in a row.
- int min_consec_zero_last = 10;
+ // Scale with number of temporal layers.
+ int min_consec_zero_last = 8 / cpi->oxcf.number_of_layers;
// Decision is tested for changing the denoising mode every
// num_mode_change times this function is called. Note that this
// function called every 8 frames, so (8 * num_mode_change) is number
// of frames where denoising mode change is tested for switch.
int num_mode_change = 15;
// Framerate factor, to compensate for larger mse at lower framerates.
- // TODO(marpan): Adjust this factor,
- int fac_framerate = cpi->output_framerate < 25.0f ? 80 : 100;
+ // Use ref_framerate, which is full source framerate for temporal layers.
+ // TODO(marpan): Adjust this factor.
+ int fac_framerate = cpi->ref_framerate < 25.0f ? 80 : 100;
int tot_num_blocks = cm->mb_rows * cm->mb_cols;
int ystride = cpi->Source->y_stride;
unsigned char *src = cpi->Source->y_buffer;
@@ -3380,13 +3382,13 @@
// num_mode_change.
if (cpi->denoiser.nmse_source_diff_count == num_mode_change) {
// Check for going up: from normal to aggressive mode.
- if ((cpi->denoiser.denoiser_mode = kDenoiserOnYUV) &&
+ if ((cpi->denoiser.denoiser_mode == kDenoiserOnYUV) &&
(cpi->denoiser.nmse_source_diff >
cpi->denoiser.threshold_aggressive_mode)) {
vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUVAggressive);
} else {
// Check for going down: from aggressive to normal mode.
- if ((cpi->denoiser.denoiser_mode = kDenoiserOnYUVAggressive) &&
+ if ((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) &&
(cpi->denoiser.nmse_source_diff <
cpi->denoiser.threshold_aggressive_mode)) {
vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUV);
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 0c522bd..1654cd8 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -39,40 +39,28 @@
};
-struct extraconfig_map
-{
- int usage;
- struct vp8_extracfg cfg;
-};
-
-static const struct extraconfig_map extracfg_map[] =
-{
- {
- 0,
- {
- NULL,
+static struct vp8_extracfg default_extracfg = {
+ NULL,
#if !(CONFIG_REALTIME_ONLY)
- 0, /* cpu_used */
+ 0, /* cpu_used */
#else
- 4, /* cpu_used */
+ 4, /* cpu_used */
#endif
- 0, /* enable_auto_alt_ref */
- 0, /* noise_sensitivity */
- 0, /* Sharpness */
- 0, /* static_thresh */
+ 0, /* enable_auto_alt_ref */
+ 0, /* noise_sensitivity */
+ 0, /* Sharpness */
+ 0, /* static_thresh */
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
- VP8_EIGHT_TOKENPARTITION,
+ VP8_EIGHT_TOKENPARTITION,
#else
- VP8_ONE_TOKENPARTITION, /* token_partitions */
+ VP8_ONE_TOKENPARTITION, /* token_partitions */
#endif
- 0, /* arnr_max_frames */
- 3, /* arnr_strength */
- 3, /* arnr_type*/
- 0, /* tuning*/
- 10, /* cq_level */
- 0, /* rc_max_intra_bitrate_pct */
- }
- }
+ 0, /* arnr_max_frames */
+ 3, /* arnr_strength */
+ 3, /* arnr_type*/
+ 0, /* tuning*/
+ 10, /* cq_level */
+ 0, /* rc_max_intra_bitrate_pct */
};
struct vpx_codec_alg_priv
@@ -632,9 +620,6 @@
{
vpx_codec_err_t res = VPX_CODEC_OK;
struct vpx_codec_alg_priv *priv;
- vpx_codec_enc_cfg_t *cfg;
- unsigned int i;
-
struct VP8_COMP *optr;
vp8_rtcd();
@@ -650,7 +635,6 @@
ctx->priv = &priv->base;
ctx->priv->sz = sizeof(*ctx->priv);
- ctx->priv->iface = ctx->iface;
ctx->priv->alg_priv = priv;
ctx->priv->init_flags = ctx->init_flags;
@@ -663,17 +647,8 @@
ctx->config.enc = &ctx->priv->alg_priv->cfg;
}
- cfg = &ctx->priv->alg_priv->cfg;
- /* Select the extra vp8 configuration table based on the current
- * usage value. If the current usage value isn't found, use the
- * values for usage case 0.
- */
- for (i = 0;
- extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
- i++);
-
- priv->vp8_cfg = extracfg_map[i].cfg;
+ priv->vp8_cfg = default_extracfg;
priv->vp8_cfg.pkt_list = &priv->pkt_list.head;
priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
@@ -1347,10 +1322,10 @@
vp8e_destroy, /* vpx_codec_destroy_fn_t destroy; */
vp8e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */
{
- NOT_IMPLEMENTED, /* vpx_codec_peek_si_fn_t peek_si; */
- NOT_IMPLEMENTED, /* vpx_codec_get_si_fn_t get_si; */
- NOT_IMPLEMENTED, /* vpx_codec_decode_fn_t decode; */
- NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */
+ NULL, /* vpx_codec_peek_si_fn_t peek_si; */
+ NULL, /* vpx_codec_get_si_fn_t get_si; */
+ NULL, /* vpx_codec_decode_fn_t decode; */
+ NULL, /* vpx_codec_frame_get_fn_t frame_get; */
},
{
1, /* 1 cfg map */
@@ -1358,7 +1333,7 @@
vp8e_encode, /* vpx_codec_encode_fn_t encode; */
vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */
vp8e_set_config,
- NOT_IMPLEMENTED,
+ NULL,
vp8e_get_preview,
vp8e_mr_alloc_mem,
} /* encoder functions */
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index c76ac14..05f32ba 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -84,7 +84,6 @@
(vpx_codec_priv_t *)vpx_memalign(8, sizeof(vpx_codec_alg_priv_t));
vpx_memset(ctx->priv, 0, sizeof(vpx_codec_alg_priv_t));
ctx->priv->sz = sizeof(*ctx->priv);
- ctx->priv->iface = ctx->iface;
ctx->priv->alg_priv = (vpx_codec_alg_priv_t *)ctx->priv;
ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
ctx->priv->alg_priv->decrypt_cb = NULL;
@@ -815,15 +814,15 @@
vp8_get_si, /* vpx_codec_get_si_fn_t get_si; */
vp8_decode, /* vpx_codec_decode_fn_t decode; */
vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */
- NOT_IMPLEMENTED,
+ NULL,
},
{ /* encoder functions */
0,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED,
- NOT_IMPLEMENTED
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
}
};
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 5733048..eb94202 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -37,10 +37,10 @@
# encoder
VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/fastquantizeb_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/picklpf_arm.c
-VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/shortfdct_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/subtract_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_mse16x16_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
+VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c
+VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon.c
+VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon.c
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
index 3eb7f9d..5d89da8 100644
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -34,6 +34,14 @@
int32_t col;
} MV32;
+static INLINE int is_zero_mv(const MV *mv) {
+ return *((const uint32_t *)mv) == 0;
+}
+
+static INLINE int is_equal_mv(const MV *a, const MV *b) {
+ return *((const uint32_t *)a) == *((const uint32_t *)b);
+}
+
static INLINE void clamp_mv(MV *mv, int min_col, int max_col,
int min_row, int max_row) {
mv->col = clamp(mv->col, min_col, max_col);
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index abda4e6..e4e6ce7 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -366,6 +366,9 @@
unsigned int width, unsigned int height, int pitch) {
unsigned int i, j;
+ // TODO(jbb): why does simd code use both but c doesn't, normalize and
+ // fix..
+ (void) bothclamp;
for (i = 0; i < height; i++) {
uint8_t *pos = start + i * pitch;
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 708f41b..92f9318 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -305,15 +305,15 @@
$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2 avx2/;
$vp9_convolve8_neon_asm=vp9_convolve8_neon;
add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2 avx2/;
$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2 avx2/;
$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
index d109e13..3bc7d39 100644
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -307,7 +307,7 @@
__m256i addFilterReg64;
__m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
__m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
- __m256i srcReg32b11, srcReg32b12, srcReg32b13, filtersReg32;
+ __m256i srcReg32b11, srcReg32b12, filtersReg32;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
unsigned int i;
unsigned int src_stride, dst_stride;
@@ -409,35 +409,35 @@
// multiply 2 adjacent elements with the filter and add the result
srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
- srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
- srcReg32b8 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
// add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b8);
-
// multiply 2 adjacent elements with the filter and add the result
srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
- srcReg32b6 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
-
- // multiply 2 adjacent elements with the filter and add the result
srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
- srcReg32b13 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
-
// add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
_mm256_min_epi16(srcReg32b8, srcReg32b12));
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
- _mm256_min_epi16(srcReg32b6, srcReg32b13));
-
- // add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
_mm256_max_epi16(srcReg32b8, srcReg32b12));
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
- _mm256_max_epi16(srcReg32b6, srcReg32b13));
+ // multiply 2 adjacent elements with the filter and add the result
+ srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
+ srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
+
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
+ srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
+
+ // add and saturate the results together
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+ _mm256_min_epi16(srcReg32b8, srcReg32b12));
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+ _mm256_max_epi16(srcReg32b8, srcReg32b12));
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64);
srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64);
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index a0fff45..572ab0e 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -627,11 +627,12 @@
"Width and height beyond allowed size.");
#endif
if (cm->width != width || cm->height != height) {
- const int new_rows = ALIGN_POWER_OF_TWO(height,
- MI_SIZE_LOG2) >> MI_SIZE_LOG2;
- const int new_cols = ALIGN_POWER_OF_TWO(width,
- MI_SIZE_LOG2) >> MI_SIZE_LOG2;
- if (calc_mi_size(new_rows) * calc_mi_size(new_cols) > cm->mi_alloc_size) {
+ const int new_mi_rows =
+ calc_mi_size(ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2);
+ const int new_mi_cols =
+ calc_mi_size(ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2);
+ if (new_mi_cols > cm->mi_stride ||
+ (new_mi_rows * new_mi_cols > cm->mi_alloc_size)) {
if (vp9_alloc_context_buffers(cm, width, height))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 1a41558..e79dcf3 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -314,3 +314,67 @@
vp9_clear_system_state();
return ret;
}
+
+vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
+ size_t data_sz,
+ uint32_t sizes[8], int *count,
+ vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state) {
+ // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
+ // it is a super frame index. If the last byte of real video compression
+ // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
+ // not the associated matching marker byte at the front of the index we have
+ // an invalid bitstream and need to return an error.
+
+ uint8_t marker;
+
+ assert(data_sz);
+ marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
+ *count = 0;
+
+ if ((marker & 0xe0) == 0xc0) {
+ const uint32_t frames = (marker & 0x7) + 1;
+ const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+ const size_t index_sz = 2 + mag * frames;
+
+ // This chunk is marked as having a superframe index but doesn't have
+ // enough data for it, thus it's an invalid superframe index.
+ if (data_sz < index_sz)
+ return VPX_CODEC_CORRUPT_FRAME;
+
+ {
+ const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
+ data + data_sz - index_sz);
+
+ // This chunk is marked as having a superframe index but doesn't have
+ // the matching marker byte at the front of the index therefore it's an
+ // invalid chunk.
+ if (marker != marker2)
+ return VPX_CODEC_CORRUPT_FRAME;
+ }
+
+ {
+ // Found a valid superframe index.
+ uint32_t i, j;
+ const uint8_t *x = &data[data_sz - index_sz + 1];
+
+ // Frames has a maximum of 8 and mag has a maximum of 4.
+ uint8_t clear_buffer[32];
+ assert(sizeof(clear_buffer) >= frames * mag);
+ if (decrypt_cb) {
+ decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
+ x = clear_buffer;
+ }
+
+ for (i = 0; i < frames; ++i) {
+ uint32_t this_sz = 0;
+
+ for (j = 0; j < mag; ++j)
+ this_sz |= (*x++) << (j * 8);
+ sizes[i] = this_sz;
+ }
+ *count = frames;
+ }
+ }
+ return VPX_CODEC_OK;
+}
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 223b66f..848d212 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -78,6 +78,25 @@
void vp9_decoder_remove(struct VP9Decoder *pbi);
+static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state,
+ const uint8_t *data) {
+ if (decrypt_cb) {
+ uint8_t marker;
+ decrypt_cb(decrypt_state, data, &marker, 1);
+ return marker;
+ }
+ return *data;
+}
+
+// This function is exposed for use in tests, as well as the inlined function
+// "read_marker".
+vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
+ size_t data_sz,
+ uint32_t sizes[8], int *count,
+ vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c
index 2d5ec79..8c13d0d 100644
--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -28,7 +28,6 @@
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
(void)zbin_ptr;
@@ -39,7 +38,7 @@
if (!skip_block) {
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
-
+ int i;
const int16x8_t v_zero = vdupq_n_s16(0);
const int16x8_t v_one = vdupq_n_s16(1);
int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
@@ -50,13 +49,12 @@
v_round = vsetq_lane_s16(round_ptr[0], v_round, 0);
v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0);
v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
-
- for (i = 0; i < count; i += 8) {
- const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
- const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
+ // process dc and the first seven ac coeffs
+ {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
+ const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_abs_coeff = vabsq_s16(v_coeff);
- const int16x8_t v_tmp = vqaddq_s16(v_abs_coeff, v_round);
+ const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
vget_low_s16(v_quant));
const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
@@ -65,20 +63,39 @@
vshrn_n_s32(v_tmp_hi, 16));
const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
- const int16x8_t v_nz_iscan =
- vandq_s16(vmvnq_s16(vreinterpretq_s16_u16(v_nz_mask)), v_iscan_plus1);
+ const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
-
v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
-
- vst1q_s16(&qcoeff_ptr[i], v_qcoeff);
- vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
+ vst1q_s16(&qcoeff_ptr[0], v_qcoeff);
+ vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff);
v_round = vmovq_n_s16(round_ptr[1]);
v_quant = vmovq_n_s16(quant_ptr[1]);
v_dequant = vmovq_n_s16(dequant_ptr[1]);
}
+ // now process the rest of the ac coeffs
+ for (i = 8; i < count; i += 8) {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
+ const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
+ const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+ const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
+ vget_low_s16(v_quant));
+ const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
+ vget_high_s16(v_quant));
+ const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
+ vshrn_n_s32(v_tmp_hi, 16));
+ const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
+ const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
+ const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
+ const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
+ const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
+ const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+ v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
+ vst1q_s16(&qcoeff_ptr[i], v_qcoeff);
+ vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
+ }
{
const int16x4_t v_eobmax_3210 =
vmax_s16(vget_low_s16(v_eobmax_76543210),
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index bdb1338..5c30446 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -294,6 +294,7 @@
vp9_write_token(w, vp9_switchable_interp_tree,
cm->fc.switchable_interp_prob[ctx],
&switchable_interp_encodings[mbmi->interp_filter]);
+ ++cpi->interp_filter_selected[0][mbmi->interp_filter];
} else {
assert(mbmi->interp_filter == cm->interp_filter);
}
@@ -1083,7 +1084,16 @@
write_bitdepth_colorspace_sampling(cm, wb);
write_frame_size(cm, wb);
} else {
- if (!cm->show_frame)
+ // In spatial svc if it's not error_resilient_mode then we need to code all
+ // visible frames as invisible. But we need to keep the show_frame flag so
+ // that the publisher could know whether it is supposed to be visible.
+ // So we will code the show_frame flag as it is. Then code the intra_only
+ // bit here. This will make the bitstream incompatible. In the player we
+ // will change to show_frame flag to 0, then add an one byte frame with
+ // show_existing_frame flag which tells the decoder which frame we want to
+ // show.
+ if (!cm->show_frame ||
+ (is_spatial_svc(cpi) && cm->error_resilient_mode == 0))
vp9_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode)
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 90ea9cc..793a9da 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -370,8 +370,8 @@
ctx->newmv_sse = UINT_MAX;
}
-void vp9_denoiser_update_frame_stats(VP9_DENOISER *denoiser, MB_MODE_INFO *mbmi,
- unsigned int sse, PREDICTION_MODE mode,
+void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse,
+ PREDICTION_MODE mode,
PICK_MODE_CONTEXT *ctx) {
// TODO(tkopp): Use both MVs if possible
if (mbmi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index d93846f..8a91492 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -42,7 +42,7 @@
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
-void vp9_denoiser_update_frame_stats(VP9_DENOISER *denoiser, MB_MODE_INFO *mbmi,
+void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi,
unsigned int sse, PREDICTION_MODE mode,
PICK_MODE_CONTEXT *ctx);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 711354b..950a6c8 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1232,30 +1232,23 @@
}
}
-static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
+static int is_background(const VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col) {
- MACROBLOCK *x = &cpi->mb;
- uint8_t *src, *pre;
- int src_stride, pre_stride;
-
+ // This assumes the input source frames are of the same dimension.
const int row8x8_remaining = tile->mi_row_end - mi_row;
const int col8x8_remaining = tile->mi_col_end - mi_col;
-
+ const int x = mi_col * MI_SIZE;
+ const int y = mi_row * MI_SIZE;
+ const int src_stride = cpi->Source->y_stride;
+ const uint8_t *const src = &cpi->Source->y_buffer[y * src_stride + x];
+ const int pre_stride = cpi->Last_Source->y_stride;
+ const uint8_t *const pre = &cpi->Last_Source->y_buffer[y * pre_stride + x];
int this_sad = 0;
int threshold = 0;
- // This assumes the input source frames are of the same dimension.
- src_stride = cpi->Source->y_stride;
- src = cpi->Source->y_buffer + (mi_row * MI_SIZE) * src_stride +
- (mi_col * MI_SIZE);
- pre_stride = cpi->Last_Source->y_stride;
- pre = cpi->Last_Source->y_buffer + (mi_row * MI_SIZE) * pre_stride +
- (mi_col * MI_SIZE);
-
if (row8x8_remaining >= MI_BLOCK_SIZE &&
col8x8_remaining >= MI_BLOCK_SIZE) {
- this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride,
- pre, pre_stride);
+ this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride, pre, pre_stride);
threshold = (1 << 12);
} else {
int r, c;
@@ -1266,8 +1259,7 @@
threshold = (row8x8_remaining * col8x8_remaining) << 6;
}
- x->in_static_area = (this_sad < 2 * threshold);
- return x->in_static_area;
+ return this_sad < 2 * threshold;
}
static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8,
@@ -3119,7 +3111,7 @@
break;
case REFERENCE_PARTITION:
if (sf->partition_check ||
- !is_background(cpi, tile, mi_row, mi_col)) {
+ !(x->in_static_area = is_background(cpi, tile, mi_row, mi_col))) {
set_modeinfo_offsets(cm, xd, mi_row, mi_col);
auto_partition_range(cpi, tile, mi_row, mi_col,
&sf->min_partition_size,
@@ -3297,7 +3289,6 @@
vp9_zero(cm->counts);
vp9_zero(cpi->coef_counts);
- vp9_zero(cpi->tx_stepdown_count);
vp9_zero(rd_opt->comp_pred_diff);
vp9_zero(rd_opt->filter_diff);
vp9_zero(rd_opt->tx_select_diff);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 7a9ff5b..8464882 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -131,8 +131,10 @@
if (!is_spatial_svc(cpi))
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
+ vp9_zero(cpi->interp_filter_selected);
} else {
cm->fc = cm->frame_contexts[cm->frame_context_idx];
+ vp9_zero(cpi->interp_filter_selected[0]);
}
}
@@ -559,10 +561,8 @@
// Temporal scalability.
cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
- if ((cpi->svc.number_temporal_layers > 1 &&
- cpi->oxcf.rc_mode == VPX_CBR) ||
- (cpi->svc.number_spatial_layers > 1 &&
- cpi->oxcf.mode == TWO_PASS_SECOND_BEST)) {
+ if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
+ (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) {
vp9_init_layer_context(cpi);
}
@@ -577,6 +577,20 @@
set_tile_limits(cpi);
}
+static void set_rc_buffer_sizes(RATE_CONTROL *rc,
+ const VP9EncoderConfig *oxcf) {
+ const int64_t bandwidth = oxcf->target_bandwidth;
+ const int64_t starting = oxcf->starting_buffer_level_ms;
+ const int64_t optimal = oxcf->optimal_buffer_level_ms;
+ const int64_t maximum = oxcf->maximum_buffer_size_ms;
+
+ rc->starting_buffer_level = starting * bandwidth / 1000;
+ rc->optimal_buffer_level = (optimal == 0) ? bandwidth / 8
+ : optimal * bandwidth / 1000;
+ rc->maximum_buffer_size = (maximum == 0) ? bandwidth / 8
+ : maximum * bandwidth / 1000;
+}
+
void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -610,28 +624,8 @@
}
cpi->encode_breakout = cpi->oxcf.encode_breakout;
- // local file playback mode == really big buffer
- if (cpi->oxcf.rc_mode == VPX_VBR) {
- cpi->oxcf.starting_buffer_level_ms = 60000;
- cpi->oxcf.optimal_buffer_level_ms = 60000;
- cpi->oxcf.maximum_buffer_size_ms = 240000;
- }
+ set_rc_buffer_sizes(rc, &cpi->oxcf);
- rc->starting_buffer_level = cpi->oxcf.starting_buffer_level_ms *
- cpi->oxcf.target_bandwidth / 1000;
-
- // Set or reset optimal and maximum buffer levels.
- if (cpi->oxcf.optimal_buffer_level_ms == 0)
- rc->optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
- else
- rc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level_ms *
- cpi->oxcf.target_bandwidth / 1000;
-
- if (cpi->oxcf.maximum_buffer_size_ms == 0)
- rc->maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
- else
- rc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size_ms *
- cpi->oxcf.target_bandwidth / 1000;
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
@@ -757,11 +751,6 @@
vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
cm->current_video_frame = 0;
-
- cpi->gold_is_last = 0;
- cpi->alt_is_last = 0;
- cpi->gold_is_alt = 0;
-
cpi->skippable_frame = 0;
// Create the encoder segmentation map and set all entries to 0
@@ -904,8 +893,6 @@
kf_list = fopen("kf_list.stt", "w");
#endif
- cpi->output_pkt_list = oxcf->output_pkt_list;
-
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
if (oxcf->pass == 1) {
@@ -1571,17 +1558,32 @@
ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[arf_idx], cm->new_fb_idx);
+ vpx_memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
}
if (cpi->refresh_golden_frame) {
ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+ if (!cpi->rc.is_src_frame_alt_ref)
+ vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ else
+ vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+ cpi->interp_filter_selected[ALTREF_FRAME],
+ sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
}
}
if (cpi->refresh_last_frame) {
ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
+ if (!cpi->rc.is_src_frame_alt_ref)
+ vpx_memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
}
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
@@ -1959,36 +1961,27 @@
} while (loop);
}
-static void get_ref_frame_flags(VP9_COMP *cpi) {
- if (cpi->refresh_last_frame & cpi->refresh_golden_frame)
- cpi->gold_is_last = 1;
- else if (cpi->refresh_last_frame ^ cpi->refresh_golden_frame)
- cpi->gold_is_last = 0;
+static int get_ref_frame_flags(const VP9_COMP *cpi) {
+ const int *const map = cpi->common.ref_frame_map;
+ const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
+ const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
+ const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
+ int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
- if (cpi->refresh_last_frame & cpi->refresh_alt_ref_frame)
- cpi->alt_is_last = 1;
- else if (cpi->refresh_last_frame ^ cpi->refresh_alt_ref_frame)
- cpi->alt_is_last = 0;
-
- if (cpi->refresh_alt_ref_frame & cpi->refresh_golden_frame)
- cpi->gold_is_alt = 1;
- else if (cpi->refresh_alt_ref_frame ^ cpi->refresh_golden_frame)
- cpi->gold_is_alt = 0;
-
- cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
-
- if (cpi->gold_is_last)
- cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
+ if (gold_is_last)
+ flags &= ~VP9_GOLD_FLAG;
if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
!is_spatial_svc(cpi))
- cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
+ flags &= ~VP9_GOLD_FLAG;
- if (cpi->alt_is_last)
- cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
+ if (alt_is_last)
+ flags &= ~VP9_ALT_FLAG;
- if (cpi->gold_is_alt)
- cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
+ if (gold_is_alt)
+ flags &= ~VP9_ALT_FLAG;
+
+ return flags;
}
static void set_ext_overrides(VP9_COMP *cpi) {
@@ -2057,19 +2050,69 @@
cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
}
+static void set_mv_search_params(VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const unsigned int max_mv_def = MIN(cm->width, cm->height);
+
+ // Default based on max resolution.
+ cpi->mv_step_param = vp9_init_search_range(max_mv_def);
+
+ if (cpi->sf.mv.auto_mv_step_size) {
+ if (frame_is_intra_only(cm)) {
+ // Initialize max_mv_magnitude for use in the first INTER frame
+ // after a key/intra-only frame.
+ cpi->max_mv_magnitude = max_mv_def;
+ } else {
+ if (cm->show_frame)
+ // Allow mv_steps to correspond to twice the max mv magnitude found
+ // in the previous frame, capped by the default max_mv_magnitude based
+ // on resolution.
+ cpi->mv_step_param =
+ vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+ cpi->max_mv_magnitude = 0;
+ }
+ }
+}
+
+
+int setup_interp_filter_search_mask(VP9_COMP *cpi) {
+ INTERP_FILTER ifilter;
+ int ref_total[MAX_REF_FRAMES] = {0};
+ MV_REFERENCE_FRAME ref;
+ int mask = 0;
+ if (cpi->common.last_frame_type == KEY_FRAME ||
+ cpi->refresh_alt_ref_frame)
+ return mask;
+ for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
+ for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
+ ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
+
+ for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
+ if ((ref_total[LAST_FRAME] &&
+ cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
+ (ref_total[GOLDEN_FRAME] == 0 ||
+ cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50
+ < ref_total[GOLDEN_FRAME]) &&
+ (ref_total[ALTREF_FRAME] == 0 ||
+ cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50
+ < ref_total[ALTREF_FRAME]))
+ mask |= 1 << ifilter;
+ }
+ return mask;
+}
+
static void encode_frame_to_data_rate(VP9_COMP *cpi,
size_t *size,
uint8_t *dest,
unsigned int *frame_flags) {
VP9_COMMON *const cm = &cpi->common;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ struct segmentation *const seg = &cm->seg;
TX_SIZE t;
int q;
int top_index;
int bottom_index;
- const SPEED_FEATURES *const sf = &cpi->sf;
- const unsigned int max_mv_def = MIN(cm->width, cm->height);
- struct segmentation *const seg = &cm->seg;
set_ext_overrides(cpi);
cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
@@ -2095,24 +2138,13 @@
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
- // Initialize cpi->mv_step_param to default based on max resolution.
- cpi->mv_step_param = vp9_init_search_range(max_mv_def);
- // Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
- if (sf->mv.auto_mv_step_size) {
- if (frame_is_intra_only(cm)) {
- // Initialize max_mv_magnitude for use in the first INTER frame
- // after a key/intra-only frame.
- cpi->max_mv_magnitude = max_mv_def;
- } else {
- if (cm->show_frame)
- // Allow mv_steps to correspond to twice the max mv magnitude found
- // in the previous frame, capped by the default max_mv_magnitude based
- // on resolution.
- cpi->mv_step_param = vp9_init_search_range(MIN(max_mv_def, 2 *
- cpi->max_mv_magnitude));
- cpi->max_mv_magnitude = 0;
- }
- }
+ set_mv_search_params(cpi);
+
+ if (cpi->oxcf.pass == 2 &&
+ cpi->sf.adaptive_interp_filter_search)
+ cpi->sf.interp_filter_search_mask =
+ setup_interp_filter_search_mask(cpi);
+
// Set various flags etc to special state if it is a key frame.
if (frame_is_intra_only(cm)) {
@@ -2128,9 +2160,7 @@
// The alternate reference frame cannot be active for a key frame.
cpi->rc.source_alt_ref_active = 0;
- cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0);
- cm->frame_parallel_decoding_mode =
- (cpi->oxcf.frame_parallel_decoding_mode != 0);
+ cm->error_resilient_mode = oxcf->error_resilient_mode;
// By default, encoder assumes decoder can use prev_mi.
if (cm->error_resilient_mode) {
@@ -2138,29 +2168,43 @@
cm->reset_frame_context = 0;
cm->refresh_frame_context = 0;
} else if (cm->intra_only) {
+ cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
// Only reset the current context.
cm->reset_frame_context = 2;
}
}
+ if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) {
+ cm->frame_context_idx = cpi->svc.spatial_layer_id;
+
+ // The probs will be updated based on the frame type of its previous
+ // frame if frame_parallel_decoding_mode is 0. The type may vary for
+ // the frame after a key frame in base layer since we may drop enhancement
+ // layers. So set frame_parallel_decoding_mode to 1 in this case.
+ if (cpi->svc.spatial_layer_id == 0 &&
+ cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
+ cm->frame_parallel_decoding_mode = 1;
+ else
+ cm->frame_parallel_decoding_mode = 0;
+ }
// Configure experimental use of segmentation for enhanced coding of
// static regions if indicated.
// Only allowed in second pass of two pass (as requires lagged coding)
// and if the relevant speed feature flag is set.
- if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
+ if (oxcf->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
// Check if the current frame is skippable for the partition search in the
// second pass according to the first pass stats
- if (cpi->oxcf.pass == 2 &&
+ if (oxcf->pass == 2 &&
(!cpi->use_svc || is_spatial_svc(cpi))) {
configure_skippable_frame(cpi);
}
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame.
- if (cpi->oxcf.pass == 0 &&
- cpi->oxcf.rc_mode == VPX_CBR &&
+ if (oxcf->pass == 0 &&
+ oxcf->rc_mode == VPX_CBR &&
cm->frame_type != KEY_FRAME) {
if (vp9_rc_drop_frame(cpi)) {
vp9_rc_postencode_update_drop_frame(cpi);
@@ -2172,9 +2216,9 @@
vp9_clear_system_state();
#if CONFIG_VP9_POSTPROC
- if (cpi->oxcf.noise_sensitivity > 0) {
+ if (oxcf->noise_sensitivity > 0) {
int l = 0;
- switch (cpi->oxcf.noise_sensitivity) {
+ switch (oxcf->noise_sensitivity) {
case 1:
l = 20;
break;
@@ -2215,7 +2259,7 @@
#if CONFIG_VP9_TEMPORAL_DENOISING
#ifdef OUTPUT_YUV_DENOISED
- if (cpi->oxcf.noise_sensitivity > 0) {
+ if (oxcf->noise_sensitivity > 0) {
vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME],
yuv_denoised_file);
}
@@ -2290,7 +2334,7 @@
else
cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
- get_ref_frame_flags(cpi);
+ cpi->ref_frame_flags = get_ref_frame_flags(cpi);
cm->last_frame_type = cm->frame_type;
vp9_rc_postencode_update(cpi, *size);
@@ -2317,8 +2361,12 @@
cm->last_height = cm->height;
// reset to normal state now that we are done.
- if (!cm->show_existing_frame)
- cm->last_show_frame = cm->show_frame;
+ if (!cm->show_existing_frame) {
+ if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)
+ cm->last_show_frame = 0;
+ else
+ cm->last_show_frame = cm->show_frame;
+ }
if (cm->show_frame) {
vp9_swap_mi_and_prev_mi(cm);
@@ -2329,6 +2377,10 @@
if (cpi->use_svc)
vp9_inc_frame_in_layer(&cpi->svc);
}
+
+ if (is_spatial_svc(cpi))
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =
+ cm->frame_type;
}
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
@@ -2443,8 +2495,8 @@
cm->seg.update_data;
}
-void adjust_frame_rate(VP9_COMP *cpi) {
- const struct lookahead_entry *const source = cpi->source;
+void adjust_frame_rate(VP9_COMP *cpi,
+ const struct lookahead_entry *source) {
int64_t this_duration;
int step = 0;
@@ -2500,7 +2552,8 @@
return arf_src_index;
}
-static void check_src_altref(VP9_COMP *cpi) {
+static void check_src_altref(VP9_COMP *cpi,
+ const struct lookahead_entry *source) {
RATE_CONTROL *const rc = &cpi->rc;
if (cpi->oxcf.pass == 2) {
@@ -2509,7 +2562,7 @@
(gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
} else {
rc->is_src_frame_alt_ref = cpi->alt_ref_source &&
- (cpi->source == cpi->alt_ref_source);
+ (source == cpi->alt_ref_source);
}
if (rc->is_src_frame_alt_ref) {
@@ -2531,6 +2584,8 @@
RATE_CONTROL *const rc = &cpi->rc;
struct vpx_usec_timer cmptimer;
YV12_BUFFER_CONFIG *force_src_buffer = NULL;
+ struct lookahead_entry *last_source = NULL;
+ struct lookahead_entry *source = NULL;
MV_REFERENCE_FRAME ref_frame;
int arf_src_index;
@@ -2543,9 +2598,6 @@
vpx_usec_timer_start(&cmptimer);
- cpi->source = NULL;
- cpi->last_source = NULL;
-
vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
// Normal defaults
@@ -2562,13 +2614,12 @@
#if CONFIG_SPATIAL_SVC
if (is_spatial_svc(cpi))
- cpi->source = vp9_svc_lookahead_peek(cpi, cpi->lookahead,
- arf_src_index, 0);
+ source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, arf_src_index, 0);
else
#endif
- cpi->source = vp9_lookahead_peek(cpi->lookahead, arf_src_index);
- if (cpi->source != NULL) {
- cpi->alt_ref_source = cpi->source;
+ source = vp9_lookahead_peek(cpi->lookahead, arf_src_index);
+ if (source != NULL) {
+ cpi->alt_ref_source = source;
#if CONFIG_SPATIAL_SVC
if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0) {
@@ -2602,46 +2653,44 @@
}
}
- if (!cpi->source) {
+ if (!source) {
// Get last frame source.
if (cm->current_video_frame > 0) {
#if CONFIG_SPATIAL_SVC
if (is_spatial_svc(cpi))
- cpi->last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0);
+ last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0);
else
#endif
- cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1);
- if (cpi->last_source == NULL)
+ last_source = vp9_lookahead_peek(cpi->lookahead, -1);
+ if (last_source == NULL)
return -1;
}
// Read in the source frame.
#if CONFIG_SPATIAL_SVC
if (is_spatial_svc(cpi))
- cpi->source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
+ source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
else
#endif
- cpi->source = vp9_lookahead_pop(cpi->lookahead, flush);
- if (cpi->source != NULL) {
+ source = vp9_lookahead_pop(cpi->lookahead, flush);
+ if (source != NULL) {
cm->show_frame = 1;
cm->intra_only = 0;
// Check to see if the frame should be encoded as an arf overlay.
- check_src_altref(cpi);
+ check_src_altref(cpi, source);
}
}
- if (cpi->source) {
+ if (source) {
cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer
- : &cpi->source->img;
+ : &source->img;
- cpi->unscaled_last_source = cpi->last_source != NULL ?
- &cpi->last_source->img : NULL;
+ cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
- *time_stamp = cpi->source->ts_start;
- *time_end = cpi->source->ts_end;
- *frame_flags =
- (cpi->source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
+ *time_stamp = source->ts_start;
+ *time_end = source->ts_end;
+ *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
} else {
*size = 0;
@@ -2652,9 +2701,9 @@
return -1;
}
- if (cpi->source->ts_start < cpi->first_time_stamp_ever) {
- cpi->first_time_stamp_ever = cpi->source->ts_start;
- cpi->last_end_time_stamp_seen = cpi->source->ts_start;
+ if (source->ts_start < cpi->first_time_stamp_ever) {
+ cpi->first_time_stamp_ever = source->ts_start;
+ cpi->last_end_time_stamp_seen = source->ts_start;
}
// Clear down mmx registers
@@ -2662,7 +2711,7 @@
// adjust frame rates based on timestamps given
if (cm->show_frame) {
- adjust_frame_rate(cpi);
+ adjust_frame_rate(cpi, source);
}
if (cpi->svc.number_temporal_layers > 1 &&
@@ -2734,7 +2783,7 @@
const int lossless = is_lossless_requested(oxcf);
cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
- vp9_first_pass(cpi);
+ vp9_first_pass(cpi, source);
} else if (oxcf->pass == 2 &&
(!cpi->use_svc || is_spatial_svc(cpi))) {
Pass2Encode(cpi, size, dest, frame_flags);
@@ -2812,12 +2861,12 @@
cpi->totalp_sq_error += psnr2.sse[0];
cpi->totalp_samples += psnr2.samples[0];
- frame_ssim2 = vp9_calc_ssim(orig, recon, 1, &weight);
+ frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
cpi->summed_quality += frame_ssim2 * weight;
cpi->summed_weights += weight;
- frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, 1, &weight);
+ frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
cpi->summedp_quality += frame_ssim2 * weight;
cpi->summedp_weights += weight;
@@ -2833,6 +2882,7 @@
}
}
+
if (cpi->b_calculate_ssimg) {
double y, u, v, frame_all;
frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index e6506a6..2dba67c 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -82,36 +82,19 @@
} VPX_SCALING;
typedef enum {
- // Good Quality Fast Encoding. The encoder balances quality with the
- // amount of time it takes to encode the output. (speed setting
- // controls how fast)
- ONE_PASS_GOOD = 1,
+ // Good Quality Fast Encoding. The encoder balances quality with the amount of
+ // time it takes to encode the output. Speed setting controls how fast.
+ GOOD,
- // One Pass - Best Quality. The encoder places priority on the
- // quality of the output over encoding speed. The output is compressed
- // at the highest possible quality. This option takes the longest
- // amount of time to encode. (speed setting ignored)
- ONE_PASS_BEST = 2,
+ // The encoder places priority on the quality of the output over encoding
+ // speed. The output is compressed at the highest possible quality. This
+ // option takes the longest amount of time to encode. Speed setting ignored.
+ BEST,
- // Two Pass - First Pass. The encoder generates a file of statistics
- // for use in the second encoding pass. (speed setting controls how fast)
- TWO_PASS_FIRST = 3,
-
- // Two Pass - Second Pass. The encoder uses the statistics that were
- // generated in the first encoding pass to create the compressed
- // output. (speed setting controls how fast)
- TWO_PASS_SECOND_GOOD = 4,
-
- // Two Pass - Second Pass Best. The encoder uses the statistics that
- // were generated in the first encoding pass to create the compressed
- // output using the highest possible quality, and taking a
- // longer amount of time to encode. (speed setting ignored)
- TWO_PASS_SECOND_BEST = 5,
-
- // Realtime/Live Encoding. This mode is optimized for realtime
- // encoding (for example, capturing a television signal or feed from
- // a live camera). (speed setting controls how fast)
- REALTIME = 6,
+ // Realtime/Live Encoding. This mode is optimized for realtime encoding (for
+ // example, capturing a television signal or feed from a live camera). Speed
+ // setting controls how fast.
+ REALTIME
} MODE;
typedef enum {
@@ -225,11 +208,11 @@
int tile_columns;
int tile_rows;
- struct vpx_fixed_buf two_pass_stats_in;
- struct vpx_codec_pkt_list *output_pkt_list;
+ vpx_fixed_buf_t two_pass_stats_in;
+ struct vpx_codec_pkt_list *output_pkt_list;
#if CONFIG_FP_MB_STATS
- struct vpx_fixed_buf firstpass_mb_stats_in;
+ vpx_fixed_buf_t firstpass_mb_stats_in;
#endif
vp8e_tuning tuning;
@@ -240,19 +223,13 @@
return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
}
-static INLINE int is_best_mode(MODE mode) {
- return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST;
-}
-
typedef struct VP9_COMP {
QUANTS quants;
MACROBLOCK mb;
VP9_COMMON common;
VP9EncoderConfig oxcf;
struct lookahead_ctx *lookahead;
- struct lookahead_entry *source;
struct lookahead_entry *alt_ref_source;
- struct lookahead_entry *last_source;
YV12_BUFFER_CONFIG *Source;
YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames
@@ -261,10 +238,6 @@
YV12_BUFFER_CONFIG *unscaled_last_source;
YV12_BUFFER_CONFIG scaled_last_source;
- int gold_is_last; // gold same as last frame ( short circuit gold searches)
- int alt_is_last; // Alt same as last ( short circuit altref search)
- int gold_is_alt; // don't do both alt and gold search ( just do gold).
-
int skippable_frame;
int scaled_ref_idx[3];
@@ -312,6 +285,7 @@
double framerate;
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
+ int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE];
struct vpx_codec_pkt_list *output_pkt_list;
@@ -399,8 +373,6 @@
int dummy_packing; /* flag to indicate if packing is dummy */
- unsigned int tx_stepdown_count[TX_SIZES];
-
int initial_width;
int initial_height;
@@ -500,14 +472,6 @@
.buf;
}
-// Intra only frames, golden frames (except alt ref overlays) and
-// alt ref frames tend to be coded at a higher than ambient quality
-static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
- return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
- (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
- vp9_is_upper_layer_key_frame(cpi);
-}
-
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
// TODO(JBB): double check we can't exceed this token count if we have a
// 32x32 transform crossing a boundary at a multiple of 16.
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 295e437..7867dc2 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -396,7 +396,7 @@
cpi->rc.frames_to_key = INT_MAX;
}
-void vp9_first_pass(VP9_COMP *cpi) {
+void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int mb_row, mb_col;
MACROBLOCK *const x = &cpi->mb;
VP9_COMMON *const cm = &cpi->common;
@@ -428,10 +428,12 @@
int neutral_count = 0;
int new_mv_count = 0;
int sum_in_vectors = 0;
- uint32_t lastmv_as_int = 0;
+ MV lastmv = {0, 0};
TWO_PASS *twopass = &cpi->twopass;
const MV zero_mv = {0, 0};
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
+ LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ?
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0;
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -444,15 +446,14 @@
set_first_pass_params(cpi);
vp9_set_quantizer(cm, find_fp_qindex());
- if (is_spatial_svc(cpi)) {
+ if (lc != NULL) {
MV_REFERENCE_FRAME ref_frame = LAST_FRAME;
const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;
- twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
+ twopass = &lc->twopass;
if (cpi->common.current_video_frame == 0) {
cpi->ref_frame_flags = 0;
} else {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
if (lc->current_video_frame_in_layer == 0)
cpi->ref_frame_flags = VP9_GOLD_FLAG;
else
@@ -511,9 +512,7 @@
vp9_tile_init(&tile, cm, 0, 0);
for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
- int_mv best_ref_mv;
-
- best_ref_mv.as_int = 0;
+ MV best_ref_mv = {0, 0};
// Reset above block coeffs.
xd->up_available = (mb_row != 0);
@@ -593,14 +592,13 @@
// Other than for the first frame do a motion search.
if (cm->current_video_frame > 0) {
int tmp_err, motion_error, raw_motion_error;
- int_mv mv, tmp_mv;
+ // Assume 0,0 motion with no mv overhead.
+ MV mv = {0, 0} , tmp_mv = {0, 0};
struct buf_2d unscaled_last_source_buf_2d;
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
motion_error = get_prediction_error(bsize, &x->plane[0].src,
&xd->plane[0].pre[0]);
- // Assume 0,0 motion with no mv overhead.
- mv.as_int = tmp_mv.as_int = 0;
// Compute the motion error of the 0,0 motion using the last source
// frame as the reference. Skip the further motion search on
@@ -613,11 +611,10 @@
&unscaled_last_source_buf_2d);
// TODO(pengchong): Replace the hard-coded threshold
- if (raw_motion_error > 25 || is_spatial_svc(cpi)) {
+ if (raw_motion_error > 25 || lc != NULL) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
- first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
- &motion_error);
+ first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
motion_error = (int)(motion_error * error_weight);
@@ -625,9 +622,9 @@
// If the current best reference mv is not centered on 0,0 then do a
// 0,0 based search as well.
- if (best_ref_mv.as_int) {
+ if (!is_zero_mv(&best_ref_mv)) {
tmp_err = INT_MAX;
- first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, &tmp_err);
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &tmp_err);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
tmp_err = (int)(tmp_err * error_weight);
@@ -635,7 +632,7 @@
if (tmp_err < motion_error) {
motion_error = tmp_err;
- mv.as_int = tmp_mv.as_int;
+ mv = tmp_mv;
}
}
@@ -648,7 +645,7 @@
gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
&xd->plane[0].pre[0]);
- first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv,
&gf_motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
@@ -679,7 +676,8 @@
}
// Start by assuming that intra mode is best.
- best_ref_mv.as_int = 0;
+ best_ref_mv.row = 0;
+ best_ref_mv.col = 0;
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -703,25 +701,25 @@
this_error < 2 * intrapenalty)
++neutral_count;
- mv.as_mv.row *= 8;
- mv.as_mv.col *= 8;
+ mv.row *= 8;
+ mv.col *= 8;
this_error = motion_error;
xd->mi[0]->mbmi.mode = NEWMV;
- xd->mi[0]->mbmi.mv[0] = mv;
+ xd->mi[0]->mbmi.mv[0].as_mv = mv;
xd->mi[0]->mbmi.tx_size = TX_4X4;
xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME;
xd->mi[0]->mbmi.ref_frame[1] = NONE;
vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
vp9_encode_sby_pass1(x, bsize);
- sum_mvr += mv.as_mv.row;
- sum_mvr_abs += abs(mv.as_mv.row);
- sum_mvc += mv.as_mv.col;
- sum_mvc_abs += abs(mv.as_mv.col);
- sum_mvrs += mv.as_mv.row * mv.as_mv.row;
- sum_mvcs += mv.as_mv.col * mv.as_mv.col;
+ sum_mvr += mv.row;
+ sum_mvr_abs += abs(mv.row);
+ sum_mvc += mv.col;
+ sum_mvc_abs += abs(mv.col);
+ sum_mvrs += mv.row * mv.row;
+ sum_mvcs += mv.col * mv.col;
++intercount;
- best_ref_mv.as_int = mv.as_int;
+ best_ref_mv = mv;
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -739,7 +737,7 @@
}
#endif
- if (mv.as_int) {
+ if (!is_zero_mv(&mv)) {
++mvcount;
#if CONFIG_FP_MB_STATS
@@ -770,33 +768,33 @@
#endif
// Non-zero vector, was it different from the last non zero vector?
- if (mv.as_int != lastmv_as_int)
+ if (!is_equal_mv(&mv, &lastmv))
++new_mv_count;
- lastmv_as_int = mv.as_int;
+ lastmv = mv;
// Does the row vector point inwards or outwards?
if (mb_row < cm->mb_rows / 2) {
- if (mv.as_mv.row > 0)
+ if (mv.row > 0)
--sum_in_vectors;
- else if (mv.as_mv.row < 0)
+ else if (mv.row < 0)
++sum_in_vectors;
} else if (mb_row > cm->mb_rows / 2) {
- if (mv.as_mv.row > 0)
+ if (mv.row > 0)
++sum_in_vectors;
- else if (mv.as_mv.row < 0)
+ else if (mv.row < 0)
--sum_in_vectors;
}
// Does the col vector point inwards or outwards?
if (mb_col < cm->mb_cols / 2) {
- if (mv.as_mv.col > 0)
+ if (mv.col > 0)
--sum_in_vectors;
- else if (mv.as_mv.col < 0)
+ else if (mv.col < 0)
++sum_in_vectors;
} else if (mb_col > cm->mb_cols / 2) {
- if (mv.as_mv.col > 0)
+ if (mv.col > 0)
++sum_in_vectors;
- else if (mv.as_mv.col < 0)
+ else if (mv.col < 0)
--sum_in_vectors;
}
}
@@ -864,7 +862,7 @@
// TODO(paulwilkins): Handle the case when duration is set to 0, or
// something less than the full time between subsequent values of
// cpi->source_time_stamp.
- fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start);
+ fps.duration = (double)(source->ts_end - source->ts_start);
// Don't want to do output stats with a stack variable!
twopass->this_frame_stats = fps;
@@ -895,7 +893,7 @@
vp9_extend_frame_borders(new_yv12);
- if (is_spatial_svc(cpi)) {
+ if (lc != NULL) {
vp9_update_reference_frames(cpi);
} else {
// Swap frame pointers so last frame refers to the frame we just compressed.
@@ -1081,8 +1079,7 @@
// This function gives an estimate of how badly we believe the prediction
// quality is decaying from frame to frame.
-static double get_zero_motion_factor(const VP9_COMMON *cm,
- const FIRSTPASS_STATS *frame) {
+static double get_zero_motion_factor(const FIRSTPASS_STATS *frame) {
const double sr_ratio = frame->coded_error /
DOUBLE_DIVIDE_CHECK(frame->sr_coded_error);
const double zero_motion_pct = frame->pcnt_inter -
@@ -1095,12 +1092,10 @@
// Function to test for a condition where a complex transition is followed
// by a static section. For example in slide shows where there is a fade
// between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(TWO_PASS *twopass,
+static int detect_transition_to_still(const TWO_PASS *twopass,
int frame_interval, int still_interval,
double loop_decay_rate,
double last_decay_rate) {
- int trans_to_still = 0;
-
// Break clause to detect very still sections after motion
// For example a static image after a fade or other transition
// instead of a clean scene cut.
@@ -1108,26 +1103,22 @@
loop_decay_rate >= 0.999 &&
last_decay_rate < 0.9) {
int j;
- const FIRSTPASS_STATS *position = twopass->stats_in;
- FIRSTPASS_STATS tmp_next_frame;
// Look ahead a few frames to see if static condition persists...
for (j = 0; j < still_interval; ++j) {
- if (EOF == input_stats(twopass, &tmp_next_frame))
+ const FIRSTPASS_STATS *stats = &twopass->stats_in[j];
+ if (stats >= twopass->stats_in_end)
break;
- if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999)
+ if (stats->pcnt_inter - stats->pcnt_motion < 0.999)
break;
}
- reset_fpf_position(twopass, position);
-
// Only if it does do we signal a transition to still.
- if (j == still_interval)
- trans_to_still = 1;
+ return j == still_interval;
}
- return trans_to_still;
+ return 0;
}
// This function detects a flash through the high relative pcnt_second_ref
@@ -1373,7 +1364,8 @@
double group_error, int gf_arf_bits) {
RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- TWO_PASS *twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
FIRSTPASS_STATS frame_stats;
int i;
int frame_index = 1;
@@ -1396,17 +1388,17 @@
// is also the golden frame.
if (!key_frame) {
if (rc->source_alt_ref_active) {
- twopass->gf_group.update_type[0] = OVERLAY_UPDATE;
- twopass->gf_group.rf_level[0] = INTER_NORMAL;
- twopass->gf_group.bit_allocation[0] = 0;
- twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
- twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
+ gf_group->update_type[0] = OVERLAY_UPDATE;
+ gf_group->rf_level[0] = INTER_NORMAL;
+ gf_group->bit_allocation[0] = 0;
+ gf_group->arf_update_idx[0] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
} else {
- twopass->gf_group.update_type[0] = GF_UPDATE;
- twopass->gf_group.rf_level[0] = GF_ARF_STD;
- twopass->gf_group.bit_allocation[0] = gf_arf_bits;
- twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
- twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
+ gf_group->update_type[0] = GF_UPDATE;
+ gf_group->rf_level[0] = GF_ARF_STD;
+ gf_group->bit_allocation[0] = gf_arf_bits;
+ gf_group->arf_update_idx[0] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
}
// Step over the golden frame / overlay frame
@@ -1421,25 +1413,25 @@
// Store the bits to spend on the ARF if there is one.
if (rc->source_alt_ref_pending) {
- twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
- twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
- twopass->gf_group.bit_allocation[frame_index] = gf_arf_bits;
- twopass->gf_group.arf_src_offset[frame_index] =
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+ gf_group->arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
- twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
- twopass->gf_group.arf_ref_idx[frame_index] =
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] =
arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
rc->source_alt_ref_active];
++frame_index;
if (cpi->multi_arf_enabled) {
// Set aside a slot for a level 1 arf.
- twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
- twopass->gf_group.rf_level[frame_index] = GF_ARF_LOW;
- twopass->gf_group.arf_src_offset[frame_index] =
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->arf_src_offset[frame_index] =
(unsigned char)((rc->baseline_gf_interval >> 1) - 1);
- twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[1];
- twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[1];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
++frame_index;
}
}
@@ -1469,16 +1461,16 @@
if (frame_index <= mid_frame_idx)
arf_idx = 1;
}
- twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
- twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
target_frame_size = clamp(target_frame_size, 0,
MIN(max_bits, (int)total_group_bits));
- twopass->gf_group.update_type[frame_index] = LF_UPDATE;
- twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
+ gf_group->update_type[frame_index] = LF_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
- twopass->gf_group.bit_allocation[frame_index] = target_frame_size;
+ gf_group->bit_allocation[frame_index] = target_frame_size;
++frame_index;
}
@@ -1486,23 +1478,23 @@
// We need to configure the frame at the end of the sequence + 1 that will be
// the start frame for the next group. Otherwise prior to the call to
// vp9_rc_get_second_pass_params() the data will be undefined.
- twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
- twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
if (rc->source_alt_ref_pending) {
- twopass->gf_group.update_type[frame_index] = OVERLAY_UPDATE;
- twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
// Final setup for second arf and its overlay.
if (cpi->multi_arf_enabled) {
- twopass->gf_group.bit_allocation[2] =
- twopass->gf_group.bit_allocation[mid_frame_idx] + mid_boost_bits;
- twopass->gf_group.update_type[mid_frame_idx] = OVERLAY_UPDATE;
- twopass->gf_group.bit_allocation[mid_frame_idx] = 0;
+ gf_group->bit_allocation[2] =
+ gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits;
+ gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE;
+ gf_group->bit_allocation[mid_frame_idx] = 0;
}
} else {
- twopass->gf_group.update_type[frame_index] = GF_UPDATE;
- twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
}
// Note whether multi-arf was enabled this group for next time.
@@ -1554,8 +1546,6 @@
vp9_clear_system_state();
vp9_zero(next_frame);
- gf_group_bits = 0;
-
// Load stats for the current frame.
mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
@@ -1615,9 +1605,8 @@
decay_accumulator = decay_accumulator * loop_decay_rate;
// Monitor for static sections.
- zero_motion_accumulator =
- MIN(zero_motion_accumulator,
- get_zero_motion_factor(&cpi->common, &next_frame));
+ zero_motion_accumulator = MIN(zero_motion_accumulator,
+ get_zero_motion_factor(&next_frame));
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
@@ -1831,6 +1820,7 @@
int i, j;
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const FIRSTPASS_STATS first_frame = *this_frame;
const FIRSTPASS_STATS *const start_position = twopass->stats_in;
@@ -1849,7 +1839,7 @@
cpi->common.frame_type = KEY_FRAME;
// Reset the GF group data structures.
- vp9_zero(twopass->gf_group);
+ vp9_zero(*gf_group);
// Is this a forced key frame by interval.
rc->this_key_frame_forced = rc->next_key_frame_forced;
@@ -1987,9 +1977,8 @@
break;
// Monitor for static sections.
- zero_motion_accumulator =
- MIN(zero_motion_accumulator,
- get_zero_motion_factor(&cpi->common, &next_frame));
+ zero_motion_accumulator =MIN(zero_motion_accumulator,
+ get_zero_motion_factor(&next_frame));
// For the first few frames collect data to decide kf boost.
if (i <= (rc->max_gf_interval * 2)) {
@@ -2040,9 +2029,9 @@
twopass->kf_group_bits -= kf_bits;
// Save the bits to spend on the key frame.
- twopass->gf_group.bit_allocation[0] = kf_bits;
- twopass->gf_group.update_type[0] = KF_UPDATE;
- twopass->gf_group.rf_level[0] = KF_STD;
+ gf_group->bit_allocation[0] = kf_bits;
+ gf_group->update_type[0] = KF_UPDATE;
+ gf_group->rf_level[0] = KF_STD;
// Note the total error score of the kf group minus the key frame itself.
twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
@@ -2119,15 +2108,16 @@
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
int frames_left;
FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
int target_rate;
- LAYER_CONTEXT *lc = NULL;
+ LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ?
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0;
- if (is_spatial_svc(cpi)) {
- lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ if (lc != NULL) {
frames_left = (int)(twopass->total_stats.count -
lc->current_video_frame_in_layer);
} else {
@@ -2140,10 +2130,10 @@
// If this is an arf frame then we dont want to read the stats file or
// advance the input pointer as we already have what we need.
- if (twopass->gf_group.update_type[twopass->gf_group.index] == ARF_UPDATE) {
+ if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
int target_rate;
configure_buffer_updates(cpi);
- target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
+ target_rate = gf_group->bit_allocation[gf_group->index];
target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
rc->base_frame_target = target_rate;
@@ -2154,7 +2144,7 @@
vp9_rc_set_frame_target(cpi, target_rate);
cm->frame_type = INTER_FRAME;
- if (is_spatial_svc(cpi)) {
+ if (lc != NULL) {
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = 0;
} else {
@@ -2170,7 +2160,7 @@
vp9_clear_system_state();
- if (is_spatial_svc(cpi) && twopass->kf_intra_err_min == 0) {
+ if (lc != NULL && twopass->kf_intra_err_min == 0) {
twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
}
@@ -2178,8 +2168,7 @@
if (cpi->oxcf.rc_mode == VPX_Q) {
twopass->active_worst_quality = cpi->oxcf.cq_level;
} else if (cm->current_video_frame == 0 ||
- (is_spatial_svc(cpi) &&
- lc->current_video_frame_in_layer == 0)) {
+ (lc != NULL && lc->current_video_frame_in_layer == 0)) {
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
@@ -2205,7 +2194,7 @@
cm->frame_type = INTER_FRAME;
}
- if (is_spatial_svc(cpi)) {
+ if (lc != NULL) {
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = (cm->frame_type == KEY_FRAME);
if (lc->is_key_frame)
@@ -2236,13 +2225,13 @@
}
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- if (!is_spatial_svc(cpi))
+ if (lc != NULL)
cpi->refresh_golden_frame = 1;
}
configure_buffer_updates(cpi);
- target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
+ target_rate = gf_group->bit_allocation[gf_group->index];
if (cpi->common.frame_type == KEY_FRAME)
target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
else
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index bf8c9fd..aaa6b03 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -121,7 +121,7 @@
void vp9_init_first_pass(struct VP9_COMP *cpi);
void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
-void vp9_first_pass(struct VP9_COMP *cpi);
+void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source);
void vp9_end_first_pass(struct VP9_COMP *cpi);
void vp9_init_second_pass(struct VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 6115f5a..7a7bb28 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -17,6 +17,7 @@
#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_reconinter.h"
@@ -253,7 +254,8 @@
}
static void free_pred_buffer(PRED_BUFFER *p) {
- p->in_use = 0;
+ if (p != NULL)
+ p->in_use = 0;
}
static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
@@ -343,6 +345,52 @@
}
}
+struct estimate_block_intra_args {
+ VP9_COMP *cpi;
+ MACROBLOCK *x;
+ PREDICTION_MODE mode;
+ int rate;
+ int64_t dist;
+};
+
+static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg) {
+ struct estimate_block_intra_args* const args = arg;
+ VP9_COMP *const cpi = args->cpi;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
+ uint8_t *const src_buf_base = p->src.buf;
+ uint8_t *const dst_buf_base = pd->dst.buf;
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ int i, j;
+ int rate;
+ int64_t dist;
+ unsigned int var_y, sse_y;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ assert(plane == 0);
+ (void) plane;
+
+ p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
+ pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
+ // Use source buffer as an approximation for the fully reconstructed buffer.
+ vp9_predict_intra_block(xd, block >> (2 * tx_size),
+ b_width_log2(plane_bsize),
+ tx_size, args->mode,
+ p->src.buf, src_stride,
+ pd->dst.buf, dst_stride,
+ i, j, 0);
+ // This procedure assumes zero offset from p->src.buf and pd->dst.buf.
+ model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y);
+ p->src.buf = src_buf_base;
+ pd->dst.buf = dst_buf_base;
+ args->rate += rate;
+ args->dist += dist;
+}
+
static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
{THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
{THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
@@ -360,7 +408,6 @@
PICK_MODE_CONTEXT *ctx) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
PREDICTION_MODE this_mode, best_mode = ZEROMV;
MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
@@ -397,9 +444,9 @@
(((mi_row + mi_col) >> bsl) +
get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
int const_motion[MAX_REF_FRAMES] = { 0 };
- int bh = num_4x4_blocks_high_lookup[bsize] << 2;
- int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
- int pixels_in_block = bh * bw;
+ const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
+ const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
+ const int pixels_in_block = bh * bw;
// For speed 6, the result of interp filter is reused later in actual encoding
// process.
// tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
@@ -609,8 +656,7 @@
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
- vp9_denoiser_update_frame_stats(&cpi->denoiser, mbmi, sse_y,
- this_mode, ctx);
+ vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx);
}
#endif
@@ -625,8 +671,7 @@
skip_txfm = x->skip_txfm[0];
if (cpi->sf.reuse_inter_pred_sby) {
- if (best_pred != NULL)
- free_pred_buffer(best_pred);
+ free_pred_buffer(best_pred);
best_pred = this_mode_pred;
}
@@ -646,7 +691,8 @@
// If best prediction is not in dst buf, then copy the prediction block from
// temp buf to dst buf.
- if (cpi->sf.reuse_inter_pred_sby && best_pred->data != orig_dst.buf) {
+ if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby &&
+ best_pred->data != orig_dst.buf) {
uint8_t *copy_from, *copy_to;
pd->dst = orig_dst;
@@ -670,20 +716,11 @@
// threshold.
if (!x->skip && best_rd > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize) {
- int i, j;
- const int width = num_4x4_blocks_wide_lookup[bsize];
- const int height = num_4x4_blocks_high_lookup[bsize];
+ struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
- int rate2 = 0;
- int64_t dist2 = 0;
- const int dst_stride = cpi->sf.reuse_inter_pred_sby ? bw : pd->dst.stride;
- const int src_stride = p->src.stride;
- int block_idx = 0;
-
- TX_SIZE tmp_tx_size = MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- const BLOCK_SIZE bsize_tx = txsize_to_bsize[tmp_tx_size];
- const int step = 1 << tmp_tx_size;
+ const TX_SIZE intra_tx_size =
+ MIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
if (cpi->sf.reuse_inter_pred_sby) {
pd->dst.buf = tmp[0].data;
@@ -691,44 +728,26 @@
}
for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
- uint8_t *const src_buf_base = p->src.buf;
- uint8_t *const dst_buf_base = pd->dst.buf;
- for (j = 0; j < height; j += step) {
- for (i = 0; i < width; i += step) {
- p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
- pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
- // Use source buffer as an approximation for the fully reconstructed
- // buffer
- vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize),
- tmp_tx_size, this_mode,
- p->src.buf, src_stride,
- pd->dst.buf, dst_stride,
- i, j, 0);
- model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y);
- rate2 += rate;
- dist2 += dist;
- ++block_idx;
- }
- }
- p->src.buf = src_buf_base;
- pd->dst.buf = dst_buf_base;
-
- rate = rate2;
- dist = dist2;
-
+ const TX_SIZE saved_tx_size = mbmi->tx_size;
+ args.mode = this_mode;
+ args.rate = 0;
+ args.dist = 0;
+ mbmi->tx_size = intra_tx_size;
+ vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
+ estimate_block_intra, &args);
+ mbmi->tx_size = saved_tx_size;
+ rate = args.rate;
+ dist = args.dist;
rate += cpi->mbmode_cost[this_mode];
rate += intra_cost_penalty;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
- if (cpi->sf.reuse_inter_pred_sby)
- pd->dst = orig_dst;
-
if (this_rd + intra_mode_cost < best_rd) {
best_rd = this_rd;
*returnrate = rate;
*returndistortion = dist;
mbmi->mode = this_mode;
- mbmi->tx_size = tmp_tx_size;
+ mbmi->tx_size = intra_tx_size;
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->uv_mode = this_mode;
mbmi->mv[0].as_int = INVALID_MV;
@@ -736,6 +755,8 @@
x->skip_txfm[0] = skip_txfm;
}
}
+ if (cpi->sf.reuse_inter_pred_sby)
+ pd->dst = orig_dst;
}
return INT64_MAX;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 9da2ade..b926a58 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -646,7 +646,6 @@
int q;
if (frame_is_intra_only(cm)) {
- active_best_quality = rc->best_quality;
// Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 4fc3e9e..2841efa 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -462,7 +462,7 @@
// Set baseline threshold values.
for (i = 0; i < MAX_MODES; ++i)
- rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+ rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
rd->thresh_mult[THR_NEARESTMV] = 0;
rd->thresh_mult[THR_NEARESTG] = 0;
@@ -548,7 +548,7 @@
int i;
for (i = 0; i < MAX_REFS; ++i)
- rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+ rd->thresh_mult_sub8x8[i] = cpi->oxcf.mode == BEST ? -500 : 0;
rd->thresh_mult_sub8x8[THR_LAST] += 2500;
rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 3a34e6f..c36db30 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -486,7 +486,6 @@
txfm_rd_in_plane(x, rate, distortion, skip,
sse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
- cpi->tx_stepdown_count[0]++;
}
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
@@ -508,24 +507,24 @@
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX}};
- TX_SIZE n, m;
+ int n, m;
int s0, s1;
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
int64_t best_rd = INT64_MAX;
- TX_SIZE best_tx = TX_4X4;
+ TX_SIZE best_tx = max_tx_size;
const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
- for (n = TX_4X4; n <= max_tx_size; n++) {
+ for (n = max_tx_size; n >= 0; n--) {
txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
&sse[n], ref_best_rd, 0, bs, n,
cpi->sf.use_fast_coef_costing);
r[n][1] = r[n][0];
if (r[n][0] < INT_MAX) {
- for (m = 0; m <= n - (n == max_tx_size); m++) {
+ for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
else
@@ -541,6 +540,13 @@
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
+ // Early termination in transform size search.
+ if (cpi->sf.tx_size_search_breakout &&
+ (rd[n][1] == INT64_MAX ||
+ (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
+ s[n] == 1))
+ break;
+
if (rd[n][1] < best_rd) {
best_tx = n;
best_rd = rd[n][1];
@@ -562,16 +568,12 @@
if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
- cpi->tx_stepdown_count[0]++;
} else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
- cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
- cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
} else {
tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
- cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
}
}
@@ -1382,7 +1384,7 @@
if (best_rd < label_mv_thresh)
break;
- if (!is_best_mode(cpi->oxcf.mode)) {
+ if (cpi->oxcf.mode != BEST) {
// use previous block's result as next block's MV predictor.
if (i > 0) {
bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
@@ -1424,7 +1426,7 @@
INT_MAX, 1);
// Should we do a full search (best quality only)
- if (is_best_mode(cpi->oxcf.mode)) {
+ if (cpi->oxcf.mode == BEST) {
int_mv *const best_mv = &mi->bmi[i].as_mv[0];
/* Check if mvp_full is within the range. */
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
@@ -2278,6 +2280,13 @@
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
+ if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
+ (cpi->sf.interp_filter_search_mask & (1 << i))) {
+ rate_sum = INT_MAX;
+ dist_sum = INT64_MAX;
+ continue;
+ }
+
if ((cm->interp_filter == SWITCHABLE &&
(!i || best_needs_copy)) ||
(cm->interp_filter != SWITCHABLE &&
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index a2f4583..879c83c 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -50,8 +50,20 @@
(1 << THR_GOLD)
};
+// Intra only frames, golden frames (except alt ref overlays) and
+// alt ref frames tend to be coded at a higher than ambient quality
+static int frame_is_boosted(const VP9_COMP *cpi) {
+ return frame_is_intra_only(&cpi->common) ||
+ cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
+ vp9_is_upper_layer_key_frame(cpi);
+}
+
+
static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
SPEED_FEATURES *sf, int speed) {
+ const int boosted = frame_is_boosted(cpi);
+
sf->adaptive_rd_thresh = 1;
sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW;
sf->allow_skip_recode = 1;
@@ -59,8 +71,6 @@
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
- sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
- : USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -80,9 +90,14 @@
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+
+ sf->tx_size_search_breakout = 1;
}
if (speed >= 2) {
+ sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
+ : USE_LARGESTALL;
+
if (MIN(cm->width, cm->height) >= 720) {
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->last_partitioning_redo_frequency = 3;
@@ -117,10 +132,10 @@
sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
}
sf->adaptive_pred_interp_filter = 0;
- sf->cb_partition_search = frame_is_boosted(cpi) ? 0 : 1;
+ sf->cb_partition_search = !boosted;
sf->cb_pred_filter_search = 1;
sf->alt_ref_search_fp = 1;
- sf->motion_field_mode_search = frame_is_boosted(cpi) ? 0 : 1;
+ sf->motion_field_mode_search = !boosted;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->last_partitioning_redo_frequency = 3;
sf->recode_loop = ALLOW_RECODE_KFMAXBW;
@@ -128,6 +143,7 @@
sf->mode_skip_start = 6;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
+ sf->adaptive_interp_filter_search = 1;
}
if (speed >= 4) {
@@ -165,8 +181,8 @@
static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
int speed, vp9e_tune_content content) {
VP9_COMMON *const cm = &cpi->common;
- const int frames_since_key =
- cm->frame_type == KEY_FRAME ? 0 : cpi->rc.frames_since_key;
+ const int is_keyframe = cm->frame_type == KEY_FRAME;
+ const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
sf->use_fast_coef_costing = 1;
@@ -262,17 +278,16 @@
}
if (speed >= 5) {
- sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
- sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ?
- RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
+ sf->use_quant_fp = !is_keyframe;
+ sf->auto_min_max_partition_size = is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX
+ : STRICT_NEIGHBORING_MIN_MAX;
sf->max_partition_size = BLOCK_32X32;
sf->min_partition_size = BLOCK_8X8;
sf->partition_check =
(frames_since_key % sf->last_partitioning_redo_frequency == 1);
- sf->force_frame_boost = cm->frame_type == KEY_FRAME ||
- (frames_since_key %
- (sf->last_partitioning_redo_frequency << 1) == 1);
- sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15;
+ sf->force_frame_boost = is_keyframe ||
+ (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
+ sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
sf->use_nonrd_pick_mode = 1;
sf->allow_skip_recode = 0;
@@ -290,8 +305,7 @@
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
- sf->tx_size_search_method = (cm->frame_type == KEY_FRAME) ?
- USE_LARGESTALL : USE_TX_8X8;
+ sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
// This feature is only enabled when partition search is disabled.
sf->reuse_inter_pred_sby = 1;
@@ -301,6 +315,7 @@
sf->mv.reduce_first_step_size = 1;
}
+
if (speed >= 7) {
sf->mv.search_method = FAST_DIAMOND;
sf->mv.fullpel_search_step_param = 10;
@@ -309,10 +324,12 @@
800 : 300;
sf->elevate_newmv_thresh = 2500;
}
+
if (speed >= 12) {
sf->elevate_newmv_thresh = 4000;
sf->mv.subpel_force_stop = 2;
}
+
if (speed >= 13) {
int i;
sf->max_intra_bsize = BLOCK_32X32;
@@ -365,6 +382,8 @@
sf->force_frame_boost = 0;
sf->max_delta_qindex = 0;
sf->disable_filter_search_var_thresh = 0;
+ sf->adaptive_interp_filter_search = 0;
+
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
sf->intra_uv_mode_mask[i] = INTRA_ALL;
@@ -391,17 +410,16 @@
// Recode loop tolerence %.
sf->recode_tolerance = 25;
sf->default_interp_filter = SWITCHABLE;
+ sf->tx_size_search_breakout = 0;
- if (oxcf->mode == REALTIME) {
+ if (oxcf->mode == REALTIME)
set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);
- } else {
- if (!is_best_mode(oxcf->mode))
- set_good_speed_feature(cpi, cm, sf, oxcf->speed);
- }
+ else if (oxcf->mode == GOOD)
+ set_good_speed_feature(cpi, cm, sf, oxcf->speed);
cpi->full_search_sad = vp9_full_search_sad;
- cpi->diamond_search_sad = is_best_mode(oxcf->mode) ? vp9_full_range_search
- : vp9_diamond_search_sad;
+ cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search
+ : vp9_diamond_search_sad;
cpi->refining_search_sad = vp9_refining_search_sad;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 3e8215f..e2e5c1e 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -103,6 +103,12 @@
} MODE_SEARCH_SKIP_LOGIC;
typedef enum {
+ FLAG_SKIP_EIGHTTAP = 1 << EIGHTTAP,
+ FLAG_SKIP_EIGHTTAP_SMOOTH = 1 << EIGHTTAP_SMOOTH,
+ FLAG_SKIP_EIGHTTAP_SHARP = 1 << EIGHTTAP_SHARP,
+} INTERP_FILTER_MASK;
+
+typedef enum {
// Search partitions using RD/NONRD criterion
SEARCH_PARTITION = 0,
@@ -376,6 +382,16 @@
// default interp filter choice
INTERP_FILTER default_interp_filter;
+
+ // Early termination in transform size search, which only applies while
+ // tx_size_search_method is USE_FULL_RD.
+ int tx_size_search_breakout;
+
+ // adaptive interp_filter search to allow skip of certain filter types.
+ int adaptive_interp_filter_search;
+
+ // mask for skip evaluation of certain interp_filter type.
+ INTERP_FILTER_MASK interp_filter_search_mask;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c
index 026e6a8..8435640 100644
--- a/vp9/encoder/vp9_ssim.c
+++ b/vp9/encoder/vp9_ssim.c
@@ -95,7 +95,7 @@
return ssim_total;
}
double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
- int lumamask, double *weight) {
+ double *weight) {
double a, b, c;
double ssimv;
diff --git a/vp9/encoder/vp9_ssim.h b/vp9/encoder/vp9_ssim.h
index a581c2c..d1dd1b7 100644
--- a/vp9/encoder/vp9_ssim.h
+++ b/vp9/encoder/vp9_ssim.h
@@ -18,7 +18,7 @@
#include "vpx_scale/yv12config.h"
double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
- int lumamask, double *weight);
+ double *weight);
double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v);
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index fb52d1a..9bd9792 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -36,6 +36,7 @@
int i;
lc->current_video_frame_in_layer = 0;
lc->layer_size = 0;
+ lc->last_frame_type = FRAME_TYPES;
lrc->ni_av_qi = oxcf->worst_allowed_q;
lrc->total_actual_bits = 0;
lrc->total_target_vs_actual = 0;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 801449b..d475d5f 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -25,9 +25,10 @@
double framerate;
int avg_frame_size;
TWO_PASS twopass;
- struct vpx_fixed_buf rc_twopass_stats_in;
+ vpx_fixed_buf_t rc_twopass_stats_in;
unsigned int current_video_frame_in_layer;
int is_key_frame;
+ FRAME_TYPE last_frame_type;
vpx_svc_parameters_t svc_params_received;
struct lookahead_entry *alt_ref_source;
int alt_ref_idx;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index cdbb69b..5ff02d8 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -21,7 +21,6 @@
#include "vp9/vp9_iface_common.h"
struct vp9_extracfg {
- struct vpx_codec_pkt_list *pkt_list;
int cpu_used; // available cpu percentage in 1/16
unsigned int enable_auto_alt_ref;
unsigned int noise_sensitivity;
@@ -43,37 +42,26 @@
vp9e_tune_content content;
};
-struct extraconfig_map {
- unsigned int usage;
- struct vp9_extracfg cfg;
-};
-
-static const struct extraconfig_map extracfg_map[] = {
- {
- 0,
- { // NOLINT
- NULL,
- 0, // cpu_used
- 1, // enable_auto_alt_ref
- 0, // noise_sensitivity
- 0, // sharpness
- 0, // static_thresh
- 0, // tile_columns
- 0, // tile_rows
- 7, // arnr_max_frames
- 5, // arnr_strength
- 3, // arnr_type
- VP8_TUNE_PSNR, // tuning
- 10, // cq_level
- 0, // rc_max_intra_bitrate_pct
- 0, // lossless
- 0, // frame_parallel_decoding_mode
- NO_AQ, // aq_mode
- 0, // frame_periodic_delta_q
- BITS_8, // Bit depth
- VP9E_CONTENT_DEFAULT // content
- }
- }
+static struct vp9_extracfg default_extra_cfg = {
+ 0, // cpu_used
+ 1, // enable_auto_alt_ref
+ 0, // noise_sensitivity
+ 0, // sharpness
+ 0, // static_thresh
+ 0, // tile_columns
+ 0, // tile_rows
+ 7, // arnr_max_frames
+ 5, // arnr_strength
+ 3, // arnr_type
+ VP8_TUNE_PSNR, // tuning
+ 10, // cq_level
+ 0, // rc_max_intra_bitrate_pct
+ 0, // lossless
+ 0, // frame_parallel_decoding_mode
+ NO_AQ, // aq_mode
+ 0, // frame_periodic_delta_q
+ BITS_8, // Bit depth
+ VP9E_CONTENT_DEFAULT // content
};
struct vpx_codec_alg_priv {
@@ -188,6 +176,8 @@
if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
ERROR("Not enough ref buffers for svc alt ref frames");
}
+ if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0)
+ ERROR("Multiple frame contexts are not supported for more than 3 layers");
#endif
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
@@ -325,6 +315,7 @@
VP9EncoderConfig *oxcf,
const vpx_codec_enc_cfg_t *cfg,
const struct vp9_extracfg *extra_cfg) {
+ const int is_vbr = cfg->rc_end_usage == VPX_VBR;
oxcf->profile = cfg->g_profile;
oxcf->width = cfg->g_w;
oxcf->height = cfg->g_h;
@@ -334,17 +325,16 @@
if (oxcf->init_framerate > 180)
oxcf->init_framerate = 30;
+ oxcf->mode = GOOD;
+
switch (cfg->g_pass) {
case VPX_RC_ONE_PASS:
- oxcf->mode = ONE_PASS_GOOD;
oxcf->pass = 0;
break;
case VPX_RC_FIRST_PASS:
- oxcf->mode = TWO_PASS_FIRST;
oxcf->pass = 1;
break;
case VPX_RC_LAST_PASS:
- oxcf->mode = TWO_PASS_SECOND_BEST;
oxcf->pass = 2;
break;
}
@@ -371,9 +361,9 @@
oxcf->scaled_frame_width = cfg->rc_scaled_width;
oxcf->scaled_frame_height = cfg->rc_scaled_height;
- oxcf->maximum_buffer_size_ms = cfg->rc_buf_sz;
- oxcf->starting_buffer_level_ms = cfg->rc_buf_initial_sz;
- oxcf->optimal_buffer_level_ms = cfg->rc_buf_optimal_sz;
+ oxcf->maximum_buffer_size_ms = is_vbr ? 240000 : cfg->rc_buf_sz;
+ oxcf->starting_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_initial_sz;
+ oxcf->optimal_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_optimal_sz;
oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh;
@@ -393,7 +383,6 @@
oxcf->sharpness = extra_cfg->sharpness;
oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in;
- oxcf->output_pkt_list = extra_cfg->pkt_list;
#if CONFIG_FP_MB_STATS
oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
@@ -659,8 +648,6 @@
(void)data;
if (ctx->priv == NULL) {
- int i;
- vpx_codec_enc_cfg_t *cfg;
struct vpx_codec_alg_priv *priv = calloc(1, sizeof(*priv));
if (priv == NULL)
@@ -668,7 +655,6 @@
ctx->priv = &priv->base;
ctx->priv->sz = sizeof(*ctx->priv);
- ctx->priv->iface = ctx->iface;
ctx->priv->alg_priv = priv;
ctx->priv->init_flags = ctx->init_flags;
ctx->priv->enc.total_encoders = 1;
@@ -679,17 +665,7 @@
ctx->config.enc = &ctx->priv->alg_priv->cfg;
}
- cfg = &ctx->priv->alg_priv->cfg;
-
- // Select the extra vp6 configuration table based on the current
- // usage value. If the current usage value isn't found, use the
- // values for usage case 0.
- for (i = 0;
- extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
- ++i) {}
-
- priv->extra_cfg = extracfg_map[i].cfg;
- priv->extra_cfg.pkt_list = &priv->pkt_list.head;
+ priv->extra_cfg = default_extra_cfg;
vp9_initialize_enc();
@@ -701,10 +677,12 @@
&ctx->priv->alg_priv->cfg,
&ctx->priv->alg_priv->extra_cfg);
cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf);
- if (cpi == NULL)
+ if (cpi == NULL) {
res = VPX_CODEC_MEM_ERROR;
- else
+ } else {
+ cpi->output_pkt_list = &priv->pkt_list.head;
ctx->priv->alg_priv->cpi = cpi;
+ }
}
}
@@ -718,31 +696,36 @@
return VPX_CODEC_OK;
}
-static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
+static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
unsigned long duration,
unsigned long deadline) {
- // Use best quality mode if no deadline is given.
- MODE new_qc = ONE_PASS_BEST;
+ MODE new_mode = BEST;
- if (deadline) {
- // Convert duration parameter from stream timebase to microseconds
- const uint64_t duration_us = (uint64_t)duration * 1000000 *
- (uint64_t)ctx->cfg.g_timebase.num /
- (uint64_t)ctx->cfg.g_timebase.den;
+ switch (ctx->cfg.g_pass) {
+ case VPX_RC_ONE_PASS:
+ if (deadline > 0) {
+ const vpx_codec_enc_cfg_t *const cfg = &ctx->cfg;
- // If the deadline is more that the duration this frame is to be shown,
- // use good quality mode. Otherwise use realtime mode.
- new_qc = (deadline > duration_us) ? ONE_PASS_GOOD : REALTIME;
+ // Convert duration parameter from stream timebase to microseconds.
+ const uint64_t duration_us = (uint64_t)duration * 1000000 *
+ (uint64_t)cfg->g_timebase.num /(uint64_t)cfg->g_timebase.den;
+
+ // If the deadline is more that the duration this frame is to be shown,
+ // use good quality mode. Otherwise use realtime mode.
+ new_mode = (deadline > duration_us) ? GOOD : REALTIME;
+ } else {
+ new_mode = BEST;
+ }
+ break;
+ case VPX_RC_FIRST_PASS:
+ break;
+ case VPX_RC_LAST_PASS:
+ new_mode = deadline > 0 ? GOOD : BEST;
+ break;
}
- if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
- new_qc = TWO_PASS_FIRST;
- else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
- new_qc = (new_qc == ONE_PASS_BEST) ? TWO_PASS_SECOND_BEST
- : TWO_PASS_SECOND_GOOD;
-
- if (ctx->oxcf.mode != new_qc) {
- ctx->oxcf.mode = new_qc;
+ if (ctx->oxcf.mode != new_mode) {
+ ctx->oxcf.mode = new_mode;
vp9_change_config(ctx->cpi, &ctx->oxcf);
}
}
@@ -929,7 +912,7 @@
#endif
// Pack invisible frames with the next visible frame
- if (cpi->common.show_frame == 0
+ if (!cpi->common.show_frame
#if CONFIG_SPATIAL_SVC
|| (is_spatial_svc(cpi) &&
cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
@@ -961,18 +944,6 @@
)
pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
- if (cpi->common.show_frame == 0) {
- pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
-
- // This timestamp should be as close as possible to the
- // prior PTS so that if a decoder uses pts to schedule when
- // to do this, we start right after last frame was decoded.
- // Invisible frames have no duration.
- pkt.data.frame.pts =
- ticks_to_timebase_units(timebase, cpi->last_time_stamp_seen) + 1;
- pkt.data.frame.duration = 0;
- }
-
if (cpi->droppable)
pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE;
@@ -997,8 +968,9 @@
cx_data_sz -= size;
#if CONFIG_SPATIAL_SVC
if (is_spatial_svc(cpi)) {
- vpx_codec_cx_pkt_t pkt = {0};
+ vpx_codec_cx_pkt_t pkt;
int i;
+ vp9_zero(pkt);
pkt.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
pkt.data.layer_sizes[i] = cpi->svc.layer_context[i].layer_size;
@@ -1350,11 +1322,11 @@
encoder_destroy, // vpx_codec_destroy_fn_t
encoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t
{ // NOLINT
- NOT_IMPLEMENTED, // vpx_codec_peek_si_fn_t
- NOT_IMPLEMENTED, // vpx_codec_get_si_fn_t
- NOT_IMPLEMENTED, // vpx_codec_decode_fn_t
- NOT_IMPLEMENTED, // vpx_codec_frame_get_fn_t
- NOT_IMPLEMENTED // vpx_codec_set_fb_fn_t
+ NULL, // vpx_codec_peek_si_fn_t
+ NULL, // vpx_codec_get_si_fn_t
+ NULL, // vpx_codec_decode_fn_t
+ NULL, // vpx_codec_frame_get_fn_t
+ NULL // vpx_codec_set_fb_fn_t
},
{ // NOLINT
1, // 1 cfg map
@@ -1362,8 +1334,8 @@
encoder_encode, // vpx_codec_encode_fn_t
encoder_get_cxdata, // vpx_codec_get_cx_data_fn_t
encoder_set_config, // vpx_codec_enc_config_set_fn_t
- NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t
+ NULL, // vpx_codec_get_global_headers_fn_t
encoder_get_preview, // vpx_codec_get_preview_frame_fn_t
- NOT_IMPLEMENTED // vpx_codec_enc_mr_get_mem_loc_fn_t
+ NULL // vpx_codec_enc_mr_get_mem_loc_fn_t
}
};
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 4372ac9..05d61c8 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -66,7 +66,6 @@
ctx->priv = (vpx_codec_priv_t *)alg_priv;
ctx->priv->sz = sizeof(*ctx->priv);
- ctx->priv->iface = ctx->iface;
ctx->priv->alg_priv = alg_priv;
ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
ctx->priv->init_flags = ctx->init_flags;
@@ -332,81 +331,6 @@
return VPX_CODEC_OK;
}
-static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb,
- void *decrypt_state,
- const uint8_t *data) {
- if (decrypt_cb) {
- uint8_t marker;
- decrypt_cb(decrypt_state, data, &marker, 1);
- return marker;
- }
- return *data;
-}
-
-static vpx_codec_err_t parse_superframe_index(const uint8_t *data,
- size_t data_sz,
- uint32_t sizes[8], int *count,
- vpx_decrypt_cb decrypt_cb,
- void *decrypt_state) {
- // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
- // it is a super frame index. If the last byte of real video compression
- // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
- // not the associated matching marker byte at the front of the index we have
- // an invalid bitstream and need to return an error.
-
- uint8_t marker;
-
- assert(data_sz);
- marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
- *count = 0;
-
- if ((marker & 0xe0) == 0xc0) {
- const uint32_t frames = (marker & 0x7) + 1;
- const uint32_t mag = ((marker >> 3) & 0x3) + 1;
- const size_t index_sz = 2 + mag * frames;
-
- // This chunk is marked as having a superframe index but doesn't have
- // enough data for it, thus it's an invalid superframe index.
- if (data_sz < index_sz)
- return VPX_CODEC_CORRUPT_FRAME;
-
- {
- const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
- data + data_sz - index_sz);
-
- // This chunk is marked as having a superframe index but doesn't have
- // the matching marker byte at the front of the index therefore it's an
- // invalid chunk.
- if (marker != marker2)
- return VPX_CODEC_CORRUPT_FRAME;
- }
-
- {
- // Found a valid superframe index.
- uint32_t i, j;
- const uint8_t *x = &data[data_sz - index_sz + 1];
-
- // Frames has a maximum of 8 and mag has a maximum of 4.
- uint8_t clear_buffer[32];
- assert(sizeof(clear_buffer) >= frames * mag);
- if (decrypt_cb) {
- decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
- x = clear_buffer;
- }
-
- for (i = 0; i < frames; ++i) {
- uint32_t this_sz = 0;
-
- for (j = 0; j < mag; ++j)
- this_sz |= (*x++) << (j * 8);
- sizes[i] = this_sz;
- }
- *count = frames;
- }
- }
- return VPX_CODEC_OK;
-}
-
static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
const uint8_t *data, unsigned int data_sz,
void *user_priv, long deadline) {
@@ -424,8 +348,8 @@
// Reset flushed when receiving a valid frame.
ctx->flushed = 0;
- res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
- ctx->decrypt_cb, ctx->decrypt_state);
+ res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
+ ctx->decrypt_cb, ctx->decrypt_state);
if (res != VPX_CODEC_OK)
return res;
@@ -723,12 +647,12 @@
},
{ // NOLINT
0,
- NOT_IMPLEMENTED, // vpx_codec_enc_cfg_map_t
- NOT_IMPLEMENTED, // vpx_codec_encode_fn_t
- NOT_IMPLEMENTED, // vpx_codec_get_cx_data_fn_t
- NOT_IMPLEMENTED, // vpx_codec_enc_config_set_fn_t
- NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t
- NOT_IMPLEMENTED, // vpx_codec_get_preview_frame_fn_t
- NOT_IMPLEMENTED // vpx_codec_enc_mr_get_mem_loc_fn_t
+ NULL, // vpx_codec_enc_cfg_map_t
+ NULL, // vpx_codec_encode_fn_t
+ NULL, // vpx_codec_get_cx_data_fn_t
+ NULL, // vpx_codec_enc_config_set_fn_t
+ NULL, // vpx_codec_get_global_headers_fn_t
+ NULL, // vpx_codec_get_preview_frame_fn_t
+ NULL // vpx_codec_enc_mr_get_mem_loc_fn_t
}
};
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index a7716d1..d7c5800 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -286,8 +286,6 @@
vpx_codec_enc_cfg_t cfg;
} vpx_codec_enc_cfg_map_t;
-#define NOT_IMPLEMENTED 0
-
/*!\brief Decoder algorithm interface interface
*
* All decoders \ref MUST expose a variable of this type.
@@ -338,7 +336,6 @@
*/
struct vpx_codec_priv {
unsigned int sz;
- vpx_codec_iface_t *iface;
struct vpx_codec_alg_priv *alg_priv;
const char *err_detail;
vpx_codec_flags_t init_flags;
@@ -347,8 +344,7 @@
vpx_codec_priv_cb_pair_t put_slice_cb;
} dec;
struct {
- int tbd;
- struct vpx_fixed_buf cx_data_dst_buf;
+ vpx_fixed_buf_t cx_data_dst_buf;
unsigned int cx_data_pad_before;
unsigned int cx_data_pad_after;
vpx_codec_cx_pkt_t cx_data_pkt;
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 7828615..45b0dca 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -86,6 +86,7 @@
int layers;
int layer;
int is_keyframe;
+ int use_multiple_frame_contexts;
FrameData *frame_list;
FrameData *frame_temp;
@@ -366,6 +367,7 @@
char *option_name;
char *option_value;
char *input_ptr;
+ SvcInternal *const si = get_svc_internal(svc_ctx);
vpx_codec_err_t res = VPX_CODEC_OK;
if (options == NULL) return VPX_CODEC_OK;
@@ -393,6 +395,8 @@
} else if (strcmp("auto-alt-refs", option_name) == 0) {
res = parse_auto_alt_ref(svc_ctx, option_value);
if (res != VPX_CODEC_OK) break;
+ } else if (strcmp("multi-frame-contexts", option_name) == 0) {
+ si->use_multiple_frame_contexts = atoi(option_value);
} else {
svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
res = VPX_CODEC_INVALID_PARAM;
@@ -401,6 +405,10 @@
option_name = strtok_r(NULL, "=", &input_ptr);
}
free(input_string);
+
+ if (si->use_multiple_frame_contexts && svc_ctx->spatial_layers > 3)
+ res = VPX_CODEC_INVALID_PARAM;
+
return res;
}
@@ -534,7 +542,8 @@
enc_cfg->rc_buf_initial_sz = 500;
enc_cfg->rc_buf_optimal_sz = 600;
enc_cfg->rc_buf_sz = 1000;
- enc_cfg->g_error_resilient = 1;
+ if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)
+ enc_cfg->g_error_resilient = 1;
// Initialize codec
res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR);
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index 4d22a08..fdcc9c1 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -20,7 +20,7 @@
vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx,
vpx_codec_iface_t *iface,
- vpx_codec_dec_cfg_t *cfg,
+ const vpx_codec_dec_cfg_t *cfg,
vpx_codec_flags_t flags,
int ver) {
vpx_codec_err_t res;
@@ -54,9 +54,6 @@
ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
vpx_codec_destroy(ctx);
}
-
- if (ctx->priv)
- ctx->priv->iface = ctx->iface;
}
return SAVE_STATUS(ctx, res);
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index 6e18bd1..736a8da 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -22,7 +22,7 @@
vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx,
vpx_codec_iface_t *iface,
- vpx_codec_enc_cfg_t *cfg,
+ const vpx_codec_enc_cfg_t *cfg,
vpx_codec_flags_t flags,
int ver) {
vpx_codec_err_t res;
@@ -53,9 +53,6 @@
ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
vpx_codec_destroy(ctx);
}
-
- if (ctx->priv)
- ctx->priv->iface = ctx->iface;
}
return SAVE_STATUS(ctx, res);
@@ -135,9 +132,6 @@
}
}
- if (ctx->priv)
- ctx->priv->iface = ctx->iface;
-
if (res)
break;
diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c
index 8c7e3cf..e20703a 100644
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -8,38 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
#include <stdlib.h>
#include <string.h>
+
#include "vpx/vpx_image.h"
#include "vpx/vpx_integer.h"
-
-#define ADDRESS_STORAGE_SIZE sizeof(size_t)
-/*returns an addr aligned to the byte boundary specified by align*/
-#define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align))
-
-/* Memalign code is copied from vpx_mem.c */
-static void *img_buf_memalign(size_t align, size_t size) {
- void *addr,
- * x = NULL;
-
- addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE);
-
- if (addr) {
- x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align);
- /* save the actual malloc address */
- ((size_t *)x)[-1] = (size_t)addr;
- }
-
- return x;
-}
-
-static void img_buf_free(void *memblk) {
- if (memblk) {
- void *addr = (void *)(((size_t *)memblk)[-1]);
- free(addr);
- }
-}
+#include "vpx_mem/vpx_mem.h"
static vpx_image_t *img_alloc_helper(vpx_image_t *img,
vpx_img_fmt_t fmt,
@@ -172,7 +146,7 @@
if (alloc_size != (size_t)alloc_size)
goto fail;
- img->img_data = img_buf_memalign(buf_align, (size_t)alloc_size);
+ img->img_data = (uint8_t *)vpx_memalign(buf_align, (size_t)alloc_size);
img->img_data_owner = 1;
}
@@ -296,7 +270,7 @@
void vpx_img_free(vpx_image_t *img) {
if (img) {
if (img->img_data && img->img_data_owner)
- img_buf_free(img->img_data);
+ vpx_free(img->img_data);
if (img->self_allocd)
free(img);
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 07df72a..91fc532 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -203,9 +203,11 @@
const char *err_detail; /**< Detailed info, if available */
vpx_codec_flags_t init_flags; /**< Flags passed at init time */
union {
- struct vpx_codec_dec_cfg *dec; /**< Decoder Configuration Pointer */
- struct vpx_codec_enc_cfg *enc; /**< Encoder Configuration Pointer */
- void *raw;
+ /**< Decoder Configuration Pointer */
+ const struct vpx_codec_dec_cfg *dec;
+ /**< Encoder Configuration Pointer */
+ const struct vpx_codec_enc_cfg *enc;
+ const void *raw;
} config; /**< Configuration pointer aliasing union */
vpx_codec_priv_t *priv; /**< Algorithm private storage */
} vpx_codec_ctx_t;
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 10b89fa..62fd919 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -135,7 +135,7 @@
*/
vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx,
vpx_codec_iface_t *iface,
- vpx_codec_dec_cfg_t *cfg,
+ const vpx_codec_dec_cfg_t *cfg,
vpx_codec_flags_t flags,
int ver);
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 7dbbf2f..75d3a47 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -188,14 +188,14 @@
has id 0.*/
} frame; /**< data for compressed frame packet */
- struct vpx_fixed_buf twopass_stats; /**< data for two-pass packet */
- struct vpx_fixed_buf firstpass_mb_stats; /**< first pass mb packet */
+ vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */
+ vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
struct vpx_psnr_pkt {
unsigned int samples[4]; /**< Number of samples, total/y/u/v */
uint64_t sse[4]; /**< sum squared error, total/y/u/v */
double psnr[4]; /**< PSNR, total/y/u/v */
} psnr; /**< data for PSNR packet */
- struct vpx_fixed_buf raw; /**< data for arbitrary packets */
+ vpx_fixed_buf_t raw; /**< data for arbitrary packets */
#if CONFIG_SPATIAL_SVC
size_t layer_sizes[VPX_SS_MAX_LAYERS];
#endif
@@ -452,14 +452,14 @@
* A buffer containing all of the stats packets produced in the first
* pass, concatenated.
*/
- struct vpx_fixed_buf rc_twopass_stats_in;
+ vpx_fixed_buf_t rc_twopass_stats_in;
/*!\brief first pass mb stats buffer.
*
* A buffer containing all of the first pass mb stats packets produced
* in the first pass, concatenated.
*/
- struct vpx_fixed_buf rc_firstpass_mb_stats_in;
+ vpx_fixed_buf_t rc_firstpass_mb_stats_in;
/*!\brief Target data rate
*
@@ -715,7 +715,7 @@
*/
vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx,
vpx_codec_iface_t *iface,
- vpx_codec_enc_cfg_t *cfg,
+ const vpx_codec_enc_cfg_t *cfg,
vpx_codec_flags_t flags,
int ver);
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index 870338b..dd98e29 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -11,6 +11,9 @@
#ifndef VPX_PORTS_VPX_TIMER_H_
#define VPX_PORTS_VPX_TIMER_H_
+
+#include "./vpx_config.h"
+
#include "vpx/vpx_integer.h"
#if CONFIG_OS_SUPPORT
diff --git a/vpxdec.c b/vpxdec.c
index faee42a..4125044 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -15,13 +15,16 @@
#include <string.h>
#include <limits.h>
+#include "./vpx_config.h"
+
+#if CONFIG_LIBYUV
#include "third_party/libyuv/include/libyuv/scale.h"
+#endif
#include "./args.h"
#include "./ivfdec.h"
#define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
#include "vpx/vpx_decoder.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
@@ -123,6 +126,7 @@
};
#endif
+#if CONFIG_LIBYUV
static INLINE int vpx_image_scale(vpx_image_t *src, vpx_image_t *dst,
FilterModeEnum mode) {
assert(src->fmt == VPX_IMG_FMT_I420);
@@ -137,6 +141,7 @@
dst->d_w, dst->d_h,
mode);
}
+#endif
void usage_exit() {
int i;
@@ -538,7 +543,8 @@
struct VpxDecInputContext input = {NULL, NULL};
struct VpxInputContext vpx_input_ctx;
#if CONFIG_WEBM_IO
- struct WebmInputContext webm_ctx = {0};
+ struct WebmInputContext webm_ctx;
+ memset(&(webm_ctx), 0, sizeof(webm_ctx));
input.webm_ctx = &webm_ctx;
#endif
input.vpx_input_ctx = &vpx_input_ctx;
@@ -593,7 +599,8 @@
do_scale = 1;
else if (arg_match(&arg, &fb_arg, argi))
num_external_frame_buffers = arg_parse_uint(&arg);
-
+ else if (arg_match(&arg, &continuearg, argi))
+ keep_going = 1;
#if CONFIG_VP8_DECODER
else if (arg_match(&arg, &addnoise_level, argi)) {
postproc = 1;
@@ -643,11 +650,8 @@
}
} else if (arg_match(&arg, &error_concealment, argi)) {
ec_enabled = 1;
- } else if (arg_match(&arg, &continuearg, argi)) {
- keep_going = 1;
}
-
-#endif
+#endif // CONFIG_VP8_DECODER
else
argj++;
}
@@ -883,7 +887,7 @@
display_height = display_size[1];
}
}
- scaled_img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, display_width,
+ scaled_img = vpx_img_alloc(NULL, img->fmt, display_width,
display_height, 16);
scaled_img->bit_depth = img->bit_depth;
}
diff --git a/vpxenc.c b/vpxenc.c
index 7e037a6..d8fc553 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -19,12 +19,15 @@
#include <stdlib.h>
#include <string.h>
+#if CONFIG_LIBYUV
+#include "third_party/libyuv/include/libyuv/scale.h"
+#endif
+
#include "vpx/vpx_encoder.h"
#if CONFIG_DECODERS
#include "vpx/vpx_decoder.h"
#endif
-#include "third_party/libyuv/include/libyuv/scale.h"
#include "./args.h"
#include "./ivfenc.h"
#include "./tools_common.h"
@@ -737,8 +740,9 @@
#if CONFIG_VP9_ENCODER
// Make default VP9 passes = 2 until there is a better quality 1-pass
// encoder
- global->passes = (strcmp(global->codec->name, "vp9") == 0 &&
- global->deadline != VPX_DL_REALTIME) ? 2 : 1;
+ if (global->codec != NULL && global->codec->name != NULL)
+ global->passes = (strcmp(global->codec->name, "vp9") == 0 &&
+ global->deadline != VPX_DL_REALTIME) ? 2 : 1;
#else
global->passes = 1;
#endif
@@ -806,8 +810,10 @@
struct stream_state *stream;
stream = calloc(1, sizeof(*stream));
- if (!stream)
+ if (stream == NULL) {
fatal("Failed to allocate new stream.");
+ }
+
if (prev) {
memcpy(stream, prev, sizeof(*stream));
stream->index++;
@@ -993,12 +999,13 @@
* instance of this control.
*/
for (j = 0; j < config->arg_ctrl_cnt; j++)
- if (config->arg_ctrls[j][0] == ctrl_args_map[i])
+ if (ctrl_args_map != NULL &&
+ config->arg_ctrls[j][0] == ctrl_args_map[i])
break;
/* Update/insert */
assert(j < (int)ARG_CTRL_CNT_MAX);
- if (j < (int)ARG_CTRL_CNT_MAX) {
+ if (ctrl_args_map != NULL && j < (int)ARG_CTRL_CNT_MAX) {
config->arg_ctrls[j][0] = ctrl_args_map[i];
config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg);
if (j == config->arg_ctrl_cnt)
@@ -1785,7 +1792,8 @@
got_data = 0;
FOREACH_STREAM(get_cx_data(stream, &global, &got_data));
- if (!got_data && input.length && !streams->frames_out) {
+ if (!got_data && input.length && streams != NULL &&
+ !streams->frames_out) {
lagged_count = global.limit ? seen_frames : ftello(input.file);
} else if (input.length) {
int64_t remaining;
diff --git a/y4minput.c b/y4minput.c
index 520c332..bcc742a 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -700,7 +700,7 @@
int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
int only_420) {
- char buffer[80];
+ char buffer[80] = {0};
int ret;
int i;
/*Read until newline, or 80 cols, whichever happens first.*/
@@ -978,7 +978,9 @@
_y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
else
_y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz);
- _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
+
+ if (_y4m->aux_buf_sz > 0)
+ _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
return 0;
}