Merge "Inter-frame non-RD mode decision"
diff --git a/examples/decode_to_md5.c b/examples/decode_to_md5.c
index 01e4871..2ceb17a 100644
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -37,12 +37,24 @@
#include "./vpx_config.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
#include "md5_utils.h"
+#define VP8_FOURCC 0x30385056
+#define VP9_FOURCC 0x30395056
+
#define IVF_FILE_HDR_SZ (32)
#define IVF_FRAME_HDR_SZ (12)
+// Maps an IVF FOURCC code to the matching decoder interface.
+// Returns NULL when the FOURCC is neither VP8 nor VP9.
+static vpx_codec_iface_t *get_codec_interface(unsigned int fourcc) {
+  if (fourcc == VP8_FOURCC)
+    return vpx_codec_vp8_dx();
+
+  if (fourcc == VP9_FOURCC)
+    return vpx_codec_vp9_dx();
+
+  return NULL;
+}
+
// Reads a 32-bit little-endian value from the four bytes starting at mem.
//
// Each byte is widened to unsigned int before it is shifted. A plain
// `mem[3] << 24` promotes the byte to (signed) int, and shifting a value
// >= 0x80 into the sign bit is undefined behavior.
static unsigned int mem_get_le32(const unsigned char *mem) {
  return ((unsigned int)mem[3] << 24) | ((unsigned int)mem[2] << 16) |
         ((unsigned int)mem[1] << 8) | (unsigned int)mem[0];
}
@@ -52,7 +64,7 @@
va_start(ap, fmt);
vprintf(fmt, ap);
- if(fmt[strlen(fmt)-1] != '\n')
+ if (fmt[strlen(fmt) - 1] != '\n')
printf("\n");
exit(EXIT_FAILURE);
}
@@ -66,7 +78,7 @@
exit(EXIT_FAILURE);
}
-static void get_image_md5(const vpx_image_t *img, unsigned char md5_sum[16]) {
+static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
int plane, y;
MD5Context md5;
@@ -84,12 +96,20 @@
}
}
- MD5Final(md5_sum, &md5);
+ MD5Final(digest, &md5);
+}
+
+// Writes the 16 digest bytes to stream as two lowercase hex digits each.
+// No separator or trailing newline is emitted.
+static void print_md5(FILE *stream, unsigned char digest[16]) {
+  const unsigned char *const end = digest + 16;
+  const unsigned char *byte;
+
+  for (byte = digest; byte != end; ++byte)
+    fprintf(stream, "%02x", *byte);
}
int main(int argc, char **argv) {
FILE *infile, *outfile;
vpx_codec_ctx_t codec;
+ vpx_codec_iface_t *iface;
int flags = 0, frame_cnt = 0;
unsigned char file_hdr[IVF_FILE_HDR_SZ];
unsigned char frame_hdr[IVF_FRAME_HDR_SZ];
@@ -109,9 +129,14 @@
file_hdr[2] == 'I' && file_hdr[3] == 'F'))
die("%s is not an IVF file.", argv[1]);
- printf("Using %s\n",vpx_codec_iface_name(interface));
+ iface = get_codec_interface(mem_get_le32(file_hdr + 8));
+ if (!iface)
+ die("Unknown FOURCC code.");
- if (vpx_codec_dec_init(&codec, interface, NULL, flags))
+
+ printf("Using %s\n", vpx_codec_iface_name(iface));
+
+ if (vpx_codec_dec_init(&codec, iface, NULL, flags))
die_codec(&codec, "Failed to initialize decoder");
while (fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
@@ -119,25 +144,22 @@
vpx_codec_iter_t iter = NULL;
vpx_image_t *img;
- frame_cnt++;
if (frame_size > sizeof(frame))
die("Frame %d data too big for example code buffer", frame_size);
if (fread(frame, 1, frame_size, infile) != frame_size)
- die("Frame %d failed to read complete frame", frame_cnt);
+ die("Failed to read complete frame");
if (vpx_codec_decode(&codec, frame, frame_size, NULL, 0))
die_codec(&codec, "Failed to decode frame");
while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) {
- int i;
- unsigned char md5_sum[16];
+ unsigned char digest[16];
- get_image_md5(img, md5_sum);
- for (i = 0; i < 16; ++i)
- fprintf(outfile, "%02x", md5_sum[i]);
- fprintf(outfile, " img-%dx%d-%04d.i420\n", img->d_w, img->d_h,
- frame_cnt);
+ get_image_md5(img, digest);
+ print_md5(outfile, digest);
+ fprintf(outfile, " img-%dx%d-%04d.i420\n",
+ img->d_w, img->d_h, ++frame_cnt);
}
}
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index f106bc7..98619bb 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -16,6 +16,7 @@
EXPORT |vp9_h_predictor_8x8_neon|
EXPORT |vp9_h_predictor_16x16_neon|
EXPORT |vp9_h_predictor_32x32_neon|
+ EXPORT |vp9_tm_predictor_4x4_neon|
ARM
REQUIRE8
PRESERVE8
@@ -283,4 +284,52 @@
bx lr
ENDP ; |vp9_h_predictor_32x32_neon|
+;void vp9_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
+; const uint8_t *above,
+; const uint8_t *left)
+; r0 uint8_t *dst
+; r1 ptrdiff_t y_stride
+; r2 const uint8_t *above
+; r3 const uint8_t *left
+
+|vp9_tm_predictor_4x4_neon| PROC ; 4x4 block: dst[r][c] = clamp_u8(left[r] + above[c] - above[-1])
+ ; Load the top-left pixel, above[-1], and replicate it across d0
+ sub r12, r2, #1 ; r12 = address of above[-1]
+ ldrb r12, [r12] ; r12 = above[-1]
+ vdup.u8 d0, r12 ; every byte lane of d0 = above[-1]
+
+ ; Load the 4 above pixels into the low 32 bits of d2
+ vld1.32 {d2[0]}, [r2]
+
+ ; Compute above - top_left, widened to 16 bits per lane (lanes 0-3 are the ones stored later)
+ vsubl.u8 q3, d2, d0
+
+ ; Load left one pixel per row and compute left + (above - top_left)
+ ; 1st row and 2nd row
+ ldrb r12, [r3], #1 ; r12 = left[0], then advance left pointer
+ ldrb r2, [r3], #1 ; r2 = left[1]; r2 is free to reuse, above was already loaded
+ vdup.u16 q1, r12 ; broadcast row pixel to all 16-bit lanes (q1 aliases d2/d3, no longer needed)
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3 ; left + (above - top_left), may be < 0 or > 255
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0 ; shift by 0: saturate signed 16-bit to unsigned 8-bit (d0 reused as output)
+ vqshrun.s16 d1, q2, #0
+ vst1.32 {d0[0]}, [r0], r1 ; store 4 pixels of this row, then dst += y_stride
+ vst1.32 {d1[0]}, [r0], r1
+
+ ; 3rd row and 4th row (same sequence; r3 already points at left[2])
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.32 {d0[0]}, [r0], r1
+ vst1.32 {d1[0]}, [r0], r1
+
+ bx lr ; return to caller
+ ENDP ; |vp9_tm_predictor_4x4_neon|
+
END
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index a79b45c..8f858f4 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -57,7 +57,7 @@
specialize vp9_v_predictor_4x4 $sse_x86inc neon
prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_4x4 $sse_x86inc dspr2
+specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2
prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 2c7c86e..42d4196 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -2603,17 +2603,19 @@
recon_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d"
- "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
- "%6d %6d %5d %5d %5d %10d %10.3f"
- "%10.3f %8d %10d %10d %10d\n",
+ fprintf(f, "%10u %10d %10d %10d %10d %10d "
+ "%10"PRId64" %10"PRId64" %10d "
+ "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
+ "%6d %6d %5d %5d %5d "
+ "%10"PRId64" %10.3lf"
+ "%10lf %8u %10d %10d %10d\n",
cpi->common.current_video_frame, cpi->rc.this_frame_target,
cpi->rc.projected_frame_size,
cpi->rc.projected_frame_size / cpi->common.MBs,
(cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
- (int)cpi->rc.total_target_vs_actual,
- (int)(cpi->oxcf.starting_buffer_level - cpi->rc.bits_off_target),
- (int)cpi->rc.total_actual_bits, cm->base_qindex,
+ cpi->rc.total_target_vs_actual,
+ (cpi->oxcf.starting_buffer_level - cpi->rc.bits_off_target),
+ cpi->rc.total_actual_bits, cm->base_qindex,
vp9_convert_qindex_to_q(cm->base_qindex),
(double)vp9_dc_quant(cm->base_qindex, 0) / 4.0,
vp9_convert_qindex_to_q(cpi->rc.active_worst_quality), cpi->rc.avg_q,
@@ -2621,9 +2623,9 @@
vp9_convert_qindex_to_q(cpi->cq_target_quality),
cpi->refresh_last_frame, cpi->refresh_golden_frame,
cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
- (int)cpi->twopass.bits_left,
+ cpi->twopass.bits_left,
cpi->twopass.total_left_stats.coded_error,
- (double)cpi->twopass.bits_left /
+ cpi->twopass.bits_left /
(1 + cpi->twopass.total_left_stats.coded_error),
cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
cpi->twopass.kf_zeromotion_pct);
@@ -2707,14 +2709,16 @@
// Dummy pack of the bitstream using up to date stats to get an
// accurate estimate of output frame size to determine if we need
// to recode.
- vp9_save_coding_context(cpi);
- cpi->dummy_packing = 1;
- vp9_pack_bitstream(cpi, dest, size);
- cpi->rc.projected_frame_size = (*size) << 3;
- vp9_restore_coding_context(cpi);
+ if (cpi->sf.recode_loop != 0) {
+ vp9_save_coding_context(cpi);
+ cpi->dummy_packing = 1;
+ vp9_pack_bitstream(cpi, dest, size);
+ cpi->rc.projected_frame_size = (*size) << 3;
+ vp9_restore_coding_context(cpi);
- if (frame_over_shoot_limit == 0)
- frame_over_shoot_limit = 1;
+ if (frame_over_shoot_limit == 0)
+ frame_over_shoot_limit = 1;
+ }
if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
loop = 0;