Merge "vp9_decoder_remove: destroy common after thread shutdown"
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index f9c09c6..8940027 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -653,6 +653,8 @@
e3ab35d4316c5e81325c50f5236ceca4bc0d35df vp90-2-15-segkey.webm.md5
9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d vp90-2-15-segkey_adpq.webm
8f46ba5f785d0c2170591a153e0d0d146a7c8090 vp90-2-15-segkey_adpq.webm.md5
+698a6910a97486b833073ef0c0b18d75dce57ee8 vp90-2-16-intra-only.webm
+5661b0168752969f055eec37b05fa9fa947dc7eb vp90-2-16-intra-only.webm.md5
0321d507ce62dedc8a51b4e9011f7a19aed9c3dc vp91-2-04-yuv444.webm
367e423dd41fdb49aa028574a2cfec5c2f325c5c vp91-2-04-yuv444.webm.md5
76024eb753cdac6a5e5703aaea189d35c3c30ac7 invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf
diff --git a/test/test.mk b/test/test.mk
index 85212d9..ef81ab1 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -771,6 +771,8 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-16-intra-only.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-16-intra-only.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 41c9e26..4955887 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -180,6 +180,7 @@
"vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
"vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
"vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
+ "vp90-2-16-intra-only.webm",
"vp91-2-04-yuv444.webm",
};
const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index 72719a6..fa51835 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -18,7 +18,7 @@
#if CONFIG_WEBM_IO
#include "test/webm_video_source.h"
#endif
-#include "vp9/decoder/vp9_thread.h"
+#include "vp9/common/vp9_thread.h"
namespace {
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index e1753a1..afe831a 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -257,10 +257,14 @@
xd->mi_stride = cm->mi_stride;
}
+static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
+ return cm->frame_type == KEY_FRAME || cm->intra_only;
+}
+
static INLINE const vp9_prob* get_partition_probs(const VP9_COMMON *cm,
int ctx) {
- return cm->frame_type == KEY_FRAME ? vp9_kf_partition_probs[ctx]
- : cm->fc.partition_prob[ctx];
+ return frame_is_intra_only(cm) ? vp9_kf_partition_probs[ctx]
+ : cm->fc.partition_prob[ctx];
}
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
@@ -299,10 +303,6 @@
cm->prev_mip + cm->mi_stride + 1 : NULL;
}
-static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
- return cm->frame_type == KEY_FRAME || cm->intra_only;
-}
-
static INLINE void update_partition_context(MACROBLOCKD *xd,
int mi_row, int mi_col,
BLOCK_SIZE subsize,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index f52dccb..b182f3f 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -717,6 +717,9 @@
add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp/, "$ssse3_x86_64";
+add_proto qw/void vp9_quantize_fp_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
+
add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
diff --git a/vp9/decoder/vp9_thread.c b/vp9/common/vp9_thread.c
similarity index 100%
rename from vp9/decoder/vp9_thread.c
rename to vp9/common/vp9_thread.c
diff --git a/vp9/decoder/vp9_thread.h b/vp9/common/vp9_thread.h
similarity index 100%
rename from vp9/decoder/vp9_thread.h
rename to vp9/common/vp9_thread.h
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 8b96abb..55d5b4f 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -28,6 +28,7 @@
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_thread.h"
#include "vp9/common/vp9_tile_common.h"
#include "vp9/decoder/vp9_decodeframe.h"
@@ -38,7 +39,6 @@
#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/decoder/vp9_reader.h"
-#include "vp9/decoder/vp9_thread.h"
#define MAX_VP9_HEADER_SIZE 80
@@ -605,8 +605,8 @@
: literal_to_filter[vp9_rb_read_literal(rb, 2)];
}
-static void read_frame_size(struct vp9_read_bit_buffer *rb,
- int *width, int *height) {
+void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
+ int *width, int *height) {
const int w = vp9_rb_read_literal(rb, 16) + 1;
const int h = vp9_rb_read_literal(rb, 16) + 1;
*width = w;
@@ -617,7 +617,7 @@
cm->display_width = cm->width;
cm->display_height = cm->height;
if (vp9_rb_read_bit(rb))
- read_frame_size(rb, &cm->display_width, &cm->display_height);
+ vp9_read_frame_size(rb, &cm->display_width, &cm->display_height);
}
static void apply_frame_size(VP9_COMMON *cm, int width, int height) {
@@ -649,7 +649,7 @@
static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
int width, height;
- read_frame_size(rb, &width, &height);
+ vp9_read_frame_size(rb, &width, &height);
apply_frame_size(cm, width, height);
setup_display_size(cm, rb);
}
@@ -669,7 +669,7 @@
}
if (!found)
- read_frame_size(rb, &width, &height);
+ vp9_read_frame_size(rb, &width, &height);
// Check that each of the frames that this frame references has valid
// dimensions.
@@ -1053,20 +1053,17 @@
return bit_reader_end;
}
-static void check_sync_code(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
- if (vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_0 ||
- vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_1 ||
- vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_2) {
- vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
- "Invalid frame sync code");
- }
-}
-
static void error_handler(void *data) {
VP9_COMMON *const cm = (VP9_COMMON *)data;
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
}
+int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) {
+ return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 &&
+ vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 &&
+ vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2;
+}
+
static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) {
int profile = vp9_rb_read_bit(rb);
profile |= vp9_rb_read_bit(rb) << 1;
@@ -1112,7 +1109,9 @@
cm->error_resilient_mode = vp9_rb_read_bit(rb);
if (cm->frame_type == KEY_FRAME) {
- check_sync_code(cm, rb);
+ if (!vp9_read_sync_code(rb))
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Invalid frame sync code");
if (cm->profile > PROFILE_1)
cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10;
cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
@@ -1150,9 +1149,18 @@
0 : vp9_rb_read_literal(rb, 2);
if (cm->intra_only) {
- check_sync_code(cm, rb);
+ if (!vp9_read_sync_code(rb))
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Invalid frame sync code");
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
+
+ // NOTE: The intra-only frame header does not include the specification of
+ // either the color format or color sub-sampling. VP9 specifies that the
+ // default color space should be YUV 4:2:0 in this case (normative).
+ cm->color_space = BT_601;
+ cm->subsampling_y = cm->subsampling_x = 1;
+
setup_frame_size(cm, rb);
} else {
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index fb15645..e5d9d62 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -18,6 +18,7 @@
struct VP9Common;
struct VP9Decoder;
+struct vp9_read_bit_buffer;
void vp9_init_dequantizer(struct VP9Common *cm);
@@ -25,6 +26,10 @@
const uint8_t *data, const uint8_t *data_end,
const uint8_t **p_data_end);
+int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb);
+void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
+ int *width, int *height);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index a0bd2f1..a1a78a9 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -267,7 +267,10 @@
vp9_decode_frame(pbi, source, source + size, psource);
- swap_frame_buffers(pbi);
+ if (!cm->show_existing_frame)
+ swap_frame_buffers(pbi);
+ else
+ cm->frame_to_show = get_frame_new_buffer(cm);
vp9_clear_system_state();
@@ -291,6 +294,7 @@
int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
vp9_ppflags_t *flags) {
+ VP9_COMMON *const cm = &pbi->common;
int ret = -1;
#if !CONFIG_VP9_POSTPROC
(void)*flags;
@@ -300,15 +304,20 @@
return ret;
/* no raw frame to show!!! */
- if (pbi->common.show_frame == 0)
+ if (!cm->show_frame)
return ret;
pbi->ready_for_new_data = 1;
#if CONFIG_VP9_POSTPROC
- ret = vp9_post_proc_frame(&pbi->common, sd, flags);
+ if (!cm->show_existing_frame) {
+ ret = vp9_post_proc_frame(cm, sd, flags);
+ } else {
+ *sd = *cm->frame_to_show;
+ ret = 0;
+ }
#else
- *sd = *pbi->common.frame_to_show;
+ *sd = *cm->frame_to_show;
ret = 0;
#endif /*!CONFIG_POSTPROC*/
vp9_clear_system_state();
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index ab4f9a2..8e16e1c 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -18,10 +18,9 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_ppflags.h"
+#include "vp9/common/vp9_thread.h"
-#include "vp9/decoder/vp9_decoder.h"
#include "vp9/decoder/vp9_dthread.h"
-#include "vp9/decoder/vp9_thread.h"
#ifdef __cplusplus
extern "C" {
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index a727e2a..423bd88 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -12,8 +12,8 @@
#define VP9_DECODER_VP9_DTHREAD_H_
#include "./vpx_config.h"
+#include "vp9/common/vp9_thread.h"
#include "vp9/decoder/vp9_reader.h"
-#include "vp9/decoder/vp9_thread.h"
struct VP9Common;
struct VP9Decoder;
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index f6393e0..dbf8cd7 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -190,12 +190,19 @@
MACROBLOCKD *filter_mbd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi;
+ MB_MODE_INFO saved_mbmi;
+ int i, j;
+ struct buf_2d saved_dst[MAX_MB_PLANE];
+ struct buf_2d saved_pre[MAX_MB_PLANE][2]; // 2 pre buffers
+
// We will restore these after motion compensation.
- MB_MODE_INFO saved_mbmi = *mbmi;
- struct buf_2d saved_dst = filter_mbd->plane[0].dst;
- struct buf_2d saved_pre[2];
- saved_pre[0] = filter_mbd->plane[0].pre[0];
- saved_pre[1] = filter_mbd->plane[0].pre[1];
+ saved_mbmi = *mbmi;
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (j = 0; j < 2; ++j) {
+ saved_pre[i][j] = filter_mbd->plane[i].pre[j];
+ }
+ saved_dst[i] = filter_mbd->plane[i].dst;
+ }
mv_col = denoiser->best_sse_mv.as_mv.col;
mv_row = denoiser->best_sse_mv.as_mv.row;
@@ -224,67 +231,52 @@
// Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser
// struct.
- filter_mbd->plane[0].pre[0].buf =
- block_start(denoiser->running_avg_y[frame].y_buffer,
- denoiser->running_avg_y[frame].y_stride,
- mi_row, mi_col);
- filter_mbd->plane[0].pre[0].stride = denoiser->running_avg_y[frame].y_stride;
-
- filter_mbd->plane[1].pre[0].buf =
- block_start(denoiser->running_avg_y[frame].u_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[1].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
-
- filter_mbd->plane[2].pre[0].buf =
- block_start(denoiser->running_avg_y[frame].v_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
-
- filter_mbd->plane[0].pre[1].buf =
- block_start(denoiser->running_avg_y[frame].y_buffer,
- denoiser->running_avg_y[frame].y_stride,
- mi_row, mi_col);
- filter_mbd->plane[0].pre[1].stride = denoiser->running_avg_y[frame].y_stride;
-
- filter_mbd->plane[1].pre[1].buf =
- block_start(denoiser->running_avg_y[frame].u_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[1].pre[1].stride = denoiser->running_avg_y[frame].uv_stride;
-
- filter_mbd->plane[2].pre[1].buf =
- block_start(denoiser->running_avg_y[frame].v_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[2].pre[1].stride = denoiser->running_avg_y[frame].uv_stride;
-
+ for (j = 0; j < 2; ++j) {
+ filter_mbd->plane[0].pre[j].buf =
+ block_start(denoiser->running_avg_y[frame].y_buffer,
+ denoiser->running_avg_y[frame].y_stride,
+ mi_row, mi_col);
+ filter_mbd->plane[0].pre[j].stride =
+ denoiser->running_avg_y[frame].y_stride;
+ filter_mbd->plane[1].pre[j].buf =
+ block_start(denoiser->running_avg_y[frame].u_buffer,
+ denoiser->running_avg_y[frame].uv_stride,
+ mi_row, mi_col);
+ filter_mbd->plane[1].pre[j].stride =
+ denoiser->running_avg_y[frame].uv_stride;
+ filter_mbd->plane[2].pre[j].buf =
+ block_start(denoiser->running_avg_y[frame].v_buffer,
+ denoiser->running_avg_y[frame].uv_stride,
+ mi_row, mi_col);
+ filter_mbd->plane[2].pre[j].stride =
+ denoiser->running_avg_y[frame].uv_stride;
+ }
filter_mbd->plane[0].dst.buf =
block_start(denoiser->mc_running_avg_y.y_buffer,
denoiser->mc_running_avg_y.y_stride,
mi_row, mi_col);
filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
-
filter_mbd->plane[1].dst.buf =
block_start(denoiser->mc_running_avg_y.u_buffer,
denoiser->mc_running_avg_y.uv_stride,
mi_row, mi_col);
- filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.y_stride;
-
+ filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
filter_mbd->plane[2].dst.buf =
block_start(denoiser->mc_running_avg_y.v_buffer,
denoiser->mc_running_avg_y.uv_stride,
mi_row, mi_col);
- filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.y_stride;
+ filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
vp9_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs);
// Restore everything to its original state
- filter_mbd->plane[0].pre[0] = saved_pre[0];
- filter_mbd->plane[0].pre[1] = saved_pre[1];
- filter_mbd->plane[0].dst = saved_dst;
*mbmi = saved_mbmi;
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (j = 0; j < 2; ++j) {
+ filter_mbd->plane[i].pre[j] = saved_pre[i][j];
+ }
+ filter_mbd->plane[i].dst = saved_dst[i];
+ }
mv_row = denoiser->best_sse_mv.as_mv.row;
mv_col = denoiser->best_sse_mv.as_mv.col;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index eb9624d..cd0191e 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -320,10 +320,10 @@
switch (tx_size) {
case TX_32X32:
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan_order->scan,
- scan_order->iscan);
+ vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan_order->scan,
+ scan_order->iscan);
break;
case TX_16X16:
vp9_fdct16x16(src_diff, coeff, diff_stride);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index e0e0561..a240622 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -862,9 +862,7 @@
#if CONFIG_DENOISING
#ifdef OUTPUT_YUV_DENOISED
- if (cpi->oxcf.noise_sensitivity > 0) {
- yuv_denoised_file = fopen("denoised.yuv", "ab");
- }
+ yuv_denoised_file = fopen("denoised.yuv", "ab");
#endif
#endif
#ifdef OUTPUT_YUV_SRC
@@ -1122,9 +1120,7 @@
#if CONFIG_DENOISING
#ifdef OUTPUT_YUV_DENOISED
- if (cpi->oxcf.noise_sensitivity > 0) {
- fclose(yuv_denoised_file);
- }
+ fclose(yuv_denoised_file);
#endif
#endif
#ifdef OUTPUT_YUV_SRC
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index c66e003..9f8b37f 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -338,7 +338,6 @@
CYCLIC_REFRESH *cyclic_refresh;
fractional_mv_step_fp *find_fractional_mv_step;
- fractional_mv_step_comp_fp *find_fractional_mv_step_comp;
vp9_full_search_fn_t full_search_sad;
vp9_refining_search_fn_t refining_search_sad;
vp9_diamond_search_fn_t diamond_search_sad;
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 9eb2fbc..6e04e2a 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -56,7 +56,7 @@
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step, NULL, NULL, &distortion,
- &sse);
+ &sse, NULL, 0, 0);
}
xd->mi[0]->mbmi.mode = NEWMV;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index c0edf45..01d2b44 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -172,15 +172,15 @@
return &buf[(r >> 3) * stride + (c >> 3)];
}
-/* returns subpixel variance error function */
-#define DIST(r, c) \
- vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
- src_stride, &sse)
-
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- thismse = (DIST(r, c)); \
+ if (second_pred == NULL) \
+ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
+ src_stride, &sse); \
+ else \
+ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ z, src_stride, &sse, second_pred); \
if ((v = MVC(r, c) + thismse) < besterr) { \
besterr = v; \
br = r; \
@@ -266,105 +266,9 @@
int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion,
- unsigned int *sse1) {
- const uint8_t *const z = x->plane[0].src.buf;
- const int src_stride = x->plane[0].src.stride;
- const MACROBLOCKD *xd = &x->e_mbd;
- unsigned int besterr = INT_MAX;
- unsigned int sse;
- unsigned int whichdir;
- int thismse;
- unsigned int halfiters = iters_per_step;
- unsigned int quarteriters = iters_per_step;
- unsigned int eighthiters = iters_per_step;
-
- const int y_stride = xd->plane[0].pre[0].stride;
- const int offset = bestmv->row * y_stride + bestmv->col;
- const uint8_t *const y = xd->plane[0].pre[0].buf;
-
- int rr = ref_mv->row;
- int rc = ref_mv->col;
- int br = bestmv->row * 8;
- int bc = bestmv->col * 8;
- int hstep = 4;
- const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
- const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
-
- int tr = br;
- int tc = bc;
-
- // central mv
- bestmv->row *= 8;
- bestmv->col *= 8;
-
- // calculate central point error
- besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- // 1/2 pel
- FIRST_LEVEL_CHECKS;
- if (halfiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
-
- // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
- if (forced_stop != 2) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (quarteriters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
- }
-
- if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (eighthiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
- }
- // These lines insure static analysis doesn't warn that
- // tr and tc aren't used after the above point.
- (void) tr;
- (void) tc;
-
- bestmv->row = br;
- bestmv->col = bc;
-
- if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
- return INT_MAX;
-
- return besterr;
-}
-
-#undef DIST
-/* returns subpixel variance error function */
-#define DIST(r, c) \
- vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
- z, src_stride, &sse, second_pred)
-
-int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
- MV *bestmv, const MV *ref_mv,
- int allow_hp,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop,
- int iters_per_step,
- int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
- const uint8_t *second_pred,
- int w, int h) {
+ unsigned int *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
const uint8_t *const z = x->plane[0].src.buf;
const int src_stride = x->plane[0].src.stride;
const MACROBLOCKD *xd = &x->e_mbd;
@@ -376,7 +280,6 @@
const unsigned int quarteriters = iters_per_step;
const unsigned int eighthiters = iters_per_step;
- DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
const int y_stride = xd->plane[0].pre[0].stride;
const int offset = bestmv->row * y_stride + bestmv->col;
const uint8_t *const y = xd->plane[0].pre[0].buf;
@@ -401,8 +304,13 @@
// calculate central point error
// TODO(yunqingwang): central pointer error was already calculated in full-
// pixel search, and can be passed in this function.
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
- besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
+ } else {
+ besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
+ }
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -456,7 +364,6 @@
#undef MVC
#undef PRE
-#undef DIST
#undef CHECK_BETTER
static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 07e410d..366f9af 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -98,27 +98,12 @@
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step,
- int *mvjcost,
- int *mvcost[2],
- int *distortion,
- unsigned int *sse);
-
-extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
-
-typedef int (fractional_mv_step_comp_fp) (
- const MACROBLOCK *x,
- MV *bestmv, const MV *ref_mv,
- int allow_hp,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
- int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
const uint8_t *second_pred,
int w, int h);
-extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_tree;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
const MV *ref_mv, int sad_per_bit,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 7515f44..c915e5c 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -106,24 +106,25 @@
return const_motion;
}
-static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- int_mv *tmp_mv, int *rate_mv) {
+static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int_mv *tmp_mv, int *rate_mv,
+ int64_t best_rd_sofar) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
- int step_param;
- int sadpb = x->sadperbit16;
+ const int step_param = cpi->sf.mv.fullpel_search_step_param;
+ const int sadpb = x->sadperbit16;
MV mvp_full;
- int ref = mbmi->ref_frame[0];
+ const int ref = mbmi->ref_frame[0];
const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
- int i;
-
- int tmp_col_min = x->mv_col_min;
- int tmp_col_max = x->mv_col_max;
- int tmp_row_min = x->mv_row_min;
- int tmp_row_max = x->mv_row_max;
-
+ int dis;
+ int rate_mode;
+ const int tmp_col_min = x->mv_col_min;
+ const int tmp_col_max = x->mv_col_max;
+ const int tmp_row_min = x->mv_row_min;
+ const int tmp_row_max = x->mv_row_max;
+ int rv = 0;
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
if (scaled_ref_frame) {
@@ -133,27 +134,19 @@
// motion search code to be used without additional modifications.
for (i = 0; i < MAX_MB_PLANE; i++)
backup_yv12[i] = xd->plane[i].pre[0];
-
vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
}
-
vp9_set_mv_search_range(x, &ref_mv);
- // TODO(jingning) exploiting adaptive motion search control in non-RD
- // mode decision too.
- step_param = cpi->sf.mv.fullpel_search_step_param;
-
- for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) {
- if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
- tmp_mv->as_int = INVALID_MV;
-
- if (scaled_ref_frame) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[0] = backup_yv12[i];
- }
- return;
+ if (cpi->common.show_frame &&
+ (x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[LAST_FRAME]) {
+ tmp_mv->as_int = INVALID_MV;
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[0] = backup_yv12[i];
}
+ return rv;
}
assert(x->mv_best_ref_index[ref] <= 2);
if (x->mv_best_ref_index[ref] < 2)
@@ -172,60 +165,39 @@
x->mv_row_min = tmp_row_min;
x->mv_row_max = tmp_row_max;
- if (scaled_ref_frame) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[0] = backup_yv12[i];
- }
-
// calculate the bit cost on motion vector
mvp_full.row = tmp_mv->as_mv.row * 8;
mvp_full.col = tmp_mv->as_mv.col * 8;
+
*rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-}
-static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- MV *tmp_mv) {
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
- int ref = mbmi->ref_frame[0];
- MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
- int dis;
+ rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref]]
+ [INTER_OFFSET(NEWMV)];
+ rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) >
+ best_rd_sofar);
- const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
- ref);
- if (scaled_ref_frame) {
- int i;
- // Swap out the reference frame for a version that's been scaled to
- // match the resolution of the current frame, allowing the existing
- // motion search code to be used without additional modifications.
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_yv12[i] = xd->plane[i].pre[0];
-
- vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
+ if (rv) {
+ cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ x->nmvjointcost, x->mvcost,
+ &dis, &x->pred_sse[ref], NULL, 0, 0);
+ x->pred_mv[ref] = tmp_mv->as_mv;
}
- cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
- cpi->common.allow_high_precision_mv,
- x->errorperbit,
- &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
- x->nmvjointcost, x->mvcost,
- &dis, &x->pred_sse[ref]);
-
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
-
- x->pred_mv[ref] = *tmp_mv;
+ return rv;
}
+
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
@@ -544,28 +516,17 @@
continue;
if (this_mode == NEWMV) {
- int rate_mode = 0;
if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
-
- full_pixel_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame], &rate_mv);
-
- if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
+ if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame],
+ &rate_mv, best_rd))
continue;
-
- rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
- [INTER_OFFSET(this_mode)];
- if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd)
- continue;
-
- sub_pixel_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame].as_mv);
}
- if (this_mode != NEARESTMV)
- if (frame_mv[this_mode][ref_frame].as_int ==
- frame_mv[NEARESTMV][ref_frame].as_int)
+ if (this_mode != NEARESTMV &&
+ frame_mv[this_mode][ref_frame].as_int ==
+ frame_mv[NEARESTMV][ref_frame].as_int)
continue;
mbmi->mode = this_mode;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 4964e0f..370e1ce 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -104,6 +104,49 @@
*eob_ptr = eob + 1;
}
+// TODO(jingning) Refactor this file and combine functions with similar
+// operations.
+void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ int i, eob = -1;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)zbin_oq_value;
+ (void)iscan;
+
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+
+ if (!skip_block) {
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ int tmp = 0;
+ int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
+ abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+ tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+ }
+
+ if (tmp)
+ eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index a8daa21..998fb3c 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1422,7 +1422,8 @@
cpi->sf.mv.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&distortion,
- &x->pred_sse[mbmi->ref_frame[0]]);
+ &x->pred_sse[mbmi->ref_frame[0]],
+ NULL, 0, 0);
// save motion search result for use in compound prediction
seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
@@ -1838,7 +1839,7 @@
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
- &dis, &x->pred_sse[ref]);
+ &dis, &x->pred_sse[ref], NULL, 0, 0);
}
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
@@ -1954,7 +1955,7 @@
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- bestsme = cpi->find_fractional_mv_step_comp(
+ bestsme = cpi->find_fractional_mv_step(
x, &tmp_mv,
&ref_mv[id].as_mv,
cpi->common.allow_high_precision_mv,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 98d6825..f271182 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -253,6 +253,7 @@
}
if (speed >= 5) {
+ sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ?
RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
sf->max_partition_size = BLOCK_32X32;
@@ -265,7 +266,6 @@
sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
sf->use_nonrd_pick_mode = 1;
- sf->mv.search_method = FAST_DIAMOND;
sf->allow_skip_recode = 0;
}
@@ -287,7 +287,7 @@
sf->mv.reduce_first_step_size = 1;
}
if (speed >= 7) {
- sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
+ sf->mv.search_method = FAST_DIAMOND;
sf->mv.fullpel_search_step_param = 10;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
@@ -396,7 +396,6 @@
if (sf->mv.subpel_search_method == SUBPEL_TREE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
- cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree;
}
cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index c090731..bcea100 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -178,7 +178,7 @@
&cpi->fn_ptr[BLOCK_16X16],
0, mv_sf->subpel_iters_per_step,
NULL, NULL,
- &distortion, &sse);
+ &distortion, &sse, NULL, 0, 0);
// Restore input state
x->plane[0].src = src;
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 2d9f2b0..508e1d4 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -234,21 +234,18 @@
movifnidn quantq, quantmp
mova m1, [roundq] ; m1 = round
mova m2, [quantq] ; m2 = quant
-%ifidn %1, b_32x32
-; TODO(jingning) to be continued with 32x32 quantization process
+%ifidn %1, fp_32x32
pcmpeqw m5, m5
psrlw m5, 15
- paddw m0, m5
paddw m1, m5
- psrlw m0, 1 ; m0 = (m0 + 1) / 2
psrlw m1, 1 ; m1 = (m1 + 1) / 2
%endif
mova m3, [r2q] ; m3 = dequant
mov r3, qcoeffmp
mov r4, dqcoeffmp
mov r5, iscanmp
-%ifidn %1, b_32x32
- psllw m4, 1
+%ifidn %1, fp_32x32
+ psllw m2, 1
%endif
pxor m5, m5 ; m5 = dedicated zero
DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
@@ -275,18 +272,19 @@
psignw m13, m10 ; m13 = reinsert sign
mova [qcoeffq+ncoeffq*2+ 0], m8
mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
pabsw m8, m8
pabsw m13, m13
%endif
pmullw m8, m3 ; dqc[i] = qc[i] * q
punpckhqdq m3, m3
pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
psrlw m8, 1
psrlw m13, 1
psignw m8, m9
psignw m13, m10
+ psrlw m0, m3, 2
%endif
mova [dqcoeffq+ncoeffq*2+ 0], m8
mova [dqcoeffq+ncoeffq*2+16], m13
@@ -307,13 +305,17 @@
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
pabsw m6, m9 ; m6 = abs(m9)
pabsw m11, m10 ; m11 = abs(m10)
- pcmpeqw m7, m7
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
+ pcmpgtw m7, m6, m0
+ pcmpgtw m12, m11, m0
pmovmskb r6, m7
- pmovmskb r2, m7
+ pmovmskb r2, m12
+
or r6, r2
jz .skip_iter
%endif
+ pcmpeqw m7, m7
+
paddsw m6, m1 ; m6 += round
paddsw m11, m1 ; m11 += round
pmulhw m14, m6, m2 ; m14 = m6*q>>16
@@ -322,13 +324,13 @@
psignw m13, m10 ; m13 = reinsert sign
mova [qcoeffq+ncoeffq*2+ 0], m14
mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
pabsw m14, m14
pabsw m13, m13
%endif
pmullw m14, m3 ; dqc[i] = qc[i] * q
pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
psrlw m14, 1
psrlw m13, 1
psignw m14, m9
@@ -349,7 +351,7 @@
add ncoeffq, mmsize
jl .ac_only_loop
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
jmp .accumulate_eob
.skip_iter:
mova [qcoeffq+ncoeffq*2+ 0], m5
@@ -397,3 +399,4 @@
INIT_XMM ssse3
QUANTIZE_FP fp, 7
+QUANTIZE_FP fp_32x32, 7
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 8c1f345..81fe6a6 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -50,6 +50,8 @@
VP9_COMMON_SRCS-yes += common/vp9_seg_common.c
VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h
VP9_COMMON_SRCS-yes += common/vp9_textblit.h
+VP9_COMMON_SRCS-yes += common/vp9_thread.h
+VP9_COMMON_SRCS-yes += common/vp9_thread.c
VP9_COMMON_SRCS-yes += common/vp9_tile_common.h
VP9_COMMON_SRCS-yes += common/vp9_tile_common.c
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index b150161..24dcbfa 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -353,7 +353,7 @@
oxcf->key_freq = cfg->kf_max_dist;
- oxcf->speed = clamp(abs(extra_cfg->cpu_used), 0, 7);
+ oxcf->speed = abs(extra_cfg->cpu_used);
oxcf->encode_breakout = extra_cfg->static_thresh;
oxcf->play_alternate = extra_cfg->enable_auto_alt_ref;
oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index c3ca7ee..2591852 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -20,6 +20,7 @@
#include "vp9/common/vp9_frame_buffers.h"
#include "vp9/decoder/vp9_decoder.h"
+#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/vp9_iface_common.h"
@@ -98,8 +99,10 @@
static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
unsigned int data_sz,
vpx_codec_stream_info_t *si,
+ int *is_intra_only,
vpx_decrypt_cb decrypt_cb,
void *decrypt_state) {
+ int intra_only_flag = 0;
uint8_t clear_buffer[9];
if (data + data_sz <= data)
@@ -115,6 +118,8 @@
}
{
+ int show_frame;
+ int error_resilient;
struct vp9_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
const int frame_marker = vp9_rb_read_literal(&rb, 2);
const int version = vp9_rb_read_bit(&rb);
@@ -126,6 +131,7 @@
if (version > 1) return VPX_CODEC_UNSUP_BITSTREAM;
if (vp9_rb_read_bit(&rb)) { // show an existing frame
+ vp9_rb_read_literal(&rb, 3); // Frame buffer to show.
return VPX_CODEC_OK;
}
@@ -133,18 +139,15 @@
return VPX_CODEC_UNSUP_BITSTREAM;
si->is_kf = !vp9_rb_read_bit(&rb);
+ show_frame = vp9_rb_read_bit(&rb);
+ error_resilient = vp9_rb_read_bit(&rb);
+
if (si->is_kf) {
const int sRGB = 7;
int colorspace;
- rb.bit_offset += 1; // show frame
- rb.bit_offset += 1; // error resilient
-
- if (vp9_rb_read_literal(&rb, 8) != VP9_SYNC_CODE_0 ||
- vp9_rb_read_literal(&rb, 8) != VP9_SYNC_CODE_1 ||
- vp9_rb_read_literal(&rb, 8) != VP9_SYNC_CODE_2) {
+ if (!vp9_read_sync_code(&rb))
return VPX_CODEC_UNSUP_BITSTREAM;
- }
colorspace = vp9_rb_read_literal(&rb, 3);
if (colorspace != sRGB) {
@@ -161,20 +164,28 @@
return VPX_CODEC_UNSUP_BITSTREAM;
}
}
+ vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
+ } else {
+ intra_only_flag = show_frame ? 0 : vp9_rb_read_bit(&rb);
+ rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context
- // TODO(jzern): these are available on non-keyframes in intra only mode.
- si->w = vp9_rb_read_literal(&rb, 16) + 1;
- si->h = vp9_rb_read_literal(&rb, 16) + 1;
+ if (intra_only_flag) {
+ if (!vp9_read_sync_code(&rb))
+ return VPX_CODEC_UNSUP_BITSTREAM;
+ rb.bit_offset += REF_FRAMES; // refresh_frame_flags
+ vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
+ }
}
}
-
+ if (is_intra_only != NULL)
+ *is_intra_only = intra_only_flag;
return VPX_CODEC_OK;
}
static vpx_codec_err_t decoder_peek_si(const uint8_t *data,
unsigned int data_sz,
vpx_codec_stream_info_t *si) {
- return decoder_peek_si_internal(data, data_sz, si, NULL, NULL);
+ return decoder_peek_si_internal(data, data_sz, si, NULL, NULL, NULL);
}
static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx,
@@ -266,13 +277,14 @@
// validate that we have a buffer that does not wrap around the top
// of the heap.
if (!ctx->si.h) {
+ int is_intra_only = 0;
const vpx_codec_err_t res =
- decoder_peek_si_internal(*data, data_sz, &ctx->si, ctx->decrypt_cb,
- ctx->decrypt_state);
+ decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only,
+ ctx->decrypt_cb, ctx->decrypt_state);
if (res != VPX_CODEC_OK)
return res;
- if (!ctx->si.is_kf)
+ if (!ctx->si.is_kf && !is_intra_only)
return VPX_CODEC_ERROR;
}
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index 92ec6fd..1fcb36f 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -31,8 +31,6 @@
VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
VP9_DX_SRCS-yes += decoder/vp9_decoder.c
VP9_DX_SRCS-yes += decoder/vp9_decoder.h
-VP9_DX_SRCS-yes += decoder/vp9_thread.c
-VP9_DX_SRCS-yes += decoder/vp9_thread.h
VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c
VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h