Merge "vpxenc: clean up; move stats code out of vpxenc.c"
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index f52adfc..a666d1d 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -170,9 +170,9 @@
};
struct macroblockd_plane {
- DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]);
- DECLARE_ALIGNED(16, uint16_t, eobs[256]);
+ int16_t *qcoeff;
+ int16_t *dqcoeff;
+ uint16_t *eobs;
PLANE_TYPE plane_type;
int subsampling_x;
int subsampling_y;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index aad400a..218fdd8 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -42,6 +42,9 @@
vp9_reader bit_reader;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
+ DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+ DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
} TileWorkerData;
static int read_be32(const uint8_t *p) {
@@ -931,6 +934,19 @@
return end;
}
+// Binds every plane of the worker's MACROBLOCKD to the coefficient storage
+// embedded in |tile_data| and zeroes that plane's dqcoeff buffer.
+static void setup_tile_macroblockd(TileWorkerData *const tile_data) {
+  struct macroblockd_plane *const pd = tile_data->xd.plane;
+  int i;
+
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    pd[i].qcoeff = tile_data->qcoeff[i];
+    pd[i].dqcoeff = tile_data->dqcoeff[i];
+    pd[i].eobs = tile_data->eobs[i];
+    // Size the clear from the backing array itself so it cannot drift from
+    // the buffer dimensions declared in TileWorkerData.
+    vpx_memset(pd[i].dqcoeff, 0, sizeof(tile_data->dqcoeff[i]));
+  }
+}
+
static int tile_worker_hook(void *arg1, void *arg2) {
TileWorkerData *tile_data = (TileWorkerData*)arg1;
const TileInfo *const tile = (TileInfo*)arg2;
@@ -1008,6 +1024,7 @@
setup_token_decoder(data, data_end, size, &cm->error,
&tile_data->bit_reader);
setup_tile_context(pbi, &tile_data->xd, 0, tile_col);
+ setup_tile_macroblockd(tile_data);
worker->had_error = 0;
if (i == num_workers - 1 || tile_col == tile_cols - 1) {
@@ -1319,7 +1336,7 @@
cm->fc = cm->frame_contexts[cm->frame_context_idx];
vp9_zero(cm->counts);
for (i = 0; i < MAX_MB_PLANE; ++i)
- vp9_zero(xd->plane[i].dqcoeff);
+ vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
xd->corrupted = 0;
new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 5f970a3..cb45d37 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -107,6 +107,18 @@
}
}
+// Attaches the qcoeff, dqcoeff and eobs arrays stored in |pbi| to the
+// matching plane of pbi->mb, one plane at a time.
+static void init_macroblockd(VP9D_COMP *const pbi) {
+  int plane;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    struct macroblockd_plane *const p = &pbi->mb.plane[plane];
+
+    p->qcoeff = pbi->qcoeff[plane];
+    p->dqcoeff = pbi->dqcoeff[plane];
+    p->eobs = pbi->eobs[plane];
+  }
+}
+
VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP));
VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
@@ -141,6 +153,8 @@
cm->error.setjmp = 0;
pbi->decoded_key_frame = 0;
+ init_macroblockd(pbi);
+
vp9_worker_init(&pbi->lf_worker);
return pbi;
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 7c4c9db..7ad05e6 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -22,6 +22,10 @@
DECLARE_ALIGNED(16, VP9_COMMON, common);
+ DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+ DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
+
VP9D_CONFIG oxcf;
const uint8_t *source;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index f922f90..4f1357a 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1524,6 +1524,17 @@
}
}
+// Wires each plane of the encoder's macroblock descriptor (cpi->mb.e_mbd) to
+// the per-plane qcoeff, dqcoeff and eobs arrays held in |cpi|.
+static void init_macroblock(VP9_COMP *const cpi) {
+  struct macroblockd_plane *const planes = cpi->mb.e_mbd.plane;
+  int idx = 0;
+
+  while (idx < MAX_MB_PLANE) {
+    planes[idx].qcoeff = cpi->qcoeff[idx];
+    planes[idx].dqcoeff = cpi->dqcoeff[idx];
+    planes[idx].eobs = cpi->eobs[idx];
+    ++idx;
+  }
+}
+
VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
int i, j;
volatile union {
@@ -1562,6 +1573,8 @@
init_pick_mode_context(cpi);
+ init_macroblock(cpi);
+
cm->current_video_frame = 0;
cpi->kf_overspend_bits = 0;
cpi->kf_bitrate_adjustment = 0;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 9429c7f..839a92b 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -313,6 +313,10 @@
VP9_CONFIG oxcf;
struct rdcost_block_args rdcost_stack;
+ DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+ DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
+
struct lookahead_ctx *lookahead;
struct lookahead_entry *source;
#if CONFIG_MULTIPLE_ARF
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 6aaa2c7..08b51ef 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2477,54 +2477,41 @@
int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- int refs[2] = { mbmi->ref_frame[0],
- (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
+ const int refs[2] = { mbmi->ref_frame[0],
+ mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
int_mv ref_mv[2];
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
- int ite;
+ int ite, ref;
// Prediction buffer from second frame.
uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
// Do joint motion search in compound mode to get more accurate mv.
- struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d scaled_first_yv12;
+ struct buf_2d backup_yv12[2][MAX_MB_PLANE];
+ struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
int last_besterr[2] = {INT_MAX, INT_MAX};
- YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
- scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
- scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]);
+ YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
+ get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
+ get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
+ };
- ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
- ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
+ for (ref = 0; ref < 2; ++ref) {
+ ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
- if (scaled_ref_frame[0]) {
- int i;
- // Swap out the reference frame for a version that's been scaled to
- // match the resolution of the current frame, allowing the existing
- // motion search code to be used without additional modifications.
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_yv12[i] = xd->plane[i].pre[0];
- setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL);
+ if (scaled_ref_frame[ref]) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[ref][i] = xd->plane[i].pre[ref];
+ setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL);
+ }
+
+ xd->scale_factor[ref].sfc->set_scaled_offsets(&xd->scale_factor[ref],
+ mi_row, mi_col);
+ frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
}
- if (scaled_ref_frame[1]) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_second_yv12[i] = xd->plane[i].pre[1];
-
- setup_pre_planes(xd, 1, scaled_ref_frame[1], mi_row, mi_col, NULL);
- }
-
- xd->scale_factor[0].sfc->set_scaled_offsets(&xd->scale_factor[0],
- mi_row, mi_col);
- xd->scale_factor[1].sfc->set_scaled_offsets(&xd->scale_factor[1],
- mi_row, mi_col);
- scaled_first_yv12 = xd->plane[0].pre[0];
-
- // Initialize mv using single prediction mode result.
- frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
- frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
-
// Allow joint search multiple times iteratively for each ref frame
// and break out the search loop if it couldn't find better mv.
for (ite = 0; ite < 4; ite++) {
@@ -2604,24 +2591,20 @@
}
}
- // restore the predictor
- if (scaled_ref_frame[0]) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[0] = backup_yv12[i];
- }
+ *rate_mv = 0;
- if (scaled_ref_frame[1]) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[1] = backup_second_yv12[i];
+ for (ref = 0; ref < 2; ++ref) {
+ if (scaled_ref_frame[ref]) {
+ // restore the predictor
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[ref] = backup_yv12[ref][i];
+ }
+
+ *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &mbmi->ref_mvs[refs[ref]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
- *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
- &mbmi->ref_mvs[refs[0]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
- &mbmi->ref_mvs[refs[1]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
vpx_free(second_pred);
}