Merge "Add SVC codec control to set frame flags and buffer indices."
diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c
index 8324cea..978e0bf 100644
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -318,21 +318,21 @@
}
// Accumulate frame counts.
-void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
- int is_dec) {
+void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
+ const FRAME_COUNTS *counts, int is_dec) {
int i, j, k, l, m;
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
for (j = 0; j < INTRA_MODES; j++)
- cm->counts.y_mode[i][j] += counts->y_mode[i][j];
+ accum->y_mode[i][j] += counts->y_mode[i][j];
for (i = 0; i < INTRA_MODES; i++)
for (j = 0; j < INTRA_MODES; j++)
- cm->counts.uv_mode[i][j] += counts->uv_mode[i][j];
+ accum->uv_mode[i][j] += counts->uv_mode[i][j];
for (i = 0; i < PARTITION_CONTEXTS; i++)
for (j = 0; j < PARTITION_TYPES; j++)
- cm->counts.partition[i][j] += counts->partition[i][j];
+ accum->partition[i][j] += counts->partition[i][j];
if (is_dec) {
int n;
@@ -341,10 +341,10 @@
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++) {
- cm->counts.eob_branch[i][j][k][l][m] +=
+ accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
- cm->counts.coef[i][j][k][l][m][n] +=
+ accum->coef[i][j][k][l][m][n] +=
counts->coef[i][j][k][l][m][n];
}
} else {
@@ -353,64 +353,64 @@
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++)
- cm->counts.eob_branch[i][j][k][l][m] +=
+ accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
- // In the encoder, cm->counts.coef is only updated at frame
+ // In the encoder, coef is only updated at frame
// level, so not need to accumulate it here.
// for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
- // cm->counts.coef[i][j][k][l][m][n] +=
+ // accum->coef[i][j][k][l][m][n] +=
// counts->coef[i][j][k][l][m][n];
}
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
for (j = 0; j < SWITCHABLE_FILTERS; j++)
- cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
+ accum->switchable_interp[i][j] += counts->switchable_interp[i][j];
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
for (j = 0; j < INTER_MODES; j++)
- cm->counts.inter_mode[i][j] += counts->inter_mode[i][j];
+ accum->inter_mode[i][j] += counts->inter_mode[i][j];
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.intra_inter[i][j] += counts->intra_inter[i][j];
+ accum->intra_inter[i][j] += counts->intra_inter[i][j];
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.comp_inter[i][j] += counts->comp_inter[i][j];
+ accum->comp_inter[i][j] += counts->comp_inter[i][j];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
- cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
+ accum->single_ref[i][j][k] += counts->single_ref[i][j][k];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.comp_ref[i][j] += counts->comp_ref[i][j];
+ accum->comp_ref[i][j] += counts->comp_ref[i][j];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
for (j = 0; j < TX_SIZES; j++)
- cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j];
+ accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j];
for (j = 0; j < TX_SIZES - 1; j++)
- cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j];
+ accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j];
for (j = 0; j < TX_SIZES - 2; j++)
- cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j];
+ accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j];
}
for (i = 0; i < TX_SIZES; i++)
- cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
+ accum->tx.tx_totals[i] += counts->tx.tx_totals[i];
for (i = 0; i < SKIP_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.skip[i][j] += counts->skip[i][j];
+ accum->skip[i][j] += counts->skip[i][j];
for (i = 0; i < MV_JOINTS; i++)
- cm->counts.mv.joints[i] += counts->mv.joints[i];
+ accum->mv.joints[i] += counts->mv.joints[i];
for (k = 0; k < 2; k++) {
- nmv_component_counts *comps = &cm->counts.mv.comps[k];
- nmv_component_counts *comps_t = &counts->mv.comps[k];
+ nmv_component_counts *const comps = &accum->mv.comps[k];
+ const nmv_component_counts *const comps_t = &counts->mv.comps[k];
for (i = 0; i < 2; i++) {
comps->sign[i] += comps_t->sign[i];
diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h
index 04666b6..db6587f 100644
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -55,8 +55,8 @@
VPxWorker *workers, int num_workers,
VP9LfSync *lf_sync);
-void vp9_accumulate_frame_counts(struct VP9Common *cm,
- struct FRAME_COUNTS *counts, int is_dec);
+void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
+ const struct FRAME_COUNTS *counts, int is_dec);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 004d343..ba8c8dd 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1563,9 +1563,10 @@
return vpx_reader_find_end(&tile_data->bit_reader);
}
-static int tile_worker_hook(TileWorkerData *const tile_data,
- const TileInfo *const tile) {
+static int tile_worker_hook(TileWorkerData *const tile_data, void *unused) {
+ const TileInfo *const tile = &tile_data->xd.tile;
int mi_row, mi_col;
+ (void)unused;
if (setjmp(tile_data->error_info.jmp)) {
tile_data->error_info.setjmp = 0;
@@ -1628,8 +1629,6 @@
CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
vpx_memalign(32, num_threads *
sizeof(*pbi->tile_worker_data)));
- CHECK_MEM_ERROR(cm, pbi->tile_worker_info,
- vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info)));
for (i = 0; i < num_threads; ++i) {
VPxWorker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
@@ -1645,10 +1644,15 @@
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
+ TileWorkerData *const tile_data = &pbi->tile_worker_data[n];
winterface->sync(worker);
+ tile_data->pbi = pbi;
+ tile_data->xd = pbi->mb;
+ tile_data->xd.counts =
+ cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
worker->hook = (VPxWorkerHook)tile_worker_hook;
- worker->data1 = &pbi->tile_worker_data[n];
- worker->data2 = &pbi->tile_worker_info[n];
+ worker->data1 = tile_data;
+ worker->data2 = NULL;
}
// Note: this memset assumes above_context[0], [1] and [2]
@@ -1698,16 +1702,10 @@
for (i = 0; i < num_workers && n < tile_cols; ++i) {
VPxWorker *const worker = &pbi->tile_workers[i];
TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
- TileInfo *const tile = (TileInfo*)worker->data2;
TileBuffer *const buf = &tile_buffers[0][n];
- tile_data->pbi = pbi;
- tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
- tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
- 0 : &tile_data->counts;
vp9_zero(tile_data->dqcoeff);
- vp9_tile_init(tile, cm, 0, buf->col);
vp9_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
@@ -1742,14 +1740,15 @@
bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
final_worker = -1;
}
+ }
- // Accumulate thread frame counts.
- if (n >= tile_cols && !cm->frame_parallel_decoding_mode) {
- for (i = 0; i < num_workers; ++i) {
- TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[i].data1;
- vp9_accumulate_frame_counts(cm, &tile_data->counts, 1);
- }
+ // Accumulate thread frame counts.
+ if (!cm->frame_parallel_decoding_mode) {
+ int i;
+ for (i = 0; i < num_workers; ++i) {
+ TileWorkerData *const tile_data =
+ (TileWorkerData*)pbi->tile_workers[i].data1;
+ vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1);
}
}
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 6734d00..61077cd 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -134,7 +134,6 @@
vpx_get_worker_interface()->end(worker);
}
vpx_free(pbi->tile_worker_data);
- vpx_free(pbi->tile_worker_info);
vpx_free(pbi->tile_workers);
if (pbi->num_tile_workers > 0) {
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 915f9dc..944f7da 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -65,7 +65,6 @@
VPxWorker lf_worker;
VPxWorker *tile_workers;
TileWorkerData *tile_worker_data;
- TileInfo *tile_worker_info;
int num_tile_workers;
TileData *tile_data;
diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c
index 69ed132..ad25712 100644
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -192,7 +192,7 @@
// Accumulate counters.
if (i < cpi->num_workers - 1) {
- vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
+ vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}
}
diff --git a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
index 7ea6a0e..3fbaa27 100644
--- a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -104,7 +104,7 @@
%define k0k1k4k5 m8
%define k2k3k6k7 m9
%define krd m10
- %define orig_height r7
+ %define orig_height r7d
mova krd, [GLOBAL(pw_64)]
pshuflw k0k1k4k5, m4, 0b ;k0_k1
pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5
@@ -131,8 +131,8 @@
mova k2k3k6k7, m7
mova krd, m1
%endif
- mov orig_height, heightq
- shr heightq, 1
+ mov orig_height, heightd
+ shr heightd, 1
.loop:
;Do two rows at once
movh m0, [srcq - 3]
@@ -200,12 +200,12 @@
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
- dec heightq
+ dec heightd
jnz .loop
; Do last row if output_height is odd
- mov heightq, orig_height
- and heightq, 1
+ mov heightd, orig_height
+ and heightd, 1
je .done
movh m0, [srcq - 3] ; load src
@@ -254,17 +254,17 @@
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER8 1
-cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \
+cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
%if ARCH_X86_64
- %define orig_height r7
+ %define orig_height r7d
%else
%define orig_height heightmp
%endif
- mov orig_height, heightq
- shr heightq, 1
+ mov orig_height, heightd
+ shr heightd, 1
.loop:
movh m0, [srcq - 3]
@@ -336,12 +336,12 @@
lea srcq, [srcq + sstrideq ]
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
- dec heightq
+ dec heightd
jnz .loop
;Do last row if output_height is odd
- mov heightq, orig_height
- and heightq, 1
+ mov heightd, orig_height
+ and heightd, 1
je .done
movh m0, [srcq - 3]
@@ -361,7 +361,7 @@
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER16 1
-cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 13, LOCAL_VARS_SIZE, \
+cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
@@ -427,7 +427,7 @@
lea srcq, [srcq + sstrideq]
mova [dstq], m0
lea dstq, [dstq + dstrideq]
- dec heightq
+ dec heightd
jnz .loop
RET
%endm
@@ -527,11 +527,11 @@
%endif
movx [dstq], m1
add dstq, dst_stride
- sub heightq, 2
- cmp heightq, 1
+ sub heightd, 2
+ cmp heightd, 1
jg .loop
- cmp heightq, 0
+ cmp heightd, 0
je .done
movx m0, [srcq ] ;A
@@ -570,7 +570,7 @@
;-------------------------------------------------------------------------------
%macro SUBPIX_VFILTER16 1
-cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*2), 13, LOCAL_VARS_SIZE, \
+cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
@@ -655,7 +655,7 @@
%endif
movh [dstq + 8], m3
add dstq, dst_stride
- dec heightq
+ dec heightd
jnz .loop
RET
%endm