Move dequant from BLOCKD to per-plane MACROBLOCKD
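This data can vary per-plane, but not per-block. Since the only symbol exported by vp9_asm_enc_offsets.c was the offset of BLOCKD::dequant, that file, its build rule, and the matching %include lines in the x86 quantize assembly are removed as well.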
Change-Id: I1971b0b2c2e697d2118e38b54ef446e52f63c65a
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index e2e9419..783b812 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -280,7 +280,6 @@
typedef struct blockd {
int16_t *diff;
- int16_t *dequant;
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
uint8_t **base_pre;
@@ -335,6 +334,7 @@
int subsampling_y;
struct buf_2d dst;
struct buf_2d pre[2];
+ int16_t *dequant;
};
#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index c196dd5..ce5681a 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -206,11 +206,9 @@
const int qindex = get_qindex(mb, segment_id, pc->base_qindex);
mb->q_index = qindex;
- for (i = 0; i < 16; i++)
- mb->block[i].dequant = pc->y_dequant[qindex];
-
- for (i = 16; i < 24; i++)
- mb->block[i].dequant = pc->uv_dequant[qindex];
+ mb->plane[0].dequant = pc->y_dequant[qindex];
+ for (i = 1; i < MAX_MB_PLANE; i++)
+ mb->plane[i].dequant = pc->uv_dequant[qindex];
if (mb->lossless) {
assert(qindex == 0);
@@ -354,7 +352,8 @@
xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context =
vp9_find_bpred_context(xd, b);
if (!xd->mode_info_context->mbmi.mb_skip_coeff)
- vp9_decode_coefs_4x4(pbi, xd, r, PLANE_TYPE_Y_WITH_DC, i, b->dequant);
+ vp9_decode_coefs_4x4(pbi, xd, r, PLANE_TYPE_Y_WITH_DC, i,
+ xd->plane[0].dequant);
#endif
vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst,
b->dst_stride);
@@ -363,7 +362,7 @@
}
#if CONFIG_NEWBINTRAMODES
if (!xd->mode_info_context->mbmi.mb_skip_coeff)
- vp9_decode_mb_tokens_4x4_uv(pbi, xd, r, xd->block[16].dequant);
+ vp9_decode_mb_tokens_4x4_uv(pbi, xd, r, xd->plane[1].dequant);
#endif
vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->plane[1].dst.buf,
@@ -596,7 +595,7 @@
mb_init_dequantizer(pbi, xd);
// dequantization and idct
- eobtotal = vp9_decode_tokens(pbi, xd, r, bsize, xd->block[0].dequant);
+ eobtotal = vp9_decode_tokens(pbi, xd, r, bsize, xd->plane[0].dequant);
if (eobtotal == 0) { // skip loopfilter
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
@@ -671,7 +670,7 @@
if (mode != I4X4_PRED)
#endif
eobtotal = vp9_decode_tokens(pbi, xd, r, BLOCK_SIZE_MB16X16,
- xd->block[0].dequant);
+ xd->plane[0].dequant);
}
}
diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c
deleted file mode 100644
index d8e844e..0000000
--- a/vp9/encoder/vp9_asm_enc_offsets.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/asm_offsets.h"
-#include "vpx_config.h"
-#include "vp9/encoder/vp9_block.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/encoder/vp9_treewriter.h"
-#include "vp9/encoder/vp9_tokenize.h"
-
-BEGIN
-
-/* regular quantize */
-DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant));
-
-END
-
-/* add asserts for any offset that is not supported by assembly code
- * add asserts for any size that is not supported by assembly code
- */
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 49c651b..823e92d 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -509,7 +509,7 @@
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh);
}
}
@@ -532,7 +532,7 @@
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh);
}
}
@@ -560,7 +560,7 @@
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh);
}
}
@@ -585,7 +585,7 @@
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
ta + x_idx, tl + y_idx, TX_4X4, bh * bw);
}
}
@@ -599,7 +599,7 @@
assert(bsize == BLOCK_SIZE_SB64X64);
for (b = 256; b < 384; b += 64) {
- const int cidx = b >= 320 ? 20 : 16;
+ const int plane = 1 + (b >= 320);
a = ta + vp9_block2above_sb64[TX_32X32][b];
l = tl + vp9_block2left_sb64[TX_32X32][b];
a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
@@ -610,7 +610,7 @@
l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0;
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.plane[plane].dequant,
&a_ec, &l_ec, TX_32X32, 256);
}
}
@@ -638,11 +638,10 @@
}
for (plane = 0; plane < 2; plane++) {
- const int cidx = 16 + plane * 4;
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV,
- x->e_mbd.block[cidx].dequant,
+ x->e_mbd.plane[plane + 1].dequant,
&ta[plane][x_idx], &tl[plane][y_idx],
TX_16X16, bh * bw * 64);
}
@@ -671,11 +670,10 @@
}
for (plane = 0; plane < 2; plane++) {
- const int cidx = 16 + plane * 4;
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV,
- x->e_mbd.block[cidx].dequant,
+ x->e_mbd.plane[plane + 1].dequant,
&ta[plane][x_idx], &tl[plane][y_idx],
TX_8X8, bh * bw * 16);
}
@@ -708,11 +706,10 @@
}
for (plane = 0; plane < 2; plane++) {
- const int cidx = 16 + plane * 4;
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV,
- x->e_mbd.block[cidx].dequant,
+ x->e_mbd.plane[plane + 1].dequant,
&ta[plane][x_idx], &tl[plane][y_idx],
TX_4X4, bh * bw * 4);
}
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 78ea78c..b275ab1 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -28,7 +28,6 @@
void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCKD *const d = &xd->block[0];
int i, rc, eob;
int zbin;
int x, y, z, sz;
@@ -41,7 +40,7 @@
int16_t *round_ptr = mb->plane[0].round;
int16_t *quant_ptr = mb->plane[0].quant;
uint8_t *quant_shift_ptr = mb->plane[0].quant_shift;
- int16_t *dequant_ptr = d->dequant;
+ int16_t *dequant_ptr = xd->plane[0].dequant;
int zbin_oq_value = mb->plane[0].zbin_extra;
const int *pt_scan = get_scan_4x4(tx_type);
@@ -84,7 +83,6 @@
MACROBLOCKD *const xd = &mb->e_mbd;
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
const int c_idx = plane_idx(pb_idx.plane);
- BLOCKD *const d = &xd->block[c_idx];
int i, rc, eob;
int zbin;
int x, y, z, sz;
@@ -99,7 +97,7 @@
int16_t *round_ptr = mb->plane[pb_idx.plane].round;
int16_t *quant_ptr = mb->plane[pb_idx.plane].quant;
uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
- int16_t *dequant_ptr = d->dequant;
+ int16_t *dequant_ptr = xd->plane[pb_idx.plane].dequant;
int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -152,7 +150,6 @@
pb_idx.block, 16);
int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
pb_idx.block, 16);
- BLOCKD *const d = &xd->block[c_idx];
const int *pt_scan = get_scan_8x8(tx_type);
if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -171,7 +168,7 @@
int16_t *round_ptr = mb->plane[pb_idx.plane].round;
int16_t *quant_ptr = mb->plane[pb_idx.plane].quant;
uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
- int16_t *dequant_ptr = d->dequant;
+ int16_t *dequant_ptr = xd->plane[pb_idx.plane].dequant;
int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
eob = -1;
@@ -286,7 +283,6 @@
MACROBLOCKD *const xd = &mb->e_mbd;
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
const int c_idx = plane_idx(pb_idx.plane);
- BLOCKD *const d = &xd->block[c_idx];
const int *pt_scan = get_scan_16x16(tx_type);
if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -301,7 +297,7 @@
mb->plane[pb_idx.plane].quant_shift,
BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
- d->dequant,
+ xd->plane[pb_idx.plane].dequant,
mb->plane[pb_idx.plane].zbin_extra,
&xd->plane[pb_idx.plane].eobs[pb_idx.block],
pt_scan, 1);
@@ -311,7 +307,6 @@
MACROBLOCKD *const xd = &mb->e_mbd;
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
const int c_idx = plane_idx(pb_idx.plane);
- BLOCKD *const d = &xd->block[c_idx];
if (c_idx == 0) assert(pb_idx.plane == 0);
if (c_idx == 16) assert(pb_idx.plane == 1);
@@ -325,7 +320,7 @@
mb->plane[pb_idx.plane].quant_shift,
BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
- d->dequant,
+ xd->plane[pb_idx.plane].dequant,
mb->plane[pb_idx.plane].zbin_extra,
&xd->plane[pb_idx.plane].eobs[pb_idx.block],
vp9_default_zig_zag1d_32x32, 2);
@@ -528,8 +523,7 @@
x->plane[0].round = cpi->Y1round[qindex];
x->plane[0].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[qindex];
x->plane[0].zbin_extra = (int16_t)zbin_extra;
- for (i = 0; i < 16; i++)
- x->e_mbd.block[i].dequant = cpi->common.y_dequant[qindex];
+ x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
// UV
zbin_extra = (cpi->common.uv_dequant[qindex][1] *
@@ -542,9 +536,8 @@
x->plane[i].round = cpi->UVround[qindex];
x->plane[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[qindex];
x->plane[i].zbin_extra = (int16_t)zbin_extra;
+ x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
}
- for (i = 16; i < 24; i++)
- x->e_mbd.block[i].dequant = cpi->common.uv_dequant[qindex];
x->skip_block = vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index a33abb8..de08e83 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3050,7 +3050,7 @@
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
model_rd_from_var_lapndz(var, 16 * bw * 16 * bh,
- xd->block[0].dequant[1] >> 3,
+ xd->plane[0].dequant[1] >> 3,
&tmp_rate_y, &tmp_dist_y);
var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
x->plane[1].src.stride,
@@ -3058,7 +3058,7 @@
xd->plane[1].dst.stride,
&sse);
model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
- xd->block[16].dequant[1] >> 3,
+ xd->plane[1].dequant[1] >> 3,
&tmp_rate_u, &tmp_dist_u);
var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
x->plane[1].src.stride,
@@ -3066,7 +3066,7 @@
xd->plane[1].dst.stride,
&sse);
model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
- xd->block[20].dequant[1] >> 3,
+ xd->plane[2].dequant[1] >> 3,
&tmp_rate_v, &tmp_dist_v);
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
@@ -3138,17 +3138,17 @@
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
- model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
+ model_rd_from_var_lapndz(var, 16 * 16, xd->plane[0].dequant[1] >> 3,
&tmp_rate_y, &tmp_dist_y);
var = vp9_variance8x8(x->plane[1].src.buf, x->plane[1].src.stride,
xd->plane[1].dst.buf, xd->plane[1].dst.stride,
&sse);
- model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
+ model_rd_from_var_lapndz(var, 8 * 8, xd->plane[1].dequant[1] >> 3,
&tmp_rate_u, &tmp_dist_u);
var = vp9_variance8x8(x->plane[2].src.buf, x->plane[1].src.stride,
xd->plane[2].dst.buf, xd->plane[1].dst.stride,
&sse);
- model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
+ model_rd_from_var_lapndz(var, 8 * 8, xd->plane[2].dequant[1] >> 3,
&tmp_rate_v, &tmp_dist_v);
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
@@ -3225,8 +3225,8 @@
x->skip = 1;
else if (x->encode_breakout) {
unsigned int var, sse;
- int threshold = (xd->block[0].dequant[1]
- * xd->block[0].dequant[1] >> 4);
+ int threshold = (xd->plane[0].dequant[1]
+ * xd->plane[0].dequant[1] >> 4);
if (threshold < x->encode_breakout)
threshold = x->encode_breakout;
@@ -3244,7 +3244,7 @@
}
if ((int)sse < threshold) {
- unsigned int q2dc = xd->block[0].dequant[0];
+ unsigned int q2dc = xd->plane[0].dequant[0];
/* If there is no codeable 2nd order dc
or a very small uniform pixel change change */
if ((sse - var < q2dc * q2dc >> 4) ||
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm
index 2a686f5..700e64b 100644
--- a/vp9/encoder/x86/vp9_quantize_sse2.asm
+++ b/vp9/encoder/x86/vp9_quantize_sse2.asm
@@ -9,7 +9,6 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
; void vp9_regular_quantize_b_sse2 | arg
diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm
index d7779bd..4c14e5f 100644
--- a/vp9/encoder/x86/vp9_quantize_sse4.asm
+++ b/vp9/encoder/x86/vp9_quantize_sse4.asm
@@ -9,7 +9,6 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
; void vp9_regular_quantize_b_sse4 | arg
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index e082af1..1fa0521 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -9,7 +9,6 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
; void vp9_fast_quantize_b_ssse3 | arg
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 13785f7..0972113 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -26,7 +26,6 @@
#INCLUDES += common
#INCLUDES += encoder
-VP9_CX_SRCS-yes += encoder/vp9_asm_enc_offsets.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c
VP9_CX_SRCS-yes += encoder/vp9_dct.c
@@ -117,6 +116,3 @@
VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
-
-$(eval $(call asm_offsets_template,\
- vp9_asm_enc_offsets.asm, $(VP9_PREFIX)encoder/vp9_asm_enc_offsets.c))