Merge "Simplify vp9_adapt_nmv_probs" into experimental
diff --git a/configure b/configure
index f55f798..5c8dc8e 100755
--- a/configure
+++ b/configure
@@ -245,6 +245,7 @@
comp_interintra_pred
enable_6tap
abovesprefmv
+ code_nonzerocount
"
CONFIG_LIST="
external_build
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index b46dd05..5adfa69 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -250,6 +250,9 @@
INTERPOLATIONFILTERTYPE interp_filter;
BLOCK_SIZE_TYPE sb_type;
+#if CONFIG_CODE_NONZEROCOUNT
+ uint16_t nzcs[256+64*2];
+#endif
} MB_MODE_INFO;
typedef struct {
@@ -295,6 +298,9 @@
DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]);
DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]);
+#if CONFIG_CODE_NONZEROCOUNT
+ DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]);
+#endif
/* 16 Y blocks, 4 U, 4 V, each with 16 entries. */
BLOCKD block[24];
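The new `nzcs` arrays are sized 256 + 64*2 = 384: one per-4x4-block nonzero count for a 64x64 superblock, luma first, then the two chroma planes. An illustrative sketch of that indexing (the helper is hypothetical, not part of the patch, but consistent with the mb16_to_sb64_index() helper added later in this change):

```c
/* Illustrative layout behind nzcs[256 + 64 * 2] (assumption, not patch
 * code): a 64x64 superblock holds 256 luma 4x4 blocks (indices 0..255),
 * then 64 U blocks (256..319) and 64 V blocks (320..383). */
#include <assert.h>

static int nzc_index(int plane, int row4x4, int col4x4) {
  if (plane == 0) {            /* Y: a 16x16 grid of 4x4 blocks */
    assert(row4x4 < 16 && col4x4 < 16);
    return row4x4 * 16 + col4x4;
  }
  /* U (plane 1) or V (plane 2): an 8x8 grid each */
  assert(row4x4 < 8 && col4x4 < 8);
  return 256 + (plane - 1) * 64 + row4x4 * 8 + col4x4;
}
```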
@@ -449,25 +455,29 @@
extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384];
extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384];
-#define USE_ADST_FOR_I16X16_8X8 0
-#define USE_ADST_FOR_I16X16_4X4 0
+#define USE_ADST_FOR_I16X16_8X8 1
+#define USE_ADST_FOR_I16X16_4X4 1
#define USE_ADST_FOR_I8X8_4X4 1
#define USE_ADST_PERIPHERY_ONLY 1
+#define USE_ADST_FOR_SB 1
+#define USE_ADST_FOR_REMOTE_EDGE 0
-static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
+static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
// TODO(debargha): explore different patterns for ADST usage when blocksize
// is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+#if !USE_ADST_FOR_SB
+ if (sb_type)
+ return tx_type;
+#endif
+ if (ib >= (16 << (2 * sb_type))) // no chroma adst
return tx_type;
if (xd->lossless)
return DCT_DCT;
- // TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.sb_type)
- return tx_type;
if (xd->mode_info_context->mbmi.mode == B_PRED &&
xd->q_index < ACTIVE_HT) {
+ const BLOCKD *b = &xd->block[ib];
tx_type = txfm_map(
#if CONFIG_NEWBINTRAMODES
b->bmi.as_mode.first == B_CONTEXT_PRED ? b->bmi.as_mode.context :
@@ -475,16 +485,32 @@
b->bmi.as_mode.first);
} else if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT) {
+ const BLOCKD *b = &xd->block[ib];
+ const int ic = (ib & 10);
#if USE_ADST_FOR_I8X8_4X4
#if USE_ADST_PERIPHERY_ONLY
// Use ADST for periphery blocks only
- int ic = (ib & 10);
+ const int inner = ib & 5;
b += ic - ib;
- tx_type = (ic != 10) ?
- txfm_map(pred_mode_conv((MB_PREDICTION_MODE)b->bmi.as_mode.first)) :
- DCT_DCT;
+ tx_type = txfm_map(pred_mode_conv(
+ (MB_PREDICTION_MODE)b->bmi.as_mode.first));
+#if USE_ADST_FOR_REMOTE_EDGE
+ if (inner == 5)
+ tx_type = DCT_DCT;
+#else
+ if (inner == 1) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (inner == 4) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (inner == 5) {
+ tx_type = DCT_DCT;
+ }
+#endif
#else
// Use ADST
+ b += ic - ib;
tx_type = txfm_map(pred_mode_conv(
(MB_PREDICTION_MODE)b->bmi.as_mode.first));
#endif
@@ -496,9 +522,22 @@
xd->q_index < ACTIVE_HT) {
#if USE_ADST_FOR_I16X16_4X4
#if USE_ADST_PERIPHERY_ONLY
- // Use ADST for periphery blocks only
- tx_type = (ib < 4 || ((ib & 3) == 0)) ?
- txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT;
+ const int hmax = 4 << sb_type;
+ tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+#if USE_ADST_FOR_REMOTE_EDGE
+ if ((ib & (hmax - 1)) != 0 && ib >= hmax)
+ tx_type = DCT_DCT;
+#else
+ if (ib >= 1 && ib < hmax) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (ib >= 1 && (ib & (hmax - 1)) == 0) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (ib != 0) {
+ tx_type = DCT_DCT;
+ }
+#endif
#else
// Use ADST
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
@@ -511,29 +550,44 @@
return tx_type;
}
-static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
+static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
// TODO(debargha): explore different patterns for ADST usage when blocksize
// is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+#if !USE_ADST_FOR_SB
+ if (sb_type)
return tx_type;
- // TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.sb_type)
+#endif
+ if (ib >= (16 << (2 * sb_type))) // no chroma adst
return tx_type;
if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT8) {
+ const BLOCKD *b = &xd->block[ib];
// TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged
// or the relationship otherwise modified to address this type conversion.
tx_type = txfm_map(pred_mode_conv(
(MB_PREDICTION_MODE)b->bmi.as_mode.first));
} else if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
xd->q_index < ACTIVE_HT8) {
-#if USE_ADST_FOR_I8X8_4X4
+#if USE_ADST_FOR_I16X16_8X8
#if USE_ADST_PERIPHERY_ONLY
- // Use ADST for periphery blocks only
- tx_type = (ib != 10) ?
- txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT;
+ const int hmax = 4 << sb_type;
+ tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+#if USE_ADST_FOR_REMOTE_EDGE
+ if ((ib & (hmax - 1)) != 0 && ib >= hmax)
+ tx_type = DCT_DCT;
+#else
+ if (ib >= 1 && ib < hmax) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (ib >= 1 && (ib & (hmax - 1)) == 0) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (ib != 0) {
+ tx_type = DCT_DCT;
+ }
+#endif
#else
// Use ADST
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
@@ -546,35 +600,37 @@
return tx_type;
}
-static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
+static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) {
TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+#if !USE_ADST_FOR_SB
+ if (sb_type)
return tx_type;
- // TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.sb_type)
+#endif
+ if (ib >= (16 << (2 * sb_type)))
return tx_type;
if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
xd->q_index < ACTIVE_HT16) {
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
- }
- return tx_type;
-}
-
-static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {
- TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
- return tx_type;
- if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) {
- tx_type = get_tx_type_16x16(xd, b);
- }
- if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- ib = (ib & 8) + ((ib & 4) >> 1);
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
- }
- if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
- tx_type = get_tx_type_4x4(xd, b);
+#if USE_ADST_PERIPHERY_ONLY
+ if (sb_type) {
+ const int hmax = 4 << sb_type;
+#if USE_ADST_FOR_REMOTE_EDGE
+ if ((ib & (hmax - 1)) != 0 && ib >= hmax)
+ tx_type = DCT_DCT;
+#else
+ if (ib >= 1 && ib < hmax) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (ib >= 1 && (ib & (hmax - 1)) == 0) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (ib != 0) {
+ tx_type = DCT_DCT;
+ }
+#endif
+ }
+#endif
}
return tx_type;
}
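The periphery rule repeated above for the 4x4, 8x8 and 16x16 cases can be stated once: within an intra-predicted block spanning `hmax` transform blocks per row, the top-left block keeps the full 2-D ADST, the rest of the top row and of the left column each drop one ADST dimension, and interior blocks fall back to DCT_DCT (with USE_ADST_FOR_REMOTE_EDGE set, the whole periphery keeps the full type instead). A condensed sketch of the !USE_ADST_FOR_REMOTE_EDGE selection, with a local enum, illustrative only:

```c
typedef enum { DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST } TX_TYPE;

/* `full` is the tx_type implied by the prediction mode; ib indexes the
 * transform blocks in raster order, hmax of them per row. */
static TX_TYPE periphery_tx_type(int ib, int hmax, TX_TYPE full) {
  if (ib == 0)
    return full;                        /* corner keeps the 2-D ADST */
  if (ib < hmax)                        /* rest of the top row */
    return full == ADST_ADST ? ADST_DCT :
           full == DCT_ADST ? DCT_DCT : full;
  if ((ib & (hmax - 1)) == 0)           /* rest of the left column */
    return full == ADST_ADST ? DCT_ADST :
           full == ADST_DCT ? DCT_DCT : full;
  return DCT_DCT;                       /* interior */
}
```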
@@ -592,4 +648,25 @@
}
}
+static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) {
+ TX_SIZE tx_size_uv;
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ tx_size_uv = xd->mode_info_context->mbmi.txfm_size;
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ if (xd->mode_info_context->mbmi.txfm_size == TX_32X32)
+ tx_size_uv = TX_16X16;
+ else
+ tx_size_uv = xd->mode_info_context->mbmi.txfm_size;
+ } else {
+ if (xd->mode_info_context->mbmi.txfm_size == TX_16X16)
+ tx_size_uv = TX_8X8;
+ else if (xd->mode_info_context->mbmi.txfm_size == TX_8X8 &&
+ (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV))
+ tx_size_uv = TX_4X4;
+ else
+ tx_size_uv = xd->mode_info_context->mbmi.txfm_size;
+ }
+ return tx_size_uv;
+}
#endif // VP9_COMMON_VP9_BLOCKD_H_
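get_uv_tx_size() implements a simple 4:2:0 rule: chroma reuses the luma transform size, clamped to the largest transform that fits the half-resolution chroma plane, with a further drop to TX_4X4 when I8X8_PRED or SPLITMV shrink the prediction unit below 8x8. A self-contained sketch of the clamping part (enum ordering assumed; the sub-8x8 special case is omitted):

```c
#include <assert.h>

/* Local stand-in for the real enum (assumed ordering). */
typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 } TX_SIZE;

/* sb_log2: 0 = 16x16 MB, 1 = 32x32 SB, 2 = 64x64 SB. The chroma plane
 * is half the luma size, so the largest usable chroma transform is one
 * step below the luma block size, i.e. TX_8X8 << sb_log2. */
static TX_SIZE uv_tx_size(int sb_log2, TX_SIZE y_tx) {
  const TX_SIZE max_uv = (TX_SIZE)(TX_8X8 + sb_log2);
  return y_tx < max_uv ? y_tx : max_uv;
}

int main(void) {
  assert(uv_tx_size(2, TX_32X32) == TX_32X32);  /* 64x64: uv plane 32x32 */
  assert(uv_tx_size(1, TX_32X32) == TX_16X16);  /* 32x32: clamped */
  assert(uv_tx_size(0, TX_16X16) == TX_8X8);    /* 16x16 MB: clamped */
  return 0;
}
```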
diff --git a/vp9/common/vp9_coefupdateprobs.h b/vp9/common/vp9_coefupdateprobs.h
index ee250e0..6d8ed67 100644
--- a/vp9/common/vp9_coefupdateprobs.h
+++ b/vp9/common/vp9_coefupdateprobs.h
@@ -9,7 +9,7 @@
*/
#ifndef VP9_COMMON_VP9_COEFUPDATEPROBS_H_
-#define VP9_COMMON_VP9_COEFUPDATEPROBS_H__
+#define VP9_COMMON_VP9_COEFUPDATEPROBS_H_
/* Update probabilities for the nodes in the token entropy tree.
Generated file included by vp9_entropy.c */
@@ -17,4 +17,12 @@
#define COEF_UPDATE_PROB_8X8 252
#define COEF_UPDATE_PROB_16X16 252
+#if CONFIG_CODE_NONZEROCOUNT
+#define NZC_UPDATE_PROB_4X4 252
+#define NZC_UPDATE_PROB_8X8 252
+#define NZC_UPDATE_PROB_16X16 252
+#define NZC_UPDATE_PROB_32X32 252
+#define NZC_UPDATE_PROB_PCAT 252
+#endif
+
#endif // VP9_COMMON_VP9_COEFUPDATEPROBS_H__
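The update probabilities (252/256, about 0.984 "no update") keep per-frame probability updates cheap to skip. A hedged sketch of how such a constant typically gates a decoder-side read in this codebase; `BOOL_DECODER`, `vp9_read()` and `vp9_read_literal()` are the existing bool-decoder primitives, while the wrapper itself is illustrative only:

```c
/* Illustrative only: one gated probability update. With the gate prior
 * at 252/256, leaving a probability unchanged costs ~0.023 bits. */
static void read_nzc_prob_update(BOOL_DECODER *bc, vp9_prob *p) {
  if (vp9_read(bc, NZC_UPDATE_PROB_4X4))      /* rarely taken branch */
    *p = (vp9_prob)vp9_read_literal(bc, 8);   /* new 8-bit probability */
}
```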
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 4295eba..dcc5073 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -11,10 +11,11 @@
#ifndef VP9_COMMON_VP9_COMMON_H_
#define VP9_COMMON_VP9_COMMON_H_
-#include <assert.h>
-#include "vpx_config.h"
/* Interface header for common constant data structures and lookup tables */
+#include <assert.h>
+
+#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
@@ -24,23 +25,27 @@
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-/* Only need this for fixed-size arrays, for structs just assign. */
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
-#define vp9_copy(Dest, Src) { \
- assert(sizeof(Dest) == sizeof(Src)); \
- vpx_memcpy(Dest, Src, sizeof(Src)); \
+/* If we don't want to use ROUND_POWER_OF_TWO macro
+static INLINE int16_t round_power_of_two(int16_t value, int n) {
+ return (value + (1 << (n - 1))) >> n;
+}*/
+
+// Only need this for fixed-size arrays, for structs just assign.
+#define vp9_copy(dest, src) { \
+ assert(sizeof(dest) == sizeof(src)); \
+ vpx_memcpy(dest, src, sizeof(src)); \
}
-/* Use this for variably-sized arrays. */
-
-#define vp9_copy_array(Dest, Src, N) { \
- assert(sizeof(*Dest) == sizeof(*Src)); \
- vpx_memcpy(Dest, Src, N * sizeof(*Src)); \
+// Use this for variably-sized arrays.
+#define vp9_copy_array(dest, src, n) { \
+ assert(sizeof(*dest) == sizeof(*src)); \
+ vpx_memcpy(dest, src, n * sizeof(*src)); \
}
-#define vp9_zero(Dest) vpx_memset(&Dest, 0, sizeof(Dest));
-
-#define vp9_zero_array(Dest, N) vpx_memset(Dest, 0, N * sizeof(*Dest));
+#define vp9_zero(dest) vpx_memset(&dest, 0, sizeof(dest));
+#define vp9_zero_array(dest, n) vpx_memset(dest, 0, n * sizeof(*dest));
static INLINE uint8_t clip_pixel(int val) {
return (val > 255) ? 255u : (val < 0) ? 0u : val;
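ROUND_POWER_OF_TWO divides by 2^n with round-half-up semantics for non-negative values, where a plain right shift would truncate. A few spot checks:

```c
#include <assert.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

int main(void) {
  assert(ROUND_POWER_OF_TWO(5, 1) == 3);   /* 5/2 = 2.5  -> 3 */
  assert(ROUND_POWER_OF_TWO(6, 2) == 2);   /* 6/4 = 1.5  -> 2 */
  assert(ROUND_POWER_OF_TWO(9, 2) == 2);   /* 9/4 = 2.25 -> 2 */
  assert(ROUND_POWER_OF_TWO(64, 6) == 1);  /* exact division unchanged */
  return 0;
}
```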
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index 1953d60..c3fffc6 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -9,6 +9,7 @@
*/
#include <stdio.h>
+
#include "vp9/common/vp9_blockd.h"
void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols,
@@ -18,8 +19,7 @@
int mb_index = 0;
FILE *mvs = fopen("mvs.stt", "a");
- /* print out the macroblock Y modes */
- mb_index = 0;
+ // Print out the macroblock Y modes
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++) {
diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h
index 204e65a..6b1eff0 100644
--- a/vp9/common/vp9_default_coef_probs.h
+++ b/vp9/common/vp9_default_coef_probs.h
@@ -695,3 +695,299 @@
}
}
};
+
+#if CONFIG_CODE_NONZEROCOUNT
+
+// TODO(debargha): Remove the macro and count tables after experimentation
+#define NZC_DEFAULT_COUNTS /* Comment out to use prob tables as defaults */
+
+#ifdef NZC_DEFAULT_COUNTS
+static const unsigned int default_nzc_counts_4x4[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC4X4_TOKENS] = {
+ {
+ {
+ { 967652, 29023, 15039, 6952, 1568, 116 },
+ { 289116, 22938, 4522, 1935, 520, 47 }
+ }, {
+ { 967652, 29023, 15039, 6952, 1568, 116 },
+ { 689116, 22938, 4522, 1935, 520, 47 }
+ },
+ }, {
+ {
+ { 124684, 37167, 15270, 8483, 1777, 102 },
+ { 10405, 12395, 3401, 3574, 2461, 771 }
+ }, {
+ { 124684, 37167, 15270, 8483, 1777, 102 },
+ { 20405, 12395, 3401, 3574, 2461, 771 }
+ }
+ }, {
+ {
+ { 4100, 22976, 15627, 16137, 7982, 1793 },
+ { 4249, 3084, 2131, 4081, 6439, 1653 }
+ }, {
+ { 21100, 22976, 15627, 16137, 7982, 1793 },
+ { 4249, 3084, 2131, 4081, 2439, 1653 }
+ }
+ }
+};
+
+static const unsigned int default_nzc_counts_8x8[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC8X8_TOKENS] = {
+ {
+ {
+ { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 },
+ { 72052, 30468, 6973, 3250, 1500, 750, 375, 5 },
+ }, {
+ { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 },
+ { 192052, 30468, 6973, 3250, 1500, 750, 375, 5 },
+ }
+ }, {
+ {
+ { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 },
+ { 23772, 23120, 13127, 8115, 4000, 2000, 200, 6 },
+ }, {
+ { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 },
+ { 23772, 23120, 13127, 8115, 4000, 2000, 200, 6 },
+ }
+ }, {
+ {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 },
+ { 11612, 13874, 13329, 13022, 6500, 3250, 300, 12 },
+ }, {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 },
+ { 11612, 13874, 13329, 13022, 6500, 3250, 300, 12 },
+ }
+ }
+};
+
+static const unsigned int default_nzc_counts_16x16[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC16X16_TOKENS] = {
+ {
+ {
+ { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 },
+ { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 },
+ }, {
+ { 32988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 },
+ { 92052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 },
+ }
+ }, {
+ {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 },
+ { 47772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 },
+ }, {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 },
+ { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 },
+ }
+ }, {
+ {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 },
+ }, {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 },
+ }
+ }
+};
+
+static const unsigned int default_nzc_counts_32x32[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC32X32_TOKENS] = {
+ {
+ {
+ { 72988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 },
+ { 52052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1, 0, 0 },
+ }, {
+ { 72988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 },
+ { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1, 0, 0 },
+ }
+ }, {
+ {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 },
+ { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2, 1, 0 },
+ }, {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 },
+ { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2, 1, 0 },
+ }
+ }, {
+ {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3, 2, 1 },
+ }, {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3, 2, 1 },
+ }
+ }
+};
+
+#else
+
+static const vp9_prob default_nzc_probs_4x4[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC4X4_TOKENS] = {
+ {
+ {
+ { 219, 162, 179, 142, 242, },
+ { 214, 253, 228, 246, 255, },
+ }, {
+ { 225, 236, 190, 229, 253, },
+ { 251, 253, 240, 248, 255, },
+ },
+ }, {
+ {
+ { 106, 126, 158, 126, 244, },
+ { 118, 241, 201, 240, 255, },
+ }, {
+ { 165, 179, 143, 189, 242, },
+ { 173, 239, 192, 255, 128, },
+ },
+ }, {
+ {
+ { 42 , 78 , 153, 92 , 223, },
+ { 128, 128, 128, 128, 128, },
+ }, {
+ { 76 , 68 , 126, 110, 216, },
+ { 128, 128, 128, 128, 128, },
+ },
+ },
+};
+
+static const vp9_prob default_nzc_probs_8x8[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC8X8_TOKENS] = {
+ {
+ {
+ { 134, 139, 170, 178, 142, 197, 255, },
+ { 167, 224, 199, 252, 205, 255, 128, },
+ }, {
+ { 181, 210, 180, 241, 190, 235, 255, },
+ { 234, 251, 235, 252, 219, 255, 128, },
+ },
+ }, {
+ {
+ { 33 , 64 , 155, 143, 86 , 216, 255, },
+ { 73 , 160, 167, 251, 153, 255, 128, },
+ }, {
+ { 79 , 104, 153, 195, 119, 246, 255, },
+ { 149, 183, 186, 249, 203, 255, 128, },
+ },
+ }, {
+ {
+ { 10 , 25 , 156, 61 , 69 , 156, 254, },
+ { 32 , 1 , 128, 146, 64 , 255, 128, },
+ }, {
+ { 37 , 48 , 143, 113, 81 , 202, 255, },
+ { 1 , 255, 128, 128, 128, 128, 128, },
+ },
+ },
+};
+
+static const vp9_prob default_nzc_probs_16x16[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC16X16_TOKENS] = {
+ {
+ {
+ { 11 , 188, 210, 167, 141, 143, 152, 255, 128, },
+ { 171, 201, 203, 244, 207, 255, 255, 128, 128, },
+ }, {
+ { 23 , 217, 207, 251, 198, 255, 219, 128, 128, },
+ { 235, 249, 229, 255, 199, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 9 , 45 , 168, 85 , 66 , 221, 139, 246, 255, },
+ { 51 , 110, 163, 238, 94 , 255, 255, 128, 128, },
+ }, {
+ { 4 , 149, 175, 240, 149, 255, 205, 128, 128, },
+ { 141, 217, 186, 255, 128, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 1 , 12 , 173, 6 , 68 , 145, 41 , 204, 255, },
+ { 39 , 47 , 128, 199, 110, 255, 128, 128, 128, },
+ }, {
+ { 1 , 121, 171, 149, 115, 242, 159, 255, 128, },
+ { 1 , 255, 255, 128, 128, 128, 128, 128, 128, },
+ },
+ },
+};
+
+static const vp9_prob default_nzc_probs_32x32[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC32X32_TOKENS] = {
+ {
+ {
+ { 11 , 216, 195, 201, 160, 247, 217, 255, 255, 128, 128, },
+ { 177, 240, 239, 255, 192, 128, 128, 128, 128, 128, 128, },
+ }, {
+ { 48 , 235, 213, 235, 199, 255, 255, 128, 128, 128, 128, },
+ { 205, 255, 248, 128, 128, 128, 128, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 6 , 96 , 138, 99 , 125, 248, 188, 255, 128, 128, 128, },
+ { 17 , 53 , 43 , 189, 1 , 255, 171, 128, 128, 128, 128, },
+ }, {
+ { 5 , 187, 235, 232, 117, 255, 219, 128, 128, 128, 128, },
+ { 146, 255, 255, 128, 128, 128, 128, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 1 , 7 , 93 , 14 , 100, 30 , 85 , 65 , 81 , 210, 255, },
+ { 1 , 1 , 128, 26 , 1 , 218, 78 , 255, 255, 128, 128, },
+ }, {
+ { 4 , 148, 206, 137, 160, 255, 255, 128, 128, 128, 128, },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, },
+ },
+ },
+};
+#endif
+
+static const vp9_prob default_nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA]
+ [NZC_BITS_EXTRA] = {
+ // Bit probabilities are in least to most significance order
+ {
+ {176, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4
+ {164, 192, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8
+ {154, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16
+ {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32
+ {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64
+ {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128
+ {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256
+ {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512
+ {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024
+ }, {
+ {168, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4
+ {152, 184, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8
+ {152, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16
+ {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32
+ {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64
+ {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128
+ {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256
+ {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512
+ {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024
+ }, {
+ {160, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4
+ {152, 176, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8
+ {150, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16
+ {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32
+ {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64
+ {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128
+ {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256
+ {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512
+ {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024
+ },
+};
+
+#endif // CONFIG_CODE_NONZEROCOUNT
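When NZC_DEFAULT_COUNTS is defined, the count tables above are turned into tree probabilities at init time via vp9_tree_probs_from_distribution() (see vp9_default_coef_probs() in the vp9_entropy.c hunk below). Per internal node, that conversion reduces to the fraction of traversals taking the 0-branch, scaled into [1, 255]; a minimal sketch of the per-node step, with the tree walk omitted and the clamping modeled on get_binary_prob():

```c
typedef unsigned char vp9_prob;

static vp9_prob node_prob(unsigned int ct0, unsigned int ct1) {
  const unsigned int total = ct0 + ct1;
  unsigned int p;
  if (total == 0)
    return 128;                 /* no observations: stay at the midpoint */
  p = (255 * ct0 + (total >> 1)) / total;
  return (vp9_prob)(p < 1 ? 1 : p > 255 ? 255 : p);
}
```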
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index bc69353..b5ae70a 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -1,4 +1,4 @@
-/* 
+/*
+/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
@@ -186,6 +186,65 @@
254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
};
+#if CONFIG_CODE_NONZEROCOUNT
+const vp9_tree_index vp9_nzc4x4_tree[2 * NZC4X4_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ -NZC_3TO4, 8,
+ -NZC_5TO8, -NZC_9TO16,
+};
+struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS];
+
+const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ 8, 10,
+ -NZC_3TO4, -NZC_5TO8,
+ -NZC_9TO16, 12,
+ -NZC_17TO32, -NZC_33TO64,
+};
+struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS];
+
+const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ 8, 10,
+ -NZC_3TO4, -NZC_5TO8,
+ 12, 14,
+ -NZC_9TO16, -NZC_17TO32,
+ -NZC_33TO64, 16,
+ -NZC_65TO128, -NZC_129TO256,
+};
+struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS];
+
+const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ 8, 10,
+ -NZC_3TO4, -NZC_5TO8,
+ 12, 14,
+ -NZC_9TO16, -NZC_17TO32,
+ 16, 18,
+ -NZC_33TO64, -NZC_65TO128,
+ -NZC_129TO256, 20,
+ -NZC_257TO512, -NZC_513TO1024,
+};
+struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS];
+
+const int vp9_extranzcbits[NZC32X32_TOKENS] = {
+ 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+};
+
+const int vp9_basenzcvalue[NZC32X32_TOKENS] = {
+ 0, 1, 2, 3, 5, 9, 17, 33, 65, 129, 257, 513
+};
+
+#endif // CONFIG_CODE_NONZEROCOUNT
+
static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28];
static void init_bit_tree(vp9_tree_index *p, int n) {
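The trees above code one token per nonzero count; tokens from NZC_3TO4 upward cover a range of counts, with vp9_basenzcvalue[] giving the range base and vp9_extranzcbits[] the number of raw offset bits (coded with the pcat probabilities). For example, a count of 11 codes as NZC_9TO16 plus a 3-bit offset of 2. An illustrative encoder-side mapping (the patch's codenzc(), not shown in this hunk, plays this role; the linear scan is kept for clarity):

```c
static const int basenzcvalue[12] = { 0, 1, 2, 3, 5, 9, 17, 33,
                                      65, 129, 257, 513 };
static const int extranzcbits[12] = { 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };

static void code_nzc(int nzc, int *token, int *extra, int *nbits) {
  int t = 11;
  while (t > 0 && nzc < basenzcvalue[t])
    --t;
  *token = t;                      /* nzc = 11 -> token 5 (NZC_9TO16)  */
  *nbits = extranzcbits[t];        /* 3 offset bits                    */
  *extra = nzc - basenzcvalue[t];  /* offset 2 within the range [9,16] */
}
```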
@@ -253,6 +312,55 @@
};
void vp9_default_coef_probs(VP9_COMMON *pc) {
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_DEFAULT_COUNTS
+ int h, g;
+ for (h = 0; h < MAX_NZC_CONTEXTS; ++h) {
+ for (g = 0; g < REF_TYPES; ++g) {
+ int i;
+ unsigned int branch_ct4x4[NZC4X4_NODES][2];
+ unsigned int branch_ct8x8[NZC8X8_NODES][2];
+ unsigned int branch_ct16x16[NZC16X16_NODES][2];
+ unsigned int branch_ct32x32[NZC32X32_NODES][2];
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ NZC4X4_TOKENS, vp9_nzc4x4_encodings, vp9_nzc4x4_tree,
+ pc->fc.nzc_probs_4x4[h][g][i], branch_ct4x4,
+ default_nzc_counts_4x4[h][g][i]);
+ }
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ NZC8X8_TOKENS, vp9_nzc8x8_encodings, vp9_nzc8x8_tree,
+ pc->fc.nzc_probs_8x8[h][g][i], branch_ct8x8,
+ default_nzc_counts_8x8[h][g][i]);
+ }
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ NZC16X16_TOKENS, vp9_nzc16x16_encodings, vp9_nzc16x16_tree,
+ pc->fc.nzc_probs_16x16[h][g][i], branch_ct16x16,
+ default_nzc_counts_16x16[h][g][i]);
+ }
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ NZC32X32_TOKENS, vp9_nzc32x32_encodings, vp9_nzc32x32_tree,
+ pc->fc.nzc_probs_32x32[h][g][i], branch_ct32x32,
+ default_nzc_counts_32x32[h][g][i]);
+ }
+ }
+ }
+#else
+ vpx_memcpy(pc->fc.nzc_probs_4x4, default_nzc_probs_4x4,
+ sizeof(pc->fc.nzc_probs_4x4));
+ vpx_memcpy(pc->fc.nzc_probs_8x8, default_nzc_probs_8x8,
+ sizeof(pc->fc.nzc_probs_8x8));
+ vpx_memcpy(pc->fc.nzc_probs_16x16, default_nzc_probs_16x16,
+ sizeof(pc->fc.nzc_probs_16x16));
+ vpx_memcpy(pc->fc.nzc_probs_32x32, default_nzc_probs_32x32,
+ sizeof(pc->fc.nzc_probs_32x32));
+#endif
+ vpx_memcpy(pc->fc.nzc_pcat_probs, default_nzc_pcat_probs,
+ sizeof(pc->fc.nzc_pcat_probs));
+#endif // CONFIG_CODE_NONZEROCOUNT
vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4,
sizeof(pc->fc.coef_probs_4x4));
vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8,
@@ -266,8 +374,1098 @@
void vp9_coef_tree_initialize() {
init_bit_trees();
vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_tokens_from_tree(vp9_nzc4x4_encodings, vp9_nzc4x4_tree);
+ vp9_tokens_from_tree(vp9_nzc8x8_encodings, vp9_nzc8x8_tree);
+ vp9_tokens_from_tree(vp9_nzc16x16_encodings, vp9_nzc16x16_tree);
+ vp9_tokens_from_tree(vp9_nzc32x32_encodings, vp9_nzc32x32_tree);
+#endif
}
+#if CONFIG_CODE_NONZEROCOUNT
+
+#define mb_in_cur_tile(cm, mb_row, mb_col) \
+ ((mb_col) >= (cm)->cur_tile_mb_col_start && \
+ (mb_col) <= (cm)->cur_tile_mb_col_end && \
+ (mb_row) >= 0)
+
+#define choose_nzc_context(nzc_exp, t2, t1) \
+ ((nzc_exp) >= (t2) ? 2 : (nzc_exp) >= (t1) ? 1 : 0)
+
+#define NZC_T2_32X32 (16 << 6)
+#define NZC_T1_32X32 (4 << 6)
+
+#define NZC_T2_16X16 (12 << 6)
+#define NZC_T1_16X16 (3 << 6)
+
+#define NZC_T2_8X8 (8 << 6)
+#define NZC_T1_8X8 (2 << 6)
+
+#define NZC_T2_4X4 (4 << 6)
+#define NZC_T1_4X4 (1 << 6)
+
+// Transforms a mb16 block index to a sb64 block index
+static INLINE int mb16_to_sb64_index(int mb_row, int mb_col, int block) {
+ int r = (mb_row & 3);
+ int c = (mb_col & 3);
+ int b;
+ if (block < 16) { // Y
+ int ib = block >> 2;
+ int jb = block & 3;
+ ib += r * 4;
+ jb += c * 4;
+ b = ib * 16 + jb;
+ assert(b < 256);
+ return b;
+ } else { // UV
+ int base = block - (block & 3);
+ int ib = (block - base) >> 1;
+ int jb = (block - base) & 1;
+ ib += r * 2;
+ jb += c * 2;
+ b = base * 16 + ib * 8 + jb;
+ assert(b >= 256 && b < 384);
+ return b;
+ }
+}
+
+// Transforms a mb16 block index to a sb32 block index
+static INLINE int mb16_to_sb32_index(int mb_row, int mb_col, int block) {
+ int r = (mb_row & 1);
+ int c = (mb_col & 1);
+ int b;
+ if (block < 16) { // Y
+ int ib = block >> 2;
+ int jb = block & 3;
+ ib += r * 4;
+ jb += c * 4;
+ b = ib * 8 + jb;
+ assert(b < 64);
+ return b;
+ } else { // UV
+ int base = block - (block & 3);
+ int ib = (block - base) >> 1;
+ int jb = (block - base) & 1;
+ ib += r * 2;
+ jb += c * 2;
+ b = base * 4 + ib * 4 + jb;
+ assert(b >= 64 && b < 96);
+ return b;
+ }
+}
+
+static INLINE int block_to_txfm_index(int block, TX_SIZE tx_size, int s) {
+ // s is the log of the number of 4x4 blocks in each row/col of larger block
+ int b, ib, jb, nb;
+ ib = block >> s;
+ jb = block - (ib << s);
+ ib >>= tx_size;
+ jb >>= tx_size;
+ nb = 1 << (s - tx_size);
+ b = (ib * nb + jb) << (2 * tx_size);
+ return b;
+}
+
+/* BEGIN - Helper functions to get the y nzcs */
+static unsigned int get_nzc_4x4_y_sb64(MB_MODE_INFO *mi, int block) {
+ int b;
+ assert(block < 256);
+ b = block_to_txfm_index(block, mi->txfm_size, 4);
+ assert(b < 256);
+ return mi->nzcs[b] << (6 - 2 * mi->txfm_size);
+}
+
+static unsigned int get_nzc_4x4_y_sb32(MB_MODE_INFO *mi, int block) {
+ int b;
+ assert(block < 64);
+ b = block_to_txfm_index(block, mi->txfm_size, 3);
+ assert(b < 64);
+ return mi->nzcs[b] << (6 - 2 * mi->txfm_size);
+}
+
+static unsigned int get_nzc_4x4_y_mb16(MB_MODE_INFO *mi, int block) {
+ int b;
+ assert(block < 16);
+ b = block_to_txfm_index(block, mi->txfm_size, 2);
+ assert(b < 16);
+ return mi->nzcs[b] << (6 - 2 * mi->txfm_size);
+}
+/* END - Helper functions to get the y nzcs */
+
+/* Function to get y nzc where block index is in mb16 terms */
+static unsigned int get_nzc_4x4_y(VP9_COMMON *cm, MODE_INFO *m,
+ int mb_row, int mb_col, int block) {
+ // NOTE: All values returned are at 64 times the true value at 4x4 scale
+ MB_MODE_INFO *const mi = &m->mbmi;
+ const int mis = cm->mode_info_stride;
+ if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col))
+ return 0;
+ if (mi->sb_type == BLOCK_SIZE_SB64X64) {
+ int r = mb_row & 3;
+ int c = mb_col & 3;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_y_sb64(
+ &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block));
+ } else if (mi->sb_type == BLOCK_SIZE_SB32X32) {
+ int r = mb_row & 1;
+ int c = mb_col & 1;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_y_sb32(
+ &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block));
+ } else {
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col))
+ return 0;
+ return get_nzc_4x4_y_mb16(mi, block);
+ }
+}
+
+/* BEGIN - Helper functions to get the uv nzcs */
+static unsigned int get_nzc_4x4_uv_sb64(MB_MODE_INFO *mi, int block) {
+ int b;
+ int base, uvtxfm_size;
+ assert(block >= 256 && block < 384);
+ uvtxfm_size = mi->txfm_size;
+ base = 256 + (block & 64);
+ block -= base;
+ b = base + block_to_txfm_index(block, uvtxfm_size, 3);
+ assert(b >= 256 && b < 384);
+ return mi->nzcs[b] << (6 - 2 * uvtxfm_size);
+}
+
+static unsigned int get_nzc_4x4_uv_sb32(MB_MODE_INFO *mi, int block) {
+ int b;
+ int base, uvtxfm_size;
+ assert(block >= 64 && block < 96);
+ if (mi->txfm_size == TX_32X32)
+ uvtxfm_size = TX_16X16;
+ else
+ uvtxfm_size = mi->txfm_size;
+ base = 64 + (block & 16);
+ block -= base;
+ b = base + block_to_txfm_index(block, uvtxfm_size, 2);
+ assert(b >= 64 && b < 96);
+ return mi->nzcs[b] << (6 - 2 * uvtxfm_size);
+}
+
+static unsigned int get_nzc_4x4_uv_mb16(MB_MODE_INFO *mi, int block) {
+ int b;
+ int base, uvtxfm_size;
+ assert(block >= 16 && block < 24);
+ if (mi->txfm_size == TX_8X8 &&
+ (mi->mode == SPLITMV || mi->mode == I8X8_PRED))
+ uvtxfm_size = TX_4X4;
+ else if (mi->txfm_size == TX_16X16)
+ uvtxfm_size = TX_8X8;
+ else
+ uvtxfm_size = mi->txfm_size;
+ base = 16 + (block & 4);
+ block -= base;
+ b = base + block_to_txfm_index(block, uvtxfm_size, 1);
+ assert(b >= 16 && b < 24);
+ return mi->nzcs[b] << (6 - 2 * uvtxfm_size);
+}
+/* END - Helper functions to get the uv nzcs */
+
+/* Function to get uv nzc where block index is in mb16 terms */
+static unsigned int get_nzc_4x4_uv(VP9_COMMON *cm, MODE_INFO *m,
+ int mb_row, int mb_col, int block) {
+ // NOTE: All values returned are at 64 times the true value at 4x4 scale
+ MB_MODE_INFO *const mi = &m->mbmi;
+ const int mis = cm->mode_info_stride;
+ if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col))
+ return 0;
+ if (mi->sb_type == BLOCK_SIZE_SB64X64) {
+ int r = mb_row & 3;
+ int c = mb_col & 3;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_uv_sb64(
+ &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block));
+ } else if (mi->sb_type == BLOCK_SIZE_SB32X32) {
+ int r = mb_row & 1;
+ int c = mb_col & 1;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_uv_sb32(
+ &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block));
+ } else {
+ return get_nzc_4x4_uv_mb16(mi, block);
+ }
+}
+
+int vp9_get_nzc_context_y_sb64(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ assert(block < 256);
+ switch (txfm_size) {
+ case TX_32X32:
+ assert((block & 63) == 0);
+ if (block < 128) {
+ int o = (block >> 6) * 2;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 15);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 128] << 3;
+ }
+ if ((block & 127) == 0) {
+ int o = (block >> 7) * 2;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 15);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 64] << 3;
+ }
+ nzc_exp <<= 2;
+ // Note nzc_exp is 64 times the average value expected at 32x32 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32);
+ break;
+
+ case TX_16X16:
+ assert((block & 15) == 0);
+ if (block < 64) {
+ int o = block >> 4;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 64] << 4;
+ }
+ if ((block & 63) == 0) {
+ int o = block >> 6;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 16] << 4;
+ }
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+ break;
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (block < 32) {
+ int o = block >> 3;
+ int p = ((block >> 2) & 1) ? 14 : 12;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 32] << 5;
+ }
+ if ((block & 31) == 0) {
+ int o = block >> 6;
+ int p = ((block >> 5) & 1) ? 11 : 3;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+ break;
+
+ case TX_4X4:
+ if (block < 16) {
+ int o = block >> 2;
+ int p = block & 3;
+ nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ 12 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 16] << 6);
+ }
+ if ((block & 15) == 0) {
+ int o = block >> 6;
+ int p = (block >> 4) & 3;
+ nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ 3 + 4 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+ break;
+
+ default:
+ return 0;
+ }
+}
+
+int vp9_get_nzc_context_y_sb32(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ assert(block < 64);
+ switch (txfm_size) {
+ case TX_32X32:
+ assert(block == 0);
+ nzc_exp =
+ (get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 12) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 13) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 14) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 15) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 15)) << 2;
+ // Note nzc_exp is 64 times the average value expected at 32x32 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32);
+ break;
+
+ case TX_16X16:
+ assert((block & 15) == 0);
+ if (block < 32) {
+ int o = (block >> 4) & 1;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 32] << 4;
+ }
+ if ((block & 31) == 0) {
+ int o = block >> 5;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 16] << 4;
+ }
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+ break;
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (block < 16) {
+ int o = block >> 3;
+ int p = ((block >> 2) & 1) ? 14 : 12;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 16] << 5;
+ }
+ if ((block & 15) == 0) {
+ int o = block >> 5;
+ int p = ((block >> 4) & 1) ? 11 : 3;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+ break;
+
+ case TX_4X4:
+ if (block < 8) {
+ int o = block >> 2;
+ int p = block & 3;
+ nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ 12 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 8] << 6);
+ }
+ if ((block & 7) == 0) {
+ int o = block >> 5;
+ int p = (block >> 3) & 3;
+ nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ 3 + 4 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+ break;
+
+ default:
+ return 0;
+ break;
+ }
+}
+
+int vp9_get_nzc_context_y_mb16(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ assert(block < 16);
+ switch (txfm_size) {
+ case TX_16X16:
+ assert(block == 0);
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15);
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (block < 8) {
+ int p = ((block >> 2) & 1) ? 14 : 12;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p + 1);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 8] << 5;
+ }
+ if ((block & 7) == 0) {
+ int p = ((block >> 3) & 1) ? 11 : 3;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p + 4);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (block < 4) {
+ int p = block & 3;
+ nzc_exp = get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col,
+ 12 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 4] << 6);
+ }
+ if ((block & 3) == 0) {
+ int p = (block >> 2) & 3;
+ nzc_exp += get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1,
+ 3 + 4 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ break;
+ }
+}
+
+int vp9_get_nzc_context_uv_sb64(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ const int base = block - (block & 63);
+ const int boff = (block & 63);
+ const int base_mb16 = base >> 4;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ TX_SIZE txfm_size_uv;
+
+ assert(block >= 256 && block < 384);
+ txfm_size_uv = txfm_size;
+
+ switch (txfm_size_uv) {
+ case TX_32X32:
+ assert(block == 256 || block == 320);
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1,
+ base_mb16 + 3);
+ nzc_exp <<= 2;
+ // Note nzc_exp is 64 times the average value expected at 32x32 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32);
+
+ case TX_16X16:
+ // uv txfm_size 16x16
+ assert((block & 15) == 0);
+ if (boff < 32) {
+ int o = (boff >> 4) & 1;
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1,
+ base_mb16 + 3);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 32] << 4;
+ }
+ if ((boff & 31) == 0) {
+ int o = boff >> 5;
+ nzc_exp +=
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis,
+ mb_row + o, mb_col - 1, base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis,
+ mb_row + o, mb_col - 1, base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, base_mb16 + 3);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 16] << 4;
+ }
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (boff < 16) {
+ int o = boff >> 2;
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 3);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 16] << 5;
+ }
+ if ((boff & 15) == 0) {
+ int o = boff >> 4;
+ nzc_exp +=
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 3);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (boff < 8) {
+ int o = boff >> 1;
+ int p = boff & 1;
+ nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 8] << 6);
+ }
+ if ((boff & 7) == 0) {
+ int o = boff >> 4;
+ int p = (boff >> 3) & 1;
+ nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1 + 2 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ }
+}
+
+int vp9_get_nzc_context_uv_sb32(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ const int base = block - (block & 15);
+ const int boff = (block & 15);
+ const int base_mb16 = base >> 2;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ TX_SIZE txfm_size_uv;
+
+ assert(block >= 64 && block < 96);
+ if (txfm_size == TX_32X32)
+ txfm_size_uv = TX_16X16;
+ else
+ txfm_size_uv = txfm_size;
+
+ switch (txfm_size_uv) {
+ case TX_16X16:
+ // uv txfm_size 16x16
+ assert(block == 64 || block == 80);
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 3) +
+        get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+                       base_mb16 + 1) +
+        get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+                       base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 3);
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+ break;
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (boff < 8) {
+ int o = boff >> 2;
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 3);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 8] << 5;
+ }
+ if ((boff & 7) == 0) {
+ int o = boff >> 3;
+ nzc_exp +=
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 3);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (boff < 4) {
+ int o = boff >> 1;
+ int p = boff & 1;
+ nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 4] << 6);
+ }
+ if ((boff & 3) == 0) {
+ int o = boff >> 3;
+ int p = (boff >> 2) & 1;
+ nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1 + 2 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ }
+}
+
+int vp9_get_nzc_context_uv_mb16(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ const int base = block - (block & 3);
+ const int boff = (block & 3);
+ const int base_mb16 = base;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ TX_SIZE txfm_size_uv;
+
+ assert(block >= 16 && block < 24);
+ if (txfm_size == TX_16X16)
+ txfm_size_uv = TX_8X8;
+ else if (txfm_size == TX_8X8 &&
+ (cur->mbmi.mode == I8X8_PRED || cur->mbmi.mode == SPLITMV))
+ txfm_size_uv = TX_4X4;
+ else
+ txfm_size_uv = txfm_size;
+
+ switch (txfm_size_uv) {
+ case TX_8X8:
+ assert((block & 3) == 0);
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 3);
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (boff < 2) {
+ int p = boff & 1;
+ nzc_exp = get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 2 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 2] << 6);
+ }
+ if ((boff & 1) == 0) {
+ int p = (boff >> 1) & 1;
+ nzc_exp += get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+ base_mb16 + 1 + 2 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ }
+}
+
+int vp9_get_nzc_context(VP9_COMMON *cm, MACROBLOCKD *xd, int block) {
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ assert(block < 384);
+ if (block < 256)
+ return vp9_get_nzc_context_y_sb64(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ else
+ return vp9_get_nzc_context_uv_sb64(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ assert(block < 96);
+ if (block < 64)
+ return vp9_get_nzc_context_y_sb32(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ else
+ return vp9_get_nzc_context_uv_sb32(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ } else {
+ assert(block < 64);
+ if (block < 16)
+ return vp9_get_nzc_context_y_mb16(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ else
+ return vp9_get_nzc_context_uv_mb16(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ }
+}
+
+static void update_nzc(VP9_COMMON *cm,
+ uint16_t nzc,
+ int nzc_context,
+ TX_SIZE tx_size,
+ int ref,
+ int type) {
+ int e, c;
+ c = codenzc(nzc);
+ if (tx_size == TX_32X32)
+ cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
+ else if (tx_size == TX_16X16)
+ cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++;
+ else if (tx_size == TX_8X8)
+ cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++;
+ else if (tx_size == TX_4X4)
+ cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++;
+ else
+ assert(0);
+
+ if ((e = vp9_extranzcbits[c])) {
+ int x = nzc - vp9_basenzcvalue[c];
+ while (e--) {
+ int b = (x >> e) & 1;
+ cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++;
+ }
+ }
+}
+
+static void update_nzcs_sb64(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 256; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0);
+ }
+ for (j = 256; j < 384; j += 64) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 256; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0);
+ }
+ for (j = 256; j < 384; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 256; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0);
+ }
+ for (j = 256; j < 384; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 256; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0);
+ }
+ for (j = 256; j < 384; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void update_nzcs_sb32(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 64; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 64; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 64; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0);
+ }
+ for (j = 64; j < 96; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 64; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0);
+ }
+ for (j = 64; j < 96; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void update_nzcs_mb16(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_16X16:
+ for (j = 0; j < 16; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0);
+ }
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 16; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0);
+ }
+ if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ } else {
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 16; ++j) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0);
+ }
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void vp9_update_nzc_counts(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64)
+ update_nzcs_sb64(cm, xd, mb_row, mb_col);
+ else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32)
+ update_nzcs_sb32(cm, xd, mb_row, mb_col);
+ else
+ update_nzcs_mb16(cm, xd, mb_row, mb_col);
+}
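+
+/* A minimal sketch, not the actual helper: update_nzc() is assumed to
+ * bucket a block's nonzero count into its token via codenzc() and bump
+ * the matching per-context counter, roughly:
+ *
+ *   ++cm->fc.nzc_counts_4x4[nzc_context][ref][type][codenzc(nzc)];
+ *
+ * (array and argument names as used elsewhere in this patch). */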
+#endif // CONFIG_CODE_NONZEROCOUNT
+
// #define COEF_COUNT_TESTING
#define COEF_COUNT_SAT 24
@@ -277,10 +1475,10 @@
#define COEF_COUNT_SAT_AFTER_KEY 24
#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
-static void update_coef_probs(vp9_coeff_probs *dst_coef_probs,
- vp9_coeff_probs *pre_coef_probs,
- int block_types, vp9_coeff_count *coef_counts,
- int count_sat, int update_factor) {
+static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs,
+ vp9_coeff_probs *pre_coef_probs,
+ int block_types, vp9_coeff_count *coef_counts,
+ int count_sat, int update_factor) {
int t, i, j, k, l, count;
unsigned int branch_ct[ENTROPY_NODES][2];
vp9_prob coef_probs[ENTROPY_NODES];
@@ -307,9 +1505,6 @@
}
void vp9_adapt_coef_probs(VP9_COMMON *cm) {
-#ifdef COEF_COUNT_TESTING
- int t, i, j, k;
-#endif
int count_sat;
int update_factor; /* denominator 256 */
@@ -325,16 +1520,143 @@
count_sat = COEF_COUNT_SAT;
}
- update_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4,
- BLOCK_TYPES, cm->fc.coef_counts_4x4,
- count_sat, update_factor);
- update_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8,
- BLOCK_TYPES, cm->fc.coef_counts_8x8,
- count_sat, update_factor);
- update_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16,
- BLOCK_TYPES, cm->fc.coef_counts_16x16,
- count_sat, update_factor);
- update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32,
- BLOCK_TYPES, cm->fc.coef_counts_32x32,
- count_sat, update_factor);
+ adapt_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4,
+ BLOCK_TYPES, cm->fc.coef_counts_4x4,
+ count_sat, update_factor);
+ adapt_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8,
+ BLOCK_TYPES, cm->fc.coef_counts_8x8,
+ count_sat, update_factor);
+ adapt_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16,
+ BLOCK_TYPES, cm->fc.coef_counts_16x16,
+ count_sat, update_factor);
+ adapt_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32,
+ BLOCK_TYPES, cm->fc.coef_counts_32x32,
+ count_sat, update_factor);
}
+
+#if CONFIG_CODE_NONZEROCOUNT
+static void adapt_nzc_probs(VP9_COMMON *cm,
+ int block_size,
+ int count_sat,
+ int update_factor) {
+ int c, r, b, n;
+ int count, factor;
+ unsigned int nzc_branch_ct[NZC32X32_NODES][2];
+ vp9_prob nzc_probs[NZC32X32_NODES];
+ int tokens, nodes;
+ const vp9_tree_index *nzc_tree;
+ const struct vp9_token_struct *nzc_encodings;
+ vp9_prob *dst_nzc_probs;
+ vp9_prob *pre_nzc_probs;
+ unsigned int *nzc_counts;
+
+ if (block_size == 32) {
+ tokens = NZC32X32_TOKENS;
+ nzc_tree = vp9_nzc32x32_tree;
+ nzc_encodings = vp9_nzc32x32_encodings;
+ dst_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_32x32[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_32x32[0][0][0];
+ } else if (block_size == 16) {
+ tokens = NZC16X16_TOKENS;
+ nzc_tree = vp9_nzc16x16_tree;
+ nzc_encodings = vp9_nzc16x16_encodings;
+ dst_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_16x16[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_16x16[0][0][0];
+ } else if (block_size == 8) {
+ tokens = NZC8X8_TOKENS;
+ nzc_tree = vp9_nzc8x8_tree;
+ nzc_encodings = vp9_nzc8x8_encodings;
+ dst_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_8x8[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_8x8[0][0][0];
+ } else {
+ nzc_tree = vp9_nzc4x4_tree;
+ nzc_encodings = vp9_nzc4x4_encodings;
+ tokens = NZC4X4_TOKENS;
+ dst_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_4x4[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_4x4[0][0][0];
+ }
+ nodes = tokens - 1;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c)
+ for (r = 0; r < REF_TYPES; ++r)
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ int offset_tokens = offset * tokens;
+ vp9_tree_probs_from_distribution(tokens,
+ nzc_encodings, nzc_tree,
+ nzc_probs, nzc_branch_ct,
+ nzc_counts + offset_tokens);
+ for (n = 0; n < nodes; ++n) {
+ count = nzc_branch_ct[n][0] + nzc_branch_ct[n][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ dst_nzc_probs[offset_nodes + n] =
+ weighted_prob(pre_nzc_probs[offset_nodes + n],
+ nzc_probs[n], factor);
+ }
+ }
+}
+
+static void adapt_nzc_pcat(VP9_COMMON *cm, int count_sat, int update_factor) {
+ int c, t;
+ int count, factor;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ int b;
+ for (b = 0; b < bits; ++b) {
+ vp9_prob prob = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0],
+ cm->fc.nzc_pcat_counts[c][t][b][1]);
+ count = cm->fc.nzc_pcat_counts[c][t][b][0] +
+ cm->fc.nzc_pcat_counts[c][t][b][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ cm->fc.nzc_pcat_probs[c][t][b] = weighted_prob(
+ cm->fc.pre_nzc_pcat_probs[c][t][b], prob, factor);
+ }
+ }
+ }
+}
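+
+/* Illustrative arithmetic for the blend above: with count_sat == 24 and
+ * update_factor == 128 (the after-key values), a branch observed 12 times
+ * gets factor == 128 * 12 / 24 == 64, so the new probability moves a
+ * quarter of the way from the pre_* value toward the freshly measured one,
+ * assuming weighted_prob() interpolates with denominator 256. */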
+
+// #define NZC_COUNT_TESTING
+void vp9_adapt_nzc_probs(VP9_COMMON *cm) {
+ int count_sat;
+ int update_factor; /* denominator 256 */
+#ifdef NZC_COUNT_TESTING
+ int c, r, b, t;
+ printf("\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c)
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC4X4_TOKENS; ++t) {
+ printf(" %d,", cm->fc.nzc_counts_4x4[c][r][b][t]);
+ }
+ printf("}\n");
+ }
+ printf("\n");
+ }
+#endif
+
+ if (cm->frame_type == KEY_FRAME) {
+ update_factor = COEF_MAX_UPDATE_FACTOR_KEY;
+ count_sat = COEF_COUNT_SAT_KEY;
+ } else if (cm->last_frame_type == KEY_FRAME) {
+ update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */
+ count_sat = COEF_COUNT_SAT_AFTER_KEY;
+ } else {
+ update_factor = COEF_MAX_UPDATE_FACTOR;
+ count_sat = COEF_COUNT_SAT;
+ }
+
+ adapt_nzc_probs(cm, 4, count_sat, update_factor);
+ adapt_nzc_probs(cm, 8, count_sat, update_factor);
+ adapt_nzc_probs(cm, 16, count_sat, update_factor);
+ adapt_nzc_probs(cm, 32, count_sat, update_factor);
+ adapt_nzc_pcat(cm, count_sat, update_factor);
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 8d28b00..ceef1a7 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -142,4 +142,86 @@
}
extern int vp9_get_coef_context(int * recent_energy, int token);
+#if CONFIG_CODE_NONZEROCOUNT
+/* Alphabet for number of non-zero symbols in block */
+#define NZC_0 0 /* Used for all blocks */
+#define NZC_1 1 /* Used for all blocks */
+#define NZC_2 2 /* Used for all blocks */
+#define NZC_3TO4 3 /* Used for all blocks */
+#define NZC_5TO8 4 /* Used for all blocks */
+#define NZC_9TO16 5 /* Used for all blocks */
+#define NZC_17TO32 6 /* Used for 8x8 and larger blocks */
+#define NZC_33TO64 7 /* Used for 8x8 and larger blocks */
+#define NZC_65TO128 8 /* Used for 16x16 and larger blocks */
+#define NZC_129TO256 9 /* Used for 16x16 and larger blocks */
+#define NZC_257TO512 10 /* Used for 32x32 and larger blocks */
+#define NZC_513TO1024 11 /* Used for 32x32 and larger blocks */
+
+/* Number of tokens for each block size */
+#define NZC4X4_TOKENS 6
+#define NZC8X8_TOKENS 8
+#define NZC16X16_TOKENS 10
+#define NZC32X32_TOKENS 12
+
+/* Number of nodes for each block size */
+#define NZC4X4_NODES 5
+#define NZC8X8_NODES 7
+#define NZC16X16_NODES 9
+#define NZC32X32_NODES 11
+
+/* Max number of tokens with extra bits */
+#define NZC_TOKENS_EXTRA 9
+
+/* Max number of extra bits */
+#define NZC_BITS_EXTRA 9
+
+/* Tokens without extra bits */
+#define NZC_TOKENS_NOEXTRA (NZC32X32_TOKENS - NZC_TOKENS_EXTRA)
+
+#define MAX_NZC_CONTEXTS 3
+
+/* whether to update extra bit probabilities */
+#define NZC_PCAT_UPDATE
+
+/* nzc trees */
+extern const vp9_tree_index vp9_nzc4x4_tree[];
+extern const vp9_tree_index vp9_nzc8x8_tree[];
+extern const vp9_tree_index vp9_nzc16x16_tree[];
+extern const vp9_tree_index vp9_nzc32x32_tree[];
+
+/* nzc encodings */
+extern struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS];
+extern struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS];
+extern struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS];
+extern struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS];
+
+#define codenzc(x) (\
+ (x) <= 3 ? (x) : (x) <= 4 ? 3 : (x) <= 8 ? 4 : \
+ (x) <= 16 ? 5 : (x) <= 32 ? 6 : (x) <= 64 ? 7 :\
+ (x) <= 128 ? 8 : (x) <= 256 ? 9 : (x) <= 512 ? 10 : 11)
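+
+/* For example: codenzc(0) == NZC_0, codenzc(4) == NZC_3TO4,
+ * codenzc(7) == NZC_5TO8 and codenzc(300) == NZC_257TO512. */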
+
+int vp9_get_nzc_context_y_sb64(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_y_sb32(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_y_mb16(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_uv_sb64(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_uv_sb32(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_uv_mb16(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context(struct VP9Common *cm, MACROBLOCKD *xd, int block);
+void vp9_update_nzc_counts(struct VP9Common *cm, MACROBLOCKD *xd,
+ int mb_row, int mb_col);
+void vp9_adapt_nzc_probs(struct VP9Common *cm);
+
+/* Extra bits array */
+extern const int vp9_extranzcbits[NZC32X32_TOKENS];
+
+/* Base nzc values */
+extern const int vp9_basenzcvalue[NZC32X32_TOKENS];
+
+#endif // CONFIG_CODE_NONZEROCOUNT
#endif // VP9_COMMON_VP9_ENTROPY_H_
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 56cebff..9a7be45 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -42,7 +42,9 @@
-MV_CLASS_2, -MV_CLASS_3,
10, 12,
-MV_CLASS_4, -MV_CLASS_5,
+ 14, 16,
-MV_CLASS_6, -MV_CLASS_7,
+ -MV_CLASS_8, -MV_CLASS_9,
};
struct vp9_token_struct vp9_mv_class_encodings[MV_CLASSES];
@@ -63,9 +65,9 @@
{
{ /* vert component */
128, /* sign */
- {224, 144, 192, 168, 192, 176, 192}, /* class */
+ {224, 144, 192, 168, 192, 176, 192, 198, 198}, /* class */
{216}, /* class0 */
- {136, 140, 148, 160, 176, 192, 224}, /* bits */
+ {136, 140, 148, 160, 176, 192, 224, 234, 234}, /* bits */
{{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
{64, 96, 64}, /* fp */
160, /* class0_hp bit */
@@ -73,9 +75,9 @@
},
{ /* hor component */
128, /* sign */
- {216, 128, 176, 160, 176, 176, 192}, /* class */
+ {216, 128, 176, 160, 176, 176, 192, 198, 198}, /* class */
{208}, /* class0 */
- {136, 140, 148, 160, 176, 192, 224}, /* bits */
+ {136, 140, 148, 160, 176, 192, 224, 234, 234}, /* bits */
{{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
{64, 96, 64}, /* fp */
160, /* class0_hp bit */
@@ -103,6 +105,8 @@
else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5;
else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6;
else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7;
+ else if (z < CLASS0_SIZE * 2048) c = MV_CLASS_8;
+ else if (z < CLASS0_SIZE * 4096) c = MV_CLASS_9;
else assert(0);
if (offset)
*offset = z - mv_class_base(c);
@@ -134,6 +138,7 @@
int incr,
int usehp) {
int s, z, c, o, d, e, f;
+ if (!incr) return;
assert (v != 0); /* should not be zero */
s = v < 0;
mvcomp->sign[s] += incr;
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index f5cfee9..3350006 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -49,7 +49,7 @@
extern struct vp9_token_struct vp9_mv_joint_encodings [MV_JOINTS];
/* Symbols for coding magnitude class of nonzero components */
-#define MV_CLASSES 8
+#define MV_CLASSES 10
typedef enum {
MV_CLASS_0 = 0, /* (0, 2] integer pel */
MV_CLASS_1 = 1, /* (2, 4] integer pel */
@@ -59,6 +59,8 @@
MV_CLASS_5 = 5, /* (32, 64] integer pel */
MV_CLASS_6 = 6, /* (64, 128] integer pel */
MV_CLASS_7 = 7, /* (128, 256] integer pel */
+ MV_CLASS_8 = 8, /* (256, 512] integer pel */
+ MV_CLASS_9 = 9, /* (512, 1024] integer pel */
} MV_CLASS_TYPE;
extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2];
diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c
index 2f709bf..8409885 100644
--- a/vp9/common/vp9_findnearmv.c
+++ b/vp9/common/vp9_findnearmv.c
@@ -9,10 +9,11 @@
*/
+#include <limits.h>
+
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_sadmxn.h"
#include "vp9/common/vp9_subpelvar.h"
-#include <limits.h>
const uint8_t vp9_mbsplit_offset[4][16] = {
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
@@ -32,8 +33,7 @@
}
vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
- vp9_prob p[4], const int context
- ) {
+ vp9_prob p[4], const int context) {
p[0] = pc->fc.vp9_mode_contexts[context][0];
p[1] = pc->fc.vp9_mode_contexts[context][1];
p[2] = pc->fc.vp9_mode_contexts[context][2];
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index c42aab1..6887b04 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -17,6 +17,9 @@
#include "vp9/common/vp9_treecoder.h"
#include "vp9/common/vp9_onyxc_int.h"
+#define LEFT_TOP_MARGIN (16 << 3)
+#define RIGHT_BOTTOM_MARGIN (16 << 3)
+
/* check a list of motion vectors by sad score using a number rows of pixels
* above and a number cols of pixels in the left to select the one with best
* score to use as ref motion vector
@@ -30,8 +33,7 @@
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
int_mv *mvp, const int *ref_frame_sign_bias) {
- MV xmv;
- xmv = mvp->as_mv;
+ MV xmv = mvp->as_mv;
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) {
xmv.row *= -1;
@@ -41,8 +43,6 @@
mvp->as_mv = xmv;
}
-#define LEFT_TOP_MARGIN (16 << 3)
-#define RIGHT_BOTTOM_MARGIN (16 << 3)
static void clamp_mv(int_mv *mv,
int mb_to_left_edge,
@@ -72,10 +72,10 @@
int mb_to_right_edge,
int mb_to_top_edge,
int mb_to_bottom_edge) {
- return (mv->as_mv.col < mb_to_left_edge) ||
- (mv->as_mv.col > mb_to_right_edge) ||
- (mv->as_mv.row < mb_to_top_edge) ||
- (mv->as_mv.row > mb_to_bottom_edge);
+ return mv->as_mv.col < mb_to_left_edge ||
+ mv->as_mv.col > mb_to_right_edge ||
+ mv->as_mv.row < mb_to_top_edge ||
+ mv->as_mv.row > mb_to_bottom_edge;
}
vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
@@ -90,11 +90,12 @@
if (!xd->left_available)
return 0;
- /* On L edge, get from MB to left of us */
+ // On L edge, get from MB to left of us
--cur_mb;
if (cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv[0].as_int;
+
b += 4;
}
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index bc79b5c..9e55adf 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -15,17 +15,12 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-
-#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
-
-/* If we don't want to use ROUND_POWER_OF_TWO macro
-static INLINE int16_t round_power_of_two(int16_t value, int n) {
- return (value + (1 << (n - 1))) >> n;
-}*/
+#include "vp9/common/vp9_common.h"
// Constants and Macros used by all idct/dct functions
#define DCT_CONST_BITS 14
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
+
// Constants are round(16384 * cos(k*Pi/64)) where k = 1 to 31.
// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
static const int cospi_1_64 = 16364;
@@ -67,13 +62,13 @@
static const int sinpi_4_9 = 15212;
static INLINE int dct_const_round_shift(int input) {
- int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
+ int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
assert(INT16_MIN <= rv && rv <= INT16_MAX);
return rv;
}
static INLINE int dct_32_round(int input) {
- int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
+ int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
assert(-131072 <= rv && rv <= 131071);
return rv;
}
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index 54b79ee..e210625 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -26,6 +26,7 @@
#include <math.h>
#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
@@ -109,7 +110,7 @@
}
}
-static void idct4_1d(int16_t *input, int16_t *output) {
+void vp9_idct4_1d_c(int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;
// stage 1
@@ -140,7 +141,7 @@
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = input[j];
- idct4_1d(temp_in, outptr);
+ vp9_idct4_1d(temp_in, outptr);
input += 4;
outptr += 4;
}
@@ -149,7 +150,7 @@
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- idct4_1d(temp_in, temp_out);
+ vp9_idct4_1d(temp_in, temp_out);
for (j = 0; j < 4; ++j)
output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
}
@@ -205,7 +206,7 @@
step1[6] = dct_const_round_shift(temp2);
// stage 2 & stage 3 - even half
- idct4_1d(step1, step1);
+ vp9_idct4_1d(step1, step1);
// stage 2 - odd half
step2[4] = step1[4] + step1[5];
@@ -298,24 +299,23 @@
output[3] = dct_const_round_shift(s3);
}
-static const transform_2d IHT_4[] = {
- { idct4_1d, idct4_1d }, // DCT_DCT = 0
- { iadst4_1d, idct4_1d }, // ADST_DCT = 1
- { idct4_1d, iadst4_1d }, // DCT_ADST = 2
- { iadst4_1d, iadst4_1d } // ADST_ADST = 3
-};
-
void vp9_short_iht4x4_c(int16_t *input, int16_t *output,
- int pitch, TX_TYPE tx_type) {
+ int pitch, int tx_type) {
+ const transform_2d IHT_4[] = {
+ { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0
+ { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1
+ { vp9_idct4_1d, iadst4_1d }, // DCT_ADST = 2
+ { iadst4_1d, iadst4_1d } // ADST_ADST = 3
+ };
+
int i, j;
int16_t out[4 * 4];
int16_t *outptr = out;
int16_t temp_in[4], temp_out[4];
- const transform_2d ht = IHT_4[tx_type];
// inverse transform row vectors
for (i = 0; i < 4; ++i) {
- ht.rows(input, outptr);
+ IHT_4[tx_type].rows(input, outptr);
input += 4;
outptr += 4;
}
@@ -324,7 +324,7 @@
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- ht.cols(temp_in, temp_out);
+ IHT_4[tx_type].cols(temp_in, temp_out);
for (j = 0; j < 4; ++j)
output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
}
@@ -415,7 +415,7 @@
};
void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
- int pitch, TX_TYPE tx_type) {
+ int pitch, int tx_type) {
int i, j;
int16_t out[8 * 8];
int16_t *outptr = out;
@@ -838,7 +838,7 @@
};
void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
- int pitch, TX_TYPE tx_type) {
+ int pitch, int tx_type) {
int i, j;
int16_t out[16 * 16];
int16_t *outptr = out;
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index a26415f..a03a66e 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -24,7 +24,7 @@
int i;
for (i = 0; i < 16; i++) {
- TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
@@ -58,7 +58,7 @@
BLOCKD *blockd = xd->block;
for (i = 0; i < 9; i += 8) {
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
@@ -67,7 +67,7 @@
}
}
for (i = 2; i < 11; i += 8) {
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
16, tx_type);
@@ -100,7 +100,7 @@
void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
BLOCKD *bd = &xd->block[0];
- TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
+ TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
if (tx_type != DCT_DCT) {
vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type);
} else {
@@ -123,9 +123,16 @@
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
- vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
- xd->diff + x_idx * 16 + y_idx * 32 * 16, 64);
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 32 * 16,
+ 64);
+ } else {
+ vp9_short_iht16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type);
+ }
}
}
@@ -134,9 +141,15 @@
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
- vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
- xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
+ } else {
+ vp9_short_iht8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type);
+ }
}
}
@@ -145,9 +158,15 @@
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
- vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
- xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
+ } else {
+ vp9_short_iht4x4(xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type);
+ }
}
}
@@ -206,9 +225,16 @@
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
- vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
- xd->diff + x_idx * 16 + y_idx * 64 * 16, 128);
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 64 * 16,
+ 128);
+ } else {
+ vp9_short_iht16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type);
+ }
}
}
@@ -217,9 +243,15 @@
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
- vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
- xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
+ } else {
+ vp9_short_iht8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type);
+ }
}
}
@@ -228,9 +260,15 @@
for (n = 0; n < 256; n++) {
const int x_idx = n & 15, y_idx = n >> 4;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
- vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
- xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
+ } else {
+ vp9_short_iht4x4(xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type);
+ }
}
}
diff --git a/vp9/common/vp9_maskingmv.c b/vp9/common/vp9_maskingmv.c
index f1151e3..326201b 100644
--- a/vp9/common/vp9_maskingmv.c
+++ b/vp9/common/vp9_maskingmv.c
@@ -11,25 +11,19 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-extern unsigned int vp9_sad16x16_sse3(
+
+unsigned int vp9_sad16x16_sse3(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride,
int max_err);
-extern void vp9_sad16x16x3_sse3(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr,
- int ref_stride,
- int *results);
-
-extern int vp8_growmaskmb_sse3(
+int vp8_growmaskmb_sse3(
unsigned char *om,
unsigned char *nm);
-extern void vp8_makemask_sse3(
+void vp8_makemask_sse3(
unsigned char *y,
unsigned char *u,
unsigned char *v,
@@ -238,6 +232,7 @@
for (i = 0; i < 256; i++)
ym[i] = nym[i];
}
+
void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
unsigned char *ym, unsigned char *uvm,
int yp, int uvp,
@@ -283,6 +278,7 @@
return sad;
}
+
int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
unsigned char *ym) {
int i, j;
@@ -294,6 +290,7 @@
return sad;
}
+
int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
int yp, int uvp,
unsigned char *dy, unsigned char *du, unsigned char *dv,
@@ -802,5 +799,5 @@
}
fclose(f);
fclose(g);
- return;
+ return 0;
}
diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c
index ed96292..8d99335 100644
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -20,15 +20,15 @@
int mv_stride,
uint8_t **base,
uint8_t **base2,
- int Stride,
+ int stride,
int offset,
BLOCKSET bs) {
if (bs == DEST) {
- b->dst_stride = Stride;
+ b->dst_stride = stride;
b->dst = offset;
b->base_dst = base;
} else {
- b->pre_stride = Stride;
+ b->pre_stride = stride;
b->pre = offset;
b->base_pre = base;
b->base_second_pre = base2;
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 25aa53b..2f322a3 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -11,23 +11,27 @@
#include "vp9/common/vp9_mvref_common.h"
#define MVREF_NEIGHBOURS 8
+
static int mb_mv_ref_search[MVREF_NEIGHBOURS][2] = {
{0, -1}, {-1, 0}, {-1, -1}, {0, -2},
{-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}
};
+
static int mb_ref_distance_weight[MVREF_NEIGHBOURS] =
{ 3, 3, 2, 1, 1, 1, 1, 1 };
+
static int sb_mv_ref_search[MVREF_NEIGHBOURS][2] = {
{0, -1}, {-1, 0}, {1, -1}, {-1, 1},
{-1, -1}, {0, -2}, {-2, 0}, {-1, -2}
};
+
static int sb_ref_distance_weight[MVREF_NEIGHBOURS] =
{ 3, 3, 2, 2, 2, 1, 1, 1 };
// clamp_mv_ref
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) {
+static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) {
if (mv->as_mv.col < (xd->mb_to_left_edge - MV_BORDER))
mv->as_mv.col = xd->mb_to_left_edge - MV_BORDER;
else if (mv->as_mv.col > xd->mb_to_right_edge + MV_BORDER)
@@ -41,11 +45,9 @@
// Gets a candidate reference motion vector from the given mode info
// structure if one exists that matches the given reference frame.
-static int get_matching_candidate(
- const MODE_INFO *candidate_mi,
+static int get_matching_candidate(const MODE_INFO *candidate_mi,
MV_REFERENCE_FRAME ref_frame,
- int_mv *c_mv
-) {
+ int_mv *c_mv) {
int ret_val = TRUE;
if (ref_frame == candidate_mi->mbmi.ref_frame) {
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 48d19a3..c8369eb 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -58,10 +58,23 @@
vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
+
vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_NODES];
+ vp9_prob nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_NODES];
+ vp9_prob nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_NODES];
+ vp9_prob nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_NODES];
+ vp9_prob nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA];
+#endif
nmv_context nmvc;
nmv_context pre_nmvc;
@@ -84,11 +97,35 @@
vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob pre_nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_NODES];
+ vp9_prob pre_nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_NODES];
+ vp9_prob pre_nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_NODES];
+ vp9_prob pre_nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_NODES];
+ vp9_prob pre_nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA];
+#endif
vp9_coeff_count coef_counts_4x4[BLOCK_TYPES];
vp9_coeff_count coef_counts_8x8[BLOCK_TYPES];
vp9_coeff_count coef_counts_16x16[BLOCK_TYPES];
vp9_coeff_count coef_counts_32x32[BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_TOKENS];
+ unsigned int nzc_counts_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_TOKENS];
+ unsigned int nzc_counts_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_TOKENS];
+ unsigned int nzc_counts_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_TOKENS];
+ unsigned int nzc_pcat_counts[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA][2];
+#endif
nmv_context_counts NMVcount;
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
@@ -300,4 +337,31 @@
buf[new_idx]++;
}
+// TODO(debargha): merge the two functions
+static void set_mb_row(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int mb_row, int block_size) {
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
+ xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
+
+ // Are edges available for intra prediction?
+ xd->up_available = (mb_row != 0);
+}
+
+static void set_mb_col(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int mb_col, int block_size) {
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
+
+ // Are edges available for intra prediction?
+ xd->left_available = (mb_col > cm->cur_tile_mb_col_start);
+ xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
+}
+
+static int get_mb_row(const MACROBLOCKD *xd) {
+ return ((-xd->mb_to_top_edge) >> 7);
+}
+
+static int get_mb_col(const MACROBLOCKD *xd) {
+ return ((-xd->mb_to_left_edge) >> 7);
+}
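+
+/* Round-trip note: set_mb_row() stores mb_to_top_edge as
+ * -((mb_row * 16) << 3) == -(mb_row << 7), so get_mb_row() recovers
+ * mb_row exactly with (-mb_to_top_edge) >> 7; likewise for columns. */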
#endif // VP9_COMMON_VP9_ONYXC_INT_H_
diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h
index 11f55ab..0a637f0 100644
--- a/vp9/common/vp9_postproc.h
+++ b/vp9/common/vp9_postproc.h
@@ -13,30 +13,26 @@
#define VP9_COMMON_VP9_POSTPROC_H_
#include "vpx_ports/mem.h"
+
struct postproc_state {
- int last_q;
- int last_noise;
- char noise[3072];
+ int last_q;
+ int last_noise;
+ char noise[3072];
DECLARE_ALIGNED(16, char, blackclamp[16]);
DECLARE_ALIGNED(16, char, whiteclamp[16]);
DECLARE_ALIGNED(16, char, bothclamp[16]);
};
+
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_ppflags.h"
+
int vp9_post_proc_frame(struct VP9Common *oci, YV12_BUFFER_CONFIG *dest,
vp9_ppflags_t *flags);
+void vp9_de_noise(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post,
+ int q, int low_var_thresh, int flag);
-void vp9_de_noise(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag);
-
-void vp9_deblock(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag);
+void vp9_deblock(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post,
+ int q, int low_var_thresh, int flag);
#endif // VP9_COMMON_VP9_POSTPROC_H_
diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h
index cbeaf53..f079161 100644
--- a/vp9/common/vp9_pragmas.h
+++ b/vp9/common/vp9_pragmas.h
@@ -14,6 +14,7 @@
#ifdef __INTEL_COMPILER
#pragma warning(disable:997 1011 170)
#endif
+
#ifdef _MSC_VER
#pragma warning(disable:4799)
#endif
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 41a4e00..9fe66fc 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -34,7 +34,6 @@
pred_context += (m - 1)->mbmi.seg_id_predicted;
break;
-
case PRED_REF:
pred_context = (m - cm->mode_info_stride)->mbmi.ref_predicted;
if (xd->left_available)
@@ -101,8 +100,7 @@
break;
default:
- // TODO *** add error trap code.
- pred_context = 0;
+ pred_context = 0; // *** add error trap code.
break;
}
@@ -114,39 +112,23 @@
vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
const MACROBLOCKD *const xd,
PRED_ID pred_id) {
- vp9_prob pred_probability;
- int pred_context;
-
- // Get the appropriate prediction context
- pred_context = vp9_get_pred_context(cm, xd, pred_id);
+ const int pred_context = vp9_get_pred_context(cm, xd, pred_id);
switch (pred_id) {
case PRED_SEG_ID:
- pred_probability = cm->segment_pred_probs[pred_context];
- break;
-
+ return cm->segment_pred_probs[pred_context];
case PRED_REF:
- pred_probability = cm->ref_pred_probs[pred_context];
- break;
-
+ return cm->ref_pred_probs[pred_context];
case PRED_COMP:
// In keeping with convention elsewhere the probability returned is
// the probability of a "0" outcome which in this case means the
// probability of comp pred off.
- pred_probability = cm->prob_comppred[pred_context];
- break;
-
+ return cm->prob_comppred[pred_context];
case PRED_MBSKIP:
- pred_probability = cm->mbskip_pred_probs[pred_context];
- break;
-
+ return cm->mbskip_pred_probs[pred_context];
default:
- // TODO *** add error trap code.
- pred_probability = 128;
- break;
+ return 128; // *** add error trap code.
}
-
- return pred_probability;
}
// This function returns a context probability ptr for coding a given
@@ -154,71 +136,41 @@
const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
const MACROBLOCKD *const xd,
PRED_ID pred_id) {
- const vp9_prob *pred_probability;
- int pred_context;
-
- // Get the appropriate prediction context
- pred_context = vp9_get_pred_context(cm, xd, pred_id);
+ const int pred_context = vp9_get_pred_context(cm, xd, pred_id);
switch (pred_id) {
case PRED_SEG_ID:
- pred_probability = &cm->segment_pred_probs[pred_context];
- break;
-
+ return &cm->segment_pred_probs[pred_context];
case PRED_REF:
- pred_probability = &cm->ref_pred_probs[pred_context];
- break;
-
+ return &cm->ref_pred_probs[pred_context];
case PRED_COMP:
// In keeping with convention elsewhere the probability returned is
// the probability of a "0" outcome which in this case means the
// probability of comp pred off.
- pred_probability = &cm->prob_comppred[pred_context];
- break;
-
+ return &cm->prob_comppred[pred_context];
case PRED_MBSKIP:
- pred_probability = &cm->mbskip_pred_probs[pred_context];
- break;
-
+ return &cm->mbskip_pred_probs[pred_context];
case PRED_SWITCHABLE_INTERP:
- pred_probability = &cm->fc.switchable_interp_prob[pred_context][0];
- break;
-
+ return &cm->fc.switchable_interp_prob[pred_context][0];
default:
- // TODO *** add error trap code.
- pred_probability = NULL;
- break;
+ return NULL; // *** add error trap code.
}
-
- return pred_probability;
}
// This function returns the status of the given prediction signal.
// I.e. is the predicted value for the given signal correct.
unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
PRED_ID pred_id) {
- unsigned char pred_flag = 0;
-
switch (pred_id) {
case PRED_SEG_ID:
- pred_flag = xd->mode_info_context->mbmi.seg_id_predicted;
- break;
-
+ return xd->mode_info_context->mbmi.seg_id_predicted;
case PRED_REF:
- pred_flag = xd->mode_info_context->mbmi.ref_predicted;
- break;
-
+ return xd->mode_info_context->mbmi.ref_predicted;
case PRED_MBSKIP:
- pred_flag = xd->mode_info_context->mbmi.mb_skip_coeff;
- break;
-
+ return xd->mode_info_context->mbmi.mb_skip_coeff;
default:
- // TODO *** add error trap code.
- pred_flag = 0;
- break;
+ return 0; // *** add error trap code.
}
-
- return pred_flag;
}
// This function sets the status of the given prediction signal.
@@ -280,7 +232,7 @@
break;
default:
- // TODO *** add error trap code.
+ // *** add error trap code.
break;
}
}
@@ -325,7 +277,6 @@
MV_REFERENCE_FRAME pred_ref = LAST_FRAME;
int segment_id = xd->mode_info_context->mbmi.segment_id;
- int seg_ref_active;
int i;
unsigned char frame_allowed[MAX_REF_FRAMES] = {1, 1, 1, 1};
@@ -336,7 +287,7 @@
unsigned char above_left_in_image;
// Is segment coding enabled
- seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
+ int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
// Special case treatment if segment coding is enabled.
// Don't allow prediction of a reference frame that the segment
@@ -389,9 +340,7 @@
// Functions to computes a set of modified reference frame probabilities
// to use when the prediction of the reference frame value fails
void vp9_calc_ref_probs(int *count, vp9_prob *probs) {
- int tot_count;
-
- tot_count = count[0] + count[1] + count[2] + count[3];
+ int tot_count = count[0] + count[1] + count[2] + count[3];
probs[0] = get_prob(count[0], tot_count);
tot_count -= count[0];
@@ -407,19 +356,12 @@
// they are not allowed for a given segment.
void vp9_compute_mod_refprobs(VP9_COMMON *const cm) {
int norm_cnt[MAX_REF_FRAMES];
- int intra_count;
- int inter_count;
- int last_count;
- int gfarf_count;
- int gf_count;
- int arf_count;
-
- intra_count = cm->prob_intra_coded;
- inter_count = (255 - intra_count);
- last_count = (inter_count * cm->prob_last_coded) / 255;
- gfarf_count = inter_count - last_count;
- gf_count = (gfarf_count * cm->prob_gf_coded) / 255;
- arf_count = gfarf_count - gf_count;
+ const int intra_count = cm->prob_intra_coded;
+ const int inter_count = (255 - intra_count);
+ const int last_count = (inter_count * cm->prob_last_coded) / 255;
+ const int gfarf_count = inter_count - last_count;
+ const int gf_count = (gfarf_count * cm->prob_gf_coded) / 255;
+ const int arf_count = gfarf_count - gf_count;
// Work out modified reference frame probabilities to use where prediction
// of the reference frame fails
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 52c4d42..49dcf0a 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -8,48 +8,48 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/vp9_blockd.h"
-
#ifndef VP9_COMMON_VP9_PRED_COMMON_H_
#define VP9_COMMON_VP9_PRED_COMMON_H_
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_onyxc_int.h"
// Predicted items
typedef enum {
- PRED_SEG_ID = 0, // Segment identifier
+ PRED_SEG_ID = 0, // Segment identifier
PRED_REF = 1,
PRED_COMP = 2,
PRED_MBSKIP = 3,
PRED_SWITCHABLE_INTERP = 4
} PRED_ID;
-extern unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern void vp9_set_pred_flag(MACROBLOCKD *const xd,
- PRED_ID pred_id,
- unsigned char pred_flag);
+void vp9_set_pred_flag(MACROBLOCKD *const xd,
+ PRED_ID pred_id,
+ unsigned char pred_flag);
-extern unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- int MbIndex);
+unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ int MbIndex);
-extern MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd);
-extern void vp9_compute_mod_refprobs(VP9_COMMON *const cm);
+MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd);
+
+void vp9_compute_mod_refprobs(VP9_COMMON *const cm);
#endif // VP9_COMMON_VP9_PRED_COMMON_H_
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index eb8de21..186532c 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -9,20 +9,19 @@
*/
#include <stdio.h>
+
#include "./vpx_config.h"
#include "vp9_rtcd.h"
#include "vp9/common/vp9_reconintra.h"
#include "vpx_mem/vpx_mem.h"
-/* For skip_recon_mb(), add vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd)
- * and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd).
- */
+// For skip_recon_mb(), add vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd)
+// and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd).
-/* Using multiplication and shifting instead of division in diagonal prediction.
- * iscale table is calculated from ((1<<16) + (i+2)/2) / (i+2) and used as
- * ((A + B) * iscale[i] + (1<<15)) >> 16;
- * where A and B are weighted pixel values.
- */
+// Using multiplication and shifting instead of division in diagonal prediction.
+// iscale table is calculated from ((1 << 16) + (i + 2) / 2) / (i + 2) and used as
+// ((A + B) * iscale[i] + (1 << 15)) >> 16;
+// where A and B are weighted pixel values.
static const unsigned int iscale[64] = {
32768, 21845, 16384, 13107, 10923, 9362, 8192, 7282,
6554, 5958, 5461, 5041, 4681, 4369, 4096, 3855,
@@ -34,101 +33,107 @@
1130, 1111, 1092, 1074, 1057, 1040, 1024, 1008,
};
+static INLINE int iscale_round(int value, int i) {
+ return ROUND_POWER_OF_TWO(value * iscale[i], 16);
+}
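+
+/* Illustrative: iscale[i] ~= (1 << 16) / (i + 2), so iscale_round(v, i)
+ * approximates v / (i + 2) with rounding, e.g. iscale_round(100, 2)
+ * == ROUND_POWER_OF_TWO(100 * 16384, 16) == 25 == 100 / 4. */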
static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
- int r, c, h, w, v;
- int a, b;
+ int r, c;
+
r = 0;
for (c = 0; c < n - 2; c++) {
- if (c & 1)
- a = yleft_col[r + 1];
- else
- a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
- b = yabove_row[c + 2];
- ypred_ptr[c] = ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16;
+ int a = c & 1 ? yleft_col[r + 1]
+ : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1);
+ int b = yabove_row[c + 2];
+ ypred_ptr[c] = iscale_round(2 * a + (c + 1) * b, 1 + c);
}
+
for (r = 1; r < n / 2 - 1; r++) {
for (c = 0; c < n - 2 - 2 * r; c++) {
- if (c & 1)
- a = yleft_col[r + 1];
- else
- a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
- b = ypred_ptr[(r - 1) * y_stride + c + 2];
- ypred_ptr[r * y_stride + c] =
- ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16;
+ int a = c & 1 ? yleft_col[r + 1]
+ : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1);
+ int b = ypred_ptr[(r - 1) * y_stride + c + 2];
+ ypred_ptr[r * y_stride + c] = iscale_round(2 * a + (c + 1) * b, 1 + c);
}
}
- for (; r < n - 1; ++r) {
+
+ for (; r < n - 1; r++) {
for (c = 0; c < n; c++) {
- v = (c & 1 ? yleft_col[r + 1] : (yleft_col[r] + yleft_col[r + 1] + 1) >> 1);
- h = r - c / 2;
+ int v = c & 1 ? yleft_col[r + 1]
+ : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1);
+ int h = r - c / 2;
ypred_ptr[h * y_stride + c] = v;
}
}
+
c = 0;
r = n - 1;
- ypred_ptr[r * y_stride] = (ypred_ptr[(r - 1) * y_stride] +
- yleft_col[r] + 1) >> 1;
+ ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride] +
+ yleft_col[r], 1);
for (r = n - 2; r >= n / 2; --r) {
- w = c + (n - 1 - r) * 2;
- ypred_ptr[r * y_stride + w] = (ypred_ptr[(r - 1) * y_stride + w] +
- ypred_ptr[r * y_stride + w - 1] + 1) >> 1;
+ int w = c + (n - 1 - r) * 2;
+ ypred_ptr[r * y_stride + w] =
+ ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] +
+ ypred_ptr[r * y_stride + w - 1], 1);
}
+
for (c = 1; c < n; c++) {
for (r = n - 1; r >= n / 2 + c / 2; --r) {
- w = c + (n - 1 - r) * 2;
- ypred_ptr[r * y_stride + w] = (ypred_ptr[(r - 1) * y_stride + w] +
- ypred_ptr[r * y_stride + w - 1] + 1) >> 1;
+ int w = c + (n - 1 - r) * 2;
+ ypred_ptr[r * y_stride + w] =
+ ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] +
+ ypred_ptr[r * y_stride + w - 1], 1);
}
}
}
static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
- int r, c, h, w, v;
- int a, b;
+ int r, c;
+
c = 0;
for (r = 0; r < n - 2; r++) {
- if (r & 1)
- a = yabove_row[c + 1];
- else
- a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
- b = yleft_col[r + 2];
- ypred_ptr[r * y_stride] = ((2 * a + (r + 1) * b) * iscale[1+r] +
- (1<<15)) >> 16;
+ int a = r & 1 ? yabove_row[c + 1]
+ : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1);
+ int b = yleft_col[r + 2];
+ ypred_ptr[r * y_stride] = iscale_round(2 * a + (r + 1) * b, 1 + r);
}
+
for (c = 1; c < n / 2 - 1; c++) {
for (r = 0; r < n - 2 - 2 * c; r++) {
- if (r & 1)
- a = yabove_row[c + 1];
- else
- a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
- b = ypred_ptr[(r + 2) * y_stride + c - 1];
- ypred_ptr[r * y_stride + c] = ((2 * a + (c + 1) * b) * iscale[1+c] +
- (1<<15)) >> 16;
+ int a = r & 1 ? yabove_row[c + 1]
+ : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1);
+ int b = ypred_ptr[(r + 2) * y_stride + c - 1];
+ ypred_ptr[r * y_stride + c] = iscale_round(2 * a + (c + 1) * b, 1 + c);
}
}
+
for (; c < n - 1; ++c) {
for (r = 0; r < n; r++) {
- v = (r & 1 ? yabove_row[c + 1] : (yabove_row[c] + yabove_row[c + 1] + 1) >> 1);
- w = c - r / 2;
+ int v = r & 1 ? yabove_row[c + 1]
+ : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1);
+ int w = c - r / 2;
ypred_ptr[r * y_stride + w] = v;
}
}
+
r = 0;
c = n - 1;
- ypred_ptr[c] = (ypred_ptr[(c - 1)] + yabove_row[c] + 1) >> 1;
+ ypred_ptr[c] = ROUND_POWER_OF_TWO(ypred_ptr[(c - 1)] + yabove_row[c], 1);
for (c = n - 2; c >= n / 2; --c) {
- h = r + (n - 1 - c) * 2;
- ypred_ptr[h * y_stride + c] = (ypred_ptr[h * y_stride + c - 1] +
- ypred_ptr[(h - 1) * y_stride + c] + 1) >> 1;
+ int h = r + (n - 1 - c) * 2;
+ ypred_ptr[h * y_stride + c] =
+ ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
+ ypred_ptr[(h - 1) * y_stride + c], 1);
}
+
for (r = 1; r < n; r++) {
for (c = n - 1; c >= n / 2 + r / 2; --c) {
- h = r + (n - 1 - c) * 2;
- ypred_ptr[h * y_stride + c] = (ypred_ptr[h * y_stride + c - 1] +
- ypred_ptr[(h - 1) * y_stride + c] + 1) >> 1;
+ int h = r + (n - 1 - c) * 2;
+ ypred_ptr[h * y_stride + c] =
+ ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
+ ypred_ptr[(h - 1) * y_stride + c], 1);
}
}
}
@@ -136,27 +141,28 @@
static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
int r, c;
+
for (r = 0; r < n - 1; ++r) {
for (c = 0; c <= r; ++c) {
- ypred_ptr[(r - c) * y_stride + c] =
- ((yabove_row[r + 1] * (c + 1) +
- yleft_col[r + 1] * (r - c + 1)) * iscale[r] + (1<<15)) >> 16;
+ ypred_ptr[(r - c) * y_stride + c] = iscale_round(
+ yabove_row[r + 1] * (c + 1) + yleft_col[r + 1] * (r - c + 1), r);
}
}
+
for (c = 0; c <= r; ++c) {
int yabove_ext = yabove_row[r]; // clip_pixel(2 * yabove_row[r] -
// yabove_row[r - 1]);
int yleft_ext = yleft_col[r]; // clip_pixel(2 * yleft_col[r] -
// yleft_col[r-1]);
ypred_ptr[(r - c) * y_stride + c] =
- ((yabove_ext * (c + 1) +
- yleft_ext * (r - c + 1)) * iscale[r] + (1<<15)) >> 16;
+ iscale_round(yabove_ext * (c + 1) + yleft_ext * (r - c + 1), r);
}
for (r = 1; r < n; ++r) {
for (c = n - r; c < n; ++c) {
const int yabove_ext = ypred_ptr[(r - 1) * y_stride + c];
const int yleft_ext = ypred_ptr[r * y_stride + c - 1];
- ypred_ptr[r * y_stride + c] = (yabove_ext + yleft_ext + 1) >> 1;
+ ypred_ptr[r * y_stride + c] =
+ ROUND_POWER_OF_TWO(yabove_ext + yleft_ext, 1);
}
}
}
@@ -165,7 +171,7 @@
uint8_t *yabove_row, uint8_t *yleft_col) {
int r, c;
for (c = 0; c < n; c++)
- ypred_ptr[c] = (yabove_row[c - 1] + yabove_row[c] + 1) >> 1;
+ ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[c - 1] + yabove_row[c], 1);
ypred_ptr += y_stride;
for (c = 0; c < n; c++)
ypred_ptr[c] = yabove_row[c - 1];
@@ -199,9 +205,10 @@
static void d153_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
int r, c;
- ypred_ptr[0] = (yabove_row[-1] + yleft_col[0] + 1) >> 1;
+ ypred_ptr[0] = ROUND_POWER_OF_TWO(yabove_row[-1] + yleft_col[0], 1);
for (r = 1; r < n; r++)
- ypred_ptr[r * y_stride] = (yleft_col[r - 1] + yleft_col[r] + 1) >> 1;
+ ypred_ptr[r * y_stride] =
+ ROUND_POWER_OF_TWO(yleft_col[r - 1] + yleft_col[r], 1);
ypred_ptr++;
ypred_ptr[0] = yabove_row[-1];
for (r = 1; r < n; r++)
@@ -268,6 +275,20 @@
}
}
+static INLINE int log2_minus_1(int n) {
+ switch (n) {
+ case 4: return 1;
+ case 8: return 2;
+ case 16: return 3;
+ case 32: return 4;
+ case 64: return 5;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride,
uint8_t *ypred_ptr,
int y_stride, int mode, int bsize,
@@ -313,22 +334,7 @@
int i;
int shift;
int average = 0;
- int log2_bsize_minus_1;
-
- assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32 ||
- bsize == 64);
- if (bsize == 4) {
- log2_bsize_minus_1 = 1;
- } else if (bsize == 8) {
- log2_bsize_minus_1 = 2;
- } else if (bsize == 16) {
- log2_bsize_minus_1 = 3;
- } else if (bsize == 32) {
- log2_bsize_minus_1 = 4;
- } else {
- assert(bsize == 64);
- log2_bsize_minus_1 = 5;
- }
+ int log2_bsize_minus_1 = log2_minus_1(bsize);
if (up_available || left_available) {
if (up_available) {
@@ -343,7 +349,7 @@
}
}
shift = log2_bsize_minus_1 + up_available + left_available;
- expected_dc = (average + (1 << (shift - 1))) >> shift;
+ expected_dc = ROUND_POWER_OF_TWO(average, shift);
} else {
expected_dc = 128;
}
@@ -354,21 +360,19 @@
}
}
break;
- case V_PRED: {
+ case V_PRED:
for (r = 0; r < bsize; r++) {
memcpy(ypred_ptr, yabove_row, bsize);
ypred_ptr += y_stride;
}
- }
- break;
- case H_PRED: {
+ break;
+ case H_PRED:
for (r = 0; r < bsize; r++) {
vpx_memset(ypred_ptr, yleft_col[r], bsize);
ypred_ptr += y_stride;
}
- }
- break;
- case TM_PRED: {
+ break;
+ case TM_PRED:
for (r = 0; r < bsize; r++) {
for (c = 0; c < bsize; c++) {
ypred_ptr[c] = clip_pixel(yleft_col[r] + yabove_row[c] - ytop_left);
@@ -376,32 +380,25 @@
ypred_ptr += y_stride;
}
- }
- break;
- case D45_PRED: {
+ break;
+ case D45_PRED:
d45_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D135_PRED: {
+ break;
+ case D135_PRED:
d135_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D117_PRED: {
+ break;
+ case D117_PRED:
d117_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D153_PRED: {
+ break;
+ case D153_PRED:
d153_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D27_PRED: {
+ break;
+ case D27_PRED:
d27_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D63_PRED: {
+ break;
+ case D63_PRED:
d63_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
+ break;
case I8X8_PRED:
case B_PRED:
case NEARESTMV:
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index dab88a3..04b67b9 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -139,6 +139,29 @@
prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor"
specialize vp9_intra_uv4x4_predict;
+if [ "$CONFIG_VP9_DECODER" = "yes" ]; then
+prototype void vp9_add_residual_4x4 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_4x4 sse2
+
+prototype void vp9_add_residual_8x8 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_8x8 sse2
+
+prototype void vp9_add_residual_16x16 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_16x16 sse2
+
+prototype void vp9_add_residual_32x32 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_32x32 sse2
+
+prototype void vp9_add_constant_residual_8x8 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_constant_residual_8x8 sse2
+
+prototype void vp9_add_constant_residual_16x16 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_constant_residual_16x16 sse2
+
+prototype void vp9_add_constant_residual_32x32 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_constant_residual_32x32 sse2
+fi
+
#
# Loopfilter
#
@@ -299,6 +322,9 @@
prototype void vp9_short_iht16x16 "int16_t *input, int16_t *output, int pitch, int tx_type"
specialize vp9_short_iht16x16
+prototype void vp9_idct4_1d "int16_t *input, int16_t *output"
+specialize vp9_idct4_1d sse2
+
# dct and add
prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
index 29f89b6..53a1eb8 100644
--- a/vp9/common/vp9_tile_common.c
+++ b/vp9/common/vp9_tile_common.c
@@ -10,6 +10,11 @@
#include "vp9/common/vp9_tile_common.h"
+#define MIN_TILE_WIDTH 256
+#define MAX_TILE_WIDTH 4096
+#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
+#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
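+/* i.e. in 64-pixel superblock units: 256 >> 6 == 4 SBs minimum and
+ * 4096 >> 6 == 64 SBs maximum per tile. */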
+
static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
int *max_tile_off, int tile_idx,
int log2_n_tiles, int n_mbs) {
@@ -35,8 +40,6 @@
cm->log2_tile_rows, cm->mb_rows);
}
-#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
-#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,
int *delta_log2_n_tiles) {
diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h
index ea69356..7ea3772 100644
--- a/vp9/common/vp9_tile_common.h
+++ b/vp9/common/vp9_tile_common.h
@@ -13,9 +13,6 @@
#include "vp9/common/vp9_onyxc_int.h"
-#define MIN_TILE_WIDTH 256
-#define MAX_TILE_WIDTH 4096
-
void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx);
void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx);
diff --git a/vp9/common/x86/vp9_idctllm_x86.c b/vp9/common/x86/vp9_idctllm_x86.c
index 7b3c579..3d7a148 100644
--- a/vp9/common/x86/vp9_idctllm_x86.c
+++ b/vp9/common/x86/vp9_idctllm_x86.c
@@ -77,10 +77,10 @@
void vp9_short_idct4x4llm_sse2(int16_t *input, int16_t *output, int pitch) {
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
- const __m128i cst = _mm_setr_epi16((short)cospi_16_64, (short)cospi_16_64,
- (short)cospi_16_64, (short)-cospi_16_64,
- (short)cospi_24_64, (short)-cospi_8_64,
- (short)cospi_8_64, (short)cospi_24_64);
+ const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
+ (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+ (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+ (int16_t)cospi_8_64, (int16_t)cospi_24_64);
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const int half_pitch = pitch >> 1;
__m128i input0, input1, input2, input3;
@@ -198,4 +198,40 @@
input3 = _mm_srli_si128(input3, 8);
_mm_storel_epi64((__m128i *)(output + 2 * half_pitch), input3);
}
+
+void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i c1 = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
+ (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+ (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+ (int16_t)cospi_8_64, (int16_t)cospi_24_64);
+ const __m128i c2 = _mm_setr_epi16(1, 1, 1, 1, 1, -1, 1, -1);
+
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i in, temp;
+
+ // Load input data.
+ in = _mm_loadl_epi64((__m128i *)input);
+
+ // Construct i3, i1, i3, i1, i2, i0, i2, i0
+ in = _mm_shufflelo_epi16(in, 0xd8);
+ in = _mm_unpacklo_epi32(in, in);
+
+ // Stage 1
+ in = _mm_madd_epi16(in, c1);
+ in = _mm_add_epi32(in, rounding);
+ in = _mm_srai_epi32(in, DCT_CONST_BITS);
+ in = _mm_packs_epi32(in, zero);
+
+ // Stage 2
+ temp = _mm_shufflelo_epi16(in, 0x9c);
+ in = _mm_shufflelo_epi16(in, 0xc9);
+ in = _mm_unpacklo_epi64(temp, in);
+ in = _mm_madd_epi16(in, c2);
+ in = _mm_packs_epi32(in, zero);
+
+ // Store results
+ _mm_storel_epi64((__m128i *)output, in);
+}
+
#endif
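For reference, the scalar transform the new SSE2 routine implements is
roughly the following (a sketch matching the two madd stages above; cospi_*
and dct_const_round_shift are the existing constants/helper in vp9_idct.h):

    static void idct4_1d_ref(const int16_t *input, int16_t *output) {
      int16_t step[4];
      /* stage 1: butterflies, matching the madd against c1 */
      step[0] = dct_const_round_shift((input[0] + input[2]) * cospi_16_64);
      step[1] = dct_const_round_shift((input[0] - input[2]) * cospi_16_64);
      step[2] = dct_const_round_shift(input[1] * cospi_24_64 -
                                      input[3] * cospi_8_64);
      step[3] = dct_const_round_shift(input[1] * cospi_8_64 +
                                      input[3] * cospi_24_64);
      /* stage 2: matches the shuffles and the madd against c2 */
      output[0] = step[0] + step[3];
      output[1] = step[1] + step[2];
      output[2] = step[1] - step[2];
      output[3] = step[0] - step[3];
    }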
diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c
index 99c3664..7e3b464 100644
--- a/vp9/decoder/vp9_dboolhuff.c
+++ b/vp9/decoder/vp9_dboolhuff.c
@@ -8,11 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include "vp9/decoder/vp9_dboolhuff.h"
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
+
int vp9_start_decode(BOOL_DECODER *br,
const unsigned char *source,
unsigned int source_sz) {
diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h
index cf31d38..eeb5c35 100644
--- a/vp9/decoder/vp9_dboolhuff.h
+++ b/vp9/decoder/vp9_dboolhuff.h
@@ -13,6 +13,7 @@
#include <stddef.h>
#include <limits.h>
+
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 326c802..86dfaf6 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -173,7 +173,6 @@
m->mbmi.mb_skip_coeff = 0;
}
-
y_mode = m->mbmi.sb_type ?
read_kf_sb_ymode(bc,
pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]):
@@ -677,22 +676,23 @@
mbmi->need_to_clamp_secondmv = 0;
mbmi->second_ref_frame = NONE;
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to MV values
- // that are in 1/8th pel units
- xd->mb_to_left_edge = mb_to_left_edge
- = -((mb_col * 16) << 3);
- mb_to_left_edge -= LEFT_TOP_MARGIN;
-
- xd->mb_to_right_edge = mb_to_right_edge
- = ((pbi->common.mb_cols - mb_size - mb_col) * 16) << 3;
- mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
-
// Make sure the MACROBLOCKD mode info pointer is pointed at the
// correct entry for the current macroblock.
xd->mode_info_context = mi;
xd->prev_mode_info_context = prev_mi;
+ // Distance of MB to the various image edges.
+ // These are specified to 1/8th pel as they are always compared to MV
+ // values that are in 1/8th pel units.
+ set_mb_row(cm, xd, mb_row, mb_size);
+ set_mb_col(cm, xd, mb_col, mb_size);
+
+ mb_to_left_edge = xd->mb_to_left_edge;
+ mb_to_left_edge -= LEFT_TOP_MARGIN;
+
+ mb_to_right_edge = xd->mb_to_right_edge;
+ mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
+
// Read the macroblock segment id.
read_mb_segment_id(pbi, mb_row, mb_col, bc);
@@ -750,17 +750,6 @@
vp9_mv_ref_probs(&pbi->common, mv_ref_p,
mbmi->mb_mode_context[ref_frame]);
- /*
- if (pbi->common.current_video_frame == 1) {
- int k = mbmi->mb_mode_context[ref_frame];
- printf("vp9_mode_contexts: [%d %d %d %d] %d %d %d %d\n",
- mb_row, mb_col, ref_frame, k,
- cm->fc.vp9_mode_contexts[k][0],
- cm->fc.vp9_mode_contexts[k][1],
- cm->fc.vp9_mode_contexts[k][2],
- cm->fc.vp9_mode_contexts[k][3]);
- }
- */
// If the segment level skip mode is enabled
if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
@@ -1176,20 +1165,274 @@
vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs));
if (pbi->common.mb_no_coeff_skip) {
int k;
- for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
cm->mbskip_pred_probs[k] = (vp9_prob)vp9_read_literal(bc, 8);
+ }
}
mb_mode_mv_init(pbi, bc);
}
+#if CONFIG_CODE_NONZEROCOUNT
+static uint16_t read_nzc(VP9_COMMON *const cm,
+ int nzc_context,
+ TX_SIZE tx_size,
+ int ref,
+ int type,
+ BOOL_DECODER* const bc) {
+ int c, e;
+ uint16_t nzc;
+ if (tx_size == TX_32X32) {
+ c = treed_read(bc, vp9_nzc32x32_tree,
+ cm->fc.nzc_probs_32x32[nzc_context][ref][type]);
+ cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_16X16) {
+ c = treed_read(bc, vp9_nzc16x16_tree,
+ cm->fc.nzc_probs_16x16[nzc_context][ref][type]);
+ cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_8X8) {
+ c = treed_read(bc, vp9_nzc8x8_tree,
+ cm->fc.nzc_probs_8x8[nzc_context][ref][type]);
+ cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_4X4) {
+ c = treed_read(bc, vp9_nzc4x4_tree,
+ cm->fc.nzc_probs_4x4[nzc_context][ref][type]);
+ cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++;
+ } else {
+ assert(0);
+ }
+ nzc = vp9_basenzcvalue[c];
+ if ((e = vp9_extranzcbits[c])) {
+ int x = 0;
+ while (e--) {
+ int b = vp9_read(
+ bc, cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]);
+ x |= (b << e);
+ cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++;
+ }
+ nzc += x;
+ }
+ if (tx_size == TX_32X32)
+ assert(nzc <= 1024);
+ else if (tx_size == TX_16X16)
+ assert(nzc <= 256);
+ else if (tx_size == TX_8X8)
+ assert(nzc <= 64);
+ else if (tx_size == TX_4X4)
+ assert(nzc <= 16);
+ return nzc;
+}
+
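+// The count itself is coded as a token plus optional extra bits:
+// vp9_basenzcvalue[] gives the smallest count in the token's range and
+// vp9_extranzcbits[] the number of literal bits that follow, MSB first.
+// Illustration only (made-up numbers, not the real tables): a token with
+// base value 9 and 2 extra bits b1, b0 covers
+//   nzc = 9 + (b1 << 1) + b0   ->  one of 9, 10, 11, 12.
+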
+static void read_nzcs_sb64(VP9_COMMON *const cm,
+ MACROBLOCKD* xd,
+ int mb_row,
+ int mb_col,
+ BOOL_DECODER* const bc) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 256; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 64) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 256; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 256; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 256; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 256; j < 384; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void read_nzcs_sb32(VP9_COMMON *const cm,
+ MACROBLOCKD* xd,
+ int mb_row,
+ int mb_col,
+ BOOL_DECODER* const bc) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 64; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 64; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 64; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 64; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 64; j < 96; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void read_nzcs_mb16(VP9_COMMON *const cm,
+ MACROBLOCKD* xd,
+ int mb_row,
+ int mb_col,
+ BOOL_DECODER* const bc) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_16X16:
+ for (j = 0; j < 16; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 16; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ }
+ if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ } else {
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 16; ++j) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
+
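+// In the three readers above, the nzcs[] loop stride equals the transform's
+// area in 4x4 coefficient units, so each transform block parks its count at
+// the slot of its first 4x4 sub-block. Equivalently (illustrative helper,
+// not part of this patch; assumes the TX_* enum values 0..3):
+//
+//   static INLINE int nzc_stride(TX_SIZE tx_size) {
+//     return 1 << (2 * tx_size);  /* 4x4->1, 8x8->4, 16x16->16, 32x32->64 */
+//   }
+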
void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
int mb_row,
int mb_col,
BOOL_DECODER* const bc) {
+ VP9_COMMON *const cm = &pbi->common;
MODE_INFO *mi = xd->mode_info_context;
MODE_INFO *prev_mi = xd->prev_mode_info_context;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
if (pbi->common.frame_type == KEY_FRAME) {
kfread_modes(pbi, mi, mb_row, mb_col, bc);
@@ -1199,4 +1442,28 @@
mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1,
pbi->common.active_ref_scale);
}
+#if CONFIG_CODE_NONZEROCOUNT
+ if (mbmi->sb_type == BLOCK_SIZE_SB64X64)
+ read_nzcs_sb64(cm, xd, mb_row, mb_col, bc);
+ else if (mbmi->sb_type == BLOCK_SIZE_SB32X32)
+ read_nzcs_sb32(cm, xd, mb_row, mb_col, bc);
+ else
+ read_nzcs_mb16(cm, xd, mb_row, mb_col, bc);
+#endif // CONFIG_CODE_NONZEROCOUNT
+
+ if (mbmi->sb_type) {
+ const int n_mbs = 1 << mbmi->sb_type;
+ const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
+ const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col);
+ const int mis = cm->mode_info_stride;
+ int x, y;
+
+ for (y = 0; y < y_mbs; y++) {
+ for (x = !y; x < x_mbs; x++) {
+ mi[y * mis + x] = *mi;
+ }
+ }
+ } else {
+ update_blockd_bmi(xd);
+ }
}
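The replication loop moved into vp9_decode_mb_mode_mv copies the decoded
MODE_INFO into every macroblock slot the superblock covers, clamped at the
frame edge; "x = !y" makes row 0 start at column 1 so slot (0, 0), which
already holds the data, is skipped. A worked case (hypothetical dimensions):

    /* cm->mb_cols == 21, SB64 at mb_col == 20: n_mbs == 4 but
       x_mbs == MIN(4, 21 - 20) == 1, so only column 0 is visited,
       and on row 0 even that is skipped since it already holds *mi. */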
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index b44d659..5b3e1bd 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -39,7 +39,7 @@
#define COEFCOUNT_TESTING
-//#define DEC_DEBUG
+// #define DEC_DEBUG
#ifdef DEC_DEBUG
int dec_debug = 0;
#endif
@@ -201,8 +201,7 @@
static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
- BLOCKD *bd = &xd->block[0];
- TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
+ TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
#ifdef DEC_DEBUG
if (dec_debug) {
int i;
@@ -240,13 +239,13 @@
BOOL_DECODER* const bc) {
// First do Y
// if the first one is DCT_DCT assume all the rest are as well
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]);
+ TX_TYPE tx_type = get_tx_type_8x8(xd, 0);
#ifdef DEC_DEBUG
if (dec_debug) {
int i;
printf("\n");
printf("qcoeff 8x8\n");
- for (i = 0; i < 400; i++) {
+ for (i = 0; i < 384; i++) {
printf("%3d ", xd->qcoeff[i]);
if (i % 16 == 15) printf("\n");
}
@@ -267,7 +266,7 @@
int i8x8mode = b->bmi.as_mode.first;
vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
}
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+ tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
xd->eobs[idx]);
@@ -341,7 +340,7 @@
vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
for (j = 0; j < 4; j++) {
b = &xd->block[ib + iblock[j]];
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
@@ -375,7 +374,7 @@
eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i);
vp9_intra4x4_predict(xd, b, b_mode, b->predictor);
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
@@ -397,7 +396,7 @@
xd->dst.v_buffer,
xd->dst.uv_stride,
xd);
- } else if (mode == SPLITMV || get_tx_type_4x4(xd, &xd->block[0]) == DCT_DCT) {
+ } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
xd->itxm_add_y_block(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
@@ -431,7 +430,7 @@
#endif
for (i = 0; i < 16; i++) {
BLOCKD *b = &xd->block[i];
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
@@ -517,13 +516,24 @@
xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
break;
- case TX_16X16: // FIXME(rbultje): adst
+ case TX_16X16:
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
- vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ const TX_TYPE tx_type = get_tx_type_16x16(xd,
+ (y_idx * 16 + x_idx) * 4);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ } else {
+ vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ }
}
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
@@ -539,13 +549,23 @@
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]);
}
break;
- case TX_8X8: // FIXME(rbultje): adst
+ case TX_8X8:
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
- vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
- xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ } else {
+ vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ }
}
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
@@ -561,13 +581,22 @@
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]);
}
break;
- case TX_4X4: // FIXME(rbultje): adst
+ case TX_4X4:
for (n = 0; n < 256; n++) {
const int x_idx = n & 15, y_idx = n >> 4;
- xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
- xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
+ if (tx_type == DCT_DCT) {
+ xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ } else {
+ vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ }
}
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
@@ -649,14 +678,24 @@
xd->dst.v_buffer,
xd->dst.uv_stride, xd);
break;
- case TX_16X16: // FIXME(rbultje): adst
+ case TX_16X16:
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
- vp9_dequant_idct_add_16x16(
- xd->qcoeff + n * 256, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ const TX_TYPE tx_type = get_tx_type_16x16(xd,
+ (y_idx * 8 + x_idx) * 4);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_16x16(
+ xd->qcoeff + n * 256, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ } else {
+ vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ }
}
vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
xd->block[16].dequant,
@@ -664,13 +703,23 @@
xd->dst.v_buffer,
xd->dst.uv_stride, xd);
break;
- case TX_8X8: // FIXME(rbultje): adst
+ case TX_8X8:
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
- vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
- xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ } else {
+ vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ }
}
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
@@ -686,13 +735,22 @@
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]);
}
break;
- case TX_4X4: // FIXME(rbultje): adst
+ case TX_4X4:
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
- xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
- xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
+ if (tx_type == DCT_DCT) {
+ xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ } else {
+ vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ }
}
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
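A note on the block indices fed to get_tx_type_*() in these hunks: the
argument is always the raster index of the transform's first 4x4 block
within the superblock, where an SB64 row holds 16 4x4 columns and an SB32
row holds 8. As an identity (illustrative helper, not in the patch):

    /* txs_in_4x4: 1 for 4x4, 2 for 8x8, 4 for 16x16 transforms */
    static INLINE int first_4x4_index(int x_idx, int y_idx,
                                      int txs_in_4x4, int sb_cols_4x4) {
      return (y_idx * sb_cols_4x4 + x_idx) * txs_in_4x4;
    }
    /* e.g. a 16x16 in an SB64: (y_idx * 16 + x_idx) * 4, as used above */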
@@ -862,14 +920,9 @@
* values that are in 1/8th pel units
*/
block_size >>= 4; // in mb units
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
- xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
- xd->up_available = (mb_row != 0);
- xd->left_available = (mb_col > cm->cur_tile_mb_col_start);
- xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
+ set_mb_row(cm, xd, mb_row, block_size);
+ set_mb_col(cm, xd, mb_col, block_size);
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
@@ -910,20 +963,6 @@
xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted;
}
}
-
- if (mbmi->sb_type) {
- const int n_mbs = 1 << mbmi->sb_type;
- const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
- const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col);
- const int mis = cm->mode_info_stride;
- int x, y;
-
- for (y = 0; y < y_mbs; y++) {
- for (x = !y; x < x_mbs; x++) {
- mi[y * mis + x] = *mi;
- }
- }
- }
}
/* Decode a row of Superblocks (2x2 region of MBs) */
@@ -938,6 +977,11 @@
for (mb_col = pc->cur_tile_mb_col_start;
mb_col < pc->cur_tile_mb_col_end; mb_col += 4) {
if (vp9_read(bc, pc->sb64_coded)) {
+#ifdef DEC_DEBUG
+ dec_debug = (pc->current_video_frame == 1 && mb_row == 0 && mb_col == 0);
+ if (dec_debug)
+ printf("Debug\n");
+#endif
set_offsets(pbi, 64, mb_row, mb_col);
vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc);
set_refs(pbi, 64, mb_row, mb_col);
@@ -958,6 +1002,10 @@
xd->sb_index = j;
if (vp9_read(bc, pc->sb32_coded)) {
+#ifdef DEC_DEBUG
+ dec_debug = (pc->current_video_frame == 1 &&
+ mb_row + y_idx_sb == 0 && mb_col + x_idx_sb == 0);
+#endif
set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
vp9_decode_mb_mode_mv(pbi,
xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc);
@@ -978,11 +1026,14 @@
// MB lies outside frame, skip on to next
continue;
}
+#ifdef DEC_DEBUG
+ dec_debug = (pc->current_video_frame == 1 &&
+ mb_row + y_idx == 0 && mb_col + x_idx == 0);
+#endif
set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx);
xd->mb_index = i;
vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc);
- update_blockd_bmi(xd);
set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx);
decode_macroblock(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc);
@@ -1073,6 +1124,85 @@
xd->fullpixel_mask = 0xfffffff8;
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void read_nzc_probs_common(VP9_COMMON *cm,
+ BOOL_DECODER* const bc,
+ int block_size) {
+ int c, r, b, t;
+ int tokens, nodes;
+ vp9_prob *nzc_probs;
+ vp9_prob upd;
+
+ if (!vp9_read_bit(bc)) return;
+
+ if (block_size == 32) {
+ tokens = NZC32X32_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
+ upd = NZC_UPDATE_PROB_32X32;
+ } else if (block_size == 16) {
+ tokens = NZC16X16_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
+ upd = NZC_UPDATE_PROB_16X16;
+ } else if (block_size == 8) {
+ tokens = NZC8X8_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
+ upd = NZC_UPDATE_PROB_8X8;
+ } else {
+ tokens = NZC4X4_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_4x4[0][0][0];
+ upd = NZC_UPDATE_PROB_4X4;
+ }
+ nodes = tokens - 1;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ for (t = 0; t < nodes; ++t) {
+ vp9_prob *p = &nzc_probs[offset_nodes + t];
+ if (vp9_read(bc, upd)) {
+ *p = read_prob_diff_update(bc, *p);
+ }
+ }
+ }
+ }
+ }
+}
+
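+// Since nzc_probs was taken at [0][0][0] of a
+// [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][nodes] array, the offset
+// arithmetic above is plain row-major flattening:
+//   &probs[c][r][b][t] ==
+//       nzc_probs + ((c * REF_TYPES + r) * BLOCK_TYPES + b) * nodes + t
+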
+static void read_nzc_pcat_probs(VP9_COMMON *cm, BOOL_DECODER* const bc) {
+ int c, t, b;
+ vp9_prob upd = NZC_UPDATE_PROB_PCAT;
+ if (!vp9_read_bit(bc)) {
+ return;
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ vp9_prob *p = &cm->fc.nzc_pcat_probs[c][t][b];
+ if (vp9_read(bc, upd)) {
+ *p = read_prob_diff_update(bc, *p);
+ }
+ }
+ }
+ }
+}
+
+static void read_nzc_probs(VP9_COMMON *cm,
+ BOOL_DECODER* const bc) {
+ read_nzc_probs_common(cm, bc, 4);
+ if (cm->txfm_mode != ONLY_4X4)
+ read_nzc_probs_common(cm, bc, 8);
+ if (cm->txfm_mode > ALLOW_8X8)
+ read_nzc_probs_common(cm, bc, 16);
+ if (cm->txfm_mode > ALLOW_16X16)
+ read_nzc_probs_common(cm, bc, 32);
+#ifdef NZC_PCAT_UPDATE
+ read_nzc_pcat_probs(cm, bc);
+#endif
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
+
static void read_coef_probs_common(BOOL_DECODER* const bc,
vp9_coeff_probs *coef_probs,
int block_types) {
@@ -1085,7 +1215,7 @@
for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
if (l >= 3 && k == 0)
continue;
- for (m = 0; m < ENTROPY_NODES; m++) {
+ for (m = CONFIG_CODE_NONZEROCOUNT; m < ENTROPY_NODES; m++) {
vp9_prob *const p = coef_probs[i][j][k][l] + m;
if (vp9_read(bc, COEF_UPDATE_PROB)) {
@@ -1539,6 +1669,19 @@
pbi->common.fc.pre_interintra_prob = pbi->common.fc.interintra_prob;
#endif
pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc;
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(pbi->common.fc.pre_nzc_probs_4x4,
+ pbi->common.fc.nzc_probs_4x4);
+ vp9_copy(pbi->common.fc.pre_nzc_probs_8x8,
+ pbi->common.fc.nzc_probs_8x8);
+ vp9_copy(pbi->common.fc.pre_nzc_probs_16x16,
+ pbi->common.fc.nzc_probs_16x16);
+ vp9_copy(pbi->common.fc.pre_nzc_probs_32x32,
+ pbi->common.fc.nzc_probs_32x32);
+ vp9_copy(pbi->common.fc.pre_nzc_pcat_probs,
+ pbi->common.fc.nzc_pcat_probs);
+#endif
+
vp9_zero(pbi->common.fc.coef_counts_4x4);
vp9_zero(pbi->common.fc.coef_counts_8x8);
vp9_zero(pbi->common.fc.coef_counts_16x16);
@@ -1555,8 +1698,18 @@
#if CONFIG_COMP_INTERINTRA_PRED
vp9_zero(pbi->common.fc.interintra_counts);
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_zero(pbi->common.fc.nzc_counts_4x4);
+ vp9_zero(pbi->common.fc.nzc_counts_8x8);
+ vp9_zero(pbi->common.fc.nzc_counts_16x16);
+ vp9_zero(pbi->common.fc.nzc_counts_32x32);
+ vp9_zero(pbi->common.fc.nzc_pcat_counts);
+#endif
read_coef_probs(pbi, &header_bc);
+#if CONFIG_CODE_NONZEROCOUNT
+ read_nzc_probs(&pbi->common, &header_bc);
+#endif
/* Initialize xd pointers. Any reference should do for xd->pre, so use 0. */
vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]],
@@ -1700,8 +1853,12 @@
}
if (!pc->error_resilient_mode &&
- !pc->frame_parallel_decoding_mode)
+ !pc->frame_parallel_decoding_mode) {
vp9_adapt_coef_probs(pc);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_adapt_nzc_probs(pc);
+#endif
+ }
if (pc->frame_type != KEY_FRAME) {
if (!pc->error_resilient_mode &&
!pc->frame_parallel_decoding_mode) {
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index eaf9860..92b78ed 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -15,6 +15,7 @@
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/common/vp9_common.h"
+
static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
uint8_t *dest, int stride, int width, int height) {
int r, c;
@@ -29,6 +30,26 @@
}
}
+void vp9_add_residual_4x4_c(const int16_t *diff, const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 4, 4);
+}
+
+void vp9_add_residual_8x8_c(const int16_t *diff, const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 8, 8);
+}
+
+void vp9_add_residual_16x16_c(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 16, 16);
+}
+
+void vp9_add_residual_32x32_c(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 32, 32);
+}
+
static void add_constant_residual(const int16_t diff, const uint8_t *pred,
int pitch, uint8_t *dest, int stride,
int width, int height) {
@@ -43,26 +64,41 @@
}
}
+void vp9_add_constant_residual_8x8_c(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_constant_residual(diff, pred, pitch, dest, stride, 8, 8);
+}
+
+void vp9_add_constant_residual_16x16_c(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_constant_residual(diff, pred, pitch, dest, stride, 16, 16);
+}
+
+void vp9_add_constant_residual_32x32_c(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_constant_residual(diff, pred, pitch, dest, stride, 32, 32);
+}
+
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
int pitch, int stride, int eob) {
int i;
- int16_t output[16];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
for (i = 0; i < 16; i++)
input[i] *= dq[i];
vp9_short_iht4x4(input, output, 4, tx_type);
vpx_memset(input, 0, 32);
- add_residual(output, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
}
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
int pitch, int stride, int eob) {
- int16_t output[64];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
if (eob == 0) {
// All 0 DCT coefficients
@@ -76,14 +112,14 @@
vp9_short_iht8x8(input, output, 8, tx_type);
vpx_memset(input, 0, 128);
- add_residual(output, pred, pitch, dest, stride, 8, 8);
+ vp9_add_residual_8x8(output, pred, pitch, dest, stride);
}
}
void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
uint8_t *dest, int pitch, int stride, int eob) {
int i;
- int16_t output[16];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
if (eob > 1) {
for (i = 0; i < 16; i++)
@@ -94,7 +130,7 @@
vpx_memset(input, 0, 32);
- add_residual(output, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
} else {
vp9_dc_only_idct_add(input[0]*dq[0], pred, dest, pitch, stride);
((int *)input)[0] = 0;
@@ -104,7 +140,7 @@
void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
uint8_t *dest, int pitch, int stride, int dc) {
int i;
- int16_t output[16];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
input[0] = dc;
@@ -114,14 +150,14 @@
// the idct halves ( >> 1) the pitch
vp9_short_idct4x4llm(input, output, 4 << 1);
vpx_memset(input, 0, 32);
- add_residual(output, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
}
void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest,
int pitch, int stride, int eob) {
int i;
- int16_t output[16];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
if (eob > 1) {
for (i = 0; i < 16; i++)
@@ -131,7 +167,7 @@
vpx_memset(input, 0, 32);
- add_residual(output, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
} else {
vp9_dc_only_inv_walsh_add(input[0]*dq[0], pred, dest, pitch, stride);
((int *)input)[0] = 0;
@@ -143,7 +179,7 @@
uint8_t *dest,
int pitch, int stride, int dc) {
int i;
- int16_t output[16];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
input[0] = dc;
@@ -152,14 +188,13 @@
vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1);
vpx_memset(input, 0, 32);
- add_residual(output, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
}
void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest, int pitch,
int stride, int eob) {
- int16_t output[64];
-
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
// If dc is 1, then input[0] is the reconstructed value, do not need
// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
@@ -182,7 +217,7 @@
vp9_short_idct1_8x8_c(&in, &out);
input[0] = 0;
- add_constant_residual(out, pred, pitch, dest, stride, 8, 8);
+ vp9_add_constant_residual_8x8(out, pred, pitch, dest, stride);
} else if (eob <= 10) {
input[1] *= dq[1];
input[2] *= dq[1];
@@ -201,7 +236,7 @@
input[16] = input[17] = 0;
input[24] = 0;
- add_residual(output, pred, pitch, dest, stride, 8, 8);
+ vp9_add_residual_8x8(output, pred, pitch, dest, stride);
} else {
int i;
@@ -212,7 +247,7 @@
// the idct halves ( >> 1) the pitch
vp9_short_idct8x8_c(input, output, 8 << 1);
vpx_memset(input, 0, 128);
- add_residual(output, pred, pitch, dest, stride, 8, 8);
+ vp9_add_residual_8x8(output, pred, pitch, dest, stride);
}
}
@@ -220,7 +255,7 @@
const int16_t *dq, uint8_t *pred,
uint8_t *dest, int pitch, int stride,
int eob) {
- int16_t output[256];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
if (eob == 0) {
// All 0 DCT coefficients
@@ -242,14 +277,14 @@
vpx_memset(input, 0, 512);
- add_residual(output, pred, pitch, dest, stride, 16, 16);
+ vp9_add_residual_16x16(output, pred, pitch, dest, stride);
}
}
void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest, int pitch,
int stride, int eob) {
- int16_t output[256];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
@@ -265,7 +300,7 @@
vp9_short_idct1_16x16_c(&in, &out);
input[0] = 0;
- add_constant_residual(out, pred, pitch, dest, stride, 16, 16);
+ vp9_add_constant_residual_16x16(out, pred, pitch, dest, stride);
} else if (eob <= 10) {
input[0] *= dq[0];
@@ -287,7 +322,7 @@
input[32] = input[33] = 0;
input[48] = 0;
- add_residual(output, pred, pitch, dest, stride, 16, 16);
+ vp9_add_residual_16x16(output, pred, pitch, dest, stride);
} else {
int i;
@@ -302,20 +337,20 @@
vpx_memset(input, 0, 512);
- add_residual(output, pred, pitch, dest, stride, 16, 16);
+ vp9_add_residual_16x16(output, pred, pitch, dest, stride);
}
}
void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest, int pitch,
int stride, int eob) {
- int16_t output[1024];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024);
if (eob) {
input[0] = input[0] * dq[0] / 2;
if (eob == 1) {
vp9_short_idct1_32x32(input, output);
- add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32);
+ vp9_add_constant_residual_32x32(output[0], pred, pitch, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
input[1] = input[1] * dq[1] / 2;
@@ -336,14 +371,14 @@
input[64] = input[65] = 0;
input[96] = 0;
- add_residual(output, pred, pitch, dest, stride, 32, 32);
+ vp9_add_residual_32x32(output, pred, pitch, dest, stride);
} else {
int i;
for (i = 1; i < 1024; i++)
input[i] = input[i] * dq[1] / 2;
vp9_short_idct32x32(input, output, 64);
vpx_memset(input, 0, 2048);
- add_residual(output, pred, pitch, dest, stride, 32, 32);
+ vp9_add_residual_32x32(output, pred, pitch, dest, stride);
}
}
}
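The DECLARE_ALIGNED_ARRAY changes in this file are load-bearing: the new
SSE2 add_residual variants use 16-byte-aligned loads on the diff argument
(_mm_load_si128 in the 8x8 and larger variants), so every stack buffer
passed to them must be 16-byte aligned. The pattern, in short:

    DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);  /* 16-byte aligned */
    /* ... inverse transform writes into output ... */
    vp9_add_residual_16x16(output, pred, pitch, dest, stride);
    /* rtcd may dispatch the call above to the SSE2 version */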
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index a192266..0a584d7 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -69,13 +69,24 @@
pt = vp9_get_coef_context(&recent_energy, token); \
} while (0)
+#if CONFIG_CODE_NONZEROCOUNT
#define WRITE_COEF_CONTINUE(val, token) \
{ \
- qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \
+ qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \
+ INCREMENT_COUNT(token); \
+ c++; \
+ nzc++; \
+ continue; \
+ }
+#else
+#define WRITE_COEF_CONTINUE(val, token) \
+ { \
+ qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \
INCREMENT_COUNT(token); \
c++; \
continue; \
}
+#endif // CONFIG_CODE_NONZEROCOUNT
#define ADJUST_COEF(prob, bits_count) \
do { \
@@ -85,9 +96,8 @@
static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
BOOL_DECODER* const br, int block_idx,
- PLANE_TYPE type, TX_TYPE tx_type,
- int seg_eob, int16_t *qcoeff_ptr,
- const int *const scan, TX_SIZE txfm_size) {
+ PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
+ TX_SIZE txfm_size) {
ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context;
ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context;
int aidx, lidx;
@@ -99,6 +109,11 @@
vp9_prob *prob;
vp9_coeff_count *coef_counts;
const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
+#if CONFIG_CODE_NONZEROCOUNT
+ uint16_t nzc = 0;
+ uint16_t nzc_expected = xd->mode_info_context->mbmi.nzcs[block_idx];
+#endif
+ const int *scan;
if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
aidx = vp9_block2above_sb64[txfm_size][block_idx];
@@ -113,19 +128,34 @@
switch (txfm_size) {
default:
- case TX_4X4:
+ case TX_4X4: {
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, block_idx);
+ switch (tx_type) {
+ default:
+ scan = vp9_default_zig_zag1d_4x4;
+ break;
+ case ADST_DCT:
+ scan = vp9_row_scan_4x4;
+ break;
+ case DCT_ADST:
+ scan = vp9_col_scan_4x4;
+ break;
+ }
above_ec = A0[aidx] != 0;
left_ec = L0[lidx] != 0;
coef_probs = fc->coef_probs_4x4;
coef_counts = fc->coef_counts_4x4;
break;
+ }
case TX_8X8:
+ scan = vp9_default_zig_zag1d_8x8;
coef_probs = fc->coef_probs_8x8;
coef_counts = fc->coef_counts_8x8;
above_ec = (A0[aidx] + A0[aidx + 1]) != 0;
left_ec = (L0[lidx] + L0[lidx + 1]) != 0;
break;
case TX_16X16:
+ scan = vp9_default_zig_zag1d_16x16;
coef_probs = fc->coef_probs_16x16;
coef_counts = fc->coef_counts_16x16;
if (type == PLANE_TYPE_UV) {
@@ -139,6 +169,7 @@
}
break;
case TX_32X32:
+ scan = vp9_default_zig_zag1d_32x32;
coef_probs = fc->coef_probs_32x32;
coef_counts = fc->coef_counts_32x32;
if (type == PLANE_TYPE_UV) {
@@ -170,12 +201,24 @@
if (c >= seg_eob)
break;
+#if CONFIG_CODE_NONZEROCOUNT
+ if (nzc == nzc_expected)
+ break;
+#endif
prob = coef_probs[type][ref][get_coef_band(txfm_size, c)][pt];
+#if CONFIG_CODE_NONZEROCOUNT == 0
if (!vp9_read(br, prob[EOB_CONTEXT_NODE]))
break;
+#endif
SKIP_START:
if (c >= seg_eob)
break;
+#if CONFIG_CODE_NONZEROCOUNT
+ if (nzc == nzc_expected)
+ break;
+ // decode zero node only if there are zeros left
+ if (seg_eob - nzc_expected - c + nzc > 0)
+#endif
if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
++c;
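The zero-node guard above is easier to read after a rearrangement (same
logic, regrouped):

    /* zeros_left = (seg_eob - c) - (nzc_expected - nzc);
       decode the zero node only while zeros_left > 0 -- once every
       remaining position must be nonzero, the zero branch carries
       no information. */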
@@ -242,8 +285,10 @@
WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6);
}
+#if CONFIG_CODE_NONZEROCOUNT == 0
if (c < seg_eob)
coef_counts[type][ref][get_coef_band(txfm_size, c)][pt][DCT_EOB_TOKEN]++;
+#endif
A0[aidx] = L0[lidx] = c > 0;
if (txfm_size >= TX_8X8) {
@@ -272,7 +317,6 @@
}
}
}
-
return c;
}
@@ -290,17 +334,15 @@
case TX_32X32:
// Luma block
c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, get_eob(xd, segment_id, 1024),
- xd->qcoeff, vp9_default_zig_zag1d_32x32, TX_32X32);
+ get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32);
xd->eobs[0] = c;
eobtotal += c;
// 16x16 chroma blocks
seg_eob = get_eob(xd, segment_id, 256);
for (i = 64; i < 96; i += 16) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_16x16, TX_16X16);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -310,17 +352,15 @@
seg_eob = get_eob(xd, segment_id, 256);
for (i = 0; i < 64; i += 16) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, seg_eob, xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_16x16, TX_16X16);
+ seg_eob, xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
// 16x16 chroma blocks
for (i = 64; i < 96; i += 16) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_16x16, TX_16X16);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -330,17 +370,15 @@
seg_eob = get_eob(xd, segment_id, 64);
for (i = 0; i < 64; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, seg_eob, xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_8x8, TX_8X8);
+ seg_eob, xd->qcoeff + i * 16, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
// 8x8 chroma blocks
for (i = 64; i < 96; i += 4) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_8x8, TX_8X8);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -350,17 +388,15 @@
seg_eob = get_eob(xd, segment_id, 16);
for (i = 0; i < 64; i++) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, seg_eob, xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_4x4, TX_4X4);
+ seg_eob, xd->qcoeff + i * 16, TX_4X4);
xd->eobs[i] = c;
eobtotal += c;
}
// 4x4 chroma blocks
for (i = 64; i < 96; i++) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_4x4, TX_4X4);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_4X4);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -383,17 +419,15 @@
seg_eob = get_eob(xd, segment_id, 1024);
for (i = 0; i < 256; i += 64) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, seg_eob, xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_32x32, TX_32X32);
+ seg_eob, xd->qcoeff + i * 16, TX_32X32);
xd->eobs[i] = c;
eobtotal += c;
}
// 32x32 chroma blocks
for (i = 256; i < 384; i += 64) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_32x32, TX_32X32);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_32X32);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -403,17 +437,15 @@
seg_eob = get_eob(xd, segment_id, 256);
for (i = 0; i < 256; i += 16) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, seg_eob, xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_16x16, TX_16X16);
+ seg_eob, xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
// 16x16 chroma blocks
for (i = 256; i < 384; i += 16) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_16x16, TX_16X16);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -423,17 +455,15 @@
seg_eob = get_eob(xd, segment_id, 64);
for (i = 0; i < 256; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, seg_eob, xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_8x8, TX_8X8);
+ seg_eob, xd->qcoeff + i * 16, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
// 8x8 chroma blocks
for (i = 256; i < 384; i += 4) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_8x8, TX_8X8);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -443,17 +473,15 @@
seg_eob = get_eob(xd, segment_id, 16);
for (i = 0; i < 256; i++) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, seg_eob, xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_4x4, TX_4X4);
+ seg_eob, xd->qcoeff + i * 16, TX_4X4);
xd->eobs[i] = c;
eobtotal += c;
}
// 4x4 chroma blocks
for (i = 256; i < 384; i++) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->qcoeff + i * 16,
- vp9_default_zig_zag1d_4x4, TX_4X4);
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_4X4);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -472,9 +500,7 @@
// Luma block
int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
- get_tx_type(xd, &xd->block[0]),
- get_eob(xd, segment_id, 256),
- xd->qcoeff, vp9_default_zig_zag1d_16x16, TX_16X16);
+ get_eob(xd, segment_id, 256), xd->qcoeff, TX_16X16);
xd->eobs[0] = c;
eobtotal += c;
@@ -482,8 +508,7 @@
seg_eob = get_eob(xd, segment_id, 64);
for (i = 16; i < 24; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
- DCT_DCT, seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_8x8, TX_8X8);
+ seg_eob, xd->block[i].qcoeff, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -500,9 +525,7 @@
seg_eob = get_eob(xd, segment_id, 64);
for (i = 0; i < 16; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
- get_tx_type(xd, xd->block + i),
- seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_8x8, TX_8X8);
+ seg_eob, xd->block[i].qcoeff, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -514,16 +537,14 @@
seg_eob = get_eob(xd, segment_id, 16);
for (i = 16; i < 24; i++) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
- DCT_DCT, seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_4x4, TX_4X4);
+ seg_eob, xd->block[i].qcoeff, TX_4X4);
xd->eobs[i] = c;
eobtotal += c;
}
} else {
for (i = 16; i < 24; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
- DCT_DCT, seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_8x8, TX_8X8);
+ seg_eob, xd->block[i].qcoeff, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
@@ -534,43 +555,20 @@
static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
BOOL_DECODER* const bc,
- PLANE_TYPE type, int i, int seg_eob,
- TX_TYPE tx_type, const int *scan) {
- int c = decode_coefs(dx, xd, bc, i, type, tx_type, seg_eob,
- xd->block[i].qcoeff, scan, TX_4X4);
+ PLANE_TYPE type, int i, int seg_eob) {
+ int c = decode_coefs(dx, xd, bc, i, type, seg_eob,
+ xd->block[i].qcoeff, TX_4X4);
xd->eobs[i] = c;
return c;
}
-static int decode_coefs_4x4_y(VP9D_COMP *dx, MACROBLOCKD *xd,
- BOOL_DECODER* const bc,
- PLANE_TYPE type, int i, int seg_eob) {
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, &xd->block[i]) : DCT_DCT;
- const int *scan;
-
- switch (tx_type) {
- case ADST_DCT:
- scan = vp9_row_scan_4x4;
- break;
- case DCT_ADST:
- scan = vp9_col_scan_4x4;
- break;
- default:
- scan = vp9_default_zig_zag1d_4x4;
- break;
- }
-
- return decode_coefs_4x4(dx, xd, bc, type, i, seg_eob, tx_type, scan);
-}
-
int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
BOOL_DECODER* const bc,
PLANE_TYPE type, int i) {
const int segment_id = xd->mode_info_context->mbmi.segment_id;
const int seg_eob = get_eob(xd, segment_id, 16);
- return decode_coefs_4x4_y(dx, xd, bc, type, i, seg_eob);
+ return decode_coefs_4x4(dx, xd, bc, type, i, seg_eob);
}
static int decode_mb_tokens_4x4_uv(VP9D_COMP* const dx,
@@ -581,8 +579,7 @@
// chroma blocks
for (i = 16; i < 24; i++) {
- eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i, seg_eob,
- DCT_DCT, vp9_default_zig_zag1d_4x4);
+ eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i, seg_eob);
}
return eobtotal;
@@ -606,8 +603,7 @@
// luma blocks
for (i = 0; i < 16; ++i) {
- eobtotal += decode_coefs_4x4_y(dx, xd, bc,
- PLANE_TYPE_Y_WITH_DC, i, seg_eob);
+ eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y_WITH_DC, i, seg_eob);
}
// chroma blocks
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index ce7958c..87d1d3b 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -9,6 +9,9 @@
*/
+#include <stdio.h>
+#include <assert.h>
+
#include "vp9/common/vp9_onyxc_int.h"
#if CONFIG_POSTPROC
#include "vp9/common/vp9_postproc.h"
@@ -19,8 +22,6 @@
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_swapyv12buffer.h"
-#include <stdio.h>
-#include <assert.h>
#include "vp9/common/vp9_quant_common.h"
#include "vpx_scale/vpx_scale.h"
@@ -99,7 +100,7 @@
}
#endif
-void vp9_initialize_dec(void) {
+void vp9_initialize_dec() {
static int init_done = 0;
if (!init_done) {
diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h
index cca017d..305dfe5 100644
--- a/vp9/decoder/vp9_treereader.h
+++ b/vp9/decoder/vp9_treereader.h
@@ -13,7 +13,6 @@
#define VP9_DECODER_VP9_TREEREADER_H_
#include "vp9/common/vp9_treecoder.h"
-
#include "vp9/decoder/vp9_dboolhuff.h"
typedef BOOL_DECODER vp9_reader;
diff --git a/vp9/decoder/x86/vp9_dequantize_x86.c b/vp9/decoder/x86/vp9_dequantize_x86.c
new file mode 100644
index 0000000..acfae2a
--- /dev/null
+++ b/vp9/decoder/x86/vp9_dequantize_x86.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <emmintrin.h> // SSE2
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_idct.h"
+
+#if HAVE_SSE2
+
+void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 4;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ const __m128i d0 = _mm_loadl_epi64((const __m128i *)(diff + 0 * width));
+ const __m128i d1 = _mm_loadl_epi64((const __m128i *)(diff + 1 * width));
+ const __m128i d2 = _mm_loadl_epi64((const __m128i *)(diff + 2 * width));
+ const __m128i d3 = _mm_loadl_epi64((const __m128i *)(diff + 3 * width));
+
+ // Prediction data.
+ __m128i p0 = _mm_cvtsi32_si128(*(const int *)(pred + 0 * pitch));
+ __m128i p1 = _mm_cvtsi32_si128(*(const int *)(pred + 1 * pitch));
+ __m128i p2 = _mm_cvtsi32_si128(*(const int *)(pred + 2 * pitch));
+ __m128i p3 = _mm_cvtsi32_si128(*(const int *)(pred + 3 * pitch));
+
+ p0 = _mm_unpacklo_epi8(p0, zero);
+ p1 = _mm_unpacklo_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p2, zero);
+ p3 = _mm_unpacklo_epi8(p3, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p2 = _mm_packus_epi16(p2, p3);
+
+ *(int *)dest = _mm_cvtsi128_si32(p0);
+ dest += stride;
+
+ p0 = _mm_srli_si128(p0, 8);
+ *(int *)dest = _mm_cvtsi128_si32(p0);
+ dest += stride;
+
+ *(int *)dest = _mm_cvtsi128_si32(p2);
+ dest += stride;
+
+ p2 = _mm_srli_si128(p2, 8);
+ *(int *)dest = _mm_cvtsi128_si32(p2);
+}
+
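+// Scalar reference for this family of routines (equivalent to
+// add_residual() in vp9_dequantize.c; clip_pixel is the existing helper
+// in vp9_common.h):
+//
+//   for (r = 0; r < 4; r++)
+//     for (c = 0; c < 4; c++)
+//       dest[r * stride + c] =
+//           clip_pixel(diff[r * 4 + c] + pred[r * pitch + c]);
+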
+void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 8;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ const __m128i d0 = _mm_load_si128((const __m128i *)(diff + 0 * width));
+ const __m128i d1 = _mm_load_si128((const __m128i *)(diff + 1 * width));
+ const __m128i d2 = _mm_load_si128((const __m128i *)(diff + 2 * width));
+ const __m128i d3 = _mm_load_si128((const __m128i *)(diff + 3 * width));
+ const __m128i d4 = _mm_load_si128((const __m128i *)(diff + 4 * width));
+ const __m128i d5 = _mm_load_si128((const __m128i *)(diff + 5 * width));
+ const __m128i d6 = _mm_load_si128((const __m128i *)(diff + 6 * width));
+ const __m128i d7 = _mm_load_si128((const __m128i *)(diff + 7 * width));
+
+ // Prediction data.
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch));
+ __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch));
+ __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch));
+ __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch));
+ __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch));
+ __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch));
+ __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch));
+
+ p0 = _mm_unpacklo_epi8(p0, zero);
+ p1 = _mm_unpacklo_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p2, zero);
+ p3 = _mm_unpacklo_epi8(p3, zero);
+ p4 = _mm_unpacklo_epi8(p4, zero);
+ p5 = _mm_unpacklo_epi8(p5, zero);
+ p6 = _mm_unpacklo_epi8(p6, zero);
+ p7 = _mm_unpacklo_epi8(p7, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+ p4 = _mm_add_epi16(p4, d4);
+ p5 = _mm_add_epi16(p5, d5);
+ p6 = _mm_add_epi16(p6, d6);
+ p7 = _mm_add_epi16(p7, d7);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p2 = _mm_packus_epi16(p2, p3);
+ p4 = _mm_packus_epi16(p4, p5);
+ p6 = _mm_packus_epi16(p6, p7);
+
+ _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0);
+ p0 = _mm_srli_si128(p0, 8);
+ _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0);
+
+ _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2);
+ p2 = _mm_srli_si128(p2, 8);
+ _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2);
+
+ _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4);
+ p4 = _mm_srli_si128(p4, 8);
+ _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4);
+
+ _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6);
+ p6 = _mm_srli_si128(p6, 8);
+ _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
+}
+
+void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 16;
+ int i = 4;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ __m128i d0, d1, d2, d3, d4, d5, d6, d7;
+ __m128i p0, p1, p2, p3, p4, p5, p6, p7;
+
+ do {
+ d0 = _mm_load_si128((const __m128i *)(diff + 0 * width));
+ d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8));
+ d2 = _mm_load_si128((const __m128i *)(diff + 1 * width));
+ d3 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8));
+ d4 = _mm_load_si128((const __m128i *)(diff + 2 * width));
+ d5 = _mm_load_si128((const __m128i *)(diff + 2 * width + 8));
+ d6 = _mm_load_si128((const __m128i *)(diff + 3 * width));
+ d7 = _mm_load_si128((const __m128i *)(diff + 3 * width + 8));
+
+ // Prediction data.
+ p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
+ p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
+
+ p0 = _mm_unpacklo_epi8(p1, zero);
+ p1 = _mm_unpackhi_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p3, zero);
+ p3 = _mm_unpackhi_epi8(p3, zero);
+ p4 = _mm_unpacklo_epi8(p5, zero);
+ p5 = _mm_unpackhi_epi8(p5, zero);
+ p6 = _mm_unpacklo_epi8(p7, zero);
+ p7 = _mm_unpackhi_epi8(p7, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+ p4 = _mm_add_epi16(p4, d4);
+ p5 = _mm_add_epi16(p5, d5);
+ p6 = _mm_add_epi16(p6, d6);
+ p7 = _mm_add_epi16(p7, d7);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p1 = _mm_packus_epi16(p2, p3);
+ p2 = _mm_packus_epi16(p4, p5);
+ p3 = _mm_packus_epi16(p6, p7);
+
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p1);
+ _mm_store_si128((__m128i *)(dest + 2 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 3 * stride), p3);
+
+ diff += 4 * width;
+ pred += 4 * pitch;
+ dest += 4 * stride;
+ } while (--i);
+}
+
+void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 32;
+ int i = 16;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ __m128i d0, d1, d2, d3, d4, d5, d6, d7;
+ __m128i p0, p1, p2, p3, p4, p5, p6, p7;
+
+ do {
+ d0 = _mm_load_si128((const __m128i *)(diff + 0 * width));
+ d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8));
+ d2 = _mm_load_si128((const __m128i *)(diff + 0 * width + 16));
+ d3 = _mm_load_si128((const __m128i *)(diff + 0 * width + 24));
+ d4 = _mm_load_si128((const __m128i *)(diff + 1 * width));
+ d5 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8));
+ d6 = _mm_load_si128((const __m128i *)(diff + 1 * width + 16));
+ d7 = _mm_load_si128((const __m128i *)(diff + 1 * width + 24));
+
+ // Prediction data.
+ p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ p3 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16));
+ p5 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ p7 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16));
+
+ p0 = _mm_unpacklo_epi8(p1, zero);
+ p1 = _mm_unpackhi_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p3, zero);
+ p3 = _mm_unpackhi_epi8(p3, zero);
+ p4 = _mm_unpacklo_epi8(p5, zero);
+ p5 = _mm_unpackhi_epi8(p5, zero);
+ p6 = _mm_unpacklo_epi8(p7, zero);
+ p7 = _mm_unpackhi_epi8(p7, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+ p4 = _mm_add_epi16(p4, d4);
+ p5 = _mm_add_epi16(p5, d5);
+ p6 = _mm_add_epi16(p6, d6);
+ p7 = _mm_add_epi16(p7, d7);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p1 = _mm_packus_epi16(p2, p3);
+ p2 = _mm_packus_epi16(p4, p5);
+ p3 = _mm_packus_epi16(p6, p7);
+
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3);
+
+ diff += 2 * width;
+ pred += 2 * pitch;
+ dest += 2 * stride;
+ } while (--i);
+}
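
For reference, the scalar computation these add-residual kernels vectorize is a per-pixel clamped sum; the [0, 255] clamp falls out of the _mm_packus_epi16 saturation above. A minimal C sketch (illustrative helper, not part of the patch):

    /* Scalar sketch of vp9_add_residual_NxN: dest = clamp(pred + diff, 0, 255). */
    static void add_residual_sketch(const int16_t *diff, const uint8_t *pred,
                                    int pitch, uint8_t *dest, int stride,
                                    int width, int height) {
      int r, c;
      for (r = 0; r < height; r++) {
        for (c = 0; c < width; c++) {
          const int v = pred[c] + diff[c];
          dest[c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }
        diff += width;
        pred += pitch;
        dest += stride;
      }
    }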
+
+void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ uint8_t abs_diff;
+ __m128i d;
+
+ // Prediction data.
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch));
+ __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch));
+ __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch));
+ __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch));
+ __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch));
+ __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch));
+ __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch));
+
+ p0 = _mm_unpacklo_epi64(p0, p1);
+ p2 = _mm_unpacklo_epi64(p2, p3);
+ p4 = _mm_unpacklo_epi64(p4, p5);
+ p6 = _mm_unpacklo_epi64(p6, p7);
+
+  // Clip the absolute value of diff to [0, 255], then add or subtract it
+  // with unsigned saturation according to the sign of diff.
+ if (diff >= 0) {
+ abs_diff = (diff > 255) ? 255 : diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_adds_epu8(p0, d);
+ p2 = _mm_adds_epu8(p2, d);
+ p4 = _mm_adds_epu8(p4, d);
+ p6 = _mm_adds_epu8(p6, d);
+ } else {
+ abs_diff = (diff < -255) ? 255 : -diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_subs_epu8(p0, d);
+ p2 = _mm_subs_epu8(p2, d);
+ p4 = _mm_subs_epu8(p4, d);
+ p6 = _mm_subs_epu8(p6, d);
+ }
+
+ _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0);
+ p0 = _mm_srli_si128(p0, 8);
+ _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0);
+
+ _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2);
+ p2 = _mm_srli_si128(p2, 8);
+ _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2);
+
+ _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4);
+ p4 = _mm_srli_si128(p4, 8);
+ _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4);
+
+ _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6);
+ p6 = _mm_srli_si128(p6, 8);
+ _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
+}
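
The splat of the clipped byte relies on the multiply-by-0x01010101 idiom: the product replicates the byte into all four bytes of a 32-bit lane, and _mm_shuffle_epi32(..., 0) then broadcasts that lane across the register. A minimal sketch of the equivalence, assuming only SSE2 (<emmintrin.h>); _mm_set1_epi8 expresses the same splat directly:

    #include <emmintrin.h>
    #include <stdint.h>

    /* Two equivalent ways to broadcast one byte into all 16 lanes. */
    static __m128i splat_byte_mul(uint8_t v) {
      return _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(v * 0x01010101u)), 0);
    }
    static __m128i splat_byte_set1(uint8_t v) {
      return _mm_set1_epi8((char)v);
    }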
+
+void vp9_add_constant_residual_16x16_sse2(const int16_t diff,
+ const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ uint8_t abs_diff;
+ __m128i d;
+
+ // Prediction data.
+ __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ __m128i p2 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
+ __m128i p3 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
+ __m128i p4 = _mm_load_si128((const __m128i *)(pred + 4 * pitch));
+ __m128i p5 = _mm_load_si128((const __m128i *)(pred + 5 * pitch));
+ __m128i p6 = _mm_load_si128((const __m128i *)(pred + 6 * pitch));
+ __m128i p7 = _mm_load_si128((const __m128i *)(pred + 7 * pitch));
+ __m128i p8 = _mm_load_si128((const __m128i *)(pred + 8 * pitch));
+ __m128i p9 = _mm_load_si128((const __m128i *)(pred + 9 * pitch));
+ __m128i p10 = _mm_load_si128((const __m128i *)(pred + 10 * pitch));
+ __m128i p11 = _mm_load_si128((const __m128i *)(pred + 11 * pitch));
+ __m128i p12 = _mm_load_si128((const __m128i *)(pred + 12 * pitch));
+ __m128i p13 = _mm_load_si128((const __m128i *)(pred + 13 * pitch));
+ __m128i p14 = _mm_load_si128((const __m128i *)(pred + 14 * pitch));
+ __m128i p15 = _mm_load_si128((const __m128i *)(pred + 15 * pitch));
+
+  // Clip the absolute value of diff to [0, 255], then add or subtract it
+  // with unsigned saturation according to the sign of diff.
+ if (diff >= 0) {
+ abs_diff = (diff > 255) ? 255 : diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_adds_epu8(p0, d);
+ p1 = _mm_adds_epu8(p1, d);
+ p2 = _mm_adds_epu8(p2, d);
+ p3 = _mm_adds_epu8(p3, d);
+ p4 = _mm_adds_epu8(p4, d);
+ p5 = _mm_adds_epu8(p5, d);
+ p6 = _mm_adds_epu8(p6, d);
+ p7 = _mm_adds_epu8(p7, d);
+ p8 = _mm_adds_epu8(p8, d);
+ p9 = _mm_adds_epu8(p9, d);
+ p10 = _mm_adds_epu8(p10, d);
+ p11 = _mm_adds_epu8(p11, d);
+ p12 = _mm_adds_epu8(p12, d);
+ p13 = _mm_adds_epu8(p13, d);
+ p14 = _mm_adds_epu8(p14, d);
+ p15 = _mm_adds_epu8(p15, d);
+ } else {
+ abs_diff = (diff < -255) ? 255 : -diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_subs_epu8(p0, d);
+ p1 = _mm_subs_epu8(p1, d);
+ p2 = _mm_subs_epu8(p2, d);
+ p3 = _mm_subs_epu8(p3, d);
+ p4 = _mm_subs_epu8(p4, d);
+ p5 = _mm_subs_epu8(p5, d);
+ p6 = _mm_subs_epu8(p6, d);
+ p7 = _mm_subs_epu8(p7, d);
+ p8 = _mm_subs_epu8(p8, d);
+ p9 = _mm_subs_epu8(p9, d);
+ p10 = _mm_subs_epu8(p10, d);
+ p11 = _mm_subs_epu8(p11, d);
+ p12 = _mm_subs_epu8(p12, d);
+ p13 = _mm_subs_epu8(p13, d);
+ p14 = _mm_subs_epu8(p14, d);
+ p15 = _mm_subs_epu8(p15, d);
+ }
+
+ // Store results
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p1);
+ _mm_store_si128((__m128i *)(dest + 2 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 3 * stride), p3);
+ _mm_store_si128((__m128i *)(dest + 4 * stride), p4);
+ _mm_store_si128((__m128i *)(dest + 5 * stride), p5);
+ _mm_store_si128((__m128i *)(dest + 6 * stride), p6);
+ _mm_store_si128((__m128i *)(dest + 7 * stride), p7);
+ _mm_store_si128((__m128i *)(dest + 8 * stride), p8);
+ _mm_store_si128((__m128i *)(dest + 9 * stride), p9);
+ _mm_store_si128((__m128i *)(dest + 10 * stride), p10);
+ _mm_store_si128((__m128i *)(dest + 11 * stride), p11);
+ _mm_store_si128((__m128i *)(dest + 12 * stride), p12);
+ _mm_store_si128((__m128i *)(dest + 13 * stride), p13);
+ _mm_store_si128((__m128i *)(dest + 14 * stride), p14);
+ _mm_store_si128((__m128i *)(dest + 15 * stride), p15);
+}
+
+void vp9_add_constant_residual_32x32_sse2(const int16_t diff,
+ const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ uint8_t abs_diff;
+ __m128i d;
+ int i = 8;
+
+ if (diff >= 0) {
+ abs_diff = (diff > 255) ? 255 : diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+ } else {
+ abs_diff = (diff < -255) ? 255 : -diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+ }
+
+ do {
+ // Prediction data.
+ __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16));
+ __m128i p2 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ __m128i p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16));
+ __m128i p4 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
+ __m128i p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch + 16));
+ __m128i p6 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
+ __m128i p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch + 16));
+
+    // Add or subtract the pre-clipped value with unsigned saturation
+    // according to the sign of diff.
+ if (diff >= 0) {
+ p0 = _mm_adds_epu8(p0, d);
+ p1 = _mm_adds_epu8(p1, d);
+ p2 = _mm_adds_epu8(p2, d);
+ p3 = _mm_adds_epu8(p3, d);
+ p4 = _mm_adds_epu8(p4, d);
+ p5 = _mm_adds_epu8(p5, d);
+ p6 = _mm_adds_epu8(p6, d);
+ p7 = _mm_adds_epu8(p7, d);
+ } else {
+ p0 = _mm_subs_epu8(p0, d);
+ p1 = _mm_subs_epu8(p1, d);
+ p2 = _mm_subs_epu8(p2, d);
+ p3 = _mm_subs_epu8(p3, d);
+ p4 = _mm_subs_epu8(p4, d);
+ p5 = _mm_subs_epu8(p5, d);
+ p6 = _mm_subs_epu8(p6, d);
+ p7 = _mm_subs_epu8(p7, d);
+ }
+
+ // Store results
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3);
+ _mm_store_si128((__m128i *)(dest + 2 * stride), p4);
+ _mm_store_si128((__m128i *)(dest + 2 * stride + 16), p5);
+ _mm_store_si128((__m128i *)(dest + 3 * stride), p6);
+ _mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7);
+
+ pred += 4 * pitch;
+ dest += 4 * stride;
+ } while (--i);
+}
+#endif
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index fcbd3a1..558971d 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -50,6 +50,24 @@
extern unsigned int active_section;
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+unsigned int nzc_stats_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_TOKENS];
+unsigned int nzc_stats_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_TOKENS];
+unsigned int nzc_stats_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_TOKENS];
+unsigned int nzc_stats_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_TOKENS];
+unsigned int nzc_pcat_stats[MAX_NZC_CONTEXTS][NZC_TOKENS_EXTRA]
+ [NZC_BITS_EXTRA][2];
+void init_nzcstats();
+void update_nzcstats(VP9_COMMON *const cm);
+void print_nzcstats();
+#endif
+#endif
+
#ifdef MODE_STATS
int count_mb_seg[4] = { 0, 0, 0, 0 };
#endif
@@ -719,10 +737,9 @@
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV
// values that are in 1/8th pel units
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_right_edge = ((pc->mb_cols - mb_size - mb_col) * 16) << 3;
- xd->mb_to_bottom_edge = ((pc->mb_rows - mb_size - mb_row) * 16) << 3;
+
+ set_mb_row(pc, xd, mb_row, mb_size);
+ set_mb_col(pc, xd, mb_col, mb_size);
#ifdef ENTROPY_STATS
active_section = 9;
@@ -751,18 +768,7 @@
} else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
- const int nmbs = mb_size;
- const int xmbs = MIN(nmbs, mb_cols_left);
- const int ymbs = MIN(nmbs, mb_rows_left);
- int x, y;
-
- skip_coeff = 1;
- for (y = 0; y < ymbs; y++) {
- for (x = 0; x < xmbs; x++) {
- skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
- }
- }
-
+ skip_coeff = m->mbmi.mb_skip_coeff;
vp9_write(bc, skip_coeff,
vp9_get_pred_prob(pc, xd, PRED_MBSKIP));
}
@@ -966,7 +972,7 @@
}
static void write_mb_modes_kf(const VP9_COMP *cpi,
- const MODE_INFO *m,
+ MODE_INFO *m,
vp9_writer *bc,
int mb_rows_left, int mb_cols_left) {
const VP9_COMMON *const c = &cpi->common;
@@ -985,18 +991,7 @@
} else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
- const int nmbs = 1 << m->mbmi.sb_type;
- const int xmbs = MIN(nmbs, mb_cols_left);
- const int ymbs = MIN(nmbs, mb_rows_left);
- int x, y;
-
- skip_coeff = 1;
- for (y = 0; y < ymbs; y++) {
- for (x = 0; x < xmbs; x++) {
- skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
- }
- }
-
+ skip_coeff = m->mbmi.mb_skip_coeff;
vp9_write(bc, skip_coeff,
vp9_get_pred_prob(c, xd, PRED_MBSKIP));
}
@@ -1054,30 +1049,585 @@
}
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void write_nzc(VP9_COMMON *const cm,
+ uint16_t nzc,
+ int nzc_context,
+ TX_SIZE tx_size,
+ int ref,
+ int type,
+ vp9_writer* const bc) {
+ int c, e;
+ c = codenzc(nzc);
+ if (tx_size == TX_32X32) {
+ write_token(bc, vp9_nzc32x32_tree,
+ cm->fc.nzc_probs_32x32[nzc_context][ref][type],
+ vp9_nzc32x32_encodings + c);
+ // cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_16X16) {
+ write_token(bc, vp9_nzc16x16_tree,
+ cm->fc.nzc_probs_16x16[nzc_context][ref][type],
+ vp9_nzc16x16_encodings + c);
+ // cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_8X8) {
+ write_token(bc, vp9_nzc8x8_tree,
+ cm->fc.nzc_probs_8x8[nzc_context][ref][type],
+ vp9_nzc8x8_encodings + c);
+ // cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_4X4) {
+ write_token(bc, vp9_nzc4x4_tree,
+ cm->fc.nzc_probs_4x4[nzc_context][ref][type],
+ vp9_nzc4x4_encodings + c);
+ // cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++;
+ } else {
+ assert(0);
+ }
+
+ if ((e = vp9_extranzcbits[c])) {
+ int x = nzc - vp9_basenzcvalue[c];
+ while (e--) {
+ int b = (x >> e) & 1;
+ vp9_write(bc, b,
+ cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]);
+ // cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++;
+ }
+ }
+}
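
A nonzero count is thus coded as a tree token c = codenzc(nzc) plus, for the larger tokens, vp9_extranzcbits[c] extra bits sent most-significant first for the offset nzc - vp9_basenzcvalue[c]. A decoder-side sketch of the inverse mapping, with the table element types assumed and read_bit() a hypothetical stand-in for the real boolean-decoder call:

    extern const int vp9_basenzcvalue[];  /* per-token base count (type assumed) */
    extern const int vp9_extranzcbits[];  /* per-token extra-bit count (type assumed) */

    /* Sketch: rebuild a nonzero count from its token and extra bits,
     * mirroring the MSB-first loop in write_nzc above. */
    static int decode_nzc_sketch(int c, int (*read_bit)(void)) {
      int e = vp9_extranzcbits[c];
      int x = 0;
      while (e--)
        x |= read_bit() << e;  /* bit e of the offset arrives first */
      return vp9_basenzcvalue[c] + x;
    }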
+
+static void write_nzcs_sb64(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col,
+ vp9_writer* const bc) {
+ VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 256; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 64) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 256; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 256; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 256; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 256; j < 384; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void write_nzcs_sb32(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col,
+ vp9_writer* const bc) {
+ VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 64; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 64; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 64; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 64; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 64; j < 96; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void write_nzcs_mb16(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col,
+ vp9_writer* const bc) {
+ VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_16X16:
+ for (j = 0; j < 16; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 16; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+ }
+ if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ } else {
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 16; ++j) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
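
Across the three writers the loop strides follow a single rule: block indices count 4x4 units, so one transform block of size tx covers 1 << (2 * tx) consecutive nzcs[] entries, with the Y plane occupying the leading indices and U/V following (e.g. 0..255 then 256..383 for a 64x64 superblock). A one-line sketch of that relation:

    /* 4x4-unit stride per transform: TX_4X4:1, TX_8X8:4, TX_16X16:16, TX_32X32:64. */
    static int nzc_index_stride(TX_SIZE tx_size) {
      return 1 << (2 * tx_size);
    }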
+
+#ifdef NZC_STATS
+void init_nzcstats() {
+ vp9_zero(nzc_stats_4x4);
+ vp9_zero(nzc_stats_8x8);
+ vp9_zero(nzc_stats_16x16);
+ vp9_zero(nzc_stats_32x32);
+ vp9_zero(nzc_pcat_stats);
+}
+
+void update_nzcstats(VP9_COMMON *const cm) {
+ int c, r, b, t;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC4X4_TOKENS; ++t) {
+ nzc_stats_4x4[c][r][b][t] += cm->fc.nzc_counts_4x4[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC8X8_TOKENS; ++t) {
+ nzc_stats_8x8[c][r][b][t] += cm->fc.nzc_counts_8x8[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC16X16_TOKENS; ++t) {
+ nzc_stats_16x16[c][r][b][t] += cm->fc.nzc_counts_16x16[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC32X32_TOKENS; ++t) {
+ nzc_stats_32x32[c][r][b][t] += cm->fc.nzc_counts_32x32[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ nzc_pcat_stats[c][t][b][0] += cm->fc.nzc_pcat_counts[c][t][b][0];
+ nzc_pcat_stats[c][t][b][1] += cm->fc.nzc_pcat_counts[c][t][b][1];
+ }
+ }
+ }
+}
+
+void print_nzcstats() {
+ int c, r, b, t;
+ printf(
+ "static const unsigned int default_nzc_counts_4x4[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC4X4_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC4X4_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_4x4[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const unsigned int default_nzc_counts_8x8[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC8X8_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC8X8_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_8x8[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const unsigned int default_nzc_counts_16x16[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC16X16_TOKENS] = {"
+ "\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC16X16_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_16x16[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const unsigned int default_nzc_counts_32x32[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC32X32_TOKENS] = {"
+ "\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC32X32_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_32x32[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_pcat_counts[MAX_NZC_CONTEXTS]\n"
+ " [NZC_TOKENS_EXTRA]\n"
+ " [NZC_BITS_EXTRA] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ printf(" {");
+ for (b = 0; b < NZC_BITS_EXTRA; ++b) {
+ printf(" %d/%d,",
+ nzc_pcat_stats[c][t][b][0], nzc_pcat_stats[c][t][b][1]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_4x4[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC4X4_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC4X4_NODES];
+ unsigned int branch_ct[NZC4X4_NODES][2];
+ vp9_tree_probs_from_distribution(NZC4X4_TOKENS,
+ vp9_nzc4x4_encodings,
+ vp9_nzc4x4_tree,
+ probs, branch_ct,
+ nzc_stats_4x4[c][r][b]);
+ printf(" {");
+ for (t = 0; t < NZC4X4_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_8x8[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC8X8_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC8X8_NODES];
+ unsigned int branch_ct[NZC8X8_NODES][2];
+ vp9_tree_probs_from_distribution(NZC8X8_TOKENS,
+ vp9_nzc8x8_encodings,
+ vp9_nzc8x8_tree,
+ probs, branch_ct,
+ nzc_stats_8x8[c][r][b]);
+ printf(" {");
+ for (t = 0; t < NZC8X8_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_16x16[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC16X16_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC16X16_NODES];
+ unsigned int branch_ct[NZC16X16_NODES][2];
+ vp9_tree_probs_from_distribution(NZC16X16_TOKENS,
+ vp9_nzc16x16_encodings,
+ vp9_nzc16x16_tree,
+ probs, branch_ct,
+ nzc_stats_16x16[c][r][b]);
+ printf(" {");
+ for (t = 0; t < NZC16X16_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_32x32[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC32X32_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC32X32_NODES];
+ unsigned int branch_ct[NZC32X32_NODES][2];
+ vp9_tree_probs_from_distribution(NZC32X32_TOKENS,
+ vp9_nzc32x32_encodings,
+ vp9_nzc32x32_tree,
+ probs, branch_ct,
+ nzc_stats_32x32[c][r][b]);
+ printf(" {");
+ for (t = 0; t < NZC32X32_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_pcat_probs[MAX_NZC_CONTEXTS]\n"
+ " [NZC_TOKENS_EXTRA]\n"
+ " [NZC_BITS_EXTRA] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ printf(" {");
+ for (b = 0; b < NZC_BITS_EXTRA; ++b) {
+ vp9_prob prob = get_binary_prob(nzc_pcat_stats[c][t][b][0],
+ nzc_pcat_stats[c][t][b][1]);
+ printf(" %-3d,", prob);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+}
+#endif
+
+#endif // CONFIG_CODE_NONZEROCOUNT
+
static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
TOKENEXTRA **tok, TOKENEXTRA *tok_end,
int mb_row, int mb_col) {
- VP9_COMMON *const c = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
xd->mode_info_context = m;
- xd->left_available = mb_col > c->cur_tile_mb_col_start;
- xd->right_available =
- (mb_col + (1 << m->mbmi.sb_type)) < c->cur_tile_mb_col_end;
- xd->up_available = mb_row > 0;
- if (c->frame_type == KEY_FRAME) {
+ set_mb_row(&cpi->common, xd, mb_row, (1 << m->mbmi.sb_type));
+ set_mb_col(&cpi->common, xd, mb_col, (1 << m->mbmi.sb_type));
+ if (cm->frame_type == KEY_FRAME) {
write_mb_modes_kf(cpi, m, bc,
- c->mb_rows - mb_row, c->mb_cols - mb_col);
+ cm->mb_rows - mb_row, cm->mb_cols - mb_col);
#ifdef ENTROPY_STATS
active_section = 8;
#endif
} else {
pack_inter_mode_mvs(cpi, m, bc,
- c->mb_rows - mb_row, c->mb_cols - mb_col);
+ cm->mb_rows - mb_row, cm->mb_cols - mb_col);
#ifdef ENTROPY_STATS
active_section = 1;
#endif
}
+#if CONFIG_CODE_NONZEROCOUNT
+ if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64)
+ write_nzcs_sb64(cpi, xd, mb_row, mb_col, bc);
+ else if (m->mbmi.sb_type == BLOCK_SIZE_SB32X32)
+ write_nzcs_sb32(cpi, xd, mb_row, mb_col, bc);
+ else
+ write_nzcs_mb16(cpi, xd, mb_row, mb_col, bc);
+#endif
assert(*tok < tok_end);
pack_mb_tokens(bc, tok, tok_end);
@@ -1230,6 +1780,234 @@
cpi->frame_branch_ct_32x32, BLOCK_TYPES);
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void update_nzc_probs_common(VP9_COMP* cpi,
+ vp9_writer* const bc,
+ int block_size) {
+ VP9_COMMON *cm = &cpi->common;
+ int c, r, b, t;
+ int update[2] = {0, 0};
+ int savings = 0;
+ int tokens, nodes;
+ const vp9_tree_index *nzc_tree;
+ const struct vp9_token_struct *nzc_encodings;
+ vp9_prob *new_nzc_probs;
+ vp9_prob *old_nzc_probs;
+ unsigned int *nzc_counts;
+ unsigned int (*nzc_branch_ct)[2];
+ vp9_prob upd;
+
+ if (block_size == 32) {
+ tokens = NZC32X32_TOKENS;
+ nzc_tree = vp9_nzc32x32_tree;
+ nzc_encodings = vp9_nzc32x32_encodings;
+ old_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_32x32[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_32x32[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_32x32[0][0][0];
+ upd = NZC_UPDATE_PROB_32X32;
+ } else if (block_size == 16) {
+ tokens = NZC16X16_TOKENS;
+ nzc_tree = vp9_nzc16x16_tree;
+ nzc_encodings = vp9_nzc16x16_encodings;
+ old_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_16x16[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_16x16[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_16x16[0][0][0];
+ upd = NZC_UPDATE_PROB_16X16;
+ } else if (block_size == 8) {
+ tokens = NZC8X8_TOKENS;
+ nzc_tree = vp9_nzc8x8_tree;
+ nzc_encodings = vp9_nzc8x8_encodings;
+ old_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_8x8[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_8x8[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_8x8[0][0][0];
+ upd = NZC_UPDATE_PROB_8X8;
+ } else {
+ nzc_tree = vp9_nzc4x4_tree;
+ nzc_encodings = vp9_nzc4x4_encodings;
+ tokens = NZC4X4_TOKENS;
+ old_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_4x4[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_4x4[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_4x4[0][0][0];
+ upd = NZC_UPDATE_PROB_4X4;
+ }
+ nodes = tokens - 1;
+ // Get the new probabilities and the branch counts
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ int offset_tokens = offset * tokens;
+ vp9_tree_probs_from_distribution(tokens,
+ nzc_encodings, nzc_tree,
+ new_nzc_probs + offset_nodes,
+ nzc_branch_ct + offset_nodes,
+ nzc_counts + offset_tokens);
+ }
+ }
+ }
+
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ for (t = 0; t < nodes; ++t) {
+ vp9_prob newp = new_nzc_probs[offset_nodes + t];
+ vp9_prob oldp = old_nzc_probs[offset_nodes + t];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes],
+ oldp, &newp, upd);
+ if (s > 0 && newp != oldp)
+ u = 1;
+ if (u)
+ savings += s - (int)(vp9_cost_zero(upd));
+ else
+ savings -= (int)(vp9_cost_zero(upd));
+#else
+ s = prob_update_savings(nzc_branch_ct[offset_nodes],
+ oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+ if (u)
+ savings += s;
+#endif
+ update[u]++;
+ }
+ }
+ }
+ }
+ if (update[1] == 0 || savings < 0) {
+ vp9_write_bit(bc, 0);
+ } else {
+ vp9_write_bit(bc, 1);
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ for (t = 0; t < nodes; ++t) {
+ vp9_prob newp = new_nzc_probs[offset_nodes + t];
+ vp9_prob *oldp = &old_nzc_probs[offset_nodes + t];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes],
+ *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp)
+ u = 1;
+#else
+ s = prob_update_savings(nzc_branch_ct[offset_nodes],
+ *oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+#endif
+ vp9_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+ }
+}
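
The routine follows the same two-pass pattern used for coefficient-probability updates: the first pass totals the rate savings of updating each probability net of the per-probability flag cost, and only if an update is worthwhile overall does the second pass write the flags and differential updates. A condensed sketch of the pattern, with the accounting simplified to mirror the SEARCH_NEWP path and savings_of() standing in for prob_diff_update_savings_search:

    /* Sketch: flag-gated differential update over n probabilities. */
    static void update_probs_sketch(vp9_writer *bc, vp9_prob *oldp,
                                    const vp9_prob *newp, int n,
                                    int (*savings_of)(vp9_prob, vp9_prob),
                                    int flag_cost, vp9_prob upd) {
      int i, savings = 0, updates = 0;
      for (i = 0; i < n; i++) {
        const int s = savings_of(oldp[i], newp[i]);
        if (s > 0) {
          savings += s - flag_cost;  /* an update pays for its own flag */
          updates++;
        } else {
          savings -= flag_cost;      /* a zero flag still costs bits */
        }
      }
      if (updates == 0 || savings < 0) {
        vp9_write_bit(bc, 0);        /* one bit: keep all old probabilities */
        return;
      }
      vp9_write_bit(bc, 1);
      for (i = 0; i < n; i++) {
        const int u = savings_of(oldp[i], newp[i]) > 0;
        vp9_write(bc, u, upd);       /* per-probability update flag */
        if (u) {
          write_prob_diff_update(bc, newp[i], oldp[i]);
          oldp[i] = newp[i];         /* decoder tracks the same value */
        }
      }
    }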
+
+static void update_nzc_pcat_probs(VP9_COMP *cpi, vp9_writer* const bc) {
+ VP9_COMMON *cm = &cpi->common;
+ int c, t, b;
+ int update[2] = {0, 0};
+ int savings = 0;
+ vp9_prob upd = NZC_UPDATE_PROB_PCAT;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ vp9_prob newp = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0],
+ cm->fc.nzc_pcat_counts[c][t][b][1]);
+ vp9_prob oldp = cm->fc.nzc_pcat_probs[c][t][b];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(cm->fc.nzc_pcat_counts[c][t][b],
+ oldp, &newp, upd);
+ if (s > 0 && newp != oldp)
+ u = 1;
+ if (u)
+ savings += s - (int)(vp9_cost_zero(upd));
+ else
+ savings -= (int)(vp9_cost_zero(upd));
+#else
+ s = prob_update_savings(cm->fc.nzc_pcat_counts[c][t][b],
+ oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+ if (u)
+ savings += s;
+#endif
+ update[u]++;
+ }
+ }
+ }
+ if (update[1] == 0 || savings < 0) {
+ vp9_write_bit(bc, 0);
+ } else {
+ vp9_write_bit(bc, 1);
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ vp9_prob newp = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0],
+ cm->fc.nzc_pcat_counts[c][t][b][1]);
+ vp9_prob *oldp = &cm->fc.nzc_pcat_probs[c][t][b];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(cm->fc.nzc_pcat_counts[c][t][b],
+ *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp)
+ u = 1;
+#else
+ s = prob_update_savings(cm->fc.nzc_pcat_counts[c][t][b],
+ *oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+#endif
+ vp9_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+}
+
+static void update_nzc_probs(VP9_COMP* cpi,
+ vp9_writer* const bc) {
+ update_nzc_probs_common(cpi, bc, 4);
+ if (cpi->common.txfm_mode != ONLY_4X4)
+ update_nzc_probs_common(cpi, bc, 8);
+ if (cpi->common.txfm_mode > ALLOW_8X8)
+ update_nzc_probs_common(cpi, bc, 16);
+ if (cpi->common.txfm_mode > ALLOW_16X16)
+ update_nzc_probs_common(cpi, bc, 32);
+#ifdef NZC_PCAT_UPDATE
+ update_nzc_pcat_probs(cpi, bc);
+#endif
+#ifdef NZC_STATS
+ if (!cpi->dummy_packing)
+ update_nzcstats(&cpi->common);
+#endif
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
+
static void update_coef_probs_common(vp9_writer* const bc,
#ifdef ENTROPY_STATS
VP9_COMP *cpi,
@@ -1251,7 +2029,7 @@
for (k = 0; k < COEF_BANDS; ++k) {
int prev_coef_savings[ENTROPY_NODES] = {0};
for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
- for (t = 0; t < ENTROPY_NODES; ++t) {
+ for (t = CONFIG_CODE_NONZEROCOUNT; t < ENTROPY_NODES; ++t) {
vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
const vp9_prob upd = COEF_UPDATE_PROB;
@@ -1297,7 +2075,7 @@
int prev_coef_savings[ENTROPY_NODES] = {0};
for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
// calc probs and branch cts for this frame only
- for (t = 0; t < ENTROPY_NODES; ++t) {
+ for (t = CONFIG_CODE_NONZEROCOUNT; t < ENTROPY_NODES; ++t) {
vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
const vp9_prob upd = COEF_UPDATE_PROB;
@@ -1898,6 +2676,27 @@
cpi->common.fc.coef_probs_16x16);
vp9_copy(cpi->common.fc.pre_coef_probs_32x32,
cpi->common.fc.coef_probs_32x32);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(cpi->common.fc.pre_nzc_probs_4x4,
+ cpi->common.fc.nzc_probs_4x4);
+ vp9_copy(cpi->common.fc.pre_nzc_probs_8x8,
+ cpi->common.fc.nzc_probs_8x8);
+ vp9_copy(cpi->common.fc.pre_nzc_probs_16x16,
+ cpi->common.fc.nzc_probs_16x16);
+ vp9_copy(cpi->common.fc.pre_nzc_probs_32x32,
+ cpi->common.fc.nzc_probs_32x32);
+ vp9_copy(cpi->common.fc.pre_nzc_pcat_probs,
+ cpi->common.fc.nzc_pcat_probs);
+ // NOTE that if the counts are reset, we also need to uncomment
+ // the count updates in the write_nzc function
+ /*
+ vp9_zero(cpi->common.fc.nzc_counts_4x4);
+ vp9_zero(cpi->common.fc.nzc_counts_8x8);
+ vp9_zero(cpi->common.fc.nzc_counts_16x16);
+ vp9_zero(cpi->common.fc.nzc_counts_32x32);
+ vp9_zero(cpi->common.fc.nzc_pcat_counts);
+ */
+#endif
vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob);
vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
@@ -1914,6 +2713,9 @@
vp9_zero(cpi->common.fc.mv_ref_ct)
update_coef_probs(cpi, &header_bc);
+#if CONFIG_CODE_NONZEROCOUNT
+ update_nzc_probs(cpi, &header_bc);
+#endif
#ifdef ENTROPY_STATS
active_section = 2;
@@ -1925,8 +2727,9 @@
int k;
vp9_update_skip_probs(cpi);
- for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8);
+ }
}
if (pc->frame_type == KEY_FRAME) {
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 560c371..4390061 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -151,6 +151,12 @@
unsigned char *active_ptr;
vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ unsigned int nzc_costs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][17];
+ unsigned int nzc_costs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][65];
+ unsigned int nzc_costs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][257];
+ unsigned int nzc_costs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][1025];
+#endif
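
The trailing dimensions are the number of representable counts per transform plus one: a 4x4 transform holds up to 16 nonzero coefficients (17 values including zero), an 8x8 up to 64, a 16x16 up to 256, and a 32x32 up to 1024. In other words:

    /* Cost-table length for a transform with n coefficients: counts 0..n. */
    #define NZC_COST_ENTRIES(n) ((n) + 1)
    /* 4x4: 17, 8x8: 65, 16x16: 257, 32x32: 1025 -- matching the arrays above. */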
int optimize;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a4dbdc5..87d456d 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -630,10 +630,6 @@
const int idx_map = mb_row * cm->mb_cols + mb_col;
const int idx_str = xd->mode_info_stride * mb_row + mb_col;
-#ifdef ENC_DEBUG
- enc_debug = (cpi->common.current_video_frame == 2 &&
- mb_row == 4 && mb_col == 5);
-#endif
// entropy context structures
xd->above_context = cm->above_context + mb_col;
xd->left_context = cm->left_context + (mb_row & 3);
@@ -668,15 +664,8 @@
// Set up distance of MB to edge of frame in 1/8th pel units
block_size >>= 4; // in macroblock units
assert(!(mb_col & (block_size - 1)) && !(mb_row & (block_size - 1)));
- xd->mb_to_top_edge = -((mb_row * 16) << 3);
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
- xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
-
- // Are edges available for intra prediction?
- xd->up_available = (mb_row != 0);
- xd->left_available = (mb_col > cm->cur_tile_mb_col_start);
- xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
+ set_mb_row(cm, xd, mb_row, block_size);
+ set_mb_col(cm, xd, mb_col, block_size);
/* set up source buffers */
setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL);
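
set_mb_row()/set_mb_col() presumably centralize the edge and availability arithmetic deleted above. A sketch reconstructed from the removed lines (distances are in 1/8-pel units, hence the << 3):

    static void set_mb_row_sketch(VP9_COMMON *cm, MACROBLOCKD *xd,
                                  int mb_row, int block_size) {
      xd->mb_to_top_edge = -((mb_row * 16) << 3);
      xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
      xd->up_available = (mb_row != 0);
    }

    static void set_mb_col_sketch(VP9_COMMON *cm, MACROBLOCKD *xd,
                                  int mb_col, int block_size) {
      xd->mb_to_left_edge = -((mb_col * 16) << 3);
      xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
      xd->left_available = (mb_col > cm->cur_tile_mb_col_start);
      xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
    }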
@@ -891,7 +880,7 @@
}
}
-static void update_stats(VP9_COMP *cpi) {
+static void update_stats(VP9_COMP *cpi, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -945,6 +934,9 @@
if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
cpi->inter_zz_count++;
}
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_update_nzc_counts(&cpi->common, xd, mb_row, mb_col);
+#endif
}
static void encode_sb(VP9_COMP *cpi,
@@ -963,8 +955,9 @@
encode_superblock32(cpi, tp,
output_enabled, mb_row, mb_col);
- if (output_enabled)
- update_stats(cpi);
+ if (output_enabled) {
+ update_stats(cpi, mb_row, mb_col);
+ }
if (output_enabled) {
(*tp)->Token = EOSB_TOKEN;
@@ -992,12 +985,13 @@
encode_macroblock(cpi, tp,
output_enabled, mb_row + y_idx, mb_col + x_idx);
- if (output_enabled)
- update_stats(cpi);
+ if (output_enabled) {
+ update_stats(cpi, mb_row + y_idx, mb_col + x_idx);
+ }
if (output_enabled) {
(*tp)->Token = EOSB_TOKEN;
- (*tp)++;
+ (*tp)++;
if (mb_row + y_idx < cm->mb_rows)
cpi->tplist[mb_row + y_idx].stop = *tp;
}
@@ -1029,7 +1023,7 @@
update_state(cpi, &x->sb64_context, 64, 1);
encode_superblock64(cpi, tp,
1, mb_row, mb_col);
- update_stats(cpi);
+ update_stats(cpi, mb_row, mb_col);
(*tp)->Token = EOSB_TOKEN;
(*tp)++;
@@ -1247,8 +1241,9 @@
MACROBLOCKD *const xd = &x->e_mbd;
int totalrate;
-// fprintf(stderr, "encode_frame_internal frame %d (%d)\n",
-// cpi->common.current_video_frame, cpi->common.show_frame);
+// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
+// cpi->common.current_video_frame, cpi->common.show_frame,
+// cm->frame_type);
// Compute a modified set of reference frame probabilities to use when
// prediction fails. These are based on the current general estimates for
@@ -1286,6 +1281,13 @@
vp9_zero(cpi->coef_counts_8x8);
vp9_zero(cpi->coef_counts_16x16);
vp9_zero(cpi->coef_counts_32x32);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_zero(cm->fc.nzc_counts_4x4);
+ vp9_zero(cm->fc.nzc_counts_8x8);
+ vp9_zero(cm->fc.nzc_counts_16x16);
+ vp9_zero(cm->fc.nzc_counts_32x32);
+ vp9_zero(cm->fc.nzc_pcat_counts);
+#endif
#if CONFIG_NEW_MVREF
vp9_zero(cpi->mb_mv_ref_count);
#endif
@@ -1327,30 +1329,34 @@
{
// Take tiles into account and give start/end MB
- int tile_col;
+ int tile_col, tile_row;
TOKENEXTRA *tp = cpi->tok;
- for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
- TOKENEXTRA *tp_old = tp;
- // For each row of SBs in the frame
- vp9_get_tile_col_offsets(cm, tile_col);
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) {
- encode_sb_row(cpi, mb_row, &tp, &totalrate);
+ for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(cm, tile_row);
+
+ for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
+ TOKENEXTRA *tp_old = tp;
+
+ // For each row of SBs in the frame
+ vp9_get_tile_col_offsets(cm, tile_col);
+ for (mb_row = cm->cur_tile_mb_row_start;
+ mb_row < cm->cur_tile_mb_row_end; mb_row += 4) {
+ encode_sb_row(cpi, mb_row, &tp, &totalrate);
+ }
+ cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
}
- cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
}
}
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
-
}
// 256 rate units to the bit,
// projected_frame_size in units of BYTES
cpi->projected_frame_size = totalrate >> 8;
-
#if 0
// Keep record of the total distortion this time around for future use
cpi->last_frame_distortion = cpi->frame_distortion;
@@ -1554,30 +1560,15 @@
txfm_type = ONLY_4X4;
cpi->mb.e_mbd.lossless = 1;
} else
- /* FIXME (rbultje)
- * this is a hack (no really), basically to work around the complete
- * nonsense coefficient cost prediction for keyframes. The probabilities
- * are reset to defaults, and thus we basically have no idea how expensive
- * a 4x4 vs. 8x8 will really be. The result is that any estimate at which
- * of the two is better is utterly bogus.
- * I'd like to eventually remove this hack, but in order to do that, we
- * need to move the frame reset code from the frame encode init to the
- * bitstream write code, or alternatively keep a backup of the previous
- * keyframe's probabilities as an estimate of what the current keyframe's
- * coefficient cost distributions may look like. */
- if (frame_type == 0) {
- txfm_type = ALLOW_32X32;
- } else
#if 0
- /* FIXME (rbultje)
- * this code is disabled for a similar reason as the code above; the
- * problem is that each time we "revert" to 4x4 only (or even 8x8 only),
- * the coefficient probabilities for 16x16 (and 8x8) start lagging behind,
- * thus leading to them lagging further behind and not being chosen for
- * subsequent frames either. This is essentially a local minimum problem
- * that we can probably fix by estimating real costs more closely within
- * a frame, perhaps by re-calculating costs on-the-fly as frame encoding
- * progresses. */
+ /* FIXME (rbultje): this code is disabled until we support cost updates
+ * while a frame is being encoded; the problem is that each time we
+ * "revert" to 4x4 only (or even 8x8 only), the coefficient probabilities
+ * for 16x16 (and 8x8) start lagging behind, thus leading to them lagging
+ * further behind and not being chosen for subsequent frames either. This
+ * is essentially a local minimum problem that we can probably fix by
+ * estimating real costs more closely within a frame, perhaps by re-
+ * calculating costs on-the-fly as frame encoding progresses. */
if (cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] &&
cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
@@ -1930,6 +1921,135 @@
}
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void gather_nzcs_mb16(VP9_COMMON *const cm,
+ MACROBLOCKD *xd) {
+ int i;
+ vpx_memset(xd->mode_info_context->mbmi.nzcs, 0,
+ 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_4X4:
+ for (i = 0; i < 24; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_8X8:
+ for (i = 0; i < 16; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) {
+ for (i = 16; i < 24; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ } else {
+ for (i = 16; i < 24; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ }
+ break;
+
+ case TX_16X16:
+ xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0];
+ for (i = 16; i < 24; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void gather_nzcs_sb32(VP9_COMMON *const cm,
+ MACROBLOCKD *xd) {
+ int i, j;
+ MODE_INFO *m = xd->mode_info_context;
+ int mis = cm->mode_info_stride;
+ vpx_memset(m->mbmi.nzcs, 0,
+ 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_4X4:
+ for (i = 0; i < 96; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_8X8:
+ for (i = 0; i < 96; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_16X16:
+ for (i = 0; i < 96; i += 16) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_32X32:
+ xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0];
+ for (i = 64; i < 96; i += 16) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ default:
+ break;
+ }
+ for (i = 0; i < 2; ++i)
+ for (j = 0; j < 2; ++j) {
+ if (i == 0 && j == 0) continue;
+ vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
+ 384 * sizeof(m->mbmi.nzcs[0]));
+ }
+}
+
+static void gather_nzcs_sb64(VP9_COMMON *const cm,
+ MACROBLOCKD *xd) {
+ int i, j;
+ MODE_INFO *m = xd->mode_info_context;
+ int mis = cm->mode_info_stride;
+ vpx_memset(xd->mode_info_context->mbmi.nzcs, 0,
+ 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_4X4:
+ for (i = 0; i < 384; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_8X8:
+ for (i = 0; i < 384; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_16X16:
+ for (i = 0; i < 384; i += 16) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_32X32:
+ for (i = 0; i < 384; i += 64) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ default:
+ break;
+ }
+ for (i = 0; i < 4; ++i)
+ for (j = 0; j < 4; ++j) {
+ if (i == 0 && j == 0) continue;
+ vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
+ 384 * sizeof(m->mbmi.nzcs[0]));
+ }
+}
+#endif
+
static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
int output_enabled,
int mb_row, int mb_col) {
@@ -1944,8 +2064,8 @@
assert(!xd->mode_info_context->mbmi.sb_type);
#ifdef ENC_DEBUG
- enc_debug = (cpi->common.current_video_frame == 2 &&
- mb_row == 5 && mb_col == 18);
+ enc_debug = (cpi->common.current_video_frame == 1 &&
+ mb_row == 0 && mb_col == 0 && output_enabled);
if (enc_debug)
printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled);
#endif
@@ -1997,14 +2117,14 @@
}
#endif
if (mbmi->mode == B_PRED) {
- vp9_encode_intra16x16mbuv(x);
+ vp9_encode_intra16x16mbuv(cm, x);
vp9_encode_intra4x4mby(x);
} else if (mbmi->mode == I8X8_PRED) {
vp9_encode_intra8x8mby(x);
vp9_encode_intra8x8mbuv(x);
} else {
- vp9_encode_intra16x16mbuv(x);
- vp9_encode_intra16x16mby(x);
+ vp9_encode_intra16x16mbuv(cm, x);
+ vp9_encode_intra16x16mby(cm, x);
}
if (output_enabled)
@@ -2051,7 +2171,7 @@
}
if (!x->skip) {
- vp9_encode_inter16x16(x, mb_row, mb_col);
+ vp9_encode_inter16x16(cm, x, mb_row, mb_col);
// Clear mb_skip_coeff if mb_no_coeff_skip is not set
if (!cpi->common.mb_no_coeff_skip)
@@ -2079,12 +2199,12 @@
}
if (!x->skip) {
-#if 0 // def ENC_DEBUG
+#ifdef ENC_DEBUG
if (enc_debug) {
int i, j;
printf("\n");
printf("qcoeff\n");
- for (i = 0; i < 400; i++) {
+ for (i = 0; i < 384; i++) {
printf("%3d ", xd->qcoeff[i]);
if (i % 16 == 15) printf("\n");
}
@@ -2131,6 +2251,9 @@
}
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+ gather_nzcs_mb16(cm, xd);
+#endif
vp9_tokenize_mb(cpi, xd, t, !output_enabled);
} else {
@@ -2197,6 +2320,12 @@
unsigned int segment_id = mi->mbmi.segment_id;
const int mis = cm->mode_info_stride;
+#ifdef ENC_DEBUG
+ enc_debug = (cpi->common.current_video_frame == 1 &&
+ mb_row == 0 && mb_col == 0 && output_enabled);
+ if (enc_debug)
+ printf("Encode SB32 %d %d output %d\n", mb_row, mb_col, output_enabled);
+#endif
if (cm->frame_type == KEY_FRAME) {
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
adjust_act_zbin(cpi, x);
@@ -2294,8 +2423,8 @@
vp9_quantize_sby_32x32(x);
vp9_quantize_sbuv_16x16(x);
if (x->optimize) {
- vp9_optimize_sby_32x32(x);
- vp9_optimize_sbuv_16x16(x);
+ vp9_optimize_sby_32x32(cm, x);
+ vp9_optimize_sbuv_16x16(cm, x);
}
vp9_inverse_transform_sby_32x32(xd);
vp9_inverse_transform_sbuv_16x16(xd);
@@ -2306,8 +2435,8 @@
vp9_quantize_sby_16x16(x);
vp9_quantize_sbuv_16x16(x);
if (x->optimize) {
- vp9_optimize_sby_16x16(x);
- vp9_optimize_sbuv_16x16(x);
+ vp9_optimize_sby_16x16(cm, x);
+ vp9_optimize_sbuv_16x16(cm, x);
}
vp9_inverse_transform_sby_16x16(xd);
vp9_inverse_transform_sbuv_16x16(xd);
@@ -2318,8 +2447,8 @@
vp9_quantize_sby_8x8(x);
vp9_quantize_sbuv_8x8(x);
if (x->optimize) {
- vp9_optimize_sby_8x8(x);
- vp9_optimize_sbuv_8x8(x);
+ vp9_optimize_sby_8x8(cm, x);
+ vp9_optimize_sbuv_8x8(cm, x);
}
vp9_inverse_transform_sby_8x8(xd);
vp9_inverse_transform_sbuv_8x8(xd);
@@ -2330,8 +2459,8 @@
vp9_quantize_sby_4x4(x);
vp9_quantize_sbuv_4x4(x);
if (x->optimize) {
- vp9_optimize_sby_4x4(x);
- vp9_optimize_sbuv_4x4(x);
+ vp9_optimize_sby_4x4(cm, x);
+ vp9_optimize_sbuv_4x4(cm, x);
}
vp9_inverse_transform_sby_4x4(xd);
vp9_inverse_transform_sbuv_4x4(xd);
@@ -2340,6 +2469,9 @@
}
vp9_recon_sby_s_c(xd, dst);
vp9_recon_sbuv_s_c(xd, udst, vdst);
+#if CONFIG_CODE_NONZEROCOUNT
+ gather_nzcs_sb32(cm, xd);
+#endif
vp9_tokenize_sb(cpi, xd, t, !output_enabled);
} else {
@@ -2407,6 +2539,12 @@
unsigned int segment_id = mi->mbmi.segment_id;
const int mis = cm->mode_info_stride;
+#ifdef ENC_DEBUG
+ enc_debug = (cpi->common.current_video_frame == 1 &&
+ mb_row == 0 && mb_col == 0 && output_enabled);
+ if (enc_debug)
+ printf("Encode SB64 %d %d output %d\n", mb_row, mb_col, output_enabled);
+#endif
if (cm->frame_type == KEY_FRAME) {
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
adjust_act_zbin(cpi, x);
@@ -2502,8 +2640,8 @@
vp9_quantize_sb64y_32x32(x);
vp9_quantize_sb64uv_32x32(x);
if (x->optimize) {
- vp9_optimize_sb64y_32x32(x);
- vp9_optimize_sb64uv_32x32(x);
+ vp9_optimize_sb64y_32x32(cm, x);
+ vp9_optimize_sb64uv_32x32(cm, x);
}
vp9_inverse_transform_sb64y_32x32(xd);
vp9_inverse_transform_sb64uv_32x32(xd);
@@ -2514,8 +2652,8 @@
vp9_quantize_sb64y_16x16(x);
vp9_quantize_sb64uv_16x16(x);
if (x->optimize) {
- vp9_optimize_sb64y_16x16(x);
- vp9_optimize_sb64uv_16x16(x);
+ vp9_optimize_sb64y_16x16(cm, x);
+ vp9_optimize_sb64uv_16x16(cm, x);
}
vp9_inverse_transform_sb64y_16x16(xd);
vp9_inverse_transform_sb64uv_16x16(xd);
@@ -2526,8 +2664,8 @@
vp9_quantize_sb64y_8x8(x);
vp9_quantize_sb64uv_8x8(x);
if (x->optimize) {
- vp9_optimize_sb64y_8x8(x);
- vp9_optimize_sb64uv_8x8(x);
+ vp9_optimize_sb64y_8x8(cm, x);
+ vp9_optimize_sb64uv_8x8(cm, x);
}
vp9_inverse_transform_sb64y_8x8(xd);
vp9_inverse_transform_sb64uv_8x8(xd);
@@ -2538,8 +2676,8 @@
vp9_quantize_sb64y_4x4(x);
vp9_quantize_sb64uv_4x4(x);
if (x->optimize) {
- vp9_optimize_sb64y_4x4(x);
- vp9_optimize_sb64uv_4x4(x);
+ vp9_optimize_sb64y_4x4(cm, x);
+ vp9_optimize_sb64uv_4x4(cm, x);
}
vp9_inverse_transform_sb64y_4x4(xd);
vp9_inverse_transform_sb64uv_4x4(xd);
@@ -2548,7 +2686,9 @@
}
vp9_recon_sb64y_s_c(xd, dst);
vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst);
-
+#if CONFIG_CODE_NONZEROCOUNT
+ gather_nzcs_sb64(cm, &x->e_mbd);
+#endif
vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled);
} else {
// FIXME(rbultje): not tile-aware (mi - 1)
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 75c8ea8..3c98d4a 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -25,7 +25,7 @@
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame = INTRA_FRAME;
- vp9_encode_intra16x16mby(x);
+ vp9_encode_intra16x16mby(&cpi->common, x);
} else {
int i;
@@ -50,7 +50,7 @@
vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor);
vp9_subtract_b(be, b, 16);
- tx_type = get_tx_type_4x4(&x->e_mbd, b);
+ tx_type = get_tx_type_4x4(&x->e_mbd, ib);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
vp9_ht_quantize_b_4x4(x, ib, tx_type);
@@ -72,7 +72,7 @@
vp9_encode_intra4x4block(mb, i);
}
-void vp9_encode_intra16x16mby(MACROBLOCK *x) {
+void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -86,21 +86,21 @@
vp9_transform_mby_16x16(x);
vp9_quantize_mby_16x16(x);
if (x->optimize)
- vp9_optimize_mby_16x16(x);
+ vp9_optimize_mby_16x16(cm, x);
vp9_inverse_transform_mby_16x16(xd);
break;
case TX_8X8:
vp9_transform_mby_8x8(x);
vp9_quantize_mby_8x8(x);
if (x->optimize)
- vp9_optimize_mby_8x8(x);
+ vp9_optimize_mby_8x8(cm, x);
vp9_inverse_transform_mby_8x8(xd);
break;
default:
vp9_transform_mby_4x4(x);
vp9_quantize_mby_4x4(x);
if (x->optimize)
- vp9_optimize_mby_4x4(x);
+ vp9_optimize_mby_4x4(cm, x);
vp9_inverse_transform_mby_4x4(xd);
break;
}
@@ -108,7 +108,7 @@
vp9_recon_mby(xd);
}
-void vp9_encode_intra16x16mbuv(MACROBLOCK *x) {
+void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -122,14 +122,14 @@
vp9_transform_mbuv_4x4(x);
vp9_quantize_mbuv_4x4(x);
if (x->optimize)
- vp9_optimize_mbuv_4x4(x);
+ vp9_optimize_mbuv_4x4(cm, x);
vp9_inverse_transform_mbuv_4x4(xd);
break;
default: // 16x16 or 8x8
vp9_transform_mbuv_8x8(x);
vp9_quantize_mbuv_8x8(x);
if (x->optimize)
- vp9_optimize_mbuv_8x8(x);
+ vp9_optimize_mbuv_8x8(cm, x);
vp9_inverse_transform_mbuv_8x8(xd);
break;
}
@@ -152,7 +152,7 @@
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
int idx = (ib & 0x02) ? (ib + 2) : ib;
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+ tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT) {
vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
x->quantize_b_8x8(x, idx);
@@ -167,12 +167,13 @@
for (i = 0; i < 4; i++) {
b = &xd->block[ib + iblock[i]];
be = &x->block[ib + iblock[i]];
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
- } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
+ } else if (!(i & 1) &&
+ get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h
index b017673..0b19b56 100644
--- a/vp9/encoder/vp9_encodeintra.h
+++ b/vp9/encoder/vp9_encodeintra.h
@@ -14,8 +14,8 @@
#include "vp9/encoder/vp9_onyx_int.h"
int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
-void vp9_encode_intra16x16mby(MACROBLOCK *x);
-void vp9_encode_intra16x16mbuv(MACROBLOCK *x);
+void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_encode_intra4x4mby(MACROBLOCK *mb);
void vp9_encode_intra4x4block(MACROBLOCK *x, int ib);
void vp9_encode_intra8x8mby(MACROBLOCK *x);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index b2ee800..dae177a 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -210,10 +210,10 @@
for (i = 0; i < 16; i++) {
BLOCK *b = &x->block[i];
- TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type);
- } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
+ } else if (!(i & 1) && get_tx_type_4x4(xd, i + 1) == DCT_DCT) {
x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 32);
i++;
} else {
@@ -241,7 +241,7 @@
for (i = 0; i < 9; i += 8) {
BLOCK *b = &x->block[i];
- tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type);
} else {
@@ -250,7 +250,7 @@
}
for (i = 2; i < 11; i += 8) {
BLOCK *b = &x->block[i];
- tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type);
} else {
@@ -274,7 +274,7 @@
void vp9_transform_mby_16x16(MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
- TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
+ TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
vp9_clear_system_state();
if (tx_type != DCT_DCT) {
vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type);
@@ -293,35 +293,56 @@
}
void vp9_transform_sby_16x16(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
int n;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
- x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
- x->coeff + n * 256, 64);
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + n * 256, 32, tx_type);
+ } else {
+ x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + n * 256, 64);
+ }
}
}
void vp9_transform_sby_8x8(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
- x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
- x->coeff + n * 64, 64);
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + n * 64, 32, tx_type);
+ } else {
+ x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + n * 64, 64);
+ }
}
}
void vp9_transform_sby_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
int n;
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
- x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
- x->coeff + n * 16, 64);
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + n * 16, 32, tx_type);
+ } else {
+ x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + n * 16, 64);
+ }
}
}
@@ -371,35 +392,56 @@
}
void vp9_transform_sb64y_16x16(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
- x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
- x->coeff + n * 256, 128);
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
+ x->coeff + n * 256, 64, tx_type);
+ } else {
+ x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
+ x->coeff + n * 256, 128);
+ }
}
}
void vp9_transform_sb64y_8x8(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
int n;
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
- x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
- x->coeff + n * 64, 128);
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
+ x->coeff + n * 64, 64, tx_type);
+ } else {
+ x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
+ x->coeff + n * 64, 128);
+ }
}
}
void vp9_transform_sb64y_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
int n;
for (n = 0; n < 256; n++) {
const int x_idx = n & 15, y_idx = n >> 4;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
- x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
- x->coeff + n * 16, 128);
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
+ x->coeff + n * 16, 64, tx_type);
+ } else {
+ x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
+ x->coeff + n * 16, 128);
+ }
}
}
@@ -491,7 +533,8 @@
return vp9_get_coef_context(&recent_energy, token);
}
-static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type,
+static void optimize_b(VP9_COMMON *const cm,
+ MACROBLOCK *mb, int ib, PLANE_TYPE type,
const int16_t *dequant_ptr,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int tx_size) {
@@ -512,26 +555,61 @@
int default_eob;
int const *scan;
const int mul = 1 + (tx_size == TX_32X32);
+#if CONFIG_CODE_NONZEROCOUNT
+ // TODO(debargha): the dynamic programming approach used in this function
+ // is not compatible with the true rate cost when nzcs are used. Note
+ // that the total rate is the sum of the nzc rate and the individual
+ // token rates. The latter part can be optimized in this function, but
+ // because the nzc rate is a function of all the other tokens without a
+ // Markov relationship, this rate cannot be accounted for exactly.
+ // The current implementation approximates the nzc rates somewhat, but
+ // in reality the optimization approach needs to change substantially.
+ uint16_t nzc = xd->nzcs[ib];
+ uint16_t nzc0, nzc1;
+ uint16_t final_nzc = 0, final_nzc_exp;
+ int nzc_context = vp9_get_nzc_context(cm, xd, ib);
+ unsigned int *nzc_cost;
+ nzc0 = nzc1 = nzc;
+#endif
switch (tx_size) {
default:
- case TX_4X4:
+ case TX_4X4: {
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, ib);
default_eob = 16;
- // FIXME(rbultje): although optimize_b currently isn't called for
- // intra4x4, this should be changed to be adst-compatible
- scan = vp9_default_zig_zag1d_4x4;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type];
+#endif
+ if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_4x4;
+ } else if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_4x4;
+ } else {
+ scan = vp9_default_zig_zag1d_4x4;
+ }
break;
+ }
case TX_8X8:
scan = vp9_default_zig_zag1d_8x8;
default_eob = 64;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
+#endif
break;
case TX_16X16:
scan = vp9_default_zig_zag1d_16x16;
default_eob = 256;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
+#endif
break;
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
default_eob = 1024;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type];
+#endif
break;
}
@@ -542,7 +620,11 @@
rddiv = mb->rddiv;
memset(best_index, 0, sizeof(best_index));
/* Initialize the sentinel node of the trellis. */
+#if CONFIG_CODE_NONZEROCOUNT
+ tokens[eob][0].rate = nzc_cost[nzc];
+#else
tokens[eob][0].rate = 0;
+#endif
tokens[eob][0].error = 0;
tokens[eob][0].next = default_eob;
tokens[eob][0].token = DCT_EOB_TOKEN;
@@ -551,6 +633,9 @@
next = eob;
for (i = eob; i-- > i0;) {
int base_bits, d2, dx;
+#if CONFIG_CODE_NONZEROCOUNT
+ int new_nzc0, new_nzc1;
+#endif
rc = scan[i];
x = qcoeff_ptr[rc];
@@ -584,6 +669,10 @@
tokens[i][0].token = t0;
tokens[i][0].qc = x;
best_index[i][0] = best;
+#if CONFIG_CODE_NONZEROCOUNT
+ new_nzc0 = (best ? nzc1 : nzc0);
+#endif
+
/* Evaluate the second possibility for this state. */
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
@@ -609,6 +698,12 @@
DCT_EOB_TOKEN : ZERO_TOKEN;
t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
DCT_EOB_TOKEN : ZERO_TOKEN;
+#if CONFIG_CODE_NONZEROCOUNT
+ // Account for rate drop because of the nzc change.
+ // TODO(debargha): Find a better solution
+ rate0 -= nzc_cost[nzc0] - nzc_cost[nzc0 - 1];
+ rate1 -= nzc_cost[nzc1] - nzc_cost[nzc1 - 1];
+#endif
} else {
t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
}
@@ -641,6 +736,11 @@
tokens[i][1].token = best ? t1 : t0;
tokens[i][1].qc = x;
best_index[i][1] = best;
+#if CONFIG_CODE_NONZEROCOUNT
+ new_nzc1 = (best ? nzc1 : nzc0) - (!x);
+ nzc0 = new_nzc0;
+ nzc1 = new_nzc1;
+#endif
/* Finally, make this the new head of the trellis. */
next = i;
}
@@ -679,11 +779,18 @@
rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
+#if CONFIG_CODE_NONZEROCOUNT
+ final_nzc_exp = (best ? nzc1 : nzc0);
+#endif
final_eob = i0 - 1;
for (i = next; i < eob; i = next) {
x = tokens[i][best].qc;
- if (x)
+ if (x) {
final_eob = i;
+#if CONFIG_CODE_NONZEROCOUNT
+ ++final_nzc;
+#endif
+ }
rc = scan[i];
qcoeff_ptr[rc] = x;
dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
@@ -695,9 +802,13 @@
xd->eobs[ib] = final_eob;
*a = *l = (final_eob > 0);
+#if CONFIG_CODE_NONZEROCOUNT
+ assert(final_nzc == final_nzc_exp);
+ xd->nzcs[ib] = final_nzc;
+#endif
}
-void vp9_optimize_mby_4x4(MACROBLOCK *x) {
+void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
@@ -713,13 +824,13 @@
tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 0; b < 16; b++) {
- optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b], TX_4X4);
}
}
-void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
+void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
@@ -735,18 +846,18 @@
tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 16; b < 24; b++) {
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b], TX_4X4);
}
}
-static void optimize_mb_4x4(MACROBLOCK *x) {
- vp9_optimize_mby_4x4(x);
- vp9_optimize_mbuv_4x4(x);
+static void optimize_mb_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ vp9_optimize_mby_4x4(cm, x);
+ vp9_optimize_mbuv_4x4(cm, x);
}
-void vp9_optimize_mby_8x8(MACROBLOCK *x) {
+void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
@@ -765,14 +876,14 @@
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
&above_ec, &left_ec, TX_8X8);
a[1] = a[0] = above_ec;
l[1] = l[0] = left_ec;
}
}
-void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
+void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)x->e_mbd.above_context;
ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)x->e_mbd.left_context;
@@ -785,17 +896,17 @@
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
&above_ec, &left_ec, TX_8X8);
}
}
-static void optimize_mb_8x8(MACROBLOCK *x) {
- vp9_optimize_mby_8x8(x);
- vp9_optimize_mbuv_8x8(x);
+static void optimize_mb_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ vp9_optimize_mby_8x8(cm, x);
+ vp9_optimize_mbuv_8x8(cm, x);
}
-void vp9_optimize_mby_16x16(MACROBLOCK *x) {
+void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT_PLANES *const t_above = x->e_mbd.above_context;
ENTROPY_CONTEXT_PLANES *const t_left = x->e_mbd.left_context;
ENTROPY_CONTEXT ta, tl;
@@ -805,16 +916,16 @@
ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0;
tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0;
- optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
&ta, &tl, TX_16X16);
}
-static void optimize_mb_16x16(MACROBLOCK *x) {
- vp9_optimize_mby_16x16(x);
- vp9_optimize_mbuv_8x8(x);
+static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ vp9_optimize_mby_16x16(cm, x);
+ vp9_optimize_mbuv_8x8(cm, x);
}
-void vp9_optimize_sby_32x32(MACROBLOCK *x) {
+void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
@@ -823,11 +934,11 @@
ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
- optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
&ta, &tl, TX_32X32);
}
-void vp9_optimize_sby_16x16(MACROBLOCK *x) {
+void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
@@ -842,12 +953,12 @@
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
- optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
ta + x_idx, tl + y_idx, TX_16X16);
}
}
-void vp9_optimize_sby_8x8(MACROBLOCK *x) {
+void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
@@ -866,12 +977,12 @@
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
- optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
ta + x_idx, tl + y_idx, TX_8X8);
}
}
-void vp9_optimize_sby_4x4(MACROBLOCK *x) {
+void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT ta[8], tl[8];
int n;
@@ -882,12 +993,12 @@
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
- optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
ta + x_idx, tl + y_idx, TX_4X4);
}
}
-void vp9_optimize_sbuv_16x16(MACROBLOCK *x) {
+void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec;
@@ -901,12 +1012,12 @@
l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
&above_ec, &left_ec, TX_16X16);
}
}
-void vp9_optimize_sbuv_8x8(MACROBLOCK *x) {
+void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
@@ -921,14 +1032,14 @@
l = tl + vp9_block2left_sb[TX_8X8][b];
above_ec = (a[0] + a[1]) != 0;
left_ec = (l[0] + l[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
&above_ec, &left_ec, TX_8X8);
a[0] = a[1] = above_ec;
l[0] = l[1] = left_ec;
}
}
-void vp9_optimize_sbuv_4x4(MACROBLOCK *x) {
+void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
@@ -941,12 +1052,12 @@
const int cidx = b >= 80 ? 20 : 16;
a = ta + vp9_block2above_sb[TX_4X4][b];
l = tl + vp9_block2left_sb[TX_4X4][b];
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
a, l, TX_4X4);
}
}
-void vp9_optimize_sb64y_32x32(MACROBLOCK *x) {
+void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
@@ -965,12 +1076,12 @@
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
- optimize_b(x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
ta + x_idx, tl + y_idx, TX_32X32);
}
}
-void vp9_optimize_sb64y_16x16(MACROBLOCK *x) {
+void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
@@ -993,12 +1104,12 @@
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
- optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
ta + x_idx, tl + y_idx, TX_16X16);
}
}
-void vp9_optimize_sb64y_8x8(MACROBLOCK *x) {
+void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
@@ -1029,12 +1140,12 @@
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
- optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
ta + x_idx, tl + y_idx, TX_8X8);
}
}
-void vp9_optimize_sb64y_4x4(MACROBLOCK *x) {
+void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT ta[16], tl[16];
int n;
@@ -1049,12 +1160,12 @@
for (n = 0; n < 256; n++) {
const int x_idx = n & 15, y_idx = n >> 4;
- optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
ta + x_idx, tl + y_idx, TX_4X4);
}
}
-void vp9_optimize_sb64uv_32x32(MACROBLOCK *x) {
+void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
@@ -1072,12 +1183,12 @@
l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
&a_ec, &l_ec, TX_32X32);
}
}
-void vp9_optimize_sb64uv_16x16(MACROBLOCK *x) {
+void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
@@ -1094,14 +1205,14 @@
l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
&above_ec, &left_ec, TX_16X16);
a[0] = a[1] = a1[0] = a1[1] = above_ec;
l[0] = l[1] = l1[0] = l1[1] = left_ec;
}
}
-void vp9_optimize_sb64uv_8x8(MACROBLOCK *x) {
+void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
@@ -1116,14 +1227,14 @@
l = tl + vp9_block2left_sb64[TX_8X8][b];
above_ec = (a[0] + a[1]) != 0;
left_ec = (l[0] + l[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
&above_ec, &left_ec, TX_8X8);
a[0] = a[1] = above_ec;
l[0] = l[1] = left_ec;
}
}
-void vp9_optimize_sb64uv_4x4(MACROBLOCK *x) {
+void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
@@ -1136,12 +1247,12 @@
const int cidx = b >= 320 ? 20 : 16;
a = ta + vp9_block2above_sb64[TX_4X4][b];
l = tl + vp9_block2left_sb64[TX_4X4][b];
- optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
a, l, TX_4X4);
}
}
-void vp9_fidct_mb(MACROBLOCK *x) {
+void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *const xd = &x->e_mbd;
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -1149,7 +1260,7 @@
vp9_transform_mb_16x16(x);
vp9_quantize_mb_16x16(x);
if (x->optimize)
- optimize_mb_16x16(x);
+ optimize_mb_16x16(cm, x);
vp9_inverse_transform_mb_16x16(xd);
} else if (tx_size == TX_8X8) {
if (xd->mode_info_context->mbmi.mode == SPLITMV) {
@@ -1159,8 +1270,8 @@
vp9_quantize_mby_8x8(x);
vp9_quantize_mbuv_4x4(x);
if (x->optimize) {
- vp9_optimize_mby_8x8(x);
- vp9_optimize_mbuv_4x4(x);
+ vp9_optimize_mby_8x8(cm, x);
+ vp9_optimize_mbuv_4x4(cm, x);
}
vp9_inverse_transform_mby_8x8(xd);
vp9_inverse_transform_mbuv_4x4(xd);
@@ -1168,24 +1279,25 @@
vp9_transform_mb_8x8(x);
vp9_quantize_mb_8x8(x);
if (x->optimize)
- optimize_mb_8x8(x);
+ optimize_mb_8x8(cm, x);
vp9_inverse_transform_mb_8x8(xd);
}
} else {
transform_mb_4x4(x);
vp9_quantize_mb_4x4(x);
if (x->optimize)
- optimize_mb_4x4(x);
+ optimize_mb_4x4(cm, x);
vp9_inverse_transform_mb_4x4(xd);
}
}
-void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col) {
+void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int mb_row, int mb_col) {
MACROBLOCKD *const xd = &x->e_mbd;
vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
subtract_mb(x);
- vp9_fidct_mb(x);
+ vp9_fidct_mb(cm, x);
vp9_recon_mb(xd);
}
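
The nzc changes to optimize_b() fold the count's signalling cost into the trellis: the EOB sentinel rate starts from nzc_cost[nzc], and zeroing a coefficient credits back one step down the cost table. A minimal standalone sketch of that bookkeeping, using a hypothetical cost table rather than the codec's trained tables:

#include <stdio.h>

/* Rate change, in the same cost units, from forcing one coefficient
 * to zero: the token rate is saved, and the nzc symbol is re-priced
 * one step down the table (nzc -> nzc - 1). */
static int rate_delta_from_zeroing(const unsigned int *nzc_cost,
                                   int nzc, int token_rate_saved) {
  const int nzc_rate_delta = (int)(nzc_cost[nzc] - nzc_cost[nzc - 1]);
  return -token_rate_saved - nzc_rate_delta;
}

int main(void) {
  /* Hypothetical table: signalling more nonzeros costs more bits. */
  const unsigned int nzc_cost[5] = { 10, 60, 130, 220, 330 };
  /* Zeroing a 95-bit token while nzc drops 3 -> 2 changes the rate
   * by -95 - (220 - 130) = -185. */
  printf("%d\n", rate_delta_from_zeroing(nzc_cost, 3, 95));
  return 0;
}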
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 917cf8b..242afbe 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -13,6 +13,8 @@
#include "./vpx_config.h"
#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/common/vp9_onyxc_int.h"
typedef struct {
MB_PREDICTION_MODE mode;
@@ -21,60 +23,60 @@
} MODE_DEFINITION;
-#include "vp9/encoder/vp9_onyx_int.h"
struct VP9_ENCODER_RTCD;
-void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col);
+void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int mb_row, int mb_col);
void vp9_transform_mbuv_4x4(MACROBLOCK *x);
void vp9_transform_mby_4x4(MACROBLOCK *x);
-void vp9_optimize_mby_4x4(MACROBLOCK *x);
-void vp9_optimize_mbuv_4x4(MACROBLOCK *x);
+void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mb_8x8(MACROBLOCK *mb);
void vp9_transform_mby_8x8(MACROBLOCK *x);
void vp9_transform_mbuv_8x8(MACROBLOCK *x);
-void vp9_optimize_mby_8x8(MACROBLOCK *x);
-void vp9_optimize_mbuv_8x8(MACROBLOCK *x);
+void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_mb_16x16(MACROBLOCK *mb);
void vp9_transform_mby_16x16(MACROBLOCK *x);
-void vp9_optimize_mby_16x16(MACROBLOCK *x);
+void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sby_32x32(MACROBLOCK *x);
-void vp9_optimize_sby_32x32(MACROBLOCK *x);
+void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sby_16x16(MACROBLOCK *x);
-void vp9_optimize_sby_16x16(MACROBLOCK *x);
+void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sby_8x8(MACROBLOCK *x);
-void vp9_optimize_sby_8x8(MACROBLOCK *x);
+void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sby_4x4(MACROBLOCK *x);
-void vp9_optimize_sby_4x4(MACROBLOCK *x);
+void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sbuv_16x16(MACROBLOCK *x);
-void vp9_optimize_sbuv_16x16(MACROBLOCK *x);
+void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sbuv_8x8(MACROBLOCK *x);
-void vp9_optimize_sbuv_8x8(MACROBLOCK *x);
+void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sbuv_4x4(MACROBLOCK *x);
-void vp9_optimize_sbuv_4x4(MACROBLOCK *x);
+void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64y_32x32(MACROBLOCK *x);
-void vp9_optimize_sb64y_32x32(MACROBLOCK *x);
+void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64y_16x16(MACROBLOCK *x);
-void vp9_optimize_sb64y_16x16(MACROBLOCK *x);
+void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64y_8x8(MACROBLOCK *x);
-void vp9_optimize_sb64y_8x8(MACROBLOCK *x);
+void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64y_4x4(MACROBLOCK *x);
-void vp9_optimize_sb64y_4x4(MACROBLOCK *x);
+void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64uv_32x32(MACROBLOCK *x);
-void vp9_optimize_sb64uv_32x32(MACROBLOCK *x);
+void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64uv_16x16(MACROBLOCK *x);
-void vp9_optimize_sb64uv_16x16(MACROBLOCK *x);
+void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64uv_8x8(MACROBLOCK *x);
-void vp9_optimize_sb64uv_8x8(MACROBLOCK *x);
+void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sb64uv_4x4(MACROBLOCK *x);
-void vp9_optimize_sb64uv_4x4(MACROBLOCK *x);
+void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
-void vp9_fidct_mb(MACROBLOCK *x);
+void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 337276d..5e2f323 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -378,6 +378,19 @@
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
int new_mv_mode_penalty = 256;
+ int sr = 0;
+ int quart_frm = MIN(cpi->common.Width, cpi->common.Height);
+
+ // Refine the motion search range according to the frame dimensions
+ // for the first-pass test.
+ while ((quart_frm << sr) < MAX_FULL_PEL_VAL)
+ sr++;
+ if (sr)
+ sr--;
+
+ step_param += sr;
+ further_steps -= sr;
+
// override the default variance function to use MSE
v_fn_ptr.vf = vp9_mse16x16;
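
As a worked example, for a 320x240 first pass quart_frm is 240: the loop exits once (240 << 3) = 1920 exceeds MAX_FULL_PEL_VAL = 1023, the decrement leaves sr = 2, and the search is trimmed by two steps (step_param += 2, further_steps -= 2).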
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 121de65..715d683 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -420,7 +420,7 @@
cpi->static_mb_pct = (ncnt[1] * 100) / cm->MBs;
// This error case should not be reachable as this function should
- // never be called with the common data structure unititialized.
+ // never be called with the common data structure uninitialized.
else
cpi->static_mb_pct = 0;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 300d9f8..5fd1e83 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -21,9 +21,9 @@
void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL +
- ((ref_mv->as_mv.col & 7) ? 1 : 0);
+ ((ref_mv->as_mv.col & 7) ? 1 : 0);
int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL +
- ((ref_mv->as_mv.row & 7) ? 1 : 0);
+ ((ref_mv->as_mv.row & 7) ? 1 : 0);
int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
@@ -38,6 +38,19 @@
x->mv_row_max = row_max;
}
+int vp9_init_search_range(int width, int height) {
+ int sr = 0;
+ int frm = MIN(width, height);
+
+ while ((frm << sr) < MAX_FULL_PEL_VAL)
+ sr++;
+
+ if (sr)
+ sr--;
+
+ return sr;
+}
+
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
int weight, int ishp) {
MV v;
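
vp9_init_search_range() above is small enough to check by hand; a standalone sketch with the new MAX_FULL_PEL_VAL of (1 << 10) - 1 = 1023:

#include <stdio.h>

#define MAX_FULL_PEL_VAL ((1 << 10) - 1)
#define MIN(a, b) ((a) < (b) ? (a) : (b))

static int init_search_range(int width, int height) {
  int sr = 0;
  int frm = MIN(width, height);
  /* Scale the smaller dimension up until it covers the maximum
   * full-pel motion range, then back off one step. */
  while ((frm << sr) < MAX_FULL_PEL_VAL)
    sr++;
  if (sr)
    sr--;
  return sr;
}

int main(void) {
  /* 320x240: 240 -> 480 -> 960 -> 1920 stops at sr = 3, minus one
   * gives 2; 1920x1080: 1080 already covers 1023, so sr is 0. */
  printf("%d %d\n", init_search_range(320, 240),
         init_search_range(1920, 1080));
  return 0;
}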
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 2479d72..d5c7032 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -19,12 +19,17 @@
void print_mode_context(VP9_COMMON *pc);
#endif
-
-#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units
-#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
+// The maximum number of steps in a step search given the largest
+// allowed initial step
+#define MAX_MVSEARCH_STEPS 10
+// Max full pel mv specified in 1 pel units
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
+// Maximum size of the first step in full pel units
+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
+int vp9_init_search_range(int width, int height);
+
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
int *mvcost[2], int weight, int ishp);
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
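
Raising MAX_MVSEARCH_STEPS from 8 to 10 grows MAX_FULL_PEL_VAL from (1 << 8) - 1 = 255 to (1 << 10) - 1 = 1023 full-pel units and MAX_FIRST_STEP from 128 to 512; vp9_init_search_range() above exists to scale that enlarged range back down on small frames.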
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 6335827..5a565fc 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -114,6 +114,13 @@
extern void print_nmvstats();
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+extern void init_nzcstats();
+extern void print_nzcstats();
+#endif
+#endif
+
#ifdef SPEEDSTATS
unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif
@@ -1526,6 +1533,11 @@
#ifdef NMV_STATS
init_nmvstats();
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+ init_nzcstats();
+#endif
+#endif
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90 << 12;
@@ -1697,6 +1709,13 @@
cpi->common.error.setjmp = 0;
vp9_zero(cpi->y_uv_mode_count)
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_zero(cm->fc.nzc_counts_4x4);
+ vp9_zero(cm->fc.nzc_counts_8x8);
+ vp9_zero(cm->fc.nzc_counts_16x16);
+ vp9_zero(cm->fc.nzc_counts_32x32);
+ vp9_zero(cm->fc.nzc_pcat_counts);
+#endif
return (VP9_PTR) cpi;
}
@@ -1724,6 +1743,12 @@
if (cpi->pass != 1)
print_nmvstats();
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+ if (cpi->pass != 1)
+ print_nzcstats();
+#endif
+#endif
#if CONFIG_INTERNAL_STATS
@@ -2845,7 +2870,15 @@
cpi->active_best_quality * 15 / 16;
}
} else {
+#ifdef ONE_SHOT_Q_ESTIMATE
+#ifdef STRICT_ONE_SHOT_Q
+ cpi->active_best_quality = Q;
+#else
cpi->active_best_quality = inter_minq[Q];
+#endif
+#else
+ cpi->active_best_quality = inter_minq[Q];
+#endif
// For the constant/constrained quality mode we dont want
// q to fall below the cq level.
@@ -3332,8 +3365,12 @@
vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32);
if (!cpi->common.error_resilient_mode &&
- !cpi->common.frame_parallel_decoding_mode)
+ !cpi->common.frame_parallel_decoding_mode) {
vp9_adapt_coef_probs(&cpi->common);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_adapt_nzc_probs(&cpi->common);
+#endif
+ }
if (cpi->common.frame_type != KEY_FRAME) {
vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count);
vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 3dc4772..300e128 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -31,6 +31,7 @@
// Experimental rate control switches
// #define ONE_SHOT_Q_ESTIMATE 1
+// #define STRICT_ONE_SHOT_Q 1
// #define DISABLE_RC_LONG_TERM_MEM 1
// #define SPEEDSTATS 1
@@ -111,6 +112,18 @@
int mv_ref_ct[INTER_MODE_CONTEXTS][4][2];
int vp9_mode_contexts[INTER_MODE_CONTEXTS][4];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob nzc_probs_4x4
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES];
+ vp9_prob nzc_probs_8x8
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES];
+ vp9_prob nzc_probs_16x16
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES];
+ vp9_prob nzc_probs_32x32
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES];
+ vp9_prob nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA];
+#endif
} CODING_CONTEXT;
typedef struct {
@@ -480,6 +493,25 @@
vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES];
vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob frame_nzc_probs_4x4
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES];
+ unsigned int frame_nzc_branch_ct_4x4
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES][2];
+ vp9_prob frame_nzc_probs_8x8
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES];
+ unsigned int frame_nzc_branch_ct_8x8
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES][2];
+ vp9_prob frame_nzc_probs_16x16
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES];
+ unsigned int frame_nzc_branch_ct_16x16
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES][2];
+ vp9_prob frame_nzc_probs_32x32
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES];
+ unsigned int frame_nzc_branch_ct_32x32
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES][2];
+#endif
+
int gfu_boost;
int last_boost;
int kf_boost;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 75f22fa..9ac2c84 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -21,35 +21,46 @@
extern int enc_debug;
#endif
+static INLINE int plane_idx(MACROBLOCKD *xd, int b_idx) {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ if (b_idx < (16 << (sb_type * 2)))
+ return 0; // Y
+ else if (b_idx < (20 << (sb_type * 2)))
+ return 16; // U
+ assert(b_idx < (24 << (sb_type * 2)));
+ return 20; // V
+}
+
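
A standalone check of the plane_idx() boundaries above, with sb_type standing in for mbmi.sb_type (0 = 16x16 MB, 1 = 32x32 SB, 2 = 64x64 SB):

#include <assert.h>

static int plane_idx(int sb_type, int b_idx) {
  if (b_idx < (16 << (sb_type * 2)))
    return 0;   /* Y */
  else if (b_idx < (20 << (sb_type * 2)))
    return 16;  /* U */
  assert(b_idx < (24 << (sb_type * 2)));
  return 20;    /* V */
}

int main(void) {
  /* 16x16 MB: Y = 0..15, U = 16..19, V = 20..23. */
  assert(plane_idx(0, 15) == 0 && plane_idx(0, 16) == 16 && plane_idx(0, 20) == 20);
  /* 32x32 SB: Y = 0..63, U = 64..79, V = 80..95, matching the 64/80
   * block offsets used by vp9_quantize_sbuv_16x16() below. */
  assert(plane_idx(1, 63) == 0 && plane_idx(1, 64) == 16 && plane_idx(1, 80) == 20);
  /* 64x64 SB: Y = 0..255, U = 256..319, V = 320..383. */
  assert(plane_idx(2, 255) == 0 && plane_idx(2, 256) == 16 && plane_idx(2, 320) == 20);
  return 0;
}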
void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const b = &mb->block[b_idx];
- BLOCKD *const d = &xd->block[b_idx];
+ BLOCK *const b = &mb->block[0];
+ BLOCKD *const d = &xd->block[0];
int i, rc, eob;
int zbin;
int x, y, z, sz;
+ int16_t *coeff_ptr = mb->coeff + b_idx * 16;
+ int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16;
int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int16_t *coeff_ptr = b->coeff;
int16_t *zbin_ptr = b->zbin;
int16_t *round_ptr = b->round;
int16_t *quant_ptr = b->quant;
uint8_t *quant_shift_ptr = b->quant_shift;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
int16_t *dequant_ptr = d->dequant;
int zbin_oq_value = b->zbin_extra;
+ const int *pt_scan;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
- int const *pt_scan ;
-
+ assert(plane_idx(xd, b_idx) == 0);
switch (tx_type) {
case ADST_DCT:
pt_scan = vp9_row_scan_4x4;
break;
-
case DCT_ADST:
pt_scan = vp9_col_scan_4x4;
break;
-
default:
pt_scan = vp9_default_zig_zag1d_4x4;
break;
@@ -81,6 +92,9 @@
if (y) {
eob = i; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
}
}
@@ -88,25 +102,32 @@
}
xd->eobs[b_idx] = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = nzc;
+#endif
}
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const b = &mb->block[b_idx];
- BLOCKD *const d = &xd->block[b_idx];
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
int i, rc, eob;
int zbin;
int x, y, z, sz;
+ int16_t *coeff_ptr = mb->coeff + b_idx * 16;
+ int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16;
int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int16_t *coeff_ptr = b->coeff;
int16_t *zbin_ptr = b->zbin;
int16_t *round_ptr = b->round;
int16_t *quant_ptr = b->quant;
uint8_t *quant_shift_ptr = b->quant_shift;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
int16_t *dequant_ptr = d->dequant;
int zbin_oq_value = b->zbin_extra;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
vpx_memset(qcoeff_ptr, 0, 32);
vpx_memset(dqcoeff_ptr, 0, 32);
@@ -135,6 +156,9 @@
if (y) {
eob = i; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
}
}
@@ -142,13 +166,16 @@
}
xd->eobs[b_idx] = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = nzc;
+#endif
}
-void vp9_quantize_mby_4x4_c(MACROBLOCK *x) {
+void vp9_quantize_mby_4x4(MACROBLOCK *x) {
int i;
for (i = 0; i < 16; i++) {
- TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, &x->e_mbd.block[i]);
+ TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, i);
if (tx_type != DCT_DCT) {
vp9_ht_quantize_b_4x4(x, i, tx_type);
} else {
@@ -157,24 +184,25 @@
}
}
-void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) {
+void vp9_quantize_mbuv_4x4(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i++)
x->quantize_b_4x4(x, i);
}
-void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {
- vp9_quantize_mby_4x4_c(x);
- vp9_quantize_mbuv_4x4_c(x);
+void vp9_quantize_mb_4x4(MACROBLOCK *x) {
+ vp9_quantize_mby_4x4(x);
+ vp9_quantize_mbuv_4x4(x);
}
void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const b = &mb->block[b_idx];
- BLOCKD *const d = &xd->block[b_idx];
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
+ int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx;
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
@@ -185,13 +213,16 @@
int x, y, z, sz;
int zero_run;
int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int16_t *coeff_ptr = b->coeff;
+ int16_t *coeff_ptr = mb->coeff + 16 * b_idx;
int16_t *zbin_ptr = b->zbin;
int16_t *round_ptr = b->round;
int16_t *quant_ptr = b->quant;
uint8_t *quant_shift_ptr = b->quant_shift;
int16_t *dequant_ptr = d->dequant;
int zbin_oq_value = b->zbin_extra;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
eob = -1;
@@ -215,6 +246,9 @@
if (y) {
eob = 0; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
zero_run = 0;
}
}
@@ -241,19 +275,33 @@
if (y) {
eob = i; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
zero_run = 0;
}
}
}
xd->eobs[b_idx] = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = nzc;
+#endif
} else {
xd->eobs[b_idx] = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = 0;
+#endif
}
}
void vp9_quantize_mby_8x8(MACROBLOCK *x) {
int i;
+#if CONFIG_CODE_NONZEROCOUNT
+ for (i = 0; i < 16; i++) {
+ x->e_mbd.nzcs[i] = 0;
+ }
+#endif
for (i = 0; i < 16; i += 4) {
x->quantize_b_8x8(x, i);
}
@@ -262,6 +310,11 @@
void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
int i;
+#if CONFIG_CODE_NONZEROCOUNT
+ for (i = 16; i < 24; i++) {
+ x->e_mbd.nzcs[i] = 0;
+ }
+#endif
for (i = 16; i < 24; i += 4)
x->quantize_b_8x8(x, i);
}
@@ -272,6 +325,12 @@
}
void vp9_quantize_mby_16x16(MACROBLOCK *x) {
+#if CONFIG_CODE_NONZEROCOUNT
+ int i;
+ for (i = 0; i < 16; i++) {
+ x->e_mbd.nzcs[i] = 0;
+ }
+#endif
x->quantize_b_16x16(x, 0);
}
@@ -286,12 +345,19 @@
uint8_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
int16_t *dequant_ptr, int zbin_oq_value,
- uint16_t *eob_ptr, const int *scan, int mul) {
+ uint16_t *eob_ptr,
+#if CONFIG_CODE_NONZEROCOUNT
+ uint16_t *nzc_ptr,
+#endif
+ const int *scan, int mul) {
int i, rc, eob;
int zbin;
int x, y, z, sz;
int zero_run = 0;
int16_t *zbin_boost_ptr = zbin_boost_orig_ptr;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
@@ -320,325 +386,173 @@
if (y) {
eob = i; // last nonzero coeffs
zero_run = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
}
}
}
}
*eob_ptr = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ *nzc_ptr = nzc;
+#endif
}
void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const b = &mb->block[b_idx];
- BLOCKD *const d = &xd->block[b_idx];
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
+
quantize(b->zrun_zbin_boost,
- b->coeff,
+ mb->coeff + 16 * b_idx,
256, b->skip_block,
b->zbin, b->round, b->quant, b->quant_shift,
- d->qcoeff,
- d->dqcoeff,
+ xd->qcoeff + 16 * b_idx,
+ xd->dqcoeff + 16 * b_idx,
d->dequant,
b->zbin_extra,
- &xd->eobs[b_idx], vp9_default_zig_zag1d_16x16, 1);
+ &xd->eobs[b_idx],
+#if CONFIG_CODE_NONZEROCOUNT
+ &xd->nzcs[b_idx],
+#endif
+ vp9_default_zig_zag1d_16x16, 1);
}
-void vp9_quantize_sby_32x32(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
+void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
quantize(b->zrun_zbin_boost,
- x->coeff,
+ mb->coeff + b_idx * 16,
1024, b->skip_block,
b->zbin,
b->round, b->quant, b->quant_shift,
- xd->qcoeff,
- xd->dqcoeff,
+ xd->qcoeff + b_idx * 16,
+ xd->dqcoeff + b_idx * 16,
d->dequant,
b->zbin_extra,
- &xd->eobs[0],
+ &xd->eobs[b_idx],
+#if CONFIG_CODE_NONZEROCOUNT
+ &xd->nzcs[b_idx],
+#endif
vp9_default_zig_zag1d_32x32, 2);
}
+void vp9_quantize_sby_32x32(MACROBLOCK *x) {
+ vp9_regular_quantize_b_32x32(x, 0);
+}
+
void vp9_quantize_sby_16x16(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
int n;
for (n = 0; n < 4; n++)
- quantize(b->zrun_zbin_boost,
- x->coeff + n * 256,
- 256, b->skip_block,
- b->zbin,
- b->round, b->quant, b->quant_shift,
- xd->qcoeff + n * 256,
- xd->dqcoeff + n * 256,
- d->dequant,
- b->zbin_extra,
- &xd->eobs[n * 16],
- vp9_default_zig_zag1d_16x16, 1);
+ x->quantize_b_16x16(x, n * 16);
}
void vp9_quantize_sby_8x8(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
int n;
for (n = 0; n < 16; n++)
- quantize(b->zrun_zbin_boost,
- x->coeff + n * 64,
- 64, b->skip_block,
- b->zbin,
- b->round, b->quant, b->quant_shift,
- xd->qcoeff + n * 64,
- xd->dqcoeff + n * 64,
- d->dequant,
- b->zbin_extra,
- &xd->eobs[n * 4],
- vp9_default_zig_zag1d_8x8, 1);
+ x->quantize_b_8x8(x, n * 4);
}
void vp9_quantize_sby_4x4(MACROBLOCK *x) {
MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
int n;
- for (n = 0; n < 64; n++)
- quantize(b->zrun_zbin_boost,
- x->coeff + n * 16,
- 16, b->skip_block,
- b->zbin,
- b->round, b->quant, b->quant_shift,
- xd->qcoeff + n * 16,
- xd->dqcoeff + n * 16,
- d->dequant,
- b->zbin_extra,
- &xd->eobs[n],
- vp9_default_zig_zag1d_4x4, 1);
+ for (n = 0; n < 64; n++) {
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_quantize_b_4x4(x, n, tx_type);
+ } else {
+ x->quantize_b_4x4(x, n);
+ }
+ }
}
void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
- int i;
- MACROBLOCKD *const xd = &x->e_mbd;
-
- for (i = 64; i < 96; i += 16) {
- int cidx = i < 80 ? 16 : 20;
- quantize(x->block[cidx].zrun_zbin_boost,
- x->coeff + i * 16,
- 256, x->block[cidx].skip_block,
- x->block[cidx].zbin, x->block[cidx].round,
- x->block[cidx].quant, x->block[cidx].quant_shift,
- xd->qcoeff + i * 16,
- xd->dqcoeff + i * 16,
- xd->block[cidx].dequant,
- x->block[cidx].zbin_extra,
- &xd->eobs[i],
- vp9_default_zig_zag1d_16x16, 1);
- }
+ x->quantize_b_16x16(x, 64);
+ x->quantize_b_16x16(x, 80);
}
void vp9_quantize_sbuv_8x8(MACROBLOCK *x) {
int i;
- MACROBLOCKD *const xd = &x->e_mbd;
- for (i = 64; i < 96; i += 4) {
- int cidx = i < 80 ? 16 : 20;
- quantize(x->block[cidx].zrun_zbin_boost,
- x->coeff + i * 16,
- 64, x->block[cidx].skip_block,
- x->block[cidx].zbin, x->block[cidx].round,
- x->block[cidx].quant, x->block[cidx].quant_shift,
- xd->qcoeff + i * 16,
- xd->dqcoeff + i * 16,
- xd->block[cidx].dequant,
- x->block[cidx].zbin_extra,
- &xd->eobs[i],
- vp9_default_zig_zag1d_8x8, 1);
- }
+ for (i = 64; i < 96; i += 4)
+ x->quantize_b_8x8(x, i);
}
void vp9_quantize_sbuv_4x4(MACROBLOCK *x) {
int i;
- MACROBLOCKD *const xd = &x->e_mbd;
- for (i = 64; i < 96; i++) {
- int cidx = i < 80 ? 16 : 20;
- quantize(x->block[cidx].zrun_zbin_boost,
- x->coeff + i * 16,
- 16, x->block[cidx].skip_block,
- x->block[cidx].zbin, x->block[cidx].round,
- x->block[cidx].quant, x->block[cidx].quant_shift,
- xd->qcoeff + i * 16,
- xd->dqcoeff + i * 16,
- xd->block[cidx].dequant,
- x->block[cidx].zbin_extra,
- &xd->eobs[i],
- vp9_default_zig_zag1d_4x4, 1);
- }
+ for (i = 64; i < 96; i++)
+ x->quantize_b_4x4(x, i);
}
void vp9_quantize_sb64y_32x32(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
int n;
for (n = 0; n < 4; n++)
- quantize(b->zrun_zbin_boost,
- x->coeff + n * 1024,
- 1024, b->skip_block,
- b->zbin,
- b->round, b->quant, b->quant_shift,
- xd->qcoeff + n * 1024,
- xd->dqcoeff + n * 1024,
- d->dequant,
- b->zbin_extra,
- &xd->eobs[n * 64],
- vp9_default_zig_zag1d_32x32, 2);
+ vp9_regular_quantize_b_32x32(x, n * 64);
}
void vp9_quantize_sb64y_16x16(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
int n;
for (n = 0; n < 16; n++)
- quantize(b->zrun_zbin_boost,
- x->coeff + n * 256,
- 256, b->skip_block,
- b->zbin,
- b->round, b->quant, b->quant_shift,
- xd->qcoeff + n * 256,
- xd->dqcoeff + n * 256,
- d->dequant,
- b->zbin_extra,
- &xd->eobs[n * 16],
- vp9_default_zig_zag1d_16x16, 1);
+ x->quantize_b_16x16(x, n * 16);
}
void vp9_quantize_sb64y_8x8(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
int n;
for (n = 0; n < 64; n++)
- quantize(b->zrun_zbin_boost,
- x->coeff + n * 64,
- 64, b->skip_block,
- b->zbin,
- b->round, b->quant, b->quant_shift,
- xd->qcoeff + n * 64,
- xd->dqcoeff + n * 64,
- d->dequant,
- b->zbin_extra,
- &xd->eobs[n * 4],
- vp9_default_zig_zag1d_8x8, 1);
+ x->quantize_b_8x8(x, n * 4);
}
void vp9_quantize_sb64y_4x4(MACROBLOCK *x) {
MACROBLOCKD *const xd = &x->e_mbd;
- BLOCK *const b = &x->block[0];
- BLOCKD *const d = &xd->block[0];
int n;
- for (n = 0; n < 256; n++)
- quantize(b->zrun_zbin_boost,
- x->coeff + n * 16,
- 16, b->skip_block,
- b->zbin,
- b->round, b->quant, b->quant_shift,
- xd->qcoeff + n * 16,
- xd->dqcoeff + n * 16,
- d->dequant,
- b->zbin_extra,
- &xd->eobs[n],
- vp9_default_zig_zag1d_4x4, 1);
+ for (n = 0; n < 256; n++) {
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_quantize_b_4x4(x, n, tx_type);
+ } else {
+ x->quantize_b_4x4(x, n);
+ }
+ }
}
void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) {
- int i;
- MACROBLOCKD *const xd = &x->e_mbd;
-
- for (i = 256; i < 384; i += 64) {
- int cidx = i < 320 ? 16 : 20;
- quantize(x->block[cidx].zrun_zbin_boost,
- x->coeff + i * 16,
- 1024, x->block[cidx].skip_block,
- x->block[cidx].zbin, x->block[cidx].round,
- x->block[cidx].quant, x->block[cidx].quant_shift,
- xd->qcoeff + i * 16,
- xd->dqcoeff + i * 16,
- xd->block[cidx].dequant,
- x->block[cidx].zbin_extra,
- &xd->eobs[i],
- vp9_default_zig_zag1d_32x32, 2);
- }
+ vp9_regular_quantize_b_32x32(x, 256);
+ vp9_regular_quantize_b_32x32(x, 320);
}
void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) {
int i;
- MACROBLOCKD *const xd = &x->e_mbd;
- for (i = 256; i < 384; i += 16) {
- int cidx = i < 320 ? 16 : 20;
- quantize(x->block[cidx].zrun_zbin_boost,
- x->coeff + i * 16,
- 256, x->block[cidx].skip_block,
- x->block[cidx].zbin, x->block[cidx].round,
- x->block[cidx].quant, x->block[cidx].quant_shift,
- xd->qcoeff + i * 16,
- xd->dqcoeff + i * 16,
- xd->block[cidx].dequant,
- x->block[cidx].zbin_extra,
- &xd->eobs[i],
- vp9_default_zig_zag1d_16x16, 1);
- }
+ for (i = 256; i < 384; i += 16)
+ x->quantize_b_16x16(x, i);
}
void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) {
int i;
- MACROBLOCKD *const xd = &x->e_mbd;
- for (i = 256; i < 384; i += 4) {
- int cidx = i < 320 ? 16 : 20;
- quantize(x->block[cidx].zrun_zbin_boost,
- x->coeff + i * 16,
- 64, x->block[cidx].skip_block,
- x->block[cidx].zbin, x->block[cidx].round,
- x->block[cidx].quant, x->block[cidx].quant_shift,
- xd->qcoeff + i * 16,
- xd->dqcoeff + i * 16,
- xd->block[cidx].dequant,
- x->block[cidx].zbin_extra,
- &xd->eobs[i],
- vp9_default_zig_zag1d_8x8, 1);
- }
+ for (i = 256; i < 384; i += 4)
+ x->quantize_b_8x8(x, i);
}
void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) {
int i;
- MACROBLOCKD *const xd = &x->e_mbd;
- for (i = 256; i < 384; i++) {
- int cidx = i < 320 ? 16 : 20;
- quantize(x->block[cidx].zrun_zbin_boost,
- x->coeff + i * 16,
- 16, x->block[cidx].skip_block,
- x->block[cidx].zbin, x->block[cidx].round,
- x->block[cidx].quant, x->block[cidx].quant_shift,
- xd->qcoeff + i * 16,
- xd->dqcoeff + i * 16,
- xd->block[cidx].dequant,
- x->block[cidx].zbin_extra,
- &xd->eobs[i],
- vp9_default_zig_zag1d_4x4, 1);
- }
+ for (i = 256; i < 384; i++)
+ x->quantize_b_4x4(x, i);
}
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
@@ -650,8 +564,7 @@
vp9_regular_quantize_b_4x4(x, b_idx2);
}
-static void invert_quant(int16_t *quant,
- uint8_t *shift, int d) {
+static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
unsigned t;
int l;
t = d;
@@ -665,56 +578,52 @@
void vp9_init_quantizer(VP9_COMP *cpi) {
int i;
int quant_val;
- int Q;
+ int q;
static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
14, 16, 20, 24, 28, 32, 36, 40 };
- for (Q = 0; Q < QINDEX_RANGE; Q++) {
- int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;
+ for (q = 0; q < QINDEX_RANGE; q++) {
+ int qzbin_factor = (vp9_dc_quant(q, 0) < 148) ? 84 : 80;
int qrounding_factor = 48;
- if (Q == 0) {
+ if (q == 0) {
qzbin_factor = 64;
qrounding_factor = 64;
}
// dc values
- quant_val = vp9_dc_quant(Q, cpi->common.y1dc_delta_q);
- invert_quant(cpi->Y1quant[Q] + 0,
- cpi->Y1quant_shift[Q] + 0, quant_val);
- cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y1dequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+ quant_val = vp9_dc_quant(q, cpi->common.y1dc_delta_q);
+ invert_quant(cpi->Y1quant[q] + 0, cpi->Y1quant_shift[q] + 0, quant_val);
+ cpi->Y1zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->Y1round[q][0] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.Y1dequant[q][0] = quant_val;
+ cpi->zrun_zbin_boost_y1[q][0] = (quant_val * zbin_boost[0]) >> 7;
- quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
- invert_quant(cpi->UVquant[Q] + 0,
- cpi->UVquant_shift[Q] + 0, quant_val);
- cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVround[Q][0] = (qrounding_factor * quant_val) >> 7;
- cpi->common.UVdequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+ quant_val = vp9_dc_uv_quant(q, cpi->common.uvdc_delta_q);
+ invert_quant(cpi->UVquant[q] + 0, cpi->UVquant_shift[q] + 0, quant_val);
+ cpi->UVzbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->UVround[q][0] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.UVdequant[q][0] = quant_val;
+ cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7;
// all the 4x4 ac values =;
for (i = 1; i < 16; i++) {
int rc = vp9_default_zig_zag1d_4x4[i];
- quant_val = vp9_ac_yquant(Q);
- invert_quant(cpi->Y1quant[Q] + rc,
- cpi->Y1quant_shift[Q] + rc, quant_val);
- cpi->Y1zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1round[Q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y1dequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_y1[Q][i] =
- ((quant_val * zbin_boost[i]) + 64) >> 7;
+ quant_val = vp9_ac_yquant(q);
+ invert_quant(cpi->Y1quant[q] + rc, cpi->Y1quant_shift[q] + rc, quant_val);
+ cpi->Y1zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->Y1round[q][rc] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.Y1dequant[q][rc] = quant_val;
+ cpi->zrun_zbin_boost_y1[q][i] =
+ ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7);
- quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- invert_quant(cpi->UVquant[Q] + rc,
- cpi->UVquant_shift[Q] + rc, quant_val);
- cpi->UVzbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVround[Q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.UVdequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_uv[Q][i] =
- ((quant_val * zbin_boost[i]) + 64) >> 7;
+ quant_val = vp9_ac_uv_quant(q, cpi->common.uvac_delta_q);
+ invert_quant(cpi->UVquant[q] + rc, cpi->UVquant_shift[q] + rc, quant_val);
+ cpi->UVzbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->UVround[q][rc] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.UVdequant[q][rc] = quant_val;
+ cpi->zrun_zbin_boost_uv[q][i] =
+ ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7);
}
}
}
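
Assuming libvpx's usual ROUND_POWER_OF_TWO(value, n) definition of (((value) + (1 << ((n) - 1))) >> (n)), ROUND_POWER_OF_TWO(v, 7) is exactly (v + 64) >> 7, so the zbin and zbin-boost rewrites in vp9_init_quantizer() above are behavior-preserving cleanups.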
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 32eb05a..7392540 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -26,52 +26,24 @@
#include "x86/vp9_quantize_x86.h"
#endif
-#define prototype_quantize_block_type(sym) \
- void (sym)(MACROBLOCK *mb, int b_ix, TX_TYPE type)
-extern prototype_quantize_block_type(vp9_ht_quantize_b_4x4);
+void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type);
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx);
+void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2);
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx);
+void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx);
+void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx);
-#ifndef vp9_quantize_quantb_4x4
-#define vp9_quantize_quantb_4x4 vp9_regular_quantize_b_4x4
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_4x4);
-
-#ifndef vp9_quantize_quantb_4x4_pair
-#define vp9_quantize_quantb_4x4_pair vp9_regular_quantize_b_4x4_pair
-#endif
-extern prototype_quantize_block_pair(vp9_quantize_quantb_4x4_pair);
-
-#ifndef vp9_quantize_quantb_8x8
-#define vp9_quantize_quantb_8x8 vp9_regular_quantize_b_8x8
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_8x8);
-
-#ifndef vp9_quantize_quantb_16x16
-#define vp9_quantize_quantb_16x16 vp9_regular_quantize_b_16x16
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_16x16);
-
-#ifndef vp9_quantize_mb_4x4
-#define vp9_quantize_mb_4x4 vp9_quantize_mb_4x4_c
-#endif
-extern prototype_quantize_mb(vp9_quantize_mb_4x4);
+void vp9_quantize_mb_4x4(MACROBLOCK *x);
void vp9_quantize_mb_8x8(MACROBLOCK *x);
-#ifndef vp9_quantize_mbuv_4x4
-#define vp9_quantize_mbuv_4x4 vp9_quantize_mbuv_4x4_c
-#endif
-extern prototype_quantize_mb(vp9_quantize_mbuv_4x4);
+void vp9_quantize_mbuv_4x4(MACROBLOCK *x);
+void vp9_quantize_mby_4x4(MACROBLOCK *x);
-#ifndef vp9_quantize_mby_4x4
-#define vp9_quantize_mby_4x4 vp9_quantize_mby_4x4_c
-#endif
-extern prototype_quantize_mb(vp9_quantize_mby_4x4);
-
-extern prototype_quantize_mb(vp9_quantize_mby_8x8);
-extern prototype_quantize_mb(vp9_quantize_mbuv_8x8);
+void vp9_quantize_mby_8x8(MACROBLOCK *x);
+void vp9_quantize_mbuv_8x8(MACROBLOCK *x);
void vp9_quantize_mb_16x16(MACROBLOCK *x);
-extern prototype_quantize_block(vp9_quantize_quantb_16x16);
-extern prototype_quantize_mb(vp9_quantize_mby_16x16);
+void vp9_quantize_mby_16x16(MACROBLOCK *x);
void vp9_quantize_sby_32x32(MACROBLOCK *x);
void vp9_quantize_sby_16x16(MACROBLOCK *x);
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index a2a7957..82bd70b 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -120,11 +120,14 @@
double q = vp9_convert_qindex_to_q(qindex);
if (frame_type == KEY_FRAME) {
- enumerator = 4500000;
+ enumerator = 4000000;
} else {
- enumerator = 2850000;
+ enumerator = 2500000;
}
+ // Q-based adjustment to the baseline enumerator
+ enumerator += (int)(enumerator * q) >> 12;
+
return (int)(0.5 + (enumerator * correction_factor / q));
}
@@ -182,6 +185,13 @@
#if CONFIG_COMP_INTERINTRA_PRED
cc->interintra_prob = cm->fc.interintra_prob;
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(cc->nzc_probs_4x4, cm->fc.nzc_probs_4x4);
+ vp9_copy(cc->nzc_probs_8x8, cm->fc.nzc_probs_8x8);
+ vp9_copy(cc->nzc_probs_16x16, cm->fc.nzc_probs_16x16);
+ vp9_copy(cc->nzc_probs_32x32, cm->fc.nzc_probs_32x32);
+ vp9_copy(cc->nzc_pcat_probs, cm->fc.nzc_pcat_probs);
+#endif
}
void vp9_restore_coding_context(VP9_COMP *cpi) {
@@ -237,6 +247,13 @@
#if CONFIG_COMP_INTERINTRA_PRED
cm->fc.interintra_prob = cc->interintra_prob;
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(cm->fc.nzc_probs_4x4, cc->nzc_probs_4x4);
+ vp9_copy(cm->fc.nzc_probs_8x8, cc->nzc_probs_8x8);
+ vp9_copy(cm->fc.nzc_probs_16x16, cc->nzc_probs_16x16);
+ vp9_copy(cm->fc.nzc_probs_32x32, cc->nzc_probs_32x32);
+ vp9_copy(cm->fc.nzc_pcat_probs, cc->nzc_pcat_probs);
+#endif
}
void vp9_setup_key_frame(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 61379b8..3004d6b 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -156,6 +156,12 @@
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+#if CONFIG_CODE_NONZEROCOUNT
+ // All costs are without the EOB node
+ vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
+ p[i][j][k][l],
+ vp9_coef_tree);
+#else
if (l == 0 && k > 0)
vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
p[i][j][k][l],
@@ -164,9 +170,63 @@
vp9_cost_tokens((int *)(c[i][j][k][l]),
p[i][j][k][l],
vp9_coef_tree);
+#endif
}
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void fill_nzc_costs(VP9_COMP *cpi, int block_size) {
+ int nzc_context, r, b, nzc, values;
+ int cost[16];
+ values = block_size * block_size + 1;
+
+ for (nzc_context = 0; nzc_context < MAX_NZC_CONTEXTS; ++nzc_context) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ unsigned int *nzc_costs;
+ if (block_size == 4) {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_4x4[nzc_context][r][b],
+ vp9_nzc4x4_tree);
+ nzc_costs = cpi->mb.nzc_costs_4x4[nzc_context][r][b];
+ } else if (block_size == 8) {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_8x8[nzc_context][r][b],
+ vp9_nzc8x8_tree);
+ nzc_costs = cpi->mb.nzc_costs_8x8[nzc_context][r][b];
+ } else if (block_size == 16) {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_16x16[nzc_context][r][b],
+ vp9_nzc16x16_tree);
+ nzc_costs = cpi->mb.nzc_costs_16x16[nzc_context][r][b];
+ } else {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_32x32[nzc_context][r][b],
+ vp9_nzc32x32_tree);
+ nzc_costs = cpi->mb.nzc_costs_32x32[nzc_context][r][b];
+ }
+
+ for (nzc = 0; nzc < values; ++nzc) {
+ int e, c, totalcost = 0;
+ c = codenzc(nzc);
+ totalcost = cost[c];
+ if ((e = vp9_extranzcbits[c])) {
+ int x = nzc - vp9_basenzcvalue[c];
+ while (e--) {
+ totalcost += vp9_cost_bit(
+ cpi->common.fc.nzc_pcat_probs[nzc_context]
+ [c - NZC_TOKENS_NOEXTRA][e],
+ ((x >> e) & 1));
+ }
+ }
+ nzc_costs[nzc] = totalcost;
+ }
+ }
+ }
+ }
+}
+#endif
+
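+ (Reviewer note: the extra-bit loop in fill_nzc_costs() assumes each nonzero
+ count decomposes into a token plus a few literal bits. A hedged,
+ self-contained sketch of that decomposition follows; the tables here are
+ hypothetical stand-ins for codenzc()/vp9_basenzcvalue/vp9_extranzcbits,
+ which live in vp9_entropy.c.)
+
+ #include <assert.h>
+ #include <stdio.h>
+
+ static const int base_value[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 21 };
+ static const int extra_bits[] = { 0, 0, 0, 0, 0, 0, 0, 1, 2,  3,  4 };
+
+ static int code_nzc(int nzc) {  /* count -> token index */
+   int c = 0;
+   while (c + 1 < (int)(sizeof(base_value) / sizeof(base_value[0])) &&
+          nzc >= base_value[c + 1])
+     ++c;
+   return c;
+ }
+
+ int main(void) {
+   const int nzc = 11;
+   const int c = code_nzc(nzc);        /* token 8: base 9, 2 extra bits */
+   const int x = nzc - base_value[c];  /* extra-bit payload = 2 */
+   assert(x < (1 << extra_bits[c]));
+   printf("token=%d extra=%d\n", c, x);
+   return 0;
+ }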
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -274,6 +334,12 @@
cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
cpi->common.fc.coef_probs_32x32, BLOCK_TYPES);
+#if CONFIG_CODE_NONZEROCOUNT
+ fill_nzc_costs(cpi, 4);
+ fill_nzc_costs(cpi, 8);
+ fill_nzc_costs(cpi, 16);
+ fill_nzc_costs(cpi, 32);
+#endif
/*rough estimate for costing*/
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
@@ -379,25 +445,20 @@
return sse2;
}
-static INLINE int cost_coeffs(MACROBLOCK *mb,
+static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
int ib, PLANE_TYPE type,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
TX_SIZE tx_size) {
MACROBLOCKD *const xd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
- const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
int pt;
const int eob = xd->eobs[ib];
int c = 0;
- int cost = 0, seg_eob;
- const int segment_id = mbmi->segment_id;
+ int cost = 0;
const int *scan;
const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
const int ref = mbmi->ref_frame != INTRA_FRAME;
- const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 &&
- type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, &xd->block[ib]) : DCT_DCT;
unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
mb->token_costs[tx_size][type][ref];
ENTROPY_CONTEXT a_ec, l_ec;
@@ -406,29 +467,59 @@
ENTROPY_CONTEXT *const l1 = l +
sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc_context = vp9_get_nzc_context(cm, xd, ib);
+ unsigned int *nzc_cost;
+#else
+ int seg_eob;
+ const int segment_id = xd->mode_info_context->mbmi.segment_id;
+#endif
+
+ // Check for consistency of tx_size with mode info
+ if (type == PLANE_TYPE_Y_WITH_DC) {
+ assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
+ } else {
+ TX_SIZE tx_size_uv = get_uv_tx_size(xd);
+ assert(tx_size == tx_size_uv);
+ }
+
switch (tx_size) {
- case TX_4X4:
+ case TX_4X4: {
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_4x4(xd, ib) : DCT_DCT;
a_ec = *a;
l_ec = *l;
- scan = vp9_default_zig_zag1d_4x4;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type];
+#else
seg_eob = 16;
- if (type == PLANE_TYPE_Y_WITH_DC) {
- if (tx_type == ADST_DCT) {
- scan = vp9_row_scan_4x4;
- } else if (tx_type == DCT_ADST) {
- scan = vp9_col_scan_4x4;
- }
+#endif
+ if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_4x4;
+ } else if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_4x4;
+ } else {
+ scan = vp9_default_zig_zag1d_4x4;
}
break;
+ }
case TX_8X8:
a_ec = (a[0] + a[1]) != 0;
l_ec = (l[0] + l[1]) != 0;
scan = vp9_default_zig_zag1d_8x8;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
+#else
seg_eob = 64;
+#endif
break;
case TX_16X16:
scan = vp9_default_zig_zag1d_16x16;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
+#else
seg_eob = 256;
+#endif
if (type == PLANE_TYPE_UV) {
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
@@ -439,7 +530,11 @@
break;
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type];
+#else
seg_eob = 1024;
+#endif
if (type == PLANE_TYPE_UV) {
ENTROPY_CONTEXT *a2, *a3, *l2, *l3;
a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
@@ -464,21 +559,33 @@
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
+#if CONFIG_CODE_NONZEROCOUNT == 0
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
seg_eob = 0;
+#endif
{
int recent_energy = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp9_dct_value_tokens_ptr[v].Token;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc += (v != 0);
+#endif
cost += token_costs[get_coef_band(tx_size, c)][pt][t];
cost += vp9_dct_value_cost_ptr[v];
pt = vp9_get_coef_context(&recent_energy, t);
}
+#if CONFIG_CODE_NONZEROCOUNT
+ cost += nzc_cost[nzc];
+#else
if (c < seg_eob)
cost += mb->token_costs[tx_size][type][ref][get_coef_band(tx_size, c)]
[pt][DCT_EOB_TOKEN];
+#endif
}
// is eob the first coefficient?
@@ -501,27 +608,19 @@
return cost;
}
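
(Reviewer note: the rate returned by cost_coeffs() is combined with distortion through the RDCOST macro used throughout this file. The macro body below follows the conventional vp9 form but should be treated as an assumption and checked against vp9_rdopt.h; inputs are illustrative.)

#include <stdint.h>
#include <stdio.h>

#define RDCOST(RM, DM, R, D) \
  (((128 + (int64_t)(R) * (RM)) >> 8) + (int64_t)(DM) * (D))

int main(void) {
  const int rdmult = 300, rddiv = 100;  /* illustrative lambda terms */
  const int rate = 500, distortion = 1200;
  printf("%lld\n", (long long)RDCOST(rdmult, rddiv, rate, distortion));
  /* prints 120586: 586 from the rate term, 120000 from distortion */
  return 0;
}
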
-static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
+static int rdcost_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *mb) {
int cost = 0;
int b;
MACROBLOCKD *xd = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
- ENTROPY_CONTEXT *ta;
- ENTROPY_CONTEXT *tl;
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;
- if (backup) {
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
-
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- } else {
- ta = (ENTROPY_CONTEXT *)xd->above_context;
- tl = (ENTROPY_CONTEXT *)xd->left_context;
- }
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
for (b = 0; b < 16; b++)
- cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC,
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b],
TX_4X4);
@@ -529,42 +628,35 @@
return cost;
}
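
(Reviewer note: with the backup flag gone, every rdcost_* helper now unconditionally snapshots the entropy contexts onto the stack, so rate probing never mutates the caller's contexts. The pattern in isolation, with types simplified for the sketch:)

#include <string.h>

typedef struct { unsigned char y[4], u[2], v[2]; } ENTROPY_CTX_SKETCH;

static void rate_probe(const ENTROPY_CTX_SKETCH *above,
                       const ENTROPY_CTX_SKETCH *left) {
  ENTROPY_CTX_SKETCH t_above, t_left;  /* local working copies */
  memcpy(&t_above, above, sizeof(t_above));
  memcpy(&t_left, left, sizeof(t_left));
  /* ... cost coefficients while updating t_above/t_left only ... */
  (void)t_above; (void)t_left;
}
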
-static void macro_block_yrd_4x4(MACROBLOCK *mb,
- int *Rate,
- int *Distortion,
- int *skippable, int backup) {
+static void macro_block_yrd_4x4(VP9_COMMON *const cm,
+ MACROBLOCK *mb,
+ int *rate,
+ int *distortion,
+ int *skippable) {
MACROBLOCKD *const xd = &mb->e_mbd;
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
vp9_transform_mby_4x4(mb);
vp9_quantize_mby_4x4(mb);
- *Distortion = vp9_mbblock_error(mb) >> 2;
- *Rate = rdcost_mby_4x4(mb, backup);
+ *distortion = vp9_mbblock_error(mb) >> 2;
+ *rate = rdcost_mby_4x4(cm, mb);
*skippable = vp9_mby_is_skippable_4x4(xd);
}
-static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
+static int rdcost_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *mb) {
int cost = 0;
int b;
MACROBLOCKD *xd = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
- ENTROPY_CONTEXT *ta;
- ENTROPY_CONTEXT *tl;
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;
- if (backup) {
- vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
-
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- } else {
- ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
- tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
- }
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
for (b = 0; b < 16; b += 4)
- cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC,
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b],
TX_8X8);
@@ -572,45 +664,37 @@
return cost;
}
-static void macro_block_yrd_8x8(MACROBLOCK *mb,
- int *Rate,
- int *Distortion,
- int *skippable, int backup) {
+static void macro_block_yrd_8x8(VP9_COMMON *const cm,
+ MACROBLOCK *mb,
+ int *rate,
+ int *distortion,
+ int *skippable) {
MACROBLOCKD *const xd = &mb->e_mbd;
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
vp9_transform_mby_8x8(mb);
vp9_quantize_mby_8x8(mb);
- *Distortion = vp9_mbblock_error(mb) >> 2;
- *Rate = rdcost_mby_8x8(mb, backup);
+ *distortion = vp9_mbblock_error(mb) >> 2;
+ *rate = rdcost_mby_8x8(cm, mb);
*skippable = vp9_mby_is_skippable_8x8(xd);
}
-static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
- int cost;
- MACROBLOCKD *xd = &mb->e_mbd;
+static int rdcost_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *mb) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
- ENTROPY_CONTEXT *ta, *tl;
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;
- if (backup) {
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- } else {
- ta = (ENTROPY_CONTEXT *)xd->above_context;
- tl = (ENTROPY_CONTEXT *)xd->left_context;
- }
-
- cost = cost_coeffs(mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
- return cost;
+ return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
}
-static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
- int *skippable, int backup) {
- MACROBLOCKD *xd = &mb->e_mbd;
+static void macro_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *mb,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
xd->mode_info_context->mbmi.txfm_size = TX_16X16;
vp9_transform_mby_16x16(mb);
@@ -620,10 +704,10 @@
// optimization in the rate-distortion optimization loop?
if (mb->optimize &&
xd->mode_info_context->mbmi.mode < I8X8_PRED)
- vp9_optimize_mby_16x16(mb);
+ vp9_optimize_mby_16x16(cm, mb);
- *Distortion = vp9_mbblock_error(mb) >> 2;
- *Rate = rdcost_mby_16x16(mb, backup);
+ *distortion = vp9_mbblock_error(mb) >> 2;
+ *rate = rdcost_mby_16x16(cm, mb);
*skippable = vp9_mby_is_skippable_16x16(xd);
}
@@ -715,15 +799,16 @@
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int *skippable,
int64_t txfm_cache[NB_TXFM_MODES]) {
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB];
vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
x->block[0].src_stride);
- macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
- macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
- macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);
+ macro_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
+ macro_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
+ macro_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
txfm_cache, TX_16X16);
@@ -738,27 +823,8 @@
d[12] = p[12];
}
-static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
- MACROBLOCKD * const xd = &x->e_mbd;
- ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
- ENTROPY_CONTEXT *ta, *tl;
-
- if (backup) {
- ta = (ENTROPY_CONTEXT *) &t_above,
- tl = (ENTROPY_CONTEXT *) &t_left;
-
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
- } else {
- ta = (ENTROPY_CONTEXT *) xd->above_context;
- tl = (ENTROPY_CONTEXT *) xd->left_context;
- }
-
- return cost_coeffs(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
-}
-
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
- int block_size) {
+ int block_size, int shift) {
int i;
int64_t error = 0;
@@ -766,32 +832,126 @@
unsigned int this_diff = coeff[i] - dqcoeff[i];
error += this_diff * this_diff;
}
+ error >>= shift;
return error > INT_MAX ? INT_MAX : (int)error;
}
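
(Reviewer note: the new shift parameter folds the distortion normalization into the error sum. The 4x4/8x8/16x16 call sites below pass shift=2, matching the former ">> 2" applied to vp9_mbblock_error, while 32x32 passes 0, presumably because that transform's output scaling already differs. A minimal check of the scaling with made-up coefficients:)

#include <stdio.h>

static int sb_block_error(const short *c, const short *d, int n, int shift) {
  long long error = 0;
  int i;
  for (i = 0; i < n; i++) {
    const int diff = c[i] - d[i];
    error += (long long)diff * diff;
  }
  return (int)(error >> shift);
}

int main(void) {
  const short coeff[4] = { 10, -6, 3, 0 }, dq[4] = { 8, -5, 2, 1 };
  printf("%d vs %d\n", sb_block_error(coeff, dq, 4, 2),
         sb_block_error(coeff, dq, 4, 0));  /* 1 vs 7 */
  return 0;
}
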
-#define DEBUG_ERROR 0
-static void super_block_yrd_32x32(MACROBLOCK *x,
- int *rate, int *distortion, int *skippable,
- int backup) {
+static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
MACROBLOCKD *const xd = &x->e_mbd;
-#if DEBUG_ERROR
- int16_t out[1024];
-#endif
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 64; b++)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb[TX_4X4][b],
+ tl + vp9_block2left_sb[TX_4X4][b], TX_4X4);
+
+ return cost;
+}
+
+static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ vp9_transform_sby_4x4(x);
+ vp9_quantize_sby_4x4(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+ *rate = rdcost_sby_4x4(cm, x);
+ *skippable = vp9_sby_is_skippable_4x4(xd);
+}
+
+static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 64; b += 4)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb[TX_8X8][b],
+ tl + vp9_block2left_sb[TX_8X8][b], TX_8X8);
+
+ return cost;
+}
+
+static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+ vp9_transform_sby_8x8(x);
+ vp9_quantize_sby_8x8(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+ *rate = rdcost_sby_8x8(cm, x);
+ *skippable = vp9_sby_is_skippable_8x8(xd);
+}
+
+static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 64; b += 16)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb[TX_16X16][b],
+ tl + vp9_block2left_sb[TX_16X16][b], TX_16X16);
+
+ return cost;
+}
+
+static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_16X16;
+ vp9_transform_sby_16x16(x);
+ vp9_quantize_sby_16x16(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+ *rate = rdcost_sby_16x16(cm, x);
+ *skippable = vp9_sby_is_skippable_16x16(xd);
+}
+
+static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
+ MACROBLOCKD * const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
+}
+
+static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_32X32;
vp9_transform_sby_32x32(x);
vp9_quantize_sby_32x32(x);
-#if DEBUG_ERROR
- vp9_short_idct32x32(xd->dqcoeff, out, 64);
-#endif
- *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024);
-
-#if DEBUG_ERROR
- printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
- vp9_block_error_c(x->src_diff, out, 1024), *distortion);
-#endif
- *rate = rdcost_sby_32x32(x, backup);
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 0);
+ *rate = rdcost_sby_32x32(cm, x);
*skippable = vp9_sby_is_skippable_32x32(xd);
}
@@ -799,179 +959,166 @@
MACROBLOCK *x, int *rate, int *distortion,
int *skip,
int64_t txfm_cache[NB_TXFM_MODES]) {
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
+ int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
- ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
- *orig_above = xd->above_context;
- ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
- *orig_left = xd->left_context;
- for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
- vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
- vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
- r[n][0] = 0;
- d[n] = 0;
- s[n] = 1;
- }
+ vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
+ super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
+ super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
+ super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
+ super_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
- vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride,
- dst, dst_y_stride);
- super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
-
-#if DEBUG_ERROR
- int err[3] = { 0, 0, 0 };
-#endif
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
- int r_tmp, d_tmp, s_tmp;
-
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
-
- xd->above_context = &t_above[TX_16X16][x_idx];
- xd->left_context = &t_left[TX_16X16][y_idx];
- macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_16X16] += d_tmp;
- r[TX_16X16][0] += r_tmp;
- s[TX_16X16] = s[TX_16X16] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_16x16(xd);
- err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
-
- xd->above_context = &t_above[TX_4X4][x_idx];
- xd->left_context = &t_left[TX_4X4][y_idx];
- macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_4X4] += d_tmp;
- r[TX_4X4][0] += r_tmp;
- s[TX_4X4] = s[TX_4X4] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_4x4(xd);
- err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
-
- xd->above_context = &t_above[TX_8X8][x_idx];
- xd->left_context = &t_left[TX_8X8][y_idx];
- macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_8X8] += d_tmp;
- r[TX_8X8][0] += r_tmp;
- s[TX_8X8] = s[TX_8X8] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_8x8(xd);
- err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
- }
-#if DEBUG_ERROR
- printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
- printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
- printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
-#endif
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
TX_SIZE_MAX_SB - 1);
+}
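
(Reviewer note: after this refactor, super_block_yrd() computes one rate/distortion/skippable triple per candidate transform size over the whole superblock and hands them to choose_txfm_size_from_rd(). A minimal argmin over RD cost, ignoring the per-size signaling cost the real function also accounts for; the rdcost() form here is an assumption, and all values are illustrative:)

#include <stdint.h>
#include <stdio.h>

static int64_t rdcost(int rdmult, int rddiv, int rate, int64_t dist) {
  /* conventional vp9 weighting; treat the exact form as an assumption */
  return ((128 + (int64_t)rate * rdmult) >> 8) + (int64_t)rddiv * dist;
}

int main(void) {
  /* illustrative per-size results for {4x4, 8x8, 16x16, 32x32} */
  const int rate[4] = { 900, 700, 620, 600 };
  const int dist[4] = { 400, 450, 520, 700 };
  int best = 0, n;
  for (n = 1; n < 4; n++)
    if (rdcost(300, 100, rate[n], dist[n]) <
        rdcost(300, 100, rate[best], dist[best]))
      best = n;
  printf("best tx index: %d\n", best);  /* 0, i.e. TX_4X4 here */
  return 0;
}
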
- xd->above_context = orig_above;
- xd->left_context = orig_left;
+static int rdcost_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 256; b++)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_4X4][b],
+ tl + vp9_block2left_sb64[TX_4X4][b], TX_4X4);
+
+ return cost;
+}
+
+static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ vp9_transform_sb64y_4x4(x);
+ vp9_quantize_sb64y_4x4(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+ *rate = rdcost_sb64y_4x4(cm, x);
+ *skippable = vp9_sb64y_is_skippable_4x4(xd);
+}
+
+static int rdcost_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 256; b += 4)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_8X8][b],
+ tl + vp9_block2left_sb64[TX_8X8][b], TX_8X8);
+
+ return cost;
+}
+
+static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+ vp9_transform_sb64y_8x8(x);
+ vp9_quantize_sb64y_8x8(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+ *rate = rdcost_sb64y_8x8(cm, x);
+ *skippable = vp9_sb64y_is_skippable_8x8(xd);
+}
+
+static int rdcost_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 256; b += 16)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_16X16][b],
+ tl + vp9_block2left_sb64[TX_16X16][b], TX_16X16);
+
+ return cost;
+}
+
+static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion,
+ int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_16X16;
+ vp9_transform_sb64y_16x16(x);
+ vp9_quantize_sb64y_16x16(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+ *rate = rdcost_sb64y_16x16(cm, x);
+ *skippable = vp9_sb64y_is_skippable_16x16(xd);
+}
+
+static int rdcost_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD * const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 256; b += 64)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_32X32][b],
+ tl + vp9_block2left_sb64[TX_32X32][b], TX_32X32);
+
+ return cost;
+}
+
+static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion,
+ int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_32X32;
+ vp9_transform_sb64y_32x32(x);
+ vp9_quantize_sb64y_32x32(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 0);
+ *rate = rdcost_sb64y_32x32(cm, x);
+ *skippable = vp9_sb64y_is_skippable_32x32(xd);
}
static void super_block_64_yrd(VP9_COMP *cpi,
MACROBLOCK *x, int *rate, int *distortion,
int *skip,
int64_t txfm_cache[NB_TXFM_MODES]) {
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
+ int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
- ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
- *orig_above = xd->above_context;
- ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
- *orig_left = xd->left_context;
- for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
- vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
- vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
- r[n][0] = 0;
- d[n] = 0;
- s[n] = 1;
- }
+ vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
+ super_block64_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
+ super_block64_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
+ super_block64_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
+ super_block64_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
- int r_tmp, d_tmp, s_tmp;
-
- xd->above_context = &t_above[TX_32X32][x_idx << 1];
- xd->left_context = &t_left[TX_32X32][y_idx << 1];
- vp9_subtract_sby_s_c(x->src_diff,
- src + 32 * x_idx + 32 * y_idx * src_y_stride,
- src_y_stride,
- dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
- dst_y_stride);
- super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
- r[TX_32X32][0] += r_tmp;
- d[TX_32X32] += d_tmp;
- s[TX_32X32] = s[TX_32X32] && s_tmp;
- }
-
-#if DEBUG_ERROR
- int err[3] = { 0, 0, 0 };
-#endif
- for (n = 0; n < 16; n++) {
- int x_idx = n & 3, y_idx = n >> 2;
- int r_tmp, d_tmp, s_tmp;
-
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
-
- xd->above_context = &t_above[TX_16X16][x_idx];
- xd->left_context = &t_left[TX_16X16][y_idx];
- macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_16X16] += d_tmp;
- r[TX_16X16][0] += r_tmp;
- s[TX_16X16] = s[TX_16X16] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_16x16(xd);
- err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
-
- xd->above_context = &t_above[TX_4X4][x_idx];
- xd->left_context = &t_left[TX_4X4][y_idx];
- macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_4X4] += d_tmp;
- r[TX_4X4][0] += r_tmp;
- s[TX_4X4] = s[TX_4X4] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_4x4(xd);
- err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
-
- xd->above_context = &t_above[TX_8X8][x_idx];
- xd->left_context = &t_left[TX_8X8][y_idx];
- macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_8X8] += d_tmp;
- r[TX_8X8][0] += r_tmp;
- s[TX_8X8] = s[TX_8X8] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_8x8(xd);
- err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
- }
-#if DEBUG_ERROR
- printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
- printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
- printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
-#endif
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
TX_SIZE_MAX_SB - 1);
-
- xd->above_context = orig_above;
- xd->left_context = orig_left;
}
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
@@ -1006,6 +1153,7 @@
int64_t best_rd = INT64_MAX;
int rate = 0;
int distortion;
+ VP9_COMMON *const cm = &cpi->common;
ENTROPY_CONTEXT ta = *a, tempa = *a;
ENTROPY_CONTEXT tl = *l, templ = *l;
@@ -1022,6 +1170,7 @@
#if CONFIG_NEWBINTRAMODES
b->bmi.as_mode.context = vp9_find_bpred_context(b);
#endif
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
int64_t this_rd;
int ratey;
@@ -1048,7 +1197,7 @@
vp9_subtract_b(be, b, 16);
b->bmi.as_mode.first = mode;
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, be - x->block);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
vp9_ht_quantize_b_4x4(x, be - x->block, tx_type);
@@ -1060,7 +1209,7 @@
tempa = ta;
templ = tl;
- ratey = cost_coeffs(x, b - xd->block,
+ ratey = cost_coeffs(cm, x, b - xd->block,
PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
rate += ratey;
distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;
@@ -1311,6 +1460,7 @@
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int *bestrate, int *bestratey,
int *bestdistortion) {
+ VP9_COMMON *const cm = &cpi->common;
MB_PREDICTION_MODE mode;
MACROBLOCKD *xd = &x->e_mbd;
int64_t best_rd = INT64_MAX;
@@ -1346,7 +1496,7 @@
vp9_subtract_4b_c(be, b, 16);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- TX_TYPE tx_type = get_tx_type_8x8(xd, b);
+ TX_TYPE tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT)
vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
else
@@ -1365,7 +1515,7 @@
ta1 = ta0 + 1;
tl1 = tl0 + 1;
- rate_t = cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC,
+ rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
ta0, tl0, TX_8X8);
rate += rate_t;
@@ -1385,11 +1535,12 @@
int do_two = 0;
b = &xd->block[ib + iblock[i]];
be = &x->block[ib + iblock[i]];
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
- } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
+ } else if (!(i & 1) &&
+ get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
do_two = 1;
@@ -1398,12 +1549,12 @@
x->quantize_b_4x4(x, ib + iblock[i]);
}
distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
- rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
+ rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
TX_4X4);
if (do_two) {
i++;
- rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
+ rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
TX_4X4);
}
@@ -1491,7 +1642,80 @@
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
-static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) {
+static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_y,
+ int *distortion,
+ int *mode8x8,
+ int64_t best_yrd,
+ int64_t *txfm_cache) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
+ int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
+ int64_t tmp_rd_4x4s, tmp_rd_8x8s;
+ int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
+ int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
+
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
+ &d4x4, best_yrd);
+ mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
+ &d8x8, best_yrd);
+ txfm_cache[ONLY_4X4] = tmp_rd_4x4;
+ txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
+ txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
+ tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
+ tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
+ txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ?
+ tmp_rd_4x4s : tmp_rd_8x8s;
+ if (cm->txfm_mode == TX_MODE_SELECT) {
+ if (tmp_rd_4x4s < tmp_rd_8x8s) {
+ *rate = r4x4 + cost0;
+ *rate_y = tok4x4 + cost0;
+ *distortion = d4x4;
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd = tmp_rd_4x4s;
+ } else {
+ *rate = r8x8 + cost1;
+ *rate_y = tok8x8 + cost1;
+ *distortion = d8x8;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd = tmp_rd_8x8s;
+
+ mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
+ }
+ } else if (cm->txfm_mode == ONLY_4X4) {
+ *rate = r4x4;
+ *rate_y = tok4x4;
+ *distortion = d4x4;
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd = tmp_rd_4x4;
+ } else {
+ *rate = r8x8;
+ *rate_y = tok8x8;
+ *distortion = d8x8;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd = tmp_rd_8x8;
+
+ mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
+ }
+
+ return tmp_rd;
+}
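+
+ (Reviewer note: a numeric sketch of the TX_MODE_SELECT comparison inside the
+ new helper. The one-bit transform-size flag is priced via vp9_cost_bit(),
+ whose costs are in 1/256-bit units, and folded into each RD score before
+ taking the minimum. All values below are illustrative.)
+
+ #include <stdio.h>
+
+ int main(void) {
+   const long long tmp_rd_4x4 = 91000, tmp_rd_8x8 = 90500;
+   const int cost0 = 180, cost1 = 410;  /* hypothetical flag bit costs */
+   /* RDCOST(rdmult, rddiv, cost, 0) reduces to the rate term only */
+   const int rdmult = 300;
+   const long long rd4 = tmp_rd_4x4 + ((128 + (long long)cost0 * rdmult) >> 8);
+   const long long rd8 = tmp_rd_8x8 + ((128 + (long long)cost1 * rdmult) >> 8);
+   printf("%s wins\n", rd4 < rd8 ? "TX_4X4" : "TX_8X8");  /* TX_8X8 wins */
+   return 0;
+ }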
+
+static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
int b;
int cost = 0;
MACROBLOCKD *xd = &mb->e_mbd;
@@ -1510,7 +1734,7 @@
}
for (b = 16; b < 24; b++)
- cost += cost_coeffs(mb, b, PLANE_TYPE_UV,
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b],
TX_4X4);
@@ -1525,14 +1749,14 @@
vp9_transform_mbuv_4x4(x);
vp9_quantize_mbuv_4x4(x);
- *rate = rd_cost_mbuv_4x4(x, do_ctx_backup);
+ *rate = rd_cost_mbuv_4x4(&cpi->common, x, do_ctx_backup);
*distortion = vp9_mbuverror(x) / 4;
*skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd);
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) {
+static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
int b;
int cost = 0;
MACROBLOCKD *xd = &mb->e_mbd;
@@ -1551,7 +1775,7 @@
}
for (b = 16; b < 24; b += 4)
- cost += cost_coeffs(mb, b, PLANE_TYPE_UV,
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b], TX_8X8);
@@ -1564,14 +1788,14 @@
vp9_transform_mbuv_8x8(x);
vp9_quantize_mbuv_8x8(x);
- *rate = rd_cost_mbuv_8x8(x, do_ctx_backup);
+ *rate = rd_cost_mbuv_8x8(&cpi->common, x, do_ctx_backup);
*distortion = vp9_mbuverror(x) / 4;
*skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd);
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) {
+static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int backup) {
int b;
int cost = 0;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1590,24 +1814,24 @@
}
for (b = 16; b < 24; b += 4)
- cost += cost_coeffs(x, b * 4, PLANE_TYPE_UV,
+ cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_UV,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b], TX_16X16);
return cost;
}
-static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate,
- int *distortion, int *skip,
+static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skip,
int backup) {
MACROBLOCKD *const xd = &x->e_mbd;
vp9_transform_sbuv_16x16(x);
vp9_quantize_sbuv_16x16(x);
- *rate = rd_cost_sbuv_16x16(x, backup);
- *distortion = vp9_block_error_c(x->coeff + 1024,
- xd->dqcoeff + 1024, 512) >> 2;
+ *rate = rd_cost_sbuv_16x16(cm, x, backup);
+ *distortion = vp9_sb_block_error_c(x->coeff + 1024,
+ xd->dqcoeff + 1024, 512, 2);
*skip = vp9_sbuv_is_skippable_16x16(xd);
}
@@ -1623,7 +1847,7 @@
vp9_subtract_sbuv_s_c(x->src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
- rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1);
+ rd_inter32x32_uv_16x16(&cpi->common, x, rate, distortion, skip, 1);
} else {
int n, r = 0, d = 0;
int skippable = 1;
@@ -1671,23 +1895,14 @@
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static void super_block_64_uvrd(MACROBLOCK *x, int *rate,
+static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, int *rate,
int *distortion, int *skip);
static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int fullpixel, int *skip) {
- super_block_64_uvrd(x, rate, distortion, skip);
+ super_block_64_uvrd(&cpi->common, x, rate, distortion, skip);
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
- int *distortion, int *skip, int fullpixel,
- int mb_row, int mb_col) {
- vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col);
- vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
- x->e_mbd.predictor, x->src.uv_stride);
- return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1);
-}
-
static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi,
MACROBLOCK *x,
int *rate,
@@ -1702,6 +1917,7 @@
int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
int rate_to, UNINITIALIZED_IS_SAFE(skip);
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
int rate;
int distortion;
@@ -1715,7 +1931,7 @@
vp9_transform_mbuv_4x4(x);
vp9_quantize_mbuv_4x4(x);
- rate_to = rd_cost_mbuv_4x4(x, 1);
+ rate_to = rd_cost_mbuv_4x4(&cpi->common, x, 1);
rate = rate_to
+ x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];
@@ -1754,6 +1970,7 @@
int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
int rate_to, UNINITIALIZED_IS_SAFE(skip);
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
int rate;
int distortion;
@@ -1767,7 +1984,7 @@
vp9_quantize_mbuv_8x8(x);
- rate_to = rd_cost_mbuv_8x8(x, 1);
+ rate_to = rd_cost_mbuv_8x8(&cpi->common, x, 1);
rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];
distortion = vp9_mbuverror(x) / 4;
@@ -1789,7 +2006,8 @@
}
// TODO(rbultje) very similar to rd_inter32x32_uv(), merge?
-static void super_block_uvrd(MACROBLOCK *x,
+static void super_block_uvrd(VP9_COMMON *const cm,
+ MACROBLOCK *x,
int *rate,
int *distortion,
int *skippable) {
@@ -1803,7 +2021,7 @@
vp9_subtract_sbuv_s_c(x->src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
- rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1);
+ rd_inter32x32_uv_16x16(cm, x, rate, distortion, skippable, 1);
} else {
int d = 0, r = 0, n, s = 1;
ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
@@ -1837,9 +2055,9 @@
xd->above_context = t_above + x_idx;
xd->left_context = t_left + y_idx;
if (mbmi->txfm_size == TX_4X4) {
- r += rd_cost_mbuv_4x4(x, 0);
+ r += rd_cost_mbuv_4x4(cm, x, 0);
} else {
- r += rd_cost_mbuv_8x8(x, 0);
+ r += rd_cost_mbuv_8x8(cm, x, 0);
}
}
@@ -1852,7 +2070,8 @@
}
}
-static int rd_cost_sb64uv_32x32(MACROBLOCK *x, int backup) {
+static int rd_cost_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int backup) {
int b;
int cost = 0;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1871,28 +2090,28 @@
}
for (b = 16; b < 24; b += 4)
- cost += cost_coeffs(x, b * 16, PLANE_TYPE_UV,
+ cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_UV,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b], TX_32X32);
return cost;
}
-static void rd_inter64x64_uv_32x32(MACROBLOCK *x, int *rate,
- int *distortion, int *skip,
+static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skip,
int backup) {
MACROBLOCKD *const xd = &x->e_mbd;
vp9_transform_sb64uv_32x32(x);
vp9_quantize_sb64uv_32x32(x);
- *rate = rd_cost_sb64uv_32x32(x, backup);
- *distortion = vp9_block_error_c(x->coeff + 4096,
- xd->dqcoeff + 4096, 2048);
+ *rate = rd_cost_sb64uv_32x32(cm, x, backup);
+ *distortion = vp9_sb_block_error_c(x->coeff + 4096,
+ xd->dqcoeff + 4096, 2048, 0);
*skip = vp9_sb64uv_is_skippable_32x32(xd);
}
-static void super_block_64_uvrd(MACROBLOCK *x,
+static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
int *rate,
int *distortion,
int *skippable) {
@@ -1913,7 +2132,7 @@
if (mbmi->txfm_size == TX_32X32) {
vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
- rd_inter64x64_uv_32x32(x, &r, &d, &s, 1);
+ rd_inter64x64_uv_32x32(cm, x, &r, &d, &s, 1);
} else if (mbmi->txfm_size == TX_16X16) {
int n;
@@ -1931,7 +2150,7 @@
dst_uv_stride);
xd->above_context = t_above + x_idx * 2;
xd->left_context = t_left + y_idx * 2;
- rd_inter32x32_uv_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
+ rd_inter32x32_uv_16x16(cm, x, &r_tmp, &d_tmp, &s_tmp, 0);
r += r_tmp;
d += d_tmp;
s = s && s_tmp;
@@ -1961,9 +2180,9 @@
xd->left_context = t_left + y_idx;
d += vp9_mbuverror(x) >> 2;
if (mbmi->txfm_size == TX_4X4) {
- r += rd_cost_mbuv_4x4(x, 0);
+ r += rd_cost_mbuv_4x4(cm, x, 0);
} else {
- r += rd_cost_mbuv_8x8(x, 0);
+ r += rd_cost_mbuv_8x8(cm, x, 0);
}
}
}
@@ -1992,7 +2211,7 @@
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
- super_block_uvrd(x, &this_rate_tokenonly,
+ super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
&this_distortion, &s);
this_rate = this_rate_tokenonly +
x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
@@ -2029,7 +2248,7 @@
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
vp9_build_intra_predictors_sb64uv_s(&x->e_mbd);
- super_block_64_uvrd(x, &this_rate_tokenonly,
+ super_block_64_uvrd(&cpi->common, x, &this_rate_tokenonly,
&this_distortion, &s);
this_rate = this_rate_tokenonly +
x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
@@ -2186,7 +2405,8 @@
return cost;
}
-static int64_t encode_inter_mb_segment(MACROBLOCK *x,
+static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
+ MACROBLOCK *x,
int const *labels,
int which_label,
int *labelyrate,
@@ -2225,7 +2445,7 @@
x->quantize_b_4x4(x, i);
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, i, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][i],
tl + vp9_block2left[TX_4X4][i], TX_4X4);
}
@@ -2234,7 +2454,8 @@
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
-static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
+static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
+ MACROBLOCK *x,
int const *labels,
int which_label,
int *labelyrate,
@@ -2288,10 +2509,12 @@
x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
otherdist += thisdistortion;
- othercost += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC,
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+ othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
tacp + vp9_block2above[TX_8X8][idx],
tlcp + vp9_block2left[TX_8X8][idx],
TX_8X8);
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
}
for (j = 0; j < 4; j += 2) {
bd = &xd->block[ib + iblock[j]];
@@ -2300,15 +2523,17 @@
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
- ta + vp9_block2above[TX_4X4][ib + iblock[j]],
- tl + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
- *labelyrate += cost_coeffs(x, ib + iblock[j] + 1,
- PLANE_TYPE_Y_WITH_DC,
- ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
- tl + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
+ *labelyrate +=
+ cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above[TX_4X4][ib + iblock[j]],
+ tl + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
+ *labelyrate +=
+ cost_coeffs(cm, x, ib + iblock[j] + 1,
+ PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
+ tl + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
}
} else /* 8x8 */ {
if (otherrd) {
@@ -2319,22 +2544,26 @@
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j]);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
otherdist += thisdistortion;
- othercost += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
- tacp + vp9_block2above[TX_4X4][ib + iblock[j]],
- tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
- othercost += cost_coeffs(x, ib + iblock[j] + 1,
- PLANE_TYPE_Y_WITH_DC,
- tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
- tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ othercost +=
+ cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[TX_4X4][ib + iblock[j]],
+ tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
+ othercost +=
+ cost_coeffs(cm, x, ib + iblock[j] + 1,
+ PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
+ tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
}
}
x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_8X8][idx],
tl + vp9_block2left[TX_8X8][idx], TX_8X8);
}
@@ -2574,11 +2803,13 @@
continue;
if (segmentation == PARTITIONING_4X4) {
- this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate,
+ this_rd = encode_inter_mb_segment(&cpi->common,
+ x, labels, i, &labelyrate,
&distortion, ta_s, tl_s);
other_rd = this_rd;
} else {
- this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate,
+ this_rd = encode_inter_mb_segment_8x8(&cpi->common,
+ x, labels, i, &labelyrate,
&distortion, &other_rd,
ta_s, tl_s);
}
@@ -3146,7 +3377,9 @@
// UV cost and distortion
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
x->e_mbd.predictor, x->src.uv_stride);
- if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4)
+ if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4 &&
+ x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED &&
+ x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
rd_inter16x16_uv_8x8(cpi, x, rate_uv, distortion_uv,
cpi->common.full_pixel, &uv_skippable, 1);
else
@@ -3344,6 +3577,8 @@
vp9_clamp_mv_min_max(x, &ref_mv[0]);
+ sr = vp9_init_search_range(cpi->common.Width, cpi->common.Height);
+
// mvp_full.as_int = ref_mv[0].as_int;
mvp_full.as_int =
mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int;
@@ -3933,7 +4168,10 @@
#if CONFIG_COMP_INTERINTRA_PRED
int is_best_interintra = 0;
int64_t best_intra16_rd = INT64_MAX;
- int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED;
+ int best_intra16_mode = DC_PRED;
+#if SEPARATE_INTERINTRA_UV
+ int best_intra16_uv_mode = DC_PRED;
+#endif
#endif
int64_t best_overall_rd = INT64_MAX;
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
@@ -4015,6 +4253,8 @@
cpi->zbin_mode_boost = 0;
vp9_update_zbin_extra(cpi, x);
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+
rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate,
&uv_intra_rate_tokenonly, &uv_intra_distortion,
&uv_intra_skippable);
@@ -4231,65 +4471,11 @@
}
break;
case I8X8_PRED: {
- int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
- int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
- int64_t tmp_rd_4x4s, tmp_rd_8x8s;
- int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
- int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
- mbmi->txfm_size = TX_4X4;
- tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
- &d4x4, best_yrd);
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- mbmi->txfm_size = TX_8X8;
- tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
- &d8x8, best_yrd);
- txfm_cache[ONLY_4X4] = tmp_rd_4x4;
- txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
- txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
- tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
- tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
- txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? tmp_rd_4x4s : tmp_rd_8x8s;
- if (cm->txfm_mode == TX_MODE_SELECT) {
- if (tmp_rd_4x4s < tmp_rd_8x8s) {
- rate = r4x4 + cost0;
- rate_y = tok4x4 + cost0;
- distortion = d4x4;
- mbmi->txfm_size = TX_4X4;
- tmp_rd = tmp_rd_4x4s;
- } else {
- rate = r8x8 + cost1;
- rate_y = tok8x8 + cost1;
- distortion = d8x8;
- mbmi->txfm_size = TX_8X8;
- tmp_rd = tmp_rd_8x8s;
+ int64_t tmp_rd;
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- }
- } else if (cm->txfm_mode == ONLY_4X4) {
- rate = r4x4;
- rate_y = tok4x4;
- distortion = d4x4;
- mbmi->txfm_size = TX_4X4;
- tmp_rd = tmp_rd_4x4;
- } else {
- rate = r8x8;
- rate_y = tok8x8;
- distortion = d8x8;
- mbmi->txfm_size = TX_8X8;
- tmp_rd = tmp_rd_8x8;
-
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- }
-
+ tmp_rd = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate, &rate_y,
+ &distortion, mode8x8,
+ best_yrd, txfm_cache);
rate2 += rate;
rate2 += intra_cost_penalty;
distortion2 += distortion;
@@ -4330,6 +4516,7 @@
this_rd_thresh =
(mbmi->ref_frame == GOLDEN_FRAME) ?
cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
for (switchable_filter_index = 0;
switchable_filter_index < VP9_SWITCHABLE_FILTERS;
@@ -4421,8 +4608,11 @@
if (tmp_rd < best_yrd) {
int uv_skippable;
- rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- cpi->common.full_pixel, mb_row, mb_col);
+ vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col);
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ x->e_mbd.predictor, x->src.uv_stride);
+ rd_inter16x16_uv_4x4(cpi, x, &rate_uv, &distortion_uv,
+ cpi->common.full_pixel, &uv_skippable, 1);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -4543,8 +4733,10 @@
(this_rd < best_intra16_rd)) {
best_intra16_rd = this_rd;
best_intra16_mode = this_mode;
+#if SEPARATE_INTERINTRA_UV
best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ?
uv_intra_mode_8x8 : uv_intra_mode);
+#endif
}
#endif
@@ -4792,6 +4984,7 @@
int64_t txfm_cache[NB_TXFM_MODES], err;
int i;
+ xd->mode_info_context->mbmi.mode = DC_PRED;
err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
&dist_y, &y_skip, txfm_cache);
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
@@ -4826,6 +5019,7 @@
int64_t txfm_cache[NB_TXFM_MODES], err;
int i;
+ xd->mode_info_context->mbmi.mode = DC_PRED;
err = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
&dist_y, &y_skip, txfm_cache);
rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
@@ -4866,77 +5060,96 @@
int mode16x16;
int mode8x8[4];
int dist;
- int modeuv, uv_intra_skippable, uv_intra_skippable_8x8;
+ int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8;
int y_intra16x16_skippable = 0;
- int64_t txfm_cache[NB_TXFM_MODES];
- TX_SIZE txfm_size_16x16;
+ int64_t txfm_cache[2][NB_TXFM_MODES];
+ TX_SIZE txfm_size_16x16, txfm_size_8x8;
int i;
mbmi->ref_frame = INTRA_FRAME;
+ mbmi->mode = DC_PRED;
rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv,
&uv_intra_skippable);
modeuv = mbmi->uv_mode;
if (cpi->common.txfm_mode != ONLY_4X4) {
rd_pick_intra_mbuv_mode_8x8(cpi, x, &rateuv8x8, &rateuv8x8_tokenonly,
&distuv8x8, &uv_intra_skippable_8x8);
+ modeuv8x8 = mbmi->uv_mode;
} else {
uv_intra_skippable_8x8 = uv_intra_skippable;
rateuv8x8 = rateuv;
distuv8x8 = distuv;
rateuv8x8_tokenonly = rateuv_tokenonly;
+ modeuv8x8 = modeuv;
}
// current macroblock under rate-distortion optimization test loop
error16x16 = rd_pick_intra16x16mby_mode(cpi, x, &rate16x16,
&rate16x16_tokenonly, &dist16x16,
- &y_intra16x16_skippable, txfm_cache);
+ &y_intra16x16_skippable,
+ txfm_cache[1]);
mode16x16 = mbmi->mode;
txfm_size_16x16 = mbmi->txfm_size;
+ if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+ ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) ||
+ (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) {
+ error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0);
+ rate16x16 -= rate16x16_tokenonly;
+ }
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ txfm_cache[0][i] = error16x16 - txfm_cache[1][cm->txfm_mode] +
+ txfm_cache[1][i];
+ }
- // FIXME(rbultje) support transform-size selection
- mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
- error8x8 = rd_pick_intra8x8mby_modes(cpi, x, &rate8x8, &rate8x8_tokenonly,
- &dist8x8, error16x16);
- mode8x8[0]= xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1]= xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2]= xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3]= xd->mode_info_context->bmi[10].as_mode.first;
+ error8x8 = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate8x8,
+ &rate8x8_tokenonly,
+ &dist8x8, mode8x8,
+ error16x16, txfm_cache[1]);
+ txfm_size_8x8 = mbmi->txfm_size;
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ int64_t tmp_rd = error8x8 - txfm_cache[1][cm->txfm_mode] + txfm_cache[1][i];
+ if (tmp_rd < txfm_cache[0][i])
+ txfm_cache[0][i] = tmp_rd;
+ }
mbmi->txfm_size = TX_4X4;
error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
&rate4x4, &rate4x4_tokenonly,
&dist4x4, error16x16);
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ if (error4x4 < txfm_cache[0][i])
+ txfm_cache[0][i] = error4x4;
+ }
mbmi->mb_skip_coeff = 0;
- if (cpi->common.mb_no_coeff_skip &&
- y_intra16x16_skippable && uv_intra_skippable_8x8) {
+ if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+ ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) ||
+ (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) {
mbmi->mb_skip_coeff = 1;
mbmi->mode = mode16x16;
- mbmi->uv_mode = modeuv;
- rate = rateuv8x8 + rate16x16 - rateuv8x8_tokenonly - rate16x16_tokenonly +
- vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
- dist = dist16x16 + (distuv8x8 >> 2);
+ mbmi->uv_mode = (cm->txfm_mode == ONLY_4X4) ? modeuv : modeuv8x8;
+ rate = rate16x16 + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
+ dist = dist16x16;
+ if (cm->txfm_mode == ONLY_4X4) {
+ rate += rateuv - rateuv_tokenonly;
+ dist += (distuv >> 2);
+ } else {
+ rate += rateuv8x8 - rateuv8x8_tokenonly;
+ dist += (distuv8x8 >> 2);
+ }
mbmi->txfm_size = txfm_size_16x16;
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
} else if (error8x8 > error16x16) {
if (error4x4 < error16x16) {
rate = rateuv + rate4x4;
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
} else {
mbmi->txfm_size = txfm_size_16x16;
mbmi->mode = mode16x16;
rate = rate16x16 + rateuv8x8;
dist = dist16x16 + (distuv8x8 >> 2);
- for (i = 0; i < NB_TXFM_MODES; i++) {
- x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
- error16x16 - txfm_cache[i];
- }
}
if (cpi->common.mb_no_coeff_skip)
rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -4946,22 +5159,22 @@
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
} else {
- // FIXME(rbultje) support transform-size selection
mbmi->mode = I8X8_PRED;
- mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
+ mbmi->txfm_size = txfm_size_8x8;
set_i8x8_block_modes(x, mode8x8);
rate = rate8x8 + rateuv;
dist = dist8x8 + (distuv >> 2);
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
}
if (cpi->common.mb_no_coeff_skip)
rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
}
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
+ txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i];
+ }
+
*returnrate = rate;
*returndist = dist;
}
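
The two-level cache above is the heart of this hunk: txfm_cache[1] is scratch that each rd_pick_* call fills with the tested mode family's RD cost under every transform mode, while txfm_cache[0] keeps the best cost per transform mode across the 16x16, 8x8 and 4x4 families (16x16 seeds it, 8x8 folds in with a min, and B_PRED's flat 4x4 cost clamps every entry). A minimal sketch of the folding step, with illustrative names and an illustrative NB_TXFM_MODES value rather than the tree's definitions:

    #include <stdint.h>

    enum { NB_TXFM_MODES = 5 };  /* illustrative; the codec's enum defines it */

    /* Re-base one family's per-transform-mode costs so the entry for the
     * frame's txfm_mode equals the family's returned error, then keep the
     * element-wise minimum -- the loops after each rd_pick_* call above. */
    static void fold_family(int64_t best[NB_TXFM_MODES],
                            const int64_t family[NB_TXFM_MODES],
                            int64_t family_error, int frame_txfm_mode) {
      int i;
      for (i = 0; i < NB_TXFM_MODES; i++) {
        const int64_t rd = family_error - family[frame_txfm_mode] + family[i];
        if (rd < best[i])
          best[i] = rd;
      }
    }

The txfm_rd_diff loop at the end of the function then prices every transform mode relative to the one in use, txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i], which is what lets the old memset-to-zero placeholders disappear.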
@@ -5002,7 +5215,10 @@
#if CONFIG_COMP_INTERINTRA_PRED
int is_best_interintra = 0;
int64_t best_intra16_rd = INT64_MAX;
- int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED;
+ int best_intra16_mode = DC_PRED;
+#if SEPARATE_INTERINTRA_UV
+ int best_intra16_uv_mode = DC_PRED;
+#endif
#endif
int64_t best_overall_rd = INT64_MAX;
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
@@ -5334,8 +5550,10 @@
(this_rd < best_intra16_rd)) {
best_intra16_rd = this_rd;
best_intra16_mode = this_mode;
+#if SEPARATE_INTERINTRA_UV
best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ?
mode_uv_8x8 : mode_uv_4x4);
+#endif
}
#endif
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index cfaf5f5..a04a20c 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -219,10 +219,8 @@
const int segment_id = mi->mbmi.segment_id;
xd->mode_info_context = mi;
- xd->mb_to_top_edge = -((mb_row * 16) << 3);
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_bottom_edge = ((cm->mb_rows - mb_size - mb_row) * 16) << 3;
- xd->mb_to_right_edge = ((cm->mb_cols - mb_size - mb_col) * 16) << 3;
+ set_mb_row(cm, xd, mb_row, mb_size);
+ set_mb_col(cm, xd, mb_col, mb_size);
// Count the number of hits on each segment with no prediction
no_pred_segcounts[segment_id]++;
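
The deleted lines spell out exactly what the new helpers must compute, so the refactor is behavior-preserving. A minimal sketch, bodies taken from the removed code and signatures from the call sites (the in-tree VP9_COMMON and MACROBLOCKD definitions are assumed):

    /* Clamp distances are kept in 1/8-pel units: 16 pels per macroblock,
     * then << 3 to convert pels to eighth pels. mb_size is the block's span
     * in macroblocks (1 for a MB, 2 for a 32x32 SB, 4 for a 64x64 SB). */
    static void set_mb_row_sketch(VP9_COMMON *cm, MACROBLOCKD *xd,
                                  int mb_row, int mb_size) {
      xd->mb_to_top_edge = -((mb_row * 16) << 3);
      xd->mb_to_bottom_edge = ((cm->mb_rows - mb_size - mb_row) * 16) << 3;
    }

    static void set_mb_col_sketch(VP9_COMMON *cm, MACROBLOCKD *xd,
                                  int mb_col, int mb_size) {
      xd->mb_to_left_edge = -((mb_col * 16) << 3);
      xd->mb_to_right_edge = ((cm->mb_cols - mb_size - mb_col) * 16) << 3;
    }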
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index d115fe8..df05bec 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -36,6 +36,21 @@
extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES];
#endif /* ENTROPY_STATS */
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_TOKENS];
+unsigned int nzc_counts_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_TOKENS];
+unsigned int nzc_counts_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_TOKENS];
+unsigned int nzc_counts_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_TOKENS];
+unsigned int nzc_pcat_counts[MAX_NZC_CONTEXTS][NZC_TOKENS_EXTRA]
+ [NZC_BITS_EXTRA][2];
+#endif
+#endif
+
static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
const TOKENVALUE *vp9_dct_value_tokens_ptr;
static int dct_value_cost[DCT_MAX_VALUE * 2];
@@ -114,11 +129,13 @@
const int *scan;
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
- const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 &&
- type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, &xd->block[ib]) : DCT_DCT;
const int ref = mbmi->ref_frame != INTRA_FRAME;
ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
+#if CONFIG_CODE_NONZEROCOUNT
+ int zerosleft, nzc = 0;
+ if (eob == 0)
+ assert(xd->nzcs[ib] == 0);
+#endif
if (sb_type == BLOCK_SIZE_SB64X64) {
a = (ENTROPY_CONTEXT *)xd->above_context +
@@ -144,7 +161,9 @@
switch (tx_size) {
default:
- case TX_4X4:
+ case TX_4X4: {
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_4x4(xd, ib) : DCT_DCT;
a_ec = *a;
l_ec = *l;
seg_eob = 16;
@@ -159,6 +178,7 @@
counts = cpi->coef_counts_4x4;
probs = cpi->common.fc.coef_probs_4x4;
break;
+ }
case TX_8X8:
a_ec = (a[0] + a[1]) != 0;
l_ec = (l[0] + l[1]) != 0;
@@ -207,29 +227,47 @@
do {
const int band = get_coef_band(tx_size, c);
int token;
-
+ int v = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ zerosleft = seg_eob - xd->nzcs[ib] - c + nzc;
+#endif
if (c < eob) {
const int rc = scan[c];
- const int v = qcoeff_ptr[rc];
+ v = qcoeff_ptr[rc];
assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);
t->Extra = vp9_dct_value_tokens_ptr[v].Extra;
token = vp9_dct_value_tokens_ptr[v].Token;
} else {
+#if CONFIG_CODE_NONZEROCOUNT
+ break;
+#else
token = DCT_EOB_TOKEN;
+#endif
}
t->Token = token;
t->context_tree = probs[type][ref][band][pt];
+#if CONFIG_CODE_NONZEROCOUNT
+ // Termination is implied by the coded nonzero count, so the EOB node is
+ // always skipped; skip the zero node as well once no zeros remain
+ t->skip_eob_node = 1 + (zerosleft == 0);
+#else
t->skip_eob_node = (pt == 0) && (band > 0);
+#endif
assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
++counts[type][ref][band][pt][token];
}
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc += (v != 0);
+#endif
pt = vp9_get_coef_context(&recent_energy, token);
++t;
} while (c < eob && ++c < seg_eob);
+#if CONFIG_CODE_NONZEROCOUNT
+ assert(nzc == xd->nzcs[ib]);
+#endif
*tp = t;
a_ec = l_ec = (c > 0); /* 0 <-> all coeff data is zero */
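
The nonzero-count bookkeeping in the loop above reads best as an invariant: xd->nzcs[ib] is the block's total nonzero count (coded separately under this experiment), nzc counts the nonzeros tokenized so far, and seg_eob - c is the number of scan positions still ahead. That also explains the break replacing DCT_EOB_TOKEN: the decoder derives termination from the coded count, so no EOB token is ever emitted. A minimal sketch of the same arithmetic:

    /* zerosleft = seg_eob - total_nzc - c + nzc_so_far
     *           = (positions not yet scanned) - (nonzeros not yet seen).
     * When it reaches zero every remaining coefficient must be nonzero, so
     * the tokenizer sets skip_eob_node = 2: the EOB node is always implied
     * (the unconditional 1) and the zero node can be pruned as well. */
    static int zeros_left(int seg_eob, int c, int total_nzc, int nzc_so_far) {
      return (seg_eob - c) - (total_nzc - nzc_so_far);
    }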
@@ -331,7 +369,7 @@
vp9_sbuv_is_skippable_16x16(xd);
}
-static int sby_is_skippable_16x16(MACROBLOCKD *xd) {
+int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -342,10 +380,10 @@
}
static int sb_is_skippable_16x16(MACROBLOCKD *xd) {
- return sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd);
+ return vp9_sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd);
}
-static int sby_is_skippable_8x8(MACROBLOCKD *xd) {
+int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -355,7 +393,7 @@
return skip;
}
-static int sbuv_is_skippable_8x8(MACROBLOCKD *xd) {
+int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -366,10 +404,10 @@
}
static int sb_is_skippable_8x8(MACROBLOCKD *xd) {
- return sby_is_skippable_8x8(xd) & sbuv_is_skippable_8x8(xd);
+ return vp9_sby_is_skippable_8x8(xd) & vp9_sbuv_is_skippable_8x8(xd);
}
-static int sby_is_skippable_4x4(MACROBLOCKD *xd) {
+int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -379,7 +417,7 @@
return skip;
}
-static int sbuv_is_skippable_4x4(MACROBLOCKD *xd) {
+int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -390,7 +428,7 @@
}
static int sb_is_skippable_4x4(MACROBLOCKD *xd) {
- return sby_is_skippable_4x4(xd) & sbuv_is_skippable_4x4(xd);
+ return vp9_sby_is_skippable_4x4(xd) & vp9_sbuv_is_skippable_4x4(xd);
}
void vp9_tokenize_sb(VP9_COMP *cpi,
@@ -476,7 +514,7 @@
*t = t_backup;
}
-static int sb64y_is_skippable_32x32(MACROBLOCKD *xd) {
+int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -491,10 +529,10 @@
}
static int sb64_is_skippable_32x32(MACROBLOCKD *xd) {
- return sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd);
+ return vp9_sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd);
}
-static int sb64y_is_skippable_16x16(MACROBLOCKD *xd) {
+int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -504,7 +542,7 @@
return skip;
}
-static int sb64uv_is_skippable_16x16(MACROBLOCKD *xd) {
+int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -515,10 +553,10 @@
}
static int sb64_is_skippable_16x16(MACROBLOCKD *xd) {
- return sb64y_is_skippable_16x16(xd) & sb64uv_is_skippable_16x16(xd);
+ return vp9_sb64y_is_skippable_16x16(xd) & vp9_sb64uv_is_skippable_16x16(xd);
}
-static int sb64y_is_skippable_8x8(MACROBLOCKD *xd) {
+int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -528,7 +566,7 @@
return skip;
}
-static int sb64uv_is_skippable_8x8(MACROBLOCKD *xd) {
+int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -539,10 +577,10 @@
}
static int sb64_is_skippable_8x8(MACROBLOCKD *xd) {
- return sb64y_is_skippable_8x8(xd) & sb64uv_is_skippable_8x8(xd);
+ return vp9_sb64y_is_skippable_8x8(xd) & vp9_sb64uv_is_skippable_8x8(xd);
}
-static int sb64y_is_skippable_4x4(MACROBLOCKD *xd) {
+int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -552,7 +590,7 @@
return skip;
}
-static int sb64uv_is_skippable_4x4(MACROBLOCKD *xd) {
+int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
@@ -563,7 +601,7 @@
}
static int sb64_is_skippable_4x4(MACROBLOCKD *xd) {
- return sb64y_is_skippable_4x4(xd) & sb64uv_is_skippable_4x4(xd);
+ return vp9_sb64y_is_skippable_4x4(xd) & vp9_sb64uv_is_skippable_4x4(xd);
}
void vp9_tokenize_sb64(VP9_COMP *cpi,
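
The rename-and-export block above makes the per-plane skip tests callable from the RD code (matching the declarations added to vp9_tokenize.h below); the truncated bodies all follow one pattern. A sketch of a representative variant, assuming the usual layout in which eobs[] holds one entry per 4x4 unit and a larger transform stores its eob at the first unit it covers:

    #include <stdint.h>

    /* In-tree the argument is MACROBLOCKD *xd and the array is xd->eobs.
     * A 32x32 superblock's luma spans 64 4x4 units; with TX_16X16 each
     * transform covers 16 of them, so four probes decide skippability. */
    static int sby_is_skippable_16x16_sketch(const uint16_t *eobs) {
      int skip = 1, i;
      for (i = 0; i < 64; i += 16)
        skip &= !eobs[i];
      return skip;
    }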
@@ -903,13 +941,15 @@
PLANE_TYPE type,
TX_SIZE tx_size,
int dry_run) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
+#if CONFIG_CODE_NONZEROCOUNT == 0
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
int pt, band;
TOKENEXTRA *t = *tp;
- MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
const int ref = mbmi->ref_frame != INTRA_FRAME;
- const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
+#endif
ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
if (sb_type == BLOCK_SIZE_SB32X32) {
@@ -939,14 +979,18 @@
case TX_4X4:
a_ec = a[0];
l_ec = l[0];
+#if CONFIG_CODE_NONZEROCOUNT == 0
counts = cpi->coef_counts_4x4;
probs = cpi->common.fc.coef_probs_4x4;
+#endif
break;
case TX_8X8:
a_ec = (a[0] + a[1]) != 0;
l_ec = (l[0] + l[1]) != 0;
+#if CONFIG_CODE_NONZEROCOUNT == 0
counts = cpi->coef_counts_8x8;
probs = cpi->common.fc.coef_probs_8x8;
+#endif
break;
case TX_16X16:
if (type != PLANE_TYPE_UV) {
@@ -956,8 +1000,10 @@
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
}
+#if CONFIG_CODE_NONZEROCOUNT == 0
counts = cpi->coef_counts_16x16;
probs = cpi->common.fc.coef_probs_16x16;
+#endif
break;
case TX_32X32:
if (type != PLANE_TYPE_UV) {
@@ -971,19 +1017,25 @@
l_ec = (l[0] + l[1] + l1[0] + l1[1] +
l2[0] + l2[1] + l3[0] + l3[1]) != 0;
}
+#if CONFIG_CODE_NONZEROCOUNT == 0
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
+#endif
break;
}
+#if CONFIG_CODE_NONZEROCOUNT == 0
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-
band = get_coef_band(tx_size, 0);
t->Token = DCT_EOB_TOKEN;
t->context_tree = probs[type][ref][band][pt];
t->skip_eob_node = 0;
++t;
*tp = t;
+ if (!dry_run) {
+ ++counts[type][ref][band][pt][DCT_EOB_TOKEN];
+ }
+#endif
*a = *l = 0;
if (tx_size == TX_8X8) {
a[1] = 0;
@@ -1009,10 +1061,6 @@
l2[0] = l2[1] = l3[0] = l3[1] = l_ec;
}
}
-
- if (!dry_run) {
- ++counts[type][ref][band][pt][DCT_EOB_TOKEN];
- }
}
static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd,
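
Under CONFIG_CODE_NONZEROCOUNT the stuffing path compiles away everything token-related: an all-zero block needs no DCT_EOB_TOKEN and no counts update, only the entropy-context clears the tail of the function already performs. The counts increment also moves up beside the token write because counts, band and pt now exist only under the same guard. A minimal sketch of what remains when the experiment is on (ENTROPY_CONTEXT typedef assumed from vp9_entropy.h):

    typedef char ENTROPY_CONTEXT;  /* assumed, as in vp9_entropy.h */

    /* With nonzero counts coded, stuffing an all-zero block reduces to
     * clearing the above/left contexts for the n 4x4 units spanned. */
    static void stuff_contexts_sketch(ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                      int n) {
      int i;
      for (i = 0; i < n; i++)
        a[i] = l[i] = 0;
    }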
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index 4d6fe63..464d7ca 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -37,8 +37,20 @@
int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd);
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd);
int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd);
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c
index 8e25281..951ffa7 100644
--- a/vp9/encoder/vp9_treewriter.c
+++ b/vp9/encoder/vp9_treewriter.c
@@ -35,5 +35,6 @@
}
void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t) {
+ c[0] = 0;
cost(c, t, p, 2, 0);
}
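
The one-line treewriter change pre-initializes slot 0 before the walk. The helper starts below the root (index 2), and the recursive cost routine only writes the slots of leaves it actually reaches; zero-filling c[0] first makes the skip variant safe for trees and start points that leave that slot untouched. A sketch of the walk's shape (the in-tree helper also accumulates a vp9_cost_bit term per branch; types and details are assumed):

    typedef signed char vp9_tree_index;  /* assumed; leaves are <= 0 */

    /* costs[-leaf] is written only when the walk reaches that leaf, so any
     * slot outside the walked subtree keeps its previous value -- hence the
     * explicit c[0] = 0 before cost(c, t, p, 2, 0) above. */
    static void cost_walk_sketch(int *costs, const vp9_tree_index *tree,
                                 int i, int accumulated) {
      int b;
      for (b = 0; b <= 1; ++b) {
        const vp9_tree_index next = tree[i + b];
        if (next <= 0)
          costs[-next] = accumulated;  /* per-branch bit costs omitted here */
        else
          cost_walk_sketch(costs, tree, next, accumulated);
      }
    }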
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index 7622fc0..239ae30 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -38,5 +38,11 @@
VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c
+VP9_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/vp9_dequantize_x86.c
+ifeq ($(HAVE_SSE2),yes)
+vp9/decoder/x86/vp9_dequantize_x86.c.o: CFLAGS += -msse2
+vp9/decoder/x86/vp9_dequantize_x86.c.d: CFLAGS += -msse2
+endif
+
$(eval $(call asm_offsets_template,\
vp9_asm_dec_offsets.asm, $(VP9_PREFIX)decoder/vp9_asm_dec_offsets.c))
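
The vp9dx.mk hunk uses the usual per-object flag trick: the new file is listed for any x86 target, but -msse2 is appended to its .o and .d rules only when the toolchain reports SSE2 support, so non-SSE2 builds still link. A file built that way conventionally guards its own bodies; a minimal sketch of that convention (the actual contents of vp9_dequantize_x86.c are not shown in this diff and are assumed):

    #include "./vpx_config.h"   /* defines HAVE_SSE2 for this build */

    #if HAVE_SSE2
    #include <emmintrin.h>      /* SSE2 intrinsics, legal under -msse2 only */
    /* ... SSE2 dequantize/idct implementations live here ... */
    #endif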
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index b130da8..62b86bb 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -11,6 +11,7 @@
#ifndef VPX_PORTS_MEM_H
#define VPX_PORTS_MEM_H
+
#include "vpx_config.h"
#include "vpx/vpx_integer.h"