Rectangular transforms 4x8 & 8x4
Added a new expt rect-tx to be used in conjunction with ext-tx.
[rect-tx is a temporary config flag and will eventually be
merged into ext-tx once it works correctly with all other
experiments].
Added 4x8 and 8x4 transforms for use initially with rectangular
sub8x8 y blocks as part of this experiment.
There is about a -0.2% BDRATE improvement on lowres, others pending.
When var-tx is on, rectangular transforms are currently not used.
That will be enabled in a subsequent patch.
Change-Id: Iaf3f88ede2740ffe6a0ffb1ef5fc01a16cd0283a
diff --git a/configure b/configure
index cf6a7c3..473d35e 100755
--- a/configure
+++ b/configure
@@ -267,6 +267,7 @@
fp_mb_stats
emulate_hardware
var_tx
+ rect_tx
ref_mv
dual_filter
ext_tx
diff --git a/vp10/common/blockd.c b/vp10/common/blockd.c
index 5ca5c05..6062917 100644
--- a/vp10/common/blockd.c
+++ b/vp10/common/blockd.c
@@ -53,7 +53,9 @@
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
- const int step = 1 << (tx_size << 1);
+ const uint8_t num_4x4_tw = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const uint8_t num_4x4_th = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int step = num_4x4_tw * num_4x4_th;
int i = 0, r, c;
// If mb_to_right_edge is < 0 we are in a situation in which
@@ -63,13 +65,15 @@
xd->mb_to_right_edge >> (5 + pd->subsampling_x));
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step;
+ const int extra_step =
+ ((num_4x4_w - max_blocks_wide) >>
+ num_4x4_blocks_wide_txsize_log2_lookup[tx_size]) * step;
// Keep track of the row and column of the blocks we use so that we know
// if we are in the unrestricted motion border.
- for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
+ for (r = 0; r < max_blocks_high; r += num_4x4_th) {
// Skip visiting the sub blocks that are wholly within the UMV.
- for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) {
+ for (c = 0; c < max_blocks_wide; c += num_4x4_tw) {
visit(plane, i, r, c, plane_bsize, tx_size, arg);
i += step;
}
@@ -82,33 +86,33 @@
foreach_transformed_block_visitor visit,
void *arg) {
int plane;
-
for (plane = 0; plane < MAX_MB_PLANE; ++plane)
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
}
void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
- int aoff, int loff) {
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
+ int aoff, int loff) {
ENTROPY_CONTEXT *const a = pd->above_context + aoff;
ENTROPY_CONTEXT *const l = pd->left_context + loff;
- const int tx_size_in_blocks = 1 << tx_size;
+ const int tx_w_in_blocks = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int tx_h_in_blocks = num_4x4_blocks_high_txsize_lookup[tx_size];
// above
if (has_eob && xd->mb_to_right_edge < 0) {
int i;
const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] +
(xd->mb_to_right_edge >> (5 + pd->subsampling_x));
- int above_contexts = tx_size_in_blocks;
+ int above_contexts = tx_w_in_blocks;
if (above_contexts + aoff > blocks_wide)
above_contexts = blocks_wide - aoff;
for (i = 0; i < above_contexts; ++i)
a[i] = has_eob;
- for (i = above_contexts; i < tx_size_in_blocks; ++i)
+ for (i = above_contexts; i < tx_w_in_blocks; ++i)
a[i] = 0;
} else {
- memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_w_in_blocks);
}
// left
@@ -116,16 +120,16 @@
int i;
const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] +
(xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- int left_contexts = tx_size_in_blocks;
+ int left_contexts = tx_h_in_blocks;
if (left_contexts + loff > blocks_high)
left_contexts = blocks_high - loff;
for (i = 0; i < left_contexts; ++i)
l[i] = has_eob;
- for (i = left_contexts; i < tx_size_in_blocks; ++i)
+ for (i = left_contexts; i < tx_h_in_blocks; ++i)
l[i] = 0;
} else {
- memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_h_in_blocks);
}
}
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 4c46cbb..399fefe 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -422,6 +422,18 @@
}
#endif // CONFIG_SUPERTX
+static INLINE int get_tx1d_width(TX_SIZE tx_size) {
+ return num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
+}
+
+static INLINE int get_tx1d_height(TX_SIZE tx_size) {
+ return num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+}
+
+static INLINE int get_tx2d_size(TX_SIZE tx_size) {
+ return num_4x4_blocks_txsize_lookup[tx_size] << 4;
+}
+
#if CONFIG_EXT_TX
#define ALLOW_INTRA_EXT_TX 1
// whether masked transforms are used for 32X32
@@ -438,6 +450,7 @@
#if EXT_TX_SIZES == 4
static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs,
int is_inter) {
+ tx_size = txsize_sqr_map[tx_size];
if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
#if USE_REDUCED_TXSET_FOR_16X16
if (tx_size == TX_32X32)
@@ -468,6 +481,7 @@
static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs,
int is_inter) {
(void) is_inter;
+ tx_size = txsize_sqr_map[tx_size];
if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
if (tx_size == TX_32X32) return 0;
#if USE_REDUCED_TXSET_FOR_16X16
@@ -622,10 +636,11 @@
#if CONFIG_EXT_TX
#if EXT_TX_SIZES == 4
- if (xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 ||
- (tx_size >= TX_32X32 && !is_inter_block(mbmi)))
+ if (xd->lossless[mbmi->segment_id] ||
+ txsize_sqr_map[tx_size] > TX_32X32 ||
+ (txsize_sqr_map[tx_size] >= TX_32X32 && !is_inter_block(mbmi)))
#else
- if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+ if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] >= TX_32X32)
#endif
return DCT_DCT;
if (mbmi->sb_type >= BLOCK_8X8) {
@@ -637,8 +652,8 @@
}
if (is_inter_block(mbmi))
// UV Inter only
- return (mbmi->tx_type == IDTX && tx_size == TX_32X32 ?
- DCT_DCT : mbmi->tx_type);
+ return (mbmi->tx_type == IDTX && txsize_sqr_map[tx_size] == TX_32X32) ?
+ DCT_DCT : mbmi->tx_type;
}
// Sub8x8-Inter/Intra OR UV-Intra
@@ -647,10 +662,10 @@
else // Sub8x8 Intra OR UV-Intra
return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ?
get_y_mode(mi, block_idx) : mbmi->uv_mode];
-#else
+#else // CONFIG_EXT_TX
(void) block_idx;
if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
- tx_size >= TX_32X32)
+ txsize_sqr_map[tx_size] >= TX_32X32)
return DCT_DCT;
return mbmi->tx_type;
#endif // CONFIG_EXT_TX
diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h
index 44ebff2..2506986 100644
--- a/vp10/common/common_data.h
+++ b/vp10/common/common_data.h
@@ -50,6 +50,46 @@
static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)};
+static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+ 1, 4, 16, 64,
+#if CONFIG_EXT_TX
+ 2, 2
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+ 1, 2, 4, 8,
+#if CONFIG_EXT_TX
+ 1, 2
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+ 1, 2, 4, 8,
+#if CONFIG_EXT_TX
+ 2, 1
+#endif // CONFIG_EXT_TX
+};
+
+static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+ 0, 2, 4, 6,
+#if CONFIG_EXT_TX
+ 1, 1
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup
+ [TX_SIZES_ALL] = {
+ 0, 1, 2, 3,
+#if CONFIG_EXT_TX
+ 0, 1
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_log2_lookup
+ [TX_SIZES_ALL] = {
+ 0, 1, 2, 3,
+#if CONFIG_EXT_TX
+ 1, 0
+#endif // CONFIG_EXT_TX
+};
+
// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
static const uint8_t size_group_lookup[BLOCK_SIZES] =
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)};
@@ -297,12 +337,58 @@
#endif // CONFIG_EXT_PARTITION
};
-static const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = {
- BLOCK_4X4, // TX_4X4
- BLOCK_8X8, // TX_8X8
- BLOCK_16X16, // TX_16X16
- BLOCK_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = {
+ // 4X4
+ TX_4X4,
+ // 4X8, 8X4, 8X8
+ TX_4X8, TX_8X4, TX_8X8,
+ // 8X16, 16X8, 16X16
+ TX_8X8, TX_8X8, TX_16X16,
+ // 16X32, 32X16, 32X32
+ TX_16X16, TX_16X16, TX_32X32,
+ // 32X64, 64X32, 64X64
+ TX_32X32, TX_32X32, TX_32X32,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ TX_32X32, TX_32X32, TX_32X32,
+#endif // CONFIG_EXT_PARTITION
};
+#endif // CONFIG_EXT_TX
+
+static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
+ BLOCK_4X4, // TX_4X4
+ BLOCK_8X8, // TX_8X8
+ BLOCK_16X16, // TX_16X16
+ BLOCK_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ BLOCK_4X8, // TX_4X8
+ BLOCK_8X4, // TX_8X4
+#endif // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
+ TX_4X4, // TX_4X4
+ TX_8X8, // TX_8X8
+ TX_16X16, // TX_16X16
+ TX_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ TX_4X4, // TX_4X8
+ TX_4X4, // TX_8X4
+#endif // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
+ TX_4X4, // TX_4X4
+ TX_8X8, // TX_8X8
+ TX_16X16, // TX_16X16
+ TX_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ TX_8X8, // TX_4X8
+ TX_8X8, // TX_8X4
+#endif // CONFIG_EXT_TX
+};
+
static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
TX_4X4, // ONLY_4X4
diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c
index eea552c..1ce801a 100644
--- a/vp10/common/entropy.c
+++ b/vp10/common/entropy.c
@@ -56,11 +56,33 @@
};
#endif
+const uint16_t band_count_table[TX_SIZES_ALL][8] = {
+ { 1, 2, 3, 4, 3, 16 - 13, 0 },
+ { 1, 2, 3, 4, 11, 64 - 21, 0 },
+ { 1, 2, 3, 4, 11, 256 - 21, 0 },
+ { 1, 2, 3, 4, 11, 1024 - 21, 0 },
+#if CONFIG_EXT_TX
+ { 1, 2, 3, 4, 8, 32 - 18, 0 },
+ { 1, 2, 3, 4, 8, 32 - 18, 0 },
+#endif // CONFIG_EXT_TX
+};
+
+const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
+ { 0, 1, 3, 6, 10, 13, 16, 0 },
+ { 0, 1, 3, 6, 10, 21, 64, 0 },
+ { 0, 1, 3, 6, 10, 21, 256, 0 },
+ { 0, 1, 3, 6, 10, 21, 1024, 0 },
+#if CONFIG_EXT_TX
+ { 0, 1, 3, 6, 10, 18, 32, 0 },
+ { 0, 1, 3, 6, 10, 18, 32, 0 },
+#endif // CONFIG_EXT_TX
+};
+
const uint8_t vp10_coefband_trans_8x8plus[1024] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 5,
// beyond MAXBAND_INDEX+1 all values are filled as 5
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -125,6 +147,13 @@
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
+#if CONFIG_EXT_TX
+const uint8_t vp10_coefband_trans_8x4_4x8[32] = {
+ 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
+ 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+#endif // CONFIG_EXT_TX
+
const uint8_t vp10_coefband_trans_4x4[16] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
};
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index d0ca880..baaa515 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -155,11 +155,28 @@
#define MAXBAND_INDEX 21
DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x8plus[1024]);
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x4_4x8[32]);
+#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]);
+DECLARE_ALIGNED(16, extern const uint16_t,
+ band_count_table[TX_SIZES_ALL][8]);
+DECLARE_ALIGNED(16, extern const uint16_t,
+ band_cum_count_table[TX_SIZES_ALL][8]);
+
static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
- return tx_size == TX_4X4 ? vp10_coefband_trans_4x4
- : vp10_coefband_trans_8x8plus;
+ switch (tx_size) {
+ case TX_4X4:
+ return vp10_coefband_trans_4x4;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ case TX_8X4:
+ return vp10_coefband_trans_8x4_4x8;
+#endif // CONFIG_EXT_TX
+ default:
+ return vp10_coefband_trans_8x8plus;
+ }
}
// 128 lists of probabilities are stored for the following ONE node probs:
@@ -198,7 +215,8 @@
return (a != 0) + (b != 0);
}
-static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+static INLINE int get_entropy_context(TX_SIZE tx_size,
+ const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l) {
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
@@ -207,6 +225,16 @@
above_ec = a[0] != 0;
left_ec = l[0] != 0;
break;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ above_ec = a[0] != 0;
+ left_ec = !!*(const uint16_t *)l;
+ break;
+ case TX_8X4:
+ above_ec = !!*(const uint16_t *)a;
+ left_ec = l[0] != 0;
+ break;
+#endif // CONFIG_EXT_TX
case TX_8X8:
above_ec = !!*(const uint16_t *)a;
left_ec = !!*(const uint16_t *)l;
@@ -223,7 +251,6 @@
assert(0 && "Invalid transform size.");
break;
}
-
return combine_entropy_contexts(above_ec, left_ec);
}
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index d1ce121..a93becc 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -137,6 +137,14 @@
#define TX_32X32 ((TX_SIZE)3) // 32x32 transform
#define TX_SIZES ((TX_SIZE)4)
+#if CONFIG_EXT_TX
+#define TX_4X8 ((TX_SIZE)4) // 4x8 transform
+#define TX_8X4 ((TX_SIZE)5) // 8x4 transform
+#define TX_SIZES_ALL ((TX_SIZE)6) // Includes rectangular transforms
+#else
+#define TX_SIZES_ALL ((TX_SIZE)4)
+#endif // CONFIG_EXT_TX
+
#define MAX_TX_SIZE_LOG2 5
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MIN_TX_SIZE_LOG2 2
@@ -170,10 +178,10 @@
} TX_TYPE_1D;
typedef enum {
- DCT_DCT = 0, // DCT in both horizontal and vertical
- ADST_DCT = 1, // ADST in vertical, DCT in horizontal
- DCT_ADST = 2, // DCT in vertical, ADST in horizontal
- ADST_ADST = 3, // ADST in both directions
+ DCT_DCT = 0, // DCT in both horizontal and vertical
+ ADST_DCT = 1, // ADST in vertical, DCT in horizontal
+ DCT_ADST = 2, // DCT in vertical, ADST in horizontal
+ ADST_ADST = 3, // ADST in both directions
#if CONFIG_EXT_TX
FLIPADST_DCT = 4,
DCT_FLIPADST = 5,
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 1a573bd..9b70857 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -144,7 +144,7 @@
static void maybe_flip_strides(uint8_t **dst, int *dstride,
tran_low_t **src, int *sstride,
- int tx_type, int size) {
+ int tx_type, int sizey, int sizex) {
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
@@ -163,19 +163,19 @@
case FLIPADST_ADST:
case V_FLIPADST:
// flip UD
- FLIPUD_PTR(*dst, *dstride, size);
+ FLIPUD_PTR(*dst, *dstride, sizey);
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
case H_FLIPADST:
// flip LR
- FLIPUD_PTR(*src, *sstride, size);
+ FLIPUD_PTR(*src, *sstride, sizex);
break;
case FLIPADST_FLIPADST:
// flip UD
- FLIPUD_PTR(*dst, *dstride, size);
+ FLIPUD_PTR(*dst, *dstride, sizey);
// flip LR
- FLIPUD_PTR(*src, *sstride, size);
+ FLIPUD_PTR(*src, *sstride, sizex);
break;
default:
assert(0);
@@ -445,7 +445,7 @@
static void maybe_flip_strides16(uint16_t **dst, int *dstride,
tran_low_t **src, int *sstride,
- int tx_type, int size) {
+ int tx_type, int sizey, int sizex) {
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
@@ -464,19 +464,19 @@
case FLIPADST_ADST:
case V_FLIPADST:
// flip UD
- FLIPUD_PTR(*dst, *dstride, size);
+ FLIPUD_PTR(*dst, *dstride, sizey);
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
case H_FLIPADST:
// flip LR
- FLIPUD_PTR(*src, *sstride, size);
+ FLIPUD_PTR(*src, *sstride, sizex);
break;
case FLIPADST_FLIPADST:
// flip UD
- FLIPUD_PTR(*dst, *dstride, size);
+ FLIPUD_PTR(*dst, *dstride, sizey);
// flip LR
- FLIPUD_PTR(*src, *sstride, size);
+ FLIPUD_PTR(*src, *sstride, sizex);
break;
default:
assert(0);
@@ -536,7 +536,7 @@
}
#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4);
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif
// Sum with the destination
@@ -549,6 +549,116 @@
}
}
+void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ static const transform_2d IHT_4x8[] = {
+ { idct8_c, idct4_c }, // DCT_DCT
+ { iadst8_c, idct4_c }, // ADST_DCT
+ { idct8_c, iadst4_c }, // DCT_ADST
+ { iadst8_c, iadst4_c }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { iadst8_c, idct4_c }, // FLIPADST_DCT
+ { idct8_c, iadst4_c }, // DCT_FLIPADST
+ { iadst8_c, iadst4_c }, // FLIPADST_FLIPADST
+ { iadst8_c, iadst4_c }, // ADST_FLIPADST
+ { iadst8_c, iadst4_c }, // FLIPADST_ADST
+ { iidtx8_c, iidtx4_c }, // IDTX
+ { idct8_c, iidtx4_c }, // V_DCT
+ { iidtx8_c, idct4_c }, // H_DCT
+ { iadst8_c, iidtx4_c }, // V_ADST
+ { iidtx8_c, iadst4_c }, // H_ADST
+ { iadst8_c, iidtx4_c }, // V_FLIPADST
+ { iidtx8_c, iadst4_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+
+ int i, j;
+ tran_low_t out[4][8], outtmp[4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 8;
+
+ // inverse transform row vectors and transpose
+ for (i = 0; i < 8; ++i) {
+ IHT_4x8[tx_type].rows(input, outtmp);
+ for (j = 0; j < 4; ++j)
+ out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+ input += 4;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 4; ++i) {
+ IHT_4x8[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 4);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 4; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+ }
+ }
+}
+
+void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ static const transform_2d IHT_8x4[] = {
+ { idct4_c, idct8_c }, // DCT_DCT
+ { iadst4_c, idct8_c }, // ADST_DCT
+ { idct4_c, iadst8_c }, // DCT_ADST
+ { iadst4_c, iadst8_c }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { iadst4_c, idct8_c }, // FLIPADST_DCT
+ { idct4_c, iadst8_c }, // DCT_FLIPADST
+ { iadst4_c, iadst8_c }, // FLIPADST_FLIPADST
+ { iadst4_c, iadst8_c }, // ADST_FLIPADST
+ { iadst4_c, iadst8_c }, // FLIPADST_ADST
+ { iidtx4_c, iidtx8_c }, // IDTX
+ { idct4_c, iidtx8_c }, // V_DCT
+ { iidtx4_c, idct8_c }, // H_DCT
+ { iadst4_c, iidtx8_c }, // V_ADST
+ { iidtx4_c, iadst8_c }, // H_ADST
+ { iadst4_c, iidtx8_c }, // V_FLIPADST
+ { iidtx4_c, iadst8_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+
+ int i, j;
+ tran_low_t out[8][4], outtmp[8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 4;
+
+ // inverse transform row vectors and transpose
+ for (i = 0; i < 4; ++i) {
+ IHT_8x4[tx_type].rows(input, outtmp);
+ for (j = 0; j < 8; ++j)
+ out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+ input += 8;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 8; ++i) {
+ IHT_8x4[tx_type].cols(out[i], out[i]);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 8);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 8; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+ }
+ }
+}
+
void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_8[] = {
@@ -599,7 +709,7 @@
}
#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8);
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif
// Sum with the destination
@@ -662,7 +772,7 @@
}
#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16);
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif
// Sum with the destination
@@ -723,7 +833,7 @@
IHT_32[tx_type].cols(out[i], out[i]);
}
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32);
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
// Sum with the destination
for (i = 0; i < 32; ++i) {
@@ -840,6 +950,20 @@
}
}
+#if CONFIG_EXT_TX
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, TX_TYPE tx_type) {
+ (void) eob;
+ vp10_iht8x4_32_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, TX_TYPE tx_type) {
+ (void) eob;
+ vp10_iht4x8_32_add(input, dest, stride, tx_type);
+}
+#endif // CONFIG_EXT_TX
+
void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
int stride, int eob, TX_TYPE tx_type) {
switch (tx_type) {
@@ -1002,7 +1126,7 @@
}
#if CONFIG_EXT_TX
- maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4);
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif
// Sum with the destination
@@ -1016,6 +1140,118 @@
}
}
+#if CONFIG_EXT_TX
+void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_4x8[] = {
+ { vpx_highbd_idct8_c, vpx_highbd_idct4_c }, // DCT_DCT
+ { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // ADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // ADST_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // FLIPADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST
+ { highbd_iidtx8_c, highbd_iidtx4_c }, // IDTX
+ { vpx_highbd_idct8_c, highbd_iidtx4_c }, // V_DCT
+ { highbd_iidtx8_c, vpx_highbd_idct4_c }, // H_DCT
+ { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_ADST
+ { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_ADST
+ { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_FLIPADST
+ { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_FLIPADST
+ };
+
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[4][8], outtmp[4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 8;
+
+ // inverse transform row vectors, and transpose
+ for (i = 0; i < 8; ++i) {
+ HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < 4; ++j)
+ out[j][i] = (tran_low_t)highbd_dct_const_round_shift(outtmp[j] * Sqrt2,
+ bd);
+ input += 4;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 4; ++i) {
+ HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 4);
+
+ // Sum with the destination
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 4; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 5), bd);
+ }
+ }
+}
+
+void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_8x4[] = {
+ { vpx_highbd_idct4_c, vpx_highbd_idct8_c }, // DCT_DCT
+ { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // ADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_ADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // ADST_ADST
+ { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // FLIPADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST
+ { highbd_iidtx4_c, highbd_iidtx8_c }, // IDTX
+ { vpx_highbd_idct4_c, highbd_iidtx8_c }, // V_DCT
+ { highbd_iidtx4_c, vpx_highbd_idct8_c }, // H_DCT
+ { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_ADST
+ { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_ADST
+ { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_FLIPADST
+ { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_FLIPADST
+ };
+
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[8][4], outtmp[8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 4;
+
+ // inverse transform row vectors, and transpose
+ for (i = 0; i < 4; ++i) {
+ HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < 8; ++j)
+ out[j][i] = (tran_low_t)highbd_dct_const_round_shift(outtmp[j] * Sqrt2,
+ bd);
+ input += 8;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 8; ++i) {
+ HIGH_IHT_8x4[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 8);
+
+ // Sum with the destination
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 8; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 5), bd);
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_8[] = {
@@ -1068,7 +1304,7 @@
}
#if CONFIG_EXT_TX
- maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8);
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif
// Sum with the destination
@@ -1134,7 +1370,7 @@
}
#if CONFIG_EXT_TX
- maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16);
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif
// Sum with the destination
@@ -1198,7 +1434,7 @@
HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
}
- maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32);
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
// Sum with the destination
for (i = 0; i < 32; ++i) {
@@ -1320,6 +1556,22 @@
}
}
+#if CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd,
+ TX_TYPE tx_type) {
+ (void) eob;
+ vp10_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd,
+ TX_TYPE tx_type) {
+ (void) eob;
+ vp10_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
+}
+#endif // CONFIG_EXT_TX
+
void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd,
TX_TYPE tx_type) {
@@ -1454,6 +1706,14 @@
case TX_8X8:
vp10_inv_txfm_add_8x8(input, dest, stride, eob, tx_type);
break;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ vp10_inv_txfm_add_4x8(input, dest, stride, eob, tx_type);
+ break;
+ case TX_8X4:
+ vp10_inv_txfm_add_8x4(input, dest, stride, eob, tx_type);
+ break;
+#endif // CONFIG_EXT_TX
case TX_4X4:
// this is like vp10_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
@@ -1486,6 +1746,14 @@
case TX_8X8:
vp10_highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
break;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ vp10_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
+ break;
+ case TX_8X4:
+ vp10_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
+ break;
+#endif // CONFIG_EXT_TX
case TX_4X4:
// this is like vp10_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
diff --git a/vp10/common/idct.h b/vp10/common/idct.h
index 5d52314..f20a154 100644
--- a/vp10/common/idct.h
+++ b/vp10/common/idct.h
@@ -66,6 +66,12 @@
void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
int stride, int eob, TX_TYPE tx_type, int lossless);
+#if CONFIG_EXT_TX
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, TX_TYPE tx_type);
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, TX_TYPE tx_type);
+#endif // CONFIG_EXT_TX
void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
int stride, int eob, TX_TYPE tx_type);
void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
@@ -88,6 +94,12 @@
void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd, TX_TYPE tx_type,
int lossless);
+#if CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd, TX_TYPE tx_type);
+void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd, TX_TYPE tx_type);
+#endif // CONFIG_EXT_TX
void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd, TX_TYPE tx_type);
void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index 55715d7..59446c2 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -722,8 +722,11 @@
LOOP_FILTER_MASK *lfm) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
const BLOCK_SIZE block_size = mbmi->sb_type;
- const TX_SIZE tx_size_y = mbmi->tx_size;
- const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
+  // TODO(debargha): Check if masks can be set up correctly when
+  // rectangular transforms are used with the EXT_TX expt.
+ const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
+ const TX_SIZE tx_size_uv =
+ get_uv_tx_size_impl(mbmi->tx_size, block_size, 1, 1);
const int filter_level = get_filter_level(lfi_n, mbmi);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
uint64_t *const above_y = &lfm->above_y[tx_size_y];
@@ -803,7 +806,7 @@
#endif // CONFIG_SUPERTX
LOOP_FILTER_MASK *lfm) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
- const TX_SIZE tx_size_y = mbmi->tx_size;
+ const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
#if CONFIG_SUPERTX
const BLOCK_SIZE block_size =
supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
@@ -1267,8 +1270,8 @@
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
- TX_SIZE tx_size_c = tx_size;
- TX_SIZE tx_size_r = tx_size;
+ TX_SIZE tx_size_c = num_4x4_blocks_wide_txsize_log2_lookup[tx_size];
+ TX_SIZE tx_size_r = num_4x4_blocks_high_txsize_log2_lookup[tx_size];
int tx_size_mask = 0;
// Filter level can vary per MI
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index d4ae980..9b73eb2 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -177,10 +177,11 @@
const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
const int has_above = xd->up_available;
const int has_left = xd->left_available;
- int above_ctx = (has_above && !above_mbmi->skip) ? (int)above_mbmi->tx_size
- : max_tx_size;
- int left_ctx = (has_left && !left_mbmi->skip) ? (int)left_mbmi->tx_size
- : max_tx_size;
+ int above_ctx = (has_above && !above_mbmi->skip) ?
+ (int)txsize_sqr_map[above_mbmi->tx_size] : max_tx_size;
+ int left_ctx = (has_left && !left_mbmi->skip) ?
+ (int)txsize_sqr_map[left_mbmi->tx_size] : max_tx_size;
+ assert(xd->mi[0]->mbmi.sb_type >= BLOCK_8X8);
if (!has_left)
left_ctx = above_ctx;
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index 89ff13b..fe98373 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -673,7 +673,7 @@
INTRA_FILTER filter_type) {
const int dx = (int)dr_intra_derivative[angle][0];
const int dy = (int)dr_intra_derivative[angle][1];
- const int bs = 4 << tx_size;
+ const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
assert(angle > 0 && angle < 270);
if (angle > 0 && angle < 90) {
@@ -1159,7 +1159,7 @@
DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]);
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
- const int bs = 4 << tx_size;
+ const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
const uint16_t *above_ref = ref - ref_stride;
@@ -1331,7 +1331,7 @@
DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
- const int bs = 4 << tx_size;
+ const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
#if CONFIG_EXT_INTRA
@@ -1491,7 +1491,7 @@
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
int col_off, int row_off, int plane) {
- const int txw = (1 << tx_size);
+ const int txw = num_4x4_blocks_wide_txsize_lookup[tx_size];
const int have_top = row_off || xd->up_available;
const int have_left = col_off || xd->left_available;
const int x = col_off * 4;
@@ -1531,7 +1531,7 @@
(hpx - y - txpx);
if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
- const int bs = 4 * (1 << tx_size);
+ const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
const int stride = 4 * (1 << bwl_in);
int r, c;
uint8_t *map = NULL;
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 8cfeb97..4c176d3 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -49,6 +49,50 @@
13, 11, 14, 15,
};
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = {
+ 0, 1, 4, 5, 2, 8, 6, 9,
+ 10, 3, 12, 7, 13, 11, 14, 16,
+ 17, 15, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
+ 0, 4, 8, 12, 16, 20, 24, 28,
+ 1, 5, 9, 13, 17, 21, 25, 29,
+ 2, 6, 10, 14, 18, 22, 26, 30,
+ 3, 7, 11, 15, 19, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = {
+ 0, 1, 8, 9, 2, 16, 10, 17,
+ 18, 3, 24, 11, 25, 19, 26, 4,
+ 12, 27, 20, 5, 28, 13, 21, 29,
+ 6, 14, 22, 30, 7, 15, 23, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = {
+ 0, 8, 16, 24, 1, 9, 17, 25,
+ 2, 10, 18, 26, 3, 11, 19, 27,
+ 4, 12, 20, 28, 5, 13, 21, 29,
+ 6, 14, 22, 30, 7, 15, 23, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+};
+#endif // CONFIG_EXT_TX
+
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = {
0, 8, 1, 16, 9, 2, 17, 24,
10, 3, 18, 25, 32, 11, 4, 26,
@@ -824,6 +868,86 @@
9, 12, 7, 10, 10, 13, 11, 14, 0, 0,
};
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 4,
+ 1, 1, 4, 4, 2, 5, 5, 8,
+ 6, 9, 2, 2, 8, 8, 3, 6,
+ 9, 12, 7, 10, 10, 13, 12, 12,
+ 13, 16, 11, 14, 14, 17, 15, 18,
+ 16, 16, 17, 20, 18, 21, 19, 22,
+ 20, 20, 21, 24, 22, 25, 23, 26,
+ 24, 24, 25, 28, 26, 29, 27, 30,
+ 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 4, 4, 8, 8,
+ 12, 12, 16, 16, 20, 20, 24, 24,
+ 0, 0, 1, 4, 5, 8, 9, 12,
+ 13, 16, 17, 20, 21, 24, 25, 28,
+ 1, 1, 2, 5, 6, 9, 10, 13,
+ 14, 17, 18, 21, 22, 25, 26, 29,
+ 2, 2, 3, 6, 7, 10, 11, 14,
+ 15, 18, 19, 22, 23, 26, 27, 30,
+ 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2,
+ 0, 0, 1, 4, 2, 5, 3, 6,
+ 4, 4, 5, 8, 6, 9, 7, 10,
+ 8, 8, 9, 12, 10, 13, 11, 14,
+ 12, 12, 13, 16, 14, 17, 15, 18,
+ 16, 16, 17, 20, 18, 21, 19, 22,
+ 20, 20, 21, 24, 22, 25, 23, 26,
+ 24, 24, 25, 28, 26, 29, 27, 30,
+ 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 8,
+ 1, 1, 8, 8, 2, 9, 9, 16,
+ 10, 17, 2, 2, 16, 16, 3, 10,
+ 17, 24, 11, 18, 18, 25, 3, 3,
+ 4, 11, 19, 26, 12, 19, 4, 4,
+ 20, 27, 5, 12, 13, 20, 21, 28,
+ 5, 5, 6, 13, 14, 21, 22, 29,
+ 6, 6, 7, 14, 15, 22, 23, 30,
+ 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 8, 8, 16, 16,
+ 0, 0, 1, 8, 9, 16, 17, 24,
+ 1, 1, 2, 9, 10, 17, 18, 25,
+ 2, 2, 3, 10, 11, 18, 19, 26,
+ 3, 3, 4, 11, 12, 19, 20, 27,
+ 4, 4, 5, 12, 13, 20, 21, 28,
+ 5, 5, 6, 13, 14, 21, 22, 29,
+ 6, 6, 7, 14, 15, 22, 23, 30,
+ 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2,
+ 3, 3, 4, 4, 5, 5, 6, 6,
+ 0, 0, 1, 8, 2, 9, 3, 10,
+ 4, 11, 5, 12, 6, 13, 7, 14,
+ 8, 8, 9, 16, 10, 17, 11, 18,
+ 12, 19, 13, 20, 14, 21, 15, 22,
+ 16, 16, 17, 24, 18, 25, 19, 26,
+ 20, 27, 21, 28, 22, 29, 23, 30,
+ 0, 0
+};
+#endif // CONFIG_EXT_TX
+
DECLARE_ALIGNED(16, static const int16_t,
col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 8, 8, 8, 0, 16, 16, 1, 8,
@@ -2259,6 +2383,50 @@
};
#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_4x8[32]) = {
+ 0, 1, 4, 9, 2, 3, 6, 11,
+ 5, 7, 8, 13, 10, 12, 14, 17,
+ 15, 16, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_4x8[32]) = {
+ 0, 8, 16, 24, 1, 9, 17, 25,
+ 2, 10, 18, 26, 3, 11, 19, 27,
+ 4, 12, 20, 28, 5, 13, 21, 29,
+ 6, 14, 22, 30, 7, 15, 23, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_4x8[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x4[32]) = {
+ 0, 1, 4, 9, 15, 19, 24, 28,
+ 2, 3, 6, 11, 16, 21, 25, 29,
+ 5, 7, 8, 13, 18, 22, 26, 30,
+ 10, 12, 14, 17, 20, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x4[32]) = {
+ 0, 4, 8, 12, 16, 20, 24, 28,
+ 1, 5, 9, 13, 17, 21, 25, 29,
+ 2, 6, 10, 14, 18, 22, 26, 30,
+ 3, 7, 11, 15, 19, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x4[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+};
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x8[64]) = {
0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, 2, 10,
18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, 4, 12, 20,
@@ -2943,13 +3111,6 @@
};
#endif // CONFIG_EXT_TX
-const scan_order vp10_default_scan_orders[TX_SIZES] = {
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
- {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
-};
-
#if CONFIG_EXT_TX
const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ // TX_4X4
@@ -3039,7 +3200,7 @@
}
};
-const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = {
+const scan_order vp10_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ // TX_4X4
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
@@ -3126,6 +3287,40 @@
{mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
{mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
{mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ }, { // TX_4X8
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors},
+ {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors},
+ {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors},
+ {mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors},
+ {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors},
+ {mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors},
+ {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors},
+ {mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors},
+ }, { // TX_8X4
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors},
+ {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors},
+ {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors},
+ {mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors},
+ {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors},
+ {mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors},
+ {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors},
+ {mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors},
}
};
diff --git a/vp10/common/scan.h b/vp10/common/scan.h
index aadae40..92a8e6b 100644
--- a/vp10/common/scan.h
+++ b/vp10/common/scan.h
@@ -29,7 +29,6 @@
const int16_t *neighbors;
} scan_order;
-extern const scan_order vp10_default_scan_orders[TX_SIZES];
extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES];
static INLINE int get_coef_context(const int16_t *neighbors,
@@ -44,7 +43,7 @@
}
#if CONFIG_EXT_TX
-extern const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES];
+extern const scan_order vp10_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size,
TX_TYPE tx_type) {
diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c
index 85a33ba..071419e 100644
--- a/vp10/common/vp10_inv_txfm2d.c
+++ b/vp10/common/vp10_inv_txfm2d.c
@@ -82,7 +82,7 @@
}
TXFM_2D_FLIP_CFG vp10_get_inv_txfm_64x64_cfg(int tx_type) {
- TXFM_2D_FLIP_CFG cfg;
+ TXFM_2D_FLIP_CFG cfg = {0, 0, NULL};
switch (tx_type) {
case DCT_DCT:
cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index 8f87b02..ab2fa16 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -83,6 +83,12 @@
add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht4x4_16_add/;
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht8x8_64_add/;
@@ -143,6 +149,12 @@
add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht4x4_16_add sse2/;
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht8x8_64_add sse2/;
@@ -206,6 +218,12 @@
add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht4x4_16_add/;
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht8x8_64_add/;
@@ -242,6 +260,12 @@
add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht4x4_16_add sse2 neon dspr2/;
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht8x8_64_add sse2 neon dspr2/;
@@ -348,6 +372,12 @@
add_proto qw/void vp10_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/vp10_highbd_iht4x4_16_add/;
+ add_proto qw/void vp10_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht8x4_32_add/;
+
+ add_proto qw/void vp10_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht4x8_32_add/;
+
add_proto qw/void vp10_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/vp10_highbd_iht8x8_64_add/;
@@ -407,6 +437,12 @@
add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht4x4 sse2/;
+ add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x4/;
+
+ add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht4x8/;
+
add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht8x8 sse2/;
@@ -422,6 +458,12 @@
add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht4x4 sse2/;
+ add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x4/;
+
+ add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht4x8/;
+
add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht8x8 sse2/;
@@ -699,6 +741,12 @@
add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_highbd_fht4x4 sse4_1/;
+ add_proto qw/void vp10_highbd_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht8x4/;
+
+ add_proto qw/void vp10_highbd_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht4x8/;
+
add_proto qw/void vp10_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_highbd_fht8x8/;
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 66b44a3..6eab340 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -249,15 +249,16 @@
dqcoeff[0] = 0;
} else {
if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
- memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
+ memset(dqcoeff, 0, 4 * 4 * num_4x4_blocks_wide_txsize_lookup[tx_size] *
+ sizeof(dqcoeff[0]));
#if CONFIG_EXT_TX
else
- memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
+ memset(dqcoeff, 0, get_tx2d_size(tx_size) * sizeof(dqcoeff[0]));
#else
else if (tx_size == TX_32X32 && eob <= 34)
memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
else
- memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
+ memset(dqcoeff, 0, get_tx2d_size(tx_size) * sizeof(dqcoeff[0]));
#endif
}
}
@@ -285,8 +286,8 @@
mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
vp10_predict_intra_block(xd, pd->n4_wl, pd->n4_hl, tx_size, mode,
- dst, pd->dst.stride, dst, pd->dst.stride,
- col, row, plane);
+ dst, pd->dst.stride, dst, pd->dst.stride,
+ col, row, plane);
if (!mbmi->skip) {
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
@@ -323,14 +324,18 @@
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
- if (tx_size == plane_tx_size) {
+ if (tx_size == plane_tx_size
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ || plane_tx_size >= TX_SIZES
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ ) {
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const scan_order *sc = get_scan(tx_size, tx_type, 1);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, plane_tx_size);
+ const scan_order *sc = get_scan(plane_tx_size, tx_type, 1);
const int eob = vp10_decode_block_tokens(xd, plane, sc,
- blk_col, blk_row, tx_size,
+ blk_col, blk_row, plane_tx_size,
tx_type, r, mbmi->segment_id);
- inverse_transform_block(xd, plane, tx_type, tx_size,
+ inverse_transform_block(xd, plane, tx_type, plane_tx_size,
&pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
pd->dst.stride, eob);
*eob_total += eob;
@@ -344,7 +349,7 @@
for (i = 0; i < 4; ++i) {
const int offsetr = blk_row + ((i >> 1) << bsl);
const int offsetc = blk_col + ((i & 0x01) << bsl);
- int step = 1 << (2 * (tx_size - 1));
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
continue;
@@ -431,7 +436,6 @@
set_skip_context(xd, mi_row, mi_col);
-
#if CONFIG_VAR_TX
xd->max_tx_size = max_txsize_lookup[bsize];
#endif
@@ -1321,7 +1325,8 @@
: mbmi->tx_size;
const int num_4x4_w = pd->n4_w;
const int num_4x4_h = pd->n4_h;
- const int step = (1 << tx_size);
+ const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
int row, col;
const int max_blocks_wide = num_4x4_w +
(xd->mb_to_right_edge >= 0 ?
@@ -1330,8 +1335,8 @@
(xd->mb_to_bottom_edge >= 0 ?
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- for (row = 0; row < max_blocks_high; row += step)
- for (col = 0; col < max_blocks_wide; col += step)
+ for (row = 0; row < max_blocks_high; row += stepr)
+ for (col = 0; col < max_blocks_wide; col += stepc)
predict_and_reconstruct_intra_block(xd,
r,
mbmi, plane,
@@ -1409,15 +1414,20 @@
int row, col;
#if CONFIG_VAR_TX
// TODO(jingning): This can be simplified for decoder performance.
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(
+ VPXMAX(bsize, BLOCK_8X8), pd);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ const TX_SIZE max_tx_size = plane ?
+ max_txsize_lookup[plane_bsize] : max_txsize_rect_lookup[plane_bsize];
+#else
const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
- const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
- int bw = num_4x4_blocks_wide_lookup[txb_size];
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ int bw = num_4x4_blocks_wide_txsize_lookup[max_tx_size];
+ int bh = num_4x4_blocks_high_txsize_lookup[max_tx_size];
+ const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
int block = 0;
- const int step = 1 << (max_tx_size << 1);
- for (row = 0; row < num_4x4_h; row += bw) {
+ for (row = 0; row < num_4x4_h; row += bh) {
for (col = 0; col < num_4x4_w; col += bw) {
decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize,
block, row, col, max_tx_size, &eobtotal);
@@ -1428,7 +1438,8 @@
const TX_SIZE tx_size =
plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
: mbmi->tx_size;
- const int step = (1 << tx_size);
+ const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
const int max_blocks_wide = num_4x4_w +
(xd->mb_to_right_edge >= 0 ?
0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
@@ -1436,8 +1447,8 @@
(xd->mb_to_bottom_edge >= 0 ?
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- for (row = 0; row < max_blocks_high; row += step)
- for (col = 0; col < max_blocks_wide; col += step)
+ for (row = 0; row < max_blocks_high; row += stepr)
+ for (col = 0; col < max_blocks_wide; col += stepc)
eobtotal += reconstruct_inter_block(xd,
r,
mbmi->segment_id,
@@ -1831,7 +1842,8 @@
const TX_SIZE tx_size =
i ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
: mbmi->tx_size;
- const int step = (1 << tx_size);
+ const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
const int max_blocks_wide = num_4x4_w +
(xd->mb_to_right_edge >= 0 ?
0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
@@ -1839,8 +1851,8 @@
(xd->mb_to_bottom_edge >= 0 ?
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- for (row = 0; row < max_blocks_high; row += step)
- for (col = 0; col < max_blocks_wide; col += step)
+ for (row = 0; row < max_blocks_high; row += stepr)
+ for (col = 0; col < max_blocks_wide; col += stepc)
eobtotal += reconstruct_inter_block(xd,
r,
mbmi->segment_id_supertx,
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 8528370..e036ceb 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -233,7 +233,7 @@
}
#if CONFIG_VAR_TX
-static void read_tx_size_inter(VP10_COMMON *cm, MACROBLOCKD *xd,
+static void read_tx_size_vartx(VP10_COMMON *cm, MACROBLOCKD *xd,
MB_MODE_INFO *mbmi, FRAME_COUNTS *counts,
TX_SIZE tx_size, int blk_row, int blk_col,
vp10_reader *r) {
@@ -279,14 +279,14 @@
for (i = 0; i < 4; ++i) {
int offsetr = blk_row + ((i >> 1) << bsl);
int offsetc = blk_col + ((i & 0x01) << bsl);
- read_tx_size_inter(cm, xd, mbmi, counts,
+ read_tx_size_vartx(cm, xd, mbmi, counts,
tx_size - 1, offsetr, offsetc, r);
}
} else {
int idx, idy;
inter_tx_size[0][0] = tx_size;
- for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
- for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
+ for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
+ for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
inter_tx_size[idy][idx] = tx_size;
mbmi->tx_size = tx_size;
if (counts)
@@ -309,17 +309,44 @@
return (TX_SIZE)tx_size;
}
-static TX_SIZE read_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
- int allow_select, vp10_reader *r) {
+static TX_SIZE read_tx_size_intra(VP10_COMMON *cm, MACROBLOCKD *xd,
+ vp10_reader *r) {
TX_MODE tx_mode = cm->tx_mode;
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
if (xd->lossless[xd->mi[0]->mbmi.segment_id])
return TX_4X4;
- if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
- return read_selected_tx_size(cm, xd, max_tx_size, r);
- else
- return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
+ if (bsize >= BLOCK_8X8) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ if (tx_mode == TX_MODE_SELECT) {
+ return read_selected_tx_size(cm, xd, max_tx_size, r);
+ } else {
+ return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
+ }
+ } else {
+ return TX_4X4;
+ }
+}
+
+static TX_SIZE read_tx_size_inter(VP10_COMMON *cm, MACROBLOCKD *xd,
+ int allow_select, vp10_reader *r) {
+ TX_MODE tx_mode = cm->tx_mode;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id])
+ return TX_4X4;
+ if (bsize >= BLOCK_8X8) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ if (allow_select && tx_mode == TX_MODE_SELECT) {
+ return read_selected_tx_size(cm, xd, max_tx_size, r);
+ } else {
+ return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
+ }
+ } else {
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && !CONFIG_VAR_TX
+ return max_txsize_rect_lookup[bsize];
+#else
+ return TX_4X4;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && !CONFIG_VAR_TX
+ }
}
static int dec_get_segment_id(const VP10_COMMON *cm, const uint8_t *segment_ids,
@@ -577,7 +604,7 @@
mbmi->segment_id = read_intra_segment_id(cm, xd, mi_offset, x_mis, y_mis, r);
mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
- mbmi->tx_size = read_tx_size(cm, xd, 1, r);
+ mbmi->tx_size = read_tx_size_intra(cm, xd, r);
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE;
@@ -1670,14 +1697,18 @@
int idx, idy;
for (idy = 0; idy < height; idy += bs)
for (idx = 0; idx < width; idx += bs)
- read_tx_size_inter(cm, xd, mbmi, xd->counts, max_tx_size,
+ read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size,
idy, idx, r);
if (xd->counts) {
const int ctx = get_tx_size_context(xd);
++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][mbmi->tx_size];
}
} else {
- mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+ if (inter_block)
+ mbmi->tx_size = read_tx_size_inter(cm, xd, !mbmi->skip, r);
+ else
+ mbmi->tx_size = read_tx_size_intra(cm, xd, r);
+
if (inter_block) {
const int width = num_4x4_blocks_wide_lookup[bsize];
const int height = num_4x4_blocks_high_lookup[bsize];
@@ -1691,7 +1722,10 @@
set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w);
}
#else
- mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+ if (inter_block)
+ mbmi->tx_size = read_tx_size_inter(cm, xd, !mbmi->skip, r);
+ else
+ mbmi->tx_size = read_tx_size_intra(cm, xd, r);
#endif // CONFIG_VAR_TX
#if CONFIG_SUPERTX
}
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c
index cc3b18b..7cbf01e 100644
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -55,12 +55,13 @@
int ctx, const int16_t *scan, const int16_t *nb,
vp10_reader *r) {
FRAME_COUNTS *counts = xd->counts;
- const int max_eob = 16 << (tx_size << 1);
+ const int max_eob = get_tx2d_size(tx_size);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
int band, c = 0;
+ const int tx_size_ctx = txsize_sqr_map[tx_size];
const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
- fc->coef_probs[tx_size][type][ref];
+ fc->coef_probs[tx_size_ctx][type][ref];
const vpx_prob *prob;
unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
@@ -80,8 +81,8 @@
const uint8_t *cat6_prob;
if (counts) {
- coef_counts = counts->coef[tx_size][type][ref];
- eob_branch_count = counts->eob_branch[tx_size][type][ref];
+ coef_counts = counts->coef[tx_size_ctx][type][ref];
+ eob_branch_count = counts->eob_branch[tx_size_ctx][type][ref];
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -249,15 +250,16 @@
int ctx, const int16_t *scan, const int16_t *nb,
struct AnsDecoder *const ans) {
FRAME_COUNTS *counts = xd->counts;
- const int max_eob = 16 << (tx_size << 1);
+ const int max_eob = get_tx2d_size(tx_size);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
int band, c = 0;
int skip_eob = 0;
+ const int tx_size_ctx = txsize_sqr_map[tx_size];
const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
- fc->coef_probs[tx_size][type][ref];
+ fc->coef_probs[tx_size_ctx][type][ref];
const rans_dec_lut(*coef_cdfs)[COEFF_CONTEXTS] =
- fc->coef_cdfs[tx_size][type][ref];
+ fc->coef_cdfs[tx_size_ctx][type][ref];
const vpx_prob *prob;
const rans_dec_lut *cdf;
unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
@@ -280,8 +282,8 @@
dq_shift = get_tx_scale(xd, tx_type, tx_size);
if (counts) {
- coef_counts = counts->coef[tx_size][type][ref];
- eob_branch_count = counts->eob_branch[tx_size][type][ref];
+ coef_counts = counts->coef[tx_size_ctx][type][ref];
+ eob_branch_count = counts->eob_branch[tx_size_ctx][type][ref];
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -425,23 +427,24 @@
int aoff, int loff) {
ENTROPY_CONTEXT *const a = pd->above_context + aoff;
ENTROPY_CONTEXT *const l = pd->left_context + loff;
- const int tx_size_in_blocks = 1 << tx_size;
+ const int tx_w_in_blocks = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int tx_h_in_blocks = num_4x4_blocks_high_txsize_lookup[tx_size];
// above
if (has_eob && xd->mb_to_right_edge < 0) {
int i;
const int blocks_wide = pd->n4_w +
(xd->mb_to_right_edge >> (5 + pd->subsampling_x));
- int above_contexts = tx_size_in_blocks;
+ int above_contexts = tx_w_in_blocks;
if (above_contexts + aoff > blocks_wide)
above_contexts = blocks_wide - aoff;
for (i = 0; i < above_contexts; ++i)
a[i] = has_eob;
- for (i = above_contexts; i < tx_size_in_blocks; ++i)
+ for (i = above_contexts; i < tx_w_in_blocks; ++i)
a[i] = 0;
} else {
- memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_w_in_blocks);
}
// left
@@ -449,16 +452,16 @@
int i;
const int blocks_high = pd->n4_h +
(xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- int left_contexts = tx_size_in_blocks;
+ int left_contexts = tx_h_in_blocks;
if (left_contexts + loff > blocks_high)
left_contexts = blocks_high - loff;
for (i = 0; i < left_contexts; ++i)
l[i] = has_eob;
- for (i = left_contexts; i < tx_size_in_blocks; ++i)
+ for (i = left_contexts; i < tx_h_in_blocks; ++i)
l[i] = 0;
} else {
- memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_h_in_blocks);
}
}
@@ -528,7 +531,10 @@
ctx, sc->scan, sc->neighbors, r);
#endif // !CONFIG_ANS
dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
+ /*
+ vp10_set_contexts(xd, pd,
+ get_plane_block_size(xd->mi[0]->mbmi.sb_type, pd),
+ tx_size, eob > 0, x, y);
+ */
return eob;
}
-
-
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 6430a710..d63c5d3 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -375,7 +375,8 @@
TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
- if (max_tx_size > TX_4X4) {
+ // For sub8x8 blocks the tx_size symbol does not need to be sent
+ if (bsize >= BLOCK_8X8) {
vp10_write_token(w, vp10_tx_size_tree[max_tx_size - TX_8X8],
cm->fc->tx_size_probs[max_tx_size - TX_8X8]
[get_tx_size_context(xd)],
@@ -801,7 +802,7 @@
for (i = 0; i < 4; ++i) {
const int offsetr = blk_row + ((i >> 1) << bsl);
const int offsetc = blk_col + ((i & 0x01) << bsl);
- int step = 1 << (2 * (tx_size - 1));
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
continue;
@@ -1662,7 +1663,7 @@
const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
int bw = num_4x4_blocks_wide_lookup[txb_size];
int block = 0;
- const int step = 1 << (max_tx_size << 1);
+ const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
for (row = 0; row < num_4x4_h; row += bw) {
for (col = 0; col < num_4x4_w; col += bw) {
pack_txb_tokens(w, tok, tok_end, xd, mbmi, plane, plane_bsize,
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 11d4a8e..46bcd0b 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -1038,29 +1038,29 @@
// Note overall scaling factor is 4 times orthogonal
}
-static void copy_block(const int16_t *src, int src_stride, int l,
+static void copy_block(const int16_t *src, int src_stride, int l, int w,
int16_t *dest, int dest_stride) {
int i;
for (i = 0; i < l; ++i) {
memcpy(dest + dest_stride * i, src + src_stride * i,
- l * sizeof(int16_t));
+ w * sizeof(int16_t));
}
}
-static void fliplr(int16_t *dest, int stride, int l) {
+static void fliplr(int16_t *dest, int stride, int l, int w) {
int i, j;
for (i = 0; i < l; ++i) {
- for (j = 0; j < l / 2; ++j) {
+ for (j = 0; j < w / 2; ++j) {
const int16_t tmp = dest[i * stride + j];
- dest[i * stride + j] = dest[i * stride + l - 1 - j];
- dest[i * stride + l - 1 - j] = tmp;
+ dest[i * stride + j] = dest[i * stride + w - 1 - j];
+ dest[i * stride + w - 1 - j] = tmp;
}
}
}
-static void flipud(int16_t *dest, int stride, int l) {
+static void flipud(int16_t *dest, int stride, int l, int w) {
int i, j;
- for (j = 0; j < l; ++j) {
+ for (j = 0; j < w; ++j) {
for (i = 0; i < l / 2; ++i) {
const int16_t tmp = dest[i * stride + j];
dest[i * stride + j] = dest[(l - 1 - i) * stride + j];
@@ -1069,36 +1069,40 @@
}
}
-static void fliplrud(int16_t *dest, int stride, int l) {
+static void fliplrud(int16_t *dest, int stride, int l, int w) {
int i, j;
for (i = 0; i < l / 2; ++i) {
- for (j = 0; j < l; ++j) {
+ for (j = 0; j < w; ++j) {
const int16_t tmp = dest[i * stride + j];
- dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j];
- dest[(l - 1 - i) * stride + l - 1 - j] = tmp;
+ dest[i * stride + j] = dest[(l - 1 - i) * stride + w - 1 - j];
+ dest[(l - 1 - i) * stride + w - 1 - j] = tmp;
}
}
}
-static void copy_fliplr(const int16_t *src, int src_stride, int l,
+static void copy_fliplr(const int16_t *src, int src_stride,
+ int l, int w,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, w, dest, dest_stride);
+ fliplr(dest, dest_stride, l, w);
+}
+
+static void copy_flipud(const int16_t *src, int src_stride,
+ int l, int w,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, w, dest, dest_stride);
+ flipud(dest, dest_stride, l, w);
+}
+
+static void copy_fliplrud(const int16_t *src, int src_stride,
+ int l, int w,
int16_t *dest, int dest_stride) {
- copy_block(src, src_stride, l, dest, dest_stride);
- fliplr(dest, dest_stride, l);
+ copy_block(src, src_stride, l, w, dest, dest_stride);
+ fliplrud(dest, dest_stride, l, w);
}
-static void copy_flipud(const int16_t *src, int src_stride, int l,
- int16_t *dest, int dest_stride) {
- copy_block(src, src_stride, l, dest, dest_stride);
- flipud(dest, dest_stride, l);
-}
-
-static void copy_fliplrud(const int16_t *src, int src_stride, int l,
- int16_t *dest, int dest_stride) {
- copy_block(src, src_stride, l, dest, dest_stride);
- fliplrud(dest, dest_stride, l);
-}
-
-static void maybe_flip_input(const int16_t **src, int *src_stride, int l,
+static void maybe_flip_input(const int16_t **src, int *src_stride,
+ int l, int w,
int16_t *buff, int tx_type) {
switch (tx_type) {
case DCT_DCT:
@@ -1114,21 +1118,21 @@
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST:
- copy_flipud(*src, *src_stride, l, buff, l);
+ copy_flipud(*src, *src_stride, l, w, buff, w);
*src = buff;
- *src_stride = l;
+ *src_stride = w;
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
case H_FLIPADST:
- copy_fliplr(*src, *src_stride, l, buff, l);
+ copy_fliplr(*src, *src_stride, l, w, buff, w);
*src = buff;
- *src_stride = l;
+ *src_stride = w;
break;
case FLIPADST_FLIPADST:
- copy_fliplrud(*src, *src_stride, l, buff, l);
+ copy_fliplrud(*src, *src_stride, l, w, buff, w);
*src = buff;
- *src_stride = l;
+ *src_stride = w;
break;
default:
assert(0);
@@ -1219,6 +1223,44 @@
{ fhalfright32, fidtx32 }, // V_FLIPADST
{ fidtx32, fhalfright32 }, // H_FLIPADST
};
+
+static const transform_2d FHT_4x8[] = {
+ { fdct8, fdct4 }, // DCT_DCT
+ { fadst8, fdct4 }, // ADST_DCT
+ { fdct8, fadst4 }, // DCT_ADST
+ { fadst8, fadst4 }, // ADST_ADST
+ { fadst8, fdct4 }, // FLIPADST_DCT
+ { fdct8, fadst4 }, // DCT_FLIPADST
+ { fadst8, fadst4 }, // FLIPADST_FLIPADST
+ { fadst8, fadst4 }, // ADST_FLIPADST
+ { fadst8, fadst4 }, // FLIPADST_ADST
+ { fidtx8, fidtx4 }, // IDTX
+ { fdct8, fidtx4 }, // V_DCT
+ { fidtx8, fdct4 }, // H_DCT
+ { fadst8, fidtx4 }, // V_ADST
+ { fidtx8, fadst4 }, // H_ADST
+ { fadst8, fidtx4 }, // V_FLIPADST
+ { fidtx8, fadst4 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_8x4[] = {
+ { fdct4, fdct8 }, // DCT_DCT
+ { fadst4, fdct8 }, // ADST_DCT
+ { fdct4, fadst8 }, // DCT_ADST
+ { fadst4, fadst8 }, // ADST_ADST
+ { fadst4, fdct8 }, // FLIPADST_DCT
+ { fdct4, fadst8 }, // DCT_FLIPADST
+ { fadst4, fadst8 }, // FLIPADST_FLIPADST
+ { fadst4, fadst8 }, // ADST_FLIPADST
+ { fadst4, fadst8 }, // FLIPADST_ADST
+ { fidtx4, fidtx8 }, // IDTX
+ { fdct4, fidtx8 }, // V_DCT
+ { fidtx4, fdct8 }, // H_DCT
+ { fadst4, fidtx8 }, // V_ADST
+ { fidtx4, fadst8 }, // H_ADST
+ { fadst4, fidtx8 }, // V_FLIPADST
+ { fidtx4, fadst8 }, // H_FLIPADST
+};
#endif // CONFIG_EXT_TX
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
@@ -1233,7 +1275,7 @@
#if CONFIG_EXT_TX
int16_t flipped_input[4 * 4];
- maybe_flip_input(&input, &stride, 4, flipped_input, tx_type);
+ maybe_flip_input(&input, &stride, 4, 4, flipped_input, tx_type);
#endif
// Columns
@@ -1258,6 +1300,70 @@
}
}
+#if CONFIG_EXT_TX
+void vp10_fht4x8_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ const int n = 4;
+ const int n2 = 8;
+ tran_low_t out[8 * 4];
+ tran_low_t temp_in[8], temp_out[8];
+ int i, j;
+ const transform_2d ht = FHT_4x8[tx_type];
+ int16_t flipped_input[8 * 4];
+ maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
+
+ // Columns
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j)
+ temp_in[j] = input[j * stride + i] * 8;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < n2; ++j)
+ out[j * n + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
+ }
+
+ // Rows
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j)
+ temp_in[j] = out[j + i * n];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < n; ++j)
+ output[j + i * n] = (temp_out[j] + 1) >> 2;
+ }
+ // Note: overall scale factor of transform is 8 times unitary
+}
+
+void vp10_fht8x4_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ const int n = 4;
+ const int n2 = 8;
+ tran_low_t out[8 * 4];
+ tran_low_t temp_in[8], temp_out[8];
+ int i, j;
+ const transform_2d ht = FHT_8x4[tx_type];
+ int16_t flipped_input[8 * 4];
+ maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
+
+ // Columns
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j)
+ temp_in[j] = input[j * stride + i] * 8;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < n; ++j)
+ out[j * n2 + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
+ }
+
+ // Rows
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j)
+ temp_in[j] = out[j + i * n2];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < n2; ++j)
+ output[j + i * n2] = (temp_out[j] + 1) >> 2;
+ }
+ // Note: overall scale factor of transform is 8 times unitary
+}
+#endif // CONFIG_EXT_TX
+
void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
@@ -1382,7 +1488,7 @@
#if CONFIG_EXT_TX
int16_t flipped_input[8 * 8];
- maybe_flip_input(&input, &stride, 8, flipped_input, tx_type);
+ maybe_flip_input(&input, &stride, 8, 8, flipped_input, tx_type);
#endif
// Columns
@@ -1473,7 +1579,7 @@
#if CONFIG_EXT_TX
int16_t flipped_input[16 * 16];
- maybe_flip_input(&input, &stride, 16, flipped_input, tx_type);
+ maybe_flip_input(&input, &stride, 16, 16, flipped_input, tx_type);
#endif
// Columns
@@ -1498,17 +1604,29 @@
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_fht4x4_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
vp10_fht4x4_c(input, output, stride, tx_type);
}
+#if CONFIG_EXT_TX
+void vp10_highbd_fht8x4_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht8x4_c(input, output, stride, tx_type);
+}
+
+void vp10_highbd_fht4x8_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht4x8_c(input, output, stride, tx_type);
+}
+#endif // CONFIG_EXT_TX
+
void vp10_highbd_fht8x8_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
vp10_fht8x8_c(input, output, stride, tx_type);
}
void vp10_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
- int stride) {
+ int stride) {
vp10_fwht4x4_c(input, output, stride);
}
@@ -1530,7 +1648,7 @@
const transform_2d ht = FHT_32[tx_type];
int16_t flipped_input[32 * 32];
- maybe_flip_input(&input, &stride, 32, flipped_input, tx_type);
+ maybe_flip_input(&input, &stride, 32, 32, flipped_input, tx_type);
// Columns
for (i = 0; i < 32; ++i) {
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index aceb10f..aa8b402 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -67,20 +67,6 @@
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
}
-static const int16_t band_count_table[TX_SIZES][8] = {
- { 1, 2, 3, 4, 3, 16 - 13, 0 },
- { 1, 2, 3, 4, 11, 64 - 21, 0 },
- { 1, 2, 3, 4, 11, 256 - 21, 0 },
- { 1, 2, 3, 4, 11, 1024 - 21, 0 },
-};
-
-static const int16_t band_cum_count_table[TX_SIZES][8] = {
- { 0, 1, 3, 6, 10, 13, 16, 0 },
- { 0, 1, 3, 6, 10, 21, 64, 0 },
- { 0, 1, 3, 6, 10, 21, 256, 0 },
- { 0, 1, 3, 6, 10, 21, 1024, 0 },
-};
-
int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
TX_SIZE tx_size, int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
@@ -95,7 +81,7 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
- const int default_eob = 16 << (tx_size << 1);
+ const int default_eob = get_tx2d_size(tx_size);
const int16_t* const dequant_ptr = pd->dequant;
const uint8_t* const band_translate = get_band_translate(tx_size);
TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
@@ -125,9 +111,9 @@
const int *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- mb->token_costs[tx_size][type][ref];
- const int16_t *band_counts = &band_count_table[tx_size][band];
- int16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
+ mb->token_costs[txsize_sqr_map[tx_size]][type][ref];
+ const uint16_t *band_counts = &band_count_table[tx_size][band];
+ uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
int shortcut = 0;
int next_shortcut = 0;
@@ -444,8 +430,7 @@
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
- const int tx1d_size = get_tx1d_size(tx_size);
- const int tx2d_size = tx1d_size * tx1d_size;
+ const int tx2d_size = get_tx2d_size(tx_size);
FWD_TXFM_PARAM fwd_txfm_param;
QUANT_PARAM qparam;
@@ -524,89 +509,44 @@
fwd_txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- highbd_quantize_32x32_nuq(coeff, 1024, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)
- p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_16X16:
- highbd_quantize_nuq(coeff, 256, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
+ if (tx_size == TX_32X32) {
+ highbd_quantize_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant, p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)
+ p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_8X8:
- highbd_quantize_nuq(coeff, 64, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_4X4:
- highbd_quantize_nuq(coeff, 16, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- default:
- assert(0);
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
+ } else {
+ highbd_quantize_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant, p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)
+ pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- quantize_32x32_nuq(coeff, 1024, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_16X16:
- quantize_nuq(coeff, 256, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_8X8:
- quantize_nuq(coeff, 64, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_4X4:
- quantize_nuq(coeff, 16, x->skip_block,
- p->quant, p->quant_shift, pd->dequant,
- (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- default:
- assert(0);
- break;
+ if (tx_size == TX_32X32) {
+ quantize_32x32_nuq(coeff, 1024, x->skip_block,
+ p->quant, p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)
+ pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
+ } else {
+ quantize_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant, p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
}
}
@@ -645,99 +585,48 @@
fwd_txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- highbd_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
- p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_16X16:
- highbd_quantize_fp_nuq(coeff, 256, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
- p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_8X8:
- highbd_quantize_fp_nuq(coeff, 64, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
- p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_4X4:
- highbd_quantize_fp_nuq(coeff, 16, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
+ if (tx_size == TX_32X32) {
+ highbd_quantize_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp, pd->dequant,
+ (const cuml_bins_type_nuq *)
p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
+ (const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- default:
- assert(0);
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
+ } else {
+ highbd_quantize_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp, pd->dequant,
+ (const cuml_bins_type_nuq *)
+ p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)
+ pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
- p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_16X16:
- quantize_fp_nuq(coeff, 256, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
+ if (tx_size == TX_32X32) {
+ quantize_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp, pd->dequant,
+ (const cuml_bins_type_nuq *)
p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
+ (const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_8X8:
- quantize_fp_nuq(coeff, 64, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
- p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- case TX_4X4:
- quantize_fp_nuq(coeff, 16, x->skip_block,
- p->quant_fp, pd->dequant,
- (const cuml_bins_type_nuq *)
- p->cuml_bins_nuq[dq],
- (const dequant_val_type_nuq *)
- pd->dequant_val_nuq[dq],
- qcoeff, dqcoeff, eob,
- scan_order->scan, band);
- break;
- default:
- assert(0);
- break;
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
+ } else {
+ quantize_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp, pd->dequant,
+ (const cuml_bins_type_nuq *)
+ p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)
+ pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob,
+ scan_order->scan, band);
}
}
@@ -773,79 +662,38 @@
fwd_txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- highbd_quantize_dc_32x32_nuq(coeff, 1024, x->skip_block,
- p->quant[0], p->quant_shift[0],
- pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_16X16:
- highbd_quantize_dc_nuq(coeff, 256, x->skip_block,
- p->quant[0], p->quant_shift[0],
- pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_8X8:
- highbd_quantize_dc_nuq(coeff, 64, x->skip_block,
- p->quant[0], p->quant_shift[0],
- pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_4X4:
- highbd_quantize_dc_nuq(coeff, 16, x->skip_block,
- p->quant[0], p->quant_shift[0],
- pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- default:
- assert(0);
+ if (tx_size == TX_32X32) {
+ highbd_quantize_dc_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant[0], p->quant_shift[0],
+ pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
+ } else {
+ highbd_quantize_dc_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant[0], p->quant_shift[0],
+ pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- quantize_dc_32x32_nuq(coeff, 1024, x->skip_block,
- p->quant[0], p->quant_shift[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_16X16:
- quantize_dc_nuq(coeff, 256, x->skip_block,
- p->quant[0], p->quant_shift[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_8X8:
- quantize_dc_nuq(coeff, 64, x->skip_block,
- p->quant[0], p->quant_shift[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_4X4:
- quantize_dc_nuq(coeff, 16, x->skip_block,
- p->quant[0], p->quant_shift[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- default:
- assert(0);
- break;
+ if (tx_size == TX_32X32) {
+ quantize_dc_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant[0], p->quant_shift[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
+ } else {
+ quantize_dc_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant[0], p->quant_shift[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
}
}
@@ -882,76 +730,37 @@
fwd_txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- highbd_quantize_dc_32x32_fp_nuq(coeff, 1024, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_16X16:
- highbd_quantize_dc_fp_nuq(coeff, 256, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_8X8:
- highbd_quantize_dc_fp_nuq(coeff, 64, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_4X4:
- highbd_quantize_dc_fp_nuq(coeff, 16, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- default:
- assert(0);
+ if (tx_size == TX_32X32) {
+ highbd_quantize_dc_32x32_fp_nuq(coeff, get_tx2d_size(tx_size),
+ x->skip_block,
+ p->quant_fp[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
+ } else {
+ highbd_quantize_dc_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- switch (tx_size) {
- case TX_32X32:
- quantize_dc_32x32_fp_nuq(coeff, 1024, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_16X16:
- quantize_dc_fp_nuq(coeff, 256, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
-
- break;
- case TX_8X8:
- quantize_dc_fp_nuq(coeff, 64, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- case TX_4X4:
- quantize_dc_fp_nuq(coeff, 16, x->skip_block,
- p->quant_fp[0], pd->dequant[0],
- p->cuml_bins_nuq[dq][0],
- pd->dequant_val_nuq[dq][0],
- qcoeff, dqcoeff, eob);
- break;
- default:
- assert(0);
- break;
+ if (tx_size == TX_32X32) {
+ quantize_dc_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
+ } else {
+ quantize_dc_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
}
}
#endif // CONFIG_NEW_QUANT
@@ -1011,8 +820,10 @@
}
#if CONFIG_VAR_TX
- for (i = 0; i < (1 << tx_size); ++i) {
+ for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i) {
a[i] = a[0];
+ }
+ for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i) {
l[i] = l[0];
}
#endif
@@ -1076,10 +887,14 @@
assert(bsl > 0);
--bsl;
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
for (i = 0; i < 4; ++i) {
const int offsetr = blk_row + ((i >> 1) << bsl);
const int offsetc = blk_col + ((i & 0x01) << bsl);
- int step = 1 << (2 * (tx_size - 1));
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
continue;
@@ -1165,7 +980,7 @@
const int bh = num_4x4_blocks_wide_lookup[txb_size];
int idx, idy;
int block = 0;
- int step = 1 << (max_tx_size * 2);
+ int step = num_4x4_blocks_txsize_lookup[max_tx_size];
vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
#else
const struct macroblockd_plane* const pd = &xd->plane[plane];
@@ -1242,12 +1057,15 @@
uint16_t *eob = &p->eobs[block];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
- const int tx1d_size = get_tx1d_size(tx_size);
+ const int tx1d_width = num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
+ const int tx1d_height = num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
ENTROPY_CONTEXT *a = NULL, *l = NULL;
int ctx;
INV_TXFM_PARAM inv_txfm_param;
+ assert(tx1d_width == tx1d_height);
+
dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
@@ -1257,14 +1075,14 @@
dst_stride, blk_col, blk_row, plane);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vpx_highbd_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src,
- src_stride, dst, dst_stride, xd->bd);
+ vpx_highbd_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride,
+ src, src_stride, dst, dst_stride, xd->bd);
} else {
- vpx_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src,
+ vpx_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
src_stride, dst, dst_stride);
}
#else
- vpx_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src,
+ vpx_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
src_stride, dst, dst_stride);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1274,8 +1092,8 @@
if (args->enable_optimize_b) {
#if CONFIG_NEW_QUANT
- vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
- tx_size, ctx);
+ vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, ctx);
#else // CONFIG_NEW_QUANT
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
VP10_XFORM_QUANT_FP);
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index a0e0fdc..d5cf827 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -61,6 +61,22 @@
}
}
+#if CONFIG_EXT_TX
+static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void) fwd_txfm_opt;
+ vp10_fht8x4(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void) fwd_txfm_opt;
+ vp10_fht4x8(src_diff, coeff, diff_stride, tx_type);
+}
+#endif // CONFIG_EXT_TX
+
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
@@ -214,6 +230,24 @@
}
}
+#if CONFIG_EXT_TX
+static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void) fwd_txfm_opt;
+ (void) bd;
+ vp10_highbd_fht8x4(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void) fwd_txfm_opt;
+ (void) bd;
+ vp10_highbd_fht4x8(src_diff, coeff, diff_stride, tx_type);
+}
+#endif // CONFIG_EXT_TX
+
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
@@ -344,6 +378,14 @@
case TX_8X8:
fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
break;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+ case TX_8X4:
+ fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+#endif // CONFIG_EXT_TX
case TX_4X4:
fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
break;
@@ -375,6 +417,16 @@
highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
break;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type,
+ fwd_txfm_opt, bd);
+ break;
+ case TX_8X4:
+ highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type,
+ fwd_txfm_opt, bd);
+ break;
+#endif // CONFIG_EXT_TX
case TX_4X4:
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
lossless, bd);
diff --git a/vp10/encoder/hybrid_fwd_txfm.h b/vp10/encoder/hybrid_fwd_txfm.h
index cd028bc..07b832c 100644
--- a/vp10/encoder/hybrid_fwd_txfm.h
+++ b/vp10/encoder/hybrid_fwd_txfm.h
@@ -38,22 +38,6 @@
int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param);
#endif // CONFIG_VP9_HIGHBITDEPTH
-static INLINE int get_tx1d_size(TX_SIZE tx_size) {
- switch (tx_size) {
- case TX_32X32:
- return 32;
- case TX_16X16:
- return 16;
- case TX_8X8:
- return 8;
- case TX_4X4:
- return 4;
- default:
- assert(0);
- return -1;
- }
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 028d578..cbdcc94 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -597,6 +597,18 @@
memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
break;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
+ for (i = 0; i < num_4x4_h; i += 2)
+ t_left[i] = !!*(const uint16_t *)&left[i];
+ break;
+ case TX_8X4:
+ for (i = 0; i < num_4x4_w; i += 2)
+ t_above[i] = !!*(const uint16_t *)&above[i];
+ memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
+ break;
+#endif // CONFIG_EXT_TX
case TX_8X8:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
@@ -622,9 +634,9 @@
}
void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
- const struct macroblockd_plane *pd,
- ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE],
- ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) {
+ const struct macroblockd_plane *pd,
+ ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE],
+ ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
get_entropy_contexts_plane(plane_bsize, tx_size, pd, t_above, t_left);
}
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 8177212..97b6a6ff 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -21,6 +21,7 @@
#include "vpx_ports/system_state.h"
#include "vp10/common/common.h"
+#include "vp10/common/common_data.h"
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/idct.h"
@@ -927,12 +928,6 @@
* can skip this if the last coefficient in this transform block, e.g. the
* 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
* were non-zero). */
-static const int16_t band_counts[TX_SIZES][8] = {
- { 1, 2, 3, 4, 3, 16 - 13, 0 },
- { 1, 2, 3, 4, 11, 64 - 21, 0 },
- { 1, 2, 3, 4, 11, 256 - 21, 0 },
- { 1, 2, 3, 4, 11, 1024 - 21, 0 },
-};
static int cost_coeffs(MACROBLOCK *x,
int plane, int block,
#if CONFIG_VAR_TX
@@ -948,11 +943,12 @@
const struct macroblock_plane *p = &x->plane[plane];
const struct macroblockd_plane *pd = &xd->plane[plane];
const PLANE_TYPE type = pd->plane_type;
- const int16_t *band_count = &band_counts[tx_size][1];
+ const uint16_t *band_count = &band_count_table[tx_size][1];
const int eob = p->eobs[block];
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ const int tx_size_ctx = txsize_sqr_map[tx_size];
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- x->token_costs[tx_size][type][is_inter_block(mbmi)];
+ x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
uint8_t token_cache[MAX_TX_SQUARE];
#if CONFIG_VAR_TX
int pt = coeff_ctx;
@@ -1064,7 +1060,7 @@
if (cpi->sf.use_transform_domain_distortion) {
// Transform domain distortion computation is more accurate as it does
// not involve an inverse transform, but it is less accurate.
- const int ss_txfrm_size = tx_size << 1;
+ const int ss_txfrm_size = num_4x4_blocks_txsize_log2_lookup[tx_size];
int64_t this_sse;
int tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
@@ -1081,7 +1077,8 @@
*out_sse = this_sse >> shift;
} else {
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
- const int bs = 4*num_4x4_blocks_wide_lookup[tx_bsize];
+ const int bsw = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
+ const int bsh = 4 * num_4x4_blocks_high_lookup[tx_bsize];
const int src_stride = x->plane[plane].src.stride;
const int dst_stride = xd->plane[plane].dst.stride;
const int src_idx = 4 * (blk_row * src_stride + blk_col);
@@ -1121,13 +1118,13 @@
recon = CONVERT_TO_BYTEPTR(recon);
inv_txfm_param.bd = xd->bd;
vpx_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE,
- NULL, 0, NULL, 0, bs, bs, xd->bd);
+ NULL, 0, NULL, 0, bsw, bsh, xd->bd);
highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
} else
#endif // CONFIG_VP9_HIGHBITDEPTH
{
vpx_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE,
- NULL, 0, NULL, 0, bs, bs);
+ NULL, 0, NULL, 0, bsw, bsh);
inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
}
@@ -1159,6 +1156,29 @@
#endif // CONFIG_VAR_TX
}
+static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
+ TX_SIZE tx_size) {
+ uint64_t sse;
+ switch (tx_size) {
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ sse = vpx_sum_squares_2d_i16(diff, diff_stride, 4) +
+ vpx_sum_squares_2d_i16(diff + 4 * diff_stride, diff_stride, 4);
+ break;
+ case TX_8X4:
+ sse = vpx_sum_squares_2d_i16(diff, diff_stride, 4) +
+ vpx_sum_squares_2d_i16(diff + 4, diff_stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ assert(tx_size < TX_SIZES);
+ sse = vpx_sum_squares_2d_i16(
+ diff, diff_stride, num_4x4_blocks_wide_txsize_lookup[tx_size] << 2);
+ break;
+ }
+ return sse;
+}
+
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
@@ -1188,7 +1208,6 @@
} else {
// Note that the encode block_intra call above already calls
// inv_txfm_add, so we can't just call dist_block here.
- const int bs = 4 << tx_size;
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
const vpx_variance_fn_t variance = args->cpi->fn_ptr[tx_bsize].vf;
@@ -1204,8 +1223,8 @@
const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
unsigned int tmp;
+ sse = sum_squares_2d(diff, diff_stride, tx_size);
- sse = vpx_sum_squares_2d_i16(diff, diff_stride, bs);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
@@ -1316,6 +1335,10 @@
args.best_rd = ref_best_rd;
args.use_fast_coef_costing = use_fast_coef_casting;
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
if (plane == 0)
xd->mi[0]->mbmi.tx_size = tx_size;
@@ -1361,6 +1384,7 @@
#endif // CONFIG_EXT_TX
assert(skip_prob > 0);
+
s0 = vp10_cost_bit(skip_prob, 0);
s1 = vp10_cost_bit(skip_prob, 1);
@@ -2955,6 +2979,10 @@
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
if (xd->mb_to_right_edge < 0)
@@ -3087,6 +3115,10 @@
int tmp_eob = 0;
int zero_blk_rate;
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
if (ref_best_rd < 0) {
*is_cost_valid = 0;
return;
@@ -3158,7 +3190,7 @@
if (tx_size > TX_4X4) {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bsl = b_height_log2_lookup[bsize];
- int sub_step = 1 << (2 * (tx_size - 1));
+ int sub_step = num_4x4_blocks_txsize_lookup[tx_size - 1];
int i;
int this_rate;
int64_t this_dist;
@@ -3167,6 +3199,9 @@
int this_cost_valid = 1;
int64_t tmp_rd = 0;
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
--bsl;
for (i = 0; i < 4 && this_cost_valid; ++i) {
int offsetr = (i >> 1) << bsl;
@@ -3191,13 +3226,15 @@
if (this_rd < sum_rd) {
int idx, idy;
- for (i = 0; i < (1 << tx_size); ++i)
- pta[i] = ptl[i] = !(tmp_eob == 0);
+ for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
+ pta[i] = !(tmp_eob == 0);
+ for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
+ ptl[i] = !(tmp_eob == 0);
txfm_partition_update(tx_above + (blk_col >> 1),
tx_left + (blk_row >> 1), tx_size);
inter_tx_size[0][0] = tx_size;
- for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
- for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
+ for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
+ for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
inter_tx_size[idy][idx] = tx_size;
mbmi->tx_size = tx_size;
if (this_rd == INT64_MAX)
@@ -3453,6 +3490,10 @@
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
if (xd->mb_to_right_edge < 0)
@@ -3487,13 +3528,13 @@
coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
vp10_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, coeff_ctx, rate, dist, bsse, skip);
- for (i = 0; i < (1 << tx_size); ++i) {
+ for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
ta[i] = !(p->eobs[block] == 0);
+ for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
tl[i] = !(p->eobs[block] == 0);
- }
} else {
int bsl = b_width_log2_lookup[bsize];
- int step = 1 << (2 * (tx_size - 1));
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
int i;
assert(bsl > 0);
@@ -3590,7 +3631,7 @@
return is_cost_valid;
}
-#endif
+#endif // CONFIG_VAR_TX
// Return value 0: early termination triggered, no valid rd cost available;
// 1: rd cost values are valid.
@@ -4402,11 +4443,22 @@
const uint8_t *const src =
&p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i,
- pd->dst.stride)];
+ pd->dst.stride)];
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4);
- const scan_order *so = get_scan(TX_4X4, tx_type, 1);
+ TX_SIZE tx_size = mi->mbmi.tx_size;
+
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
+ const scan_order *so = get_scan(tx_size, tx_type, 1);
+ const int num_4x4_w = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int num_4x4_h = num_4x4_blocks_high_txsize_lookup[tx_size];
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && !CONFIG_VAR_TX
+ assert(tx_size == max_txsize_rect_lookup[mi->mbmi.sb_type]);
+#else
+ assert(tx_size == TX_4X4);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && !CONFIG_VAR_TX
+ assert(tx_type == DCT_DCT);
vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
@@ -4427,39 +4479,51 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
k = i;
- for (idy = 0; idy < height / 4; ++idy) {
- for (idx = 0; idx < width / 4; ++idx) {
- int64_t dist, ssz, rd, rd1, rd2;
+ for (idy = 0; idy < height / 4; idy += num_4x4_h) {
+ for (idx = 0; idx < width / 4; idx += num_4x4_w) {
+ int64_t dist, ssz, rd, rd1, rd2, block;
int coeff_ctx;
k += (idy * 2 + idx);
+ if (tx_size == TX_4X4)
+ block = k;
+ else
+ block = (i ? 2 : 0);
coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)),
*(tl + (k >> 1)));
#if CONFIG_NEW_QUANT
- vp10_xform_quant_fp_nuq(x, 0, k, idy + (i >> 1), idx + (i & 0x01),
- BLOCK_8X8, TX_4X4, coeff_ctx);
+ vp10_xform_quant_fp_nuq(x, 0, block, idy + (i >> 1), idx + (i & 0x01),
+ BLOCK_8X8, tx_size, coeff_ctx);
#else
- vp10_xform_quant(x, 0, k, idy + (i >> 1), idx + (i & 0x01), BLOCK_8X8,
- TX_4X4, VP10_XFORM_QUANT_FP);
+ vp10_xform_quant(x, 0, block, idy + (i >> 1), idx + (i & 0x01), BLOCK_8X8,
+ tx_size, VP10_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)
- vp10_optimize_b(x, 0, k, TX_4X4, coeff_ctx);
- dist_block(cpi, x, 0, k, idy + (i >> 1), idx + (i & 0x1), TX_4X4,
+ vp10_optimize_b(x, 0, block, tx_size, coeff_ctx);
+ dist_block(cpi, x, 0, block, idy + (i >> 1), idx + (i & 0x1), tx_size,
&dist, &ssz);
thisdistortion += dist;
thissse += ssz;
#if CONFIG_VAR_TX
- thisrate += cost_coeffs(x, 0, k, coeff_ctx,
- TX_4X4,
+ thisrate += cost_coeffs(x, 0, block, coeff_ctx,
+ tx_size,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
- *(ta + (k & 1)) = !(p->eobs[k] == 0);
- *(tl + (k >> 1)) = !(p->eobs[k] == 0);
+ *(ta + (k & 1)) = !(p->eobs[block] == 0);
+ *(tl + (k >> 1)) = !(p->eobs[block] == 0);
#else
- thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1),
- TX_4X4,
+ thisrate += cost_coeffs(x, 0, block, ta + (k & 1), tl + (k >> 1),
+ tx_size,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
-#endif
+#if CONFIG_EXT_TX
+ if (tx_size == TX_8X4) {
+ *(ta + (k & 1) + 1) = *(ta + (k & 1));
+ }
+ if (tx_size == TX_4X8) {
+ *(tl + (k >> 1) + 1) = *(tl + (k >> 1));
+ }
+#endif // CONFIG_EXT_TX
+#endif // CONFIG_VAR_TX
rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse);
rd = VPXMIN(rd1, rd2);
@@ -4951,6 +5015,11 @@
const int has_second_rf = has_second_ref(mbmi);
const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && !CONFIG_VAR_TX
+ mbmi->tx_size = max_txsize_rect_lookup[bsize];
+#else
+ mbmi->tx_size = TX_4X4;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && !CONFIG_VAR_TX
vp10_zero(*bsi);
@@ -5020,8 +5089,8 @@
#if CONFIG_EXT_INTER
mv_ref_list,
#endif // CONFIG_EXT_INTER
- &frame_mv[NEARESTMV][frame],
- &frame_mv[NEARMV][frame]);
+ &frame_mv[NEARESTMV][frame],
+ &frame_mv[NEARMV][frame]);
#if CONFIG_REF_MV
tmp_ref_mv[ref] = frame_mv[NEARESTMV][mbmi->ref_frame[ref]];
@@ -5072,10 +5141,11 @@
#if CONFIG_EXT_INTER
for (this_mode = (has_second_rf ? NEAREST_NEARESTMV : NEARESTMV);
this_mode <= (has_second_rf ? NEW_NEWMV : NEWFROMNEARMV);
- ++this_mode) {
+ ++this_mode)
#else
- for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
+ for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode)
#endif // CONFIG_EXT_INTER
+ {
const struct buf_2d orig_src = x->plane[0].src;
struct buf_2d orig_pre[2];
// This flag controls if the motion estimation will kick off. When it
@@ -5342,10 +5412,11 @@
this_mode == NEWMV &&
#endif // CONFIG_EXT_INTER
#if CONFIG_DUAL_FILTER
- (mbmi->interp_filter[0] == EIGHTTAP_REGULAR || run_mv_search)) {
+ (mbmi->interp_filter[0] == EIGHTTAP_REGULAR || run_mv_search))
#else
- (mbmi->interp_filter == EIGHTTAP_REGULAR || run_mv_search)) {
+ (mbmi->interp_filter == EIGHTTAP_REGULAR || run_mv_search))
#endif
+ {
// adjust src pointers
mi_buf_shift(x, i);
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index c25f8bc..734ae8b 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -393,7 +393,7 @@
static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
TX_SIZE tx_size) {
- const int eob_max = 16 << (tx_size << 1);
+ const int eob_max = num_4x4_blocks_txsize_lookup[tx_size] << 4;
return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
@@ -463,21 +463,21 @@
const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
const int ref = is_inter_block(mbmi);
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
- td->rd_counts.coef_counts[tx_size][type][ref];
+ td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
#if CONFIG_ENTROPY
vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
cpi->subframe_stats.coef_probs_buf[cpi->common.coef_probs_update_idx]
- [tx_size][type][ref];
+ [txsize_sqr_map[tx_size]][type][ref];
#else
vpx_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
- cpi->common.fc->coef_probs[tx_size][type][ref];
+ cpi->common.fc->coef_probs[txsize_sqr_map[tx_size]][type][ref];
#endif // CONFIG_ENTROPY
#if CONFIG_ANS
rans_dec_lut (*const coef_cdfs)[COEFF_CONTEXTS] =
- cpi->common.fc->coef_cdfs[tx_size][type][ref];
+ cpi->common.fc->coef_cdfs[txsize_sqr_map[tx_size]][type][ref];
#endif // CONFIG_ANS
unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
- td->counts->eob_branch[tx_size][type][ref];
+ td->counts->eob_branch[txsize_sqr_map[tx_size]][type][ref];
const uint8_t *const band = get_band_translate(tx_size);
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
int skip_eob = 0;
@@ -539,7 +539,7 @@
int result = 1;
struct is_skippable_args args = {x->plane[plane].eobs, &result};
vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable,
- &args);
+ &args);
return result;
}
@@ -560,7 +560,7 @@
int result = 0;
struct is_skippable_args args = {x->plane[plane].eobs, &result};
vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane,
- has_high_freq_coeff, &args);
+ has_high_freq_coeff, &args);
return result;
}
@@ -582,6 +582,9 @@
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ assert(tx_size < TX_SIZES);
+
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
if (xd->mb_to_right_edge < 0)
@@ -608,7 +611,7 @@
for (i = 0; i < 4; ++i) {
const int offsetr = blk_row + ((i >> 1) << bsl);
const int offsetc = blk_col + ((i & 0x01) << bsl);
- int step = 1 << (2 * (tx_size - 1));
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
continue;
@@ -659,7 +662,7 @@
int bh = num_4x4_blocks_wide_lookup[txb_size];
int idx, idy;
int block = 0;
- int step = 1 << (max_tx_size * 2);
+ int step = num_4x4_blocks_txsize_lookup[max_tx_size];
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bh) {
tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
@@ -674,7 +677,7 @@
}
}
}
-#endif
+#endif // CONFIG_VAR_TX
void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, BLOCK_SIZE bsize) {