Enable rectangular transforms for Intra also.
These are under the EXT_TX + RECT_TX experiment combo.
Results
=======
Derf Set:
--------
All Intra frames: 1.8% avg improvement (and 1.78% BD-rate improvement)
Video: 0.230% avg improvement (and 0.262% BD-rate improvement)
Objective-1-fast set
--------------------
Video: 0.52 PSNR improvement
Change-Id: I1893465929858e38419f327752dc61c19b96b997
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 9f735bc..285adfa 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -688,7 +688,7 @@
static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd,
const MB_MODE_INFO *mbmi) {
- return is_inter_block(mbmi) && is_rect_tx_allowed_bsize(mbmi->sb_type) &&
+ return is_rect_tx_allowed_bsize(mbmi->sb_type) &&
!xd->lossless[mbmi->segment_id];
}
@@ -699,40 +699,33 @@
static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode,
int is_inter) {
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
-#if CONFIG_VAR_TX
- const TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize];
-
-#if CONFIG_CB4X4
- if (!is_inter || bsize == BLOCK_4X4)
- return AOMMIN(max_txsize_lookup[bsize], largest_tx_size);
-#else
- if (!is_inter || bsize < BLOCK_8X8)
- return AOMMIN(max_txsize_lookup[bsize], largest_tx_size);
-#endif
-
- if (txsize_sqr_map[max_tx_size] <= largest_tx_size)
- return max_tx_size;
- else
- return largest_tx_size;
+#if CONFIG_VAR_TX || (CONFIG_EXT_TX && CONFIG_RECT_TX)
+ const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bsize];
#else
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+#endif // CONFIG_VAR_TX || (CONFIG_EXT_TX && CONFIG_RECT_TX)
+ (void)is_inter;
+#if CONFIG_VAR_TX
+#if CONFIG_CB4X4
+ if (bsize == BLOCK_4X4)
+ return AOMMIN(max_txsize_lookup[bsize], largest_tx_size);
+#else
+ if (bsize < BLOCK_8X8)
+ return AOMMIN(max_txsize_lookup[bsize], largest_tx_size);
#endif
-
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
- if (!is_inter) {
- return AOMMIN(max_tx_size, largest_tx_size);
+ if (txsize_sqr_map[max_rect_tx_size] <= largest_tx_size)
+ return max_rect_tx_size;
+ else
+ return largest_tx_size;
+#elif CONFIG_EXT_TX && CONFIG_RECT_TX
+ if (txsize_sqr_up_map[max_rect_tx_size] <= largest_tx_size) {
+ return max_rect_tx_size;
} else {
- const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bsize];
- if (txsize_sqr_up_map[max_rect_tx_size] <= largest_tx_size) {
- return max_rect_tx_size;
- } else {
- return largest_tx_size;
- }
+ return largest_tx_size;
}
#else
- (void)is_inter;
return AOMMIN(max_tx_size, largest_tx_size);
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+#endif // CONFIG_VAR_TX
}
#if CONFIG_FILTER_INTRA
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index e1e1dd1..bcfdf1a 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -487,6 +487,42 @@
#endif // CONFIG_TX64X64
};
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+// Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
+// block which may use a rectangular transform, in which case it is
+// "(max_txsize_lookup[bsize] + 1) - TX_8X8", invalid for bsize < 8X8
+static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+ // 2X2, 2X4, 4X2,
+ INT32_MIN, INT32_MIN, INT32_MIN,
+#endif
+ // 4X4
+ INT32_MIN,
+ // 4X8, 8X4, 8X8
+ INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
+ // 8X16, 16X8, 16X16
+ TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
+ // 16X32, 32X16, 32X32
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+ // 32X64, 64X32,
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+#if CONFIG_TX64X64
+ // 64X64
+ TX_64X64 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
+#endif // CONFIG_EXT_PARTITION
+#else
+ // 64X64
+ TX_32X32 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_TX64X64
+};
+#else
// Same as "max_txsize_lookup[bsize] - TX_8X8", invalid for bsize < 8X8
static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES] = {
#if CONFIG_CB4X4
@@ -519,46 +555,10 @@
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_TX64X64
};
-
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
-// Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
-// block which may use a rectangular transform, in which case it is
-// "(max_txsize_lookup[bsize] + 1) - TX_8X8", invalid for bsize < 8X8
-static const int32_t inter_tx_size_cat_lookup[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
- // 2X2, 2X4, 4X2,
- INT32_MIN, INT32_MIN, INT32_MIN,
-#endif
- // 4X4
- INT32_MIN,
- // 4X8, 8X4, 8X8
- INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
- // 8X16, 16X8, 16X16
- TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
- // 16X32, 32X16, 32X32
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
- // 32X64, 64X32,
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
-#if CONFIG_TX64X64
- // 64X64
- TX_64X64 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
-#endif // CONFIG_EXT_PARTITION
-#else
- // 64X64
- TX_32X32 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_TX64X64
-};
-#else
-#define inter_tx_size_cat_lookup intra_tx_size_cat_lookup
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+#define inter_tx_size_cat_lookup intra_tx_size_cat_lookup
+
/* clang-format on */
static const TX_SIZE sub_tx_size_map[TX_SIZES_ALL] = {
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 7f6cd0d..686b535 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -3534,6 +3534,9 @@
(const unsigned int(*)[REF_TYPES][COEF_BANDS]
[COEFF_CONTEXTS])cm->counts.eob_branch[tx_size];
int i, j, k, l, m;
+#if CONFIG_RECT_TX
+ assert(!is_rect_tx(tx_size));
+#endif // CONFIG_RECT_TX
for (i = 0; i < PLANE_TYPES; ++i)
for (j = 0; j < REF_TYPES; ++j)
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index ba2ed88..4df6079 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -2526,6 +2526,7 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
+// TODO(urvang/davidbarker): Refactor with av1_predict_intra_block().
static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref,
int ref_stride, uint8_t *dst,
int dst_stride,
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index ff93ada..bd6efcb 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -1387,6 +1387,8 @@
filter_intra_mode_info->filter_intra_mode[plane != 0];
#endif // CONFIG_FILTER_INTRA
int base = 128 << (xd->bd - 8);
+ assert(tx_size_wide[tx_size] == tx_size_high[tx_size]);
+
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
@@ -1552,6 +1554,7 @@
const FILTER_INTRA_MODE filter_intra_mode =
filter_intra_mode_info->filter_intra_mode[plane != 0];
#endif // CONFIG_FILTER_INTRA
+ assert(tx_size_wide[tx_size] == tx_size_high[tx_size]);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
@@ -1687,11 +1690,11 @@
}
}
-void av1_predict_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
- TX_SIZE tx_size, PREDICTION_MODE mode,
- const uint8_t *ref, int ref_stride, uint8_t *dst,
- int dst_stride, int col_off, int row_off,
- int plane) {
+static void predict_square_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
+ TX_SIZE tx_size, PREDICTION_MODE mode,
+ const uint8_t *ref, int ref_stride,
+ uint8_t *dst, int dst_stride,
+ int col_off, int row_off, int plane) {
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int txw = tx_size_wide_unit[tx_size];
@@ -1725,6 +1728,7 @@
tx_size, row_off, col_off, pd->subsampling_x);
const int have_bottom = av1_has_bottom(bsize, mi_row, mi_col, yd > 0, tx_size,
row_off, col_off, pd->subsampling_y);
+ assert(txwpx == txhpx);
#if CONFIG_PALETTE
if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
@@ -1782,6 +1786,142 @@
plane);
}
+void av1_predict_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
+ TX_SIZE tx_size, PREDICTION_MODE mode,
+ const uint8_t *ref, int ref_stride, uint8_t *dst,
+ int dst_stride, int col_off, int row_off,
+ int plane) {
+ const int tx_width = tx_size_wide[tx_size];
+ const int tx_height = tx_size_high[tx_size];
+ if (tx_width == tx_height) {
+ predict_square_intra_block(xd, wpx, hpx, tx_size, mode, ref, ref_stride,
+ dst, dst_stride, col_off, row_off, plane);
+ } else {
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+#if CONFIG_AOM_HIGHBITDEPTH
+ uint16_t tmp16[MAX_SB_SIZE];
+#endif
+ uint8_t tmp[MAX_SB_SIZE];
+ const TX_SIZE sub_tx_size = txsize_sqr_map[tx_size];
+ assert(sub_tx_size < TX_SIZES);
+ assert((tx_width == wpx && tx_height == hpx) ||
+ (tx_width == (wpx >> 1) && tx_height == hpx) ||
+ (tx_width == wpx && tx_height == (hpx >> 1)));
+
+ if (tx_width < tx_height) {
+ assert(tx_height == (tx_width << 1));
+ // Predict the top square sub-block.
+ predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, ref,
+ ref_stride, dst, dst_stride, col_off, row_off,
+ plane);
+ {
+ const int half_tx_height = tx_height >> 1;
+ const int half_txh_unit = tx_size_high_unit[tx_size] >> 1;
+ // Cast away const to modify 'ref' temporarily; will be restored later.
+ uint8_t *src_2 = (uint8_t *)ref + half_tx_height * ref_stride;
+ uint8_t *dst_2 = dst + half_tx_height * dst_stride;
+ const int row_off_2 = row_off + half_txh_unit;
+ // Save the last row of top square sub-block as 'above' row for bottom
+ // square sub-block.
+ if (src_2 != dst_2 || ref_stride != dst_stride) {
+#if CONFIG_AOM_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
+ uint16_t *dst_2_16 = CONVERT_TO_SHORTPTR(dst_2);
+ memcpy(tmp16, src_2_16 - ref_stride, tx_width * sizeof(*src_2_16));
+ memcpy(src_2_16 - ref_stride, dst_2_16 - dst_stride,
+ tx_width * sizeof(*src_2_16));
+ } else {
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ memcpy(tmp, src_2 - ref_stride, tx_width * sizeof(*src_2));
+ memcpy(src_2 - ref_stride, dst_2 - dst_stride,
+ tx_width * sizeof(*src_2));
+#if CONFIG_AOM_HIGHBITDEPTH
+ }
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ }
+ // Predict the bottom square sub-block.
+ predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, src_2,
+ ref_stride, dst_2, dst_stride, col_off,
+ row_off_2, plane);
+ // Restore the last row of top square sub-block.
+ if (src_2 != dst_2 || ref_stride != dst_stride) {
+#if CONFIG_AOM_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
+ memcpy(src_2_16 - ref_stride, tmp16, tx_width * sizeof(*src_2_16));
+ } else {
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ memcpy(src_2 - ref_stride, tmp, tx_width * sizeof(*src_2));
+#if CONFIG_AOM_HIGHBITDEPTH
+ }
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ }
+ }
+ } else { // tx_width > tx_height
+ assert(tx_width == (tx_height << 1));
+ // Predict the left square sub-block
+ predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, ref,
+ ref_stride, dst, dst_stride, col_off, row_off,
+ plane);
+ {
+ int i;
+ const int half_tx_width = tx_width >> 1;
+ const int half_txw_unit = tx_size_wide_unit[tx_size] >> 1;
+ // Cast away const to modify 'ref' temporarily; will be restored later.
+ uint8_t *src_2 = (uint8_t *)ref + half_tx_width;
+ uint8_t *dst_2 = dst + half_tx_width;
+ const int col_off_2 = col_off + half_txw_unit;
+ // Save the last column of left square sub-block as 'left' column for
+ // right square sub-block.
+ if (src_2 != dst_2 || ref_stride != dst_stride) {
+#if CONFIG_AOM_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
+ uint16_t *dst_2_16 = CONVERT_TO_SHORTPTR(dst_2);
+ for (i = 0; i < tx_height; ++i) {
+ tmp16[i] = src_2_16[i * ref_stride - 1];
+ src_2_16[i * ref_stride - 1] = dst_2_16[i * dst_stride - 1];
+ }
+ } else {
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ for (i = 0; i < tx_height; ++i) {
+ tmp[i] = src_2[i * ref_stride - 1];
+ src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1];
+ }
+#if CONFIG_AOM_HIGHBITDEPTH
+ }
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ }
+ // Predict the right square sub-block.
+ predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, src_2,
+ ref_stride, dst_2, dst_stride, col_off_2,
+ row_off, plane);
+ // Restore the last column of left square sub-block.
+ if (src_2 != dst_2 || ref_stride != dst_stride) {
+#if CONFIG_AOM_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
+ for (i = 0; i < tx_height; ++i) {
+ src_2_16[i * ref_stride - 1] = tmp16[i];
+ }
+ } else {
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ for (i = 0; i < tx_height; ++i) {
+ src_2[i * ref_stride - 1] = tmp[i];
+ }
+#if CONFIG_AOM_HIGHBITDEPTH
+ }
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ }
+ }
+ }
+#else
+ assert(0);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ }
+}
+
void av1_init_intra_predictors(void) {
once(av1_init_intra_predictors_internal);
}
diff --git a/av1/common/scan.c b/av1/common/scan.c
index b5cde7c..7522a72 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -47,7 +47,6 @@
17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
@@ -57,14 +56,12 @@
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = {
0, 1, 8, 9, 2, 16, 10, 17, 18, 3, 24, 11, 25, 19, 26, 4,
12, 27, 20, 5, 28, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = {
0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
@@ -74,7 +71,6 @@
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
-#endif
DECLARE_ALIGNED(16, static const int16_t, default_scan_4x16[64]) = {
0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14,
@@ -306,7 +302,6 @@
122, 63, 78, 93, 108, 123, 79, 94, 109, 124, 95, 110, 125, 111, 126, 127,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x16[128]) = {
0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120,
1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121,
@@ -352,7 +347,6 @@
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127,
};
-#endif
DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = {
0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 19, 34, 49, 64,
@@ -430,7 +424,6 @@
510, 511,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x32[512]) = {
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224,
240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
@@ -579,7 +572,6 @@
495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
510, 511,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = {
0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65,
@@ -1548,7 +1540,6 @@
24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 0,
@@ -1564,7 +1555,6 @@
13, 16, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
};
-#endif
DECLARE_ALIGNED(16, static const int16_t,
default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
@@ -1574,7 +1564,6 @@
13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 8, 8, 16, 16, 0, 0, 1, 8, 9, 16, 17, 24, 1,
@@ -1590,7 +1579,6 @@
9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17,
24, 18, 25, 19, 26, 20, 27, 21, 28, 22, 29, 23, 30, 0, 0
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
default_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = {
@@ -1995,7 +1983,6 @@
126, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48,
@@ -2081,7 +2068,6 @@
104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
126, 0, 0
};
-#endif
DECLARE_ALIGNED(16, static const int16_t,
default_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
@@ -2229,7 +2215,6 @@
478, 509, 479, 510, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96,
@@ -2521,7 +2506,6 @@
501, 471, 502, 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508,
478, 509, 479, 510, 0, 0
};
-#endif // CONFIG_EXT_TX
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
@@ -4297,7 +4281,6 @@
15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x8[32]) = {
0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
@@ -4307,14 +4290,12 @@
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
-#endif
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x4[32]) = {
0, 1, 4, 9, 15, 19, 24, 28, 2, 3, 6, 11, 16, 21, 25, 29,
5, 7, 8, 13, 18, 22, 26, 30, 10, 12, 14, 17, 20, 23, 27, 31,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x4[32]) = {
0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
@@ -4324,7 +4305,6 @@
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x16[64]) = {
0, 1, 3, 6, 2, 4, 7, 10, 5, 8, 11, 14, 9, 12, 15, 18,
@@ -4554,7 +4534,6 @@
35, 43, 51, 59, 67, 75, 83, 91, 99, 106, 112, 117, 121, 124, 126, 127,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x16[128]) = {
0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113,
2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115,
@@ -4600,7 +4579,6 @@
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127,
};
-#endif
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x32[512]) = {
0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105,
@@ -4678,7 +4656,6 @@
510, 511,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x32[512]) = {
0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
@@ -4828,8 +4805,6 @@
510, 511,
};
-#endif // CONFIG_EXT_TX
-
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x16[256]) = {
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
@@ -5756,7 +5731,7 @@
#endif // CONFIG_TX64X64
};
-const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+const SCAN_ORDER av1_intra_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
#if CONFIG_CB4X4
{
// TX_2X2
@@ -5909,8 +5884,162 @@
{ default_scan_64x64, av1_default_iscan_64x64,
default_scan_64x64_neighbors },
#endif // CONFIG_EXT_TX
- }
+ },
#endif // CONFIG_TX64X64
+ {
+ // TX_4X8
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
+ { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
+ { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+ { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+ { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_8X4
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
+ { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
+ { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+ { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+ { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_8X16
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+ { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+ { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_16X8
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+ { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+ { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_16X32
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_32X16
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+#endif // CONFIG_EXT_TX
+ },
};
const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
diff --git a/av1/common/scan.h b/av1/common/scan.h
index 71868d0..9047359 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -26,7 +26,7 @@
#define MAX_NEIGHBORS 2
extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES];
-extern const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES];
+extern const SCAN_ORDER av1_intra_scan_orders[TX_SIZES_ALL][TX_TYPES];
extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
#if CONFIG_ADAPT_SCAN
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 48c0f5f..7d822ae 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -466,7 +466,7 @@
PREDICTION_MODE mode = (plane == 0) ? mbmi->mode : mbmi->uv_mode;
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
uint8_t *dst;
- int block_idx = (row << 1) + col;
+ const int block_idx = (row << 1) + col;
#if CONFIG_PVQ
(void)cm;
(void)r;
@@ -475,7 +475,7 @@
#if !CONFIG_CB4X4
if (mbmi->sb_type < BLOCK_8X8)
- if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
+ if (plane == 0) mode = xd->mi[0]->bmi[block_idx].as_mode;
#endif
av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 7c59417..84de6aa 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -384,45 +384,32 @@
int tx_size_cat, aom_reader *r) {
FRAME_COUNTS *counts = xd->counts;
const int ctx = get_tx_size_context(xd);
- int depth = aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
- cm->fc->tx_size_probs[tx_size_cat][ctx], ACCT_STR);
- TX_SIZE tx_size = depth_to_tx_size(depth);
+ const int depth =
+ aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
+ cm->fc->tx_size_probs[tx_size_cat][ctx], ACCT_STR);
+ const TX_SIZE tx_size = depth_to_tx_size(depth);
+#if CONFIG_RECT_TX
+ assert(!is_rect_tx(tx_size));
+#endif // CONFIG_RECT_TX
if (counts) ++counts->tx_size[tx_size_cat][ctx][depth];
return tx_size;
}
-static TX_SIZE read_tx_size_intra(AV1_COMMON *cm, MACROBLOCKD *xd,
- aom_reader *r) {
- TX_MODE tx_mode = cm->tx_mode;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return TX_4X4;
- if (bsize >= BLOCK_8X8) {
- if (tx_mode == TX_MODE_SELECT) {
- const TX_SIZE tx_size =
- read_selected_tx_size(cm, xd, intra_tx_size_cat_lookup[bsize], r);
- assert(tx_size <= max_txsize_lookup[bsize]);
- return tx_size;
- } else {
- return tx_size_from_tx_mode(bsize, cm->tx_mode, 0);
- }
- } else {
- return TX_4X4;
- }
-}
-
-static TX_SIZE read_tx_size_inter(AV1_COMMON *cm, MACROBLOCKD *xd,
- int allow_select, aom_reader *r) {
- TX_MODE tx_mode = cm->tx_mode;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+static TX_SIZE read_tx_size(AV1_COMMON *cm, MACROBLOCKD *xd, int is_inter,
+ int allow_select_inter, aom_reader *r) {
+ const TX_MODE tx_mode = cm->tx_mode;
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return TX_4X4;
#if CONFIG_CB4X4 && CONFIG_VAR_TX
if (bsize > BLOCK_4X4) {
#else
if (bsize >= BLOCK_8X8) {
-#endif
- if (allow_select && tx_mode == TX_MODE_SELECT) {
+#endif // CONFIG_CB4X4 && CONFIG_VAR_TX
+ if ((!is_inter || allow_select_inter) && tx_mode == TX_MODE_SELECT) {
+ const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+ : intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size =
- read_selected_tx_size(cm, xd, inter_tx_size_cat_lookup[bsize], r);
+ read_selected_tx_size(cm, xd, tx_size_cat, r);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (coded_tx_size > max_txsize_lookup[bsize]) {
assert(coded_tx_size == max_txsize_lookup[bsize] + 1);
@@ -433,7 +420,7 @@
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
return coded_tx_size;
} else {
- return tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
+ return tx_size_from_tx_mode(bsize, tx_mode, is_inter);
}
} else {
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -441,7 +428,7 @@
return max_txsize_rect_lookup[bsize];
#else
return TX_4X4;
-#endif
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
}
}
@@ -711,6 +698,7 @@
#endif
if (!FIXED_TX_TYPE) {
#if CONFIG_EXT_TX
+ const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
if (get_ext_tx_types(tx_size, mbmi->sb_type, inter_block) > 1 &&
cm->base_qindex > 0 && !mbmi->skip &&
#if CONFIG_SUPERTX
@@ -724,19 +712,19 @@
if (eset > 0) {
mbmi->tx_type = aom_read_tree(
r, av1_ext_tx_inter_tree[eset],
- cm->fc->inter_ext_tx_prob[eset][txsize_sqr_map[tx_size]],
- ACCT_STR);
+ cm->fc->inter_ext_tx_prob[eset][square_tx_size], ACCT_STR);
if (counts)
- ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]]
- [mbmi->tx_type];
+ ++counts->inter_ext_tx[eset][square_tx_size][mbmi->tx_type];
}
} else if (ALLOW_INTRA_EXT_TX) {
if (eset > 0) {
mbmi->tx_type = aom_read_tree(
r, av1_ext_tx_intra_tree[eset],
- cm->fc->intra_ext_tx_prob[eset][tx_size][mbmi->mode], ACCT_STR);
+ cm->fc->intra_ext_tx_prob[eset][square_tx_size][mbmi->mode],
+ ACCT_STR);
if (counts)
- ++counts->intra_ext_tx[eset][tx_size][mbmi->mode][mbmi->tx_type];
+ ++counts->intra_ext_tx[eset][square_tx_size][mbmi->mode]
+ [mbmi->tx_type];
}
}
} else {
@@ -807,7 +795,7 @@
}
#endif
- mbmi->tx_size = read_tx_size_intra(cm, xd, r);
+ mbmi->tx_size = read_tx_size(cm, xd, 0, 1, r);
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE;
@@ -1967,10 +1955,7 @@
read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size,
height != width, idy, idx, r);
} else {
- if (inter_block)
- mbmi->tx_size = read_tx_size_inter(cm, xd, !mbmi->skip, r);
- else
- mbmi->tx_size = read_tx_size_intra(cm, xd, r);
+ mbmi->tx_size = read_tx_size(cm, xd, inter_block, !mbmi->skip, r);
if (inter_block) {
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
@@ -1984,10 +1969,7 @@
set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, mbmi->skip, xd);
}
#else
- if (inter_block)
- mbmi->tx_size = read_tx_size_inter(cm, xd, !mbmi->skip, r);
- else
- mbmi->tx_size = read_tx_size_intra(cm, xd, r);
+ mbmi->tx_size = read_tx_size(cm, xd, inter_block, !mbmi->skip, r);
#endif // CONFIG_VAR_TX
#if CONFIG_SUPERTX
}
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index d18c8ca..1545aae 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1169,9 +1169,10 @@
const TX_SIZE tx_size = is_inter ? mbmi->min_tx_size : mbmi->tx_size;
#else
const TX_SIZE tx_size = mbmi->tx_size;
-#endif
+#endif // CONFIG_VAR_TX
if (!FIXED_TX_TYPE) {
#if CONFIG_EXT_TX
+ const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
const BLOCK_SIZE bsize = mbmi->sb_type;
if (get_ext_tx_types(tx_size, bsize, is_inter) > 1 && cm->base_qindex > 0 &&
!mbmi->skip &&
@@ -1182,16 +1183,19 @@
int eset = get_ext_tx_set(tx_size, bsize, is_inter);
if (is_inter) {
assert(ext_tx_used_inter[eset][mbmi->tx_type]);
- if (eset > 0)
- av1_write_token(
- w, av1_ext_tx_inter_tree[eset],
- cm->fc->inter_ext_tx_prob[eset][txsize_sqr_map[tx_size]],
- &ext_tx_inter_encodings[eset][mbmi->tx_type]);
+ if (eset > 0) {
+ av1_write_token(w, av1_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][square_tx_size],
+ &ext_tx_inter_encodings[eset][mbmi->tx_type]);
+ }
} else if (ALLOW_INTRA_EXT_TX) {
- if (eset > 0)
- av1_write_token(w, av1_ext_tx_intra_tree[eset],
- cm->fc->intra_ext_tx_prob[eset][tx_size][mbmi->mode],
- &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+ assert(ext_tx_used_intra[eset][mbmi->tx_type]);
+ if (eset > 0) {
+ av1_write_token(
+ w, av1_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][square_tx_size][mbmi->mode],
+ &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+ }
}
}
#else
@@ -2641,6 +2645,9 @@
unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
cpi->common.counts.eob_branch[tx_size];
int i, j, k, l, m;
+#if CONFIG_RECT_TX
+ assert(!is_rect_tx(tx_size));
+#endif // CONFIG_RECT_TX
for (i = 0; i < PLANE_TYPES; ++i) {
for (j = 0; j < REF_TYPES; ++j) {
@@ -2679,6 +2686,9 @@
#else
const int probwt = 1;
#endif
+#if CONFIG_RECT_TX
+ assert(!is_rect_tx(tx_size));
+#endif // CONFIG_RECT_TX
switch (cpi->sf.use_fast_coef_updates) {
case TWO_LOOP: {
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index b8e886b..5e29d9a 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -204,6 +204,27 @@
#endif
};
+// Converts block_index for given transform size to index of the block in raster
+// order.
+static INLINE int av1_block_index_to_raster_order(TX_SIZE tx_size,
+                                                  int block_idx) {
+  // For transform size 4x8, the possible block_idx values are 0 & 2, because
+  // block_idx values are incremented in steps of size 'tx_width_unit x
+  // tx_height_unit'. But, for this transform size, block_idx = 2 corresponds to
+  // block number 1 in raster order, inside an 8x8 MI block.
+  // For any other transform size, the two indices are equivalent.
+  return (tx_size == TX_4X8 && block_idx == 2) ? 1 : block_idx;
+}
+
+// Inverse of av1_block_index_to_raster_order().
+// Note: only implemented for transform sizes 4x4, 4x8 and 8x4 right now.
+static INLINE int av1_raster_order_to_block_index(TX_SIZE tx_size,
+                                                  int raster_order) {
+  assert(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4);
+  // We ensure that block indices are 0 & 2 if tx size is 4x8 or 8x4.
+  return (tx_size == TX_4X4) ? raster_order : (raster_order > 0) ? 2 : 0;
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 69b1a02..c5a9c41 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5581,13 +5581,17 @@
} else {
intra_tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
}
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
- ++td->counts->tx_size_implied[max_txsize_lookup[bsize]]
- [txsize_sqr_up_map[tx_size]];
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
} else {
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ intra_tx_size = tx_size;
+#else
intra_tx_size = (bsize >= BLOCK_8X8) ? tx_size : TX_4X4;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
}
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ ++td->counts->tx_size_implied[max_txsize_lookup[bsize]]
+ [txsize_sqr_up_map[tx_size]];
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
for (j = 0; j < mi_height; j++)
for (i = 0; i < mi_width; i++)
@@ -5613,7 +5617,8 @@
++td->counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]]
[mbmi->tx_type];
} else {
- ++td->counts->intra_ext_tx[eset][tx_size][mbmi->mode][mbmi->tx_type];
+ ++td->counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][mbmi->mode]
+ [mbmi->tx_type];
}
}
}
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index a869f82..a031387 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -101,7 +101,7 @@
av1_token_state tokens[MAX_TX_SQUARE + 1][2];
unsigned best_index[MAX_TX_SQUARE + 1][2];
uint8_t token_cache[MAX_TX_SQUARE];
- const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+ const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const int eob = p->eobs[block];
@@ -109,7 +109,8 @@
const int default_eob = tx_size_2d[tx_size];
const int16_t *const dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_raster_idx, tx_size);
const SCAN_ORDER *const scan_order =
get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
const int16_t *const scan = scan_order->scan;
@@ -486,7 +487,8 @@
struct macroblockd_plane *const pd = &xd->plane[plane];
#endif
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_raster_idx, tx_size);
const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
@@ -626,8 +628,9 @@
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
INV_TXFM_PARAM inv_txfm_param;
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
#if CONFIG_PVQ
- int tx_blk_size;
+ int tx_width_pixels, tx_height_pixels;
int i, j;
#endif
#if CONFIG_VAR_TX
@@ -690,18 +693,20 @@
if (x->pvq_skip[plane]) return;
// transform block size in pixels
- tx_blk_size = tx_size_wide[tx_size];
+ tx_width_pixels = tx_size_wide[tx_size];
+ tx_height_pixels = tx_size_high[tx_size];
// Since av1 does not have separate function which does inverse transform
// but av1_inv_txfm_add_*x*() also does addition of predicted image to
// inverse transformed image,
// pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+ for (j = 0; j < tx_height_pixels; j++)
+ for (i = 0; i < tx_width_pixels; i++) dst[j * pd->dst.stride + i] = 0;
#endif
// inverse transform parameters
- inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+ inv_txfm_param.tx_type =
+ get_tx_type(pd->plane_type, xd, block_raster_idx, tx_size);
inv_txfm_param.tx_size = tx_size;
inv_txfm_param.eob = p->eobs[block];
inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
@@ -927,7 +932,9 @@
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
+ const TX_TYPE tx_type =
+ get_tx_type(plane_type, xd, block_raster_idx, tx_size);
PREDICTION_MODE mode;
const int diff_stride = block_size_wide[plane_bsize];
uint8_t *src, *dst;
@@ -945,13 +952,11 @@
int i, j;
#endif
- assert(tx1d_width == tx1d_height);
-
dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
src = &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
src_diff =
&p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
- mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
+ mode = (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
dst_stride, dst, dst_stride, blk_col, blk_row, plane);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 9418017..5369e2b 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1076,8 +1076,11 @@
const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;
INV_TXFM_PARAM inv_txfm_param;
+ const int block_raster_idx =
+ av1_block_index_to_raster_order(tx_size, block);
- inv_txfm_param.tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ inv_txfm_param.tx_type =
+ get_tx_type(plane_type, xd, block_raster_idx, tx_size);
inv_txfm_param.tx_size = tx_size;
inv_txfm_param.eob = eob;
inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];
@@ -1360,6 +1363,29 @@
}
#endif // CONFIG_SUPERTX
+// Returns the rate cost of signalling 'tx_size' for a block of size 'bsize',
+// or 0 when the transform size is implied by the current tx_mode and thus
+// not coded.
+static int tx_size_cost(const AV1_COMP *const cpi, MACROBLOCK *x,
+                        BLOCK_SIZE bsize, TX_SIZE tx_size) {
+  const AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+
+  // NOTE(review): the select condition keys off mbmi->sb_type while the
+  // category lookups below use the 'bsize' argument; callers currently pass
+  // matching sizes -- confirm before these can ever diverge.
+  const int tx_select =
+      cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
+
+  if (!tx_select) return 0;
+
+  {
+    const int is_inter = is_inter_block(mbmi);
+    const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+                                     : intra_tx_size_cat_lookup[bsize];
+    // Rectangular sizes are costed via their square-superset coded symbol.
+    const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
+    const int depth = tx_size_to_depth(coded_tx_size);
+    const int tx_size_ctx = get_tx_size_context(xd);
+    return cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
+  }
+}
+
static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
TX_TYPE tx_type, int tx_size) {
@@ -1370,16 +1396,10 @@
aom_prob skip_prob = av1_get_skip_prob(cm, xd);
int s0, s1;
const int is_inter = is_inter_block(mbmi);
-
- const int tx_size_cat =
- is_inter ? inter_tx_size_cat_lookup[bs] : intra_tx_size_cat_lookup[bs];
- const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
- const int depth = tx_size_to_depth(coded_tx_size);
const int tx_select =
cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
- const int tx_size_ctx = tx_select ? get_tx_size_context(xd) : 0;
- const int r_tx_size =
- tx_select ? cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth] : 0;
+
+ const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
assert(skip_prob > 0);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -1405,8 +1425,9 @@
[mbmi->tx_type];
} else {
if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- rd_stats->rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
- [mbmi->mode][mbmi->tx_type];
+ rd_stats->rate +=
+ cpi->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->tx_size]]
+ [mbmi->mode][mbmi->tx_type];
}
}
#else
@@ -1468,6 +1489,7 @@
#endif // CONFIG_RECT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
+ assert(bs >= BLOCK_8X8);
if (tx_select) {
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -1494,8 +1516,9 @@
if (evaluate_rect_tx) {
const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
RD_STATS this_rd_stats;
- ext_tx_set = get_ext_tx_set(rect_tx_size, bs, 1);
- if (ext_tx_used_inter[ext_tx_set][tx_type]) {
+ ext_tx_set = get_ext_tx_set(rect_tx_size, bs, is_inter);
+ if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
+ (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type,
rect_tx_size);
best_tx_size = rect_tx_size;
@@ -1651,13 +1674,15 @@
if (is_inter) {
if (ext_tx_set > 0)
this_rd_stats.rate +=
- cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ cpi->inter_tx_type_costs[ext_tx_set]
+ [txsize_sqr_map[mbmi->tx_size]]
[mbmi->tx_type];
} else {
if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
this_rd_stats.rate +=
- cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size][mbmi->mode]
- [mbmi->tx_type];
+ cpi->intra_tx_type_costs[ext_tx_set]
+ [txsize_sqr_map[mbmi->tx_size]]
+ [mbmi->mode][mbmi->tx_type];
}
}
@@ -1977,10 +2002,7 @@
}
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
- tokenonly_rd_stats.rate -=
- cpi->tx_size_cost[max_txsize_lookup[bsize] - TX_8X8]
- [get_tx_size_context(xd)]
- [tx_size_to_depth(mbmi->tx_size)];
+ tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
if (this_rd < *best_rd) {
*best_rd = this_rd;
@@ -2005,11 +2027,48 @@
}
#endif // CONFIG_PALETTE
-static int64_t rd_pick_intra4x4block(
+// Wrappers so av1_inv_txfm_add_4x8/8x4 match the common function-pointer
+// signature used below; the lossless flag is accepted but unused here.
+static void inv_txfm_add_4x8_wrapper(const tran_low_t *input, uint8_t *dest,
+                                     int stride, int eob, TX_TYPE tx_type,
+                                     int is_lossless) {
+  (void)is_lossless;
+  av1_inv_txfm_add_4x8(input, dest, stride, eob, tx_type);
+}
+
+static void inv_txfm_add_8x4_wrapper(const tran_low_t *input, uint8_t *dest,
+                                     int stride, int eob, TX_TYPE tx_type,
+                                     int is_lossless) {
+  (void)is_lossless;
+  av1_inv_txfm_add_8x4(input, dest, stride, eob, tx_type);
+}
+
+typedef void (*inv_txfm_func_ptr)(const tran_low_t *, uint8_t *, int, int,
+                                  TX_TYPE, int);
+#if CONFIG_AOM_HIGHBITDEPTH
+
+// High-bitdepth counterparts of the wrappers above; marked static like the
+// lowbd versions -- these are file-local helpers only used via the function
+// pointers below. The lossless flag is accepted but unused here.
+static void highbd_inv_txfm_add_4x8_wrapper(const tran_low_t *input,
+                                            uint8_t *dest, int stride, int eob,
+                                            int bd, TX_TYPE tx_type,
+                                            int is_lossless) {
+  (void)is_lossless;
+  av1_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
+}
+
+static void highbd_inv_txfm_add_8x4_wrapper(const tran_low_t *input,
+                                            uint8_t *dest, int stride, int eob,
+                                            int bd, TX_TYPE tx_type,
+                                            int is_lossless) {
+  (void)is_lossless;
+  av1_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
+}
+
+typedef void (*highbd_inv_txfm_func_ptr)(const tran_low_t *, uint8_t *, int,
+                                         int, int, TX_TYPE, int);
+#endif // CONFIG_AOM_HIGHBITDEPTH
+
+static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
- BLOCK_SIZE bsize, int *y_skip, int64_t rd_thresh) {
+ BLOCK_SIZE bsize, TX_SIZE tx_size, int *y_skip, int64_t rd_thresh) {
const AV1_COMMON *const cm = &cpi->common;
PREDICTION_MODE mode;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2029,14 +2088,38 @@
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
#endif
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+
+ const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
+ const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
+ const int tx_width_unit = tx_size_wide_unit[tx_size];
+ const int tx_height_unit = tx_size_high_unit[tx_size];
+ const int pred_block_width = block_size_wide[bsize];
+ const int pred_block_height = block_size_high[bsize];
+ const int tx_width = tx_size_wide[tx_size];
+ const int tx_height = tx_size_high[tx_size];
+ const int pred_width_in_transform_blocks = pred_block_width / tx_width;
+ const int pred_height_in_transform_blocks = pred_block_height / tx_height;
int idx, idy;
int best_can_skip = 0;
uint8_t best_dst[8 * 8];
+ inv_txfm_func_ptr inv_txfm_func =
+ (tx_size == TX_4X4) ? av1_inv_txfm_add_4x4
+ : (tx_size == TX_4X8) ? inv_txfm_add_4x8_wrapper
+ : inv_txfm_add_8x4_wrapper;
#if CONFIG_AOM_HIGHBITDEPTH
uint16_t best_dst16[8 * 8];
+ highbd_inv_txfm_func_ptr highbd_inv_txfm_func =
+ (tx_size == TX_4X4)
+ ? av1_highbd_inv_txfm_add_4x4
+ : (tx_size == TX_4X8) ? highbd_inv_txfm_add_4x8_wrapper
+ : highbd_inv_txfm_add_8x4_wrapper;
#endif
+ const int is_lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ const int sub_bsize = bsize;
+#else
+ const int sub_bsize = BLOCK_4X4;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
#if CONFIG_PVQ
od_rollback_buffer pre_buf, post_buf;
@@ -2044,9 +2127,19 @@
od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif
- memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
- memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
- xd->mi[0]->mbmi.tx_size = TX_4X4;
+ assert(bsize < BLOCK_8X8);
+ assert(tx_width < 8 || tx_height < 8);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ assert(tx_width == pred_block_width && tx_height == pred_block_height);
+#else
+ assert(tx_width == 4 && tx_height == 4);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+ memcpy(ta, a, pred_width_in_transform_blocks * sizeof(a[0]));
+ memcpy(tl, l, pred_height_in_transform_blocks * sizeof(l[0]));
+
+ xd->mi[0]->mbmi.tx_size = tx_size;
+
#if CONFIG_PALETTE
xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
#endif // CONFIG_PALETTE
@@ -2060,7 +2153,9 @@
int rate = bmode_costs[mode];
int can_skip = 1;
- if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
+ if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
+ (1 << mode)))
+ continue;
// Only do the oblique modes if the best so far is
// one of the neighboring directional modes
@@ -2068,70 +2163,97 @@
if (conditional_skipintra(mode, *best_mode)) continue;
}
- memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
- memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
+ memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
+ memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
- for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
- for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
- const int block = (row + idy) * 2 + (col + idx);
+ for (idy = 0; idy < pred_height_in_transform_blocks; ++idy) {
+ for (idx = 0; idx < pred_width_in_transform_blocks; ++idx) {
+ const int block_raster_idx = (row + idy) * 2 + (col + idx);
+ const int block =
+ av1_raster_order_to_block_index(tx_size, block_raster_idx);
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
- int16_t *const src_diff =
- av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
- xd->mi[0]->bmi[block].as_mode = mode;
- av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
+ int16_t *const src_diff = av1_raster_block_offset_int16(
+ BLOCK_8X8, block_raster_idx, p->src_diff);
+ int skip;
+ assert(block < 4);
+ assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
+ idx == 0 && idy == 0));
+ assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
+ block == 0 || block == 2));
+ xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
+ av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
dst_stride, dst, dst_stride, col + idx,
row + idy, 0);
- aom_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
- dst_stride, xd->bd);
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
+ aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
+ src_stride, dst, dst_stride, xd->bd);
+ if (is_lossless) {
+ TX_TYPE tx_type =
+ get_tx_type(PLANE_TYPE_Y, xd, block_raster_idx, tx_size);
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
const int coeff_ctx =
- combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+ combine_entropy_contexts(tempa[idx], templ[idy]);
#if CONFIG_NEW_QUANT
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
- TX_4X4, coeff_ctx, AV1_XFORM_QUANT_FP_NUQ);
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_FP_NUQ);
#else
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
- TX_4X4, coeff_ctx, AV1_XFORM_QUANT_FP);
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
- ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, TX_4X4,
+ ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, tx_size,
scan_order->scan, scan_order->neighbors,
cpi->sf.use_fast_coef_costing);
- *(tempa + idx) = !(p->eobs[block] == 0);
- *(templ + idy) = !(p->eobs[block] == 0);
- can_skip &= (p->eobs[block] == 0);
+ skip = (p->eobs[block] == 0);
+ can_skip &= skip;
+ tempa[idx] = !skip;
+ templ[idy] = !skip;
+#if CONFIG_EXT_TX
+ if (tx_size == TX_8X4) {
+ tempa[idx + 1] = tempa[idx];
+ } else if (tx_size == TX_4X8) {
+ templ[idy + 1] = templ[idy];
+ }
+#endif // CONFIG_EXT_TX
+
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next_highbd;
- av1_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
- dst_stride, p->eobs[block], xd->bd,
- DCT_DCT, 1);
+ highbd_inv_txfm_func(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], xd->bd, DCT_DCT,
+ 1);
} else {
int64_t dist;
unsigned int tmp;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
+ TX_TYPE tx_type =
+ get_tx_type(PLANE_TYPE_Y, xd, block_raster_idx, tx_size);
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
const int coeff_ctx =
- combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+ combine_entropy_contexts(tempa[idx], templ[idy]);
#if CONFIG_NEW_QUANT
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
- TX_4X4, coeff_ctx, AV1_XFORM_QUANT_FP_NUQ);
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_FP_NUQ);
#else
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
- TX_4X4, coeff_ctx, AV1_XFORM_QUANT_FP);
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
- av1_optimize_b(cm, x, 0, block, TX_4X4, coeff_ctx);
- ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, TX_4X4,
+ av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
+ ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, tx_size,
scan_order->scan, scan_order->neighbors,
cpi->sf.use_fast_coef_costing);
- *(tempa + idx) = !(p->eobs[block] == 0);
- *(templ + idy) = !(p->eobs[block] == 0);
- can_skip &= (p->eobs[block] == 0);
- av1_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
- dst_stride, p->eobs[block], xd->bd,
- tx_type, 0);
- cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
+ skip = (p->eobs[block] == 0);
+ can_skip &= skip;
+ tempa[idx] = !skip;
+ templ[idy] = !skip;
+#if CONFIG_EXT_TX
+ if (tx_size == TX_8X4) {
+ tempa[idx + 1] = tempa[idx];
+ } else if (tx_size == TX_4X8) {
+ templ[idy + 1] = templ[idy];
+ }
+#endif // CONFIG_EXT_TX
+ highbd_inv_txfm_func(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], xd->bd, tx_type,
+ 0);
+ cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
dist = (int64_t)tmp << 4;
distortion += dist;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -2150,12 +2272,12 @@
best_rd = this_rd;
best_can_skip = can_skip;
*best_mode = mode;
- memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
- memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
- for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
+ memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
+ memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
+ for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
- num_4x4_blocks_wide * 4 * sizeof(uint16_t));
+ pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
}
}
next_highbd : {}
@@ -2165,9 +2287,10 @@
if (y_skip) *y_skip &= best_can_skip;
- for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
+ for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
- best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
+ best_dst16 + idy * 8,
+ pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
}
return best_rd;
@@ -2185,7 +2308,10 @@
int rate = bmode_costs[mode];
int can_skip = 1;
- if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
+ if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
+ (1 << mode))) {
+ continue;
+ }
// Only do the oblique modes if the best so far is
// one of the neighboring directional modes
@@ -2193,25 +2319,29 @@
if (conditional_skipintra(mode, *best_mode)) continue;
}
- memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
- memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
+ memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
+ memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
- for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
- for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
- int block = (row + idy) * 2 + (col + idx);
+ for (idy = 0; idy < pred_height_in_4x4_blocks; idy += tx_height_unit) {
+ for (idx = 0; idx < pred_width_in_4x4_blocks; idx += tx_width_unit) {
+ const int block_raster_idx = (row + idy) * 2 + (col + idx);
+ int block = av1_raster_order_to_block_index(tx_size, block_raster_idx);
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
#if !CONFIG_PVQ
- int16_t *const src_diff =
- av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
+ int16_t *const src_diff = av1_raster_block_offset_int16(
+ BLOCK_8X8, block_raster_idx, p->src_diff);
#else
- int i, j, tx_blk_size;
- int skip;
-
- tx_blk_size = 4;
+ int i, j;
#endif
- xd->mi[0]->bmi[block].as_mode = mode;
- av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
+ int skip;
+ assert(block < 4);
+ assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
+ idx == 0 && idy == 0));
+ assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
+ block == 0 || block == 2));
+ xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
+ av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
dst_stride, dst, dst_stride,
#if CONFIG_CB4X4
2 * (col + idx), 2 * (row + idy),
@@ -2220,21 +2350,23 @@
#endif
0);
#if !CONFIG_PVQ
- aom_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
+ aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
+ dst, dst_stride);
#endif
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
+ if (is_lossless) {
+ TX_TYPE tx_type =
+ get_tx_type(PLANE_TYPE_Y, xd, block_raster_idx, tx_size);
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
const int coeff_ctx =
- combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+ combine_entropy_contexts(tempa[idx], templ[idy]);
#if CONFIG_CB4X4
block = 4 * block;
#endif
#if !CONFIG_PVQ
#if CONFIG_NEW_QUANT
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
- TX_4X4, coeff_ctx, AV1_XFORM_QUANT_B_NUQ);
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_B_NUQ);
#else
av1_xform_quant(cm, x, 0, block,
#if CONFIG_CB4X4
@@ -2242,14 +2374,22 @@
#else
row + idy, col + idx,
#endif
- BLOCK_8X8, TX_4X4, coeff_ctx, AV1_XFORM_QUANT_B);
+ BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
#endif // CONFIG_NEW_QUANT
- ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, TX_4X4,
+ ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, tx_size,
scan_order->scan, scan_order->neighbors,
cpi->sf.use_fast_coef_costing);
- *(tempa + idx) = !(p->eobs[block] == 0);
- *(templ + idy) = !(p->eobs[block] == 0);
- can_skip &= (p->eobs[block] == 0);
+ skip = (p->eobs[block] == 0);
+ can_skip &= skip;
+ tempa[idx] = !skip;
+ templ[idy] = !skip;
+#if CONFIG_EXT_TX
+ if (tx_size == TX_8X4) {
+ tempa[idx + 1] = tempa[idx];
+ } else if (tx_size == TX_4X8) {
+ templ[idy + 1] = templ[idy];
+ }
+#endif // CONFIG_EXT_TX
#else
(void)scan_order;
@@ -2259,40 +2399,41 @@
#else
row + idy, col + idx,
#endif
- BLOCK_8X8, TX_4X4, coeff_ctx, AV1_XFORM_QUANT_B);
+ BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
ratey += x->rate;
skip = x->pvq_skip[0];
- *(tempa + idx) = !skip;
- *(templ + idy) = !skip;
+ tempa[idx] = !skip;
+ templ[idy] = !skip;
can_skip &= skip;
#endif
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
#if CONFIG_PVQ
if (!skip) {
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+ for (j = 0; j < tx_height; j++)
+ for (i = 0; i < tx_width; i++) dst[j * dst_stride + i] = 0;
#endif
- av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
- dst_stride, p->eobs[block], DCT_DCT, 1);
+ inv_txfm_func(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
+ p->eobs[block], DCT_DCT, 1);
#if CONFIG_PVQ
}
#endif
} else {
int64_t dist;
unsigned int tmp;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
+ TX_TYPE tx_type =
+ get_tx_type(PLANE_TYPE_Y, xd, block_raster_idx, tx_size);
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
const int coeff_ctx =
- combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+ combine_entropy_contexts(tempa[idx], templ[idy]);
#if CONFIG_CB4X4
block = 4 * block;
#endif
#if !CONFIG_PVQ
#if CONFIG_NEW_QUANT
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
- TX_4X4, coeff_ctx, AV1_XFORM_QUANT_FP_NUQ);
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_FP_NUQ);
#else
av1_xform_quant(cm, x, 0, block,
#if CONFIG_CB4X4
@@ -2300,15 +2441,23 @@
#else
row + idy, col + idx,
#endif
- BLOCK_8X8, TX_4X4, coeff_ctx, AV1_XFORM_QUANT_FP);
+ BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
- av1_optimize_b(cm, x, 0, block, TX_4X4, coeff_ctx);
- ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, TX_4X4,
+ av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
+ ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, tx_size,
scan_order->scan, scan_order->neighbors,
cpi->sf.use_fast_coef_costing);
- *(tempa + idx) = !(p->eobs[block] == 0);
- *(templ + idy) = !(p->eobs[block] == 0);
- can_skip &= (p->eobs[block] == 0);
+ skip = (p->eobs[block] == 0);
+ can_skip &= skip;
+ tempa[idx] = !skip;
+ templ[idy] = !skip;
+#if CONFIG_EXT_TX
+ if (tx_size == TX_8X4) {
+ tempa[idx + 1] = tempa[idx];
+ } else if (tx_size == TX_4X8) {
+ templ[idy + 1] = templ[idy];
+ }
+#endif // CONFIG_EXT_TX
#else
(void)scan_order;
@@ -2318,25 +2467,25 @@
#else
row + idy, col + idx,
#endif
- BLOCK_8X8, TX_4X4, coeff_ctx, AV1_XFORM_QUANT_FP);
+ BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
ratey += x->rate;
skip = x->pvq_skip[0];
- *(tempa + idx) = !skip;
- *(templ + idy) = !skip;
+ tempa[idx] = !skip;
+ templ[idy] = !skip;
can_skip &= skip;
#endif
#if CONFIG_PVQ
if (!skip) {
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+ for (j = 0; j < tx_height; j++)
+ for (i = 0; i < tx_width; i++) dst[j * dst_stride + i] = 0;
#endif
- av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
- dst_stride, p->eobs[block], tx_type, 0);
+ inv_txfm_func(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
+ p->eobs[block], tx_type, 0);
#if CONFIG_PVQ
}
#endif
// No need for av1_block_error2_c because the ssz is unused
- cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
+ cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
dist = (int64_t)tmp << 4;
distortion += dist;
// To use the pixel domain distortion, the step below needs to be
@@ -2358,14 +2507,14 @@
best_rd = this_rd;
best_can_skip = can_skip;
*best_mode = mode;
- memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
- memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
+ memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
+ memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
#if CONFIG_PVQ
od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif
- for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
+ for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
- num_4x4_blocks_wide * 4);
+ pred_width_in_transform_blocks * 4);
}
next : {}
#if CONFIG_PVQ
@@ -2381,9 +2530,9 @@
if (y_skip) *y_skip &= best_can_skip;
- for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
+ for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
- num_4x4_blocks_wide * 4);
+ pred_width_in_transform_blocks * 4);
return best_rd;
}
@@ -2392,55 +2541,65 @@
MACROBLOCK *mb, int *rate,
int *rate_y, int64_t *distortion,
int *y_skip, int64_t best_rd) {
- int i, j;
const MACROBLOCKD *const xd = &mb->e_mbd;
MODE_INFO *const mic = xd->mi[0];
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ MB_MODE_INFO *const mbmi = &mic->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
+ const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
int cost = 0;
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
const int *bmode_costs = cpi->mbmode_cost[0];
+ const int is_lossless = xd->lossless[mbmi->segment_id];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ const TX_SIZE tx_size = is_lossless ? TX_4X4 : max_txsize_rect_lookup[bsize];
+#else
+ const TX_SIZE tx_size = TX_4X4;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
#if CONFIG_EXT_INTRA
#if CONFIG_INTRA_INTERP
- mic->mbmi.intra_filter = INTRA_FILTER_LINEAR;
+ mbmi->intra_filter = INTRA_FILTER_LINEAR;
#endif // CONFIG_INTRA_INTERP
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
- mic->mbmi.filter_intra_mode_info.use_filter_intra_mode[0] = 0;
+ mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif // CONFIG_FILTER_INTRA
// TODO(any): Add search of the tx_type to improve rd performance at the
// expense of speed.
- mic->mbmi.tx_type = DCT_DCT;
- mic->mbmi.tx_size = TX_4X4;
+ mbmi->tx_type = DCT_DCT;
+ mbmi->tx_size = tx_size;
if (y_skip) *y_skip = 1;
- // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
- for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
- for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
+ // Pick modes for each prediction sub-block (of size 4x4, 4x8, or 8x4) in this
+ // 8x8 coding block.
+ for (idy = 0; idy < 2; idy += pred_height_in_4x4_blocks) {
+ for (idx = 0; idx < 2; idx += pred_width_in_4x4_blocks) {
PREDICTION_MODE best_mode = DC_PRED;
int r = INT_MAX, ry = INT_MAX;
int64_t d = INT64_MAX, this_rd = INT64_MAX;
- i = idy * 2 + idx;
+ int j;
+ const int pred_block_idx = idy * 2 + idx;
if (cpi->common.frame_type == KEY_FRAME) {
- const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, i);
- const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, i);
+ const PREDICTION_MODE A =
+ av1_above_block_mode(mic, above_mi, pred_block_idx);
+ const PREDICTION_MODE L =
+ av1_left_block_mode(mic, left_mi, pred_block_idx);
bmode_costs = cpi->y_mode_costs[A][L];
}
- this_rd = rd_pick_intra4x4block(
+ this_rd = rd_pick_intra_sub_8x8_y_subblock_mode(
cpi, mb, idy, idx, &best_mode, bmode_costs,
xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
- &ry, &d, bsize, y_skip, best_rd - total_rd);
+ &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
if (this_rd >= best_rd - total_rd) return INT64_MAX;
total_rd += this_rd;
@@ -2448,33 +2607,33 @@
total_distortion += d;
tot_rate_y += ry;
- mic->bmi[i].as_mode = best_mode;
- for (j = 1; j < num_4x4_blocks_high; ++j)
- mic->bmi[i + j * 2].as_mode = best_mode;
- for (j = 1; j < num_4x4_blocks_wide; ++j)
- mic->bmi[i + j].as_mode = best_mode;
+ mic->bmi[pred_block_idx].as_mode = best_mode;
+ for (j = 1; j < pred_height_in_4x4_blocks; ++j)
+ mic->bmi[pred_block_idx + j * 2].as_mode = best_mode;
+ for (j = 1; j < pred_width_in_4x4_blocks; ++j)
+ mic->bmi[pred_block_idx + j].as_mode = best_mode;
if (total_rd >= best_rd) return INT64_MAX;
}
}
- mic->mbmi.mode = mic->bmi[3].as_mode;
+ mbmi->mode = mic->bmi[3].as_mode;
// Add in the cost of the transform type
- if (!xd->lossless[mic->mbmi.segment_id]) {
+ if (!is_lossless) {
int rate_tx_type = 0;
#if CONFIG_EXT_TX
- if (get_ext_tx_types(TX_4X4, bsize, 0) > 1) {
- const int eset = get_ext_tx_set(TX_4X4, bsize, 0);
- rate_tx_type = cpi->intra_tx_type_costs[eset][TX_4X4][mic->mbmi.mode]
- [mic->mbmi.tx_type];
+ if (get_ext_tx_types(tx_size, bsize, 0) > 1) {
+ const int eset = get_ext_tx_set(tx_size, bsize, 0);
+ rate_tx_type = cpi->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
+ [mbmi->mode][mbmi->tx_type];
}
#else
rate_tx_type =
- cpi->intra_tx_type_costs[TX_4X4]
- [intra_mode_to_tx_type_context[mic->mbmi.mode]]
- [mic->mbmi.tx_type];
+ cpi->intra_tx_type_costs[txsize_sqr_map[tx_size]]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
#endif
- assert(mic->mbmi.tx_size == TX_4X4);
+ assert(mbmi->tx_size == tx_size);
cost += rate_tx_type;
tot_rate_y += rate_tx_type;
}
@@ -2884,7 +3043,6 @@
const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
- const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
#if CONFIG_PVQ
od_rollback_buffer pre_buf, post_buf;
@@ -2962,9 +3120,7 @@
// tokenonly rate, but for intra blocks, tx_size is always coded
// (prediction granularity), so we account for it in the full rate,
// not the tokenonly rate.
- this_rate_tokenonly -=
- cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
- [tx_size_to_depth(mbmi->tx_size)];
+ this_rate_tokenonly -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED)
@@ -4073,7 +4229,9 @@
pmi->palette_size[1] = 0;
#endif // CONFIG_PALETTE
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
- if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
+ if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
+ (1 << mode)))
+ continue;
mbmi->uv_mode = mode;
#if CONFIG_EXT_INTRA
@@ -4189,6 +4347,8 @@
pmi->palette_size[1] = palette_mode_info.palette_size[1];
#endif // CONFIG_PALETTE
+ // Make sure we actually chose a mode
+ assert(best_rd < INT64_MAX);
return best_rd;
}
@@ -4550,16 +4710,11 @@
for (idy = 0; idy < txb_height; idy += num_4x4_h) {
for (idx = 0; idx < txb_width; idx += num_4x4_w) {
int64_t dist, ssz, rd, rd1, rd2;
- int block;
int coeff_ctx;
- int k;
-
- k = i + (idy * 2 + idx);
- if (tx_size == TX_4X4)
- block = k;
- else
- block = (i ? 2 : 0);
-
+ const int k = i + (idy * 2 + idx);
+ const int block = av1_raster_order_to_block_index(tx_size, k);
+ assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
+ idx == 0 && idy == 0));
coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1)));
#if !CONFIG_PVQ
#if CONFIG_NEW_QUANT
@@ -8414,7 +8569,6 @@
#if CONFIG_PALETTE
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#endif // CONFIG_PALETTE
- const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
int dc_mode_index;
const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
@@ -8491,8 +8645,7 @@
// tokenonly rate, but for intra blocks, tx_size is always coded
// (prediction granularity), so we account for it in the full rate,
// not the tokenonly rate.
- rate_y -= cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
- [tx_size_to_depth(mbmi->tx_size)];
+ rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
@@ -8642,21 +8795,21 @@
int64_t best_intra_rd = INT64_MAX;
unsigned int best_pred_sse = UINT_MAX;
PREDICTION_MODE best_intra_mode = DC_PRED;
- int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
- int64_t dist_uvs[TX_SIZES];
- int skip_uvs[TX_SIZES];
- PREDICTION_MODE mode_uv[TX_SIZES];
+ int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
+ int64_t dist_uvs[TX_SIZES_ALL];
+ int skip_uvs[TX_SIZES_ALL];
+ PREDICTION_MODE mode_uv[TX_SIZES_ALL];
#if CONFIG_PALETTE
- PALETTE_MODE_INFO pmi_uv[TX_SIZES];
+ PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
#endif // CONFIG_PALETTE
#if CONFIG_EXT_INTRA
- int8_t uv_angle_delta[TX_SIZES];
+ int8_t uv_angle_delta[TX_SIZES_ALL];
int is_directional_mode, angle_stats_ready = 0;
uint8_t directional_mode_skip_mask[INTRA_MODES];
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
int8_t dc_skipped = 1;
- FILTER_INTRA_MODE_INFO filter_intra_mode_info_uv[TX_SIZES];
+ FILTER_INTRA_MODE_INFO filter_intra_mode_info_uv[TX_SIZES_ALL];
#endif // CONFIG_FILTER_INTRA
const int intra_cost_penalty = av1_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
@@ -8676,7 +8829,6 @@
int64_t mode_threshold[MAX_MODES];
int *mode_map = tile_data->mode_map[bsize];
const int mode_search_skip_flags = sf->mode_search_skip_flags;
- const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
#if CONFIG_PVQ
od_rollback_buffer pre_buf;
#endif
@@ -8751,7 +8903,7 @@
&comp_mode_p);
for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX;
+ for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
for (i = 0; i < MB_MODE_COUNT; ++i) {
for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
@@ -9281,9 +9433,7 @@
// tokenonly rate, but for intra blocks, tx_size is always coded
// (prediction granularity), so we account for it in the full rate,
// not the tokenonly rate.
- rate_y -=
- cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
- [tx_size_to_depth(mbmi->tx_size)];
+ rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
#if CONFIG_EXT_INTRA
if (is_directional_mode) {
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 5725154..222b8ba 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -333,7 +333,8 @@
struct macroblockd_plane *pd = &xd->plane[plane];
const PLANE_TYPE type = pd->plane_type;
const int ref = is_inter_block(mbmi);
- const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
+ const TX_TYPE tx_type = get_tx_type(type, xd, block_raster_idx, tx_size);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, ref);
int pt = get_entropy_context(tx_size, pd->above_context + blk_col,
pd->left_context + blk_row);
@@ -438,7 +439,7 @@
int pt; /* near block/prev token context index */
int c;
TOKENEXTRA *t = *tp; /* store tokens starting here */
- int eob = p->eobs[block];
+ const int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
#if CONFIG_SUPERTX
@@ -447,7 +448,8 @@
const int segment_id = mbmi->segment_id;
#endif // CONFIG_SUEPRTX
const int16_t *scan, *nb;
- const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
+ const TX_TYPE tx_type = get_tx_type(type, xd, block_raster_idx, tx_size);
const SCAN_ORDER *const scan_order =
get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
const int ref = is_inter_block(mbmi);
@@ -497,6 +499,7 @@
skip_eob = (token == ZERO_TOKEN);
}
if (c < seg_eob) {
+ assert(!skip_eob); // The last token must be non-zero.
add_token(&t, coef_probs[band[c]][pt],
#if CONFIG_EC_MULTISYMBOL
NULL,