ext-partition-types: Add 4:1 partitions
This patch adds support for 4:1 rectangular blocks to various common
data arrays, and adds new partition types to the EXT_PARTITION_TYPES
experiment which will use them.
This patch has the following restrictions, which can be lifted in
future patches:
* ext-partition-types is incompatible with fp_mb_stats and supertx
for the moment
* Currently only 32x32 superblocks can use the new partition types
There's a slightly odd restriction about when we allow
PARTITION_HORZ_4 or PARTITION_VERT_4. Since these both live in the
EXT_PARTITION_TYPES CDF, read_partition() can only return them if both
has_rows and has_cols are true. This means that at least half of the
width and height of the block must be visible. It might be nice to
relax that restriction but that would imply a change to how we encode
partition types, which seems already to be in a state of flux, so
maybe it's better to wait until that has settled down.
Change-Id: Id7fc3fd0f762f35f63b3d3e3bf4e07c245c7b4fa
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 05dca20..021abb4 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2442,7 +2442,8 @@
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
- assert(m->mbmi.sb_type <= cm->sb_size);
+ assert(m->mbmi.sb_type <= cm->sb_size ||
+ (m->mbmi.sb_type >= BLOCK_4X16 && m->mbmi.sb_type <= BLOCK_32X8));
bh = mi_size_high[m->mbmi.sb_type];
bw = mi_size_wide[m->mbmi.sb_type];
@@ -2510,7 +2511,8 @@
#endif
xd->mi = cm->mi_grid_visible + mi_offset;
- assert(mbmi->sb_type <= cm->sb_size);
+ assert(mbmi->sb_type <= cm->sb_size ||
+ (mbmi->sb_type >= BLOCK_4X16 && mbmi->sb_type <= BLOCK_32X8));
bh = mi_size_high[mbmi->sb_type];
bw = mi_size_wide[mbmi->sb_type];
@@ -2896,6 +2898,10 @@
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
const int hbs = mi_size_wide[bsize] / 2;
+#if CONFIG_EXT_PARTITION_TYPES
+ const int quarter_step = mi_size_wide[bsize] / 4;
+ int i;
+#endif
const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
const BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_CB4X4
@@ -3001,6 +3007,24 @@
write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
mi_row + hbs, mi_col + hbs);
break;
+ case PARTITION_HORZ_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ this_mi_row, mi_col);
+ }
+ break;
+ case PARTITION_VERT_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, this_mi_col);
+ }
+ break;
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0);
}
@@ -4611,7 +4635,12 @@
}
#endif
#if CONFIG_WEDGE && !CONFIG_NEW_MULTISYMBOL
- for (i = 0; i < BLOCK_SIZES; i++) {
+#if CONFIG_EXT_PARTITION_TYPES
+ int block_sizes_to_update = BLOCK_SIZES_ALL;
+#else
+ int block_sizes_to_update = BLOCK_SIZES;
+#endif
+ for (i = 0; i < block_sizes_to_update; i++) {
if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i))
av1_cond_prob_diff_update(header_bc, &fc->wedge_interintra_prob[i],
cm->counts.wedge_interintra[i], probwt);
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 229495f..5c521d468 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -141,6 +141,12 @@
&tree->verticalb[1]);
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
&tree->verticalb[2]);
+ for (int i = 0; i < 4; ++i) {  // one quarter-size context per 4:1 sub-block
+ alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_4,
+ &tree->horizontal4[i]);
+ alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_4,
+ &tree->vertical4[i]);
+ }
#if CONFIG_SUPERTX
alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ,
&tree->horizontal_supertx);
@@ -184,6 +190,10 @@
free_mode_context(&tree->verticala[i]);
free_mode_context(&tree->verticalb[i]);
}
+ for (i = 0; i < 4; ++i) {
+ free_mode_context(&tree->horizontal4[i]);
+ free_mode_context(&tree->vertical4[i]);
+ }
#endif // CONFIG_EXT_PARTITION_TYPES
free_mode_context(&tree->none);
free_mode_context(&tree->horizontal[0]);
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index a52029e..bcfcc27 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -81,6 +81,8 @@
PICK_MODE_CONTEXT horizontalb[3];
PICK_MODE_CONTEXT verticala[3];
PICK_MODE_CONTEXT verticalb[3];
+ PICK_MODE_CONTEXT horizontal4[4];
+ PICK_MODE_CONTEXT vertical4[4];
#endif
union {
struct PC_TREE *split[4];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 7d3d44e..ec9e410 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1182,6 +1182,7 @@
BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_EXT_PARTITION_TYPES
const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+ const int quarter_step = mi_size_wide[bsize] / 4;
#endif
#if CONFIG_CB4X4
const int unify_bsize = 1;
@@ -1260,6 +1261,24 @@
set_mode_info_b(cpi, tile, td, mi_row + hbs, mi_col + hbs, bsize2,
&pc_tree->verticalb[2]);
break;
+ case PARTITION_HORZ_4:
+ for (int i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ set_mode_info_b(cpi, tile, td, this_mi_row, mi_col, subsize,
+ &pc_tree->horizontal4[i]);
+ }
+ break;
+ case PARTITION_VERT_4:
+ for (int i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+
+ set_mode_info_b(cpi, tile, td, mi_row, this_mi_col, subsize,
+ &pc_tree->vertical4[i]);
+ }
+ break;
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0 && "Invalid partition type."); break;
}
@@ -2030,6 +2049,8 @@
const BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_EXT_PARTITION_TYPES
const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+ int quarter_step = mi_size_wide[bsize] / 4;
+ int i;
#endif
#if CONFIG_CB4X4
@@ -2228,6 +2249,24 @@
encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, dry_run, bsize2,
partition, &pc_tree->verticalb[2], rate);
break;
+ case PARTITION_HORZ_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ encode_b(cpi, tile, td, tp, this_mi_row, mi_col, dry_run, subsize,
+ partition, &pc_tree->horizontal4[i], rate);
+ }
+ break;
+ case PARTITION_VERT_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+
+ encode_b(cpi, tile, td, tp, mi_row, this_mi_col, dry_run, subsize,
+ partition, &pc_tree->vertical4[i], rate);
+ }
+ break;
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0 && "Invalid partition type."); break;
}
@@ -2585,7 +2624,9 @@
case PARTITION_VERT_A:
case PARTITION_VERT_B:
case PARTITION_HORZ_A:
- case PARTITION_HORZ_B: assert(0 && "Cannot handle extended partiton types");
+ case PARTITION_HORZ_B:
+ case PARTITION_HORZ_4:
+ case PARTITION_VERT_4: assert(0 && "Cannot handle extended partiton types");
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0); break;
}
@@ -2735,7 +2776,7 @@
}
/* clang-format off */
-static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
+static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2
#endif
@@ -2745,11 +2786,13 @@
BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
- BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
+ BLOCK_8X8 // 32x8
};
-static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
+static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 2x2, 2x4, 4x2
#endif
@@ -2759,12 +2802,14 @@
BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32
BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
- BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST // 64x128, 128x64, 128x128
+ BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
+ BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 4x16, 16x4, 8x32
+ BLOCK_32X32 // 32x8
};
// Next square block size less or equal than current block size.
-static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
+static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2
#endif
@@ -2774,8 +2819,10 @@
BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32
BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
- BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128
+ BLOCK_64X64, BLOCK_64X64, BLOCK_128X128, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
+ BLOCK_8X8 // 32x8
};
/* clang-format on */
@@ -4215,6 +4262,120 @@
bsize2, mi_row + mi_step, mi_col + mi_step, bsize2);
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
+
+ // PARTITION_HORZ_4
+ // TODO(david.barker): For this and PARTITION_VERT_4,
+ // * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the
+ // chroma plane
+ // * Add support for supertx
+ if (bsize == BLOCK_32X32 && partition_horz_allowed && !force_horz_split &&
+ (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
+ int i;
+ const int quarter_step = mi_size_high[bsize] / 4;
+ PICK_MODE_CONTEXT *ctx_prev = ctx_none;
+
+ subsize = get_subsize(bsize, PARTITION_HORZ_4);
+ av1_zero(sum_rdc);
+
+ for (i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev);
+
+ ctx_prev = &pc_tree->horizontal4[i];
+
+ rd_pick_sb_modes(cpi, tile_data, x, this_mi_row, mi_col, &this_rdc,
+ PARTITION_HORZ_4, subsize, ctx_prev,
+ best_rdc.rdcost - sum_rdc.rdcost);
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ break;
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+ }
+
+ if (sum_rdc.rdcost >= best_rdc.rdcost) break;
+
+ if (i < 3) {
+ update_state(cpi, td, ctx_prev, this_mi_row, mi_col, subsize, 1);
+ encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, this_mi_row, mi_col,
+ subsize, NULL);
+ }
+ }
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rate += partition_cost[PARTITION_HORZ_4];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
+ pc_tree->partitioning = PARTITION_HORZ_4;
+ }
+ }
+#if !CONFIG_PVQ
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
+ }
+ // PARTITION_VERT_4: evaluate splitting the block into four quarter-width columns
+ if (bsize == BLOCK_32X32 && partition_vert_allowed && !force_vert_split &&
+ (do_rectangular_split || av1_active_v_edge(cpi, mi_col, mi_step))) {
+ int i;
+ const int quarter_step = mi_size_wide[bsize] / 4;
+ PICK_MODE_CONTEXT *ctx_prev = ctx_none;
+
+ subsize = get_subsize(bsize, PARTITION_VERT_4);
+ av1_zero(sum_rdc);
+
+ for (i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;  // column starts at/after cm->mi_cols
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev);
+
+ ctx_prev = &pc_tree->vertical4[i];
+
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, this_mi_col, &this_rdc,
+ PARTITION_VERT_4, subsize, ctx_prev,
+ best_rdc.rdcost - sum_rdc.rdcost);
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;  // the >= check just below then leaves the loop
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+ }
+
+ if (sum_rdc.rdcost >= best_rdc.rdcost) break;
+
+ if (i < 3) {  // skipped for the last column
+ update_state(cpi, td, ctx_prev, mi_row, this_mi_col, subsize, 1);
+ encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, this_mi_col,
+ subsize, NULL);
+ }
+ }
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rate += partition_cost[PARTITION_VERT_4];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);  // recompute with partition cost included
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
+ pc_tree->partitioning = PARTITION_VERT_4;
+ }
+ }
+#if !CONFIG_PVQ
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
+ }
#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_SPEED_REFS
@@ -4569,7 +4730,7 @@
TileDataEnc *const tile_data =
&cpi->tile_data[tile_row * tile_cols + tile_col];
int i, j;
- for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = 32;
tile_data->mode_map[i][j] = j;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 54f4102..1af8911 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1187,6 +1187,21 @@
MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad4x4x8)
MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d)
+#if CONFIG_EXT_PARTITION_TYPES
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x16)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x16_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x16x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x4)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x4_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x4x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x32)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x32x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x8)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x8_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x8x4d)
+#endif
+
#if CONFIG_EXT_INTER
#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
cpi->fn_ptr[BT].msdf = MCSDF; \
@@ -1235,6 +1250,13 @@
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x16)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x4)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x32)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
@@ -1278,6 +1300,13 @@
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x4)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x8)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x16)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x4)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x32)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8)
+#endif
#endif // CONFIG_MOTION_VAR
static void highbd_set_var_fns(AV1_COMP *const cpi) {
@@ -1285,6 +1314,32 @@
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
case AOM_BITS_8:
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits8,
+ aom_highbd_sad32x8_avg_bits8, aom_highbd_8_variance32x8,
+ aom_highbd_8_sub_pixel_variance32x8,
+ aom_highbd_8_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_highbd_sad32x8x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits8,
+ aom_highbd_sad8x32_avg_bits8, aom_highbd_8_variance8x32,
+ aom_highbd_8_sub_pixel_variance8x32,
+ aom_highbd_8_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_highbd_sad8x32x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits8,
+ aom_highbd_sad16x4_avg_bits8, aom_highbd_8_variance16x4,
+ aom_highbd_8_sub_pixel_variance16x4,
+ aom_highbd_8_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_highbd_sad16x4x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits8,
+ aom_highbd_sad4x16_avg_bits8, aom_highbd_8_variance4x16,
+ aom_highbd_8_sub_pixel_variance4x16,
+ aom_highbd_8_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_highbd_sad4x16x4d_bits8)
+#endif
+
HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits8,
aom_highbd_sad32x16_avg_bits8, aom_highbd_8_variance32x16,
aom_highbd_8_sub_pixel_variance32x16,
@@ -1432,6 +1487,19 @@
aom_highbd_8_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8,
aom_highbd_8_masked_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits8,
+ aom_highbd_8_masked_sub_pixel_variance32x8)
+
+ HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits8,
+ aom_highbd_8_masked_sub_pixel_variance8x32)
+
+ HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits8,
+ aom_highbd_8_masked_sub_pixel_variance16x4)
+
+ HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits8,
+ aom_highbd_8_masked_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
@@ -1484,10 +1552,53 @@
HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits8,
aom_highbd_obmc_variance4x4,
aom_highbd_obmc_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits8,
+ aom_highbd_obmc_variance32x8,
+ aom_highbd_obmc_sub_pixel_variance32x8)
+
+ HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits8,
+ aom_highbd_obmc_variance8x32,
+ aom_highbd_obmc_sub_pixel_variance8x32)
+
+ HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits8,
+ aom_highbd_obmc_variance16x4,
+ aom_highbd_obmc_sub_pixel_variance16x4)
+
+ HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits8,
+ aom_highbd_obmc_variance4x16,
+ aom_highbd_obmc_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_MOTION_VAR
break;
case AOM_BITS_10:
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10,
+ aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8,
+ aom_highbd_10_sub_pixel_variance32x8,
+ aom_highbd_10_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_highbd_sad32x8x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits10,
+ aom_highbd_sad8x32_avg_bits10, aom_highbd_10_variance8x32,
+ aom_highbd_10_sub_pixel_variance8x32,
+ aom_highbd_10_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_highbd_sad8x32x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits10,
+ aom_highbd_sad16x4_avg_bits10, aom_highbd_10_variance16x4,
+ aom_highbd_10_sub_pixel_variance16x4,
+ aom_highbd_10_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_highbd_sad16x4x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits10,
+ aom_highbd_sad4x16_avg_bits10, aom_highbd_10_variance4x16,
+ aom_highbd_10_sub_pixel_variance4x16,
+ aom_highbd_10_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_highbd_sad4x16x4d_bits10)
+#endif
+
HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits10,
aom_highbd_sad32x16_avg_bits10, aom_highbd_10_variance32x16,
aom_highbd_10_sub_pixel_variance32x16,
@@ -1639,6 +1750,19 @@
aom_highbd_10_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10,
aom_highbd_10_masked_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits10,
+ aom_highbd_10_masked_sub_pixel_variance32x8)
+
+ HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits10,
+ aom_highbd_10_masked_sub_pixel_variance8x32)
+
+ HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits10,
+ aom_highbd_10_masked_sub_pixel_variance16x4)
+
+ HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits10,
+ aom_highbd_10_masked_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
@@ -1691,10 +1815,53 @@
HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits10,
aom_highbd_10_obmc_variance4x4,
aom_highbd_10_obmc_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits10,
+ aom_highbd_10_obmc_variance32x8,
+ aom_highbd_10_obmc_sub_pixel_variance32x8)
+
+ HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits10,
+ aom_highbd_10_obmc_variance8x32,
+ aom_highbd_10_obmc_sub_pixel_variance8x32)
+
+ HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits10,
+ aom_highbd_10_obmc_variance16x4,
+ aom_highbd_10_obmc_sub_pixel_variance16x4)
+
+ HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits10,
+ aom_highbd_10_obmc_variance4x16,
+ aom_highbd_10_obmc_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_MOTION_VAR
break;
case AOM_BITS_12:
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12,
+ aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8,
+ aom_highbd_12_sub_pixel_variance32x8,
+ aom_highbd_12_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_highbd_sad32x8x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits12,
+ aom_highbd_sad8x32_avg_bits12, aom_highbd_12_variance8x32,
+ aom_highbd_12_sub_pixel_variance8x32,
+ aom_highbd_12_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_highbd_sad8x32x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits12,
+ aom_highbd_sad16x4_avg_bits12, aom_highbd_12_variance16x4,
+ aom_highbd_12_sub_pixel_variance16x4,
+ aom_highbd_12_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_highbd_sad16x4x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits12,
+ aom_highbd_sad4x16_avg_bits12, aom_highbd_12_variance4x16,
+ aom_highbd_12_sub_pixel_variance4x16,
+ aom_highbd_12_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_highbd_sad4x16x4d_bits12)
+#endif
+
HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits12,
aom_highbd_sad32x16_avg_bits12, aom_highbd_12_variance32x16,
aom_highbd_12_sub_pixel_variance32x16,
@@ -1846,6 +2013,19 @@
aom_highbd_12_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12,
aom_highbd_12_masked_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits12,
+ aom_highbd_12_masked_sub_pixel_variance32x8)
+
+ HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits12,
+ aom_highbd_12_masked_sub_pixel_variance8x32)
+
+ HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits12,
+ aom_highbd_12_masked_sub_pixel_variance16x4)
+
+ HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits12,
+ aom_highbd_12_masked_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
@@ -1899,6 +2079,23 @@
HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits12,
aom_highbd_12_obmc_variance4x4,
aom_highbd_12_obmc_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits12,
+ aom_highbd_12_obmc_variance32x8,
+ aom_highbd_12_obmc_sub_pixel_variance32x8)
+
+ HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits12,
+ aom_highbd_12_obmc_variance8x32,
+ aom_highbd_12_obmc_sub_pixel_variance8x32)
+
+ HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits12,
+ aom_highbd_12_obmc_variance16x4,
+ aom_highbd_12_obmc_sub_pixel_variance16x4)
+
+ HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits12,
+ aom_highbd_12_obmc_variance4x16,
+ aom_highbd_12_obmc_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_MOTION_VAR
break;
@@ -2295,6 +2492,24 @@
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
+#if CONFIG_EXT_PARTITION_TYPES
+ BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
+ aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_sad4x16x4d)
+
+ BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4,
+ aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_sad16x4x4d)
+
+ BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32,
+ aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_sad8x32x4d)
+
+ BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8,
+ aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_sad32x8x4d)
+#endif
+
#if CONFIG_EXT_PARTITION
BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128,
@@ -2407,6 +2622,20 @@
aom_obmc_sub_pixel_variance8x4)
OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4,
aom_obmc_sub_pixel_variance4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+ OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16,
+ aom_obmc_sub_pixel_variance4x16)
+
+ OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4,
+ aom_obmc_sub_pixel_variance16x4)
+
+ OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32,
+ aom_obmc_sub_pixel_variance8x32)
+
+ OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8,
+ aom_obmc_sub_pixel_variance32x8)
+#endif
#endif // CONFIG_MOTION_VAR
#if CONFIG_EXT_INTER
@@ -2433,6 +2662,16 @@
MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8)
MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4)
MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+ MBFP(BLOCK_4X16, aom_masked_sad4x16, aom_masked_sub_pixel_variance4x16)
+
+ MBFP(BLOCK_16X4, aom_masked_sad16x4, aom_masked_sub_pixel_variance16x4)
+
+ MBFP(BLOCK_8X32, aom_masked_sad8x32, aom_masked_sub_pixel_variance8x32)
+
+ MBFP(BLOCK_32X8, aom_masked_sad32x8, aom_masked_sub_pixel_variance32x8)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_HIGHBITDEPTH
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index ab09cc1..3145657 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -307,8 +307,8 @@
// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
typedef struct TileDataEnc {
TileInfo tile_info;
- int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
- int mode_map[BLOCK_SIZES][MAX_MODES];
+ int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
+ int mode_map[BLOCK_SIZES_ALL][MAX_MODES];
int m_search_count;
int ex_search_count;
#if CONFIG_PVQ
@@ -482,7 +482,7 @@
fractional_mv_step_fp *find_fractional_mv_step;
av1_full_search_fn_t full_search_sad; // It is currently unused.
av1_diamond_search_fn_t diamond_search_sad;
- aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
+ aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
uint64_t time_receive_data;
uint64_t time_compress_data;
uint64_t time_pick_lpf;
@@ -558,9 +558,9 @@
#endif // CONFIG_INTERINTRA
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- int motion_mode_cost[BLOCK_SIZES][MOTION_MODES];
+ int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- int motion_mode_cost1[BLOCK_SIZES][2];
+ int motion_mode_cost1[BLOCK_SIZES_ALL][2];
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES];
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 1c9ed43..bb2ba61 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -50,14 +50,15 @@
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
+static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
2, 2, 2,
#endif
- 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
+ 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
#if CONFIG_EXT_PARTITION
- 48, 48, 64
+ 48, 48, 64,
#endif // CONFIG_EXT_PARTITION
+ 4, 4, 8, 8
};
static void fill_mode_costs(AV1_COMP *cpi) {
@@ -302,7 +303,7 @@
0, MAXQ);
const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
- for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
+ for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
// Threshold here seems unnecessarily harsh but fine given actual
// range of values used for cpi->sf.thresh_mult[].
const int t = q * rd_thresh_block_size_factor[bsize];
@@ -458,12 +459,12 @@
#endif // CONFIG_INTERINTRA
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) {
+ for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
av1_cost_tokens((int *)cpi->motion_mode_cost[i],
cm->fc->motion_mode_prob[i], av1_motion_mode_tree);
}
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) {
+ for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
cpi->motion_mode_cost1[i][0] = av1_cost_bit(cm->fc->obmc_prob[i], 0);
cpi->motion_mode_cost1[i][1] = av1_cost_bit(cm->fc->obmc_prob[i], 1);
}
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index efa6450..e8d2384 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -441,7 +441,7 @@
int thresh_mult[MAX_MODES];
int thresh_mult_sub8x8[MAX_REFS];
- int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
+ int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES];
int64_t prediction_type_threshes[TOTAL_REFS_PER_FRAME][REFERENCE_MODES];
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0acc5cd..def2a36 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2069,8 +2069,14 @@
const MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int tx_select =
- cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT &&
+#if CONFIG_EXT_PARTITION_TYPES
+ // Currently these block shapes can only use 4x4
+ // transforms
+ mbmi->sb_type != BLOCK_4X16 &&
+ mbmi->sb_type != BLOCK_16X4 &&
+#endif
+ mbmi->sb_type >= BLOCK_8X8;
if (tx_select) {
const int is_inter = is_inter_block(mbmi);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index ff29b8d..1094dfc 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -199,7 +199,7 @@
: FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+ sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->allow_partition_search_skip = 1;
sf->use_upsampled_references = 0;
@@ -413,7 +413,7 @@
sf->use_fast_coef_costing = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->schedule_mode_search = 0;
- for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL;
+ for (i = 0; i < BLOCK_SIZES_ALL; ++i) sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_LARGEST;
sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7e431d7..33ceabf 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -418,7 +418,7 @@
// A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
// modes are used in order from LSB to MSB for each BLOCK_SIZE.
- int inter_mode_mask[BLOCK_SIZES];
+ int inter_mode_mask[BLOCK_SIZES_ALL];
// This feature controls whether we do the expensive context update and
// calculation in the rd coefficient costing loop.