ARBITRARY_WEDGE: save raw mask instead of smoothed

Also:
- For RLE, also compute the rate in bits * 512.
- Bitstream writing is partial: RLE is invoked, but the actual write to
  the bitstream is still pending.
- Use av1_cost_literal.
- Fix the build when DUMP_SEGMENT_MASKS is enabled.

Change-Id: I46efdf9f22f781c4eb5e43bd2dfaebdab018cc80
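Note: "bits * 512" above is the fixed-point rate unit used throughout the encoder's RD code: av1_cost_literal(n) expands to n shifted left by AV1_PROB_COST_SHIFT (which is 9), so one bit costs 512 rate units. A minimal sketch of the arithmetic, mirroring the av1/encoder/cost.h definitions:

    /* Rate unit used by this change: 1 bit == 512 RD rate units. */
    #define AV1_PROB_COST_SHIFT 9
    #define av1_cost_literal(n) ((n) * (1 << AV1_PROB_COST_SHIFT))
    /* e.g. an RLE payload of 130 bits costs av1_cost_literal(130) == 66560. */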
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 7428fc2..5d6af0b 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -206,6 +206,9 @@
 // sent together in functions related to interinter compound modes
 typedef struct {
   uint8_t *seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+  uint8_t *seg_mask_smoothed;
+#endif  // CONFIG_ARBITRARY_WEDGE
   int8_t wedge_index;
   int8_t wedge_sign;
   DIFFWTD_MASK_TYPE mask_type;
@@ -831,6 +834,13 @@
   */
  DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
+#if CONFIG_ARBITRARY_WEDGE
+  // Only used for the arbitrary wedge. Derived from 'seg_mask' by extending
+  // the binary mask range, and then smoothing to get a contiguous soft mask.
+  // TODO(urvang): Does the size need to be 2 times?
+  DECLARE_ALIGNED(16, uint8_t, seg_mask_smoothed[2 * MAX_SB_SQUARE]);
+#endif  // CONFIG_ARBITRARY_WEDGE
+
  /*!
   * CFL (chroma from luma) related parameters.
   */
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 93b737d..5a00c63 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -26,6 +26,10 @@
 #include "av1/common/obmc.h"
 #include "av1/common/reconinter.h"
 #include "av1/common/reconintra.h"
+// TODO(urvang): Move common parts to av1/common.
+#if CONFIG_ARBITRARY_WEDGE
+#include "av1/encoder/segment_patch.h"
+#endif  // CONFIG_ARBITRARY_WEDGE

 // This function will determine whether or not to create a warped
 // prediction.
@@ -330,8 +334,23 @@
     case COMPOUND_WEDGE:
 #if CONFIG_ARBITRARY_WEDGE
       if (av1_wedge_params_lookup[sb_type].codebook == NULL) {
-        // We are using an arbitrary mask, stored earlier.
-        return comp_data->seg_mask;
+        // We are using an arbitrary mask, stored earlier. But we need to
+        // derive the smooth mask from the raw binary mask.
+        const int bw = block_size_wide[sb_type];
+        const int bh = block_size_high[sb_type];
+        const int N = bw * bh;
+
+        memcpy(comp_data->seg_mask_smoothed, comp_data->seg_mask,
+               N * sizeof(*comp_data->seg_mask_smoothed));
+
+        // TODO(urvang): Refactor the part below.
+
+        // Convert binary mask with values {0, 1} to one with values {0, 64}.
+        av1_extend_binary_mask_range(comp_data->seg_mask_smoothed, bw, bh);
+
+        // Get a smooth mask from the binary mask.
+        av1_apply_box_blur(comp_data->seg_mask_smoothed, bw, bh);
+        return comp_data->seg_mask_smoothed;
       }
 #endif  // CONFIG_ARBITRARY_WEDGE
       return av1_get_contiguous_soft_mask(comp_data->wedge_index,
@@ -951,6 +970,9 @@
     }
     // Assign physical buffer.
     inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+    inter_pred_params.mask_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+#endif  // CONFIG_ARBITRARY_WEDGE
   }

   av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
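Note: av1_extend_binary_mask_range() and av1_apply_box_blur() live in segment_patch and their bodies are not part of this diff. A hedged sketch of what the two steps above are assumed to do: stretch the {0, 1} mask to {0, 64} (64 == AOM_BLEND_A64_MAX_ALPHA) and then soften the edges with a box filter. The function names and blur radius below are illustrative, not the actual implementation:

    #include <stdint.h>
    #include <string.h>

    /* Sketch: map a {0, 1} mask to {0, 64} so it can serve as a blend alpha. */
    static void extend_mask_range_sketch(uint8_t *mask, int w, int h) {
      for (int i = 0; i < w * h; ++i) mask[i] = (uint8_t)(mask[i] * 64);
    }

    /* Sketch: horizontal pass of a box blur with an assumed radius of 2; the
     * real helper presumably runs a vertical pass as well. */
    static void box_blur_h_sketch(uint8_t *mask, int w, int h) {
      enum { RADIUS = 2 };
      uint8_t row[128]; /* enough for blocks up to 128 pixels wide */
      for (int r = 0; r < h; ++r) {
        for (int c = 0; c < w; ++c) {
          int sum = 0, count = 0;
          for (int k = -RADIUS; k <= RADIUS; ++k) {
            if (c + k < 0 || c + k >= w) continue;
            sum += mask[r * w + c + k];
            ++count;
          }
          row[c] = (uint8_t)((sum + count / 2) / count);
        }
        memcpy(&mask[r * w], row, (size_t)w);
      }
    }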
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f988e81..7f292bc 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -43,6 +43,9 @@
 #include "av1/encoder/encodetxb.h"
 #include "av1/encoder/mcomp.h"
 #include "av1/encoder/palette.h"
+#if CONFIG_ARBITRARY_WEDGE
+#include "av1/encoder/segment_patch.h"
+#endif  // CONFIG_ARBITRARY_WEDGE
 #include "av1/encoder/segmentation.h"
 #include "av1/encoder/tokenize.h"

@@ -1235,9 +1238,28 @@
       if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
         assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
-        aom_write_symbol(w, mbmi->interinter_comp.wedge_index,
-                         ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES);
-        aom_write_bit(w, mbmi->interinter_comp.wedge_sign);
+        // TODO(urvang): Same on the decoder side also.
+#if CONFIG_ARBITRARY_WEDGE
+        if (av1_wedge_params_lookup[bsize].codebook == NULL) {
+          // Arbitrary wedge: run RLE and write the RLE output.
+          uint8_t rle_buf[3 * MAX_SB_SQUARE];
+          int rle_size = 0;
+          int rle_rate = 0;
+          av1_run_length_encode(mbmi->interinter_comp.seg_mask,
+                                block_size_wide[bsize],
+                                block_size_high[bsize],
+                                block_size_wide[bsize], rle_buf, &rle_size,
+                                &rle_rate);
+          // TODO(urvang): Write rle_buf.
+        } else {
+          // Transmit wedge index and sign only.
+#endif  // CONFIG_ARBITRARY_WEDGE
+          aom_write_symbol(w, mbmi->interinter_comp.wedge_index,
+                           ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES);
+          aom_write_bit(w, mbmi->interinter_comp.wedge_sign);
+#if CONFIG_ARBITRARY_WEDGE
+        }
+#endif  // CONFIG_ARBITRARY_WEDGE
       } else {
         assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
         aom_write_literal(w, mbmi->interinter_comp.mask_type,
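Note: the "Write rle_buf" TODO is left open by this change. One plausible way to finish it with the existing bitwriter is sketched below; it assumes the rle_buf layout implied by compound_type.c (a 1-byte symbol followed by a 2-byte run length, byte order unverified) and the bit widths the rate estimate uses (1 bit per symbol, 12 or 14 bits per run length). A sketch only, not the pending implementation:

    /* Hypothetical writer for the RLE stream produced above. */
    static void write_rle_sketch(aom_writer *w, const uint8_t *rle_buf,
                                 int rle_size, int bits_per_run_len) {
      for (int i = 0; i < rle_size; i += 3) {
        aom_write_bit(w, rle_buf[i]); /* binary segment id */
        /* Assumed little-endian 2-byte run length. */
        const int run_len = rle_buf[i + 1] | (rle_buf[i + 2] << 8);
        aom_write_literal(w, run_len, bits_per_run_len);
      }
    }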
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index 76eb4e1..470d377 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -306,11 +306,10 @@
 #if CONFIG_ARBITRARY_WEDGE
 // Create an arbitrary binary mask using spatial segmentation of this block.
 // This is used for larger blocks, where we don't have pre-defined wedges.
-static int64_t pick_arbitrary_wedge(const AV1_COMP *const cpi,
-                                    MACROBLOCK *const x, const BLOCK_SIZE bsize,
-                                    const int16_t *const residual1,
-                                    const int16_t *const diff10,
-                                    uint8_t *seg_mask, uint64_t *best_sse) {
+static int64_t pick_arbitrary_wedge(
+    const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize,
+    const int16_t *const residual1, const int16_t *const diff10,
+    uint8_t *seg_mask, uint8_t *seg_mask_smoothed, uint64_t *best_sse) {
   const int bw = block_size_wide[bsize];
   const int bh = block_size_high[bsize];
   const int N = bw * bh;
@@ -327,38 +326,42 @@
   params.k = 5000;  // TODO(urvang): Temporary hack to get 2 components.
   int num_components = -1;
   av1_get_segments(x->plane[0].src.buf, bw, bh, x->plane[0].src.stride, &params,
-                   seg_mask, &num_components);
+                   seg_mask_smoothed, &num_components);
   if (num_components >= 2) {
     // TODO(urvang): Convert more than 2 components to 2 components.
     if (num_components == 2) {
+      // Save the raw mask to 'seg_mask', as that is the one to be used for
+      // signaling in the bitstream.
+      memcpy(seg_mask, seg_mask_smoothed, N * sizeof(*seg_mask));
+
+      // TODO(urvang): Refactor the part below.
+
       // Convert binary mask with values {0, 1} to one with values {0, 64}.
-      av1_extend_binary_mask_range(seg_mask, bw, bh);
+      av1_extend_binary_mask_range(seg_mask_smoothed, bw, bh);
 #if DUMP_SEGMENT_MASKS
-      av1_dump_raw_y_plane(seg_mask, bw, bh, bw, "/tmp/2.binary_mask.yuv");
+      av1_dump_raw_y_plane(seg_mask_smoothed, bw, bh, bw, "/tmp/2.binary_mask.yuv");
 #endif  // DUMP_SEGMENT_MASKS

       // Get a smooth mask from the binary mask.
-      av1_apply_box_blur(seg_mask, bw, bh);
+      av1_apply_box_blur(seg_mask_smoothed, bw, bh);
 #if DUMP_SEGMENT_MASKS
-      av1_dump_raw_y_plane(seg_mask, bw, bh, bw, "/tmp/3.smooth_mask.yuv");
+      av1_dump_raw_y_plane(seg_mask_smoothed, bw, bh, bw, "/tmp/3.smooth_mask.yuv");
 #endif  // DUMP_SEGMENT_MASKS

       // Get RD cost.
       *best_sse =
-          av1_wedge_sse_from_residuals(residual1, diff10, seg_mask, N);
+          av1_wedge_sse_from_residuals(residual1, diff10, seg_mask_smoothed, N);
       const MACROBLOCKD *const xd = &x->e_mbd;
       const int hbd = is_cur_buf_hbd(xd);
       const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
-      *best_sse = ROUND_POWER_OF_TWO(*best_sse, bd_round);
+      *best_sse = ROUND_POWER_OF_TWO(*best_sse, bd_round);
       int rate;
       int64_t dist;
-      model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, *best_sse, N,
-                                                    &rate, &dist);
-      // TODO(urvang): Add cost of signaling wedge itself to 'rate'.
+      model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, *best_sse,
+                                                    N, &rate, &dist);
       const int64_t rd = RDCOST(x->rdmult, rate, dist);
-      // TODO(urvang): Subtract rate of signaling wedge (like pick_wedge)?
       return rd;
     }
   }
   return INT64_MAX;
@@ -385,10 +388,11 @@
 #if CONFIG_ARBITRARY_WEDGE
   if (av1_wedge_params_lookup[bsize].codebook == NULL) {
-    // TODO(urvang): Reuse seg_mask or have a different wedge_mask array?
     mbmi->interinter_comp.seg_mask = xd->seg_mask;
-    rd = pick_arbitrary_wedge(cpi, x, bsize, residual1, diff10,
-                              mbmi->interinter_comp.seg_mask, best_sse);
+    mbmi->interinter_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+    rd = pick_arbitrary_wedge(
+        cpi, x, bsize, residual1, diff10, mbmi->interinter_comp.seg_mask,
+        mbmi->interinter_comp.seg_mask_smoothed, best_sse);
     mbmi->interinter_comp.wedge_sign = 0;
     mbmi->interinter_comp.wedge_index = -1;
     return rd;
@@ -1135,15 +1139,15 @@
       // 3*n, as storing each length takes 2 bytes.
       uint8_t rle_buf[3 * MAX_SB_SQUARE];
       int rle_size = 0;
+      int rle_rate = INT_MAX;
       av1_run_length_encode(mbmi->interinter_comp.seg_mask, bw, bh, bw, rle_buf,
-                            &rle_size);
-      return rle_size;
+                            &rle_size, &rle_rate);
+      return rle_rate;
     }
 #endif  // CONFIG_ARBITRARY_WEDGE
     return av1_cost_literal(1) +
-           mode_costs
-               ->wedge_idx_cost[mbmi->sb_type]
-                               [mbmi->interinter_comp.wedge_index];
+           mode_costs->wedge_idx_cost[mbmi->sb_type]
+                                     [mbmi->interinter_comp.wedge_index];
   } else {
     assert(compound_type == COMPOUND_DIFFWTD);
     return av1_cost_literal(1);
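Note: returning rle_rate instead of rle_size matters because the value is compared against wedge_idx_cost[][] and av1_cost_literal(1), both of which are in 1/512-bit units; returning a byte count would distort the comparison inside RDCOST(). A minimal sketch of the unit bookkeeping (not libaom's exact RDCOST macro; distortion scaling omitted):

    #include <stdint.h>

    /* rate_512 is in 1/512-bit units; >> 9 folds the scale back out,
     * matching AV1_PROB_COST_SHIFT == 9. */
    static int64_t rd_sketch(int rdmult, int rate_512, int64_t dist) {
      return (((int64_t)rate_512 * rdmult) >> 9) + dist;
    }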
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index dedc8b3..7d7f5e6 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -691,6 +691,9 @@
   int_mv tmp_mv[2];
   int tmp_rate_mv = 0;
   mbmi->interinter_comp.seg_mask = xd->seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+  mbmi->interinter_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+#endif  // CONFIG_ARBITRARY_WEDGE
   const INTERINTER_COMPOUND_DATA *compound_data = &mbmi->interinter_comp;

   if (this_mode == NEW_NEWMV) {
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c
index dbe7e4b..6d75d87 100644
--- a/av1/encoder/reconinter_enc.c
+++ b/av1/encoder/reconinter_enc.c
@@ -368,6 +368,9 @@
   struct buf_2d *const dst_buf = &pd->dst;
   uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
   mbmi->interinter_comp.seg_mask = xd->seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+  mbmi->interinter_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+#endif  // CONFIG_ARBITRARY_WEDGE
   const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp;
   const int is_hbd = is_cur_buf_hbd(xd);
diff --git a/av1/encoder/segment_patch.cc b/av1/encoder/segment_patch.cc
index 7080d50..d71750d 100644
--- a/av1/encoder/segment_patch.cc
+++ b/av1/encoder/segment_patch.cc
@@ -1,8 +1,12 @@
 #include <assert.h>
 #include <unordered_map>
+#if DUMP_SEGMENT_MASKS
+#include <fstream>
+#endif  // DUMP_SEGMENT_MASKS

 #include "aom_mem/aom_mem.h"
 #include "av1/common/enums.h"
+#include "av1/encoder/cost.h"
 #include "av1/encoder/segment_patch.h"
 #include "third_party/segment/segment-image.h"

@@ -164,19 +168,29 @@
 }

 void av1_run_length_encode(const uint8_t *const img, int width, int height,
-                           int stride, uint8_t *out, int *out_size) {
+                           int stride, uint8_t *out, int *out_size,
+                           int *out_rate) {
   int out_idx = 0;
   uint8_t prev_val = img[0];
   int run_len = 1;
+  int num_bits = 0;
+  const int bits_per_symbol = 1;  // Assuming exactly 2 segments.
+  // Assuming 64x64 or 128x128 block sizes only.
+  assert((width == 64 && height == 64) || (width == 128 && height == 128));
+  const int bits_per_run_len =
+      (width == 128 && height == 128) ? 14 : 12;  // log2(width * height)
   for (int r = 0; r < height; ++r) {
     for (int c = (r == 0) ? 1 : 0; c < width; ++c) {
       const uint8_t curr_val = img[r * stride + c];
+      assert(curr_val < (1 << bits_per_symbol));
       if (curr_val == prev_val) {
         ++run_len;
       } else {
         out[out_idx++] = prev_val;
+        num_bits += bits_per_symbol;
         write_run_length(run_len, out, &out_idx);
+        num_bits += bits_per_run_len;
         run_len = 1;
         prev_val = curr_val;
       }
@@ -185,6 +199,9 @@
   out[out_idx++] = prev_val;
   write_run_length(run_len, out, &out_idx);
+  // Count the final run too, so that 'out_rate' accounts for every run.
+  num_bits += bits_per_symbol + bits_per_run_len;
   *out_size = out_idx;
+  *out_rate = av1_cost_literal(num_bits);
 }

 #if DUMP_SEGMENT_MASKS
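Note: a worked example of the rate computed above, for a 64x64 block (bits_per_run_len = log2(64 * 64) = 12): a mask that compresses into 10 runs costs 10 * (1 + 12) = 130 bits, i.e. av1_cost_literal(130) = 66560 rate units. Usage with the extended signature (values hypothetical):

    uint8_t mask[64 * 64];        /* binary mask, values in {0, 1} */
    uint8_t rle_buf[3 * 64 * 64]; /* worst case: 3 bytes per run */
    int rle_size = 0, rle_rate = 0;
    av1_run_length_encode(mask, 64, 64, /*stride=*/64, rle_buf, &rle_size,
                          &rle_rate);
    /* For a mask with R runs: rle_rate == av1_cost_literal(R * (1 + 12)). */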
diff --git a/av1/encoder/segment_patch.h b/av1/encoder/segment_patch.h
index 41fde1a..567ee4a 100644
--- a/av1/encoder/segment_patch.h
+++ b/av1/encoder/segment_patch.h
@@ -56,9 +56,12 @@
 // - stride: image stride
 // Outputs:
 // - out: run-length encoded image. Assumed to be already allocated.
-// - out_size: length of 'out'
+// - out_size: length of 'out' in bytes.
+// - out_rate: cost of transmitting 'out' in bits, scaled by 512.
+// TODO(urvang): Need to write in bits directly and consolidate the last two.
 void av1_run_length_encode(const uint8_t *const img, int width, int height,
-                           int stride, uint8_t *out, int *out_size);
+                           int stride, uint8_t *out, int *out_size,
+                           int *out_rate);

 #define DUMP_SEGMENT_MASKS 0
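Note: per the TODO in bitstream.c, the decoder side is still pending. A hedged sketch of what the matching reader could look like, assuming the same fixed-width coding that the rate estimate assumes; the helper name is hypothetical, and ACCT_STR is the usual per-file accounting string used with libaom's aom_read_* macros:

    /* Hypothetical decoder-side counterpart of av1_run_length_encode(). */
    static void run_length_decode_sketch(aom_reader *r, uint8_t *mask,
                                         int width, int height,
                                         int bits_per_run_len) {
      int idx = 0;
      const int n = width * height;
      while (idx < n) {
        const uint8_t val = (uint8_t)aom_read_bit(r, ACCT_STR); /* segment id */
        int run_len = aom_read_literal(r, bits_per_run_len, ACCT_STR);
        while (run_len-- > 0 && idx < n) mask[idx++] = val;
      }
    }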