[CFL] Calculate SSE for distinct alpha before RDO

Separate the codes into a table of distinct values and an index into that table. Pull the SSE calculation out of the RDO loop and avoid repeating it for the same alpha values.

Change-Id: I8c4bd7eab6f8000e6aca9687d9190abc3e270c37
Signed-off-by: David Michael Barr <b@rr-dav.id.au>
diff --git a/av1/common/cfl.h b/av1/common/cfl.h index 6502080..c6b72a4 100644 --- a/av1/common/cfl.h +++ b/av1/common/cfl.h
@@ -46,12 +46,17 @@
   int num_tx_blk[CFL_PRED_PLANES];
 } CFL_CTX;
 
-static const double cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
+// Distinct alpha values: zero, then each magnitude followed by its negation.
+static const double cfl_alpha_mags[CFL_MAGS_SIZE] = {
+  0., 0.125, -0.125, 0.25, -0.25, 0.5, -0.5
+};
+
+// Per-plane indices into cfl_alpha_mags; only 0 and positive entries appear.
+static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
   // barrbrain's simple 1D quant ordered by subset 3 likelihood
-  { 0., 0. }, { 0.125, 0.125 }, { 0.25, 0. }, { 0.25, 0.125 },
-  { 0.125, 0. }, { 0.25, 0.25 }, { 0., 0.125 }, { 0.5, 0.5 },
-  { 0.5, 0.25 }, { 0.125, 0.25 }, { 0.5, 0. }, { 0.25, 0.5 },
-  { 0., 0.25 }, { 0.5, 0.125 }, { 0.125, 0.5 }, { 0., 0.5 }
+  { 0, 0 }, { 1, 1 }, { 3, 0 }, { 3, 1 }, { 1, 0 }, { 3, 3 },
+  { 0, 1 }, { 5, 5 }, { 5, 3 }, { 1, 3 }, { 5, 0 }, { 3, 5 },
+  { 0, 3 }, { 5, 1 }, { 1, 5 }, { 0, 5 }
 };
 
 void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm, int subsampling_x,
@@ -61,11 +66,13 @@
 
 static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
                                       CFL_PRED_TYPE pred_type) {
-  const double abs_alpha = cfl_alpha_codes[alpha_idx][pred_type];
+  const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
+  const double abs_alpha = cfl_alpha_mags[mag_idx];
   if (alpha_sign == CFL_SIGN_POS) {
     return abs_alpha;
   } else {
     assert(abs_alpha != 0.0);
+    assert(cfl_alpha_mags[mag_idx + 1] == -abs_alpha);
     return -abs_alpha;
   }
 }
diff --git a/av1/common/enums.h b/av1/common/enums.h index 93894bf..8588f0d 100644 --- a/av1/common/enums.h +++ b/av1/common/enums.h
@@ -287,6 +287,7 @@ #define CB_ALPHABET_SIZE 4 #define CR_ALPHABET_SIZE 4 #define CFL_ALPHABET_SIZE (CB_ALPHABET_SIZE * CR_ALPHABET_SIZE) +#define CFL_MAGS_SIZE 7 typedef enum { CFL_PRED_U = 0, CFL_PRED_V = 1, CFL_PRED_PLANES } CFL_PRED_TYPE; typedef enum { CFL_SIGN_NEG = 0, CFL_SIGN_POS = 1, CFL_SIGNS } CFL_SIGN_TYPE;
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c index 77fd713..d8cc525 100644 --- a/av1/decoder/decodemv.c +++ b/av1/decoder/decodemv.c
@@ -220,10 +220,10 @@ // Signs are only coded for nonzero values // sign == 0 implies negative alpha // sign == 1 implies positive alpha - signs_out[CFL_PRED_U] = (cfl_alpha_codes[ind][CFL_PRED_U] != 0.0) + signs_out[CFL_PRED_U] = cfl_alpha_codes[ind][CFL_PRED_U] ? aom_read_bit(r, "cfl:sign") : CFL_SIGN_POS; - signs_out[CFL_PRED_V] = (cfl_alpha_codes[ind][CFL_PRED_V] != 0.0) + signs_out[CFL_PRED_V] = cfl_alpha_codes[ind][CFL_PRED_V] ? aom_read_bit(r, "cfl:sign") : CFL_SIGN_POS;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 3aa9822..af9d5d2 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c
@@ -1899,8 +1899,23 @@ const double y_avg = cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height); - int dist_u, dist_v; - int dist_u_neg, dist_v_neg; + int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; + sse[CFL_PRED_U][0] = + cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, + block_width, block_height, dc_pred_u, 0, NULL); + sse[CFL_PRED_V][0] = + cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, + block_width, block_height, dc_pred_v, 0, NULL); + for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { + assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]); + sse[CFL_PRED_U][m] = cfl_alpha_dist( + tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, block_width, + block_height, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); + sse[CFL_PRED_V][m] = cfl_alpha_dist( + tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, block_width, + block_height, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); + } + int dist; int64_t cost; int64_t best_cost; @@ -1911,26 +1926,17 @@ signs_out[CFL_PRED_U] = CFL_SIGN_POS; signs_out[CFL_PRED_V] = CFL_SIGN_POS; - dist = cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, - block_width, block_height, dc_pred_u, 0, NULL) + - cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, - block_width, block_height, dc_pred_v, 0, NULL); + dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]; dist *= 16; best_cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[0], dist); for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { - dist_u = cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, - block_width, block_height, dc_pred_u, - cfl_alpha_codes[c][CFL_PRED_U], &dist_u_neg); - dist_v = cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, - block_width, block_height, dc_pred_v, - cfl_alpha_codes[c][CFL_PRED_V], &dist_v_neg); - for (int sign_u = cfl_alpha_codes[c][CFL_PRED_U] == 0.0; sign_u < CFL_SIGNS; - sign_u++) { - for (int sign_v = cfl_alpha_codes[c][CFL_PRED_V] == 0.0; - sign_v < 
CFL_SIGNS; sign_v++) { - dist = (sign_u == CFL_SIGN_POS ? dist_u : dist_u_neg) + - (sign_v == CFL_SIGN_POS ? dist_v : dist_v_neg); + const int idx_u = cfl_alpha_codes[c][CFL_PRED_U]; + const int idx_v = cfl_alpha_codes[c][CFL_PRED_V]; + for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) { + for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) { + dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] + + sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)]; dist *= 16; cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[c], dist); if (cost < best_cost) { @@ -1955,8 +1961,8 @@ cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den)); for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { - int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0.0) + - (cfl_alpha_codes[c][CFL_PRED_V] != 0.0); + int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) + + (cfl_alpha_codes[c][CFL_PRED_V] != 0); prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]); cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) +