[CFL] Fewer bits for fixed point
Since alpha is Q3, we reduce y_average from Q10 to Q3. As a result, the
prediction is reduced from Q13 to Q6. The chroma dc_pred is reduced from Q7
to Q6 so that it has the same fixed-point precision as the prediction.
Results on Subset1 (compared to 209de2e5b with CfL enabled):
  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0010 |  0.0176 | -0.0538 |  -0.0043 | 0.0027 | -0.0097 |    -0.0018
Change-Id: Ib7dd3968a764e0380ddc0ad2333ebacf1e9699cd
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index b9e356b..1ba6dda 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -179,8 +179,14 @@
// TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will
// not be a power of two. So these divisions will have to use a lookup table.
- cfl->dc_pred_q7[CFL_PRED_U] = (sum_u << 7) / num_pel;
- cfl->dc_pred_q7[CFL_PRED_V] = (sum_v << 7) / num_pel;
+ cfl->dc_pred_q6[CFL_PRED_U] = ((sum_u << 6) + (num_pel >> 1)) / num_pel;
+ cfl->dc_pred_q6[CFL_PRED_V] = ((sum_v << 6) + (num_pel >> 1)) / num_pel;
+
+ // Loss is never more than 1/2 (in Q6)
+ assert(fabs(cfl->dc_pred_q6[CFL_PRED_U] - (sum_u / ((double)num_pel) * 64)) <=
+ 0.5);
+ assert(fabs(cfl->dc_pred_q6[CFL_PRED_V] - (sum_v / ((double)num_pel) * 64)) <=
+ 0.5);
}
static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
@@ -197,7 +203,7 @@
const uint8_t *y_pix = cfl->y_down_pix;
// TODO(ltrudeau) Convert to uint16 for HBD support
const uint8_t *t_y_pix;
- int *averages_q10 = cfl->y_averages_q10;
+ int *averages_q3 = cfl->y_averages_q3;
cfl_load(cfl, 0, 0, width, height);
@@ -212,11 +218,12 @@
}
t_y_pix += MAX_SB_SIZE;
}
- averages_q10[a++] = (sum << 10) >> num_pel_log2;
+ averages_q3[a++] =
+ ((sum << 3) + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
- // Assert no loss from fixed point
- assert((double)averages_q10[a - 1] ==
- (sum / ((double)(1 << num_pel_log2))) * (1 << 10));
+ // Loss is never more than 1/2 (in Q3)
+ assert(fabs((double)averages_q3[a - 1] -
+ (sum / ((double)(1 << num_pel_log2))) * (1 << 3)) <= 0.5);
}
assert(a % stride == 0);
y_pix += block_row_stride;
@@ -253,7 +260,7 @@
// TODO(ltrudeau) Convert to uint16 to support HBD
const uint8_t *y_pix = cfl->y_down_pix;
- const int dc_pred_bias_q13 = (cfl->dc_pred_q7[plane - 1] << 6) + (1 << 12);
+ const int dc_pred_bias_q6 = cfl->dc_pred_q6[plane - 1] + 32;
const double alpha = cfl_idx_to_alpha(
mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
// TODO(ltrudeau) Convert alpha to fixed point.
@@ -263,23 +270,23 @@
(row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size];
const int avg_col =
(col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size];
- const int avg_q10 =
- cfl->y_averages_q10[cfl->y_averages_stride * avg_row + avg_col];
+ const int avg_q3 =
+ cfl->y_averages_q3[cfl->y_averages_stride * avg_row + avg_col];
cfl_load(cfl, row, col, width, height);
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
- const int pred_q13 =
- get_scaled_luma_q13(alpha_q3, y_pix[i], avg_q10) + dc_pred_bias_q13;
+ const int pred_q6 =
+ get_scaled_luma_q6(alpha_q3, y_pix[i], avg_q3) + dc_pred_bias_q6;
// TODO(ltrudeau) Manage HBD.
- if (pred_q13 <= 0) {
+ if (pred_q6 <= 0) {
dst[i] = 0;
- } else if (pred_q13 > (255 << 13)) {
+ } else if (pred_q6 > (255 << 6)) {
dst[i] = 255;
} else {
- dst[i] = (uint8_t)(pred_q13 >> 13);
- assert(dst[i] == (int)(alpha * (y_pix[i] - (avg_q10 / 1024.0)) +
- (cfl->dc_pred_q7[plane - 1] / 128.0) + 0.5));
+ dst[i] = (uint8_t)(pred_q6 >> 6);
+ assert(dst[i] == (int)(alpha * (y_pix[i] - (avg_q3 / 8.0)) +
+ (cfl->dc_pred_q6[plane - 1] / 64.0) + 0.5));
}
}
dst += dst_stride;