[CFL] Fixed point implementation for tx average
This change does not impact the bitstream, as no loss is incurred by using
a fixed-point value for the transform size average.
For low bit depth, the transform size average is stored in Q8.10
fixed-point format. In the worst case, the smallest fraction is 1/1024,
which 10 fractional bits represent exactly.
Results on Subset1 (Compared to 366b74 with CfL)
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000
Change-Id: Ia5b046b92a0e4c40e413b16af3394bdc0a8c8cd9
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 154df73..5f5e397 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -190,12 +190,14 @@
const int tx_width = tx_size_wide[tx_size];
const int stride = width >> tx_size_wide_log2[tx_size];
const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
- const double num_pel = tx_width * tx_height;
+ const int num_pel_log2 =
+ (tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size]);
+
// TODO(ltrudeau) Convert to uint16 for HBD support
const uint8_t *y_pix = cfl->y_down_pix;
// TODO(ltrudeau) Convert to uint16 for HBD support
const uint8_t *t_y_pix;
- double *averages = cfl->y_averages;
+ int *averages_q10 = cfl->y_averages_q10;
cfl_load(cfl, 0, 0, width, height);
@@ -210,7 +212,11 @@
}
t_y_pix += MAX_SB_SIZE;
}
- averages[a++] = sum / num_pel;
+ averages_q10[a++] = (sum << 10) >> num_pel_log2;
+
+ // Assert no loss from fixed point
+ assert((double)averages_q10[a - 1] ==
+ (sum / ((double)(1 << num_pel_log2))) * (1 << 10));
}
assert(a % stride == 0);
y_pix += block_row_stride;
@@ -256,7 +262,7 @@
const int avg_col =
(col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size];
const double avg =
- cfl->y_averages[cfl->y_averages_stride * avg_row + avg_col];
+ cfl->y_averages_q10[cfl->y_averages_stride * avg_row + avg_col] / 1024.0;
cfl_load(cfl, row, col, width, height);
for (int j = 0; j < height; j++) {
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 6fbbc14..66e5359 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -43,7 +43,9 @@
// Transform level averages of the luma reconstructed values over the entire
// prediction unit
- double y_averages[MAX_NUM_TXB];
+ // Fixed point y_averages is Q12.10:
+ // * Worst case division is 1/1024
+ int y_averages_q10[MAX_NUM_TXB];
int y_averages_stride;
int are_parameters_computed;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 842c78c..27ab853 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1429,7 +1429,7 @@
#if CONFIG_CFL
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
- const double y_averages[MAX_NUM_TXB],
+ const int y_averages_q10[MAX_NUM_TXB],
const uint8_t *src, int src_stride, int width,
int height, TX_SIZE tx_size, double dc_pred,
double alpha, int *dist_neg_out) {
@@ -1464,12 +1464,13 @@
const int h = b_j + tx_height;
for (int b_i = 0; b_i < width; b_i += tx_width) {
const int w = b_i + tx_width;
- const double tx_avg = y_averages[a++];
+ // TODO(ltrudeau) Remove div when DC_PRED is also fixed point
+ const double tx_avg_q10 = y_averages_q10[a++] / 1024.0;
t_y_pix = y_pix;
t_src = src;
for (int t_j = b_j; t_j < h; t_j++) {
for (int t_i = b_i; t_i < w; t_i++) {
- const double scaled_luma = alpha * (t_y_pix[t_i] - tx_avg);
+ const double scaled_luma = alpha * (t_y_pix[t_i] - tx_avg_q10);
const int uv = t_src[t_i];
// TODO(ltrudeau) add support for HBD.
@@ -1529,7 +1530,7 @@
const int height = cfl->uv_height;
const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
- const double *y_averages = cfl->y_averages;
+ const int *y_averages = cfl->y_averages_q10;
const uint8_t *y_pix = cfl->y_down_pix;
CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;