[CFL] Alpha signaling

Writes and reads alpha to and from the bitstream.

A special case is needed on the encoder side to handle prediction block
skips. Since whether or not a prediction block is skipped is not yet
known during CfL prediction, a rollback is required if the block was
skipped and the alpha index was not zero. The advantage of this is that
no signaling is required when the prediction block is skipped, as it is
assumed that the alpha index is zero.

An encoder facade is added that wraps the intra prediction facade, as
CfL requires special encoder-side operations.

Change-Id: Ic3b11d0fdbd51389d862112eb09d8785127a6b06
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 33b6c51..72a488c 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -336,7 +336,7 @@
 #endif  // CONFIG_EXT_INTER
 
 // This structure now relates to 8x8 block regions.
-typedef struct {
+typedef struct MB_MODE_INFO {
   // Common for both INTER and INTRA blocks
   BLOCK_SIZE sb_type;
   PREDICTION_MODE mode;
@@ -437,6 +437,13 @@
   WarpedMotionParams wm_params[2];
 #endif  // CONFIG_WARPED_MOTION
 
+#if CONFIG_CFL
+  // Index of the alpha Cb and alpha Cr combination
+  int cfl_alpha_ind;
+  // Signs of alpha Cb and alpha Cr
+  CFL_SIGN_TYPE cfl_alpha_signs[CFL_PRED_PLANES];
+#endif
+
   BOUNDARY_TYPE boundary_info;
 } MB_MODE_INFO;
 
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index d66a989..bdf9511 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -87,21 +87,29 @@
   xd->cfl->dc_pred[CFL_PRED_V] = (sum_v + (num_pel >> 1)) / num_pel;
 }
 
+// Returns the signed alpha for pred_type from mbmi's alpha index and sign.
+double cfl_ind_to_alpha(const MB_MODE_INFO *const mbmi,
+                        CFL_PRED_TYPE pred_type) {
+  const double magnitude = cfl_alpha_codes[mbmi->cfl_alpha_ind][pred_type];
+  const int is_positive = mbmi->cfl_alpha_signs[pred_type] == CFL_SIGN_POS;
+  if (is_positive) return magnitude;
+  // A negative sign is only expected alongside a nonzero magnitude.
+  assert(magnitude != 0.0);
+  return -magnitude;
+}
+
 // Predict the current transform block using CfL.
-// it is assumed that dst points at the start of the transform block
 void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
-                       int row, int col, TX_SIZE tx_size, int dc_pred) {
+                       int row, int col, TX_SIZE tx_size, int dc_pred,
+                       double alpha) {
   const int tx_block_width = tx_size_wide[tx_size];
   const int tx_block_height = tx_size_high[tx_size];
 
-  // TODO(ltrudeau) implement alpha
-  // Place holder for alpha
-  const double alpha = 0;
   const double y_avg = cfl_load(cfl, dst, dst_stride, row, col, tx_size);
 
   for (int j = 0; j < tx_block_height; j++) {
     for (int i = 0; i < tx_block_width; i++) {
-      dst[i] = (uint8_t)(alpha * y_avg + dc_pred + 0.5);
+      dst[i] = (uint8_t)(alpha * (dst[i] - y_avg) + dc_pred + 0.5);
     }
     dst += dst_stride;
   }
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 371df70..62d10ec 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -22,6 +22,10 @@
 // dependency by importing av1/common/blockd.h
 typedef struct macroblockd MACROBLOCKD;
 
+// Forward declaration of MB_MODE_INFO, in order to avoid creating a cyclic
+// dependency by importing av1/common/blockd.h
+typedef struct MB_MODE_INFO MB_MODE_INFO;
+
 typedef struct {
   // Pixel buffer containing the luma pixels used as prediction for chroma
   uint8_t y_pix[MAX_SB_SQUARE];
@@ -34,15 +38,31 @@
 
   // CfL Performs its own block level DC_PRED for each chromatic plane
   int dc_pred[CFL_PRED_PLANES];
+
+  // Count the number of TX blocks in a predicted block to know when you are at
+  // the last one, so you can check for skips.
+  // TODO(any) Is there a better way to do this?
+  int num_tx_blk[CFL_PRED_PLANES];
 } CFL_CTX;
 
+static const double cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
+  // barrbrain's simple 1D quant ordered by subset 3 likelihood
+  { 0., 0. },    { 0.125, 0.125 }, { 0.25, 0. },   { 0.25, 0.125 },
+  { 0.125, 0. }, { 0.25, 0.25 },   { 0., 0.125 },  { 0.5, 0.5 },
+  { 0.5, 0.25 }, { 0.125, 0.25 },  { 0.5, 0. },    { 0.25, 0.5 },
+  { 0., 0.25 },  { 0.5, 0.125 },   { 0.125, 0.5 }, { 0., 0.5 }
+};
+
 void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm, int subsampling_x,
               int subsampling_y);
 
 void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 
+double cfl_ind_to_alpha(const MB_MODE_INFO *mbmi, CFL_PRED_TYPE pred_type);
+
 void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
-                       int row, int col, TX_SIZE tx_size, int dc_pred);
+                       int row, int col, TX_SIZE tx_size, int dc_pred,
+                       double alpha);
 
 void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
                int col, TX_SIZE tx_size);
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 96c14e9..bc53c12 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -2544,6 +2544,16 @@
     };
 #endif  // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
 
+#if CONFIG_CFL
+static const aom_cdf_prob default_cfl_alpha_cdf[CFL_ALPHABET_SIZE] = {
+  // decreasing likelihood, after 6 iterations of alpha RDO on subset 3
+  AOM_ICDF(23928), AOM_ICDF(26267), AOM_ICDF(27319), AOM_ICDF(28330),
+  AOM_ICDF(29341), AOM_ICDF(30160), AOM_ICDF(30581), AOM_ICDF(30983),
+  AOM_ICDF(31353), AOM_ICDF(31634), AOM_ICDF(31907), AOM_ICDF(32171),
+  AOM_ICDF(32407), AOM_ICDF(32558), AOM_ICDF(32669), AOM_ICDF(32768)
+};
+#endif
+
 // CDF version of 'av1_kf_y_mode_prob'.
 const aom_cdf_prob
     av1_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES][CDF_SIZE(INTRA_MODES)] = {
@@ -3332,6 +3342,9 @@
 #endif  // CONFIG_EC_MULTISYMBOL
 #endif
 #endif  // CONFIG_DELTA_Q
+#if CONFIG_CFL
+  av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf);
+#endif
 }
 
 #if CONFIG_EC_MULTISYMBOL
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index ff15aaa..7578973 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -303,6 +303,9 @@
   // such as coef_cdfs[], coef_tail_cdfs[], and coef_heaf_cdfs[] can be removed.
   od_adapt_ctx pvq_context;
 #endif  // CONFIG_PVQ
+#if CONFIG_CFL
+  aom_cdf_prob cfl_alpha_cdf[CFL_ALPHABET_SIZE];
+#endif
 } FRAME_CONTEXT;
 
 typedef struct FRAME_COUNTS {
diff --git a/av1/common/enums.h b/av1/common/enums.h
index ee18e3c..13b93cf 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -274,7 +274,13 @@
 typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE;
 
 #if CONFIG_CFL
+// TODO(ltrudeau) this should change based on QP size
+#define CB_ALPHABET_SIZE 4
+#define CR_ALPHABET_SIZE 4
+#define CFL_ALPHABET_SIZE (CB_ALPHABET_SIZE * CR_ALPHABET_SIZE)
+
 typedef enum { CFL_PRED_U = 0, CFL_PRED_V = 1, CFL_PRED_PLANES } CFL_PRED_TYPE;
+typedef enum { CFL_SIGN_NEG = 0, CFL_SIGN_POS = 1, CFL_SIGNS } CFL_SIGN_TYPE;
 #endif
 
 #if CONFIG_PALETTE
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 6e0ff52..f8bfec8 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -2316,8 +2316,10 @@
       // is signaled.
       cfl_dc_pred(xd, get_plane_block_size(block_idx, pd), tx_size);
     }
+
     cfl_predict_block(xd->cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size,
-                      xd->cfl->dc_pred[plane - 1]);
+                      xd->cfl->dc_pred[plane - 1],
+                      cfl_ind_to_alpha(mbmi, plane - 1));
   }
 #endif
 }
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 3adc235..075e52d 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -207,6 +207,37 @@
   return uv_mode;
 }
 
+#if CONFIG_CFL
+static int read_cfl_alphas(
+#if CONFIG_EC_ADAPT
+    MACROBLOCKD *xd,
+#elif CONFIG_EC_MULTISYMBOL
+    AV1_COMMON *cm,
+#endif
+    aom_reader *r, CFL_SIGN_TYPE signs[CFL_PRED_PLANES]) {

+#if CONFIG_EC_ADAPT
+  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+#elif CONFIG_EC_MULTISYMBOL
+  FRAME_CONTEXT *ec_ctx = cm->fc;
+#endif
+  // Read the joint symbol that selects a (Cb, Cr) row of cfl_alpha_codes.
+  const int ind =
+      aom_read_symbol(r, ec_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE, "cfl:alpha");
+  // A sign bit is read only when that plane's alpha magnitude is nonzero;
+  // a zero alpha is given CFL_SIGN_POS without consuming a bit.
+  // The bit maps onto CFL_SIGN_TYPE: 0 is negative alpha, 1 is positive.
+  signs[CFL_PRED_U] = (cfl_alpha_codes[ind][CFL_PRED_U] != 0.0)
+                          ? aom_read_bit(r, "cfl:sign")
+                          : CFL_SIGN_POS;
+  signs[CFL_PRED_V] = (cfl_alpha_codes[ind][CFL_PRED_V] != 0.0)
+                          ? aom_read_bit(r, "cfl:sign")
+                          : CFL_SIGN_POS;
+  // The signs are returned through signs[]; the index is the return value.
+  return ind;
+}
+#endif
+
 #if CONFIG_EXT_INTER
 static INTERINTRA_MODE read_interintra_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
                                             aom_reader *r, int size_group) {
@@ -1096,6 +1127,25 @@
   mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
 #endif
 
+#if CONFIG_CFL
+  // TODO(ltrudeau) support PALETTE
+  if (mbmi->uv_mode == DC_PRED) {
+    if (mbmi->skip) {
+      mbmi->cfl_alpha_ind = 0;
+      mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
+      mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
+    } else {
+      mbmi->cfl_alpha_ind = read_cfl_alphas(
+#if CONFIG_EC_ADAPT
+          xd,
+#elif CONFIG_EC_MULTISYMBOL
+          cm,
+#endif
+          r, mbmi->cfl_alpha_signs);
+    }
+  }
+#endif
+
 #if CONFIG_EXT_INTRA
   read_intra_angle_info(cm, xd, r);
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index a2c85ce..eea8a0b 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1690,6 +1690,21 @@
 #endif
 }
 
+#if CONFIG_CFL
+// Codes the joint Cb/Cr alpha index, then a sign bit per nonzero alpha.
+static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, const int ind,
+                             const CFL_SIGN_TYPE signs[CFL_PRED_PLANES],
+                             aom_writer *w) {
+  // One symbol represents the combination of alpha Cb and alpha Cr.
+  aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
+  // signs[] is indexed by plane; no sign bit is written for a zero alpha.
+  if (cfl_alpha_codes[ind][CFL_PRED_U] != 0.0)
+    aom_write_bit(w, signs[CFL_PRED_U]);
+  if (cfl_alpha_codes[ind][CFL_PRED_V] != 0.0)
+    aom_write_bit(w, signs[CFL_PRED_V]);
+}
+#endif
+
 static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
                                 const int mi_col,
 #if CONFIG_SUPERTX
@@ -2172,6 +2187,18 @@
   write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mbmi->mode, w);
 #endif  // CONFIG_CB4X4
 
+#if CONFIG_CFL
+  if (mbmi->uv_mode == DC_PRED) {
+    if (mbmi->skip) {
+      assert(mbmi->cfl_alpha_ind == 0);
+      assert(mbmi->cfl_alpha_signs[CFL_PRED_U] == CFL_SIGN_POS);
+      assert(mbmi->cfl_alpha_signs[CFL_PRED_V] == CFL_SIGN_POS);
+    } else {
+      write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_ind, mbmi->cfl_alpha_signs, w);
+    }
+  }
+#endif
+
 #if CONFIG_EXT_INTRA
   write_intra_angle_info(xd, ec_ctx, w);
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 83bf5bf..37d60e0 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1435,7 +1435,12 @@
   const int dst_stride = pd->dst.stride;
   uint8_t *dst =
       &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+#if CONFIG_CFL
+  av1_predict_intra_block_encoder_facade(xd, plane, block, blk_col, blk_row,
+                                         tx_size);
+#else
   av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
+#endif
   av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
 
   const ENTROPY_CONTEXT *a = &args->ta[blk_col];
@@ -1464,12 +1469,70 @@
 // Note : *(args->skip) == mbmi->skip
 #endif
 #if CONFIG_CFL
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   if (plane == AOM_PLANE_Y && x->cfl_store_y) {
     cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
   }
+
+  if (mbmi->uv_mode == DC_PRED) {
+    // TODO(ltrudeau) find a cleaner way to detect last transform block
+    if (plane == AOM_PLANE_U) {
+      xd->cfl->num_tx_blk[CFL_PRED_U] =
+          (blk_row == 0 && blk_col == 0) ? 1
+                                         : xd->cfl->num_tx_blk[CFL_PRED_U] + 1;
+    }
+
+    if (plane == AOM_PLANE_V) {
+      xd->cfl->num_tx_blk[CFL_PRED_V] =
+          (blk_row == 0 && blk_col == 0) ? 1
+                                         : xd->cfl->num_tx_blk[CFL_PRED_V] + 1;
+
+      if (mbmi->skip &&
+          xd->cfl->num_tx_blk[CFL_PRED_U] == xd->cfl->num_tx_blk[CFL_PRED_V]) {
+        assert(plane_bsize != BLOCK_INVALID);
+        const int block_width = block_size_wide[plane_bsize];
+        const int block_height = block_size_high[plane_bsize];
+
+        // if SKIP is chosen at the block level, and ind != 0, we must change
+        // the prediction
+        if (mbmi->cfl_alpha_ind != 0) {
+          const struct macroblockd_plane *const pd_cb = &xd->plane[AOM_PLANE_U];
+          uint8_t *const dst_cb = pd_cb->dst.buf;
+          const int dst_stride_cb = pd_cb->dst.stride;
+          uint8_t *const dst_cr = pd->dst.buf;
+          const int dst_stride_cr = pd->dst.stride;
+          for (int j = 0; j < block_height; j++) {
+            for (int i = 0; i < block_width; i++) {
+              dst_cb[dst_stride_cb * j + i] =
+                  (uint8_t)(xd->cfl->dc_pred[CFL_PRED_U] + 0.5);
+              dst_cr[dst_stride_cr * j + i] =
+                  (uint8_t)(xd->cfl->dc_pred[CFL_PRED_V] + 0.5);
+            }
+          }
+          mbmi->cfl_alpha_ind = 0;
+          mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
+          mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
+        }
+      }
+    }
+  }
 #endif
 }
 
+#if CONFIG_CFL
+// Encoder-side facade: resets CfL alpha to index 0 with positive signs.
+void av1_predict_intra_block_encoder_facade(MACROBLOCKD *xd, int plane,
+                                            int block_idx, int blk_col,
+                                            int blk_row, TX_SIZE tx_size) {
+  MB_MODE_INFO *const mode_info = &xd->mi[0]->mbmi;
+  mode_info->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
+  mode_info->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
+  mode_info->cfl_alpha_ind = 0;
+  av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row,
+                                 tx_size);
+}
+#endif
+
 void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
                                   BLOCK_SIZE bsize, int plane,
                                   int enable_optimize_b, const int mi_row,
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index 73fde1d..e0db43e 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -85,6 +85,12 @@
                             int *size, int skip_rest, int skip_dir, int bs);
 #endif
 
+#if CONFIG_CFL
+void av1_predict_intra_block_encoder_facade(MACROBLOCKD *xd, int plane,
+                                            int block_idx, int blk_col,
+                                            int blk_row, TX_SIZE tx_size);
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 737d9cd..bce9bc2 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1533,7 +1533,12 @@
   if (args->exit_early) return;
 
   if (!is_inter_block(mbmi)) {
+#if CONFIG_CFL
+    av1_predict_intra_block_encoder_facade(xd, plane, block, blk_col, blk_row,
+                                           tx_size);
+#else
     av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
+#endif
     av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
   }
 
@@ -2343,7 +2348,11 @@
   int block = 0;
   for (row = 0; row < max_blocks_high; row += stepr) {
     for (col = 0; col < max_blocks_wide; col += stepc) {
+#if CONFIG_CFL
+      av1_predict_intra_block_encoder_facade(xd, 0, block, col, row, tx_size);
+#else
       av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
+#endif
       block += step;
     }
   }