[CFL] Alpha signaling

Writes and reads alpha to and from the bitstream.

A special case is needed on the encoder side to handle prediction block
skips. Since whether or not a prediction block is skipped is not known
until CfL has already been applied, a rollback is required if the block
was skipped and the alpha index was not zero. The advantage of this is
that no signaling is required when the prediction block is skipped, as
it is assumed that the alpha index is zero.

An encoder facade wrapping the intra prediction facade is added, as CfL
requires special encoder-side operations.

Change-Id: Ic3b11d0fdbd51389d862112eb09d8785127a6b06
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index a2c85ce..eea8a0b 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1690,6 +1690,21 @@
 #endif
 }
 
+#if CONFIG_CFL
+static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, const int ind,
+                             const CFL_SIGN_TYPE signs[CFL_SIGNS],
+                             aom_writer *w) {
+  // Code `ind`, the joint (Cb, Cr) alpha index, as one cfl_alpha_cdf symbol.
+  aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
+
+  // One sign bit per plane (U, then V), only when its alpha code is nonzero.
+  if (cfl_alpha_codes[ind][CFL_PRED_U] != 0)
+    aom_write_bit(w, signs[CFL_PRED_U]);
+  if (cfl_alpha_codes[ind][CFL_PRED_V] != 0)
+    aom_write_bit(w, signs[CFL_PRED_V]);
+}
+#endif  // CONFIG_CFL
+
 static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
                                 const int mi_col,
 #if CONFIG_SUPERTX
@@ -2172,6 +2187,18 @@
   write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mbmi->mode, w);
 #endif  // CONFIG_CB4X4
 
+#if CONFIG_CFL
+  if (mbmi->uv_mode == DC_PRED) {
+    if (mbmi->skip) {
+      assert(mbmi->cfl_alpha_ind == 0);
+      assert(mbmi->cfl_alpha_signs[CFL_PRED_U] == CFL_SIGN_POS);
+      assert(mbmi->cfl_alpha_signs[CFL_PRED_V] == CFL_SIGN_POS);
+    } else {
+      write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_ind, mbmi->cfl_alpha_signs, w);
+    }
+  }
+#endif
+
 #if CONFIG_EXT_INTRA
   write_intra_angle_info(xd, ec_ctx, w);
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 83bf5bf..37d60e0 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1435,7 +1435,12 @@
   const int dst_stride = pd->dst.stride;
   uint8_t *dst =
       &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+#if CONFIG_CFL
+  av1_predict_intra_block_encoder_facade(xd, plane, block, blk_col, blk_row,
+                                         tx_size);
+#else
   av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
+#endif
   av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
 
   const ENTROPY_CONTEXT *a = &args->ta[blk_col];
@@ -1464,12 +1469,70 @@
 // Note : *(args->skip) == mbmi->skip
 #endif
 #if CONFIG_CFL
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   if (plane == AOM_PLANE_Y && x->cfl_store_y) {
     cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
   }
+
+  if (mbmi->uv_mode == DC_PRED) {
+    // TODO(ltrudeau) find a cleaner way to detect last transform block
+    if (plane == AOM_PLANE_U) {
+      xd->cfl->num_tx_blk[CFL_PRED_U] =
+          (blk_row == 0 && blk_col == 0) ? 1
+                                         : xd->cfl->num_tx_blk[CFL_PRED_U] + 1;
+    }
+
+    if (plane == AOM_PLANE_V) {
+      xd->cfl->num_tx_blk[CFL_PRED_V] =
+          (blk_row == 0 && blk_col == 0) ? 1
+                                         : xd->cfl->num_tx_blk[CFL_PRED_V] + 1;
+
+      if (mbmi->skip &&
+          xd->cfl->num_tx_blk[CFL_PRED_U] == xd->cfl->num_tx_blk[CFL_PRED_V]) {
+        assert(plane_bsize != BLOCK_INVALID);
+        const int block_width = block_size_wide[plane_bsize];
+        const int block_height = block_size_high[plane_bsize];
+
+        // if SKIP is chosen at the block level, and ind != 0, we must change
+        // the prediction
+        if (mbmi->cfl_alpha_ind != 0) {
+          const struct macroblockd_plane *const pd_cb = &xd->plane[AOM_PLANE_U];
+          uint8_t *const dst_cb = pd_cb->dst.buf;
+          const int dst_stride_cb = pd_cb->dst.stride;
+          uint8_t *const dst_cr = pd->dst.buf;
+          const int dst_stride_cr = pd->dst.stride;
+          for (int j = 0; j < block_height; j++) {
+            for (int i = 0; i < block_width; i++) {
+              dst_cb[dst_stride_cb * j + i] =
+                  (uint8_t)(xd->cfl->dc_pred[CFL_PRED_U] + 0.5);
+              dst_cr[dst_stride_cr * j + i] =
+                  (uint8_t)(xd->cfl->dc_pred[CFL_PRED_V] + 0.5);
+            }
+          }
+          mbmi->cfl_alpha_ind = 0;
+          mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
+          mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
+        }
+      }
+    }
+  }
 #endif
 }
 
+#if CONFIG_CFL
+void av1_predict_intra_block_encoder_facade(MACROBLOCKD *xd, int plane,
+                                            int block_idx, int blk_col,
+                                            int blk_row, TX_SIZE tx_size) {
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  mbmi->cfl_alpha_ind = 0;  // Reset CfL alpha index and signs first.
+  mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
+  mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
+  // With the alpha state cleared, run the shared intra prediction facade.
+  av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row,
+                                 tx_size);
+}
+#endif  // CONFIG_CFL
+
 void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
                                   BLOCK_SIZE bsize, int plane,
                                   int enable_optimize_b, const int mi_row,
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index 73fde1d..e0db43e 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -85,6 +85,12 @@
                             int *size, int skip_rest, int skip_dir, int bs);
 #endif
 
+#if CONFIG_CFL
+void av1_predict_intra_block_encoder_facade(MACROBLOCKD *xd, int plane,
+                                            int block_idx, int blk_col,
+                                            int blk_row, TX_SIZE tx_size);
+#endif  // CONFIG_CFL
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 737d9cd..bce9bc2 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1533,7 +1533,12 @@
   if (args->exit_early) return;
 
   if (!is_inter_block(mbmi)) {
+#if CONFIG_CFL
+    av1_predict_intra_block_encoder_facade(xd, plane, block, blk_col, blk_row,
+                                           tx_size);
+#else
     av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
+#endif
     av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
   }
 
@@ -2343,7 +2348,11 @@
   int block = 0;
   for (row = 0; row < max_blocks_high; row += stepr) {
     for (col = 0; col < max_blocks_wide; col += stepc) {
+#if CONFIG_CFL
+      av1_predict_intra_block_encoder_facade(xd, 0, block, col, row, tx_size);
+#else
       av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
+#endif
       block += step;
     }
   }