Re-schedule sub8x8 chroma component encoding process

For sub8x8 blocks, encode the chroma components at size 4x4 using the
coding information of the top-left 4x4 luma block.

Change-Id: I4bcfbc2bf8b71f7fc30094553503468460a56f9b
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index 46e334d..a0342db 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c
@@ -130,7 +130,12 @@
   // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
   // transform size varies per plane, look it up in a common way.
   const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+#if CONFIG_CB4X4
+  const BLOCK_SIZE plane_bsize =
+      AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
+#else
   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+#endif
   const uint8_t txw_unit = tx_size_wide_unit[tx_size];
   const uint8_t txh_unit = tx_size_high_unit[tx_size];
   const int step = txw_unit * txh_unit;
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 436a9f1..2fe92ea 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -871,6 +871,9 @@
 
   uv_txsize = uv_txsize_lookup[mbmi->sb_type][mbmi->tx_size][pd->subsampling_x]
                               [pd->subsampling_y];
+#if CONFIG_CB4X4
+  uv_txsize = AOMMAX(uv_txsize, TX_4X4);
+#endif
   assert(uv_txsize != TX_INVALID);
   return uv_txsize;
 }
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index fbe9400..bab2a29 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -719,6 +719,12 @@
 #endif  // CONFIG_EXT_PARTITION_TYPES
 }
 
+#if CONFIG_CB4X4
+static INLINE int is_chroma_reference(const int mi_row, const int mi_col) {
+  return !((mi_row & 0x01) || (mi_col & 0x01));  // 1 iff both are even
+}
+#endif  // CONFIG_CB4X4
+
 #if CONFIG_EXT_PARTITION_TYPES
 static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
                                                 int mi_col, BLOCK_SIZE subsize,
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 20df627..f67b419 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5591,7 +5591,8 @@
     int plane;
     mbmi->skip = 1;
     for (plane = 0; plane < MAX_MB_PLANE; ++plane)
-      av1_encode_intra_block_plane((AV1_COMMON *)cm, x, block_size, plane, 1);
+      av1_encode_intra_block_plane((AV1_COMMON *)cm, x, block_size, plane, 1,
+                                   mi_row, mi_col);
     if (!dry_run)
       sum_intra_stats(td->counts, mi, xd->above_mi, xd->left_mi,
                       frame_is_intra_only(cm));
@@ -5650,7 +5651,7 @@
 #if CONFIG_VAR_TX
     mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
 #endif
-    av1_tokenize_sb(cpi, td, t, dry_run, block_size, rate);
+    av1_tokenize_sb(cpi, td, t, dry_run, block_size, rate, mi_row, mi_col);
   } else {
     int ref;
     const int is_compound = has_second_ref(mbmi);
@@ -5706,13 +5707,13 @@
     }
 #endif  // CONFIG_MOTION_VAR
 
-    av1_encode_sb((AV1_COMMON *)cm, x, block_size);
+    av1_encode_sb((AV1_COMMON *)cm, x, block_size, mi_row, mi_col);
 #if CONFIG_VAR_TX
     if (mbmi->skip) mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
     av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, block_size,
                           rate);
 #else
-    av1_tokenize_sb(cpi, td, t, dry_run, block_size, rate);
+    av1_tokenize_sb(cpi, td, t, dry_run, block_size, rate, mi_row, mi_col);
 #endif
   }
 
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 7de97d9..a7e49e9 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -874,7 +874,8 @@
                                          encode_block_pass1, &args);
 }
 
-void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
+void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize,
+                   const int mi_row, const int mi_col) {
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -911,6 +912,14 @@
     arg.ta = ctx.ta[plane];
     arg.tl = ctx.tl[plane];
 
+#if CONFIG_CB4X4
+    if (bsize < BLOCK_8X8 && plane && !is_chroma_reference(mi_row, mi_col))
+      continue;
+#else
+    (void)mi_row;
+    (void)mi_col;
+#endif
+
 #if CONFIG_VAR_TX
     for (idy = 0; idy < mi_height; idy += bh) {
       for (idx = 0; idx < mi_width; idx += bw) {
@@ -1121,7 +1130,8 @@
 
 void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
                                   BLOCK_SIZE bsize, int plane,
-                                  int enable_optimize_b) {
+                                  int enable_optimize_b, const int mi_row,
+                                  const int mi_col) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = { 0 };
   ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
@@ -1129,6 +1139,15 @@
   struct encode_b_args arg = {
     cm, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
   };
+
+#if CONFIG_CB4X4
+  if (bsize < BLOCK_8X8 && plane && !is_chroma_reference(mi_row, mi_col))
+    return;
+#else
+  (void)mi_row;
+  (void)mi_col;
+#endif
+
   if (enable_optimize_b) {
     const struct macroblockd_plane *const pd = &xd->plane[plane];
     const TX_SIZE tx_size =
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index 420fab5..f093b3a 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -48,7 +48,8 @@
   AV1_XFORM_QUANT_TYPES,
 } AV1_XFORM_QUANT;
 
-void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
+void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize,
+                   const int mi_row, const int mi_col);
 #if CONFIG_SUPERTX
 void av1_encode_sb_supertx(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
 #endif  // CONFIG_SUPERTX
@@ -67,7 +68,8 @@
 
 void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
                                   BLOCK_SIZE bsize, int plane,
-                                  int enable_optimize_b);
+                                  int enable_optimize_b, const int mi_row,
+                                  const int mi_col);
 
 #if CONFIG_PVQ
 PVQ_SKIP_TYPE av1_pvq_encode_helper(
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index f78b977..8f68d61 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -654,7 +654,7 @@
       xd->mi[0]->mbmi.mode = DC_PRED;
       xd->mi[0]->mbmi.tx_size =
           use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
-      av1_encode_intra_block_plane(cm, x, bsize, 0, 0);
+      av1_encode_intra_block_plane(cm, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
       this_error = aom_get_mb_ss(x->plane[0].src_diff);
 
       // Keep a record of blocks that have almost no intra error residual
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 52a25bf..e6783b7 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9170,9 +9170,9 @@
 #if CONFIG_FILTER_INTRA
 static void pick_filter_intra_interframe(
     const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
-    BLOCK_SIZE bsize, int *rate_uv_intra, int *rate_uv_tokenonly,
-    int64_t *dist_uv, int *skip_uv, PREDICTION_MODE *mode_uv,
-    FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
+    BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
+    int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
+    PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
 #if CONFIG_EXT_INTRA
     int8_t *uv_angle_delta,
 #endif  // CONFIG_EXT_INTRA
@@ -9292,7 +9292,8 @@
                              mbmi->filter_intra_mode_info.filter_intra_mode[1]);
   }
   distortion2 = distortion_y + distortion_uv;
-  av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, 0, 0);
+  av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, 0, 0, mi_row,
+                               mi_col);
 #if CONFIG_AOM_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     x->recon_variance = av1_high_get_sby_perpixel_variance(
@@ -10806,8 +10807,8 @@
       !dc_skipped && best_mode_index >= 0 &&
       best_intra_rd < (best_rd + (best_rd >> 3))) {
     pick_filter_intra_interframe(
-        cpi, x, ctx, bsize, rate_uv_intra, rate_uv_tokenonly, dist_uvs,
-        skip_uvs, mode_uv, filter_intra_mode_info_uv,
+        cpi, x, ctx, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
+        dist_uvs, skip_uvs, mode_uv, filter_intra_mode_info_uv,
 #if CONFIG_EXT_INTRA
         uv_angle_delta,
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index dea1a1c..d05dff5 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -790,7 +790,8 @@
 #endif  // CONFIG_VAR_TX
 
 void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
-                     RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate) {
+                     RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate,
+                     const int mi_row, const int mi_col) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -823,14 +824,30 @@
 #endif
   } else if (dry_run == DRY_RUN_NORMAL) {
     int plane;
-    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_CB4X4
+      if (bsize < BLOCK_8X8 && plane && !is_chroma_reference(mi_row, mi_col))
+        continue;
+#else
+      (void)mi_row;
+      (void)mi_col;
+#endif
       av1_foreach_transformed_block_in_plane(xd, bsize, plane,
                                              set_entropy_context_b, &arg);
+    }
   } else if (dry_run == DRY_RUN_COSTCOEFFS) {
     int plane;
-    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_CB4X4
+      if (bsize < BLOCK_8X8 && plane && !is_chroma_reference(mi_row, mi_col))
+        continue;
+#else
+      (void)mi_row;
+      (void)mi_col;
+#endif
       av1_foreach_transformed_block_in_plane(xd, bsize, plane, cost_coeffs_b,
                                              &arg);
+    }
   }
 #else
   if (!dry_run) {
@@ -838,9 +855,17 @@
 
     td->counts->skip[ctx][0] += skip_inc;
 
-    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_CB4X4
+      if (bsize < BLOCK_8X8 && plane && !is_chroma_reference(mi_row, mi_col))
+        continue;
+#else
+      (void)mi_row;
+      (void)mi_col;
+#endif
       av1_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_pvq,
                                              &arg);
+    }
   }
 #endif
   if (rate) *rate += arg.this_rate;
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index 51a98a5..f255a7e 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -82,7 +82,7 @@
 #endif  // CONFIG_PALETTE
 void av1_tokenize_sb(const struct AV1_COMP *cpi, struct ThreadData *td,
                      TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                     int *rate);
+                     int *rate, const int mi_row, const int mi_col);
 #if CONFIG_SUPERTX
 void av1_tokenize_sb_supertx(const struct AV1_COMP *cpi, struct ThreadData *td,
                              TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,