Extend blk_skip to support chroma planes

Change-Id: Ia1d2d10010c2b6bf0d729b3a32ad072dad1b475a
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 6a92e90..0bc5dea 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -415,6 +415,36 @@
   return depth;
 }
 
+static INLINE void set_blk_skip(MACROBLOCK *x, int plane, int blk_idx,
+                                int skip) {
+  if (skip)
+    x->blk_skip[blk_idx] |= 1UL << plane;
+  else
+    x->blk_skip[blk_idx] &= ~(1UL << plane);
+#ifndef NDEBUG
+  // Writing luma marks both chroma planes uninitialized (bits 5 and 6), so
+  // is_blk_skip() asserts unless each chroma plane is set again afterwards.
+  if (plane == 0) {
+    x->blk_skip[blk_idx] |= 1UL << (1 + 4);
+    x->blk_skip[blk_idx] |= 1UL << (2 + 4);
+  }
+
+  // Mark this plane as initialized by clearing its bit (plane + 4)
+  x->blk_skip[blk_idx] &= ~(1UL << (plane + 4));
+#endif
+}
+
+static INLINE int is_blk_skip(MACROBLOCK *x, int plane, int blk_idx) {
+#ifndef NDEBUG
+  // Reading a plane whose uninitialized flag (bit plane + 4) is set is a bug
+  assert(!(x->blk_skip[blk_idx] & (1UL << (plane + 4))));
+
+  // 0x77 is the debug fill pattern; bits 3 and 7 (mask 0x88) must never be set
+  assert((x->blk_skip[blk_idx] & 0x88) == 0);
+#endif
+  return (x->blk_skip[blk_idx] >> plane) & 1;
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 45d5f3d..cb226c5 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2427,9 +2427,11 @@
 
 #ifndef NDEBUG
   // Nothing should rely on the default value of this array (which is just
-  // leftover from encoding the previous block. Setting it to magic number
+  // leftover from encoding the previous block). Setting it to a fixed pattern
   // when debugging.
-  memset(x->blk_skip, 234, sizeof(x->blk_skip));
+  // bits 0, 1, 2 are the blk_skip flags of planes 0, 1, 2
+  // bits 4, 5, 6 are the matching per-plane 'uninitialized' debug flags
+  memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
 #endif  // NDEBUG
 
   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
@@ -3269,9 +3271,11 @@
 
 #ifndef NDEBUG
   // Nothing should rely on the default value of this array (which is just
-  // leftover from encoding the previous block. Setting it to magic number
+  // leftover from encoding the previous block). Setting it to a fixed pattern
   // when debugging.
-  memset(x->blk_skip, 234, sizeof(x->blk_skip));
+  // bits 0, 1, 2 are the blk_skip flags of planes 0, 1, 2
+  // bits 4, 5, 6 are the matching per-plane 'uninitialized' debug flags
+  memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
 #endif  // NDEBUG
 
   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 3aab01a..ad12577 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -222,11 +222,8 @@
 
   a = &args->ta[blk_col];
   l = &args->tl[blk_row];
-  // Assert not magic number (uninitialized).
-  assert(plane != 0 || x->blk_skip[blk_row * bw + blk_col] != 234);
 
-  if ((plane != 0 || x->blk_skip[blk_row * bw + blk_col] == 0) &&
-      !mbmi->skip_mode) {
+  if (!is_blk_skip(x, plane, blk_row * bw + blk_col) && !mbmi->skip_mode) {
     TX_TYPE tx_type = av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col,
                                       tx_size, cm->reduced_tx_set_used);
     if (args->enable_optimize_b) {
@@ -573,9 +570,7 @@
   av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
 
   const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  // Assert not magic number (uninitialized).
-  assert(plane != 0 || x->blk_skip[blk_row * bw + blk_col] != 234);
-  if (plane == 0 && x->blk_skip[blk_row * bw + blk_col]) {
+  if (plane == 0 && is_blk_skip(x, plane, blk_row * bw + blk_col)) {
     *eob = 0;
     p->txb_entropy_ctx[block] = 0;
   } else {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index a599000..c2d1553 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3319,11 +3319,14 @@
 #endif  // CONFIG_RD_DEBUG
   av1_set_txb_context(x, plane, block, tx_size, a, l);
 
-  if (plane == 0) {
-    x->blk_skip[blk_row *
-                    (block_size_wide[plane_bsize] >> tx_size_wide_log2[0]) +
-                blk_col] = (x->plane[plane].eobs[block] == 0);
-  }
+  const int blk_idx =
+      blk_row * (block_size_wide[plane_bsize] >> tx_size_wide_log2[0]) +
+      blk_col;
+
+  if (plane == 0)
+    set_blk_skip(x, plane, blk_idx, x->plane[plane].eobs[block] == 0);
+  else
+    set_blk_skip(x, plane, blk_idx, 0);
 
   rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
   rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
@@ -3561,10 +3564,13 @@
     }
     if (n == TX_4X4) break;
   }
-  mbmi->tx_size = best_tx_size;
-  memcpy(mbmi->txk_type, best_txk_type,
-         sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
-  memcpy(x->blk_skip, best_blk_skip, sizeof(best_blk_skip[0]) * n4);
+
+  if (rd_stats->rate != INT_MAX) {
+    mbmi->tx_size = best_tx_size;
+    memcpy(mbmi->txk_type, best_txk_type,
+           sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
+    memcpy(x->blk_skip, best_blk_skip, sizeof(best_blk_skip[0]) * n4);
+  }
 
   // Reset the pruning flags.
   av1_zero(x->tx_search_prune);
@@ -4150,12 +4156,14 @@
     }
   }
 
-  mbmi->tx_size = best_tx_size;
-  mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta;
-  memcpy(mbmi->txk_type, best_txk_type,
-         sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
-  memcpy(x->blk_skip, best_blk_skip,
-         sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
+  if (rd_stats->rate != INT_MAX) {
+    mbmi->tx_size = best_tx_size;
+    mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta;
+    memcpy(mbmi->txk_type, best_txk_type,
+           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
+    memcpy(x->blk_skip, best_blk_skip,
+           sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
+  }
   return best_rd;
 }
 
@@ -4724,12 +4732,12 @@
     rd_stats->rate = zero_blk_rate;
     rd_stats->dist = rd_stats->sse;
     rd_stats->skip = 1;
-    x->blk_skip[blk_row * bw + blk_col] = 1;
+    set_blk_skip(x, 0, blk_row * bw + blk_col, 1);
     p->eobs[block] = 0;
     update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
                      DCT_DCT);
   } else {
-    x->blk_skip[blk_row * bw + blk_col] = 0;
+    set_blk_skip(x, 0, blk_row * bw + blk_col, 0);
     rd_stats->skip = 0;
   }
 
@@ -4898,7 +4906,7 @@
     mbmi->tx_size = tx_size_selected;
     update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
                      no_split.tx_type);
-    x->blk_skip[blk_row * bw + blk_col] = rd_stats->skip;
+    set_blk_skip(x, 0, blk_row * bw + blk_col, rd_stats->skip);
   } else {
     *rd_stats = split_rd_stats;
     if (split_rd == INT64_MAX) *is_cost_valid = 0;
@@ -5076,14 +5084,14 @@
       rd_stats->rate = zero_blk_rate;
       rd_stats->dist = rd_stats->sse;
       rd_stats->skip = 1;
-      x->blk_skip[blk_row * mi_width + blk_col] = 1;
+      set_blk_skip(x, 0, blk_row * mi_width + blk_col, 1);
       x->plane[0].eobs[block] = 0;
       x->plane[0].txb_entropy_ctx[block] = 0;
       update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
                        DCT_DCT);
     } else {
       rd_stats->skip = 0;
-      x->blk_skip[blk_row * mi_width + blk_col] = 0;
+      set_blk_skip(x, 0, blk_row * mi_width + blk_col, 0);
     }
     if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
       rd_stats->rate += x->txfm_partition_cost[ctx][0];
@@ -5588,7 +5596,7 @@
   memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
   memset(mbmi->inter_tx_size, tx_size, sizeof(mbmi->inter_tx_size));
   mbmi->tx_size = tx_size;
-  memset(x->blk_skip, 1, sizeof(x->blk_skip[0]) * n4);
+  for (int i = 0; i < n4; ++i) set_blk_skip(x, 0, i, 1);
   rd_stats->skip = 1;
   rd_stats->rate = 0;
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
@@ -5731,6 +5739,10 @@
                                 .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
   tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize,
                 &txb_ctx, rd_stats, ftxs_mode, INT64_MAX, NULL);
+
+  const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
+  const int blk_idx = blk_row * mi_width + blk_col;
+
   av1_set_txb_context(x, plane, block, tx_size, ta, tl);
   if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
            RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
@@ -5739,6 +5751,9 @@
     rd_stats->rate = zero_blk_rate;
     rd_stats->dist = rd_stats->sse;
   }
+
+  // Set chroma blk_skip to 0
+  set_blk_skip(x, plane, blk_idx, 0);
 }
 
 // Return value 0: early termination triggered, no valid rd cost available;
@@ -8386,8 +8401,8 @@
     } else {
       super_block_yrd(cpi, x, rd_stats_y, bsize, rd_thresh);
       memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
-      memset(x->blk_skip, rd_stats_y->skip,
-             sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
+      for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
+        set_blk_skip(x, 0, i, rd_stats_y->skip);
     }
 
     if (rd_stats_y->rate == INT_MAX) {
@@ -9800,8 +9815,8 @@
     } else {
       super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
       memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
-      memset(x->blk_skip, rd_stats.skip,
-             sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
+      for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
+        set_blk_skip(x, 0, i, rd_stats.skip);
     }
     if (num_planes > 1) {
       super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
@@ -10177,8 +10192,8 @@
       } else {
         super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
         memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
-        memset(x->blk_skip, rd_stats_y.skip,
-               sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
+        for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
+          set_blk_skip(x, 0, i, rd_stats_y.skip);
       }
       if (num_planes > 1) {
         inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX, INT64_MAX,