Palette: enable all partitions no larger than 64x64

Enable palette mode for these additional block sizes:
4x4, 4x8, 8x4, 4x16, 16x4, 8x32, 32x8, 16x64, 64x16

This yields a 0.8% gain on screen_content keyframe coding.

Change-Id: Ic3c089b74171ace9082a0d3ad9e27c8a27553789
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index dfcfad0..262905a 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -1354,8 +1354,8 @@
 
 static INLINE int av1_allow_palette(int allow_screen_content_tools,
                                     BLOCK_SIZE sb_type) {
-  return allow_screen_content_tools && sb_type >= BLOCK_8X8 &&
-         sb_type <= BLOCK_64X64;
+  return allow_screen_content_tools && block_size_wide[sb_type] <= 64 &&
+         block_size_high[sb_type] <= 64;
 }
 
 // Returns sub-sampled dimensions of the given block.
@@ -1381,10 +1381,21 @@
   assert(IMPLIES(plane == PLANE_TYPE_Y, pd->subsampling_y == 0));
   assert(block_width >= block_cols);
   assert(block_height >= block_rows);
-  if (width) *width = block_width >> pd->subsampling_x;
-  if (height) *height = block_height >> pd->subsampling_y;
-  if (rows_within_bounds) *rows_within_bounds = block_rows >> pd->subsampling_y;
-  if (cols_within_bounds) *cols_within_bounds = block_cols >> pd->subsampling_x;
+  const int plane_block_width = block_width >> pd->subsampling_x;
+  const int plane_block_height = block_height >> pd->subsampling_y;
+  // Special handling for chroma sub8x8.
+  const int is_chroma_sub8_x = plane > 0 && plane_block_width < 4;
+  const int is_chroma_sub8_y = plane > 0 && plane_block_height < 4;
+  if (width) *width = plane_block_width + 2 * is_chroma_sub8_x;
+  if (height) *height = plane_block_height + 2 * is_chroma_sub8_y;
+  if (rows_within_bounds) {
+    *rows_within_bounds =
+        (block_rows >> pd->subsampling_y) + 2 * is_chroma_sub8_y;
+  }
+  if (cols_within_bounds) {
+    *cols_within_bounds =
+        (block_cols >> pd->subsampling_x) + 2 * is_chroma_sub8_x;
+  }
 }
 
 /* clang-format off */
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index e63bce1..e3a6226 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1141,6 +1141,8 @@
 const aom_cdf_prob
     default_palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = {
       { AOM_CDF7(12288, 19408, 24627, 26662, 28499, 30667) },
+      { AOM_CDF7(12288, 19408, 24627, 26662, 28499, 30667) },
+      { AOM_CDF7(12288, 19408, 24627, 26662, 28499, 30667) },
       { AOM_CDF7(2815, 4570, 9416, 10875, 13782, 19863) },
       { AOM_CDF7(12032, 14948, 22187, 23138, 24756, 27635) },
       { AOM_CDF7(14847, 20167, 25433, 26751, 28278, 30119) },
@@ -1152,6 +1154,8 @@
 const aom_cdf_prob
     default_palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = {
       { AOM_CDF7(20480, 29888, 32453, 32715, 32751, 32766) },
+      { AOM_CDF7(20480, 29888, 32453, 32715, 32751, 32766) },
+      { AOM_CDF7(20480, 29888, 32453, 32715, 32751, 32766) },
       { AOM_CDF7(11135, 23641, 31056, 31998, 32496, 32668) },
       { AOM_CDF7(9984, 21999, 29192, 30645, 31640, 32402) },
       { AOM_CDF7(7552, 16614, 24880, 27283, 29254, 31203) },
@@ -1187,6 +1191,12 @@
                                                { { AOM_CDF2(128 * 240) },
                                                  { AOM_CDF2(128 * 180) },
                                                  { AOM_CDF2(128 * 100) } },
+                                               { { AOM_CDF2(128 * 240) },
+                                                 { AOM_CDF2(128 * 180) },
+                                                 { AOM_CDF2(128 * 100) } },
+                                               { { AOM_CDF2(128 * 240) },
+                                                 { AOM_CDF2(128 * 180) },
+                                                 { AOM_CDF2(128 * 100) } },
                                              };
 
 const aom_cdf_prob
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index fb52ac3..e9b2cb8 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -49,6 +49,14 @@
 // 1 if this block uses palette for Y plane (i.e. Y palette size > 0).
 #define PALETTE_UV_MODE_CONTEXTS 2
 
+// Map the number of pixels in a block size to a context
+//   16(BLOCK_4X4)                          -> 0
+//   32(BLOCK_4X8, BLOCK_8X4)               -> 1
+//   64(BLOCK_8X8, BLOCK_4X16, BLOCK_16X4)  -> 2
+//   ...
+// 4096(BLOCK_64X64)                        -> 8
+#define PALATTE_BSIZE_CTXS 9
+
 #if CONFIG_KF_CTX
 #define KF_MODE_CONTEXTS 5
 #endif
diff --git a/av1/common/enums.h b/av1/common/enums.h
index ef85f3f..d8bf048 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -69,20 +69,11 @@
 #define MI_SIZE_128X128 (128 >> MI_SIZE_LOG2)
 #endif
 
-#define MIN_PALETTE_BSIZE BLOCK_8X8
 #define MAX_PALETTE_SQUARE (64 * 64)
-
 // Maximum number of colors in a palette.
 #define PALETTE_MAX_SIZE 8
 // Minimum number of colors in a palette.
 #define PALETTE_MIN_SIZE 2
-// Map the number of pixels in a block size to a context
-//   64(BLOCK_8X8)              -> 0
-//  128(BLOCK_16X8, BLOCK_16X8) -> 1
-//  256(BLOCK_16X16)            -> 2
-//  ...
-// 4096(BLOCK_64X64)            -> 6
-#define PALATTE_BSIZE_CTXS 7
 
 #if CONFIG_FRAME_MARKER
 #define FRAME_OFFSET_BITS 5
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index 010f1d4..ecc3111 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -185,7 +185,7 @@
                           uint16_t *cache);
 
 static INLINE int av1_get_palette_bsize_ctx(BLOCK_SIZE bsize) {
-  return num_pels_log2_lookup[bsize] - num_pels_log2_lookup[MIN_PALETTE_BSIZE];
+  return num_pels_log2_lookup[bsize] - num_pels_log2_lookup[BLOCK_4X4];
 }
 
 int av1_get_intra_inter_context(const MACROBLOCKD *xd);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index f9bf0c3..d91da5b 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -814,7 +814,7 @@
 }
 
 static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
-                                   aom_reader *r) {
+                                   int mi_row, int mi_col, aom_reader *r) {
   MODE_INFO *const mi = xd->mi[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
   const MODE_INFO *const above_mi = xd->above_mi;
@@ -845,7 +845,9 @@
       read_palette_colors_y(xd, cm->bit_depth, pmi, r);
     }
   }
-  if (mbmi->uv_mode == UV_DC_PRED) {
+  if (mbmi->uv_mode == UV_DC_PRED &&
+      is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
+                          xd->plane[1].subsampling_y)) {
     const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
     const int modev = aom_read_symbol(
         r, xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2, ACCT_STR);
@@ -1204,7 +1206,7 @@
   mbmi->palette_mode_info.palette_size[0] = 0;
   mbmi->palette_mode_info.palette_size[1] = 0;
   if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
-    read_palette_mode_info(cm, xd, r);
+    read_palette_mode_info(cm, xd, mi_row, mi_col, r);
 #if CONFIG_FILTER_INTRA
   mbmi->filter_intra_mode_info.use_filter_intra = 0;
   read_filter_intra_mode_info(xd, r);
@@ -1568,7 +1570,7 @@
   mbmi->palette_mode_info.palette_size[0] = 0;
   mbmi->palette_mode_info.palette_size[1] = 0;
   if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
-    read_palette_mode_info(cm, xd, r);
+    read_palette_mode_info(cm, xd, mi_row, mi_col, r);
 #if CONFIG_FILTER_INTRA
   mbmi->filter_intra_mode_info.use_filter_intra = 0;
   read_filter_intra_mode_info(xd, r);
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 54ebdeb..d4334ab 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -280,7 +280,6 @@
                                aom_reader *r) {
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   assert(plane == 0 || plane == 1);
-  assert(mbmi->sb_type >= BLOCK_8X8);
   Av1ColorMapParam color_map_params;
   memset(&color_map_params, 0, sizeof(color_map_params));
   get_palette_params(xd, plane, mbmi->sb_type, &color_map_params);
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index e58c338..ede1c9c 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1012,7 +1012,8 @@
 }
 
 static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                                    const MODE_INFO *const mi, aom_writer *w) {
+                                    const MODE_INFO *const mi, int mi_row,
+                                    int mi_col, aom_writer *w) {
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
   const MODE_INFO *const above_mi = xd->above_mi;
   const MODE_INFO *const left_mi = xd->left_mi;
@@ -1047,7 +1048,9 @@
 #if CONFIG_MONO_VIDEO
       av1_num_planes(cm) > 1 &&
 #endif
-      mbmi->uv_mode == UV_DC_PRED;
+      mbmi->uv_mode == UV_DC_PRED &&
+      is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
+                          xd->plane[1].subsampling_y);
   if (uv_dc_pred) {
     const int n = pmi->palette_size[1];
     const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
@@ -1405,7 +1408,7 @@
     write_intra_angle_info(xd, ec_ctx, w);
 #endif  // CONFIG_EXT_INTRA
     if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
-      write_palette_mode_info(cm, xd, mi, w);
+      write_palette_mode_info(cm, xd, mi, mi_row, mi_col, w);
 #if CONFIG_FILTER_INTRA
     write_filter_intra_mode_info(xd, mbmi, w);
 #endif  // CONFIG_FILTER_INTRA
@@ -1716,7 +1719,7 @@
   write_intra_angle_info(xd, ec_ctx, w);
 #endif  // CONFIG_EXT_INTRA
   if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
-    write_palette_mode_info(cm, xd, mi, w);
+    write_palette_mode_info(cm, xd, mi, mi_row, mi_col, w);
 #if CONFIG_FILTER_INTRA
   write_filter_intra_mode_info(xd, mbmi, w);
 #endif  // CONFIG_FILTER_INTRA
@@ -1981,8 +1984,8 @@
 #if CONFIG_INTRABC
       assert(mbmi->use_intrabc == 0);
 #endif
+      assert(av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type));
       int rows, cols;
-      assert(mbmi->sb_type >= BLOCK_8X8);
       av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows,
                                &cols);
       assert(*tok < tok_end);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index eb980e2..821dbf8 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4182,8 +4182,6 @@
   const MODE_INFO *const left_mi = xd->left_mi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-
-  assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_LARGEST);
   const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
 
   if (mbmi->mode == DC_PRED) {
@@ -4464,15 +4462,16 @@
         update_palette_cdf(xd, mi);
     }
 
-    if (bsize >= BLOCK_8X8) {
+    if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
       for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
         if (mbmi->palette_mode_info.palette_size[plane] > 0) {
-          if (!dry_run)
+          if (!dry_run) {
             av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
                                    PALETTE_MAP);
-          else if (dry_run == DRY_RUN_COSTCOEFFS)
+          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
             rate +=
                 av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
+          }
         }
       }
     }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index e21707d..5cc5390 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2943,7 +2943,7 @@
   MODE_INFO *const mic = xd->mi[0];
   MB_MODE_INFO *const mbmi = &mic->mbmi;
   assert(!is_inter_block(mbmi));
-  assert(bsize >= BLOCK_8X8);
+  assert(av1_allow_palette(cpi->common.allow_screen_content_tools, bsize));
   int colors, n;
   const int src_stride = x->plane[0].src.stride;
   const uint8_t *const src = x->plane[0].src.buf;
@@ -2952,8 +2952,6 @@
   av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                            &cols);
 
-  assert(cpi->common.allow_screen_content_tools);
-
   int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
 #if CONFIG_HIGHBITDEPTH
   if (cpi->common.use_highbitdepth)
@@ -3013,8 +3011,6 @@
     mbmi->filter_intra_mode_info.use_filter_intra = 0;
 #endif  // CONFIG_FILTER_INTRA
 
-    if (rows * cols > MAX_PALETTE_SQUARE) return 0;
-
     uint16_t color_cache[2 * PALETTE_MAX_SIZE];
     const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
 
@@ -5158,9 +5154,10 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   assert(!is_inter_block(mbmi));
+  assert(
+      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type));
   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
   const BLOCK_SIZE bsize = mbmi->sb_type;
-  assert(bsize >= BLOCK_8X8);
   int this_rate;
   int64_t this_rd;
   int colors_u, colors_v, colors;
@@ -5172,7 +5169,6 @@
   int plane_block_width, plane_block_height, rows, cols;
   av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                            &plane_block_height, &rows, &cols);
-  if (rows * cols > MAX_PALETTE_SQUARE) return;
 
   mbmi->uv_mode = UV_DC_PRED;
 
@@ -9041,7 +9037,6 @@
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
   const BLOCK_SIZE bsize = mbmi->sb_type;
-  assert(bsize >= BLOCK_8X8);
   int src_stride = x->plane[1].src.stride;
   const uint8_t *const src_u = x->plane[1].src.buf;
   const uint8_t *const src_v = x->plane[2].src.buf;