Further cleanups related to removal of cb4x4 flags

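Removes the sub-8x8 intra mode search functions
rd_pick_intra_sub_8x8_y_subblock_mode() and
rd_pick_intra_sub_8x8_y_mode() from av1/encoder/rdopt.c. They became
unreachable because unify_bsize was hard-coded to 1, so the intra_yrd
computation always took the rd_pick_intra_sby_mode() branch. The
unify_bsize constant is removed as well and rd_pick_intra_sby_mode()
is now called unconditionally.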

Change-Id: I86bda117d8d455452ee1ee3a2a566742650e05e0
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c709c65..a439f29 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3403,597 +3403,6 @@
   return rate_overhead;
 }
 
-static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
-    const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
-    PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
-    ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
-    BLOCK_SIZE bsize, TX_SIZE tx_size, int *y_skip, int64_t rd_thresh) {
-  const AV1_COMMON *const cm = &cpi->common;
-  PREDICTION_MODE mode;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  assert(!is_inter_block(&xd->mi[0]->mbmi));
-  int64_t best_rd = rd_thresh;
-  struct macroblock_plane *p = &x->plane[0];
-  struct macroblockd_plane *pd = &xd->plane[0];
-  const int src_stride = p->src.stride;
-  const int dst_stride = pd->dst.stride;
-  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
-  uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
-  ENTROPY_CONTEXT ta[2], tempa[2];
-  ENTROPY_CONTEXT tl[2], templ[2];
-
-  const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
-  const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
-  const int tx_width_unit = tx_size_wide_unit[tx_size];
-  const int tx_height_unit = tx_size_high_unit[tx_size];
-  const int pred_block_width = block_size_wide[bsize];
-  const int pred_block_height = block_size_high[bsize];
-  const int tx_width = tx_size_wide[tx_size];
-  const int tx_height = tx_size_high[tx_size];
-  const int pred_width_in_transform_blocks = pred_block_width / tx_width;
-  const int pred_height_in_transform_blocks = pred_block_height / tx_height;
-  int idx, idy;
-  int best_can_skip = 0;
-  uint8_t best_dst[8 * 8];
-#if CONFIG_HIGHBITDEPTH
-  uint16_t best_dst16[8 * 8];
-#endif  // CONFIG_HIGHBITDEPTH
-  const int is_lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
-  const int sub_bsize = bsize;
-#else
-  const int sub_bsize = BLOCK_4X4;
-#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
-
-#if CONFIG_PVQ
-  od_rollback_buffer pre_buf, post_buf;
-  od_encode_checkpoint(&x->daala_enc, &pre_buf);
-  od_encode_checkpoint(&x->daala_enc, &post_buf);
-#endif  // CONFIG_PVQ
-
-  assert(bsize < BLOCK_8X8);
-  assert(tx_width < 8 || tx_height < 8);
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
-  if (is_lossless)
-    assert(tx_width == 4 && tx_height == 4);
-  else
-    assert(tx_width == pred_block_width && tx_height == pred_block_height);
-#else
-  assert(tx_width == 4 && tx_height == 4);
-#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
-
-  memcpy(ta, a, pred_width_in_transform_blocks * sizeof(a[0]));
-  memcpy(tl, l, pred_height_in_transform_blocks * sizeof(l[0]));
-
-  xd->mi[0]->mbmi.tx_size = tx_size;
-
-  xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
-
-#if CONFIG_HIGHBITDEPTH
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-#if CONFIG_PVQ
-    od_encode_checkpoint(&x->daala_enc, &pre_buf);
-#endif
-    for (mode = DC_PRED; mode <= PAETH_PRED; ++mode) {
-      int64_t this_rd;
-      int ratey = 0;
-      int64_t distortion = 0;
-      int rate = bmode_costs[mode];
-      int can_skip = 1;
-
-      if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
-            (1 << mode)))
-        continue;
-
-      // Only do the oblique modes if the best so far is
-      // one of the neighboring directional modes
-      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
-        if (conditional_skipintra(mode, *best_mode)) continue;
-      }
-
-      memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
-      memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
-
-      for (idy = 0; idy < pred_height_in_transform_blocks; ++idy) {
-        for (idx = 0; idx < pred_width_in_transform_blocks; ++idx) {
-          const int block_raster_idx = (row + idy) * 2 + (col + idx);
-          const int block =
-              av1_raster_order_to_block_index(tx_size, block_raster_idx);
-          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
-          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
-#if !CONFIG_PVQ
-          int16_t *const src_diff = av1_raster_block_offset_int16(
-              BLOCK_8X8, block_raster_idx, p->src_diff);
-#endif
-          int skip;
-          assert(block < 4);
-          assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
-                         idx == 0 && idy == 0));
-          assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
-                         block == 0 || block == 2));
-          xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
-          av1_predict_intra_block(
-              cm, xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode,
-              dst, dst_stride, dst, dst_stride, col + idx, row + idy, 0);
-#if !CONFIG_PVQ
-          aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
-                                    src_stride, dst, dst_stride, xd->bd);
-#endif
-          if (is_lossless) {
-            TX_TYPE tx_type =
-                av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
-            const SCAN_ORDER *scan_order =
-                get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
-            const int coeff_ctx =
-                combine_entropy_contexts(tempa[idx], templ[idy]);
-#if !CONFIG_PVQ
-            av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
-                            tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
-            ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
-                                     scan_order, tempa + idx, templ + idy,
-                                     cpi->sf.use_fast_coef_costing);
-            skip = (p->eobs[block] == 0);
-            can_skip &= skip;
-            tempa[idx] = !skip;
-            templ[idy] = !skip;
-#if CONFIG_EXT_TX
-            if (tx_size == TX_8X4) {
-              tempa[idx + 1] = tempa[idx];
-            } else if (tx_size == TX_4X8) {
-              templ[idy + 1] = templ[idy];
-            }
-#endif  // CONFIG_EXT_TX
-#else
-            (void)scan_order;
-
-            av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
-                            tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
-
-            ratey += x->rate;
-            skip = x->pvq_skip[0];
-            tempa[idx] = !skip;
-            templ[idy] = !skip;
-            can_skip &= skip;
-#endif
-            if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
-              goto next_highbd;
-#if CONFIG_PVQ
-            if (!skip)
-#endif
-              av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT_FROM_PRED
-                                          mode,
-#endif
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                          BLOCK_OFFSET(xd->mrc_mask, block),
-#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                          DCT_DCT, tx_size, dst, dst_stride,
-                                          p->eobs[block]);
-          } else {
-            int64_t dist;
-            unsigned int tmp;
-            TX_TYPE tx_type =
-                av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
-            const SCAN_ORDER *scan_order =
-                get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
-            const int coeff_ctx =
-                combine_entropy_contexts(tempa[idx], templ[idy]);
-#if !CONFIG_PVQ
-#if DISABLE_TRELLISQ_SEARCH
-            av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
-                            tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
-#else
-            av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
-                            tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
-            av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size,
-                           tempa + idx, templ + idy, 1);
-#endif  // DISABLE_TRELLISQ_SEARCH
-            ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
-                                     scan_order, tempa + idx, templ + idy,
-                                     cpi->sf.use_fast_coef_costing);
-            skip = (p->eobs[block] == 0);
-            can_skip &= skip;
-            tempa[idx] = !skip;
-            templ[idy] = !skip;
-#if CONFIG_EXT_TX
-            if (tx_size == TX_8X4) {
-              tempa[idx + 1] = tempa[idx];
-            } else if (tx_size == TX_4X8) {
-              templ[idy + 1] = templ[idy];
-            }
-#endif  // CONFIG_EXT_TX
-#else
-            (void)scan_order;
-
-            av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
-                            tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
-            ratey += x->rate;
-            skip = x->pvq_skip[0];
-            tempa[idx] = !skip;
-            templ[idy] = !skip;
-            can_skip &= skip;
-#endif
-#if CONFIG_PVQ
-            if (!skip)
-#endif
-              av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT_FROM_PRED
-                                          mode,
-#endif
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                          BLOCK_OFFSET(xd->mrc_mask, block),
-#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                          tx_type, tx_size, dst, dst_stride,
-                                          p->eobs[block]);
-            cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
-            dist = (int64_t)tmp << 4;
-            distortion += dist;
-            if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
-              goto next_highbd;
-          }
-        }
-      }
-
-      rate += ratey;
-      this_rd = RDCOST(x->rdmult, rate, distortion);
-
-      if (this_rd < best_rd) {
-        *bestrate = rate;
-        *bestratey = ratey;
-        *bestdistortion = distortion;
-        best_rd = this_rd;
-        best_can_skip = can_skip;
-        *best_mode = mode;
-        memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
-        memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
-#if CONFIG_PVQ
-        od_encode_checkpoint(&x->daala_enc, &post_buf);
-#endif
-        for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
-          memcpy(best_dst16 + idy * 8,
-                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
-                 pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
-        }
-      }
-    next_highbd : {}
-#if CONFIG_PVQ
-      od_encode_rollback(&x->daala_enc, &pre_buf);
-#endif
-    }
-
-    if (best_rd >= rd_thresh) return best_rd;
-
-#if CONFIG_PVQ
-    od_encode_rollback(&x->daala_enc, &post_buf);
-#endif
-
-    if (y_skip) *y_skip &= best_can_skip;
-
-    for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
-      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
-             best_dst16 + idy * 8,
-             pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
-    }
-
-    return best_rd;
-  }
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_PVQ
-  od_encode_checkpoint(&x->daala_enc, &pre_buf);
-#endif  // CONFIG_PVQ
-
-  for (mode = DC_PRED; mode <= PAETH_PRED; ++mode) {
-    int64_t this_rd;
-    int ratey = 0;
-    int64_t distortion = 0;
-    int rate = bmode_costs[mode];
-    int can_skip = 1;
-
-    if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
-          (1 << mode))) {
-      continue;
-    }
-
-    // Only do the oblique modes if the best so far is
-    // one of the neighboring directional modes
-    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
-      if (conditional_skipintra(mode, *best_mode)) continue;
-    }
-
-    memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
-    memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
-
-    for (idy = 0; idy < pred_height_in_4x4_blocks; idy += tx_height_unit) {
-      for (idx = 0; idx < pred_width_in_4x4_blocks; idx += tx_width_unit) {
-        const int block_raster_idx = (row + idy) * 2 + (col + idx);
-        int block = av1_raster_order_to_block_index(tx_size, block_raster_idx);
-        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
-        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
-#if !CONFIG_PVQ
-        int16_t *const src_diff = av1_raster_block_offset_int16(
-            BLOCK_8X8, block_raster_idx, p->src_diff);
-#endif  // !CONFIG_PVQ
-        int skip;
-        assert(block < 4);
-        assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
-                       idx == 0 && idy == 0));
-        assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
-                       block == 0 || block == 2));
-        xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
-        av1_predict_intra_block(
-            cm, xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode, dst,
-            dst_stride, dst, dst_stride, 2 * (col + idx), 2 * (row + idy), 0);
-#if !CONFIG_PVQ
-        aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
-                           dst, dst_stride);
-#endif  // !CONFIG_PVQ
-        TX_TYPE tx_type =
-            av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
-        const SCAN_ORDER *scan_order =
-            get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
-        const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]);
-        block = 4 * block;
-#if !CONFIG_PVQ
-#if DISABLE_TRELLISQ_SEARCH
-        av1_xform_quant(cm, x, 0, block, 2 * (row + idy), 2 * (col + idx),
-                        BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
-#else
-        const AV1_XFORM_QUANT xform_quant =
-            is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
-        av1_xform_quant(cm, x, 0, block, 2 * (row + idy), 2 * (col + idx),
-                        BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
-
-        av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, tempa + idx,
-                       templ + idy, 1);
-#endif  // DISABLE_TRELLISQ_SEARCH
-        ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, scan_order,
-                                 tempa + idx, templ + idy,
-                                 cpi->sf.use_fast_coef_costing);
-        skip = (p->eobs[block] == 0);
-        can_skip &= skip;
-        tempa[idx] = !skip;
-        templ[idy] = !skip;
-#if CONFIG_EXT_TX
-        if (tx_size == TX_8X4) {
-          tempa[idx + 1] = tempa[idx];
-        } else if (tx_size == TX_4X8) {
-          templ[idy + 1] = templ[idy];
-        }
-#endif  // CONFIG_EXT_TX
-#else
-        (void)scan_order;
-
-        av1_xform_quant(cm, x, 0, block, 2 * (row + idy), 2 * (col + idx),
-                        BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
-
-        ratey += x->rate;
-        skip = x->pvq_skip[0];
-        tempa[idx] = !skip;
-        templ[idy] = !skip;
-        can_skip &= skip;
-#endif  // !CONFIG_PVQ
-
-        if (!is_lossless) {  // To use the pixel domain distortion, we need to
-                             // calculate inverse txfm *before* calculating RD
-                             // cost. Compared to calculating the distortion in
-                             // the frequency domain, the overhead of encoding
-                             // effort is low.
-#if CONFIG_PVQ
-          if (!skip)
-#endif  // CONFIG_PVQ
-            av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT_FROM_PRED
-                                        mode,
-#endif
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                        BLOCK_OFFSET(xd->mrc_mask, block),
-#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                        tx_type, tx_size, dst, dst_stride,
-                                        p->eobs[block]);
-          unsigned int tmp;
-          cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
-          const int64_t dist = (int64_t)tmp << 4;
-          distortion += dist;
-        }
-
-        if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next;
-
-        if (is_lossless) {  // Calculate inverse txfm *after* RD cost.
-#if CONFIG_PVQ
-          if (!skip)
-#endif  // CONFIG_PVQ
-            av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT_FROM_PRED
-                                        mode,
-#endif
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                        BLOCK_OFFSET(xd->mrc_mask, block),
-#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                                        DCT_DCT, tx_size, dst, dst_stride,
-                                        p->eobs[block]);
-        }
-      }
-    }
-
-    rate += ratey;
-    this_rd = RDCOST(x->rdmult, rate, distortion);
-
-    if (this_rd < best_rd) {
-      *bestrate = rate;
-      *bestratey = ratey;
-      *bestdistortion = distortion;
-      best_rd = this_rd;
-      best_can_skip = can_skip;
-      *best_mode = mode;
-      memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
-      memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
-#if CONFIG_PVQ
-      od_encode_checkpoint(&x->daala_enc, &post_buf);
-#endif  // CONFIG_PVQ
-      for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
-        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
-               pred_width_in_transform_blocks * 4);
-    }
-  next : {}
-#if CONFIG_PVQ
-    od_encode_rollback(&x->daala_enc, &pre_buf);
-#endif  // CONFIG_PVQ
-  }     // mode decision loop
-
-  if (best_rd >= rd_thresh) return best_rd;
-
-#if CONFIG_PVQ
-  od_encode_rollback(&x->daala_enc, &post_buf);
-#endif  // CONFIG_PVQ
-
-  if (y_skip) *y_skip &= best_can_skip;
-
-  for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
-    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
-           pred_width_in_transform_blocks * 4);
-
-  return best_rd;
-}
-
-static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
-                                            MACROBLOCK *mb, int *rate,
-                                            int *rate_y, int64_t *distortion,
-                                            int *y_skip, int64_t best_rd) {
-  const MACROBLOCKD *const xd = &mb->e_mbd;
-  MODE_INFO *const mic = xd->mi[0];
-  const MODE_INFO *above_mi = xd->above_mi;
-  const MODE_INFO *left_mi = xd->left_mi;
-  MB_MODE_INFO *const mbmi = &mic->mbmi;
-  assert(!is_inter_block(mbmi));
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
-  const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
-  int idx, idy;
-  int cost = 0;
-  int64_t total_distortion = 0;
-  int tot_rate_y = 0;
-  int64_t total_rd = 0;
-  const int *bmode_costs = mb->mbmode_cost[0];
-  const int is_lossless = xd->lossless[mbmi->segment_id];
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
-  const TX_SIZE tx_size = is_lossless ? TX_4X4 : max_txsize_rect_lookup[bsize];
-#else
-  const TX_SIZE tx_size = TX_4X4;
-#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
-
-#if CONFIG_FILTER_INTRA
-  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
-#endif  // CONFIG_FILTER_INTRA
-
-  // TODO(any): Add search of the tx_type to improve rd performance at the
-  // expense of speed.
-  mbmi->tx_type = DCT_DCT;
-  mbmi->tx_size = tx_size;
-#if CONFIG_LGT_FROM_PRED
-  mbmi->use_lgt = 0;
-#endif
-
-  if (y_skip) *y_skip = 1;
-
-  // Pick modes for each prediction sub-block (of size 4x4, 4x8, or 8x4) in this
-  // 8x8 coding block.
-  for (idy = 0; idy < 2; idy += pred_height_in_4x4_blocks) {
-    for (idx = 0; idx < 2; idx += pred_width_in_4x4_blocks) {
-      PREDICTION_MODE best_mode = DC_PRED;
-      int r = INT_MAX, ry = INT_MAX;
-      int64_t d = INT64_MAX, this_rd = INT64_MAX;
-      int j;
-      const int pred_block_idx = idy * 2 + idx;
-      if (cpi->common.frame_type == KEY_FRAME) {
-        const PREDICTION_MODE A =
-            av1_above_block_mode(mic, above_mi, pred_block_idx);
-        const PREDICTION_MODE L =
-            av1_left_block_mode(mic, left_mi, pred_block_idx);
-
-#if CONFIG_KF_CTX
-        const int above_ctx = intra_mode_context[A];
-        const int left_ctx = intra_mode_context[L];
-        bmode_costs = mb->y_mode_costs[above_ctx][left_ctx];
-#else
-        bmode_costs = mb->y_mode_costs[A][L];
-#endif
-      }
-      this_rd = rd_pick_intra_sub_8x8_y_subblock_mode(
-          cpi, mb, idy, idx, &best_mode, bmode_costs,
-          xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
-          &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
-#if CONFIG_DIST_8X8
-      if (!cpi->oxcf.using_dist_8x8)
-#endif
-        if (this_rd >= best_rd - total_rd) return INT64_MAX;
-
-      total_rd += this_rd;
-      cost += r;
-      total_distortion += d;
-      tot_rate_y += ry;
-
-      mic->bmi[pred_block_idx].as_mode = best_mode;
-      for (j = 1; j < pred_height_in_4x4_blocks; ++j)
-        mic->bmi[pred_block_idx + j * 2].as_mode = best_mode;
-      for (j = 1; j < pred_width_in_4x4_blocks; ++j)
-        mic->bmi[pred_block_idx + j].as_mode = best_mode;
-
-      if (total_rd >= best_rd) return INT64_MAX;
-    }
-  }
-  mbmi->mode = mic->bmi[3].as_mode;
-
-#if CONFIG_DIST_8X8
-  if (cpi->oxcf.using_dist_8x8) {
-    const struct macroblock_plane *p = &mb->plane[0];
-    const struct macroblockd_plane *pd = &xd->plane[0];
-    const int src_stride = p->src.stride;
-    const int dst_stride = pd->dst.stride;
-    uint8_t *src = p->src.buf;
-    uint8_t *dst = pd->dst.buf;
-
-    // Daala-defined distortion computed for the block of 8x8 pixels
-    total_distortion = av1_dist_8x8(cpi, mb, src, src_stride, dst, dst_stride,
-                                    BLOCK_8X8, 8, 8, 8, 8, mb->qindex)
-                       << 4;
-  }
-#endif  // CONFIG_DIST_8X8
-  // Add in the cost of the transform type
-  if (!is_lossless) {
-    int rate_tx_type = 0;
-#if CONFIG_EXT_TX
-    if (get_ext_tx_types(tx_size, bsize, 0, cpi->common.reduced_tx_set_used) >
-        1) {
-      const int eset =
-          get_ext_tx_set(tx_size, bsize, 0, cpi->common.reduced_tx_set_used);
-#if CONFIG_LGT_FROM_PRED
-      if (LGT_FROM_PRED_INTRA && is_lgt_allowed(mbmi->mode, tx_size))
-        rate_tx_type += mb->intra_lgt_cost[txsize_sqr_map[tx_size]][mbmi->mode]
-                                          [mbmi->use_lgt];
-      if (!LGT_FROM_PRED_INTRA || !mbmi->use_lgt)
-#endif  // CONFIG_LGT_FROM_PRED
-        rate_tx_type += mb->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
-                                               [mbmi->mode][mbmi->tx_type];
-    }
-#else
-    rate_tx_type =
-        mb->intra_tx_type_costs[txsize_sqr_map[tx_size]]
-                               [intra_mode_to_tx_type_context[mbmi->mode]]
-                               [mbmi->tx_type];
-#endif  // CONFIG_EXT_TX
-    assert(mbmi->tx_size == tx_size);
-    cost += rate_tx_type;
-    tot_rate_y += rate_tx_type;
-  }
-
-  *rate = cost;
-  *rate_y = tot_rate_y;
-  *distortion = total_distortion;
-
-  return RDCOST(mb->rdmult, cost, total_distortion);
-}
-
 #if CONFIG_FILTER_INTRA
 // Return 1 if an filter intra mode is selected; return 0 otherwise.
 static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
@@ -10019,7 +9428,6 @@
   int y_skip = 0, uv_skip = 0;
   int64_t dist_y = 0, dist_uv = 0;
   TX_SIZE max_uv_tx_size;
-  const int unify_bsize = 1;
 
   (void)cm;
 
@@ -10034,12 +9442,8 @@
   mbmi->use_lgt = 0;
 #endif
 
-  const int64_t intra_yrd =
-      (bsize >= BLOCK_8X8 || unify_bsize)
-          ? rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
-                                   &y_skip, bsize, best_rd)
-          : rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
-                                         &dist_y, &y_skip, best_rd);
+  const int64_t intra_yrd = rd_pick_intra_sby_mode(
+      cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, best_rd);
 
   if (intra_yrd < best_rd) {
 #if CONFIG_CFL