Merge "Removing unused YV12_BUFFER_CONFIG arguments from motion search functions." into experimental
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 8179a69..fd39439 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -70,7 +70,7 @@
 }
 
 int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
-  int i, mb_cols;
+  int i, mi_cols;
 
   // Our internal buffers are always multiples of 16
   const int aligned_width = multiple16(width);
@@ -140,19 +140,19 @@
 
   // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
   // information is exposed at this level
-  mb_cols = mb_cols_aligned_to_sb(oci);
-  oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 12 * mb_cols, 1);
+  mi_cols = mi_cols_aligned_to_sb(oci);
+  oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 6 * mi_cols, 1);
   if (!oci->above_context[0]) {
     vp9_free_frame_buffers(oci);
     return 1;
   }
   oci->above_context[1] =
-    oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols;
+    oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
   oci->above_context[2] =
-    oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols;
+    oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
 
   oci->above_seg_context =
-    vpx_calloc(sizeof(PARTITION_CONTEXT) * mb_cols_aligned_to_sb(oci), 1);
+    vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
 
   if (!oci->above_seg_context) {
     vp9_free_frame_buffers(oci);
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 6b104a1..d0a1203 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -408,7 +408,7 @@
 static INLINE void update_partition_context(MACROBLOCKD *xd,
                                             BLOCK_SIZE_TYPE sb_type,
                                             BLOCK_SIZE_TYPE sb_size) {
-  int bsl = mi_width_log2(sb_size), bs;
+  int bsl = mi_width_log2(sb_size), bs = 1 << bsl;
   int bwl = mi_width_log2(sb_type);
   int bhl = mi_height_log2(sb_type);
   int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
@@ -417,8 +417,6 @@
   if (bsl == 0)
     return;
 
-  bs = 1 << (bsl - 1);
-
   // update the partition context at the end notes. set partition bits
   // of block sizes larger than the current one to be one, and partition
   // bits of smaller block sizes to be zero.
@@ -449,18 +447,14 @@
 
 static INLINE int partition_plane_context(MACROBLOCKD *xd,
                                           BLOCK_SIZE_TYPE sb_type) {
-  int bsl = mi_width_log2(sb_type), bs;
+  int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
   int above = 0, left = 0, i;
   int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
 
-  bs = 1 << (bsl - 1);
-
   assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
   assert(bsl >= 0);
   assert(boffset >= 0);
 
-  bs = 1 << (bsl - 1);
-
   for (i = 0; i < bs; i++)
     above |= (xd->above_seg_context[i] & (1 << boffset));
   for (i = 0; i < bs; i++)
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index b00d892..1663195 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -18,6 +18,8 @@
 #define MI_SIZE (1 << LOG2_MI_SIZE)
 #define MI_UV_SIZE (1 << (LOG2_MI_SIZE - 1))
 
+#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1)
+
 typedef enum BLOCK_SIZE_TYPE {
   BLOCK_SIZE_AB4X4,
 #if CONFIG_AB4X4
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index d9d2989..de2cace 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -217,7 +217,7 @@
 
   // partition contexts
   PARTITION_CONTEXT *above_seg_context;
-  PARTITION_CONTEXT left_seg_context[4];
+  PARTITION_CONTEXT left_seg_context[8];
 
   /* keyframe block modes are predicted by their above, left neighbors */
 
@@ -297,8 +297,8 @@
   buf[new_idx]++;
 }
 
-static int mb_cols_aligned_to_sb(VP9_COMMON *cm) {
-  return (cm->mb_cols + 3) & ~3;
+static int mi_cols_aligned_to_sb(VP9_COMMON *cm) {
+  return 2 * ((cm->mb_cols + 3) & ~3);
 }
 
 static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd,
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 65a9093..0f5cbf4 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -18,11 +18,8 @@
 #include "vp9/common/vp9_reconintra.h"
 
 void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
-                                       YV12_BUFFER_CONFIG *other,
+                                       int other_w, int other_h,
                                        int this_w, int this_h) {
-  int other_h = other->y_crop_height;
-  int other_w = other->y_crop_width;
-
   scale->x_num = other_w;
   scale->x_den = this_w;
   scale->x_offset_q4 = 0;  // calculated per-mb
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 3c04779..faf018c 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -35,7 +35,7 @@
                               VP9_COMMON *cm);
 
 void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
-                                       YV12_BUFFER_CONFIG *other,
+                                       int other_w, int other_h,
                                        int this_w, int this_h);
 
 void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
@@ -67,14 +67,11 @@
   return val;
 }
 
-static int scaled_buffer_offset(int x_offset,
-                                int y_offset,
-                                int stride,
+static int scaled_buffer_offset(int x_offset, int y_offset, int stride,
                                 const struct scale_factors *scale) {
-  if (scale)
-    return scale->scale_value_y(y_offset, scale) * stride +
-        scale->scale_value_x(x_offset, scale);
-  return y_offset * stride + x_offset;
+  const int x = scale ? scale->scale_value_x(x_offset, scale) : x_offset;
+  const int y = scale ? scale->scale_value_y(y_offset, scale) : y_offset;
+  return y * stride + x;
 }
 
 static void setup_pred_plane(struct buf_2d *dst,
@@ -92,18 +89,15 @@
 static void setup_dst_planes(MACROBLOCKD *xd,
                              const YV12_BUFFER_CONFIG *src,
                              int mi_row, int mi_col) {
-  setup_pred_plane(&xd->plane[0].dst,
-                   src->y_buffer, src->y_stride,
-                   mi_row, mi_col, NULL,
-                   xd->plane[0].subsampling_x, xd->plane[0].subsampling_y);
-  setup_pred_plane(&xd->plane[1].dst,
-                   src->u_buffer, src->uv_stride,
-                   mi_row, mi_col, NULL,
-                   xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
-  setup_pred_plane(&xd->plane[2].dst,
-                   src->v_buffer, src->uv_stride,
-                   mi_row, mi_col, NULL,
-                   xd->plane[2].subsampling_x, xd->plane[2].subsampling_y);
+  uint8_t *buffers[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
+  int strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
+  int i;
+
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    struct macroblockd_plane *pd = &xd->plane[i];
+    setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL,
+                     pd->subsampling_x, pd->subsampling_y);
+  }
 }
 
 static void setup_pre_planes(MACROBLOCKD *xd,
@@ -112,26 +106,24 @@
                              int mi_row, int mi_col,
                              const struct scale_factors *scale,
                              const struct scale_factors *scale_uv) {
-  int i;
+  const YV12_BUFFER_CONFIG *srcs[2] = {src0, src1};
+  int i, j;
 
-  for (i = 0; i < 2; i++) {
-    const YV12_BUFFER_CONFIG *src = i ? src1 : src0;
+  for (i = 0; i < 2; ++i) {
+    const YV12_BUFFER_CONFIG *src = srcs[i];
+    if (src) {
+      uint8_t* buffers[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
+      int strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
 
-    if (!src)
-      continue;
-
-    setup_pred_plane(&xd->plane[0].pre[i],
-                     src->y_buffer, src->y_stride,
-                     mi_row, mi_col, scale ? scale + i : NULL,
-                     xd->plane[0].subsampling_x, xd->plane[0].subsampling_y);
-    setup_pred_plane(&xd->plane[1].pre[i],
-                     src->u_buffer, src->uv_stride,
-                     mi_row, mi_col, scale_uv ? scale_uv + i : NULL,
-                     xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
-    setup_pred_plane(&xd->plane[2].pre[i],
-                     src->v_buffer, src->uv_stride,
-                     mi_row, mi_col, scale_uv ? scale_uv + i : NULL,
-                     xd->plane[2].subsampling_x, xd->plane[2].subsampling_y);
+      for (j = 0; j < MAX_MB_PLANE; ++j) {
+        struct macroblockd_plane *pd = &xd->plane[j];
+        const struct scale_factors *sf = j ? scale_uv : scale;
+        setup_pred_plane(&pd->pre[i],
+                         buffers[j], strides[j],
+                         mi_row, mi_col, sf ? &sf[i] : NULL,
+                         pd->subsampling_x, pd->subsampling_y);
+      }
+    }
   }
 }
 
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index f244358..5ce224b 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -371,8 +371,8 @@
     xd->plane[i].left_context = cm->left_context[i] +
         (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
   }
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
-  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> 1) & 3);
+  xd->above_seg_context = cm->above_seg_context + mi_col;
+  xd->left_seg_context  = cm->left_seg_context + (mi_row & MI_MASK);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
   // as they are always compared to values that are in 1/8th pel units
@@ -442,9 +442,8 @@
   if (bsize > BLOCK_SIZE_SB8X8) {
     int pl;
     // read the partition information
-    xd->left_seg_context =
-        pc->left_seg_context + ((mi_row >> 1) & 3);
-    xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
+    xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK);
+    xd->above_seg_context = pc->above_seg_context + mi_col;
     pl = partition_plane_context(xd, bsize);
     partition = treed_read(r, vp9_partition_tree,
                            pc->fc.partition_prob[pl]);
@@ -485,8 +484,8 @@
   if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
     return;
 
-  xd->left_seg_context = pc->left_seg_context + ((mi_row >> 1) & 3);
-  xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
+  xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK);
+  xd->above_seg_context = pc->above_seg_context + mi_col;
   update_partition_context(xd, subsize, bsize);
 }
 
@@ -847,11 +846,11 @@
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 *
-                                      MAX_MB_PLANE * mb_cols_aligned_to_sb(pc));
+  vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
+                                      MAX_MB_PLANE * mi_cols_aligned_to_sb(pc));
 
   vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-                                       mb_cols_aligned_to_sb(pc));
+                                       mi_cols_aligned_to_sb(pc));
 
   if (pbi->oxcf.inv_tile_order) {
     const int n_cols = pc->tile_columns;
@@ -1014,7 +1013,9 @@
       if (mapped_ref >= NUM_YV12_BUFFERS)
         memset(sf, 0, sizeof(*sf));
       else
-        vp9_setup_scale_factors_for_frame(sf, fb, pc->width, pc->height);
+        vp9_setup_scale_factors_for_frame(sf,
+                                          fb->y_crop_width, fb->y_crop_height,
+                                          pc->width, pc->height);
     }
 
     // Read the sign bias for each reference frame buffer.
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index a7c26a4..050821d 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -878,9 +878,8 @@
 
   if (bsize > BLOCK_SIZE_SB8X8) {
     int pl;
-    xd->left_seg_context =
-        cm->left_seg_context + ((mi_row >> 1) & 3);
-    xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
+    xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
+    xd->above_seg_context = cm->above_seg_context + mi_col;
     pl = partition_plane_context(xd, bsize);
     // encode the partition information
     write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl],
@@ -918,8 +917,8 @@
   if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
     return;
 
-  xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
+  xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
+  xd->above_seg_context = cm->above_seg_context + mi_col;
   update_partition_context(xd, subsize, bsize);
 }
 
@@ -932,7 +931,7 @@
 
   m_ptr += c->cur_tile_mi_col_start + c->cur_tile_mi_row_start * mis;
   vpx_memset(c->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-             mb_cols_aligned_to_sb(c));
+             mi_cols_aligned_to_sb(c));
 
   for (mi_row = c->cur_tile_mi_row_start;
        mi_row < c->cur_tile_mi_row_end;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 8ddad26..49e8cce 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -544,8 +544,8 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
-  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> 1) & 3);
+  xd->above_seg_context = cm->above_seg_context + mi_col;
+  xd->left_seg_context  = cm->left_seg_context + (mi_row & MI_MASK);
 }
 
 static void set_offsets(VP9_COMP *cpi,
@@ -878,7 +878,7 @@
     int sb64_rate = 0, sb64_dist = 0;
     int sb64_skip = 0;
     ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
-    PARTITION_CONTEXT seg_l[4], seg_a[4];
+    PARTITION_CONTEXT seg_l[64 / MI_SIZE], seg_a[64 / MI_SIZE];
     TOKENEXTRA *tp_orig = *tp;
 
     for (p = 0; p < MAX_MB_PLANE; p++) {
@@ -888,9 +888,8 @@
       memcpy(l + 16 * p, cm->left_context[p],
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
     }
-    memcpy(&seg_a, cm->above_seg_context + (mi_col >> 1),
-           sizeof(seg_a));
-    memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
+    vpx_memcpy(&seg_a, cm->above_seg_context + mi_col, sizeof(seg_a));
+    vpx_memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
 
     // FIXME(rbultje): this function should probably be rewritten to be
     // recursive at some point in the future.
@@ -902,6 +901,7 @@
       int sb32_skip = 0;
       int j;
       ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE];
+      PARTITION_CONTEXT sl32[32 / MI_SIZE], sa32[32 / MI_SIZE];
 
       sb_partitioning[i] = BLOCK_SIZE_MB16X16;
       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
@@ -920,6 +920,8 @@
                        ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
       }
+      vpx_memcpy(&sa32, cm->above_seg_context + mi_col + x_idx, sizeof(sa32));
+      vpx_memcpy(&sl32, cm->left_seg_context + y_idx, sizeof(sl32));
 
       /* Encode MBs in raster order within the SB */
       for (j = 0; j < 4; j++) {
@@ -928,6 +930,7 @@
         int r, d;
         int r2, d2, mb16_rate = 0, mb16_dist = 0, k;
         ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE];
+        PARTITION_CONTEXT sl16[16 / MI_SIZE], sa16[16 / MI_SIZE];
 
         mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
 
@@ -950,6 +953,9 @@
                          ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
+        vpx_memcpy(&sa16, cm->above_seg_context + mi_col + x_idx_m,
+                   sizeof(sa16));
+        vpx_memcpy(&sl16, cm->left_seg_context + y_idx_m, sizeof(sl16));
 
         for (k = 0; k < 4; k++) {
           xd->b_index = k;
@@ -983,6 +989,9 @@
                      a3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
+        vpx_memcpy(cm->above_seg_context + mi_col + x_idx_m,
+                   sa16, sizeof(sa16));
+        vpx_memcpy(cm->left_seg_context + y_idx_m, sl16, sizeof(sl16));
 
         // try 8x16 coding
         r2 = 0;
@@ -1102,6 +1111,9 @@
                    a2 + 8 * p,
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
       }
+      // restore partition information context
+      vpx_memcpy(cm->above_seg_context + mi_col + x_idx, sa32, sizeof(sa32));
+      vpx_memcpy(cm->left_seg_context + y_idx, sl32, sizeof(sl32));
 
       set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx);
       pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
@@ -1258,8 +1270,7 @@
       memcpy(cm->left_context[p], l + 16 * p,
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
     }
-    memcpy(cm->above_seg_context + (mi_col >> 1), &seg_a,
-           sizeof(seg_a));
+    memcpy(cm->above_seg_context + mi_col, &seg_a, sizeof(seg_a));
     memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l));
 
     set_partition_seg_context(cpi, mi_row, mi_col);
@@ -1428,10 +1439,10 @@
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 *
-                                      MAX_MB_PLANE * mb_cols_aligned_to_sb(cm));
+  vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
+                                      MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));
   vpx_memset(cm->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-                                       mb_cols_aligned_to_sb(cm));
+                                       mi_cols_aligned_to_sb(cm));
 }
 
 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 9e3bebd..7d4906c 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -36,7 +36,7 @@
 
   // Further step/diamond searches as necessary
   int step_param = cpi->sf.first_step +
-      (cpi->Speed < 8 ? (cpi->Speed > 5 ? 1 : 0) : 2);
+      (cpi->speed < 8 ? (cpi->speed > 5 ? 1 : 0) : 2);
 
   vp9_clamp_mv_min_max(x, ref_mv);
 
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 5fdbbef..782816f 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -695,7 +695,7 @@
 void vp9_set_speed_features(VP9_COMP *cpi) {
   SPEED_FEATURES *sf = &cpi->sf;
   int mode = cpi->compressor_speed;
-  int speed = cpi->Speed;
+  int speed = cpi->speed;
   int i;
 
   // Only modes 0 and 1 supported for now in experimental code basae
@@ -830,7 +830,7 @@
   cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
 
 #ifdef SPEEDSTATS
-  frames_at_speed[cpi->Speed]++;
+  frames_at_speed[cpi->speed]++;
 #endif
 }
 
@@ -1215,7 +1215,7 @@
     cpi->last_boosted_qindex = cpi->oxcf.fixed_q;
   }
 
-  cpi->Speed = cpi->oxcf.cpu_used;
+  cpi->speed = cpi->oxcf.cpu_used;
 
   if (cpi->oxcf.lag_in_frames == 0) {
     // force to allowlag to 0 if lag_in_frames is 0;
@@ -3851,12 +3851,12 @@
   for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
     if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) {
       memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i]));
-      continue;
+    } else {
+      YV12_BUFFER_CONFIG *fb = &cm->yv12_fb[cm->active_ref_idx[i]];
+      vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i],
+                                        fb->y_crop_width, fb->y_crop_height,
+                                        cm->width, cm->height);
     }
-
-    vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i],
-                                      &cm->yv12_fb[cm->active_ref_idx[i]],
-                                      cm->width, cm->height);
   }
 
   vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index d3ae249..aba4c0e 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -247,8 +247,7 @@
   BLOCK_8X16,
   BLOCK_16X8,
   BLOCK_16X16,
-  BLOCK_MAX_SEGMENTS,
-  BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
+  BLOCK_32X32,
   BLOCK_32X16,
   BLOCK_16X32,
   BLOCK_64X32,
@@ -467,7 +466,7 @@
   // for real time encoding
   int avg_encode_time;              // microsecond
   int avg_pick_mode_time;            // microsecond
-  int Speed;
+  int speed;
   unsigned int cpu_freq;           // Mhz
   int compressor_speed;
 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 92e58f1..1b143f5 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1748,6 +1748,13 @@
   vp9_clear_system_state();
 }
 
+// Returns the enum BlockSize of bsize in plane pd, after its subsampling.
+static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
+                                           struct macroblockd_plane *pd) {
+  return get_block_size(4 << (b_width_log2(bsize) - pd->subsampling_x),
+                        4 << (b_height_log2(bsize) - pd->subsampling_y));
+}
+
 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                             MACROBLOCK *x, MACROBLOCKD *xd,
                             int *out_rate_sum, int *out_dist_sum) {
@@ -1761,6 +1768,7 @@
     struct macroblock_plane *const p = &x->plane[i];
     struct macroblockd_plane *const pd = &xd->plane[i];
 
+    // TODO(dkovalev): duplicates get_plane_block_size(); reuse that helper.
     const int bwl = b_width_log2(bsize) - pd->subsampling_x;
     const int bhl = b_height_log2(bsize) - pd->subsampling_y;
     const enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl);
@@ -1778,42 +1786,6 @@
   *out_dist_sum = dist_sum;
 }
 
-static enum BlockSize y_to_uv_block_size(enum BlockSize bs) {
-  switch (bs) {
-    case BLOCK_64X64: return BLOCK_32X32;
-    case BLOCK_64X32: return BLOCK_32X16;
-    case BLOCK_32X64: return BLOCK_16X32;
-    case BLOCK_32X32: return BLOCK_16X16;
-    case BLOCK_32X16: return BLOCK_16X8;
-    case BLOCK_16X32: return BLOCK_8X16;
-    case BLOCK_16X16: return BLOCK_8X8;
-    case BLOCK_16X8:  return BLOCK_8X4;
-    case BLOCK_8X16:  return BLOCK_4X8;
-    case BLOCK_8X8:   return BLOCK_4X4;
-    default:
-      assert(0);
-      return -1;
-  }
-}
-
-static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) {
-  switch (bs) {
-    case BLOCK_SIZE_SB64X64: return BLOCK_64X64;
-    case BLOCK_SIZE_SB64X32: return BLOCK_64X32;
-    case BLOCK_SIZE_SB32X64: return BLOCK_32X64;
-    case BLOCK_SIZE_SB32X32: return BLOCK_32X32;
-    case BLOCK_SIZE_SB32X16: return BLOCK_32X16;
-    case BLOCK_SIZE_SB16X32: return BLOCK_16X32;
-    case BLOCK_SIZE_MB16X16: return BLOCK_16X16;
-    case BLOCK_SIZE_SB16X8:  return BLOCK_16X8;
-    case BLOCK_SIZE_SB8X16:  return BLOCK_8X16;
-    case BLOCK_SIZE_SB8X8:   return BLOCK_8X8;
-    default:
-      assert(0);
-      return -1;
-  }
-}
-
 static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
@@ -1838,10 +1810,12 @@
                                  YV12_BUFFER_CONFIG *scaled_ref_frame,
                                  int mi_row, int mi_col) {
   const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
-  const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
-  const enum BlockSize uv_block_size = y_to_uv_block_size(block_size);
+
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
+  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
+  const enum BlockSize uv_block_size = get_plane_block_size(bsize,
+                                                            &xd->plane[1]);
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
   const int is_comp_pred = (mbmi->second_ref_frame > 0);
   const int num_refs = is_comp_pred ? 2 : 1;
@@ -2219,10 +2193,10 @@
                                   int *returndistortion,
                                   BLOCK_SIZE_TYPE bsize,
                                   PICK_MODE_CONTEXT *ctx) {
-  const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
   MB_PREDICTION_MODE this_mode;
   MB_PREDICTION_MODE best_mode = DC_PRED;
   MV_REFERENCE_FRAME ref_frame, second_ref = INTRA_FRAME;
@@ -2284,7 +2258,7 @@
     best_txfm_rd[i] = INT64_MAX;
 
   // Create a mask set to 1 for each frame used by a smaller resolution.
-  if (cpi->Speed > 0) {
+  if (cpi->speed > 0) {
     switch (block_size) {
       case BLOCK_64X64:
         for (i = 0; i < 4; i++) {
@@ -2324,8 +2298,8 @@
     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
     frame_mv[ZEROMV][ref_frame].as_int = 0;
   }
-  if (cpi->Speed == 0
-      || (cpi->Speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+  if (cpi->speed == 0
+      || (cpi->speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
     mbmi->mode = DC_PRED;
     for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
                       (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
@@ -2363,7 +2337,7 @@
         || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
       continue;
     }
-    if (cpi->Speed > 0) {
+    if (cpi->speed > 0) {
       if (!(ref_frame_mask & (1 << ref_frame))) {
         continue;
       }
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index d272cbb..30143d7 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -118,9 +118,9 @@
 #if ALT_REF_MC_ENABLED
 
 static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
-                                              YV12_BUFFER_CONFIG *arf_frame,
-                                              YV12_BUFFER_CONFIG *frame_ptr,
-                                              int mb_offset,
+                                              uint8_t *arf_frame_buf,
+                                              uint8_t *frame_ptr_buf,
+                                              int stride,
                                               int error_thresh) {
   MACROBLOCK *x = &cpi->mb;
   MACROBLOCKD* const xd = &x->e_mbd;
@@ -141,18 +141,16 @@
   best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;
 
   // Setup frame pointers
-  x->plane[0].src.buf = arf_frame->y_buffer + mb_offset;
-  x->plane[0].src.stride = arf_frame->y_stride;
-  xd->plane[0].pre[0].buf = frame_ptr->y_buffer + mb_offset;
-  xd->plane[0].pre[0].stride = arf_frame->y_stride;
+  x->plane[0].src.buf = arf_frame_buf;
+  x->plane[0].src.stride = stride;
+  xd->plane[0].pre[0].buf = frame_ptr_buf;
+  xd->plane[0].pre[0].stride = stride;
 
   // Further step/diamond searches as necessary
-  if (cpi->Speed < 8) {
-    step_param = cpi->sf.first_step +
-                 ((cpi->Speed > 5) ? 1 : 0);
-  } else {
+  if (cpi->speed < 8)
+    step_param = cpi->sf.first_step + ((cpi->speed > 5) ? 1 : 0);
+  else
     step_param = cpi->sf.first_step + 2;
-  }
 
   /*cpi->sf.search_method == HEX*/
   // TODO Check that the 16x16 vf & sdf are selected here
@@ -260,9 +258,9 @@
           // Find best match in this frame by MC
           err = temporal_filter_find_matching_mb_c
                 (cpi,
-                 cpi->frames[alt_ref_index],
-                 cpi->frames[frame],
-                 mb_y_offset,
+                 cpi->frames[alt_ref_index]->y_buffer + mb_y_offset,
+                 cpi->frames[frame]->y_buffer + mb_y_offset,
+                 cpi->frames[frame]->y_stride,
                  THRESH_LOW);
 #endif
           // Assign higher weight to matching MB if it's error
@@ -360,10 +358,10 @@
 }
 
 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
+  VP9_COMMON *const cm = &cpi->common;
+
   int frame = 0;
 
-  int num_frames_backward = 0;
-  int num_frames_forward = 0;
   int frames_to_blur_backward = 0;
   int frames_to_blur_forward = 0;
   int frames_to_blur = 0;
@@ -373,15 +371,13 @@
   int blur_type = cpi->oxcf.arnr_type;
   int max_frames = cpi->active_arnr_frames;
 
-  num_frames_backward = distance;
-  num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
-                       - (num_frames_backward + 1);
+  const int num_frames_backward = distance;
+  const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
+                               - (num_frames_backward + 1);
 
   switch (blur_type) {
     case 1:
-      /////////////////////////////////////////
       // Backward Blur
-
       frames_to_blur_backward = num_frames_backward;
 
       if (frames_to_blur_backward >= max_frames)
@@ -391,7 +387,6 @@
       break;
 
     case 2:
-      /////////////////////////////////////////
       // Forward Blur
 
       frames_to_blur_forward = num_frames_forward;
@@ -404,7 +399,6 @@
 
     case 3:
     default:
-      /////////////////////////////////////////
       // Center Blur
       frames_to_blur_forward = num_frames_forward;
       frames_to_blur_backward = num_frames_backward;
@@ -444,25 +438,22 @@
 
   // Setup scaling factors. Scaling on each of the arnr frames is not supported
   vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0],
-      &cpi->common.yv12_fb[cpi->common.new_fb_idx],
-      cpi->common.width,
-      cpi->common.height);
+      cm->yv12_fb[cm->new_fb_idx].y_crop_width,
+      cm->yv12_fb[cm->new_fb_idx].y_crop_height,
+      cm->width, cm->height);
   cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
 
   // Setup frame pointers, NULL indicates frame not included in filter
   vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
   for (frame = 0; frame < frames_to_blur; frame++) {
-    int which_buffer =  start_frame - frame;
+    int which_buffer = start_frame - frame;
     struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
                                                      which_buffer);
     cpi->frames[frames_to_blur - 1 - frame] = &buf->img;
   }
 
-  temporal_filter_iterate_c(
-    cpi,
-    frames_to_blur,
-    frames_to_blur_backward,
-    strength);
+  temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward,
+                            strength);
 }
 
 void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,