External partition: fix calculation of number of blocks

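Also handle the frame-boundary case: the step counts are now rounded
up, so partial blocks at the frame edge are included in the block count.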

Change-Id: Ia8de094a23dee629525cfb03c6a2a3c8a302e553
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index d4461ae..b2c4fce 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -107,8 +107,8 @@
       AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
   const int mi_height =
       AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
-  const int col_steps = mi_width / step;
-  const int row_steps = mi_height / step;
+  const int col_steps = (mi_width / step) + ((mi_width % step) > 0);
+  const int row_steps = (mi_height / step) + ((mi_height % step) > 0);
   const int num_blocks = col_steps * row_steps;
 
   if (features == NULL) {
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index 377defd..79291f7 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -2291,8 +2291,8 @@
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   const BLOCK_SIZE fixed_block_size = BLOCK_16X16;
-  const int col_step = mi_size_wide[bsize] / mi_size_wide[fixed_block_size];
-  const int row_step = mi_size_high[bsize] / mi_size_high[fixed_block_size];
+  const int col_step = mi_size_wide[fixed_block_size];
+  const int row_step = mi_size_high[fixed_block_size];
   SIMPLE_MOTION_DATA_TREE *sms_tree = NULL;
   SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree);
   av1_init_simple_motion_search_mvs_for_sb(cpi, NULL, x, sms_root, mi_row,
@@ -2300,7 +2300,13 @@
   av1_reset_simple_motion_tree_partition(sms_root, bsize);
   const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME
                                                         : LAST_FRAME };
-  const int num_blocks = col_step * row_step;
+  const int mi_width =
+      AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
+  const int mi_height =
+      AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
+  const int col_steps = (mi_width / col_step) + ((mi_width % col_step) > 0);
+  const int row_steps = (mi_height / row_step) + ((mi_height % row_step) > 0);
+  const int num_blocks = col_steps * row_steps;
   unsigned int *block_sse = aom_calloc(num_blocks, sizeof(*block_sse));
   unsigned int *block_var = aom_calloc(num_blocks, sizeof(*block_var));
   int idx = 0;