Optimize allocation of MI buffers

A single partition size (16x16) is used in the stats generation stage
(first pass/LAP); however, the MI buffers are allocated at 4x4
granularity (or 8x8 if the resolution is >= 4k). Set mi_alloc_bsize to
16x16 in the stats generation stage to reduce the memory footprint of
first pass/LAP.

Observed memory footprint reduction in single-pass AOMQ.

Resolution  Memory reduction
            (single thread)
1920x1080   ~4.25%

Memory measuring command:
$ command time -v ./aomenc ...

Change-Id: I8c8aea682856b50095fe594bed013dfa69c25487
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index ac0c915..a70df19 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -640,8 +640,7 @@
   cpi->vaq_refresh = 0;
 }
 
-static void enc_set_mb_mi(CommonModeInfoParams *mi_params, int width,
-                          int height) {
+static void set_mb_mi(CommonModeInfoParams *mi_params, int width, int height) {
   // Ensure that the decoded width and height are both multiples of
   // 8 luma pixels (note: this may only be a multiple of 4 chroma pixels if
   // subsampling is used).
@@ -658,9 +657,6 @@
   mi_params->mb_rows = (mi_params->mi_rows + 2) >> 2;
   mi_params->MBs = mi_params->mb_rows * mi_params->mb_cols;
 
-  const int is_4k_or_larger = AOMMIN(width, height) >= 2160;
-
-  mi_params->mi_alloc_bsize = is_4k_or_larger ? BLOCK_8X8 : BLOCK_4X4;
   const int mi_alloc_size_1d = mi_size_wide[mi_params->mi_alloc_bsize];
   mi_params->mi_alloc_stride =
       (mi_params->mi_stride + mi_alloc_size_1d - 1) / mi_alloc_size_1d;
@@ -673,6 +669,21 @@
 #endif
 }
 
+static void enc_set_mb_mi(CommonModeInfoParams *mi_params, int width,
+                          int height) {
+  const int is_4k_or_larger = AOMMIN(width, height) >= 2160;
+  mi_params->mi_alloc_bsize = is_4k_or_larger ? BLOCK_8X8 : BLOCK_4X4;
+
+  set_mb_mi(mi_params, width, height);
+}
+
+static void stat_stage_set_mb_mi(CommonModeInfoParams *mi_params, int width,
+                                 int height) {
+  mi_params->mi_alloc_bsize = BLOCK_16X16;
+
+  set_mb_mi(mi_params, width, height);
+}
+
 static void enc_setup_mi(CommonModeInfoParams *mi_params) {
   const int mi_grid_size =
       mi_params->mi_stride * calc_mi_size(mi_params->mi_rows);
@@ -3037,11 +3048,15 @@
   }
 
   cm->error.setjmp = 1;
+  cpi->lap_enabled = num_lap_buffers > 0;
+  cpi->compressor_stage = stage;
 
   CommonModeInfoParams *const mi_params = &cm->mi_params;
   mi_params->free_mi = enc_free_mi;
   mi_params->setup_mi = enc_setup_mi;
-  mi_params->set_mb_mi = enc_set_mb_mi;
+  mi_params->set_mb_mi = (oxcf->pass == 1 || cpi->compressor_stage == LAP_STAGE)
+                             ? stat_stage_set_mb_mi
+                             : enc_set_mb_mi;
 
   mi_params->mi_alloc_bsize = BLOCK_4X4;
 
@@ -3055,8 +3070,6 @@
 
   cpi->common.buffer_pool = pool;
 
-  cpi->lap_enabled = num_lap_buffers > 0;
-  cpi->compressor_stage = stage;
   init_config(cpi, oxcf);
   if (cpi->compressor_stage == LAP_STAGE) {
     cpi->oxcf.lag_in_frames = lap_lag_in_frames;