Document buffers in MACROBLOCK

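This CL adds a brief description for the buffers in MACROBLOCK. In
addition, the four buffers used to speed up obmc search are grouped into
a new struct called OBMCBuffer, and their duplicated allocation and free
code is replaced by av1_alloc_obmc_buffers() and
av1_release_obmc_buffers().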

BUG=aomedia:2618

Change-Id: Ibb9e57c359504c81af9c6aa50ccf880901f7c5c3
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 9d4c6b2..87456f5 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -224,6 +224,21 @@
   uint8_t *tmp_best_mask_buf;  // backup of the best segmentation mask
 } CompoundTypeRdBuffers;
 
+// Struct for buffers used to speed up rdopt for obmc.
+// See the comments for calc_target_weighted_pred for details.
+typedef struct {
+  // A new source weighted with the above and left predictors for efficient
+  // rdopt in obmc mode.
+  int32_t *wsrc;
+  // A new mask constructed from the original horizontal and vertical masks
+  // for fast obmc rdopt.
+  int32_t *mask;
+  // Holds a prediction using the above/left predictor. This is used to build
+  // the obmc predictor.
+  uint8_t *above_pred;
+  uint8_t *left_pred;
+} OBMCBuffer;
+
 typedef struct {
   // A multiplier that converts mv cost to l2 error.
   int errorperbit;
@@ -308,14 +323,13 @@
   int pred_mv_sad[REF_FRAMES];
   int best_pred_mv_sad;
 
-  int32_t *wsrc_buf;
-  int32_t *mask_buf;
-  uint8_t *above_pred_buf;
-  uint8_t *left_pred_buf;
-
+  // Buffers used to hold/create predictions during rdopt
+  OBMCBuffer obmc_buffer;
   PALETTE_BUFFER *palette_buffer;
   CompoundTypeRdBuffers comp_rd_buffer;
 
+  // A buffer used for convolution during the averaging prediction in compound
+  // mode.
   CONV_BUF_TYPE *tmp_conv_dst;
 
   // Points to a buffer that is used to hold temporary prediction results. This
@@ -323,7 +337,7 @@
   // 1. This is a temporary buffer used to pingpong the prediction in
   //    handle_inter_mode.
   // 2. xd->tmp_obmc_bufs also points to this buffer, and is used in ombc
-  //     prediction.
+  //    prediction.
   uint8_t *tmp_pred_bufs[2];
 
   FRAME_CONTEXT *row_ctx;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 23a6c80..ebfe6cf 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -834,14 +834,7 @@
   cpi->vmaf_info.rdmult_scaling_factors = NULL;
 #endif
 
-  aom_free(cpi->td.mb.above_pred_buf);
-  cpi->td.mb.above_pred_buf = NULL;
-
-  aom_free(cpi->td.mb.left_pred_buf);
-  cpi->td.mb.left_pred_buf = NULL;
-
-  aom_free(cpi->td.mb.wsrc_buf);
-  cpi->td.mb.wsrc_buf = NULL;
+  av1_release_obmc_buffers(&cpi->td.mb.obmc_buffer);
 
   aom_free(cpi->td.mb.inter_modes_info);
   cpi->td.mb.inter_modes_info = NULL;
@@ -851,8 +844,6 @@
       aom_free(cpi->td.mb.intrabc_hash_info.hash_value_buffer[i][j]);
       cpi->td.mb.intrabc_hash_info.hash_value_buffer[i][j] = NULL;
     }
-  aom_free(cpi->td.mb.mask_buf);
-  cpi->td.mb.mask_buf = NULL;
 
   aom_free(cm->tpl_mvs);
   cm->tpl_mvs = NULL;
@@ -2689,6 +2680,35 @@
   *block_mis_log2 = is_720p_or_larger ? 2 : 1;
 }
 
+void av1_alloc_obmc_buffers(OBMCBuffer *obmc_buffer, AV1_COMMON *cm) {
+  CHECK_MEM_ERROR(
+      cm, obmc_buffer->wsrc,
+      (int32_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*obmc_buffer->wsrc)));
+  CHECK_MEM_ERROR(
+      cm, obmc_buffer->mask,
+      (int32_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*obmc_buffer->mask)));
+  CHECK_MEM_ERROR(
+      cm, obmc_buffer->above_pred,
+      (uint8_t *)aom_memalign(
+          16, MAX_MB_PLANE * MAX_SB_SQUARE * sizeof(*obmc_buffer->above_pred)));
+  CHECK_MEM_ERROR(
+      cm, obmc_buffer->left_pred,
+      (uint8_t *)aom_memalign(
+          16, MAX_MB_PLANE * MAX_SB_SQUARE * sizeof(*obmc_buffer->left_pred)));
+}
+
+void av1_release_obmc_buffers(OBMCBuffer *obmc_buffer) {
+  aom_free(obmc_buffer->mask);
+  aom_free(obmc_buffer->above_pred);
+  aom_free(obmc_buffer->left_pred);
+  aom_free(obmc_buffer->wsrc);
+
+  obmc_buffer->mask = NULL;
+  obmc_buffer->above_pred = NULL;
+  obmc_buffer->left_pred = NULL;
+  obmc_buffer->wsrc = NULL;
+}
+
 void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
                                         CompoundTypeRdBuffers *const bufs) {
   CHECK_MEM_ERROR(
@@ -3164,18 +3184,7 @@
 
   int sb_mi_size = av1_get_sb_mi_size(cm);
 
-  CHECK_MEM_ERROR(
-      cm, cpi->td.mb.above_pred_buf,
-      (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                      sizeof(*cpi->td.mb.above_pred_buf)));
-  CHECK_MEM_ERROR(
-      cm, cpi->td.mb.left_pred_buf,
-      (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                      sizeof(*cpi->td.mb.left_pred_buf)));
-
-  CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf,
-                  (int32_t *)aom_memalign(
-                      16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf)));
+  av1_alloc_obmc_buffers(&cpi->td.mb.obmc_buffer, cm);
 
   CHECK_MEM_ERROR(
       cm, cpi->td.mb.inter_modes_info,
@@ -3191,10 +3200,6 @@
 
   cpi->td.mb.intrabc_hash_info.g_crc_initialized = 0;
 
-  CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf,
-                  (int32_t *)aom_memalign(
-                      16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
-
   CHECK_MEM_ERROR(cm, cpi->td.mb.mbmi_ext,
                   aom_calloc(sb_mi_size, sizeof(*cpi->td.mb.mbmi_ext)));
 
@@ -3608,9 +3613,7 @@
       for (int j = 0; j < 2; ++j) {
         aom_free(thread_data->td->tmp_pred_bufs[j]);
       }
-      aom_free(thread_data->td->above_pred_buf);
-      aom_free(thread_data->td->left_pred_buf);
-      aom_free(thread_data->td->wsrc_buf);
+      av1_release_obmc_buffers(&thread_data->td->obmc_buffer);
       aom_free(thread_data->td->vt64x64);
 
       aom_free(thread_data->td->inter_modes_info);
@@ -3620,7 +3623,6 @@
           thread_data->td->hash_value_buffer[x][y] = NULL;
         }
       }
-      aom_free(thread_data->td->mask_buf);
       aom_free(thread_data->td->counts);
       aom_free(thread_data->td->mbmi_ext);
       av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index da7e6d2..b93ade3 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -750,10 +750,7 @@
   SIMPLE_MOTION_DATA_TREE *sms_root;
   InterModesInfo *inter_modes_info;
   uint32_t *hash_value_buffer[2][2];
-  int32_t *wsrc_buf;
-  int32_t *mask_buf;
-  uint8_t *above_pred_buf;
-  uint8_t *left_pred_buf;
+  OBMCBuffer obmc_buffer;
   PALETTE_BUFFER *palette_buffer;
   CompoundTypeRdBuffers comp_rd_buffer;
   CONV_BUF_TYPE *tmp_conv_dst;
@@ -1674,6 +1671,8 @@
 
 int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
 
+void av1_alloc_obmc_buffers(OBMCBuffer *obmc_buffer, AV1_COMMON *cm);
+void av1_release_obmc_buffers(OBMCBuffer *obmc_buffer);
 void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
                                         CompoundTypeRdBuffers *const bufs);
 void av1_release_compound_type_rd_buffers(CompoundTypeRdBuffers *const bufs);
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 6c36894..09039af 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -485,19 +485,7 @@
       av1_setup_sms_tree(cpi, thread_data->td);
       av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
 
-      CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
-                      (uint8_t *)aom_memalign(
-                          16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                  sizeof(*thread_data->td->above_pred_buf)));
-      CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
-                      (uint8_t *)aom_memalign(
-                          16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                  sizeof(*thread_data->td->left_pred_buf)));
-
-      CHECK_MEM_ERROR(
-          cm, thread_data->td->wsrc_buf,
-          (int32_t *)aom_memalign(
-              16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
+      av1_alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
 
       CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
                       (InterModesInfo *)aom_malloc(
@@ -511,10 +499,6 @@
                   AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
                   sizeof(*thread_data->td->hash_value_buffer[0][0])));
 
-      CHECK_MEM_ERROR(
-          cm, thread_data->td->mask_buf,
-          (int32_t *)aom_memalign(
-              16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf)));
       // Allocate frame counters in thread data.
       CHECK_MEM_ERROR(cm, thread_data->td->counts,
                       aom_calloc(1, sizeof(*thread_data->td->counts)));
@@ -637,9 +621,7 @@
     if (thread_data->td != &cpi->td) {
       thread_data->td->mb = cpi->td.mb;
       thread_data->td->rd_counts = cpi->td.rd_counts;
-      thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
-      thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
-      thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
+      thread_data->td->mb.obmc_buffer = thread_data->td->obmc_buffer;
 
       thread_data->td->mb.inter_modes_info = thread_data->td->inter_modes_info;
       for (int x = 0; x < 2; x++) {
@@ -652,7 +634,6 @@
               thread_data->td->hash_value_buffer[x][y];
         }
       }
-      thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
       thread_data->td->mb.mbmi_ext = thread_data->td->mbmi_ext;
     }
     if (thread_data->td->counts != &cpi->counts) {
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index ffe014c..8827486 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -51,8 +51,8 @@
 
   av1_set_ms_compound_refs(ms_buffers, NULL, NULL, 0, 0);
 
-  ms_buffers->wsrc = x->wsrc_buf;
-  ms_buffers->obmc_mask = x->mask_buf;
+  ms_buffers->wsrc = x->obmc_buffer.wsrc;
+  ms_buffers->obmc_mask = x->obmc_buffer.mask;
 }
 
 void av1_make_default_fullpel_ms_params(FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fd30d46..6496493 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3401,28 +3401,28 @@
       ~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
 }
 
-static AOM_INLINE void init_pred_buf(const MACROBLOCK *const x,
-                                     HandleInterModeArgs *const args) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  if (is_cur_buf_hbd(xd)) {
+static AOM_INLINE void init_neighbor_pred_buf(
+    const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
+    int is_hbd) {
+  if (is_hbd) {
     const int len = sizeof(uint16_t);
-    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
-    args->above_pred_buf[1] =
-        CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len);
+    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
+    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
+                                                 (MAX_SB_SQUARE >> 1) * len);
     args->above_pred_buf[2] =
-        CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
-    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
+        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
+    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
     args->left_pred_buf[1] =
-        CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len);
+        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
     args->left_pred_buf[2] =
-        CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
+        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
   } else {
-    args->above_pred_buf[0] = x->above_pred_buf;
-    args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1);
-    args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE;
-    args->left_pred_buf[0] = x->left_pred_buf;
-    args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1);
-    args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE;
+    args->above_pred_buf[0] = obmc_buffer->above_pred;
+    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
+    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
+    args->left_pred_buf[0] = obmc_buffer->left_pred;
+    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
+    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
   }
 }
 
@@ -3439,7 +3439,7 @@
   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   unsigned char segment_id = mbmi->segment_id;
 
-  init_pred_buf(x, args);
+  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
   av1_collect_neighbors_ref_counts(xd);
   estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
                            ref_costs_comp);
@@ -5225,7 +5225,7 @@
 }
 
 struct calc_target_weighted_pred_ctxt {
-  const MACROBLOCK *x;
+  const OBMCBuffer *obmc_buffer;
   const uint8_t *tmp;
   int tmp_stride;
   int overlap;
@@ -5245,8 +5245,8 @@
   const int bw = xd->width << MI_SIZE_LOG2;
   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
 
-  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
-  int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
+  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
+  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
   const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
   const int is_hbd = is_cur_buf_hbd(xd);
 
@@ -5293,8 +5293,8 @@
   const int bw = xd->width << MI_SIZE_LOG2;
   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
 
-  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
-  int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
+  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
+  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
   const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
   const int is_hbd = is_cur_buf_hbd(xd);
 
@@ -5374,8 +5374,9 @@
   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
   const int bw = xd->width << MI_SIZE_LOG2;
   const int bh = xd->height << MI_SIZE_LOG2;
-  int32_t *mask_buf = x->mask_buf;
-  int32_t *wsrc_buf = x->wsrc_buf;
+  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
+  int32_t *mask_buf = obmc_buffer->mask;
+  int32_t *wsrc_buf = obmc_buffer->wsrc;
 
   const int is_hbd = is_cur_buf_hbd(xd);
   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
@@ -5391,8 +5392,8 @@
   if (xd->up_available) {
     const int overlap =
         AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
-    struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
-                                                   overlap };
+    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
+                                                   above_stride, overlap };
     foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
                                   max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                   calc_target_weighted_pred_above, &ctxt);
@@ -5407,8 +5408,8 @@
   if (xd->left_available) {
     const int overlap =
         AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
-    struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
-                                                   overlap };
+    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
+                                                   left_stride, overlap };
     foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
                                  max_neighbor_obmc[mi_size_high_log2[bsize]],
                                  calc_target_weighted_pred_left, &ctxt);