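Reduce predictor buffer size for tpl motion search

Replace the separate predictor16 (uint16_t) and predictor8 (uint8_t)
arrays, each MC_FLOW_NUM_PELS * 3 elements, with a single
DECLARE_ALIGNED(32, uint8_t, ...) array of MC_FLOW_NUM_PELS * 2
elements. When the current buffer is high bit depth the array is passed
through CONVERT_TO_BYTEPTR; otherwise it is used directly. Also spell
&predictor[0] as predictor at the call sites.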

Change-Id: Ic191a0b8ba037b91375d2797671a2dcf7403e36e
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index c9f24ee..b93b5c3 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -159,7 +159,7 @@
     src = xd->cur_buf->y_buffer + mb_y_offset;
     src_stride = xd->cur_buf->y_stride;
 
-    dst = &predictor[0];
+    dst = predictor;
     dst_stride = bw;
 
     av1_predict_intra_block(
@@ -180,10 +180,10 @@
       int64_t sse;
       if (is_cur_buf_hbd(xd)) {
         sse = aom_highbd_sse(xd->cur_buf->y_buffer + mb_y_offset,
-                             xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+                             xd->cur_buf->y_stride, predictor, bw, bw, bh);
       } else {
         sse = aom_sse(xd->cur_buf->y_buffer + mb_y_offset,
-                      xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+                      xd->cur_buf->y_stride, predictor, bw, bw, bh);
       }
       intra_cost = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
     }
@@ -240,13 +240,13 @@
                               mi_col * MI_SIZE, mi_row * MI_SIZE, xd, 0);
     if (use_satd) {
       if (is_cur_buf_hbd(xd)) {
-        aom_highbd_subtract_block(
-            bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
-            xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
+        aom_highbd_subtract_block(bh, bw, src_diff, bw,
+                                  xd->cur_buf->y_buffer + mb_y_offset,
+                                  xd->cur_buf->y_stride, predictor, bw, xd->bd);
       } else {
         aom_subtract_block(bh, bw, src_diff, bw,
                            xd->cur_buf->y_buffer + mb_y_offset,
-                           xd->cur_buf->y_stride, &predictor[0], bw);
+                           xd->cur_buf->y_stride, predictor, bw);
       }
       wht_fwd_txfm(src_diff, bw, coeff, tx_size, is_cur_buf_hbd(xd));
 
@@ -255,10 +255,10 @@
       int64_t sse;
       if (is_cur_buf_hbd(xd)) {
         sse = aom_highbd_sse(xd->cur_buf->y_buffer + mb_y_offset,
-                             xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+                             xd->cur_buf->y_stride, predictor, bw, bw, bh);
       } else {
         sse = aom_sse(xd->cur_buf->y_buffer + mb_y_offset,
-                      xd->cur_buf->y_stride, &predictor[0], bw, bw, bh);
+                      xd->cur_buf->y_stride, predictor, bw, bw, bh);
       }
       inter_cost = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
     }
@@ -471,9 +471,7 @@
   const BLOCK_SIZE bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
   av1_tile_init(&xd->tile, cm, 0, 0);
 
-  DECLARE_ALIGNED(32, uint16_t, predictor16[MC_FLOW_NUM_PELS * 3]);
-  DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 3]);
-  uint8_t *predictor;
+  DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 2]);
   DECLARE_ALIGNED(32, int16_t, src_diff[MC_FLOW_NUM_PELS]);
   DECLARE_ALIGNED(32, tran_low_t, coeff[MC_FLOW_NUM_PELS]);
 
@@ -488,10 +486,8 @@
 
   xd->cur_buf = this_frame;
 
-  if (is_cur_buf_hbd(xd))
-    predictor = CONVERT_TO_BYTEPTR(predictor16);
-  else
-    predictor = predictor8;
+  uint8_t *predictor =
+      is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8;
 
   // TODO(jingning): remove the duplicate frames.
   for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx)
@@ -734,16 +730,11 @@
   const int pix_num = bw * bh;
   const TX_SIZE tx_size = max_txsize_lookup[bsize];
 
-  DECLARE_ALIGNED(32, uint16_t, predictor16[MC_FLOW_NUM_PELS * 3]);
-  DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 3]);
-  uint8_t *predictor;
+  DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 2]);
   DECLARE_ALIGNED(32, int16_t, src_diff[MC_FLOW_NUM_PELS]);
   DECLARE_ALIGNED(32, tran_low_t, coeff[MC_FLOW_NUM_PELS]);
-
-  if (is_cur_buf_hbd(xd))
-    predictor = CONVERT_TO_BYTEPTR(predictor16);
-  else
-    predictor = predictor8;
+  uint8_t *predictor =
+      is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8;
 
   // Initialize advanced prediction parameters as default values
   struct scale_factors sf;
@@ -785,7 +776,7 @@
             src->y_buffer + mi_row * MI_SIZE * src->y_stride + mi_col * MI_SIZE;
         const int src_stride = src->y_stride;
 
-        uint8_t *dst_buf = &predictor[0];
+        uint8_t *dst_buf = predictor;
         const int dst_stride = bw;
 
         av1_predict_intra_block(cm, xd, bw, bh, tx_size, mode, 0, 0,
@@ -829,7 +820,7 @@
                         ref->y_stride, bsize, mi_row, mi_col);
 
       av1_build_inter_predictor(
-          ref->y_buffer + mb_y_offset_ref, ref->y_stride, &predictor[0], bw,
+          ref->y_buffer + mb_y_offset_ref, ref->y_stride, predictor, bw,
           &x->best_mv.as_mv, &sf, bw, bh, &conv_params, kernel, &warp_types,
           mi_col * MI_SIZE, mi_row * MI_SIZE, 0, 0, MV_PRECISION_Q3,
           mi_col * MI_SIZE, mi_row * MI_SIZE, xd, 0);
@@ -837,10 +828,10 @@
         if (is_cur_buf_hbd(xd)) {
           aom_highbd_subtract_block(bh, bw, src_diff, bw,
                                     src->y_buffer + mb_y_offset, src->y_stride,
-                                    &predictor[0], bw, xd->bd);
+                                    predictor, bw, xd->bd);
         } else {
           aom_subtract_block(bh, bw, src_diff, bw, src->y_buffer + mb_y_offset,
-                             src->y_stride, &predictor[0], bw);
+                             src->y_stride, predictor, bw);
         }
         wht_fwd_txfm(src_diff, bw, coeff, tx_size, is_cur_buf_hbd(xd));
         inter_cost = aom_satd(coeff, pix_num);
@@ -848,10 +839,10 @@
         int64_t sse;
         if (is_cur_buf_hbd(xd)) {
           sse = aom_highbd_sse(src->y_buffer + mb_y_offset, src->y_stride,
-                               &predictor[0], bw, bw, bh);
+                               predictor, bw, bw, bh);
         } else {
-          sse = aom_sse(src->y_buffer + mb_y_offset, src->y_stride,
-                        &predictor[0], bw, bw, bh);
+          sse = aom_sse(src->y_buffer + mb_y_offset, src->y_stride, predictor,
+                        bw, bw, bh);
         }
         inter_cost = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
       }