Simple optimization of `temporal_filter.c`.

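Optimize the function that builds predictors:
  (1) Avoid multiplications inside the for loops.
  (2) Avoid the switch-case statement.
  (3) Improve readability.
Also assert that `num_planes` is within [1, MAX_MB_PLANE] and zero-initialize
the predictor buffers right after they are declared.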

Change-Id: Iafc6d31a8a7a9ce8d6eeb7fc704670c784cd1ec5
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 24de6db..a9d554e 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -107,12 +107,14 @@
     const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
     const struct scale_factors *scale, const int num_planes,
     const int use_subblock, const MV *subblock_mvs, uint8_t *pred) {
+  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
+
   // Information of the entire block.
   const int mb_height = block_size_high[block_size];  // Height.
   const int mb_width = block_size_wide[block_size];   // Width.
   const int mb_size = mb_height * mb_width;           // Number of pixels.
-  const int mb_y = mb_height * mb_row;                // Y-coord.
-  const int mb_x = mb_width * mb_col;                 // X-coord.
+  const int mb_y = mb_height * mb_row;                // Y-coord (Top-left).
+  const int mb_x = mb_width * mb_col;                 // X-coord (Top-left).
   const int bit_depth = mbd->bd;                      // Bit depth.
   const int is_high_bitdepth = is_cur_buf_hbd(mbd);   // Is high bit-depth?
   const int is_intrabc = 0;                           // Is intra-copied?
@@ -129,48 +131,50 @@
   const int_interpfilters interp_filters =
       av1_broadcast_interp_filter(MULTITAP_SHARP);
 
-  InterPredParams inter_pred_params;
-  struct buf_2d ref_buf = { NULL, NULL, 0, 0, 0 };
-
-  // Handle Y-plane and UV-plane.
+  // Handle Y-plane, U-plane and V-plane (if needed) in sequence.
+  int plane_offset = 0;
   for (int plane = 0; plane < num_planes; ++plane) {
     const int subsampling_y = mbd->plane[plane].subsampling_y;
     const int subsampling_x = mbd->plane[plane].subsampling_x;
-    const int plane_y = mb_y >> subsampling_y;
-    const int plane_x = mb_x >> subsampling_x;
-    const int h = block_height >> subsampling_y;
-    const int w = block_width >> subsampling_x;
-    const int pred_stride = mb_width >> subsampling_x;
-    switch (plane) {
-      case 0: ref_buf.buf0 = ref_frame->y_buffer; break;
-      case 1: ref_buf.buf0 = ref_frame->u_buffer; break;
-      case 2: ref_buf.buf0 = ref_frame->v_buffer; break;
-      default: assert(0 && "Number of planes should be at most 3.");
-    }
-    ref_buf.height = (plane == 0) ? ref_frame->y_height : ref_frame->uv_height;
-    ref_buf.width = (plane == 0) ? ref_frame->y_width : ref_frame->uv_width;
-    ref_buf.stride = (plane == 0) ? ref_frame->y_stride : ref_frame->uv_stride;
+    // Information of the block and its sub-blocks in current plane.
+    const int plane_h = mb_height >> subsampling_y;  // Plane height.
+    const int plane_w = mb_width >> subsampling_x;   // Plane width.
+    const int plane_y = mb_y >> subsampling_y;       // Y-coord (Top-left).
+    const int plane_x = mb_x >> subsampling_x;       // X-coord (Top-left).
+    const int h = block_height >> subsampling_y;     // Sub-block height.
+    const int w = block_width >> subsampling_x;      // Sub-block width.
+    const int is_y_plane = (plane == 0);             // Is Y-plane?
+
+    const struct buf_2d ref_buf = { NULL, ref_frame->buffers[plane],
+                                    ref_frame->widths[is_y_plane ? 0 : 1],
+                                    ref_frame->heights[is_y_plane ? 0 : 1],
+                                    ref_frame->strides[is_y_plane ? 0 : 1] };
 
     // Handle entire block or sub-blocks if needed.
-    for (int i = 0; i < num_blocks; ++i) {
-      for (int j = 0; j < num_blocks; ++j) {
+    int subblock_idx = 0;
+    for (int i = 0; i < plane_h; i += h) {
+      for (int j = 0; j < plane_w; j += w) {
         // Choose proper motion vector.
-        const MV mv = use_subblock ? subblock_mvs[i * num_blocks + j] : mb_mv;
+        const MV mv = use_subblock ? subblock_mvs[subblock_idx] : mb_mv;
         assert(mv.row >= INT16_MIN && mv.row <= INT16_MAX &&
                mv.col >= INT16_MIN && mv.col <= INT16_MAX);
 
-        const int y = plane_y + i * h;
-        const int x = plane_x + j * w;
+        const int y = plane_y + i;
+        const int x = plane_x + j;
 
-        const int offset = i * h * pred_stride + j * w;
+        // Build predictor for each sub-block on current plane.
+        InterPredParams inter_pred_params;
         av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
                               subsampling_y, bit_depth, is_high_bitdepth,
                               is_intrabc, scale, &ref_buf, interp_filters);
         inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
-        av1_build_inter_predictor(&pred[mb_size * plane + offset], pred_stride,
-                                  &mv, &inter_pred_params);
+        av1_build_inter_predictor(&pred[plane_offset + i * plane_w + j],
+                                  plane_w, &mv, &inter_pred_params);
+
+        ++subblock_idx;
       }
     }
+    plane_offset += mb_size;
   }
 }
 
@@ -1004,6 +1008,8 @@
   uint8_t *dst1, *dst2;
   DECLARE_ALIGNED(32, uint16_t, predictor16[BLK_PELS * 3]);
   DECLARE_ALIGNED(32, uint8_t, predictor8[BLK_PELS * 3]);
+  memset(predictor16, 0, BLK_PELS * 3 * sizeof(predictor16[0]));
+  memset(predictor8, 0, BLK_PELS * 3 * sizeof(predictor8[0]));
   uint8_t *predictor;
   const int mb_uv_height = BH >> mbd->plane[1].subsampling_y;
   const int mb_uv_width = BW >> mbd->plane[1].subsampling_x;