Apply temporal filter after define_gf_group()

Tested this change on several datasets; the compression
performance remains neutral.

Regular q mode with --lag-in-frames=35
        avg_psnr ovr_psnr ssim
ugc360p 0.019    0.013    -0.019
lowres  0.009    0.012     0.069
midres  0.049    0.076    -0.030
hdres   0.003    -0.014   -0.039
hdres2  -0.205   -0.189   -0.118

Common testing condition (CTC) mode with --lag-in-frames=48
        avg_psnr ovr_psnr ssim
ugc360p  0.103   0.151   0.177
lowres   0.089   0.098   0.127
midres   0.088   0.097   0.076
hdres    0.106   0.114   0.066
hdres2   0.050   0.059   0.030

Common testing condition (CTC) mode with --lag-in-frames=35
        avg_psnr ovr_psnr ssim
ugc360p 0.148    0.194    0.223
lowres  0.130    0.133    0.189
midres  0.136    0.140    0.109
hdres   0.122    0.132    0.053
hdres2  0.260    0.398    0.294

Here we see a significant performance drop on hdres2 with
--lag-in-frames=35. This is because ARF filtering is moved to an earlier
stage, where one fewer future frame is available for the filtering
process. The performance drop disappears on hdres2 with
--lag-in-frames=48 because, in that setting, we always have enough
future frames for filtering.

STATS_CHANGED

BUG=aomedia:3144

Change-Id: Ic2d1cb624a7870826acf432b7126659525b33558
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index fe2b9ae..6dda945 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -978,20 +978,28 @@
     int show_existing_alt_ref = 0;
     // TODO(bohanli): figure out why we need frame_type in cm here.
     cm->current_frame.frame_type = frame_params->frame_type;
-    int arf_src_index = gf_group->arf_src_offset[cpi->gf_frame_index];
-    int is_forward_keyframe = 0;
-    if (gf_group->frame_type[cpi->gf_frame_index] == KEY_FRAME &&
-        gf_group->refbuf_state[cpi->gf_frame_index] == REFBUF_UPDATE)
-      is_forward_keyframe = 1;
-
-    const int code_arf = av1_temporal_filter(
-        cpi, arf_src_index, update_type, is_forward_keyframe,
-        &show_existing_alt_ref, &cpi->ppi->alt_ref_buffer);
-    if (code_arf) {
-      aom_extend_frame_borders(&cpi->ppi->alt_ref_buffer, av1_num_planes(cm));
-      frame_input->source = &cpi->ppi->alt_ref_buffer;
-      aom_copy_metadata_to_frame_buffer(frame_input->source,
-                                        source_buffer->metadata);
+    if (update_type == KF_UPDATE || update_type == ARF_UPDATE) {
+      int show_tf_buf = 0;
+      YV12_BUFFER_CONFIG *tf_buf = av1_tf_info_get_filtered_buf(
+          &cpi->ppi->tf_info, cpi->gf_frame_index, &show_tf_buf);
+      if (tf_buf != NULL) {
+        frame_input->source = tf_buf;
+        show_existing_alt_ref = show_tf_buf;
+      }
+    } else {
+      const int arf_src_index = gf_group->arf_src_offset[cpi->gf_frame_index];
+      // Right now, we are still using alt_ref_buffer due to
+      // implementation complexity.
+      // TODO(angiebird): Reuse the buffer in tf_info here.
+      const int code_arf = av1_temporal_filter(
+          cpi, arf_src_index, cpi->gf_frame_index, &show_existing_alt_ref,
+          &cpi->ppi->alt_ref_buffer);
+      if (code_arf) {
+        aom_extend_frame_borders(&cpi->ppi->alt_ref_buffer, av1_num_planes(cm));
+        frame_input->source = &cpi->ppi->alt_ref_buffer;
+        aom_copy_metadata_to_frame_buffer(frame_input->source,
+                                          source_buffer->metadata);
+      }
     }
     // Currently INTNL_ARF_UPDATE only do show_existing.
     if (update_type == ARF_UPDATE &&