Eliminate temporal filtering for INTNL_ARF_UPDATE type

For the INTNL_ARF_UPDATE type, no temporal filtering is applied (the
filter strength is forced to 0). However, the current code still runs
the temporal filtering path, and the unchanged image is copied to
alt_ref_buffer for encoding. This patch eliminates the unnecessary
temporal filtering and frame copy, and uses the lookahead buffer image
directly.

Ran Borg test on midres set for several speeds:
         avg_psnr: ovr_psnr:  ssim:
speed 0: -0.003    -0.001     0.005
speed 1:  0.001     0.000    -0.004
speed 4: -0.001    -0.001    -0.001
speed 5:  0.004     0.004     0.002

The encoder is sped up by 2% - 2.3% at all speeds (speed 0 to speed 5).

STATS_CHANGED

Change-Id: If6467a324e2c2e4bd38600ce62cdecf26b8c561d
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index a838c01..268110c 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -430,9 +430,10 @@
 #if !CONFIG_REALTIME_ONLY
       if (oxcf->arnr_max_frames > 0) {
         // Produce the filtered ARF frame.
-        av1_temporal_filter(cpi, arf_src_index);
-        aom_extend_frame_borders(&cpi->alt_ref_buffer, av1_num_planes(cm));
-        *code_arf = 1;
+        *code_arf = av1_temporal_filter(cpi, arf_src_index);
+        if (*code_arf) {
+          aom_extend_frame_borders(&cpi->alt_ref_buffer, av1_num_planes(cm));
+        }
       }
 #endif
     }
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 6c3d1de..5ce3793 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -1489,7 +1489,7 @@
   *arnr_strength = estimate_strength(cpi, distance, group_boost, sigma);
 }
 
-void av1_temporal_filter(AV1_COMP *cpi, int distance) {
+int av1_temporal_filter(AV1_COMP *cpi, int distance) {
   RATE_CONTROL *const rc = &cpi->rc;
   int frame;
   int frames_to_blur;
@@ -1504,6 +1504,15 @@
   int rdmult = 0;
   double sigma = 0;
 
+  // TODO(yunqing): For INTNL_ARF_UPDATE type, the following me initialization
+  // is used somewhere unexpectedly. Should be resolved later.
+  // Initialize errorperbit, sadperbit16 and sadperbit4.
+  rdmult = av1_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
+  set_error_per_bit(&cpi->td.mb, rdmult);
+  av1_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);
+  av1_fill_mv_costs(cpi->common.fc, cpi->common.cur_frame_force_integer_mv,
+                    cpi->common.allow_high_precision_mv, &cpi->td.mb);
+
   // Apply context specific adjustments to the arnr filter parameters.
   if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
     // TODO(weitinglin): Currently, we enforce the filtering strength on
@@ -1511,7 +1520,10 @@
     // beneficial to use non-zero strength filtering.
     strength = 0;
     frames_to_blur = 1;
-  } else if (distance == -1) {
+    return 0;
+  }
+
+  if (distance == -1) {
     // Apply temporal filtering on key frame.
     strength = estimate_strength(cpi, distance, rc->gfu_boost, &sigma);
     // Number of frames for temporal filtering, could be tuned.
@@ -1556,14 +1568,9 @@
         frames[0]->y_crop_width, frames[0]->y_crop_height);
   }
 
-  // Initialize errorperbit, sadperbit16 and sadperbit4.
-  rdmult = av1_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
-  set_error_per_bit(&cpi->td.mb, rdmult);
-  av1_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);
-  av1_fill_mv_costs(cpi->common.fc, cpi->common.cur_frame_force_integer_mv,
-                    cpi->common.allow_high_precision_mv, &cpi->td.mb);
-
   temporal_filter_iterate_c(cpi, frames, frames_to_blur,
                             frames_to_blur_backward, strength, sigma,
                             distance == -1, &sf);
+
+  return 1;
 }
diff --git a/av1/encoder/temporal_filter.h b/av1/encoder/temporal_filter.h
index c0afe6f..b6abfba 100644
--- a/av1/encoder/temporal_filter.h
+++ b/av1/encoder/temporal_filter.h
@@ -34,7 +34,7 @@
 #define EDGE_THRESHOLD 50
 #define SQRT_PI_BY_2 1.25331413732
 
-void av1_temporal_filter(AV1_COMP *cpi, int distance);
+int av1_temporal_filter(AV1_COMP *cpi, int distance);
 double estimate_noise(const uint8_t *src, int width, int height, int stride,
                       int edge_thresh);
 double highbd_estimate_noise(const uint8_t *src8, int width, int height,