Reduce line buffer size for Wiener filter.

This patch forces the vertical filtering for the top and bottom
rows of a processing unit for the Wiener filter to be 5-tap.
The 5 taps are derived from the primary 7-tap filter by forcing
the taps at the end to be zero, and absorbing their weights into
the other taps to maintain normalization.
This effectively reduces the line buffer size for the luma Wiener
filter to 4 (from 6).

Change-Id: I5e21b58369777eabf553a8987387d112f98a5598
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index e530137..9e682ea 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -358,8 +358,8 @@
   int ep, bestep = 0;
   int64_t err, besterr = -1;
   int exqd[2], bestxqd[2] = { 0, 0 };
-  int flt1_stride = width;
-  int flt2_stride = width;
+  int flt1_stride = ((width + 7) & ~7) + 8;
+  int flt2_stride = ((width + 7) & ~7) + 8;
   assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
          pu_width == RESTORATION_PROC_UNIT_SIZE);
   assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
@@ -385,11 +385,11 @@
                                      flt1_stride, sgr_params[ep].corner,
                                      sgr_params[ep].edge);
 #else
-          av1_selfguided_restoration_highbd_c(
+          av1_selfguided_restoration_highbd(
               dat_p, w, h, dat_stride, flt1_p, flt1_stride, bit_depth,
               sgr_params[ep].r1, sgr_params[ep].e1, tmpbuf2);
 #endif  // USE_HIGHPASS_IN_SGRPROJ
-          av1_selfguided_restoration_highbd_c(
+          av1_selfguided_restoration_highbd(
               dat_p, w, h, dat_stride, flt2_p, flt2_stride, bit_depth,
               sgr_params[ep].r2, sgr_params[ep].e2, tmpbuf2);
         }
@@ -406,13 +406,13 @@
           av1_highpass_filter(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
                               sgr_params[ep].corner, sgr_params[ep].edge);
 #else
-        av1_selfguided_restoration_c(dat_p, w, h, dat_stride, flt1_p,
-                                     flt1_stride, sgr_params[ep].r1,
-                                     sgr_params[ep].e1, tmpbuf2);
+        av1_selfguided_restoration(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
+                                   sgr_params[ep].r1, sgr_params[ep].e1,
+                                   tmpbuf2);
 #endif  // USE_HIGHPASS_IN_SGRPROJ
-          av1_selfguided_restoration_c(dat_p, w, h, dat_stride, flt2_p,
-                                       flt2_stride, sgr_params[ep].r2,
-                                       sgr_params[ep].e2, tmpbuf2);
+          av1_selfguided_restoration(dat_p, w, h, dat_stride, flt2_p,
+                                     flt2_stride, sgr_params[ep].r2,
+                                     sgr_params[ep].e2, tmpbuf2);
         }
 #if CONFIG_HIGHBITDEPTH
     }