Merge "Fix a couple of minor bugs in vp10_has_right and vp10_has_bottom" into nextgenv2
diff --git a/configure b/configure
index 9769880..5eec2a1 100755
--- a/configure
+++ b/configure
@@ -283,6 +283,7 @@
loop_restoration
ext_partition
obmc
+ affine_motion
"
CONFIG_LIST="
dependency_tracking
diff --git a/test/vp10_convolve_test.cc b/test/vp10_convolve_test.cc
index af34ffc..07b0dda 100644
--- a/test/vp10_convolve_test.cc
+++ b/test/vp10_convolve_test.cc
@@ -11,10 +11,10 @@
namespace {
TEST(VP10ConvolveTest, vp10_convolve8) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- INTERP_FILTER interp_filter = EIGHTTAP;
+ INTERP_FILTER interp_filter = EIGHTTAP_REGULAR;
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
- ptrdiff_t filter_size = filter_params.tap;
+ ptrdiff_t filter_size = filter_params.taps;
int filter_center = filter_size / 2 - 1;
uint8_t src[12 * 12];
int src_stride = filter_size;
@@ -39,9 +39,9 @@
subpel_y_q4, y_step_q4, avg);
const int16_t* x_filter =
- vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+ vp10_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
- vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+ vp10_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
vpx_convolve8_c(src + src_stride * filter_center + filter_center, src_stride,
dst1, dst_stride, x_filter, 16, y_filter, 16, w, h);
@@ -49,10 +49,10 @@
}
TEST(VP10ConvolveTest, vp10_convolve) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- INTERP_FILTER interp_filter = EIGHTTAP;
+ INTERP_FILTER interp_filter = EIGHTTAP_REGULAR;
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
- ptrdiff_t filter_size = filter_params.tap;
+ ptrdiff_t filter_size = filter_params.taps;
int filter_center = filter_size / 2 - 1;
uint8_t src[12 * 12];
int src_stride = filter_size;
@@ -78,9 +78,9 @@
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
const int16_t* x_filter =
- vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+ vp10_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
- vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+ vp10_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
@@ -100,10 +100,10 @@
TEST(VP10ConvolveTest, vp10_convolve_avg) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- INTERP_FILTER interp_filter = EIGHTTAP;
+ INTERP_FILTER interp_filter = EIGHTTAP_REGULAR;
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
- ptrdiff_t filter_size = filter_params.tap;
+ ptrdiff_t filter_size = filter_params.taps;
int filter_center = filter_size / 2 - 1;
uint8_t src0[12 * 12];
uint8_t src1[12 * 12];
@@ -157,10 +157,10 @@
#if CONFIG_VP9_HIGHBITDEPTH
TEST(VP10ConvolveTest, vp10_highbd_convolve) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- INTERP_FILTER interp_filter = EIGHTTAP;
+ INTERP_FILTER interp_filter = EIGHTTAP_REGULAR;
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
- ptrdiff_t filter_size = filter_params.tap;
+ ptrdiff_t filter_size = filter_params.taps;
int filter_center = filter_size / 2 - 1;
uint16_t src[12 * 12];
int src_stride = filter_size;
@@ -188,9 +188,9 @@
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
const int16_t* x_filter =
- vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+ vp10_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
- vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+ vp10_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
@@ -211,10 +211,10 @@
TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- INTERP_FILTER interp_filter = EIGHTTAP;
+ INTERP_FILTER interp_filter = EIGHTTAP_REGULAR;
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
- ptrdiff_t filter_size = filter_params.tap;
+ ptrdiff_t filter_size = filter_params.taps;
int filter_center = filter_size / 2 - 1;
uint16_t src0[12 * 12];
uint16_t src1[12 * 12];
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 3cea6e5..f96aa2e 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -39,8 +39,7 @@
} FRAME_TYPE;
#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
-#define IsInterpolatingFilter(filter) \
- (vp10_filter_kernels[filter][0][SUBPEL_TAPS / 2 - 1] == 128)
+#define IsInterpolatingFilter(filter) (vp10_is_interpolating_filter(filter))
#else
#define IsInterpolatingFilter(filter) (1)
#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index c56c88c..8bb653c 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -1348,23 +1348,23 @@
#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
const vpx_tree_index vp10_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)] = {
- -EIGHTTAP, 2,
- 4, -EIGHTTAP_SHARP,
+ -EIGHTTAP_REGULAR, 2,
+ 4, -MULTITAP_SHARP,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2,
};
#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
const vpx_tree_index vp10_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)] = {
- -EIGHTTAP, 2,
+ -EIGHTTAP_REGULAR, 2,
4, 6,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2,
- -EIGHTTAP_SHARP, -EIGHTTAP_SHARP2,
+ -MULTITAP_SHARP, -MULTITAP_SHARP2,
};
#else
const vpx_tree_index vp10_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)] = {
- -EIGHTTAP, 2,
- -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
+ -EIGHTTAP_REGULAR, 2,
+ -EIGHTTAP_SMOOTH, -MULTITAP_SHARP
};
#endif // CONFIG_EXT_INTERP
diff --git a/vp10/common/filter.c b/vp10/common/filter.c
index d9bfcbc..2023cd6 100644
--- a/vp10/common/filter.c
+++ b/vp10/common/filter.c
@@ -32,8 +32,9 @@
{ 0, 0, 0, 8, 120, 0, 0, 0 }
};
+#if USE_TEMPORALFILTER_12TAP
DECLARE_ALIGNED(16, static const int16_t,
- sub_pel_filters_12sharp[16][12]) = {
+ sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]) = {
// intfilt 0.8
{0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0},
{0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0},
@@ -52,10 +53,11 @@
{0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0},
{0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0},
};
+#endif // USE_TEMPORALFILTER_12TAP
+#if CONFIG_EXT_INTERP
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
-#if CONFIG_EXT_INTERP
// intfilt 0.575
{0, 0, 0, 128, 0, 0, 0, 0},
{0, 1, -5, 126, 8, -3, 1, 0},
@@ -73,30 +75,10 @@
{0, 3, -9, 27, 118, -14, 4, -1},
{-1, 2, -6, 18, 123, -10, 3, -1},
{0, 1, -3, 8, 126, -5, 1, 0},
-#else
- // Lagrangian interpolation filter
- { 0, 0, 0, 128, 0, 0, 0, 0},
- { 0, 1, -5, 126, 8, -3, 1, 0},
- { -1, 3, -10, 122, 18, -6, 2, 0},
- { -1, 4, -13, 118, 27, -9, 3, -1},
- { -1, 4, -16, 112, 37, -11, 4, -1},
- { -1, 5, -18, 105, 48, -14, 4, -1},
- { -1, 5, -19, 97, 58, -16, 5, -1},
- { -1, 6, -19, 88, 68, -18, 5, -1},
- { -1, 6, -19, 78, 78, -19, 6, -1},
- { -1, 5, -18, 68, 88, -19, 6, -1},
- { -1, 5, -16, 58, 97, -19, 5, -1},
- { -1, 4, -14, 48, 105, -18, 5, -1},
- { -1, 4, -11, 37, 112, -16, 4, -1},
- { -1, 3, -9, 27, 118, -13, 4, -1},
- { 0, 2, -6, 18, 122, -10, 3, -1},
- { 0, 1, -3, 8, 126, -5, 1, 0}
-#endif // CONFIG_EXT_INTERP
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
-#if CONFIG_EXT_INTERP
// intfilt 0.8
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 2, -6, 127, 9, -4, 2, -1},
@@ -114,29 +96,9 @@
{-2, 5, -11, 28, 119, -16, 7, -2},
{-2, 4, -7, 18, 124, -12, 5, -2},
{-1, 2, -4, 9, 127, -6, 2, -1},
-#else
- // DCT based filter
- {0, 0, 0, 128, 0, 0, 0, 0},
- {-1, 3, -7, 127, 8, -3, 1, 0},
- {-2, 5, -13, 125, 17, -6, 3, -1},
- {-3, 7, -17, 121, 27, -10, 5, -2},
- {-4, 9, -20, 115, 37, -13, 6, -2},
- {-4, 10, -23, 108, 48, -16, 8, -3},
- {-4, 10, -24, 100, 59, -19, 9, -3},
- {-4, 11, -24, 90, 70, -21, 10, -4},
- {-4, 11, -23, 80, 80, -23, 11, -4},
- {-4, 10, -21, 70, 90, -24, 11, -4},
- {-3, 9, -19, 59, 100, -24, 10, -4},
- {-3, 8, -16, 48, 108, -23, 10, -4},
- {-2, 6, -13, 37, 115, -20, 9, -4},
- {-2, 5, -10, 27, 121, -17, 7, -3},
- {-1, 3, -6, 17, 125, -13, 5, -2},
- {0, 1, -3, 8, 127, -7, 3, -1}
-#endif // CONFIG_EXT_INTERP
};
-#if CONFIG_EXT_INTERP && (SWITCHABLE_FILTERS == 4 || SWITCHABLE_FILTERS == 5)
-
+#if SWITCHABLE_FILTERS >= 4
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
// freqmultiplier = 0.35
@@ -178,7 +140,74 @@
{0, 2, -12, 37, 94, 14, -9, 2},
{0, 2, -11, 31, 95, 19, -10, 2},
};
-#else
+#endif // SWITCHABLE_FILTERS >= 4
+
+#if SWITCHABLE_FILTERS == 5
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = {
+ // intfilt 0.8
+ {0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0},
+ {0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0},
+ {0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0},
+ {-1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1},
+ {-1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1},
+ {-1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1},
+ {-1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1},
+ {-1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1},
+ {-1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1},
+ {-1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1},
+ {-1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1},
+ {-1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1},
+ {-1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1},
+ {-1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1},
+ {0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0},
+ {0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0},
+};
+#endif
+
+#else // CONFIG_EXT_INTERP
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+ // Lagrangian interpolation filter
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ { 0, 1, -5, 126, 8, -3, 1, 0},
+ { -1, 3, -10, 122, 18, -6, 2, 0},
+ { -1, 4, -13, 118, 27, -9, 3, -1},
+ { -1, 4, -16, 112, 37, -11, 4, -1},
+ { -1, 5, -18, 105, 48, -14, 4, -1},
+ { -1, 5, -19, 97, 58, -16, 5, -1},
+ { -1, 6, -19, 88, 68, -18, 5, -1},
+ { -1, 6, -19, 78, 78, -19, 6, -1},
+ { -1, 5, -18, 68, 88, -19, 6, -1},
+ { -1, 5, -16, 58, 97, -19, 5, -1},
+ { -1, 4, -14, 48, 105, -18, 5, -1},
+ { -1, 4, -11, 37, 112, -16, 4, -1},
+ { -1, 3, -9, 27, 118, -13, 4, -1},
+ { 0, 2, -6, 18, 122, -10, 3, -1},
+ { 0, 1, -3, 8, 126, -5, 1, 0}
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+ // DCT based filter
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 3, -7, 127, 8, -3, 1, 0},
+ {-2, 5, -13, 125, 17, -6, 3, -1},
+ {-3, 7, -17, 121, 27, -10, 5, -2},
+ {-4, 9, -20, 115, 37, -13, 6, -2},
+ {-4, 10, -23, 108, 48, -16, 8, -3},
+ {-4, 10, -24, 100, 59, -19, 9, -3},
+ {-4, 11, -24, 90, 70, -21, 10, -4},
+ {-4, 11, -23, 80, 80, -23, 11, -4},
+ {-4, 10, -21, 70, 90, -24, 11, -4},
+ {-3, 9, -19, 59, 100, -24, 10, -4},
+ {-3, 8, -16, 48, 108, -23, 10, -4},
+ {-2, 6, -13, 37, 115, -20, 9, -4},
+ {-2, 5, -10, 27, 121, -17, 7, -3},
+ {-1, 3, -6, 17, 125, -13, 5, -2},
+ {0, 1, -3, 8, 127, -7, 3, -1}
+};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
@@ -202,53 +231,60 @@
};
#endif // CONFIG_EXT_INTERP
-const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1] = {
- sub_pel_filters_8,
- sub_pel_filters_8smooth,
- sub_pel_filters_8sharp,
-#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
- sub_pel_filters_8smooth2,
-#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
- sub_pel_filters_8smooth2,
- (const InterpKernel*)sub_pel_filters_12sharp,
-#endif
- bilinear_filters
-};
-
#if CONFIG_EXT_INTRA
const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS] = {
- bilinear_filters, // INTRA_FILTER_LINEAR
- sub_pel_filters_8, // INTRA_FILTER_8TAP
- sub_pel_filters_8sharp, // INTRA_FILTER_8TAP_SHARP
- sub_pel_filters_8smooth, // INTRA_FILTER_8TAP_SMOOTH
+ bilinear_filters, // INTRA_FILTER_LINEAR
+ sub_pel_filters_8, // INTRA_FILTER_8TAP
+ sub_pel_filters_8sharp, // INTRA_FILTER_8TAP_SHARP
+ sub_pel_filters_8smooth, // INTRA_FILTER_8TAP_SMOOTH
};
#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_INTERP
static const InterpFilterParams
vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
{(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS},
{(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS},
{(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS},
-#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+#if SWITCHABLE_FILTERS >= 4
{(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS},
-#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
- {(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS},
+#endif
+#if SWITCHABLE_FILTERS == 5
{(const int16_t*)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS},
#endif
{(const int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS}
};
-
-#if FILTER_12TAP
-static const InterpFilterParams vp10_interp_filter_12tap = {
- (const int16_t*)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS
+#else
+static const InterpFilterParams
+vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
+ {(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS},
+ {(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS},
+ {(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS},
+ {(const int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS}
};
-#endif
+#endif // CONFIG_EXT_INTERP
+
+#if USE_TEMPORALFILTER_12TAP
+static const InterpFilterParams vp10_interp_temporalfilter_12tap = {
+ (const int16_t*)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS
+};
+#endif // USE_TEMPORALFILTER_12TAP
InterpFilterParams vp10_get_interp_filter_params(
const INTERP_FILTER interp_filter) {
-#if FILTER_12TAP
- if (interp_filter == SHARP_FILTER_12TAP)
- return vp10_interp_filter_12tap;
-#endif
+#if USE_TEMPORALFILTER_12TAP
+ if (interp_filter == TEMPORALFILTER_12TAP)
+ return vp10_interp_temporalfilter_12tap;
+#endif // USE_TEMPORALFILTER_12TAP
return vp10_interp_filter_params_list[interp_filter];
}
+
+const int16_t *vp10_get_interp_filter_kernel(
+ const INTERP_FILTER interp_filter) {
+#if USE_TEMPORALFILTER_12TAP
+ if (interp_filter == TEMPORALFILTER_12TAP)
+ return vp10_interp_temporalfilter_12tap.filter_ptr;
+#endif // USE_TEMPORALFILTER_12TAP
+ return (const int16_t*)
+ vp10_interp_filter_params_list[interp_filter].filter_ptr;
+}
diff --git a/vp10/common/filter.h b/vp10/common/filter.h
index df46ca8..74a0c99 100644
--- a/vp10/common/filter.h
+++ b/vp10/common/filter.h
@@ -21,33 +21,29 @@
extern "C" {
#endif
-#define EIGHTTAP 0
+#define EIGHTTAP_REGULAR 0
#define EIGHTTAP_SMOOTH 1
-#define EIGHTTAP_SHARP 2
+#define MULTITAP_SHARP 2
#if CONFIG_EXT_INTERP
+#define MAX_SUBPEL_TAPS 12
#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn it on for experimentation */
#define SWITCHABLE_FILTERS 5 /* Number of switchable filters */
-#if SWITCHABLE_FILTERS == 4
-
+#if SWITCHABLE_FILTERS >= 4
#define EIGHTTAP_SMOOTH2 3
-
-#elif SWITCHABLE_FILTERS == 5
-
-#define EIGHTTAP_SMOOTH2 3
-#define EIGHTTAP_SHARP2 4
-
+#endif
+#if SWITCHABLE_FILTERS == 5
+#define MULTITAP_SHARP2 4
#endif // SWITCHABLE_FILTERS
#else
#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
#endif // CONFIG_EXT_INTERP
-// TODO(jingning): Align the experiment flags and clean this up.
-#define FILTER_12TAP (!CONFIG_EXT_INTERP)
-#if FILTER_12TAP
-#define SHARP_FILTER_12TAP (SWITCHABLE_FILTERS + 1)
+#define USE_TEMPORALFILTER_12TAP 1
+#if USE_TEMPORALFILTER_12TAP
+#define TEMPORALFILTER_12TAP (SWITCHABLE_FILTERS + 1)
#endif
// The codec can operate in four possible inter prediction filter mode:
@@ -59,8 +55,6 @@
typedef uint8_t INTERP_FILTER;
-extern const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1];
-
#if CONFIG_EXT_INTRA
typedef enum {
INTRA_FILTER_LINEAR,
@@ -75,15 +69,25 @@
typedef struct InterpFilterParams {
const int16_t* filter_ptr;
- uint16_t tap;
+ uint16_t taps;
uint16_t subpel_shifts;
} InterpFilterParams;
InterpFilterParams vp10_get_interp_filter_params(
const INTERP_FILTER interp_filter);
-static INLINE const int16_t* vp10_get_interp_filter_kernel(
+
+const int16_t *vp10_get_interp_filter_kernel(
+ const INTERP_FILTER interp_filter);
+
+static INLINE const int16_t* vp10_get_interp_filter_subpel_kernel(
const InterpFilterParams filter_params, const int subpel) {
- return filter_params.filter_ptr + filter_params.tap * subpel;
+ return filter_params.filter_ptr + filter_params.taps * subpel;
+}
+
+static INLINE int vp10_is_interpolating_filter(
+ const INTERP_FILTER interp_filter) {
+ const InterpFilterParams ip = vp10_get_interp_filter_params(interp_filter);
+ return (ip.filter_ptr[ip.taps / 2 - 1] == 128);
}
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/common/pred_common.c b/vp10/common/pred_common.c
index 4dd6841..3646961 100644
--- a/vp10/common/pred_common.c
+++ b/vp10/common/pred_common.c
@@ -46,13 +46,13 @@
PREDICTION_MODE mode = left_mbmi->mode;
if (is_inter_block(left_mbmi)) {
switch (left_mbmi->interp_filter) {
- case EIGHTTAP:
+ case EIGHTTAP_REGULAR:
left_type = INTRA_FILTER_8TAP;
break;
case EIGHTTAP_SMOOTH:
left_type = INTRA_FILTER_8TAP_SMOOTH;
break;
- case EIGHTTAP_SHARP:
+ case MULTITAP_SHARP:
left_type = INTRA_FILTER_8TAP_SHARP;
break;
case BILINEAR:
@@ -76,13 +76,13 @@
if (xd->up_available && above_mbmi->sb_type >= BLOCK_8X8) {
if (is_inter_block(above_mbmi)) {
switch (above_mbmi->interp_filter) {
- case EIGHTTAP:
+ case EIGHTTAP_REGULAR:
above_type = INTRA_FILTER_8TAP;
break;
case EIGHTTAP_SMOOTH:
above_type = INTRA_FILTER_8TAP_SMOOTH;
break;
- case EIGHTTAP_SHARP:
+ case MULTITAP_SHARP:
above_type = INTRA_FILTER_8TAP_SHARP;
break;
case BILINEAR:
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 6499999..0e7fa4c 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -30,23 +30,26 @@
int xs, int ys) {
InterpFilterParams interp_filter_params =
vp10_get_interp_filter_params(interp_filter);
- if (interp_filter_params.tap == SUBPEL_TAPS) {
- const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
+ if (interp_filter_params.taps == SUBPEL_TAPS) {
+ const int16_t *kernel_x =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
+ const int16_t *kernel_y =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_y);
#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
if (IsInterpolatingFilter(interp_filter)) {
// Interpolating filter
sf->predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ kernel_x, xs, kernel_y, ys, w, h);
} else {
sf->predict_ni[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ kernel_x, xs, kernel_y, ys, w, h);
}
#else
sf->predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ kernel_x, xs, kernel_y, ys, w, h);
#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
} else {
// ref > 0 means this is the second reference frame
@@ -69,23 +72,26 @@
int xs, int ys, int bd) {
InterpFilterParams interp_filter_params =
vp10_get_interp_filter_params(interp_filter);
- if (interp_filter_params.tap == SUBPEL_TAPS) {
- const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
+ if (interp_filter_params.taps == SUBPEL_TAPS) {
+ const int16_t *kernel_x =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
+ const int16_t *kernel_y =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_y);
#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
if (IsInterpolatingFilter(interp_filter)) {
// Interpolating filter
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ kernel_x, xs, kernel_y, ys, w, h, bd);
} else {
sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ kernel_x, xs, kernel_y, ys, w, h, bd);
}
#else
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ kernel_x, xs, kernel_y, ys, w, h, bd);
#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
} else {
// ref > 0 means this is the second reference frame
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index e2ab952..76d50c6 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -334,8 +334,9 @@
}
} else {
if (!flags[shift]) {
+ const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift];
vpx_convolve8_horiz(src + pad_size, 2 * bs, buf[shift], 2 * bs,
- vp10_intra_filter_kernels[filter_type][shift], 16,
+ filter, 16,
NULL, 16, 2 * bs, 2 * bs < 16 ? 2 : 1);
flags[shift] = 1;
}
@@ -465,9 +466,10 @@
}
} else {
if (!flags[shift]) {
+ const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift];
vpx_convolve8_vert(src + 4 * pad_size, 4,
buf[0] + 4 * shift, 4 * SUBPEL_SHIFTS, NULL, 16,
- vp10_intra_filter_kernels[filter_type][shift], 16,
+ filter, 16,
2 * bs < 16 ? 4 : 4, 2 * bs);
flags[shift] = 1;
}
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index 679adc6..ecc971a 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -365,6 +365,11 @@
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
for (j = 0; j < SWITCHABLE_FILTERS; j++)
cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
+#if CONFIG_OBMC
+ for (i = 0; i < BLOCK_SIZES; i++)
+ for (j = 0; j < 2; j++)
+ cm->counts.obmc[i][j] += counts->obmc[i][j];
+#endif // CONFIG_OBMC
#if CONFIG_REF_MV
for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c
index 33a8542..8fdd8f1 100644
--- a/vp10/common/vp10_convolve.c
+++ b/vp10/common/vp10_convolve.c
@@ -15,14 +15,15 @@
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, int avg) {
int x, y;
- int filter_size = filter_params.tap;
+ int filter_size = filter_params.taps;
src -= filter_size / 2 - 1;
for (y = 0; y < h; ++y) {
int x_q4 = subpel_x_q4;
for (x = 0; x < w; ++x) {
const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
const int16_t *x_filter =
- vp10_get_interp_filter_kernel(filter_params, x_q4 & SUBPEL_MASK);
+ vp10_get_interp_filter_subpel_kernel(
+ filter_params, x_q4 & SUBPEL_MASK);
int k, sum = 0;
for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
if (avg) {
@@ -43,7 +44,7 @@
const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, int avg) {
int x, y;
- int filter_size = filter_params.tap;
+ int filter_size = filter_params.taps;
src -= src_stride * (filter_size / 2 - 1);
for (x = 0; x < w; ++x) {
@@ -51,7 +52,8 @@
for (y = 0; y < h; ++y) {
const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
const int16_t *y_filter =
- vp10_get_interp_filter_kernel(filter_params, y_q4 & SUBPEL_MASK);
+ vp10_get_interp_filter_subpel_kernel(
+ filter_params, y_q4 & SUBPEL_MASK);
int k, sum = 0;
for (k = 0; k < filter_size; ++k)
sum += src_y[k * src_stride] * y_filter[k];
@@ -96,7 +98,7 @@
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, int avg) {
- int filter_size = filter_params.tap;
+ int filter_size = filter_params.taps;
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
@@ -104,7 +106,7 @@
assert(h <= MAX_BLOCK_HEIGHT);
assert(y_step_q4 <= MAX_STEP);
assert(x_step_q4 <= MAX_STEP);
- assert(filter_params.tap <= MAX_FILTER_TAP);
+ assert(filter_params.taps <= MAX_FILTER_TAP);
if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, avg);
@@ -140,14 +142,15 @@
const int subpel_x_q4, int x_step_q4, int avg,
int bd) {
int x, y;
- int filter_size = filter_params.tap;
+ int filter_size = filter_params.taps;
src -= filter_size / 2 - 1;
for (y = 0; y < h; ++y) {
int x_q4 = subpel_x_q4;
for (x = 0; x < w; ++x) {
const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
const int16_t *x_filter =
- vp10_get_interp_filter_kernel(filter_params, x_q4 & SUBPEL_MASK);
+ vp10_get_interp_filter_subpel_kernel(
+ filter_params, x_q4 & SUBPEL_MASK);
int k, sum = 0;
for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
if (avg)
@@ -170,7 +173,7 @@
const int subpel_y_q4, int y_step_q4, int avg,
int bd) {
int x, y;
- int filter_size = filter_params.tap;
+ int filter_size = filter_params.taps;
src -= src_stride * (filter_size / 2 - 1);
for (x = 0; x < w; ++x) {
@@ -178,7 +181,8 @@
for (y = 0; y < h; ++y) {
const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
const int16_t *y_filter =
- vp10_get_interp_filter_kernel(filter_params, y_q4 & SUBPEL_MASK);
+ vp10_get_interp_filter_subpel_kernel(
+ filter_params, y_q4 & SUBPEL_MASK);
int k, sum = 0;
for (k = 0; k < filter_size; ++k)
sum += src_y[k * src_stride] * y_filter[k];
@@ -228,7 +232,7 @@
int bd) {
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- int filter_size = filter_params.tap;
+ int filter_size = filter_params.taps;
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
@@ -236,7 +240,7 @@
assert(h <= MAX_BLOCK_HEIGHT);
assert(y_step_q4 <= MAX_STEP);
assert(x_step_q4 <= MAX_STEP);
- assert(filter_params.tap <= MAX_FILTER_TAP);
+ assert(filter_params.taps <= MAX_FILTER_TAP);
if (ignore_horiz && ignore_vert) {
highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, avg, bd);
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index a1b1534..4136f14 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -737,7 +737,7 @@
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
- int filter_size = filter_params.tap;
+ int filter_size = filter_params.taps;
if (subpel_x ||
#if CONFIG_EXT_INTERP
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 7e0ee7d..fccd3c8 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -805,7 +805,7 @@
FRAME_COUNTS *counts = xd->counts;
INTERP_FILTER type;
#if CONFIG_EXT_INTERP
- if (!vp10_is_interp_needed(xd)) return EIGHTTAP;
+ if (!vp10_is_interp_needed(xd)) return EIGHTTAP_REGULAR;
#endif
type = (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree,
cm->fc->switchable_interp_prob[ctx]);
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 6385f2f..fa6c2cb 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -868,7 +868,7 @@
const int ctx = vp10_get_pred_context_switchable_interp(xd);
#if CONFIG_EXT_INTERP
if (!vp10_is_interp_needed(xd)) {
- assert(mbmi->interp_filter == EIGHTTAP);
+ assert(mbmi->interp_filter == EIGHTTAP_REGULAR);
return;
}
#endif
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 7548456..d3ea94b 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -4021,22 +4021,22 @@
#if CONFIG_EXT_INTERP
if (!is_alt_ref &&
threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_SMOOTH] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_SHARP] &&
+ threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_REGULAR] &&
+ threshes[EIGHTTAP_SMOOTH2] > threshes[MULTITAP_SHARP] &&
threshes[EIGHTTAP_SMOOTH2] > threshes[SWITCHABLE - 1]) {
return EIGHTTAP_SMOOTH2;
}
#endif // CONFIG_EXT_INTERP
if (!is_alt_ref &&
- threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
- threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
+ threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_REGULAR] &&
+ threshes[EIGHTTAP_SMOOTH] > threshes[MULTITAP_SHARP] &&
threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
return EIGHTTAP_SMOOTH;
- } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
- threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP_SHARP;
- } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP;
+ } else if (threshes[MULTITAP_SHARP] > threshes[EIGHTTAP_REGULAR] &&
+ threshes[MULTITAP_SHARP] > threshes[SWITCHABLE - 1]) {
+ return MULTITAP_SHARP;
+ } else if (threshes[EIGHTTAP_REGULAR] > threshes[SWITCHABLE - 1]) {
+ return EIGHTTAP_REGULAR;
} else {
return SWITCHABLE;
}
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index d39d39e..fc9e2e9 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -410,6 +410,15 @@
vpx_free(cpi->active_map.map);
cpi->active_map.map = NULL;
+#if CONFIG_AFFINE_MOTION
+ {
+ // Free up-sampled reference buffers.
+ int i;
+ for (i = 0; i < MAX_REF_FRAMES; i++)
+ vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
+ }
+#endif
+
vp10_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
vp10_free_postproc_buffers(cm);
@@ -744,6 +753,26 @@
NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled last source buffer");
+
+#if CONFIG_AFFINE_MOTION
+ {
+ // Allocate up-sampled reference buffers.
+ int i;
+
+ for (i = 0; i < MAX_REF_FRAMES; i++)
+ if (vpx_realloc_frame_buffer(&cpi->upsampled_ref_bufs[i].buf,
+ (cm->width << 3), (cm->height << 3),
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ (VP9_ENC_BORDER_IN_PIXELS << 3),
+ cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate up-sampled reference frame buffer");
+ }
+#endif
}
@@ -2353,10 +2382,11 @@
#if CONFIG_VP9_HIGHBITDEPTH
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int bd) {
+ YV12_BUFFER_CONFIG *dst, int planes,
+ int bd) {
#else
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst) {
+ YV12_BUFFER_CONFIG *dst, int planes) {
#endif // CONFIG_VP9_HIGHBITDEPTH
const int src_w = src->y_crop_width;
const int src_h = src->y_crop_height;
@@ -2366,12 +2396,15 @@
const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
- const InterpKernel *const kernel = vp10_filter_kernels[EIGHTTAP];
+ const InterpFilterParams interp_filter_params =
+ vp10_get_interp_filter_params(EIGHTTAP_REGULAR);
+ const int16_t *kernel = interp_filter_params.filter_ptr;
+ const int taps = interp_filter_params.taps;
int x, y, i;
for (y = 0; y < dst_h; y += 16) {
for (x = 0; x < dst_w; x += 16) {
- for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (i = 0; i < planes; ++i) {
const int factor = (i == 0 || i == 3 ? 1 : 2);
const int x_q4 = x * (16 / factor) * src_w / dst_w;
const int y_q4 = y * (16 / factor) * src_h / dst_h;
@@ -2384,26 +2417,29 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
- kernel[x_q4 & 0xf], 16 * src_w / dst_w,
- kernel[y_q4 & 0xf], 16 * src_h / dst_h,
+ &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
+ &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
16 / factor, 16 / factor, bd);
} else {
- vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
- kernel[x_q4 & 0xf], 16 * src_w / dst_w,
- kernel[y_q4 & 0xf], 16 * src_h / dst_h,
+ vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
+ &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
+ &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
16 / factor, 16 / factor);
}
#else
- vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
- kernel[x_q4 & 0xf], 16 * src_w / dst_w,
- kernel[y_q4 & 0xf], 16 * src_h / dst_h,
+ vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
+ &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
+ &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
16 / factor, 16 / factor);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
}
- vpx_extend_frame_borders(dst);
+ if (planes == 1)
+ vpx_extend_frame_borders_y(dst);
+ else
+ vpx_extend_frame_borders(dst);
}
static int scale_down(VP10_COMP *cpi, int q) {
@@ -2459,6 +2495,45 @@
return force_recode;
}
+#if CONFIG_AFFINE_MOTION
+static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) {
+ int i;
+
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ if (!ubufs[i].ref_count) {
+ return i;
+ }
+ }
+ return INVALID_IDX;
+}
+
+// Up-sample reference frames.
+static INLINE int upsample_ref_frame(RefCntBuffer *bufs,
+#if CONFIG_VP9_HIGHBITDEPTH
+ EncRefCntBuffer *ubufs, int new_idx,
+ int bit_depth) {
+#else
+ EncRefCntBuffer *ubufs, int new_idx) {
+#endif
+ int new_uidx = get_free_upsampled_ref_buf(ubufs);
+
+ if (new_uidx == INVALID_IDX) {
+ return INVALID_IDX;
+ } else {
+ const YV12_BUFFER_CONFIG *const ref = &bufs[new_idx].buf;
+ YV12_BUFFER_CONFIG *upsampled_ref = &ubufs[new_uidx].buf;
+
+ // Currently, only Y plane is up-sampled, U, V are not used.
+#if CONFIG_VP9_HIGHBITDEPTH
+ scale_and_extend_frame(ref, upsampled_ref, 1, bit_depth);
+#else
+ scale_and_extend_frame(ref, upsampled_ref, 1);
+#endif
+ return new_uidx;
+ }
+}
+#endif
+
void vp10_update_reference_frames(VP10_COMP *cpi) {
VP10_COMMON * const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
@@ -2466,6 +2541,17 @@
int ref_frame;
#endif // CONFIG_EXT_REFS
+#if CONFIG_AFFINE_MOTION
+ // Always up-sample the current encoded frame.
+#if CONFIG_VP9_HIGHBITDEPTH
+ int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs,
+ cm->new_fb_idx, (int)cm->bit_depth);
+#else
+ int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs,
+ cm->new_fb_idx);
+#endif
+#endif
+
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
if (cm->frame_type == KEY_FRAME) {
@@ -2473,6 +2559,13 @@
&cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
+
+#if CONFIG_AFFINE_MOTION
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+#endif
} else if (vp10_preserve_existing_gf(cpi)) {
// We have decided to preserve the previously existing golden frame as our
// new ARF frame. However, in the short term in function
@@ -2486,7 +2579,10 @@
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
-
+#if CONFIG_AFFINE_MOTION
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+#endif
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
cpi->gld_fb_idx = tmp;
@@ -2500,6 +2596,10 @@
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[arf_idx], cm->new_fb_idx);
+#if CONFIG_AFFINE_MOTION
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+#endif
memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
@@ -2508,6 +2608,10 @@
if (cpi->refresh_golden_frame) {
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+#if CONFIG_AFFINE_MOTION
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+#endif
if (!cpi->rc.is_src_frame_alt_ref)
memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
cpi->interp_filter_selected[0],
@@ -2542,6 +2646,10 @@
if (cpi->refresh_last_frame) {
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
+#if CONFIG_AFFINE_MOTION
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
+#endif
if (!cpi->rc.is_src_frame_alt_ref) {
memcpy(cpi->interp_filter_selected[LAST_FRAME],
cpi->interp_filter_selected[0],
@@ -2675,7 +2783,8 @@
cm->byte_alignment, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
- scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
+ scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE,
+ (int)cm->bit_depth);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
alloc_frame_mvs(cm, new_fb);
}
@@ -2700,11 +2809,39 @@
cm->byte_alignment, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
- scale_and_extend_frame(ref, &new_fb_ptr->buf);
+ scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
alloc_frame_mvs(cm, new_fb);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_AFFINE_MOTION
+ {
+ const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
+ EncRefCntBuffer *ubuf =
+ &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]];
+
+ if (vpx_realloc_frame_buffer(&ubuf->buf,
+ (cm->width << 3), (cm->height << 3),
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ (VP9_ENC_BORDER_IN_PIXELS << 3),
+ cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate up-sampled frame buffer");
+#if CONFIG_VP9_HIGHBITDEPTH
+ scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE,
+ (int)cm->bit_depth);
+#else
+ scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE);
+#endif
+ cpi->scaled_ref_idx[ref_frame - LAST_FRAME] = new_fb;
+ alloc_frame_mvs(cm, new_fb);
+ }
+#endif
} else {
const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
@@ -3522,10 +3659,10 @@
cpi->refresh_alt_ref_frame)
return mask;
for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
- for (ifilter = EIGHTTAP; ifilter < SWITCHABLE_FILTERS; ++ifilter)
+ for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter)
ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
- for (ifilter = EIGHTTAP; ifilter < SWITCHABLE_FILTERS; ++ifilter) {
+ for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter) {
if ((ref_total[LAST_FRAME] &&
cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
#if CONFIG_EXT_REFS
@@ -3784,6 +3921,17 @@
}
}
+#if CONFIG_AFFINE_MOTION
+static INLINE void init_upsampled_ref_frame_bufs(VP10_COMP *cpi) {
+ int i;
+
+ for (i = 0; i < MAX_REF_FRAMES; ++i) {
+ cpi->upsampled_ref_bufs[i].ref_count = 0;
+ cpi->upsampled_ref_idx[i] = INVALID_IDX;
+ }
+}
+#endif
+
static void check_initial_width(VP10_COMP *cpi,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
@@ -3806,7 +3954,9 @@
alloc_raw_frame_buffers(cpi);
init_ref_frame_bufs(cm);
alloc_util_frame_buffers(cpi);
-
+#if CONFIG_AFFINE_MOTION
+ init_upsampled_ref_frame_bufs(cpi);
+#endif
init_motion_estimation(cpi); // TODO(agrange) This can be removed.
cpi->initial_width = cm->width;
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 292494c..efde0fc 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -286,6 +286,13 @@
double worst;
} ImageStat;
+#if CONFIG_AFFINE_MOTION
+typedef struct {
+ int ref_count;
+ YV12_BUFFER_CONFIG buf;
+} EncRefCntBuffer;
+#endif
+
typedef struct VP10_COMP {
QUANTS quants;
ThreadData td;
@@ -304,6 +311,12 @@
YV12_BUFFER_CONFIG *unscaled_last_source;
YV12_BUFFER_CONFIG scaled_last_source;
+#if CONFIG_AFFINE_MOTION
+ // Up-sampled reference buffers
+ EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES];
+ int upsampled_ref_idx[MAX_REF_FRAMES];
+#endif
+
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
@@ -688,6 +701,20 @@
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
+#if CONFIG_AFFINE_MOTION
+// Update up-sampled reference frame index.
+static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
+ int new_uidx) {
+ const int ref_index = *uidx;
+
+ if (ref_index >= 0 && ubufs[ref_index].ref_count > 0)
+ ubufs[ref_index].ref_count--;
+
+ *uidx = new_uidx;
+ ubufs[new_uidx].ref_count++;
+}
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/encoder/mbgraph.c b/vp10/encoder/mbgraph.c
index 2d3a33e..1f467b8 100644
--- a/vp10/encoder/mbgraph.c
+++ b/vp10/encoder/mbgraph.c
@@ -64,7 +64,11 @@
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
NULL, NULL,
+#if CONFIG_AFFINE_MOTION
+ &distortion, &sse, NULL, 0, 0, 0);
+#else
&distortion, &sse, NULL, 0, 0);
+#endif
}
#if CONFIG_EXT_INTER
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 6e3b06a..8949f76 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -208,6 +208,32 @@
v = INT_MAX; \
}
+#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
+
+#if CONFIG_AFFINE_MOTION
+static INLINE const uint8_t *upre(const uint8_t *buf, int stride,
+ int r, int c) {
+ return &buf[(r) * stride + (c)];
+}
+
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ thismse = upsampled_pref_error(xd, vfp, z, src_stride, \
+ upre(y, y_stride, r, c), y_stride, \
+ second_pred, w, h, &sse); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#endif
+
#define FIRST_LEVEL_CHECKS \
{ \
unsigned int left, right, up, down, diag; \
@@ -276,7 +302,7 @@
// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
-#define SECOND_LEVEL_CHECKS_BEST \
+#define SECOND_LEVEL_CHECKS_BEST(k) \
{ \
unsigned int second; \
int br0 = br; \
@@ -287,10 +313,10 @@
} else if (tr != br && tc == bc) { \
kr = br - tr; \
} \
- CHECK_BETTER(second, br0 + kr, bc0); \
- CHECK_BETTER(second, br0, bc0 + kc); \
+ CHECK_BETTER##k(second, br0 + kr, bc0); \
+ CHECK_BETTER##k(second, br0, bc0 + kc); \
if (br0 != br || bc0 != bc) { \
- CHECK_BETTER(second, br0 + kr, bc0 + kc); \
+ CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
} \
}
@@ -412,7 +438,11 @@
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+ int w, int h, int use_upsampled_ref) {
+#else
int w, int h) {
+#endif
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
z, src_stride, y, y_stride, second_pred,
@@ -425,6 +455,9 @@
(void) allow_hp;
(void) forced_stop;
(void) hstep;
+#if CONFIG_AFFINE_MOTION
+ (void) use_upsampled_ref;
+#endif
if (cost_list &&
cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
@@ -491,8 +524,17 @@
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+ int w, int h,
+ int use_upsampled_ref) {
+#else
int w, int h) {
+#endif
SETUP_SUBPEL_SEARCH;
+#if CONFIG_AFFINE_MOTION
+ (void) use_upsampled_ref;
+#endif
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
z, src_stride, y, y_stride, second_pred,
w, h, offset, mvjcost, mvcost,
@@ -565,8 +607,16 @@
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+ int w, int h, int use_upsampled_ref) {
+#else
int w, int h) {
+#endif
SETUP_SUBPEL_SEARCH;
+#if CONFIG_AFFINE_MOTION
+ (void) use_upsampled_ref;
+#endif
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
z, src_stride, y, y_stride, second_pred,
w, h, offset, mvjcost, mvcost,
@@ -655,6 +705,101 @@
{0, -1}, {0, 1}, {-1, 0}, {1, 0}
};
+
+#if CONFIG_AFFINE_MOTION
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_comp_avg_upsampled_pred(uint16_t *comp_pred,
+ const uint8_t *pred8,
+ int width, int height,
+ const uint8_t *ref8,
+ int ref_stride) {
+ int i, j;
+ int stride = ref_stride << 3;
+
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ const int tmp = pred[j] + ref[(j << 3)];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += stride;
+ }
+}
+
+static void highbd_upsampled_pred(uint16_t *comp_pred,
+ int width, int height,
+ const uint8_t *ref8,
+ int ref_stride) {
+ int i, j;
+ int stride = ref_stride << 3;
+
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ comp_pred[j] = ref[(j << 3)];
+ }
+ comp_pred += width;
+ ref += stride;
+ }
+}
+#endif
+
+static int upsampled_pref_error(const MACROBLOCKD *xd,
+ const vp9_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride,
+ const uint8_t *const y, int y_stride,
+ const uint8_t *second_pred,
+ int w, int h, unsigned int *sse) {
+ unsigned int besterr;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
+ if (second_pred != NULL)
+ highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
+ y_stride);
+ else
+ highbd_upsampled_pred(pred16, w, h, y, y_stride);
+
+ besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride,
+ sse);
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+ (void) xd;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (second_pred != NULL)
+ vpx_comp_avg_upsampled_pred(pred, second_pred, w, h, y,
+ y_stride);
+ else
+ vpx_upsampled_pred(pred, w, h, y, y_stride);
+
+ besterr = vfp->vf(pred, w, src, src_stride, sse);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+  return besterr;
+}
+
+static unsigned int upsampled_setup_center_error(
+ const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
+ int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride,
+ const uint8_t *const y, int y_stride, const uint8_t *second_pred,
+ int w, int h, int offset, int *mvjcost, int *mvcost[2],
+ unsigned int *sse1, int *distortion) {
+ unsigned int besterr = upsampled_pref_error(xd, vfp, src, src_stride,
+ y + offset, y_stride, second_pred,
+ w, h, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ return besterr;
+}
+#endif
+
int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@@ -667,14 +812,18 @@
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+ int w, int h, int use_upsampled_ref) {
+#else
int w, int h) {
+#endif
const uint8_t *const z = x->plane[0].src.buf;
const uint8_t *const src_address = z;
const int src_stride = x->plane[0].src.stride;
const MACROBLOCKD *xd = &x->e_mbd;
unsigned int besterr = INT_MAX;
unsigned int sse;
- int thismse;
+ unsigned int thismse;
const int y_stride = xd->plane[0].pre[0].stride;
const int offset = bestmv->row * y_stride + bestmv->col;
const uint8_t *const y = xd->plane[0].pre[0].buf;
@@ -703,10 +852,19 @@
bestmv->row *= 8;
bestmv->col *= 8;
- besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
- z, src_stride, y, y_stride, second_pred,
- w, h, offset, mvjcost, mvcost,
- sse1, distortion);
+#if CONFIG_AFFINE_MOTION
+ // use_upsampled_ref can be 0 or 1
+ if (use_upsampled_ref)
+ besterr = upsampled_setup_center_error(xd, bestmv, ref_mv, error_per_bit,
+ vfp, z, src_stride, y, y_stride,
+ second_pred, w, h, (offset << 3),
+ mvjcost, mvcost, sse1, distortion);
+ else
+#endif
+ besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
+ z, src_stride, y, y_stride, second_pred,
+ w, h, offset, mvjcost, mvcost,
+ sse1, distortion);
(void) cost_list; // to silence compiler warning
@@ -716,16 +874,29 @@
tr = br + search_step[idx].row;
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
- MV this_mv;
- this_mv.row = tr;
- this_mv.col = tc;
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+ MV this_mv = {tr, tc};
+
+#if CONFIG_AFFINE_MOTION
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
+ pre_address, y_stride, second_pred,
+ w, h, &sse);
+ } else {
+#endif
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
+ (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+#if CONFIG_AFFINE_MOTION
+ }
+#endif
+
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -747,14 +918,29 @@
tc = bc + kc;
tr = br + kr;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv = {tr, tc};
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+
+#if CONFIG_AFFINE_MOTION
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
+ pre_address, y_stride, second_pred,
+ w, h, &sse);
+ } else {
+#endif
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+#if CONFIG_AFFINE_MOTION
+ }
+#endif
+
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -776,8 +962,17 @@
bc = tc;
}
- if (iters_per_step > 1 && best_idx != -1)
- SECOND_LEVEL_CHECKS_BEST;
+ if (iters_per_step > 1 && best_idx != -1) {
+#if CONFIG_AFFINE_MOTION
+ if (use_upsampled_ref) {
+ SECOND_LEVEL_CHECKS_BEST(1);
+ } else {
+#endif
+ SECOND_LEVEL_CHECKS_BEST(0);
+#if CONFIG_AFFINE_MOTION
+ }
+#endif
+ }
tr = br;
tc = bc;
diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h
index 9d1ab2a..3063b99 100644
--- a/vp10/encoder/mcomp.h
+++ b/vp10/encoder/mcomp.h
@@ -116,7 +116,11 @@
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+ int w, int h, int use_upsampled_ref);
+#else
int w, int h);
+#endif
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index ff07489..96edc0f 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -2404,7 +2404,7 @@
MACROBLOCKD *xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
- unsigned int tmp;
+ int64_t tmp;
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
@@ -2472,7 +2472,7 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
tmp = ROUNDZ_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
#endif // CONFIG_VP9_HIGHBITDEPTH
- *bsse += (int64_t)tmp * 16;
+ *bsse += tmp * 16;
if (p->eobs[block] > 0) {
const int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
@@ -2544,12 +2544,12 @@
}
}
} else {
- cpi->fn_ptr[txm_bsize].vf(src, src_stride,
- rec_buffer, 32, &tmp);
+ uint32_t this_dist;
+ cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &this_dist);
+ tmp = this_dist;
}
}
- *dist += (int64_t)tmp * 16;
-
+ *dist += tmp * 16;
*rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
scan_order->scan, scan_order->neighbors, 0);
*skip &= (p->eobs[block] == 0);
@@ -3929,7 +3929,8 @@
int_mv* ref_mv_sub8x8[2],
#endif
int_mv single_newmv[MAX_REF_FRAMES],
- int *rate_mv) {
+ int *rate_mv,
+ const int block) {
const VP10_COMMON *const cm = &cpi->common;
const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
@@ -4076,6 +4077,40 @@
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
+#if CONFIG_AFFINE_MOTION
+ // Use up-sampled reference frames.
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ struct buf_2d backup_pred = pd->pre[0];
+ const YV12_BUFFER_CONFIG *upsampled_ref =
+ get_upsampled_ref(cpi, refs[id]);
+
+ // Set pred for Y plane
+ setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+ upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+ NULL, pd->subsampling_x, pd->subsampling_y);
+
+ // If bsize < BLOCK_8X8, adjust pred pointer for this block
+ if (bsize < BLOCK_8X8)
+ pd->pre[0].buf =
+ &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, block,
+ pd->pre[0].stride)) << 3];
+
+ bestsme = cpi->find_fractional_mv_step(
+ x, &tmp_mv,
+ &ref_mv[id].as_mv,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ 0, cpi->sf.mv.subpel_iters_per_step,
+ NULL,
+ x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred,
+ pw, ph, 1);
+
+ // Restore the reference frames.
+ pd->pre[0] = backup_pred;
+#else
+ (void) block;
bestsme = cpi->find_fractional_mv_step(
x, &tmp_mv,
&ref_mv[id].as_mv,
@@ -4087,6 +4122,7 @@
x->nmvjointcost, x->mvcost,
&dis, &sse, second_pred,
pw, ph);
+#endif
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -4367,6 +4403,43 @@
if (bestsme < INT_MAX) {
int distortion;
+#if CONFIG_AFFINE_MOTION
+ const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+ // Use up-sampled reference frames.
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ struct buf_2d backup_pred = pd->pre[0];
+ const YV12_BUFFER_CONFIG *upsampled_ref =
+ get_upsampled_ref(cpi, mbmi->ref_frame[0]);
+
+ // Set pred for Y plane
+ setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+ upsampled_ref->y_stride,
+ (mi_row << 3), (mi_col << 3),
+ NULL, pd->subsampling_x, pd->subsampling_y);
+
+ // adjust pred pointer for this block
+ pd->pre[0].buf =
+ &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i,
+ pd->pre[0].stride)) << 3];
+
+ cpi->find_fractional_mv_step(
+ x,
+ new_mv,
+ &bsi->ref_mv[0]->as_mv,
+ cm->allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost,
+ &distortion,
+ &x->pred_sse[mbmi->ref_frame[0]],
+ NULL, pw, ph, 1);
+
+ // Restore the reference frames.
+ pd->pre[0] = backup_pred;
+#else
cpi->find_fractional_mv_step(
x,
new_mv,
@@ -4380,6 +4453,7 @@
&distortion,
&x->pred_sse[mbmi->ref_frame[0]],
NULL, 0, 0);
+#endif
// save motion search result for use in compound prediction
#if CONFIG_EXT_INTER
@@ -4413,7 +4487,7 @@
#else
this_mode == NEWMV &&
#endif // CONFIG_EXT_INTER
- mbmi->interp_filter == EIGHTTAP) {
+ mbmi->interp_filter == EIGHTTAP_REGULAR) {
// adjust src pointers
mi_buf_shift(x, i);
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
@@ -4426,7 +4500,7 @@
#else
seg_mvs[i],
#endif // CONFIG_EXT_INTER
- &rate_mv);
+ &rate_mv, i);
#if CONFIG_EXT_INTER
compound_seg_newmvs[i][0].as_int =
frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
@@ -4975,6 +5049,33 @@
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
+#if CONFIG_AFFINE_MOTION
+ const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+ // Use up-sampled reference frames.
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ struct buf_2d backup_pred = pd->pre[0];
+ const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+
+ // Set pred for Y plane
+ setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+ upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+ NULL, pd->subsampling_x, pd->subsampling_y);
+
+ bestsme = cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
+ cm->allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost,
+ &dis, &x->pred_sse[ref], NULL,
+ pw, ph, 1);
+
+ // Restore the reference frames.
+ pd->pre[0] = backup_pred;
+#else
cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
x->errorperbit,
@@ -4984,6 +5085,7 @@
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref], NULL, 0, 0);
+#endif
}
*rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
@@ -5139,11 +5241,11 @@
}
if (cm->interp_filter != BILINEAR) {
if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
- best_filter = EIGHTTAP;
+ best_filter = EIGHTTAP_REGULAR;
}
#if CONFIG_EXT_INTERP
else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
- best_filter = EIGHTTAP;
+ best_filter = EIGHTTAP_REGULAR;
}
#endif
}
@@ -5328,7 +5430,7 @@
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
joint_motion_search(cpi, x, bsize, frame_mv,
- mi_row, mi_col, NULL, single_newmv, &rate_mv);
+ mi_row, mi_col, NULL, single_newmv, &rate_mv, 0);
} else {
rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
&x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
@@ -5358,7 +5460,7 @@
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
joint_motion_search(cpi, x, bsize, frame_mv,
mi_row, mi_col,
- single_newmv, &rate_mv);
+ single_newmv, &rate_mv, 0);
} else {
rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
&x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
@@ -6871,7 +6973,7 @@
#endif // CONFIG_EXT_INTRA
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+ mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
: cm->interp_filter;
mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
#if CONFIG_OBMC
@@ -7696,7 +7798,7 @@
x->skip = 1;
if (cm->interp_filter != BILINEAR) {
- best_filter = EIGHTTAP;
+ best_filter = EIGHTTAP_REGULAR;
if (cm->interp_filter == SWITCHABLE &&
#if CONFIG_EXT_INTERP
vp10_is_interp_needed(xd) &&
@@ -8039,7 +8141,7 @@
mbmi->ref_frame[1] = second_ref_frame;
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+ mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
: cm->interp_filter;
x->skip = 0;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
@@ -8123,9 +8225,9 @@
mbmi->tx_type = DCT_DCT;
if (cm->interp_filter != BILINEAR) {
- tmp_best_filter = EIGHTTAP;
+ tmp_best_filter = EIGHTTAP_REGULAR;
if (x->source_variance < sf->disable_filter_search_var_thresh) {
- tmp_best_filter = EIGHTTAP;
+ tmp_best_filter = EIGHTTAP_REGULAR;
} else if (sf->adaptive_pred_interp_filter == 1 &&
ctx->pred_interp_filter < SWITCHABLE) {
tmp_best_filter = ctx->pred_interp_filter;
@@ -8153,7 +8255,7 @@
mi_row, mi_col);
#if CONFIG_EXT_INTERP
if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
- mbmi->interp_filter != EIGHTTAP) // invalid configuration
+ mbmi->interp_filter != EIGHTTAP_REGULAR) // invalid config
continue;
#endif // CONFIG_EXT_INTERP
if (tmp_rd == INT64_MAX)
@@ -8225,8 +8327,8 @@
mi_row, mi_col);
#if CONFIG_EXT_INTERP
if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
- mbmi->interp_filter != EIGHTTAP) {
- mbmi->interp_filter = EIGHTTAP;
+ mbmi->interp_filter != EIGHTTAP_REGULAR) {
+ mbmi->interp_filter = EIGHTTAP_REGULAR;
tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
&x->mbmi_ext->ref_mvs[ref_frame][0],
second_ref, best_yrd, &rate, &rate_y,
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index 066bf69..f4d9b95 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -102,6 +102,23 @@
uint8_t *tmp_buf[MAX_MB_PLANE],
int tmp_stride[MAX_MB_PLANE]);
#endif // CONFIG_OBMC
+
+#if CONFIG_AFFINE_MOTION
+static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
+ const int ref) {
+ // Use up-sampled reference frames.
+ int ref_idx = 0;
+ if (ref == LAST_FRAME)
+ ref_idx = cpi->lst_fb_idx;
+ else if (ref == GOLDEN_FRAME)
+ ref_idx = cpi->gld_fb_idx;
+ else if (ref == ALTREF_FRAME)
+ ref_idx = cpi->alt_fb_idx;
+
+ return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
+}
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index a696e83..4f931d8 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -176,9 +176,9 @@
} MODE_SEARCH_SKIP_LOGIC;
typedef enum {
- FLAG_SKIP_EIGHTTAP = 1 << EIGHTTAP,
+ FLAG_SKIP_EIGHTTAP_REGULAR = 1 << EIGHTTAP_REGULAR,
FLAG_SKIP_EIGHTTAP_SMOOTH = 1 << EIGHTTAP_SMOOTH,
- FLAG_SKIP_EIGHTTAP_SHARP = 1 << EIGHTTAP_SHARP,
+ FLAG_SKIP_MULTITAP_SHARP = 1 << MULTITAP_SHARP,
} INTERP_FILTER_MASK;
typedef enum {
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c
index b4db251..3e1246a 100644
--- a/vp10/encoder/temporal_filter.c
+++ b/vp10/encoder/temporal_filter.c
@@ -48,15 +48,12 @@
enum mv_precision mv_precision_uv;
int uv_stride;
-#if FILTER_12TAP
- const INTERP_FILTER interp_filter = SHARP_FILTER_12TAP;
- (void)xd;
-#elif SWITCHABLE_FILTERS == 5
- const INTERP_FILTER interp_filter = EIGHTTAP_SHARP2;
+#if USE_TEMPORALFILTER_12TAP
+ const INTERP_FILTER interp_filter = TEMPORALFILTER_12TAP;
(void)xd;
#else
const INTERP_FILTER interp_filter = xd->mi[0]->mbmi.interp_filter;
-#endif
+#endif // USE_TEMPORALFILTER_12TAP
if (uv_block_width == 8) {
uv_stride = (stride + 1) >> 1;
@@ -98,28 +95,28 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
vp10_build_inter_predictor(y_mb_ptr, stride,
- &pred[0], 16,
- &mv,
- scale,
- 16, 16,
- which_mv,
- interp_filter, MV_PRECISION_Q3, x, y);
+ &pred[0], 16,
+ &mv,
+ scale,
+ 16, 16,
+ which_mv,
+ interp_filter, MV_PRECISION_Q3, x, y);
vp10_build_inter_predictor(u_mb_ptr, uv_stride,
- &pred[256], uv_block_width,
- &mv,
- scale,
- uv_block_width, uv_block_height,
- which_mv,
- interp_filter, mv_precision_uv, x, y);
+ &pred[256], uv_block_width,
+ &mv,
+ scale,
+ uv_block_width, uv_block_height,
+ which_mv,
+ interp_filter, mv_precision_uv, x, y);
vp10_build_inter_predictor(v_mb_ptr, uv_stride,
- &pred[512], uv_block_width,
- &mv,
- scale,
- uv_block_width, uv_block_height,
- which_mv,
- interp_filter, mv_precision_uv, x, y);
+ &pred[512], uv_block_width,
+ &mv,
+ scale,
+ uv_block_width, uv_block_height,
+ which_mv,
+ interp_filter, mv_precision_uv, x, y);
}
void vp10_temporal_filter_init(void) {
@@ -131,14 +128,14 @@
}
void vp10_temporal_filter_apply_c(uint8_t *frame1,
- unsigned int stride,
- uint8_t *frame2,
- unsigned int block_width,
- unsigned int block_height,
- int strength,
- int filter_weight,
- unsigned int *accumulator,
- uint16_t *count) {
+ unsigned int stride,
+ uint8_t *frame2,
+ unsigned int block_width,
+ unsigned int block_height,
+ int strength,
+ int filter_weight,
+ unsigned int *accumulator,
+ uint16_t *count) {
unsigned int i, j, k;
int modifier;
int byte = 0;
@@ -199,14 +196,14 @@
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
- unsigned int stride,
- uint8_t *frame2_8,
- unsigned int block_width,
- unsigned int block_height,
- int strength,
- int filter_weight,
- unsigned int *accumulator,
- uint16_t *count) {
+ unsigned int stride,
+ uint8_t *frame2_8,
+ unsigned int block_width,
+ unsigned int block_height,
+ int strength,
+ int filter_weight,
+ unsigned int *accumulator,
+ uint16_t *count) {
uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
unsigned int i, j, k;
@@ -323,7 +320,11 @@
0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
NULL, NULL,
+#if CONFIG_AFFINE_MOTION
+ &distortion, &sse, NULL, 0, 0, 0);
+#else
&distortion, &sse, NULL, 0, 0);
+#endif
// Restore input state
x->plane[0].src = src;
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index e8bddb0..3b6c419 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -272,6 +272,41 @@
}
}
+#if CONFIG_AFFINE_MOTION
+// Get pred block from up-sampled reference.
+void vpx_upsampled_pred_c(uint8_t *comp_pred,
+ int width, int height,
+ const uint8_t *ref, int ref_stride) {
+ int i, j, k;
+ int stride = ref_stride << 3;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0, k = 0; j < width; j++, k += 8) {
+ comp_pred[j] = ref[k];
+ }
+ comp_pred += width;
+ ref += stride;
+ }
+}
+
+void vpx_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+ int width, int height,
+ const uint8_t *ref, int ref_stride) {
+ int i, j;
+ int stride = ref_stride << 3;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ const int tmp = ref[(j << 3)] + pred[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += stride;
+ }
+}
+#endif
+
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 5457d00..8d1afdf 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1464,6 +1464,13 @@
add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+if (vpx_config("CONFIG_AFFINE_MOTION") eq "yes") {
+ add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
+ specialize qw/vpx_upsampled_pred sse2/;
+ add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+ specialize qw/vpx_comp_avg_upsampled_pred sse2/;
+}
+
#
# Subpixel Variance
#
diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c
index e6c9365..7943c84 100644
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -475,3 +475,232 @@
#undef FNS
#undef FN
#endif // CONFIG_USE_X86INC
+
+#if CONFIG_AFFINE_MOTION
+void vpx_upsampled_pred_sse2(uint8_t *comp_pred,
+ int width, int height,
+ const uint8_t *ref, int ref_stride) {
+ int i, j;
+ int stride = ref_stride << 3;
+
+ if (width >= 16) {
+ // read 16 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 16) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+ __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+ __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+ __m128i s4 = _mm_loadu_si128((const __m128i *)(ref + 64));
+ __m128i s5 = _mm_loadu_si128((const __m128i *)(ref + 80));
+ __m128i s6 = _mm_loadu_si128((const __m128i *)(ref + 96));
+ __m128i s7 = _mm_loadu_si128((const __m128i *)(ref + 112));
+ __m128i t0, t1, t2, t3;
+
+ t0 = _mm_unpacklo_epi8(s0, s1);
+ s1 = _mm_unpackhi_epi8(s0, s1);
+ t1 = _mm_unpacklo_epi8(s2, s3);
+ s3 = _mm_unpackhi_epi8(s2, s3);
+ t2 = _mm_unpacklo_epi8(s4, s5);
+ s5 = _mm_unpackhi_epi8(s4, s5);
+ t3 = _mm_unpacklo_epi8(s6, s7);
+ s7 = _mm_unpackhi_epi8(s6, s7);
+
+ s0 = _mm_unpacklo_epi8(t0, s1);
+ s2 = _mm_unpacklo_epi8(t1, s3);
+ s4 = _mm_unpacklo_epi8(t2, s5);
+ s6 = _mm_unpacklo_epi8(t3, s7);
+
+ *(int *)comp_pred = _mm_cvtsi128_si32(s0);
+ *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(s2);
+ *(int *)(comp_pred + 8) = _mm_cvtsi128_si32(s4);
+ *(int *)(comp_pred + 12) = _mm_cvtsi128_si32(s6);
+
+ comp_pred += 16;
+ ref += 16 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ } else if (width >= 8) {
+ // read 8 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 8) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+ __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+ __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+ __m128i t0, t1;
+
+ t0 = _mm_unpacklo_epi8(s0, s1);
+ s1 = _mm_unpackhi_epi8(s0, s1);
+ t1 = _mm_unpacklo_epi8(s2, s3);
+ s3 = _mm_unpackhi_epi8(s2, s3);
+
+ s0 = _mm_unpacklo_epi8(t0, s1);
+ s2 = _mm_unpacklo_epi8(t1, s3);
+
+ *(int *)comp_pred = _mm_cvtsi128_si32(s0);
+ *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(s2);
+ comp_pred += 8;
+ ref += 8 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ } else {
+ // read 4 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 4) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+ __m128i t0;
+
+ t0 = _mm_unpacklo_epi8(s0, s1);
+ s1 = _mm_unpackhi_epi8(s0, s1);
+ s0 = _mm_unpacklo_epi8(t0, s1);
+
+ *(int *)comp_pred = _mm_cvtsi128_si32(s0);
+
+ comp_pred += 4;
+ ref += 4 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ }
+}
+
+void vpx_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred,
+ int width, int height,
+ const uint8_t *ref, int ref_stride) {
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i one = _mm_set1_epi16(1);
+ int i, j;
+ int stride = ref_stride << 3;
+
+ if (width >= 16) {
+ // read 16 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 16) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+ __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+ __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+ __m128i s4 = _mm_loadu_si128((const __m128i *)(ref + 64));
+ __m128i s5 = _mm_loadu_si128((const __m128i *)(ref + 80));
+ __m128i s6 = _mm_loadu_si128((const __m128i *)(ref + 96));
+ __m128i s7 = _mm_loadu_si128((const __m128i *)(ref + 112));
+ __m128i p0 = _mm_loadu_si128((const __m128i *)pred);
+ __m128i p1;
+ __m128i t0, t1, t2, t3;
+
+ t0 = _mm_unpacklo_epi8(s0, s1);
+ s1 = _mm_unpackhi_epi8(s0, s1);
+ t1 = _mm_unpacklo_epi8(s2, s3);
+ s3 = _mm_unpackhi_epi8(s2, s3);
+ t2 = _mm_unpacklo_epi8(s4, s5);
+ s5 = _mm_unpackhi_epi8(s4, s5);
+ t3 = _mm_unpacklo_epi8(s6, s7);
+ s7 = _mm_unpackhi_epi8(s6, s7);
+
+ s0 = _mm_unpacklo_epi8(t0, s1);
+ s2 = _mm_unpacklo_epi8(t1, s3);
+ s4 = _mm_unpacklo_epi8(t2, s5);
+ s6 = _mm_unpacklo_epi8(t3, s7);
+
+ s0 = _mm_unpacklo_epi32(s0, s2);
+ s4 = _mm_unpacklo_epi32(s4, s6);
+ s0 = _mm_unpacklo_epi8(s0, zero);
+ s4 = _mm_unpacklo_epi8(s4, zero);
+
+ p1 = _mm_unpackhi_epi8(p0, zero);
+ p0 = _mm_unpacklo_epi8(p0, zero);
+ p0 = _mm_adds_epu16(s0, p0);
+ p1 = _mm_adds_epu16(s4, p1);
+ p0 = _mm_adds_epu16(p0, one);
+ p1 = _mm_adds_epu16(p1, one);
+
+ p0 = _mm_srli_epi16(p0, 1);
+ p1 = _mm_srli_epi16(p1, 1);
+ p0 = _mm_packus_epi16(p0, p1);
+
+ *(int *)comp_pred = _mm_cvtsi128_si32(p0);
+ p0 = _mm_srli_si128(p0, 4);
+ *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(p0);
+ p0 = _mm_srli_si128(p0, 4);
+ *(int *)(comp_pred + 8) = _mm_cvtsi128_si32(p0);
+ p0 = _mm_srli_si128(p0, 4);
+ *(int *)(comp_pred + 12) = _mm_cvtsi128_si32(p0);
+
+ comp_pred += 16;
+ pred += 16;
+ ref += 16 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ } else if (width >= 8) {
+ // read 8 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 8) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+ __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+ __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)pred);
+ __m128i t0, t1;
+
+ t0 = _mm_unpacklo_epi8(s0, s1);
+ s1 = _mm_unpackhi_epi8(s0, s1);
+ t1 = _mm_unpacklo_epi8(s2, s3);
+ s3 = _mm_unpackhi_epi8(s2, s3);
+
+ s0 = _mm_unpacklo_epi8(t0, s1);
+ s2 = _mm_unpacklo_epi8(t1, s3);
+ s0 = _mm_unpacklo_epi32(s0, s2);
+ s0 = _mm_unpacklo_epi8(s0, zero);
+
+ p0 = _mm_unpacklo_epi8(p0, zero);
+ p0 = _mm_adds_epu16(s0, p0);
+ p0 = _mm_adds_epu16(p0, one);
+ p0 = _mm_srli_epi16(p0, 1);
+ p0 = _mm_packus_epi16(p0, zero);
+
+ *(int *)comp_pred = _mm_cvtsi128_si32(p0);
+ p0 = _mm_srli_si128(p0, 4);
+ *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(p0);
+
+ comp_pred += 8;
+ pred += 8;
+ ref += 8 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ } else {
+ // read 4 points at one time
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+= 4) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+ __m128i p0 = _mm_cvtsi32_si128(*(const uint32_t *)pred);
+ __m128i t0;
+
+ t0 = _mm_unpacklo_epi8(s0, s1);
+ s1 = _mm_unpackhi_epi8(s0, s1);
+ s0 = _mm_unpacklo_epi8(t0, s1);
+ s0 = _mm_unpacklo_epi8(s0, zero);
+
+ p0 = _mm_unpacklo_epi8(p0, zero);
+ p0 = _mm_adds_epu16(s0, p0);
+ p0 = _mm_adds_epu16(p0, one);
+ p0 = _mm_srli_epi16(p0, 1);
+ p0 = _mm_packus_epi16(p0, zero);
+
+ *(int *)comp_pred = _mm_cvtsi128_si32(p0);
+
+ comp_pred += 4;
+ pred += 4;
+ ref += 4 * 8;
+ }
+ ref += stride - (width << 3);
+ }
+ }
+}
+#endif
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 670144b..5212075 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -210,6 +210,30 @@
extend_frame(ybf, inner_bw);
}
+void vpx_extend_frame_borders_y_c(YV12_BUFFER_CONFIG *ybf) {
+ int ext_size = ybf->border;
+ assert(ybf->y_height - ybf->y_crop_height < 16);
+ assert(ybf->y_width - ybf->y_crop_width < 16);
+ assert(ybf->y_height - ybf->y_crop_height >= 0);
+ assert(ybf->y_width - ybf->y_crop_width >= 0);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ extend_plane_high(ybf->y_buffer, ybf->y_stride,
+ ybf->y_crop_width, ybf->y_crop_height,
+ ext_size, ext_size,
+ ext_size + ybf->y_height - ybf->y_crop_height,
+ ext_size + ybf->y_width - ybf->y_crop_width);
+ return;
+ }
+#endif
+ extend_plane(ybf->y_buffer, ybf->y_stride,
+ ybf->y_crop_width, ybf->y_crop_height,
+ ext_size, ext_size,
+ ext_size + ybf->y_height - ybf->y_crop_height,
+ ext_size + ybf->y_width - ybf->y_crop_width);
+}
+
#if CONFIG_VP9_HIGHBITDEPTH
void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
diff --git a/vpx_scale/vpx_scale_rtcd.pl b/vpx_scale/vpx_scale_rtcd.pl
index 56b952b..68a1a3e 100644
--- a/vpx_scale/vpx_scale_rtcd.pl
+++ b/vpx_scale/vpx_scale_rtcd.pl
@@ -28,5 +28,8 @@
add_proto qw/void vpx_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf";
specialize qw/vpx_extend_frame_inner_borders dspr2/;
+
+ add_proto qw/void vpx_extend_frame_borders_y/, "struct yv12_buffer_config *ybf";
+ specialize qw/vpx_extend_frame_borders_y/;
}
1;