Let hbd conv func be flexible

This CL allows us to change filter coefficients easily for the SIMD
implementation of the high bitdepth convolution functions.

Change-Id: I454a5c76d3ba9e4454118c6a9d87737b3aa24898
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index d648448..da92def 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -71,7 +71,6 @@
 AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
 ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
 AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_filters_sse4.h
 endif
 AV1_COMMON_SRCS-yes += common/convolve.c
 AV1_COMMON_SRCS-yes += common/convolve.h
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 86ede12..7cbb0ce 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -39,8 +39,8 @@
 #
 # 10/12-tap convolution filters
 #
-add_proto qw/void av1_convolve_init/, "void";
-specialize qw/av1_convolve_init ssse3/;
+add_proto qw/void av1_lowbd_convolve_init/, "void";
+specialize qw/av1_lowbd_convolve_init ssse3/;
 
 add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
 specialize qw/av1_convolve_horiz ssse3/;
@@ -49,6 +49,8 @@
 specialize qw/av1_convolve_vert ssse3/;
 
 if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
+  add_proto qw/void av1_highbd_convolve_init/, "void";
+  specialize qw/av1_highbd_convolve_init sse4_1/;
   add_proto qw/void av1_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
   specialize qw/av1_highbd_convolve_horiz sse4_1/;
   add_proto qw/void av1_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 9ef1b26..cf2acd0 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -16,6 +16,7 @@
 #include "./av1_rtcd.h"
 #include "av1/common/convolve.h"
 #include "av1/common/filter.h"
+#include "av1/common/onyxc_int.h"
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_ports/mem.h"
 
@@ -410,11 +411,29 @@
   }
 }
 
-void av1_convolve_init_c(void) {
+void av1_lowbd_convolve_init_c(void) {  // body is a no-op in the C version
   // A placeholder for SIMD initialization
   return;
 }
 
+void av1_highbd_convolve_init_c(void) {
+  // Nothing to set up in the C version; placeholder for SIMD initialization
+  return;
+}
+
+void av1_convolve_init(AV1_COMMON *cm) {  // picks the init by bit depth
+#if CONFIG_AOM_HIGHBITDEPTH
+  if (cm->use_highbitdepth)
+    av1_highbd_convolve_init();  // only one of the two inits runs per call
+  else
+    av1_lowbd_convolve_init();
+#else
+  (void)cm;  // cm is only read in high bitdepth builds
+  av1_lowbd_convolve_init();
+#endif  // CONFIG_AOM_HIGHBITDEPTH
+  return;
+}
+
 #if CONFIG_AOM_HIGHBITDEPTH
 void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
                                  uint16_t *dst, int dst_stride, int w, int h,
diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index 19712fd..471152e 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h
@@ -42,7 +42,8 @@
   conv_params.plane = plane;
   return conv_params;
 }
-
+struct AV1Common;  // forward declaration
+void av1_convolve_init(struct AV1Common *cm);  // defined in convolve.c
 #if CONFIG_CONVOLVE_ROUND
 void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                      int dst_stride, int w, int h,
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index f652cbe..8cf401e 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -2010,7 +2010,7 @@
 #if CONFIG_ADAPT_SCAN
   av1_init_scan_order(cm);
 #endif
-  av1_convolve_init();
+  av1_convolve_init(cm);
   cm->fc->initialized = 1;
 
   if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
diff --git a/av1/common/x86/av1_convolve_ssse3.c b/av1/common/x86/av1_convolve_ssse3.c
index 26a504d..91102bb 100644
--- a/av1/common/x86/av1_convolve_ssse3.c
+++ b/av1/common/x86/av1_convolve_ssse3.c
@@ -997,7 +997,7 @@
 };
 #endif
 
-void av1_convolve_init_ssse3(void) {
+void av1_lowbd_convolve_init_ssse3(void) {
 #if USE_TEMPORALFILTER_12TAP
   {
     InterpFilterParams filter_params =
diff --git a/av1/common/x86/av1_highbd_convolve_filters_sse4.h b/av1/common/x86/av1_highbd_convolve_filters_sse4.h
deleted file mode 100644
index 9ffabdf..0000000
--- a/av1/common/x86/av1_highbd_convolve_filters_sse4.h
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
-#define AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
-
-#include "./aom_config.h"
-
-#if CONFIG_AOM_HIGHBITDEPTH
-#if CONFIG_DUAL_FILTER
-DECLARE_ALIGNED(16, static const int16_t,
-                sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = {
-  {
-      { 0, 1, 0, 1, 0, 1, 0, 1 },
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -7, 127, -7, 127, -7, 127, -7, 127 },
-      { 8, -4, 8, -4, 8, -4, 8, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-      { 0, 0, 0, 0, 0, 0, 0, 0 },
-  },
-  {
-      { 0, 1, 0, 1, 0, 1, 0, 1 },
-      { -3, 5, -3, 5, -3, 5, -3, 5 },
-      { -12, 124, -12, 124, -12, 124, -12, 124 },
-      { 18, -8, 18, -8, 18, -8, 18, -8 },
-      { 4, -2, 4, -2, 4, -2, 4, -2 },
-      { 1, 0, 1, 0, 1, 0, 1, 0 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 8, -4, 8, -4, 8, -4, 8 },
-      { -17, 120, -17, 120, -17, 120, -17, 120 },
-      { 28, -11, 28, -11, 28, -11, 28, -11 },
-      { 6, -3, 6, -3, 6, -3, 6, -3 },
-      { 1, -1, 1, -1, 1, -1, 1, -1 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 10, -4, 10, -4, 10, -4, 10 },
-      { -21, 114, -21, 114, -21, 114, -21, 114 },
-      { 38, -15, 38, -15, 38, -15, 38, -15 },
-      { 8, -4, 8, -4, 8, -4, 8, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -5, 11, -5, 11, -5, 11, -5, 11 },
-      { -23, 107, -23, 107, -23, 107, -23, 107 },
-      { 49, -18, 49, -18, 49, -18, 49, -18 },
-      { 9, -5, 9, -5, 9, -5, 9, -5 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -25, 99, -25, 99, -25, 99, -25, 99 },
-      { 60, -21, 60, -21, 60, -21, 60, -21 },
-      { 11, -6, 11, -6, 11, -6, 11, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -25, 90, -25, 90, -25, 90, -25, 90 },
-      { 70, -23, 70, -23, 70, -23, 70, -23 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -24, 80, -24, 80, -24, 80, -24, 80 },
-      { 80, -24, 80, -24, 80, -24, 80, -24 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -23, 70, -23, 70, -23, 70, -23, 70 },
-      { 90, -25, 90, -25, 90, -25, 90, -25 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 11, -6, 11, -6, 11, -6, 11 },
-      { -21, 60, -21, 60, -21, 60, -21, 60 },
-      { 99, -25, 99, -25, 99, -25, 99, -25 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -5, 9, -5, 9, -5, 9, -5, 9 },
-      { -18, 49, -18, 49, -18, 49, -18, 49 },
-      { 107, -23, 107, -23, 107, -23, 107, -23 },
-      { 11, -5, 11, -5, 11, -5, 11, -5 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 8, -4, 8, -4, 8, -4, 8 },
-      { -15, 38, -15, 38, -15, 38, -15, 38 },
-      { 114, -21, 114, -21, 114, -21, 114, -21 },
-      { 10, -4, 10, -4, 10, -4, 10, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { -1, 1, -1, 1, -1, 1, -1, 1 },
-      { -3, 6, -3, 6, -3, 6, -3, 6 },
-      { -11, 28, -11, 28, -11, 28, -11, 28 },
-      { 120, -17, 120, -17, 120, -17, 120, -17 },
-      { 8, -4, 8, -4, 8, -4, 8, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { 0, 1, 0, 1, 0, 1, 0, 1 },
-      { -2, 4, -2, 4, -2, 4, -2, 4 },
-      { -8, 18, -8, 18, -8, 18, -8, 18 },
-      { 124, -12, 124, -12, 124, -12, 124, -12 },
-      { 5, -3, 5, -3, 5, -3, 5, -3 },
-      { 1, 0, 1, 0, 1, 0, 1, 0 },
-  },
-  {
-      { 0, 0, 0, 0, 0, 0, 0, 0 },
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 8, -4, 8, -4, 8, -4, 8 },
-      { 127, -7, 127, -7, 127, -7, 127, -7 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-      { 1, 0, 1, 0, 1, 0, 1, 0 },
-  },
-};
-#endif
-#endif
-#if CONFIG_AOM_HIGHBITDEPTH
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(
-    16, static const int16_t,
-    sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = {
-  {
-      { 0, 1, 0, 1, 0, 1, 0, 1 },
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -7, 127, -7, 127, -7, 127, -7, 127 },
-      { 8, -4, 8, -4, 8, -4, 8, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-      { 0, 0, 0, 0, 0, 0, 0, 0 },
-  },
-  {
-      { 0, 1, 0, 1, 0, 1, 0, 1 },
-      { -3, 5, -3, 5, -3, 5, -3, 5 },
-      { -12, 124, -12, 124, -12, 124, -12, 124 },
-      { 18, -8, 18, -8, 18, -8, 18, -8 },
-      { 4, -2, 4, -2, 4, -2, 4, -2 },
-      { 1, 0, 1, 0, 1, 0, 1, 0 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 8, -4, 8, -4, 8, -4, 8 },
-      { -17, 120, -17, 120, -17, 120, -17, 120 },
-      { 28, -11, 28, -11, 28, -11, 28, -11 },
-      { 6, -3, 6, -3, 6, -3, 6, -3 },
-      { 1, -1, 1, -1, 1, -1, 1, -1 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 10, -4, 10, -4, 10, -4, 10 },
-      { -21, 114, -21, 114, -21, 114, -21, 114 },
-      { 38, -15, 38, -15, 38, -15, 38, -15 },
-      { 8, -4, 8, -4, 8, -4, 8, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -5, 11, -5, 11, -5, 11, -5, 11 },
-      { -23, 107, -23, 107, -23, 107, -23, 107 },
-      { 49, -18, 49, -18, 49, -18, 49, -18 },
-      { 9, -5, 9, -5, 9, -5, 9, -5 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -25, 99, -25, 99, -25, 99, -25, 99 },
-      { 60, -21, 60, -21, 60, -21, 60, -21 },
-      { 11, -6, 11, -6, 11, -6, 11, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -25, 90, -25, 90, -25, 90, -25, 90 },
-      { 70, -23, 70, -23, 70, -23, 70, -23 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -24, 80, -24, 80, -24, 80, -24, 80 },
-      { 80, -24, 80, -24, 80, -24, 80, -24 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 12, -6, 12, -6, 12, -6, 12 },
-      { -23, 70, -23, 70, -23, 70, -23, 70 },
-      { 90, -25, 90, -25, 90, -25, 90, -25 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 3, -1, 3, -1, 3, -1, 3 },
-      { -6, 11, -6, 11, -6, 11, -6, 11 },
-      { -21, 60, -21, 60, -21, 60, -21, 60 },
-      { 99, -25, 99, -25, 99, -25, 99, -25 },
-      { 12, -6, 12, -6, 12, -6, 12, -6 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -5, 9, -5, 9, -5, 9, -5, 9 },
-      { -18, 49, -18, 49, -18, 49, -18, 49 },
-      { 107, -23, 107, -23, 107, -23, 107, -23 },
-      { 11, -5, 11, -5, 11, -5, 11, -5 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-  },
-  {
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 8, -4, 8, -4, 8, -4, 8 },
-      { -15, 38, -15, 38, -15, 38, -15, 38 },
-      { 114, -21, 114, -21, 114, -21, 114, -21 },
-      { 10, -4, 10, -4, 10, -4, 10, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { -1, 1, -1, 1, -1, 1, -1, 1 },
-      { -3, 6, -3, 6, -3, 6, -3, 6 },
-      { -11, 28, -11, 28, -11, 28, -11, 28 },
-      { 120, -17, 120, -17, 120, -17, 120, -17 },
-      { 8, -4, 8, -4, 8, -4, 8, -4 },
-      { 2, -1, 2, -1, 2, -1, 2, -1 },
-  },
-  {
-      { 0, 1, 0, 1, 0, 1, 0, 1 },
-      { -2, 4, -2, 4, -2, 4, -2, 4 },
-      { -8, 18, -8, 18, -8, 18, -8, 18 },
-      { 124, -12, 124, -12, 124, -12, 124, -12 },
-      { 5, -3, 5, -3, 5, -3, 5, -3 },
-      { 1, 0, 1, 0, 1, 0, 1, 0 },
-  },
-  {
-      { 0, 0, 0, 0, 0, 0, 0, 0 },
-      { -1, 2, -1, 2, -1, 2, -1, 2 },
-      { -4, 8, -4, 8, -4, 8, -4, 8 },
-      { 127, -7, 127, -7, 127, -7, 127, -7 },
-      { 3, -1, 3, -1, 3, -1, 3, -1 },
-      { 1, 0, 1, 0, 1, 0, 1, 0 },
-  },
-};
-#endif
-#endif
-#endif  // AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
diff --git a/av1/common/x86/av1_highbd_convolve_sse4.c b/av1/common/x86/av1_highbd_convolve_sse4.c
index 4970bb3..c154c6b 100644
--- a/av1/common/x86/av1_highbd_convolve_sse4.c
+++ b/av1/common/x86/av1_highbd_convolve_sse4.c
@@ -14,9 +14,16 @@
 
 #include "./av1_rtcd.h"
 #include "av1/common/filter.h"
-#include "av1/common/x86/av1_highbd_convolve_filters_sse4.h"
 
-typedef const int16_t (*HbdSubpelFilterCoeffs)[8];
+#if CONFIG_DUAL_FILTER  // table filled by av1_highbd_convolve_init_sse4_1()
+DECLARE_ALIGNED(16, static int16_t, subpel_filters_sharp[15][6][8]);
+#endif  // CONFIG_DUAL_FILTER
+
+#if USE_TEMPORALFILTER_12TAP  // table filled by the same init function
+DECLARE_ALIGNED(16, static int16_t, subpel_temporalfilter[15][6][8]);
+#endif  // USE_TEMPORALFILTER_12TAP
+
+typedef int16_t (*HbdSubpelFilterCoeffs)[8];  // points at rows of 8 int16_t
 
 typedef void (*TransposeSave)(const int width, int pixelsNum, uint32_t *src,
                               int src_stride, uint16_t *dst, int dst_stride,
@@ -26,12 +33,12 @@
 hbd_get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) {
 #if CONFIG_DUAL_FILTER
   if (p.interp_filter == MULTITAP_SHARP) {
-    return &sub_pel_filters_12sharp_highbd_ver_signal_dir[index][0];
+    return &subpel_filters_sharp[index][0];
   }
 #endif
 #if USE_TEMPORALFILTER_12TAP
   if (p.interp_filter == TEMPORALFILTER_12TAP) {
-    return &sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index][0];
+    return &subpel_temporalfilter[index][0];
   }
 #endif
   (void)p;
@@ -39,6 +46,47 @@
   return NULL;
 }
 
+static void init_simd_filter(const int16_t *filter_ptr, int taps,
+                             int16_t (*simd_filter)[6][8]) {  // output table
+  int shift;  // row of filter_ptr; its result is stored at index shift - 1
+  int offset = (12 - taps) / 2;  // leading zero slots when taps < 12
+  for (shift = 1; shift < SUBPEL_SHIFTS; ++shift) {  // row 0 is never read
+    const int16_t *filter_row = filter_ptr + shift * taps;
+    int i, j;
+    for (i = 0; i < 12; ++i) {  // i: tap position in the 12-slot layout
+      for (j = 0; j < 4; ++j) {  // j: each tap is written 4 times in its row
+        int r = i / 2;  // taps 2r and 2r+1 share output row r
+        int c = j * 2 + (i % 2);  // even taps -> even lanes, odd -> odd lanes
+        if (i - offset >= 0 && i - offset < taps)
+          simd_filter[shift - 1][r][c] = filter_row[i - offset];
+        else
+          simd_filter[shift - 1][r][c] = 0;  // zero-pad outside the kernel
+      }
+    }
+  }
+}
+
+void av1_highbd_convolve_init_sse4_1(void) {  // builds the SIMD filter tables
+#if USE_TEMPORALFILTER_12TAP
+  {
+    InterpFilterParams filter_params =
+        av1_get_interp_filter_params(TEMPORALFILTER_12TAP);
+    int taps = filter_params.taps;
+    const int16_t *filter_ptr = filter_params.filter_ptr;
+    init_simd_filter(filter_ptr, taps, subpel_temporalfilter);  // fill table
+  }
+#endif  // USE_TEMPORALFILTER_12TAP
+#if CONFIG_DUAL_FILTER
+  {
+    InterpFilterParams filter_params =
+        av1_get_interp_filter_params(MULTITAP_SHARP);
+    int taps = filter_params.taps;
+    const int16_t *filter_ptr = filter_params.filter_ptr;
+    init_simd_filter(filter_ptr, taps, subpel_filters_sharp);  // fill table
+  }
+#endif  // CONFIG_DUAL_FILTER
+}
+
 // pixelsNum 0: write all 4 pixels
 //           1/2/3: residual pixels 1/2/3
 static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst,
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 8f68d61..5fbf239 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -591,7 +591,7 @@
 #if CONFIG_ADAPT_SCAN
   av1_init_scan_order(cm);
 #endif
-  av1_convolve_init();
+  av1_convolve_init(cm);
   av1_initialize_rd_consts(cpi);
 
   // Tiling is ignored in the first pass.
diff --git a/test/av1_convolve_optimz_test.cc b/test/av1_convolve_optimz_test.cc
index 962d244..3d14b9a 100644
--- a/test/av1_convolve_optimz_test.cc
+++ b/test/av1_convolve_optimz_test.cc
@@ -43,7 +43,7 @@
 // Test parameter list:
 //  <convolve_horiz_func, convolve_vert_func,
 //  <width, height>, filter_params, subpel_x_q4, avg, bit_dpeth>
-typedef tuple<hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
+typedef tuple<ConvInit, hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
               InterpFilter, int, int, int>
     HbdConvParams;
 #endif
@@ -228,7 +228,7 @@
 #if HAVE_SSSE3 && CONFIG_DUAL_FILTER
 INSTANTIATE_TEST_CASE_P(
     SSSE3, AV1ConvolveOptimzTest,
-    ::testing::Combine(::testing::Values(av1_convolve_init_ssse3),
+    ::testing::Combine(::testing::Values(av1_lowbd_convolve_init_ssse3),
                        ::testing::Values(av1_convolve_horiz_ssse3),
                        ::testing::Values(av1_convolve_vert_ssse3),
                        ::testing::ValuesIn(kBlockDim),
@@ -243,15 +243,17 @@
  public:
   virtual ~AV1HbdConvolveOptimzTest() {}
   virtual void SetUp() {
-    conv_horiz_ = GET_PARAM(0);
-    conv_vert_ = GET_PARAM(1);
-    BlockDimension block = GET_PARAM(2);
+    ConvInit conv_init = GET_PARAM(0);
+    conv_init();
+    conv_horiz_ = GET_PARAM(1);
+    conv_vert_ = GET_PARAM(2);
+    BlockDimension block = GET_PARAM(3);
     width_ = std::tr1::get<0>(block);
     height_ = std::tr1::get<1>(block);
-    filter_ = GET_PARAM(3);
-    subpel_ = GET_PARAM(4);
-    avg_ = GET_PARAM(5);
-    bit_depth_ = GET_PARAM(6);
+    filter_ = GET_PARAM(4);
+    subpel_ = GET_PARAM(5);
+    avg_ = GET_PARAM(6);
+    bit_depth_ = GET_PARAM(7);
 
     alloc_ = new uint16_t[maxBlockSize * 4];
     src_ = alloc_ + (vertiOffset * maxWidth);
@@ -390,7 +392,8 @@
 
 INSTANTIATE_TEST_CASE_P(
     SSE4_1, AV1HbdConvolveOptimzTest,
-    ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_sse4_1),
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_init_sse4_1),
+                       ::testing::Values(av1_highbd_convolve_horiz_sse4_1),
                        ::testing::Values(av1_highbd_convolve_vert_sse4_1),
                        ::testing::ValuesIn(kBlockDim),
                        ::testing::ValuesIn(kFilter),