Various wedge enhancements

Increases the number of wedges for smaller blocks and removes the
wedge coding mode for blocks larger than 32x32.

Also adds various other enhancements for subsequent experimentation,
including adding provision for multiple smoothing functions
(though one is used currently), adds a speed feature that decides
the sign for interinter wedges using a fast mechanism, and refactors
wedge representations.

lowres: -2.651% BDRATE

Most of the gain is due to increase in codebook size for 8x8 - 16x16.

Change-Id: I50669f558c8d0d45e5a6f70aca4385a185b58b5b
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 21147af..071e89f 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -53,43 +53,6 @@
 }
 
 #if CONFIG_EXT_INTER
-#define WEDGE_BITS_SML    2
-#define WEDGE_BITS_MED    3
-#define WEDGE_BITS_BIG    4
-#define WEDGE_NONE       -1
-#define WEDGE_WEIGHT_BITS 6
-
-static const int get_wedge_bits_lookup[BLOCK_SIZES] = {
-  0,
-  0,
-  0,
-  WEDGE_BITS_SML,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-#if CONFIG_EXT_PARTITION
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-#endif  // CONFIG_EXT_PARTITION
-};
-
-static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
-  (void) sb_type;
-  return get_wedge_bits_lookup[sb_type] > 0;
-}
-
-static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
-  (void) sb_type;
-  return get_wedge_bits_lookup[sb_type] > 0;
-}
-
 static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
   return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
 }
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 0ae2572..a47b2fe 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -281,16 +281,16 @@
 };
 
 static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
-  208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208,
+  208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
 #if CONFIG_EXT_PARTITION
   208, 208, 208
 #endif  // CONFIG_EXT_PARTITION
 };
 
 static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
-  208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208,
+  208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
 #if CONFIG_EXT_PARTITION
-  208, 208, 208
+  255, 255, 255
 #endif  // CONFIG_EXT_PARTITION
 };
 #endif  // CONFIG_EXT_INTER
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index ecfb54c..d6ac4bb 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -23,206 +23,352 @@
 #endif  // CONFIG_OBMC
 
 #if CONFIG_EXT_INTER
-static int get_masked_weight(int m) {
+#define NSMOOTHERS  2
+static int get_masked_weight(int m, int smoothness) {
 #define SMOOTHER_LEN  32
-  static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
-    0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,
-    1,  1,  2,  4,  6, 10, 16, 23,
-    32,
-    41, 48, 54, 58, 60, 62, 63, 63,
-    64, 64, 64, 64, 64, 64, 64, 64,
-    64, 64, 64, 64, 64, 64, 64, 64,
-    64, 64, 64, 64, 64, 64, 64, 64,
+  static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = {
+    {
+      0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  1,  2,  4,  7, 13, 21,
+      32,
+      43, 51, 57, 60, 62, 63, 64, 64,
+      64, 64, 64, 64, 64, 64, 64, 64,
+      64, 64, 64, 64, 64, 64, 64, 64,
+      64, 64, 64, 64, 64, 64, 64, 64,
+    }, {
+      0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  1,  1,  1,  1,  2,  2,
+      3,  3,  4,  4,  5,  6,  8,  9,
+      11, 13, 15, 17, 20, 23, 26, 29,
+      32,
+      35, 38, 41, 44, 47, 49, 51, 53,
+      55, 56, 58, 59, 60, 60, 61, 61,
+      62, 62, 63, 63, 63, 63, 63, 63,
+      64, 64, 64, 64, 64, 64, 64, 64,
+    }
   };
   if (m < -SMOOTHER_LEN)
     return 0;
   else if (m > SMOOTHER_LEN)
     return (1 << WEDGE_WEIGHT_BITS);
   else
-    return smoothfn[m + SMOOTHER_LEN];
+    return smoothfn[smoothness][m + SMOOTHER_LEN];
 }
 
-#define WEDGE_OBLIQUE  1
-#define WEDGE_STRAIGHT 0
+// Angles are with respect to horizontal anti-clockwise
+typedef enum {
+  WEDGE_HORIZONTAL = 0,
+  WEDGE_VERTICAL = 1,
+  WEDGE_OBLIQUE27 = 2,
+  WEDGE_OBLIQUE63 = 3,
+  WEDGE_OBLIQUE117 = 4,
+  WEDGE_OBLIQUE153 = 5,
+  WEDGE_DIRECTIONS
+} WedgeDirectionType;
 
-#define WEDGE_PARMS    5
+#define WEDGE_PARMS    4
 
-// [negative][transpose][reverse]
-DECLARE_ALIGNED(16, static uint8_t,
-                wedge_mask_obl[2][2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
-// [negative][transpose]
-DECLARE_ALIGNED(16, static uint8_t,
-                wedge_mask_str[2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
+// [smoother][negative][direction]
+DECLARE_ALIGNED(
+    16, static uint8_t,
+    wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
+                  [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
 
 // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
 void vp10_init_wedge_masks() {
-  int i, j;
+  int i, j, s;
   const int w = MASK_MASTER_SIZE;
   const int h = MASK_MASTER_SIZE;
   const int stride = MASK_MASTER_STRIDE;
-  const int a[4] = {2, 1, 4, 4};
-  for (i = 0; i < h; ++i)
-    for (j = 0; j < w; ++j) {
-      int x = (2 * j + 1 - (a[2] * w) / 4);
-      int y = (2 * i + 1 - (a[3] * h) / 4);
-      int m = (a[0] * x + a[1] * y) / 2;
-      wedge_mask_obl[1][0][0][i * stride + j] =
-      wedge_mask_obl[1][1][0][j * stride + i] =
-          get_masked_weight(m);
-      wedge_mask_obl[1][0][1][i * stride + w - 1 - j] =
-      wedge_mask_obl[1][1][1][(w - 1 - j) * stride + i] =
-          (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m);
-      wedge_mask_obl[0][0][0][i * stride + j] =
-      wedge_mask_obl[0][1][0][j * stride + i] =
-          (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m);
-      wedge_mask_obl[0][0][1][i * stride + w - 1 - j] =
-      wedge_mask_obl[0][1][1][(w - 1 - j) * stride + i] =
-          get_masked_weight(m);
-      wedge_mask_str[1][0][i * stride + j] =
-      wedge_mask_str[1][1][j * stride + i] =
-          get_masked_weight(x);
-      wedge_mask_str[0][0][i * stride + j] =
-      wedge_mask_str[0][1][j * stride + i] =
-          (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x);
-    }
+  const int a[2] = {2, 1};
+  const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
+  for (s = 0; s < NSMOOTHERS; s++) {
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) {
+        int x = (2 * j + 1 - w);
+        int y = (2 * i + 1 - h);
+        int m = (int)rint((a[0] * x + a[1] * y) / asqrt);
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
+            get_masked_weight(m, s);
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+            (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s);
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] =
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] =
+            (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s);
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+            get_masked_weight(m, s);
+        wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
+        wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
+            get_masked_weight(x, s);
+        wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] =
+        wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] =
+            (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x, s);
+      }
+  }
 }
 
-static const int wedge_params_sml[1 << WEDGE_BITS_SML]
-                                 [WEDGE_PARMS] = {
-    {WEDGE_OBLIQUE,  1, 1, 4, 4},
-    {WEDGE_OBLIQUE,  1, 0, 4, 4},
-    {WEDGE_OBLIQUE,  0, 1, 4, 4},
-    {WEDGE_OBLIQUE,  0, 0, 4, 4},
+static const int wedge_params_4[1 << WEDGE_BITS_2]
+                               [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 1},
+    {WEDGE_OBLIQUE63,  4, 4, 1},
+    {WEDGE_OBLIQUE117, 4, 4, 1},
+    {WEDGE_OBLIQUE153, 4, 4, 1},
 };
 
-static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED]
-                                      [WEDGE_PARMS] = {
-    {WEDGE_OBLIQUE,  1, 1, 4, 4},
-    {WEDGE_OBLIQUE,  1, 0, 4, 4},
-    {WEDGE_OBLIQUE,  0, 1, 4, 4},
-    {WEDGE_OBLIQUE,  0, 0, 4, 4},
+static const int wedge_params_8_hgtw[1 << WEDGE_BITS_3]
+                                    [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 1},
+    {WEDGE_OBLIQUE63,  4, 4, 1},
+    {WEDGE_OBLIQUE117, 4, 4, 1},
+    {WEDGE_OBLIQUE153, 4, 4, 1},
 
-    {WEDGE_OBLIQUE,  1, 1, 4, 2},
-    {WEDGE_OBLIQUE,  1, 1, 4, 6},
-    {WEDGE_OBLIQUE,  1, 0, 4, 2},
-    {WEDGE_OBLIQUE,  1, 0, 4, 6},
+    {WEDGE_OBLIQUE27,  4, 2, 1},
+    {WEDGE_OBLIQUE27,  4, 6, 1},
+    {WEDGE_OBLIQUE153, 4, 2, 1},
+    {WEDGE_OBLIQUE153, 4, 6, 1},
 };
 
-static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED]
-                                      [WEDGE_PARMS] = {
-    {WEDGE_OBLIQUE,  1, 1, 4, 4},
-    {WEDGE_OBLIQUE,  1, 0, 4, 4},
-    {WEDGE_OBLIQUE,  0, 1, 4, 4},
-    {WEDGE_OBLIQUE,  0, 0, 4, 4},
+static const int wedge_params_8_hltw[1 << WEDGE_BITS_3]
+                                    [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 1},
+    {WEDGE_OBLIQUE63,  4, 4, 1},
+    {WEDGE_OBLIQUE117, 4, 4, 1},
+    {WEDGE_OBLIQUE153, 4, 4, 1},
 
-    {WEDGE_OBLIQUE,  0, 1, 2, 4},
-    {WEDGE_OBLIQUE,  0, 1, 6, 4},
-    {WEDGE_OBLIQUE,  0, 0, 2, 4},
-    {WEDGE_OBLIQUE,  0, 0, 6, 4},
+    {WEDGE_OBLIQUE63,  2, 4, 1},
+    {WEDGE_OBLIQUE63,  6, 4, 1},
+    {WEDGE_OBLIQUE117, 2, 4, 1},
+    {WEDGE_OBLIQUE117, 6, 4, 1},
 };
 
-static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED]
-                                      [WEDGE_PARMS] = {
-    {WEDGE_OBLIQUE,  1, 1, 4, 4},
-    {WEDGE_OBLIQUE,  1, 0, 4, 4},
-    {WEDGE_OBLIQUE,  0, 1, 4, 4},
-    {WEDGE_OBLIQUE,  0, 0, 4, 4},
+static const int wedge_params_8_heqw[1 << WEDGE_BITS_3]
+                                     [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 1},
+    {WEDGE_OBLIQUE63,  4, 4, 1},
+    {WEDGE_OBLIQUE117, 4, 4, 1},
+    {WEDGE_OBLIQUE153, 4, 4, 1},
 
-    {WEDGE_STRAIGHT, 1, 0, 4, 2},
-    {WEDGE_STRAIGHT, 1, 0, 4, 6},
-    {WEDGE_STRAIGHT, 0, 0, 2, 4},
-    {WEDGE_STRAIGHT, 0, 0, 6, 4},
+    {WEDGE_HORIZONTAL, 4, 2, 1},
+    {WEDGE_HORIZONTAL, 4, 6, 1},
+    {WEDGE_VERTICAL,   2, 4, 1},
+    {WEDGE_VERTICAL,   6, 4, 1},
 };
 
-static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG]
-                                      [WEDGE_PARMS] = {
-    {WEDGE_OBLIQUE,  1, 1, 4, 4},
-    {WEDGE_OBLIQUE,  1, 0, 4, 4},
-    {WEDGE_OBLIQUE,  0, 1, 4, 4},
-    {WEDGE_OBLIQUE,  0, 0, 4, 4},
+static const int wedge_params_16_hgtw[1 << WEDGE_BITS_4]
+                                     [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 0},
+    {WEDGE_OBLIQUE63,  4, 4, 0},
+    {WEDGE_OBLIQUE117, 4, 4, 0},
+    {WEDGE_OBLIQUE153, 4, 4, 0},
 
-    {WEDGE_OBLIQUE,  1, 1, 4, 2},
-    {WEDGE_OBLIQUE,  1, 1, 4, 6},
-    {WEDGE_OBLIQUE,  1, 0, 4, 2},
-    {WEDGE_OBLIQUE,  1, 0, 4, 6},
+    {WEDGE_HORIZONTAL, 4, 2, 0},
+    {WEDGE_HORIZONTAL, 4, 4, 0},
+    {WEDGE_HORIZONTAL, 4, 6, 0},
+    {WEDGE_VERTICAL,   4, 4, 0},
 
-    {WEDGE_OBLIQUE,  0, 1, 2, 4},
-    {WEDGE_OBLIQUE,  0, 1, 6, 4},
-    {WEDGE_OBLIQUE,  0, 0, 2, 4},
-    {WEDGE_OBLIQUE,  0, 0, 6, 4},
+    {WEDGE_OBLIQUE27,  4, 2, 0},
+    {WEDGE_OBLIQUE27,  4, 6, 0},
+    {WEDGE_OBLIQUE153, 4, 2, 0},
+    {WEDGE_OBLIQUE153, 4, 6, 0},
 
-    {WEDGE_STRAIGHT, 1, 0, 4, 2},
-    {WEDGE_STRAIGHT, 1, 0, 4, 4},
-    {WEDGE_STRAIGHT, 1, 0, 4, 6},
-    {WEDGE_STRAIGHT, 0, 0, 4, 4},
+    {WEDGE_OBLIQUE63,  2, 4, 0},
+    {WEDGE_OBLIQUE63,  6, 4, 0},
+    {WEDGE_OBLIQUE117, 2, 4, 0},
+    {WEDGE_OBLIQUE117, 6, 4, 0},
 };
 
-static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG]
-                                      [WEDGE_PARMS] = {
-    {WEDGE_OBLIQUE,  1, 1, 4, 4},
-    {WEDGE_OBLIQUE,  1, 0, 4, 4},
-    {WEDGE_OBLIQUE,  0, 1, 4, 4},
-    {WEDGE_OBLIQUE,  0, 0, 4, 4},
+static const int wedge_params_16_hltw[1 << WEDGE_BITS_4]
+                                     [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 0},
+    {WEDGE_OBLIQUE63,  4, 4, 0},
+    {WEDGE_OBLIQUE117, 4, 4, 0},
+    {WEDGE_OBLIQUE153, 4, 4, 0},
 
-    {WEDGE_OBLIQUE,  1, 1, 4, 2},
-    {WEDGE_OBLIQUE,  1, 1, 4, 6},
-    {WEDGE_OBLIQUE,  1, 0, 4, 2},
-    {WEDGE_OBLIQUE,  1, 0, 4, 6},
+    {WEDGE_VERTICAL,   2, 4, 0},
+    {WEDGE_VERTICAL,   4, 4, 0},
+    {WEDGE_VERTICAL,   6, 4, 0},
+    {WEDGE_HORIZONTAL, 4, 4, 0},
 
-    {WEDGE_OBLIQUE,  0, 1, 2, 4},
-    {WEDGE_OBLIQUE,  0, 1, 6, 4},
-    {WEDGE_OBLIQUE,  0, 0, 2, 4},
-    {WEDGE_OBLIQUE,  0, 0, 6, 4},
+    {WEDGE_OBLIQUE27,  4, 2, 0},
+    {WEDGE_OBLIQUE27,  4, 6, 0},
+    {WEDGE_OBLIQUE153, 4, 2, 0},
+    {WEDGE_OBLIQUE153, 4, 6, 0},
 
-    {WEDGE_STRAIGHT, 0, 0, 2, 4},
-    {WEDGE_STRAIGHT, 0, 0, 4, 4},
-    {WEDGE_STRAIGHT, 0, 0, 6, 4},
-    {WEDGE_STRAIGHT, 1, 0, 4, 4},
+    {WEDGE_OBLIQUE63,  2, 4, 0},
+    {WEDGE_OBLIQUE63,  6, 4, 0},
+    {WEDGE_OBLIQUE117, 2, 4, 0},
+    {WEDGE_OBLIQUE117, 6, 4, 0},
 };
 
-static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG]
-                                      [WEDGE_PARMS] = {
-    {WEDGE_OBLIQUE,  1, 1, 4, 4},
-    {WEDGE_OBLIQUE,  1, 0, 4, 4},
-    {WEDGE_OBLIQUE,  0, 1, 4, 4},
-    {WEDGE_OBLIQUE,  0, 0, 4, 4},
+static const int wedge_params_16_heqw[1 << WEDGE_BITS_4]
+                                     [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 0},
+    {WEDGE_OBLIQUE63,  4, 4, 0},
+    {WEDGE_OBLIQUE117, 4, 4, 0},
+    {WEDGE_OBLIQUE153, 4, 4, 0},
 
-    {WEDGE_OBLIQUE,  1, 1, 4, 2},
-    {WEDGE_OBLIQUE,  1, 1, 4, 6},
-    {WEDGE_OBLIQUE,  1, 0, 4, 2},
-    {WEDGE_OBLIQUE,  1, 0, 4, 6},
+    {WEDGE_HORIZONTAL, 4, 2, 0},
+    {WEDGE_HORIZONTAL, 4, 6, 0},
+    {WEDGE_VERTICAL,   2, 4, 0},
+    {WEDGE_VERTICAL,   6, 4, 0},
 
-    {WEDGE_OBLIQUE,  0, 1, 2, 4},
-    {WEDGE_OBLIQUE,  0, 1, 6, 4},
-    {WEDGE_OBLIQUE,  0, 0, 2, 4},
-    {WEDGE_OBLIQUE,  0, 0, 6, 4},
+    {WEDGE_OBLIQUE27,  4, 2, 0},
+    {WEDGE_OBLIQUE27,  4, 6, 0},
+    {WEDGE_OBLIQUE153, 4, 2, 0},
+    {WEDGE_OBLIQUE153, 4, 6, 0},
 
-    {WEDGE_STRAIGHT, 1, 0, 4, 2},
-    {WEDGE_STRAIGHT, 1, 0, 4, 6},
-    {WEDGE_STRAIGHT, 0, 0, 2, 4},
-    {WEDGE_STRAIGHT, 0, 0, 6, 4},
+    {WEDGE_OBLIQUE63,  2, 4, 0},
+    {WEDGE_OBLIQUE63,  6, 4, 0},
+    {WEDGE_OBLIQUE117, 2, 4, 0},
+    {WEDGE_OBLIQUE117, 6, 4, 0},
+};
+
+static const int wedge_params_32_hgtw[1 << WEDGE_BITS_5]
+                                     [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 0},
+    {WEDGE_OBLIQUE63,  4, 4, 0},
+    {WEDGE_OBLIQUE117, 4, 4, 0},
+    {WEDGE_OBLIQUE153, 4, 4, 0},
+
+    {WEDGE_HORIZONTAL, 4, 2, 0},
+    {WEDGE_HORIZONTAL, 4, 4, 0},
+    {WEDGE_HORIZONTAL, 4, 6, 0},
+    {WEDGE_VERTICAL,   4, 4, 0},
+
+    {WEDGE_OBLIQUE27,  4, 1, 0},
+    {WEDGE_OBLIQUE27,  4, 2, 0},
+    {WEDGE_OBLIQUE27,  4, 3, 0},
+    {WEDGE_OBLIQUE27,  4, 5, 0},
+    {WEDGE_OBLIQUE27,  4, 6, 0},
+    {WEDGE_OBLIQUE27,  4, 7, 0},
+
+    {WEDGE_OBLIQUE153, 4, 1, 0},
+    {WEDGE_OBLIQUE153, 4, 2, 0},
+    {WEDGE_OBLIQUE153, 4, 3, 0},
+    {WEDGE_OBLIQUE153, 4, 5, 0},
+    {WEDGE_OBLIQUE153, 4, 6, 0},
+    {WEDGE_OBLIQUE153, 4, 7, 0},
+
+    {WEDGE_OBLIQUE63,  1, 4, 0},
+    {WEDGE_OBLIQUE63,  2, 4, 0},
+    {WEDGE_OBLIQUE63,  3, 4, 0},
+    {WEDGE_OBLIQUE63,  5, 4, 0},
+    {WEDGE_OBLIQUE63,  6, 4, 0},
+    {WEDGE_OBLIQUE63,  7, 4, 0},
+
+    {WEDGE_OBLIQUE117, 1, 4, 0},
+    {WEDGE_OBLIQUE117, 2, 4, 0},
+    {WEDGE_OBLIQUE117, 3, 4, 0},
+    {WEDGE_OBLIQUE117, 5, 4, 0},
+    {WEDGE_OBLIQUE117, 6, 4, 0},
+    {WEDGE_OBLIQUE117, 7, 4, 0},
+};
+
+static const int wedge_params_32_hltw[1 << WEDGE_BITS_5]
+                                     [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 0},
+    {WEDGE_OBLIQUE63,  4, 4, 0},
+    {WEDGE_OBLIQUE117, 4, 4, 0},
+    {WEDGE_OBLIQUE153, 4, 4, 0},
+
+    {WEDGE_VERTICAL,   2, 4, 0},
+    {WEDGE_VERTICAL,   4, 4, 0},
+    {WEDGE_VERTICAL,   6, 4, 0},
+    {WEDGE_HORIZONTAL, 4, 4, 0},
+
+    {WEDGE_OBLIQUE27,  4, 1, 0},
+    {WEDGE_OBLIQUE27,  4, 2, 0},
+    {WEDGE_OBLIQUE27,  4, 3, 0},
+    {WEDGE_OBLIQUE27,  4, 5, 0},
+    {WEDGE_OBLIQUE27,  4, 6, 0},
+    {WEDGE_OBLIQUE27,  4, 7, 0},
+
+    {WEDGE_OBLIQUE153, 4, 1, 0},
+    {WEDGE_OBLIQUE153, 4, 2, 0},
+    {WEDGE_OBLIQUE153, 4, 3, 0},
+    {WEDGE_OBLIQUE153, 4, 5, 0},
+    {WEDGE_OBLIQUE153, 4, 6, 0},
+    {WEDGE_OBLIQUE153, 4, 7, 0},
+
+    {WEDGE_OBLIQUE63,  1, 4, 0},
+    {WEDGE_OBLIQUE63,  2, 4, 0},
+    {WEDGE_OBLIQUE63,  3, 4, 0},
+    {WEDGE_OBLIQUE63,  5, 4, 0},
+    {WEDGE_OBLIQUE63,  6, 4, 0},
+    {WEDGE_OBLIQUE63,  7, 4, 0},
+
+    {WEDGE_OBLIQUE117, 1, 4, 0},
+    {WEDGE_OBLIQUE117, 2, 4, 0},
+    {WEDGE_OBLIQUE117, 3, 4, 0},
+    {WEDGE_OBLIQUE117, 5, 4, 0},
+    {WEDGE_OBLIQUE117, 6, 4, 0},
+    {WEDGE_OBLIQUE117, 7, 4, 0},
+};
+
+static const int wedge_params_32_heqw[1 << WEDGE_BITS_5]
+                                     [WEDGE_PARMS] = {
+    {WEDGE_OBLIQUE27,  4, 4, 0},
+    {WEDGE_OBLIQUE63,  4, 4, 0},
+    {WEDGE_OBLIQUE117, 4, 4, 0},
+    {WEDGE_OBLIQUE153, 4, 4, 0},
+
+    {WEDGE_HORIZONTAL, 4, 2, 0},
+    {WEDGE_HORIZONTAL, 4, 6, 0},
+    {WEDGE_VERTICAL,   2, 4, 0},
+    {WEDGE_VERTICAL,   6, 4, 0},
+
+    {WEDGE_OBLIQUE27,  4, 1, 0},
+    {WEDGE_OBLIQUE27,  4, 2, 0},
+    {WEDGE_OBLIQUE27,  4, 3, 0},
+    {WEDGE_OBLIQUE27,  4, 5, 0},
+    {WEDGE_OBLIQUE27,  4, 6, 0},
+    {WEDGE_OBLIQUE27,  4, 7, 0},
+
+    {WEDGE_OBLIQUE153, 4, 1, 0},
+    {WEDGE_OBLIQUE153, 4, 2, 0},
+    {WEDGE_OBLIQUE153, 4, 3, 0},
+    {WEDGE_OBLIQUE153, 4, 5, 0},
+    {WEDGE_OBLIQUE153, 4, 6, 0},
+    {WEDGE_OBLIQUE153, 4, 7, 0},
+
+    {WEDGE_OBLIQUE63,  1, 4, 0},
+    {WEDGE_OBLIQUE63,  2, 4, 0},
+    {WEDGE_OBLIQUE63,  3, 4, 0},
+    {WEDGE_OBLIQUE63,  5, 4, 0},
+    {WEDGE_OBLIQUE63,  6, 4, 0},
+    {WEDGE_OBLIQUE63,  7, 4, 0},
+
+    {WEDGE_OBLIQUE117, 1, 4, 0},
+    {WEDGE_OBLIQUE117, 2, 4, 0},
+    {WEDGE_OBLIQUE117, 3, 4, 0},
+    {WEDGE_OBLIQUE117, 5, 4, 0},
+    {WEDGE_OBLIQUE117, 6, 4, 0},
+    {WEDGE_OBLIQUE117, 7, 4, 0},
 };
 
 static const int *get_wedge_params_lookup[BLOCK_SIZES] = {
   NULL,
   NULL,
   NULL,
-  &wedge_params_sml[0][0],
-  &wedge_params_med_hgtw[0][0],
-  &wedge_params_med_hltw[0][0],
-  &wedge_params_med_heqw[0][0],
-  &wedge_params_med_hgtw[0][0],
-  &wedge_params_med_hltw[0][0],
-  &wedge_params_med_heqw[0][0],
-  &wedge_params_big_hgtw[0][0],
-  &wedge_params_big_hltw[0][0],
-  &wedge_params_big_heqw[0][0],
+  &wedge_params_16_heqw[0][0],
+  &wedge_params_16_hgtw[0][0],
+  &wedge_params_16_hltw[0][0],
+  &wedge_params_16_heqw[0][0],
+  &wedge_params_16_hgtw[0][0],
+  &wedge_params_16_hltw[0][0],
+  &wedge_params_16_heqw[0][0],
+  NULL,
+  NULL,
+  NULL,
 #if CONFIG_EXT_PARTITION
-  &wedge_params_big_hgtw[0][0],
-  &wedge_params_big_hltw[0][0],
-  &wedge_params_big_heqw[0][0],
+  NULL,
+  NULL,
+  NULL,
 #endif  // CONFIG_EXT_PARTITION
 };
 
@@ -244,11 +390,9 @@
   const int *a = get_wedge_params(wedge_index, sb_type);
   int woff, hoff;
   if (!a) return NULL;
-  woff = (a[3] * bw) >> 3;
-  hoff = (a[4] * bh) >> 3;
-  master = (a[0] ?
-            wedge_mask_obl[neg][a[1]][a[2]] :
-            wedge_mask_str[neg][a[1]]) +
+  woff = (a[1] * bw) >> 3;
+  hoff = (a[2] * bh) >> 3;
+  master = wedge_mask_obl[a[3]][neg][a[0]] +
       MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
       MASK_MASTER_SIZE / 2 - woff;
   return master;
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 1e8679b..28a5ae9 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -146,6 +146,55 @@
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
+#if CONFIG_EXT_INTER
+#define WEDGE_BITS_2      2
+#define WEDGE_BITS_3      3
+#define WEDGE_BITS_4      4
+#define WEDGE_BITS_5      5
+#define WEDGE_NONE       -1
+#define WEDGE_WEIGHT_BITS 6
+
+static const int get_wedge_bits_lookup[BLOCK_SIZES] = {
+  0,
+  0,
+  0,
+  WEDGE_BITS_4,
+  WEDGE_BITS_4,
+  WEDGE_BITS_4,
+  WEDGE_BITS_4,
+  WEDGE_BITS_4,
+  WEDGE_BITS_4,
+  WEDGE_BITS_4,
+  0,
+  0,
+  0,
+#if CONFIG_EXT_PARTITION
+  0,
+  0,
+  0,
+#endif  // CONFIG_EXT_PARTITION
+};
+
+static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
+  (void) sb_type;
+  return get_wedge_bits_lookup[sb_type] > 0;
+}
+
+static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) {
+  const int wbits = get_wedge_bits_lookup[sb_type];
+  return (wbits > 0) ? wbits + 1 : 0;
+}
+
+static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
+  (void) sb_type;
+  return get_wedge_bits_lookup[sb_type] > 0;
+}
+
+static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) {
+  return get_wedge_bits_lookup[sb_type];
+}
+#endif  // CONFIG_EXT_INTER
+
 void build_inter_predictors(MACROBLOCKD *xd, int plane,
 #if CONFIG_OBMC
                             int mi_col_offset, int mi_row_offset,
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index c111b56..6af0eb8 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -540,8 +540,6 @@
   get_energy_distribution_fine(cpi, bsize, src, src_stride,
                                dst, dst_stride, hdist, vdist);
 
-
-
   svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] +
                vdist[1] * ADST_FLIP_SVM[1] +
                vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
@@ -717,7 +715,7 @@
 
 static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, MACROBLOCKD *xd,
-                            int num_planes,
+                            int plane_from, int plane_to,
                             int *out_rate_sum, int64_t *out_dist_sum,
                             int *skip_txfm_sb, int64_t *skip_sse_sb) {
   // Note our transform coeffs are 8 times an orthogonal transform.
@@ -744,7 +742,7 @@
 
   x->pred_sse[ref] = 0;
 
-  for (i = 0; i < num_planes; ++i) {
+  for (i = plane_from; i <= plane_to; ++i) {
     struct macroblock_plane *const p = &x->plane[i];
     struct macroblockd_plane *const pd = &xd->plane[i];
     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
@@ -6384,6 +6382,55 @@
                xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
 }
 
+#if CONFIG_EXT_INTER
+static int estimate_wedge_sign(const VP10_COMP *cpi,
+                               const MACROBLOCK *x,
+                               const BLOCK_SIZE bsize,
+                               uint8_t *pred0, int stride0,
+                               uint8_t *pred1, int stride1) {
+  const struct macroblock_plane *const p = &x->plane[0];
+  const uint8_t *src = p->src.buf;
+  int src_stride = p->src.stride;
+  const int f_index = bsize - BLOCK_8X8;
+  const int bw = 4 << (b_width_log2_lookup[bsize]);
+  const int bh = 4 << (b_height_log2_lookup[bsize]);
+  uint32_t esq[2][4], var;
+  int64_t tl, br;
+
+  var = cpi->fn_ptr[f_index].vf(
+      src, src_stride,
+      pred0, stride0, &esq[0][0]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bw / 2, src_stride,
+      pred0 + bw / 2, stride0, &esq[0][1]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride, src_stride,
+      pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride + bw / 2, src_stride,
+      pred0 + bh / 2 * stride0 + bw / 2, stride0, &esq[0][3]);
+  var = cpi->fn_ptr[f_index].vf(
+      src, src_stride,
+      pred1, stride1, &esq[1][0]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bw / 2, src_stride,
+      pred1 + bw / 2, stride1, &esq[1][1]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride, src_stride,
+      pred1 + bh / 2 * stride1, stride1, &esq[1][2]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride + bw / 2, src_stride,
+      pred1 + bh / 2 * stride1 + bw / 2, stride1, &esq[1][3]);
+  (void) var;
+
+  tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
+       (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
+  br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
+       (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
+  return (tl + br > 0);
+}
+#endif  // CONFIG_EXT_INTER
+
 static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi,
                                            const MACROBLOCK *x,
                                            const BLOCK_SIZE bsize,
@@ -6946,8 +6993,8 @@
           }
         }
         vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &rate_sum, &dist_sum,
-                        &tmp_skip_sb, &tmp_skip_sse);
+        model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
+                        &rate_sum, &dist_sum, &tmp_skip_sb, &tmp_skip_sse);
 
         rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
         if (cm->interp_filter == SWITCHABLE)
@@ -7024,8 +7071,9 @@
 #endif  // CONFIG_OBMC
 
   if (is_comp_pred && is_interinter_wedge_used(bsize)) {
-    int wedge_index, best_wedge_index = WEDGE_NONE, rs;
-    int rate_sum;
+    int wedge_index, best_wedge_index = WEDGE_NONE;
+    int wedge_sign, best_wedge_sign = 0;
+    int rate_sum, rs;
     int64_t dist_sum;
     int64_t best_rd_nowedge = INT64_MAX;
     int64_t best_rd_wedge = INT64_MAX;
@@ -7034,6 +7082,7 @@
     int64_t tmp_skip_sse_sb;
 
     rs = vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+    mbmi->use_wedge_interinter = 0;
     vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
     vp10_subtract_plane(x, bsize, 0);
     rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
@@ -7042,41 +7091,43 @@
     if (rd != INT64_MAX)
       rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
     best_rd_nowedge = rd;
-    mbmi->use_wedge_interinter = 0;
 
     // Disbale wedge search if source variance is small
     if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
-        best_rd_nowedge < 3 * ref_best_rd) {
+        best_rd_nowedge / 3 < ref_best_rd) {
+      uint8_t pred0[2 * MAX_SB_SQUARE * 3];
+      uint8_t pred1[2 * MAX_SB_SQUARE * 3];
+      uint8_t *preds0[3] = {pred0,
+        pred0 + 2 * MAX_SB_SQUARE,
+        pred0 + 4 * MAX_SB_SQUARE};
+      uint8_t *preds1[3] = {pred1,
+        pred1 + 2 * MAX_SB_SQUARE,
+        pred1 + 4 * MAX_SB_SQUARE};
+      int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
+      int est_wedge_sign;
 
       mbmi->use_wedge_interinter = 1;
-      rs = vp10_cost_literal(1 + get_wedge_bits_lookup[bsize]) +
+      rs = vp10_cost_literal(get_interinter_wedge_bits(bsize)) +
           vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
       wedge_types = (1 << get_wedge_bits_lookup[bsize]);
-      if (have_newmv_in_inter_mode(this_mode)) {
-        int_mv tmp_mv[2];
-        int rate_mvs[2], tmp_rate_mv = 0;
-        uint8_t pred0[2 * MAX_SB_SQUARE * 3];
-        uint8_t pred1[2 * MAX_SB_SQUARE * 3];
-        uint8_t *preds0[3] = {pred0,
-          pred0 + 2 * MAX_SB_SQUARE,
-          pred0 + 4 * MAX_SB_SQUARE};
-        uint8_t *preds1[3] = {pred1,
-          pred1 + 2 * MAX_SB_SQUARE,
-          pred1 + 4 * MAX_SB_SQUARE};
-        int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
-        vp10_build_inter_predictors_for_planes_single_buf(
-            xd, bsize, 0, 0,  mi_row, mi_col, 0, preds0, strides);
-        vp10_build_inter_predictors_for_planes_single_buf(
-            xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
 
-        for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) {
-          mbmi->interinter_wedge_index = wedge_index >> 1;
-          mbmi->interinter_wedge_sign = wedge_index & 1;
-          vp10_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
-                                                    mi_row, mi_col,
+      vp10_build_inter_predictors_for_planes_single_buf(
+          xd, bsize, 0, 0,  mi_row, mi_col, 0, preds0, strides);
+      vp10_build_inter_predictors_for_planes_single_buf(
+          xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
+
+      // Choose the best wedge
+      if (cpi->sf.fast_wedge_sign_estimate) {
+        est_wedge_sign = estimate_wedge_sign(
+            cpi, x, bsize, pred0, MAX_SB_SIZE, pred1, MAX_SB_SIZE);
+        best_wedge_sign = mbmi->interinter_wedge_sign = est_wedge_sign;
+        for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+          mbmi->interinter_wedge_index = wedge_index;
+          vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
+                                                    0, 0, mi_row, mi_col,
                                                     preds0, strides,
                                                     preds1, strides);
-          model_rd_for_sb(cpi, bsize, x, xd, 1,
+          model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
                           &rate_sum, &dist_sum,
                           &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
           rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
@@ -7085,8 +7136,34 @@
             best_rd_wedge = rd;
           }
         }
-        mbmi->interinter_wedge_index = best_wedge_index >> 1;
-        mbmi->interinter_wedge_sign = best_wedge_index & 1;
+      } else {
+        for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+          for (wedge_sign = 0; wedge_sign < 2; ++wedge_sign) {
+            mbmi->interinter_wedge_index = wedge_index;
+            mbmi->interinter_wedge_sign = wedge_sign;
+            vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
+                                                      0, 0, mi_row, mi_col,
+                                                      preds0, strides,
+                                                      preds1, strides);
+            model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
+                            &rate_sum, &dist_sum,
+                            &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+            rd = RDCOST(x->rdmult, x->rddiv,
+                        rs + rate_mv + rate_sum, dist_sum);
+            if (rd < best_rd_wedge) {
+              best_wedge_index = wedge_index;
+              best_wedge_sign = wedge_sign;
+              best_rd_wedge = rd;
+            }
+          }
+        }
+      }
+      mbmi->interinter_wedge_index = best_wedge_index;
+      mbmi->interinter_wedge_sign = best_wedge_sign;
+
+      if (have_newmv_in_inter_mode(this_mode)) {
+        int_mv tmp_mv[2];
+        int rate_mvs[2], tmp_rate_mv = 0;
         if (this_mode == NEW_NEWMV) {
           int mv_idxs[2] = {0, 0};
           do_masked_motion_search_indexed(cpi, x,
@@ -7117,7 +7194,7 @@
           mbmi->mv[1].as_int = tmp_mv[1].as_int;
         }
         vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb(cpi, bsize, x, xd, 1, &rate_sum, &dist_sum,
+        model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                         &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
         rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
         if (rd < best_rd_wedge) {
@@ -7131,7 +7208,6 @@
                                                     preds0, strides,
                                                     preds1, strides);
         }
-
         vp10_subtract_plane(x, bsize, 0);
         rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                                  &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
@@ -7143,8 +7219,8 @@
 
         if (best_rd_wedge < best_rd_nowedge) {
           mbmi->use_wedge_interinter = 1;
-          mbmi->interinter_wedge_index = best_wedge_index >> 1;
-          mbmi->interinter_wedge_sign = best_wedge_index & 1;
+          mbmi->interinter_wedge_index = best_wedge_index;
+          mbmi->interinter_wedge_sign = best_wedge_sign;
           xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
           xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
           *rate2 += tmp_rate_mv - rate_mv;
@@ -7157,37 +7233,6 @@
           xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
         }
       } else {
-        uint8_t pred0[2 * MAX_SB_SQUARE * 3];
-        uint8_t pred1[2 * MAX_SB_SQUARE * 3];
-        uint8_t *preds0[3] = {pred0,
-          pred0 + 2 * MAX_SB_SQUARE,
-          pred0 + 4 * MAX_SB_SQUARE};
-        uint8_t *preds1[3] = {pred1,
-          pred1 + 2 * MAX_SB_SQUARE,
-          pred1 + 4 * MAX_SB_SQUARE};
-        int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
-        vp10_build_inter_predictors_for_planes_single_buf(
-            xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
-        vp10_build_inter_predictors_for_planes_single_buf(
-            xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
-        for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) {
-          mbmi->interinter_wedge_index = wedge_index >> 1;
-          mbmi->interinter_wedge_sign = wedge_index & 1;
-          vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
-                                                    0, 0, mi_row, mi_col,
-                                                    preds0, strides,
-                                                    preds1, strides);
-          model_rd_for_sb(cpi, bsize, x, xd, 1,
-                          &rate_sum, &dist_sum,
-                          &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
-          rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
-          if (rd < best_rd_wedge) {
-            best_wedge_index = wedge_index;
-            best_rd_wedge = rd;
-          }
-        }
-        mbmi->interinter_wedge_sign = best_wedge_index & 1;
-        mbmi->interinter_wedge_index = best_wedge_index >> 1;
         vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
                                                   0, 0, mi_row, mi_col,
                                                   preds0, strides,
@@ -7197,12 +7242,12 @@
                                  &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
                                  INT64_MAX);
         if (rd != INT64_MAX)
-          rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
-        best_rd_wedge = rd;
+          rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+        best_rd_wedge = rd;
         if (best_rd_wedge < best_rd_nowedge) {
           mbmi->use_wedge_interinter = 1;
-          mbmi->interinter_wedge_index = best_wedge_index >> 1;
-          mbmi->interinter_wedge_sign = best_wedge_index & 1;
+          mbmi->interinter_wedge_index = best_wedge_index;
+          mbmi->interinter_wedge_sign = best_wedge_sign;
         } else {
           mbmi->use_wedge_interinter = 0;
         }
@@ -7217,7 +7262,7 @@
 
     if (mbmi->use_wedge_interinter)
       *compmode_wedge_cost =
-          vp10_cost_literal(1 + get_wedge_bits_lookup[bsize]) +
+          vp10_cost_literal(get_interinter_wedge_bits(bsize)) +
           vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
     else
       *compmode_wedge_cost =
@@ -7303,7 +7348,7 @@
 
         mbmi->use_wedge_interintra = 1;
         wedge_types = (1 << get_wedge_bits_lookup[bsize]);
-        rwedge = vp10_cost_literal(get_wedge_bits_lookup[bsize]) +
+        rwedge = vp10_cost_literal(get_interintra_wedge_bits(bsize)) +
             vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
         for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
           mbmi->interintra_wedge_index = wedge_index;
@@ -7311,7 +7356,7 @@
           vp10_combine_interintra(xd, bsize, 0,
                                   tmp_buf, MAX_SB_SIZE,
                                   intrapred, MAX_SB_SIZE);
-          model_rd_for_sb(cpi, bsize, x, xd, 1,
+          model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
                           &rate_sum, &dist_sum,
                           &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
           rd = RDCOST(x->rdmult, x->rddiv,
@@ -7333,7 +7378,7 @@
                                   0, mv_idx);
           mbmi->mv[0].as_int = tmp_mv.as_int;
           vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-          model_rd_for_sb(cpi, bsize, x, xd, 1, &rate_sum, &dist_sum,
+          model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                           &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
           rd = RDCOST(x->rdmult, x->rddiv,
                       rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
@@ -7390,7 +7435,7 @@
           cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
       if (mbmi->use_wedge_interintra) {
         *compmode_interintra_cost +=
-            vp10_cost_literal(get_wedge_bits_lookup[bsize]);
+            vp10_cost_literal(get_interintra_wedge_bits(bsize));
       }
     }
   } else if (is_interintra_allowed(mbmi)) {
@@ -7428,8 +7473,8 @@
     // switchable list (ex. bilinear) is indicated at the frame level, or
     // skip condition holds.
     vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
-    model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &tmp_rate, &tmp_dist,
-                    &skip_txfm_sb, &skip_sse_sb);
+    model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
+                    &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
     memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
     memcpy(bsse, x->bsse, sizeof(bsse));
@@ -7575,8 +7620,8 @@
                                        NULL, NULL,
                                        dst_buf1, dst_stride1,
                                        dst_buf2, dst_stride2);
-      model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &tmp_rate, &tmp_dist,
-                      &skip_txfm_sb, &skip_sse_sb);
+      model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
+                      &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
     }
 #if CONFIG_VP9_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index 155f28e..b766cae 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -165,6 +165,7 @@
     sf->use_transform_domain_distortion = 1;
 #if CONFIG_EXT_INTER
     sf->disable_wedge_search_var_thresh = 100;
+    sf->fast_wedge_sign_estimate = 1;
 #endif  // CONFIG_EXT_INTER
   }
 
@@ -283,6 +284,7 @@
   sf->use_upsampled_references = 0;
 #if CONFIG_EXT_INTER
   sf->disable_wedge_search_var_thresh = 100;
+  sf->fast_wedge_sign_estimate = 1;
 #endif  // CONFIG_EXT_INTER
 
   // Use transform domain distortion computation
@@ -517,6 +519,7 @@
 #endif  // CONFIG_EXT_TILE
 #if CONFIG_EXT_INTER
   sf->disable_wedge_search_var_thresh = 0;
+  sf->fast_wedge_sign_estimate = 0;
 #endif  // CONFIG_EXT_INTER
 
   for (i = 0; i < TX_SIZES; i++) {
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index 6cee748..ca6adbe 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -402,6 +402,9 @@
 #if CONFIG_EXT_INTER
   // A source variance threshold below which wedge search is disabled
   unsigned int disable_wedge_search_var_thresh;
+
+  // Whether fast wedge sign estimate is used
+  int fast_wedge_sign_estimate;
 #endif  // CONFIG_EXT_INTER
 
   // These bit masks allow you to enable or disable intra modes for each