ext-intra experiment

Currently there are two parts in this experiment: extra directional intra
prediction modes and the filter intra modes migrated from the nextgen branch.

Several macros are defined in "blockd.h" to control the experiment
settings. Setting "DR_ONLY" to 1 (default is 0) means we only use directional
modes, and skip the filter-intra modes; "EXT_INTRA_ANGLES" (default is 128)
defines the number of different angles we want to support; setting
"ANGLE_FAST_SEARCH" to 1 (default is 1) means we use a fast sub-optimal search
for the best prediction angle, instead of an exhaustive search. The fast search
is about 6 times faster than the exhaustive search, while preserving about
60% of the coding gains.

With extra directional prediction modes (fast search), we observe the following
coding gains (numbers in parentheses are for the all-key-frame setting):
derflr +0.42%  (+1.79%)
hevclr +0.78%  (+2.19%)
hevcmr +1.20%  (+3.49%)
stdhd  +0.56%
Speed-wise, about 110% slower for key frames, and 30% slower overall.

The gains of filter intra modes mostly add up with the gains of directional
modes. The overall coding gain of this experiment:
derflr +0.94%
hevclr +1.46%
hevcmr +1.94%
stdhd  +1.58%

Change-Id: Ida9ad00cdb33aff422d06eb42b4f4e5f25df8a2a
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index f398c40..7725850 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1447,6 +1447,10 @@
   memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
   memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
 
+#if CONFIG_EXT_INTRA
+  mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+#endif  // CONFIG_EXT_INTRA
+
   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1491,6 +1495,172 @@
   return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
 }
 
+#if CONFIG_EXT_INTRA
+// Searches the luma ext-intra modes (filter-intra modes unless DR_ONLY, then
+// directional angles) for one whose RD cost beats *best_rd. Returns 1 and
+// stores the winner in mbmi and the output arguments if one is found; returns
+// 0 otherwise, leaving mbmi modified so the caller must restore it.
+static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
+                                 int *rate, int *rate_tokenonly,
+                                 int64_t *distortion, int *skippable,
+                                 BLOCK_SIZE bsize, int mode_cost,
+                                 int64_t *best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MODE_INFO *const mic = xd->mi[0];
+  MB_MODE_INFO *mbmi = &mic->mbmi;
+  int this_rate, this_rate_tokenonly, s;
+  int ext_intra_selected_flag = 0;
+  int i, step, delta, angle, best_angle, best_angle_dir;
+  int deltas[3] = {25, 5, 1};
+  int branches[3] = {2, 2, 2};
+  int64_t this_distortion, this_rd, best_angle_rd = INT64_MAX;
+  EXT_INTRA_MODE mode;
+  TX_SIZE best_tx_size = TX_4X4;
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+#if CONFIG_EXT_TX
+  TX_TYPE best_tx_type = DCT_DCT;
+#endif  // CONFIG_EXT_TX
+
+  vp10_zero(ext_intra_mode_info);
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
+  mbmi->mode = DC_PRED;
+
+  if (!DR_ONLY) {
+    for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+      mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
+      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                      &s, NULL, bsize, *best_rd);
+      if (this_rate_tokenonly == INT_MAX)
+        continue;
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+          vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
+          write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+      if (this_rd < *best_rd) {
+        *best_rd            = this_rd;
+        best_tx_size        = mic->mbmi.tx_size;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+#if CONFIG_EXT_TX
+        best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+        *rate               = this_rate;
+        *rate_tokenonly     = this_rate_tokenonly;
+        *distortion         = this_distortion;
+        *skippable          = s;
+        ext_intra_selected_flag = 1;
+      }
+    }
+  }
+
+  mbmi->ext_intra_mode_info.ext_intra_mode[0] = EXT_DR_PRED;
+  if (ANGLE_FAST_SEARCH) {
+    best_angle = EXT_INTRA_ANGLES / 2;
+    for (step = 0; step < 3; ++step) {
+      delta = deltas[step];
+      // Reset per step so a step with no improvement keeps the current angle.
+      best_angle_dir = 0;
+      for (i = -branches[step]; i <= branches[step]; ++i) {
+        int64_t rd_thresh;
+        if (i == 0 && step != 0)
+          continue;
+        angle = best_angle + i * delta;
+        if (angle < 0)
+          angle = 0;
+        if (angle >= EXT_INTRA_ANGLES)
+          angle = EXT_INTRA_ANGLES - 1;
+        if (angle == best_angle && step != 0)
+          continue;
+        mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
+        // Threshold is the best angle RD, capped by scaled overall best RD.
+        if (*best_rd == INT64_MAX)
+          rd_thresh = best_angle_rd;
+        else
+          rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
+        super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                        &s, NULL, bsize, rd_thresh);
+        if (this_rate_tokenonly == INT_MAX)
+          continue;
+        this_rate = this_rate_tokenonly +
+            vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+            (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+            write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
+        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+        if (this_rd < *best_rd) {
+          *best_rd            = this_rd;
+          best_tx_size        = mic->mbmi.tx_size;
+          ext_intra_mode_info = mbmi->ext_intra_mode_info;
+#if CONFIG_EXT_TX
+          best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+          *rate               = this_rate;
+          *rate_tokenonly     = this_rate_tokenonly;
+          *distortion         = this_distortion;
+          *skippable          = s;
+          ext_intra_selected_flag = 1;
+        }
+        if (this_rd < best_angle_rd) {
+          best_angle_rd = this_rd;
+          best_angle_dir = i;
+        }
+      }
+      best_angle += best_angle_dir * delta;
+      if (best_angle < 0)
+        best_angle = 0;
+      if (best_angle >= EXT_INTRA_ANGLES)
+        best_angle = EXT_INTRA_ANGLES - 1;
+      if (*best_rd < best_angle_rd / RD_ADJUSTER)
+        break;
+    }
+  } else {
+    for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
+      mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
+      if (prediction_angle_map(angle) == 90 ||
+          prediction_angle_map(angle) == 180)
+        continue;
+      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                      &s, NULL, bsize, *best_rd);
+      if (this_rate_tokenonly == INT_MAX)
+        continue;
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+          (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+          write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+      if (this_rd < *best_rd) {
+        *best_rd            = this_rd;
+        best_tx_size        = mic->mbmi.tx_size;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+#if CONFIG_EXT_TX
+        best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+        *rate               = this_rate;
+        *rate_tokenonly     = this_rate_tokenonly;
+        *distortion         = this_distortion;
+        *skippable          = s;
+        ext_intra_selected_flag = 1;
+      }
+    }
+  }
+
+  if (!ext_intra_selected_flag)
+    return 0;
+  mbmi->mode = DC_PRED;
+  mbmi->tx_size = best_tx_size;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+      ext_intra_mode_info.use_ext_intra_mode[0];
+  mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+      ext_intra_mode_info.ext_intra_mode[0];
+  mbmi->ext_intra_mode_info.ext_intra_angle[0] =
+      ext_intra_mode_info.ext_intra_angle[0];
+#if CONFIG_EXT_TX
+  mbmi->tx_type = best_tx_type;
+#endif  // CONFIG_EXT_TX
+  return 1;
+}
+#endif  // CONFIG_EXT_INTRA
+
 // This function is used only for intra_only frames
 static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
@@ -1504,6 +1674,9 @@
   int this_rate, this_rate_tokenonly, s;
   int64_t this_distortion, this_rd;
   TX_SIZE best_tx = TX_4X4;
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+#endif  // CONFIG_EXT_INTRA
 #if CONFIG_EXT_TX
   TX_TYPE best_tx_type = DCT_DCT;
 #endif  // CONFIG_EXT_TX
@@ -1520,6 +1693,10 @@
   const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
   bmode_costs = cpi->y_mode_costs[A][L];
 
+#if CONFIG_EXT_INTRA
+  ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+#endif  // CONFIG_EXT_INTRA
   memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
   palette_mode_info.palette_size[0] = 0;
   mic->mbmi.palette_mode_info.palette_size[0] = 0;
@@ -1529,7 +1706,7 @@
     palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
 
   /* Y Search for intra prediction mode */
-  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     mic->mbmi.mode = mode;
 
     super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
@@ -1543,6 +1720,10 @@
       this_rate +=
           vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                          [palette_ctx], 0);
+#if CONFIG_EXT_INTRA
+    if (mode == DC_PRED)
+      this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
+#endif  // CONFIG_EXT_INTRA
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 
     if (this_rd < best_rd) {
@@ -1564,6 +1745,30 @@
                               &palette_mode_info, best_palette_color_map,
                               &best_tx, &mode_selected, &best_rd);
 
+#if CONFIG_EXT_INTRA
+  // Search ext-intra modes only when palette_size[0] is 0 (no palette).
+  if (palette_mode_info.palette_size[0] == 0 &&
+      rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
+                            skippable, bsize, bmode_costs[DC_PRED],
+                            &best_rd)) {
+    mode_selected       = mic->mbmi.mode;
+    best_tx             = mic->mbmi.tx_size;
+    ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
+#if CONFIG_EXT_TX
+    best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+  }
+
+  mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] =
+      ext_intra_mode_info.use_ext_intra_mode[0];
+  if (ext_intra_mode_info.use_ext_intra_mode[0]) {
+    mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
+        ext_intra_mode_info.ext_intra_mode[0];
+    mic->mbmi.ext_intra_mode_info.ext_intra_angle[0] =
+        ext_intra_mode_info.ext_intra_angle[0];
+  }
+#endif  // CONFIG_EXT_INTRA
+
   mic->mbmi.mode = mode_selected;
   mic->mbmi.tx_size = best_tx;
 #if CONFIG_EXT_TX
@@ -2263,30 +2468,189 @@
   return is_cost_valid;
 }
 
+#if CONFIG_EXT_INTRA
+// Searches the chroma ext-intra modes (filter-intra modes unless DR_ONLY, then
+// directional angles) for one whose RD cost beats *best_rd. Returns 1 and
+// stores the winner in mbmi and the output arguments if one is found; returns
+// 0 otherwise, leaving mbmi modified so the caller must restore it.
+static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+                                  PICK_MODE_CONTEXT *ctx,
+                                  int *rate, int *rate_tokenonly,
+                                  int64_t *distortion, int *skippable,
+                                  BLOCK_SIZE bsize, int64_t *best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  int ext_intra_selected_flag = 0;
+  int this_rate_tokenonly, this_rate, s;
+  int64_t this_distortion, this_sse, this_rd, best_angle_rd = INT64_MAX;
+  EXT_INTRA_MODE mode;
+  int i, step, delta, angle, best_angle, best_angle_dir;
+  int deltas[3] = {25, 5, 1};
+  int branches[3] = {2, 2, 2};
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+
+  vp10_zero(ext_intra_mode_info);
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
+  mbmi->uv_mode = DC_PRED;
+
+  if (!DR_ONLY) {
+    for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+      mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
+      if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+                            &this_distortion, &s, &this_sse, bsize, *best_rd))
+        continue;
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+          vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
+          cpi->intra_uv_mode_cost[mbmi->uv_mode] +
+          write_uniform_cost(FILTER_INTRA_MODES, mode);
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+      if (this_rd < *best_rd) {
+        *best_rd        = this_rd;
+        *rate           = this_rate;
+        *rate_tokenonly = this_rate_tokenonly;
+        *distortion     = this_distortion;
+        *skippable      = s;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+        ext_intra_selected_flag = 1;
+        if (!x->select_tx_size)
+          swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+      }
+    }
+  }
+
+  mbmi->ext_intra_mode_info.ext_intra_mode[1] = EXT_DR_PRED;
+  if (ANGLE_FAST_SEARCH) {
+    best_angle = EXT_INTRA_ANGLES / 2;
+    for (step = 0; step < 3; ++step) {
+      delta = deltas[step];
+      // Reset per step so a step with no improvement keeps the current angle.
+      best_angle_dir = 0;
+      for (i = -branches[step]; i <= branches[step]; ++i) {
+        int64_t rd_thresh;
+        if (i == 0 && step != 0)
+          continue;
+        angle = best_angle + i * delta;
+        if (angle < 0)
+          angle = 0;
+        if (angle >= EXT_INTRA_ANGLES)
+          angle = EXT_INTRA_ANGLES - 1;
+        if (angle == best_angle && step != 0)
+          continue;
+        mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
+        if (*best_rd == INT64_MAX)
+          rd_thresh = best_angle_rd;
+        else
+          rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
+        if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                              &s, &this_sse, bsize, rd_thresh))
+          continue;
+        this_rate = this_rate_tokenonly +
+            vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+            (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+            cpi->intra_uv_mode_cost[mbmi->uv_mode] +
+            write_uniform_cost(EXT_INTRA_ANGLES, angle);
+        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+        if (this_rd < *best_rd) {
+          *best_rd        = this_rd;
+          *rate           = this_rate;
+          *rate_tokenonly = this_rate_tokenonly;
+          *distortion     = this_distortion;
+          *skippable      = s;
+          ext_intra_mode_info = mbmi->ext_intra_mode_info;
+          ext_intra_selected_flag = 1;
+          if (!x->select_tx_size)
+            swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+        }
+        if (this_rd < best_angle_rd) {
+          best_angle_rd = this_rd;
+          best_angle_dir = i;
+        }
+      }
+      best_angle += best_angle_dir * delta;
+      if (best_angle < 0)
+        best_angle = 0;
+      if (best_angle >= EXT_INTRA_ANGLES)
+        best_angle = EXT_INTRA_ANGLES - 1;
+      if (*best_rd < best_angle_rd / RD_ADJUSTER)
+        break;
+    }
+  } else {
+    for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
+      mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
+      if (prediction_angle_map(angle) == 90 ||
+          prediction_angle_map(angle) == 180)
+        continue;
+      if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+                            &this_distortion, &s, &this_sse, bsize, *best_rd))
+        continue;
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+          (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+          cpi->intra_uv_mode_cost[mbmi->uv_mode] +
+          write_uniform_cost(EXT_INTRA_ANGLES, angle);
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+      if (this_rd < *best_rd) {
+        *best_rd        = this_rd;
+        *rate           = this_rate;
+        *rate_tokenonly = this_rate_tokenonly;
+        *distortion     = this_distortion;
+        *skippable      = s;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+        ext_intra_selected_flag = 1;
+        if (!x->select_tx_size)
+          swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+      }
+    }
+  }
+
+  if (!ext_intra_selected_flag)
+    return 0;
+  mbmi->uv_mode = DC_PRED;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+      ext_intra_mode_info.use_ext_intra_mode[1];
+  mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+      ext_intra_mode_info.ext_intra_mode[1];
+  mbmi->ext_intra_mode_info.ext_intra_angle[1] =
+      ext_intra_mode_info.ext_intra_angle[1];
+  return 1;
+}
+#endif  // CONFIG_EXT_INTRA
+
 static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                        PICK_MODE_CONTEXT *ctx,
                                        int *rate, int *rate_tokenonly,
                                        int64_t *distortion, int *skippable,
                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
   MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   PREDICTION_MODE mode;
   PREDICTION_MODE mode_selected = DC_PRED;
   int64_t best_rd = INT64_MAX, this_rd;
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse;
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
 
+  ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
   memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
   xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
       continue;
 
-    xd->mi[0]->mbmi.uv_mode = mode;
+    mbmi->uv_mode = mode;
 
     if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                           &this_distortion, &s, &this_sse, bsize, best_rd))
       continue;
     this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mode];
+#if CONFIG_EXT_INTRA
+    if (mode == DC_PRED)
+      this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
+#endif  // CONFIG_EXT_INTRA
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 
     if (this_rd < best_rd) {
@@ -2301,7 +2665,22 @@
     }
   }
 
-  xd->mi[0]->mbmi.uv_mode = mode_selected;
+#if CONFIG_EXT_INTRA
+  if (mbmi->sb_type >= BLOCK_8X8) {
+    if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion,
+                               skippable, bsize, &best_rd)) {
+      mode_selected   = mbmi->uv_mode;
+      ext_intra_mode_info = mbmi->ext_intra_mode_info;
+    }
+  }
+
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+      ext_intra_mode_info.use_ext_intra_mode[1];
+  if (ext_intra_mode_info.use_ext_intra_mode[1])
+    mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+        ext_intra_mode_info.ext_intra_mode[1];
+#endif  // CONFIG_EXT_INTRA
+  mbmi->uv_mode = mode_selected;
   return best_rd;
 }
 
@@ -4104,6 +4483,9 @@
   int64_t dist_uv[TX_SIZES];
   int skip_uv[TX_SIZES];
   PREDICTION_MODE mode_uv[TX_SIZES];
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
+#endif  // CONFIG_EXT_INTRA
   const int intra_cost_penalty = vp10_get_intra_cost_penalty(
       cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
   int best_skip2 = 0;
@@ -4363,6 +4745,10 @@
     mbmi->uv_mode = DC_PRED;
     mbmi->ref_frame[0] = ref_frame;
     mbmi->ref_frame[1] = second_ref_frame;
+#if CONFIG_EXT_INTRA
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
     // Evaluate all sub-pel filters irrespective of whether we can use
     // them for this frame.
     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
@@ -4385,6 +4771,27 @@
       memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
                       NULL, bsize, best_rd);
+#if CONFIG_EXT_INTRA
+      // TODO(huisu): ext-intra is turned off in lossless mode for now to
+      // avoid a unit test failure
+      if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id]) {
+        MB_MODE_INFO mbmi_copy = *mbmi;
+        int rate_dummy;
+
+        if (rate_y != INT_MAX) {
+          int this_rate = rate_y + cpi->mbmode_cost[mbmi->mode] +
+              vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
+          this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
+        } else {
+          this_rd = best_rd;
+        }
+
+        if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+                                   &skippable, bsize,
+                                   cpi->mbmode_cost[mbmi->mode], &this_rd))
+          *mbmi = mbmi_copy;
+      }
+#endif  // CONFIG_EXT_INTRA
       if (rate_y == INT_MAX)
         continue;
       uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
@@ -4393,14 +4800,45 @@
         choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
                              &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
                              &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
+#if CONFIG_EXT_INTRA
+        ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+#endif  // CONFIG_EXT_INTRA
       }
 
       rate_uv = rate_uv_tokenonly[uv_tx];
       distortion_uv = dist_uv[uv_tx];
       skippable = skippable && skip_uv[uv_tx];
       mbmi->uv_mode = mode_uv[uv_tx];
+#if CONFIG_EXT_INTRA
+      mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+          ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+      if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+        mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+            ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+        mbmi->ext_intra_mode_info.ext_intra_angle[1] =
+            ext_intra_mode_info_uv[uv_tx].ext_intra_angle[1];
+      }
+#endif  // CONFIG_EXT_INTRA
 
       rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+#if CONFIG_EXT_INTRA
+      if (mbmi->mode == DC_PRED) {
+        rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
+                               mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
+        if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+          EXT_INTRA_MODE ext_intra_mode =
+              mbmi->ext_intra_mode_info.ext_intra_mode[0];
+          int angle = mbmi->ext_intra_mode_info.ext_intra_angle[0];
+          if (!DR_ONLY)
+              rate2 += vp10_cost_bit(DR_EXT_INTRA_PROB,
+                                     ext_intra_mode > FILTER_TM_PRED);
+          if (ext_intra_mode > FILTER_TM_PRED)
+            rate2 += write_uniform_cost(EXT_INTRA_ANGLES, angle);
+          else
+            rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
+        }
+      }
+#endif  // CONFIG_EXT_INTRA
       if (this_mode != DC_PRED && this_mode != TM_PRED)
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
@@ -4734,6 +5172,10 @@
 
   mbmi->palette_mode_info.palette_size[0] = 0;
   mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
   mbmi->mode = ZEROMV;
   mbmi->uv_mode = DC_PRED;
   mbmi->ref_frame[0] = LAST_FRAME;
@@ -4848,6 +5290,11 @@
   memset(x->zcoeff_blk[TX_4X4], 0, 4);
   vp10_zero(best_mbmode);
 
+#if CONFIG_EXT_INTRA
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
+
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     filter_cache[i] = INT64_MAX;