Refactor ext-intra

Miscellaneous changes. Mostly code simplification and cleanup.

Make luma max angle delta depend on block size, which is the
way it was before the nextgenv2 branch merge.

Also fix compatibility issues between ext-intra and alt-intra.

Change-Id: I2ce869e6ba7dee765a8dc4802f5e88040db3df22
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index eb7f915..3c167d2 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -693,13 +693,31 @@
 #endif  // CONFIG_FILTER_INTRA
 
 #if CONFIG_EXT_INTRA
-#define ANGLE_STEP 3
-#define MAX_ANGLE_DELTAS 3
+#define MAX_ANGLE_DELTA_UV 2
+#define ANGLE_STEP_UV 4
+
+static const uint8_t av1_angle_step_y[TX_SIZES] = {
+  0, 4, 3, 3,
+};
+static const uint8_t av1_max_angle_delta_y[TX_SIZES] = {
+  0, 2, 3, 3,
+};
+
 extern const int16_t dr_intra_derivative[90];
 static const uint8_t mode_to_angle_map[INTRA_MODES] = {
   0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
 };
 
+static INLINE int av1_get_angle_step(BLOCK_SIZE sb_type, int plane) {
+  const TX_SIZE max_tx_size = max_txsize_lookup[sb_type];
+  return plane ? ANGLE_STEP_UV : av1_angle_step_y[max_tx_size];
+}
+
+static INLINE int av1_get_max_angle_delta(BLOCK_SIZE sb_type, int plane) {
+  const TX_SIZE max_tx_size = max_txsize_lookup[sb_type];
+  return plane ? MAX_ANGLE_DELTA_UV : av1_max_angle_delta_y[max_tx_size];
+}
+
 #if CONFIG_INTRA_INTERP
 // Returns whether filter selection is needed for a given
 // intra prediction angle.
@@ -763,14 +781,19 @@
       return filter_intra_mode_to_tx_type_lookup[filter_intra_mode];
 #endif  // CONFIG_FILTER_INTRA
 #if CONFIG_EXT_INTRA
-    if (mode == DC_PRED) {
-      return DCT_DCT;
-    } else if (mode == TM_PRED) {
-      return ADST_ADST;
-    } else {
+#if CONFIG_ALT_INTRA
+    if (mode == SMOOTH_PRED) return ADST_ADST;
+#endif  // CONFIG_ALT_INTRA
+    if (mode == DC_PRED) return DCT_DCT;
+    if (mode == TM_PRED) return ADST_ADST;
+    {
       int angle = mode_to_angle_map[mode];
+      const int angle_step = av1_get_angle_step(mbmi->sb_type, (int)plane_type);
+      assert(mode == D45_PRED || mode == D63_PRED || mode == D117_PRED ||
+             mode == D135_PRED || mode == D153_PRED || mode == D207_PRED ||
+             mode == V_PRED || mode == H_PRED);
       if (mbmi->sb_type >= BLOCK_8X8)
-        angle += mbmi->angle_delta[plane_type] * ANGLE_STEP;
+        angle += mbmi->angle_delta[plane_type] * angle_step;
       assert(angle > 0 && angle < 270);
       if (angle == 135)
         return ADST_ADST;
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c
index bbf21c9..9ad204c 100644
--- a/av1/common/pred_common.c
+++ b/av1/common/pred_common.c
@@ -117,8 +117,9 @@
       }
     } else {
       if (av1_is_directional_mode(mode, ref_mbmi->sb_type)) {
+        const int angle_step = av1_get_angle_step(ref_mbmi->sb_type, 0);
         int p_angle =
-            mode_to_angle_map[mode] + ref_mbmi->angle_delta[0] * ANGLE_STEP;
+            mode_to_angle_map[mode] + ref_mbmi->angle_delta[0] * angle_step;
         if (av1_is_intra_filter_switchable(p_angle)) {
           ref_type = ref_mbmi->intra_filter;
         }
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 5a6583b..00abb24 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -450,7 +450,7 @@
   (void)left;
   (void)dy;
   assert(dy == 1);
-  assert(dx < 0);
+  assert(dx > 0);
 
 #if CONFIG_INTRA_INTERP
   if (filter_type != INTRA_FILTER_LINEAR) {
@@ -466,8 +466,8 @@
     memset(src + pad_size + 2 * bs, above[2 * bs - 1],
            pad_size * sizeof(above[0]));
     flags[0] = 1;
-    x = -dx;
-    for (r = 0; r < bs; ++r, dst += stride, x -= dx) {
+    x = dx;
+    for (r = 0; r < bs; ++r, dst += stride, x += dx) {
       base = x >> 8;
       shift = x & 0xFF;
       shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
@@ -513,8 +513,8 @@
   }
 #endif  // CONFIG_INTRA_INTERP
 
-  x = -dx;
-  for (r = 0; r < bs; ++r, dst += stride, x -= dx) {
+  x = dx;
+  for (r = 0; r < bs; ++r, dst += stride, x += dx) {
     base = x >> 8;
     shift = x & 0xFF;
 
@@ -598,7 +598,7 @@
   (void)dx;
 
   assert(dx == 1);
-  assert(dy < 0);
+  assert(dy > 0);
 
 #if CONFIG_INTRA_INTERP
   if (filter_type != INTRA_FILTER_LINEAR) {
@@ -614,8 +614,8 @@
     for (i = 0; i < pad_size; ++i)
       src[4 * (i + 2 * bs + pad_size)] = left[2 * bs - 1];
     flags[0] = 1;
-    y = -dy;
-    for (c = 0; c < bs; ++c, y -= dy) {
+    y = dy;
+    for (c = 0; c < bs; ++c, y += dy) {
       base = y >> 8;
       shift = y & 0xFF;
       shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
@@ -671,8 +671,8 @@
   }
 #endif  // CONFIG_INTRA_INTERP
 
-  y = -dy;
-  for (c = 0; c < bs; ++c, y -= dy) {
+  y = dy;
+  for (c = 0; c < bs; ++c, y += dy) {
     base = y >> 8;
     shift = y & 0xFF;
 
@@ -695,7 +695,7 @@
 // If angle > 180 && angle < 270, dx = 1;
 static INLINE int get_dx(int angle) {
   if (angle > 0 && angle < 90) {
-    return -dr_intra_derivative[angle];
+    return dr_intra_derivative[angle];
   } else if (angle > 90 && angle < 180) {
     return dr_intra_derivative[180 - angle];
   } else {
@@ -712,7 +712,7 @@
   if (angle > 90 && angle < 180) {
     return dr_intra_derivative[angle - 90];
   } else if (angle > 180 && angle < 270) {
-    return -dr_intra_derivative[270 - angle];
+    return dr_intra_derivative[270 - angle];
   } else {
     // In this case, we are not really going to use dy. We may return any value.
     return 1;
@@ -1357,6 +1357,8 @@
   const uint16_t *above_ref = ref - ref_stride;
 #if CONFIG_EXT_INTRA
   int p_angle = 0;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int angle_step = av1_get_angle_step(mbmi->sb_type, plane);
   const int is_dr_mode = av1_is_directional_mode(mode, xd->mi[0]->mbmi.sb_type);
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA
@@ -1375,7 +1377,7 @@
 #if CONFIG_EXT_INTRA
   if (is_dr_mode) {
     p_angle = mode_to_angle_map[mode] +
-              xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+              xd->mi[0]->mbmi.angle_delta[plane != 0] * angle_step;
     if (p_angle <= 90)
       need_above = 1, need_left = 0, need_above_left = 1;
     else if (p_angle < 180)
@@ -1521,7 +1523,9 @@
   int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
 #if CONFIG_EXT_INTRA
   int p_angle = 0;
-  const int is_dr_mode = av1_is_directional_mode(mode, xd->mi[0]->mbmi.sb_type);
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int angle_step = av1_get_angle_step(mbmi->sb_type, plane);
+  const int is_dr_mode = av1_is_directional_mode(mode, mbmi->sb_type);
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA
   const FILTER_INTRA_MODE_INFO *filter_intra_mode_info =
@@ -1540,7 +1544,7 @@
 #if CONFIG_EXT_INTRA
   if (is_dr_mode) {
     p_angle = mode_to_angle_map[mode] +
-              xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+              xd->mi[0]->mbmi.angle_delta[plane != 0] * angle_step;
     if (p_angle <= 90)
       need_above = 1, need_left = 0, need_above_left = 1;
     else if (p_angle < 180)
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 5709407..d4c0260 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -18,6 +18,9 @@
 #include "av1/common/mvref_common.h"
 #include "av1/common/pred_common.h"
 #include "av1/common/reconinter.h"
+#if CONFIG_EXT_INTRA
+#include "av1/common/reconintra.h"
+#endif  // CONFIG_EXT_INTRA
 #include "av1/common/seg_common.h"
 #if CONFIG_WARPED_MOTION
 #include "av1/common/warped_motion.h"
@@ -661,11 +664,13 @@
   (void)cm;
   if (bsize < BLOCK_8X8) return;
 
-  if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+  if (av1_is_directional_mode(mbmi->mode, bsize)) {
+    const int max_angle_delta = av1_get_max_angle_delta(mbmi->sb_type, 0);
     mbmi->angle_delta[0] =
-        read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+        read_uniform(r, 2 * max_angle_delta + 1) - max_angle_delta;
 #if CONFIG_INTRA_INTERP
-    p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+    p_angle = mode_to_angle_map[mbmi->mode] +
+              mbmi->angle_delta[0] * av1_get_angle_step(mbmi->sb_type, 0);
     if (av1_is_intra_filter_switchable(p_angle)) {
       FRAME_COUNTS *counts = xd->counts;
       mbmi->intra_filter = aom_read_tree(
@@ -677,9 +682,9 @@
 #endif  // CONFIG_INTRA_INTERP
   }
 
-  if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+  if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
     mbmi->angle_delta[1] =
-        read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+        read_uniform(r, 2 * MAX_ANGLE_DELTA_UV + 1) - MAX_ANGLE_DELTA_UV;
   }
 }
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 81d8b39..fe12f67 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -36,6 +36,9 @@
 #include "av1/common/odintrin.h"
 #include "av1/common/pred_common.h"
 #include "av1/common/reconinter.h"
+#if CONFIG_EXT_INTRA
+#include "av1/common/reconintra.h"
+#endif  // CONFIG_EXT_INTRA
 #include "av1/common/seg_common.h"
 #include "av1/common/tile_common.h"
 
@@ -1043,11 +1046,13 @@
   (void)cm;
   if (bsize < BLOCK_8X8) return;
 
-  if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
-    write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
-                  MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+  if (av1_is_directional_mode(mbmi->mode, bsize)) {
+    const int max_angle_delta = av1_get_max_angle_delta(mbmi->sb_type, 0);
+    write_uniform(w, 2 * max_angle_delta + 1,
+                  max_angle_delta + mbmi->angle_delta[0]);
 #if CONFIG_INTRA_INTERP
-    p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+    p_angle = mode_to_angle_map[mbmi->mode] +
+              mbmi->angle_delta[0] * av1_get_angle_step(mbmi->sb_type, 0);
     if (av1_is_intra_filter_switchable(p_angle)) {
       av1_write_token(w, av1_intra_filter_tree,
                       cm->fc->intra_filter_probs[intra_filter_ctx],
@@ -1056,9 +1061,9 @@
 #endif  // CONFIG_INTRA_INTERP
   }
 
-  if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
-    write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
-                  MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+  if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
+    write_uniform(w, 2 * MAX_ANGLE_DELTA_UV + 1,
+                  MAX_ANGLE_DELTA_UV + mbmi->angle_delta[1]);
   }
 }
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index de78669..e9b8aef 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5308,11 +5308,11 @@
 #endif  // CONFIG_FILTER_INTRA
 #if CONFIG_EXT_INTRA
 #if CONFIG_INTRA_INTERP
-      if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+      if (av1_is_directional_mode(mbmi->mode, bsize)) {
         int p_angle;
         const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
-        p_angle =
-            mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+        p_angle = mode_to_angle_map[mbmi->mode] +
+                  mbmi->angle_delta[0] * av1_get_angle_step(mbmi->sb_type, 0);
         if (av1_is_intra_filter_switchable(p_angle))
           ++counts->intra_filter[intra_filter_ctx][mbmi->intra_filter];
       }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 1a2c7a7..5be8180 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -113,7 +113,6 @@
 #define NEW_MV_DISCOUNT_FACTOR 8
 
 #if CONFIG_EXT_INTRA
-#define ANGLE_FAST_SEARCH 1
 #define ANGLE_SKIP_THRESH 10
 #define FILTER_FAST_SEARCH 1
 #endif  // CONFIG_EXT_INTRA
@@ -2555,22 +2554,29 @@
 #endif  // CONFIG_FILTER_INTRA
 
 #if CONFIG_EXT_INTRA
-static void pick_intra_angle_routine_sby(
-    const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
-    int64_t *distortion, int *skippable, int *best_angle_delta,
-    TX_SIZE *best_tx_size, TX_TYPE *best_tx_type,
+// Run RD calculation with given luma intra prediction angle, and return
+// the RD cost. Update the best mode info if the RD cost is the best so far.
+static int64_t calc_rd_given_intra_angle(
+    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
+    int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
+    RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
+    TX_TYPE *best_tx_type,
 #if CONFIG_INTRA_INTERP
     INTRA_FILTER *best_filter,
 #endif  // CONFIG_INTRA_INTERP
-    BLOCK_SIZE bsize, int rate_overhead, int64_t *best_rd) {
+    int64_t *best_rd) {
   int this_rate;
   RD_STATS tokenonly_rd_stats;
   int64_t this_rd;
   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
-  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
-  if (tokenonly_rd_stats.rate == INT_MAX) return;
 
-  this_rate = tokenonly_rd_stats.rate + rate_overhead;
+  mbmi->angle_delta[0] = angle_delta;
+  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
+  if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
+
+  this_rate = tokenonly_rd_stats.rate + mode_cost +
+              write_uniform_cost(2 * max_angle_delta + 1,
+                                 mbmi->angle_delta[0] + max_angle_delta);
   this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
 
   if (this_rd < *best_rd) {
@@ -2582,170 +2588,119 @@
 #endif  // CONFIG_INTRA_INTERP
     *best_tx_type = mbmi->tx_type;
     *rate = this_rate;
-    *rate_tokenonly = tokenonly_rd_stats.rate;
-    *distortion = tokenonly_rd_stats.dist;
-    *skippable = tokenonly_rd_stats.skip;
+    rd_stats->rate = tokenonly_rd_stats.rate;
+    rd_stats->dist = tokenonly_rd_stats.dist;
+    rd_stats->skip = tokenonly_rd_stats.skip;
   }
+  return this_rd;
 }
 
+// With given luma directional intra prediction mode, pick the best angle
+// delta. Return the RD cost corresponding to the best angle delta.
 static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                       int *rate, int *rate_tokenonly,
-                                       int64_t *distortion, int *skippable,
-                                       BLOCK_SIZE bsize, int rate_overhead,
+                                       int *rate, RD_STATS *rd_stats,
+                                       BLOCK_SIZE bsize, int mode_cost,
                                        int64_t best_rd) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO *const mic = xd->mi[0];
   MB_MODE_INFO *mbmi = &mic->mbmi;
-  int this_rate;
-  RD_STATS tokenonly_rd_stats;
-  int angle_delta, best_angle_delta = 0;
+  int i, angle_delta, best_angle_delta = 0;
+  const int max_angle_delta = av1_get_max_angle_delta(bsize, 0);
+  int first_try = 1;
 #if CONFIG_INTRA_INTERP
   int p_angle;
   const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
   INTRA_FILTER filter, best_filter = INTRA_FILTER_LINEAR;
 #endif  // CONFIG_INTRA_INTERP
-  const double rd_adjust = 1.2;
-  int64_t this_rd;
+  int64_t this_rd, best_rd_in, rd_cost[16];
   TX_SIZE best_tx_size = mic->mbmi.tx_size;
   TX_TYPE best_tx_type = mbmi->tx_type;
 
-  if (ANGLE_FAST_SEARCH) {
-    int deltas_level1[3] = { 0, -2, 2 };
-    int deltas_level2[3][2] = {
-      { -1, 1 }, { -3, -1 }, { 1, 3 },
-    };
-    const int level1 = 3, level2 = 2;
-    int i, j, best_i = -1, first_try = 1;
+  for (i = 0; i < 2 * (max_angle_delta + 2); ++i) rd_cost[i] = INT64_MAX;
 
-    for (i = 0; i < level1; ++i) {
-      int64_t tmp_best_rd;
-      mic->mbmi.angle_delta[0] = deltas_level1[i];
+  for (angle_delta = 0; angle_delta <= max_angle_delta; angle_delta += 2) {
 #if CONFIG_INTRA_INTERP
-      p_angle =
-          mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
-      for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
-        if ((FILTER_FAST_SEARCH || !av1_is_intra_filter_switchable(p_angle)) &&
-            filter != INTRA_FILTER_LINEAR)
-          continue;
-        mic->mbmi.intra_filter = filter;
+    for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+      if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
+      mic->mbmi.intra_filter = filter;
 #endif  // CONFIG_INTRA_INTERP
-        tmp_best_rd = (first_try && best_rd < INT64_MAX)
-                          ? (int64_t)(best_rd * rd_adjust)
-                          : best_rd;
-        super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, tmp_best_rd);
-        if (tokenonly_rd_stats.rate == INT_MAX) {
-          if (first_try)
-            return best_rd;
-          else
-            continue;
-        }
-        this_rate = tokenonly_rd_stats.rate + rate_overhead;
+      for (i = 0; i < 2; ++i) {
+        best_rd_in = (best_rd == INT64_MAX)
+                         ? INT64_MAX
+                         : (best_rd + (best_rd >> (first_try ? 3 : 5)));
+        this_rd = calc_rd_given_intra_angle(
+            cpi, x, bsize,
 #if CONFIG_INTRA_INTERP
-        this_rate += cpi->intra_filter_cost[intra_filter_ctx][filter];
-#endif  // CONFIG_INTRA_INTERP
-        this_rd =
-            RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
-        if (first_try && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
-          return best_rd;
-        first_try = 0;
-        if (this_rd < best_rd) {
-          best_i = i;
-          best_rd = this_rd;
-          best_angle_delta = mbmi->angle_delta[0];
-          best_tx_size = mbmi->tx_size;
-#if CONFIG_INTRA_INTERP
-          best_filter = mbmi->intra_filter;
-#endif  // CONFIG_INTRA_INTERP
-          best_tx_type = mbmi->tx_type;
-          *rate = this_rate;
-          *rate_tokenonly = tokenonly_rd_stats.rate;
-          *distortion = tokenonly_rd_stats.dist;
-          *skippable = tokenonly_rd_stats.skip;
-        }
-#if CONFIG_INTRA_INTERP
-      }
-#endif  // CONFIG_INTRA_INTERP
-    }
-
-    if (best_i >= 0) {
-      for (j = 0; j < level2; ++j) {
-        mic->mbmi.angle_delta[0] = deltas_level2[best_i][j];
-#if CONFIG_INTRA_INTERP
-        p_angle =
-            mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
-        for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
-          mic->mbmi.intra_filter = filter;
-          if ((FILTER_FAST_SEARCH ||
-               !av1_is_intra_filter_switchable(p_angle)) &&
-              filter != INTRA_FILTER_LINEAR)
-            continue;
-#endif  // CONFIG_INTRA_INTERP
-          pick_intra_angle_routine_sby(
-              cpi, x, rate, rate_tokenonly, distortion, skippable,
-              &best_angle_delta, &best_tx_size, &best_tx_type,
-#if CONFIG_INTRA_INTERP
-              &best_filter,
-#endif  // CONFIG_INTRA_INTERP
-              bsize,
-#if CONFIG_INTRA_INTERP
-              rate_overhead + cpi->intra_filter_cost[intra_filter_ctx][filter],
+            mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
 #else
-            rate_overhead,
+          mode_cost,
 #endif  // CONFIG_INTRA_INTERP
-              &best_rd);
-#if CONFIG_INTRA_INTERP
-        }
-#endif  // CONFIG_INTRA_INTERP
-      }
-    }
-  } else {
-    for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
-         ++angle_delta) {
-      mbmi->angle_delta[0] = angle_delta;
-#if CONFIG_INTRA_INTERP
-      p_angle =
-          mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
-      for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
-        mic->mbmi.intra_filter = filter;
-        if ((FILTER_FAST_SEARCH || !av1_is_intra_filter_switchable(p_angle)) &&
-            filter != INTRA_FILTER_LINEAR)
-          continue;
-#endif  // CONFIG_INTRA_INTERP
-        pick_intra_angle_routine_sby(
-            cpi, x, rate, rate_tokenonly, distortion, skippable,
-            &best_angle_delta, &best_tx_size, &best_tx_type,
+            best_rd_in, (1 - 2 * i) * angle_delta, max_angle_delta, rate,
+            rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
 #if CONFIG_INTRA_INTERP
             &best_filter,
 #endif  // CONFIG_INTRA_INTERP
-            bsize,
-#if CONFIG_INTRA_INTERP
-            rate_overhead + cpi->intra_filter_cost[intra_filter_ctx][filter],
-#else
-          rate_overhead,
-#endif  // CONFIG_INTRA_INTERP
             &best_rd);
-#if CONFIG_INTRA_INTERP
+        rd_cost[2 * angle_delta + i] = this_rd;
+        if (first_try && this_rd == INT64_MAX) return best_rd;
+        first_try = 0;
+        if (angle_delta == 0) {
+          rd_cost[1] = this_rd;
+          break;
+        }
       }
-#endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_INTERP
     }
+#endif  // CONFIG_INTRA_INTERP
+  }
+
+  assert(best_rd != INT64_MAX);
+  for (angle_delta = 1; angle_delta <= max_angle_delta; angle_delta += 2) {
+    int64_t rd_thresh;
+#if CONFIG_INTRA_INTERP
+    for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+      if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
+      mic->mbmi.intra_filter = filter;
+#endif  // CONFIG_INTRA_INTERP
+      for (i = 0; i < 2; ++i) {
+        int skip_search = 0;
+        rd_thresh = best_rd + (best_rd >> 5);
+        if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
+            rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
+          skip_search = 1;
+        if (!skip_search) {
+          this_rd = calc_rd_given_intra_angle(
+              cpi, x, bsize,
+#if CONFIG_INTRA_INTERP
+              mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
+#else
+            mode_cost,
+#endif  // CONFIG_INTRA_INTERP
+              best_rd, (1 - 2 * i) * angle_delta, max_angle_delta, rate,
+              rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
+#if CONFIG_INTRA_INTERP
+              &best_filter,
+#endif  // CONFIG_INTRA_INTERP
+              &best_rd);
+        }
+      }
+#if CONFIG_INTRA_INTERP
+    }
+#endif  // CONFIG_INTRA_INTERP
   }
 
 #if CONFIG_INTRA_INTERP
-  if (FILTER_FAST_SEARCH && *rate_tokenonly < INT_MAX) {
-    mbmi->angle_delta[0] = best_angle_delta;
-    p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+  if (FILTER_FAST_SEARCH && rd_stats->rate < INT_MAX) {
+    p_angle = mode_to_angle_map[mbmi->mode] +
+              best_angle_delta * av1_get_angle_step(bsize, 0);
     if (av1_is_intra_filter_switchable(p_angle)) {
       for (filter = INTRA_FILTER_LINEAR + 1; filter < INTRA_FILTERS; ++filter) {
         mic->mbmi.intra_filter = filter;
-        pick_intra_angle_routine_sby(
-            cpi, x, rate, rate_tokenonly, distortion, skippable,
+        this_rd = calc_rd_given_intra_angle(
+            cpi, x, bsize,
+            mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
+            best_rd, best_angle_delta, max_angle_delta, rate, rd_stats,
             &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
-            bsize,
-#if CONFIG_INTRA_INTERP
-            rate_overhead + cpi->intra_filter_cost[intra_filter_ctx][filter],
-#else
-            rate_overhead,
-#endif  // CONFIG_INTRA_INTERP
             &best_rd);
       }
     }
@@ -2911,7 +2866,7 @@
   const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
   INTRA_FILTER best_filter = INTRA_FILTER_LINEAR;
 #endif  // CONFIG_INTRA_INTERP
-  int is_directional_mode, rate_overhead, best_angle_delta = 0;
+  int is_directional_mode, best_angle_delta = 0;
   uint8_t directional_mode_skip_mask[INTRA_MODES];
   const int src_stride = x->plane[0].src.stride;
   const uint8_t *src = x->plane[0].src.buf;
@@ -2990,30 +2945,24 @@
     od_encode_rollback(&x->daala_enc, &pre_buf);
 #endif
 #if CONFIG_EXT_INTRA
-    is_directional_mode =
-        (mic->mbmi.mode != DC_PRED && mic->mbmi.mode != TM_PRED);
+    is_directional_mode = av1_is_directional_mode(mic->mbmi.mode, bsize);
     if (is_directional_mode && directional_mode_skip_mask[mic->mbmi.mode])
       continue;
     if (is_directional_mode) {
-      rate_overhead = bmode_costs[mic->mbmi.mode] +
-                      write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
-      this_rate_tokenonly = INT_MAX;
-      this_rd = rd_pick_intra_angle_sby(cpi, x, &this_rate,
-                                        &this_rate_tokenonly, &this_distortion,
-                                        &s, bsize, rate_overhead, best_rd);
+      this_rd_stats.rate = INT_MAX;
+      this_rd =
+          rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
+                                  bmode_costs[mic->mbmi.mode], best_rd);
     } else {
       mic->mbmi.angle_delta[0] = 0;
       super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
-      this_rate_tokenonly = this_rd_stats.rate;
-      this_distortion = this_rd_stats.dist;
-      s = this_rd_stats.skip;
     }
 #else
     super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
+#endif  // CONFIG_EXT_INTRA
     this_rate_tokenonly = this_rd_stats.rate;
     this_distortion = this_rd_stats.dist;
     s = this_rd_stats.skip;
-#endif  // CONFIG_EXT_INTRA
 
     if (this_rate_tokenonly == INT_MAX) continue;
 
@@ -3039,17 +2988,17 @@
 #endif  // CONFIG_FILTER_INTRA
 #if CONFIG_EXT_INTRA
     if (is_directional_mode) {
+      const int max_angle_delta = av1_get_max_angle_delta(bsize, 0);
 #if CONFIG_INTRA_INTERP
-      int p_angle;
-      p_angle = mode_to_angle_map[mic->mbmi.mode] +
-                mic->mbmi.angle_delta[0] * ANGLE_STEP;
+      const int p_angle =
+          mode_to_angle_map[mic->mbmi.mode] +
+          mic->mbmi.angle_delta[0] * av1_get_angle_step(bsize, 0);
       if (av1_is_intra_filter_switchable(p_angle))
         this_rate +=
             cpi->intra_filter_cost[intra_filter_ctx][mic->mbmi.intra_filter];
 #endif  // CONFIG_INTRA_INTERP
-      this_rate +=
-          write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
-                             MAX_ANGLE_DELTAS + mic->mbmi.angle_delta[0]);
+      this_rate += write_uniform_cost(
+          2 * max_angle_delta + 1, max_angle_delta + mic->mbmi.angle_delta[0]);
     }
 #endif  // CONFIG_EXT_INTRA
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
@@ -4039,97 +3988,86 @@
 #endif  // CONFIG_FILTER_INTRA
 
 #if CONFIG_EXT_INTRA
-static void pick_intra_angle_routine_sbuv(
-    const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
-    int64_t *distortion, int *skippable, int *best_angle_delta,
-    BLOCK_SIZE bsize, int rate_overhead, int64_t *best_rd) {
+// Run RD calculation with given chroma intra prediction angle, and return
+// the RD cost. Update the best mode info if the RD cost is the best so far.
+static int64_t pick_intra_angle_routine_sbuv(
+    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+    int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
+    int *best_angle_delta, int64_t *best_rd) {
   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
   int this_rate;
   int64_t this_rd;
   RD_STATS tokenonly_rd_stats;
 
-  if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd)) return;
-
+  if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
+    return INT64_MAX;
   this_rate = tokenonly_rd_stats.rate + rate_overhead;
   this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
   if (this_rd < *best_rd) {
     *best_rd = this_rd;
     *best_angle_delta = mbmi->angle_delta[1];
     *rate = this_rate;
-    *rate_tokenonly = tokenonly_rd_stats.rate;
-    *distortion = tokenonly_rd_stats.dist;
-    *skippable = tokenonly_rd_stats.skip;
+    rd_stats->rate = tokenonly_rd_stats.rate;
+    rd_stats->dist = tokenonly_rd_stats.dist;
+    rd_stats->skip = tokenonly_rd_stats.skip;
   }
+  return this_rd;
 }
 
+// With given chroma directional intra prediction mode, pick the best angle
+// delta. Return true if an RD cost that is smaller than the input one is found.
 static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    int *rate, int *rate_tokenonly,
-                                    int64_t *distortion, int *skippable,
                                     BLOCK_SIZE bsize, int rate_overhead,
-                                    int64_t best_rd) {
+                                    int64_t best_rd, int *rate,
+                                    RD_STATS *rd_stats) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  int this_rate;
-  int64_t this_rd;
-  int angle_delta, best_angle_delta = 0;
-  const double rd_adjust = 1.2;
-  RD_STATS tokenonly_rd_stats;
+  int i, angle_delta, best_angle_delta = 0;
+  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA_UV + 2)];
 
-  *rate_tokenonly = INT_MAX;
-  if (ANGLE_FAST_SEARCH) {
-    int deltas_level1[3] = { 0, -2, 2 };
-    int deltas_level2[3][2] = {
-      { -1, 1 }, { -3, -1 }, { 1, 3 },
-    };
-    const int level1 = 3, level2 = 2;
-    int i, j, best_i = -1;
+  rd_stats->rate = INT_MAX;
+  rd_stats->skip = 0;
+  rd_stats->dist = INT64_MAX;
+  for (i = 0; i < 2 * (MAX_ANGLE_DELTA_UV + 2); ++i) rd_cost[i] = INT64_MAX;
 
-    for (i = 0; i < level1; ++i) {
-      int64_t tmp_best_rd;
-      mbmi->angle_delta[1] = deltas_level1[i];
-      tmp_best_rd = (i == 0 && best_rd < INT64_MAX)
-                        ? (int64_t)(best_rd * rd_adjust)
-                        : best_rd;
-      if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, tmp_best_rd)) {
-        if (i == 0)
-          break;
-        else
-          continue;
-      }
-      this_rate = tokenonly_rd_stats.rate + rate_overhead;
-      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
-      if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust) break;
-      if (this_rd < best_rd) {
-        best_i = i;
-        best_rd = this_rd;
-        best_angle_delta = mbmi->angle_delta[1];
-        *rate = this_rate;
-        *rate_tokenonly = tokenonly_rd_stats.rate;
-        *distortion = tokenonly_rd_stats.dist;
-        *skippable = tokenonly_rd_stats.skip;
+  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA_UV; angle_delta += 2) {
+    for (i = 0; i < 2; ++i) {
+      best_rd_in = (best_rd == INT64_MAX)
+                       ? INT64_MAX
+                       : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
+      mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
+      this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
+                                              best_rd_in, rate, rd_stats,
+                                              &best_angle_delta, &best_rd);
+      rd_cost[2 * angle_delta + i] = this_rd;
+      if (angle_delta == 0) {
+        if (this_rd == INT64_MAX) return 0;
+        rd_cost[1] = this_rd;
+        break;
       }
     }
+  }
 
-    if (best_i >= 0) {
-      for (j = 0; j < level2; ++j) {
-        mbmi->angle_delta[1] = deltas_level2[best_i][j];
-        pick_intra_angle_routine_sbuv(cpi, x, rate, rate_tokenonly, distortion,
-                                      skippable, &best_angle_delta, bsize,
-                                      rate_overhead, &best_rd);
+  assert(best_rd != INT64_MAX);
+  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA_UV; angle_delta += 2) {
+    int64_t rd_thresh;
+    for (i = 0; i < 2; ++i) {
+      int skip_search = 0;
+      rd_thresh = best_rd + (best_rd >> 5);
+      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
+          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
+        skip_search = 1;
+      if (!skip_search) {
+        mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
+        this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
+                                                best_rd, rate, rd_stats,
+                                                &best_angle_delta, &best_rd);
       }
     }
-  } else {
-    for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
-         ++angle_delta) {
-      mbmi->angle_delta[1] = angle_delta;
-      pick_intra_angle_routine_sbuv(cpi, x, rate, rate_tokenonly, distortion,
-                                    skippable, &best_angle_delta, bsize,
-                                    rate_overhead, &best_rd);
-    }
   }
 
   mbmi->angle_delta[1] = best_angle_delta;
-  return *rate_tokenonly != INT_MAX;
+  return rd_stats->rate != INT_MAX;
 }
 #endif  // CONFIG_EXT_INTRA
 
@@ -4176,13 +4114,11 @@
 #if CONFIG_EXT_INTRA
     is_directional_mode = av1_is_directional_mode(mode, mbmi->sb_type);
     rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
-                    write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+                    write_uniform_cost(2 * MAX_ANGLE_DELTA_UV + 1, 0);
     mbmi->angle_delta[1] = 0;
     if (is_directional_mode) {
-      if (!rd_pick_intra_angle_sbuv(
-              cpi, x, &this_rate, &tokenonly_rd_stats.rate,
-              &tokenonly_rd_stats.dist, &tokenonly_rd_stats.skip, bsize,
-              rate_overhead, best_rd))
+      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
+                                    &this_rate, &tokenonly_rd_stats))
         continue;
     } else {
       if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
@@ -4194,10 +4130,12 @@
     }
     this_rate =
         tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
-    if (is_directional_mode)
-      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
-                                      MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
-#else
+    if (is_directional_mode) {
+      this_rate +=
+          write_uniform_cost(2 * MAX_ANGLE_DELTA_UV + 1,
+                             MAX_ANGLE_DELTA_UV + mbmi->angle_delta[1]);
+    }
+#else  // CONFIG_EXT_INTRA
     if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
 #if CONFIG_PVQ
       od_encode_rollback(&x->daala_enc, &buf);
@@ -4207,6 +4145,7 @@
     this_rate =
         tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
 #endif  // CONFIG_EXT_INTRA
+
 #if CONFIG_FILTER_INTRA
     if (mbmi->sb_type >= BLOCK_8X8 && mode == DC_PRED)
       this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 0);
@@ -8465,9 +8404,9 @@
   rate2 += write_uniform_cost(
       FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
 #if CONFIG_EXT_INTRA
-  if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
-    rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
-                                MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+  if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
+    rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA_UV + 1,
+                                MAX_ANGLE_DELTA_UV + mbmi->angle_delta[1]);
   }
 #endif  // CONFIG_EXT_INTRA
   if (mbmi->mode == DC_PRED) {
@@ -8617,7 +8556,6 @@
 #if CONFIG_EXT_INTRA
   int8_t uv_angle_delta[TX_SIZES];
   int is_directional_mode, angle_stats_ready = 0;
-  int rate_overhead, rate_dummy;
   uint8_t directional_mode_skip_mask[INTRA_MODES];
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA
@@ -9153,8 +9091,9 @@
       TX_SIZE uv_tx;
       struct macroblockd_plane *const pd = &xd->plane[1];
 #if CONFIG_EXT_INTRA
-      is_directional_mode = (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED);
+      is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
       if (is_directional_mode) {
+        int rate_dummy;
         if (!angle_stats_ready) {
           const int src_stride = x->plane[0].src.stride;
           const uint8_t *src = x->plane[0].src.buf;
@@ -9169,25 +9108,20 @@
           angle_stats_ready = 1;
         }
         if (directional_mode_skip_mask[mbmi->mode]) continue;
-        rate_overhead = write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0) +
-                        intra_mode_cost[mbmi->mode];
-        rate_y = INT_MAX;
+        rd_stats_y.rate = INT_MAX;
         this_rd =
-            rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
-                                    &skippable, bsize, rate_overhead, best_rd);
+            rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
+                                    intra_mode_cost[mbmi->mode], best_rd);
       } else {
         mbmi->angle_delta[0] = 0;
         super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
-        rate_y = rd_stats_y.rate;
-        distortion_y = rd_stats_y.dist;
-        skippable = rd_stats_y.skip;
       }
 #else
       super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
+#endif  // CONFIG_EXT_INTRA
       rate_y = rd_stats_y.rate;
       distortion_y = rd_stats_y.dist;
       skippable = rd_stats_y.skip;
-#endif  // CONFIG_EXT_INTRA
 
       if (rate_y == INT_MAX) continue;
 
@@ -9257,20 +9191,21 @@
       }
 #if CONFIG_EXT_INTRA
       if (is_directional_mode) {
+        const int max_angle_delta = av1_get_max_angle_delta(bsize, 0);
 #if CONFIG_INTRA_INTERP
         int p_angle;
         const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
-        p_angle =
-            mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+        p_angle = mode_to_angle_map[mbmi->mode] +
+                  mbmi->angle_delta[0] * av1_get_angle_step(bsize, 0);
         if (av1_is_intra_filter_switchable(p_angle))
           rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
 #endif  // CONFIG_INTRA_INTERP
-        rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
-                                    MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+        rate2 += write_uniform_cost(2 * max_angle_delta + 1,
+                                    max_angle_delta + mbmi->angle_delta[0]);
       }
       if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
-        rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
-                                    MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+        rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA_UV + 1,
+                                    MAX_ANGLE_DELTA_UV + mbmi->angle_delta[1]);
       }
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA