[intra-edge] Convert 4x4 VP9 to ext-intra; upsample edge samples

Updates to intra-edge experiment

- Convert VP9-style intra pred to Ext-intra style
- Upsample edge predictors by 2x based on angle and edge size

BD-rate, 1-kf AWCY
  360p:  -0.11%
  720p:  -0.54%
  1080p: -0.96%

Change-Id: Ib73805d31d5d286e607a7ee7470fcbdf11edbbff
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 0d56f9f..190ff4c 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -36,6 +36,14 @@
   NEED_BOTTOMLEFT = 1 << 5,
 };
 
+#if CONFIG_INTRA_EDGE
+#define INTRA_EDGE_FILT 3
+#define INTRA_EDGE_TAPS 5
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+#define MAX_UPSAMPLE_SZ 12
+#endif
+#endif
+
 static const uint8_t extend_modes[INTRA_MODES] = {
   NEED_ABOVE | NEED_LEFT,                   // DC
   NEED_ABOVE,                               // V
@@ -695,6 +703,9 @@
 #if CONFIG_INTRA_INTERP
                              INTRA_FILTER filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                             int upsample_above,
+#endif
                              int dx, int dy) {
   int r, c, x, base, shift, val;
 
@@ -764,27 +775,32 @@
   }
 #endif  // CONFIG_INTRA_INTERP
 
+#if !CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_above = 0;
+#endif
+  const int max_base_x = ((bw + bh) - 1) << upsample_above;
+  const int frac_bits = 8 - upsample_above;
+  const int base_inc = 1 << upsample_above;
   x = dx;
   for (r = 0; r < bh; ++r, dst += stride, x += dx) {
-    base = x >> 8;
-    shift = x & 0xFF;
+    base = x >> frac_bits;
+    shift = (x << upsample_above) & 0xFF;
 
-    if (base >= bw + bh - 1) {
-      int i;
-      for (i = r; i < bh; ++i) {
-        memset(dst, above[bw + bh - 1], bw * sizeof(dst[0]));
+    if (base >= max_base_x) {
+      for (int i = r; i < bh; ++i) {
+        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
         dst += stride;
       }
       return;
     }
 
-    for (c = 0; c < bw; ++c, ++base) {
-      if (base < bw + bh - 1) {
+    for (c = 0; c < bw; ++c, base += base_inc) {
+      if (base < max_base_x) {
         val = above[base] * (256 - shift) + above[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
         dst[c] = clip_pixel(val);
       } else {
-        dst[c] = above[bw + bh - 1];
+        dst[c] = above[max_base_x];
       }
     }
   }
@@ -796,19 +812,30 @@
 #if CONFIG_INTRA_INTERP
                              INTRA_FILTER filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                             int upsample_above, int upsample_left,
+#endif
                              int dx, int dy) {
   int r, c, x, y, shift1, shift2, val, base1, base2;
 
   assert(dx > 0);
   assert(dy > 0);
 
+#if !CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_above = 0;
+  const int upsample_left = 0;
+#endif
+  const int min_base_x = -(1 << upsample_above);
+  const int frac_bits_x = 8 - upsample_above;
+  const int frac_bits_y = 8 - upsample_left;
+  const int base_inc_x = 1 << upsample_above;
   x = -dx;
   for (r = 0; r < bh; ++r, x -= dx, dst += stride) {
-    base1 = x >> 8;
+    base1 = x >> frac_bits_x;
     y = (r << 8) - dy;
-    for (c = 0; c < bw; ++c, ++base1, y -= dy) {
-      if (base1 >= -1) {
-        shift1 = x & 0xFF;
+    for (c = 0; c < bw; ++c, base1 += base_inc_x, y -= dy) {
+      if (base1 >= min_base_x) {
+        shift1 = (x << upsample_above) & 0xFF;
 #if CONFIG_INTRA_INTERP
         val =
             intra_subpel_interp(base1, shift1, above, -1, bw - 1, filter_type);
@@ -817,9 +844,9 @@
         val = ROUND_POWER_OF_TWO(val, 8);
 #endif  // CONFIG_INTRA_INTERP
       } else {
-        base2 = y >> 8;
-        assert(base2 >= -1);
-        shift2 = y & 0xFF;
+        base2 = y >> frac_bits_y;
+        assert(base2 >= -(1 << upsample_left));
+        shift2 = (y << upsample_left) & 0xFF;
 #if CONFIG_INTRA_INTERP
         val = intra_subpel_interp(base2, shift2, left, -1, bh - 1, filter_type);
 #else
@@ -838,6 +865,9 @@
 #if CONFIG_INTRA_INTERP
                              INTRA_FILTER filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                             int upsample_left,
+#endif
                              int dx, int dy) {
   int r, c, y, base, shift, val;
 
@@ -918,18 +948,24 @@
   }
 #endif  // CONFIG_INTRA_INTERP
 
+#if !CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_left = 0;
+#endif
+  const int max_base_y = (bw + bh - 1) << upsample_left;
+  const int frac_bits = 8 - upsample_left;
+  const int base_inc = 1 << upsample_left;
   y = dy;
   for (c = 0; c < bw; ++c, y += dy) {
-    base = y >> 8;
-    shift = y & 0xFF;
+    base = y >> frac_bits;
+    shift = (y << upsample_left) & 0xFF;
 
-    for (r = 0; r < bh; ++r, ++base) {
-      if (base < bw + bh - 1) {
+    for (r = 0; r < bh; ++r, base += base_inc) {
+      if (base < max_base_y) {
         val = left[base] * (256 - shift) + left[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
         dst[r * stride + c] = clip_pixel(val);
       } else {
-        for (; r < bh; ++r) dst[r * stride + c] = left[bw + bh - 1];
+        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
         break;
       }
     }
@@ -971,6 +1007,9 @@
 #if CONFIG_INTRA_INTERP
                          INTRA_FILTER filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                         int upsample_above, int upsample_left,
+#endif
                          int angle) {
   const int dx = get_dx(angle);
   const int dy = get_dy(angle);
@@ -983,18 +1022,27 @@
 #if CONFIG_INTRA_INTERP
                      filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                     upsample_above,
+#endif
                      dx, dy);
   } else if (angle > 90 && angle < 180) {
     dr_prediction_z2(dst, stride, bw, bh, above, left,
 #if CONFIG_INTRA_INTERP
                      filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                     upsample_above, upsample_left,
+#endif
                      dx, dy);
   } else if (angle > 180 && angle < 270) {
     dr_prediction_z3(dst, stride, bw, bh, above, left,
 #if CONFIG_INTRA_INTERP
                      filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                     upsample_left,
+#endif
                      dx, dy);
   } else if (angle == 90) {
     pred[V_PRED][tx_size](dst, stride, above, left);
@@ -1042,6 +1090,9 @@
 #if CONFIG_INTRA_INTERP
                                     INTRA_FILTER filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                                    int upsample_above,
+#endif
                                     int dx, int dy, int bd) {
   int r, c, x, base, shift, val;
 
@@ -1050,22 +1101,27 @@
   assert(dy == 1);
   assert(dx > 0);
 
+#if !CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_above = 0;
+#endif
+  const int max_base_x = ((bw + bh) - 1) << upsample_above;
+  const int frac_bits = 8 - upsample_above;
+  const int base_inc = 1 << upsample_above;
   x = dx;
   for (r = 0; r < bh; ++r, dst += stride, x += dx) {
-    base = x >> 8;
-    shift = x & 0xFF;
+    base = x >> frac_bits;
+    shift = (x << upsample_above) & 0xFF;
 
-    if (base >= bw + bh - 1) {
-      int i;
-      for (i = r; i < bh; ++i) {
-        aom_memset16(dst, above[bw + bh - 1], bw);
+    if (base >= max_base_x) {
+      for (int i = r; i < bh; ++i) {
+        aom_memset16(dst, above[max_base_x], bw);
         dst += stride;
       }
       return;
     }
 
-    for (c = 0; c < bw; ++c, ++base) {
-      if (base < bw + bh - 1) {
+    for (c = 0; c < bw; ++c, base += base_inc) {
+      if (base < max_base_x) {
 #if CONFIG_INTRA_INTERP
         val = highbd_intra_subpel_interp(base, shift, above, 0, bw + bh - 1,
                                          filter_type);
@@ -1075,7 +1131,7 @@
 #endif  // CONFIG_INTRA_INTERP
         dst[c] = clip_pixel_highbd(val, bd);
       } else {
-        dst[c] = above[bw + bh - 1];
+        dst[c] = above[max_base_x];
       }
     }
   }
@@ -1088,19 +1144,29 @@
 #if CONFIG_INTRA_INTERP
                                     INTRA_FILTER filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                                    int upsample_above, int upsample_left,
+#endif
                                     int dx, int dy, int bd) {
   int r, c, x, y, shift, val, base;
 
   assert(dx > 0);
   assert(dy > 0);
 
+#if !CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_above = 0;
+  const int upsample_left = 0;
+#endif
+  const int min_base_x = -(1 << upsample_above);
+  const int frac_bits_x = 8 - upsample_above;
+  const int frac_bits_y = 8 - upsample_left;
   for (r = 0; r < bh; ++r) {
     for (c = 0; c < bw; ++c) {
       y = r + 1;
       x = (c << 8) - y * dx;
-      base = x >> 8;
-      if (base >= -1) {
-        shift = x & 0xFF;
+      base = x >> frac_bits_x;
+      if (base >= min_base_x) {
+        shift = (x << upsample_above) & 0xFF;
 #if CONFIG_INTRA_INTERP
         val = highbd_intra_subpel_interp(base, shift, above, -1, bw - 1,
                                          filter_type);
@@ -1111,8 +1177,8 @@
       } else {
         x = c + 1;
         y = (r << 8) - x * dy;
-        base = y >> 8;
-        shift = y & 0xFF;
+        base = y >> frac_bits_y;
+        shift = (y << upsample_left) & 0xFF;
 #if CONFIG_INTRA_INTERP
         val = highbd_intra_subpel_interp(base, shift, left, -1, bh - 1,
                                          filter_type);
@@ -1134,6 +1200,9 @@
 #if CONFIG_INTRA_INTERP
                                     INTRA_FILTER filter_type,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                                    int upsample_left,
+#endif
                                     int dx, int dy, int bd) {
   int r, c, y, base, shift, val;
 
@@ -1142,13 +1211,19 @@
   assert(dx == 1);
   assert(dy > 0);
 
+#if !CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_left = 0;
+#endif
+  const int max_base_y = (bw + bh - 1) << upsample_left;
+  const int frac_bits = 8 - upsample_left;
+  const int base_inc = 1 << upsample_left;
   y = dy;
   for (c = 0; c < bw; ++c, y += dy) {
-    base = y >> 8;
-    shift = y & 0xFF;
+    base = y >> frac_bits;
+    shift = (y << upsample_left) & 0xFF;
 
-    for (r = 0; r < bh; ++r, ++base) {
-      if (base < bw + bh - 1) {
+    for (r = 0; r < bh; ++r, base += base_inc) {
+      if (base < max_base_y) {
 #if CONFIG_INTRA_INTERP
         val = highbd_intra_subpel_interp(base, shift, left, 0, bw + bh - 1,
                                          filter_type);
@@ -1158,7 +1233,7 @@
 #endif  // CONFIG_INTRA_INTERP
         dst[r * stride + c] = clip_pixel_highbd(val, bd);
       } else {
-        for (; r < bh; ++r) dst[r * stride + c] = left[bw + bh - 1];
+        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
         break;
       }
     }
@@ -1171,6 +1246,9 @@
 #if CONFIG_INTRA_INTERP
                                 INTRA_FILTER filter,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                                int upsample_above, int upsample_left,
+#endif
                                 int angle, int bd) {
   const int dx = get_dx(angle);
   const int dy = get_dy(angle);
@@ -1183,18 +1261,27 @@
 #if CONFIG_INTRA_INTERP
                             filter,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                            upsample_above,
+#endif
                             dx, dy, bd);
   } else if (angle > 90 && angle < 180) {
     highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
 #if CONFIG_INTRA_INTERP
                             filter,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                            upsample_above, upsample_left,
+#endif
                             dx, dy, bd);
   } else if (angle > 180 && angle < 270) {
     highbd_dr_prediction_z3(dst, stride, bw, bh, above, left,
 #if CONFIG_INTRA_INTERP
                             filter,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                            upsample_left,
+#endif
                             dx, dy, bd);
   } else if (angle == 90) {
     pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
@@ -1851,7 +1938,13 @@
   const int d = abs(delta);
   int strength = 0;
 
-  if (bsz == 8) {
+  if (bsz == 4) {
+    if (d < 56) {
+      strength = 0;
+    } else if (d < 90) {
+      strength = 1;
+    }
+  } else if (bsz == 8) {
     if (d < 8) {
       strength = 0;
     } else if (d < 32) {
@@ -1881,7 +1974,7 @@
 static void filter_intra_edge(uint8_t *p, int sz, int strength) {
   if (!strength) return;
 
-  const int kernel[3][5] = {
+  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
     { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
   };
   const int filt = strength - 1;
@@ -1890,7 +1983,7 @@
   memcpy(edge, p, sz * sizeof(*p));
   for (int i = 1; i < sz - 1; i++) {
     int s = 0;
-    for (int j = 0; j < 5; j++) {
+    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
       int k = i - 2 + j;
       k = (k < 0) ? 0 : k;
       k = (k > sz - 1) ? sz - 1 : k;
@@ -1905,7 +1998,7 @@
 static void filter_intra_edge_high(uint16_t *p, int sz, int strength) {
   if (!strength) return;
 
-  const int kernel[3][5] = {
+  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
     { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
   };
   const int filt = strength - 1;
@@ -1914,7 +2007,7 @@
   memcpy(edge, p, sz * sizeof(*p));
   for (int i = 1; i < sz - 1; i++) {
     int s = 0;
-    for (int j = 0; j < 5; j++) {
+    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
       int k = i - 2 + j;
       k = (k < 0) ? 0 : k;
       k = (k > sz - 1) ? sz - 1 : k;
@@ -1924,9 +2017,66 @@
     p[i] = s;
   }
 }
-#endif  // CONFIG_INTRA_EDGE
 #endif  // CONFIG_HIGHBITDEPTH
 
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+static int use_intra_edge_upsample(int bsz, int delta) {
+  const int d = abs(delta);
+  return (bsz == 4 && d < 56);
+}
+
+static void upsample_intra_edge(uint8_t *p, int sz) {
+  // interpolate half-sample positions
+  assert(sz <= MAX_UPSAMPLE_SZ);
+
+  uint8_t in[MAX_UPSAMPLE_SZ + 3];
+  // copy p[-1..(sz-1)] and extend first and last samples
+  in[0] = p[-1];
+  in[1] = p[-1];
+  for (int i = 0; i < sz; i++) {
+    in[i + 2] = p[i];
+  }
+  in[sz + 2] = p[sz - 1];
+
+  // interpolate half-sample edge positions
+  p[-2] = in[0];
+  for (int i = 0; i < sz; i++) {
+    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
+    s = clip_pixel((s + 8) >> 4);
+    p[2 * i - 1] = s;
+    p[2 * i] = in[i + 2];
+  }
+}
+
+#if CONFIG_HIGHBITDEPTH
+static void upsample_intra_edge_high(uint16_t *p, int sz, int bd) {
+  // interpolate half-sample positions
+  assert(sz <= MAX_UPSAMPLE_SZ);
+
+  uint16_t in[MAX_UPSAMPLE_SZ + 3];
+  // copy p[-1..(sz-1)] and extend first and last samples
+  in[0] = p[-1];
+  in[1] = p[-1];
+  for (int i = 0; i < sz; i++) {
+    in[i + 2] = p[i];
+  }
+  in[sz + 2] = p[sz - 1];
+
+  // interpolate half-sample edge positions
+  p[-2] = in[0];
+  for (int i = 0; i < sz; i++) {
+    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
+    s = (s + 8) >> 4;
+    s = clip_pixel_highbd(s, bd);
+    p[2 * i - 1] = s;
+    p[2 * i] = in[i + 2];
+  }
+}
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // CONFIG_INTRA_EDGE_UPSAMPLE
+
+#endif  // CONFIG_INTRA_EDGE
+
 #if CONFIG_HIGHBITDEPTH
 static void build_intra_predictors_high(
     const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
@@ -2107,20 +2257,34 @@
   }
 
 #if CONFIG_EXT_INTRA && CONFIG_INTRA_EDGE
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_above = use_intra_edge_upsample(txwpx, p_angle - 90);
+  const int upsample_left = use_intra_edge_upsample(txhpx, p_angle - 180);
+#endif
+  const int need_right = p_angle < 90;
+  const int need_bottom = p_angle > 180;
   if (is_dr_mode && p_angle != 90 && p_angle != 180) {
     const int ab_le = need_above_left ? 1 : 0;
     if (need_above && n_top_px > 0) {
       const int strength = intra_edge_filter_strength(txwpx, p_angle - 90);
-      const int need_right = p_angle < 90;
       const int n_px = n_top_px + ab_le + (need_right ? n_topright_px : 0);
       filter_intra_edge_high(above_row - ab_le, n_px, strength);
     }
     if (need_left && n_left_px > 0) {
       const int strength = intra_edge_filter_strength(txhpx, p_angle - 180);
-      const int need_bottom = p_angle > 180;
       const int n_px = n_left_px + ab_le + (need_bottom ? n_bottomleft_px : 0);
       filter_intra_edge_high(left_col - ab_le, n_px, strength);
     }
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+    if (upsample_above) {
+      const int n_px = txwpx + (need_right ? txhpx : 0);
+      upsample_intra_edge_high(above_row, n_px, xd->bd);
+    }
+    if (upsample_left) {
+      const int n_px = txhpx + (need_bottom ? txwpx : 0);
+      upsample_intra_edge_high(left_col, n_px, xd->bd);
+    }
+#endif
   }
 #endif
 
@@ -2144,6 +2308,9 @@
 #if CONFIG_INTRA_INTERP
                         filter,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                        upsample_above, upsample_left,
+#endif
                         p_angle, xd->bd);
     return;
   }
@@ -2337,20 +2504,34 @@
   }
 
 #if CONFIG_EXT_INTRA && CONFIG_INTRA_EDGE
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+  const int upsample_above = use_intra_edge_upsample(txwpx, p_angle - 90);
+  const int upsample_left = use_intra_edge_upsample(txhpx, p_angle - 180);
+#endif
+  const int need_right = p_angle < 90;
+  const int need_bottom = p_angle > 180;
   if (is_dr_mode && p_angle != 90 && p_angle != 180) {
     const int ab_le = need_above_left ? 1 : 0;
     if (need_above && n_top_px > 0) {
       const int strength = intra_edge_filter_strength(txwpx, p_angle - 90);
-      const int need_right = p_angle < 90;
       const int n_px = n_top_px + ab_le + (need_right ? n_topright_px : 0);
       filter_intra_edge(above_row - ab_le, n_px, strength);
     }
     if (need_left && n_left_px > 0) {
       const int strength = intra_edge_filter_strength(txhpx, p_angle - 180);
-      const int need_bottom = p_angle > 180;
       const int n_px = n_left_px + ab_le + (need_bottom ? n_bottomleft_px : 0);
       filter_intra_edge(left_col - ab_le, n_px, strength);
     }
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+    if (upsample_above) {
+      const int n_px = txwpx + (need_right ? txhpx : 0);
+      upsample_intra_edge(above_row, n_px);
+    }
+    if (upsample_left) {
+      const int n_px = txhpx + (need_bottom ? txwpx : 0);
+      upsample_intra_edge(left_col, n_px);
+    }
+#endif
   }
 #endif
 
@@ -2373,6 +2554,9 @@
 #if CONFIG_INTRA_INTERP
                  filter,
 #endif  // CONFIG_INTRA_INTERP
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+                 upsample_above, upsample_left,
+#endif
                  p_angle);
     return;
   }
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
index 8ad3bae..67e5706 100644
--- a/av1/common/reconintra.h
+++ b/av1/common/reconintra.h
@@ -73,10 +73,27 @@
 #define FILTER_INTRA_PREC_BITS 10
 #endif  // CONFIG_FILTER_INTRA
 
+#define CONFIG_INTRA_EDGE_UPSAMPLE CONFIG_INTRA_EDGE
+#define CONFIG_USE_ANGLE_DELTA_SUB8X8 0
+
 #if CONFIG_EXT_INTRA
 static INLINE int av1_is_directional_mode(PREDICTION_MODE mode,
                                           BLOCK_SIZE bsize) {
+#if CONFIG_INTRA_EDGE_UPSAMPLE
+  (void)bsize;
+  return mode >= V_PRED && mode <= D63_PRED;
+#else
   return mode >= V_PRED && mode <= D63_PRED && bsize >= BLOCK_8X8;
+#endif
+}
+
+static INLINE int av1_use_angle_delta(BLOCK_SIZE bsize) {
+  (void)bsize;
+#if CONFIG_USE_ANGLE_DELTA_SUB8X8
+  return 1;
+#else
+  return bsize >= BLOCK_8X8;
+#endif
 }
 #endif  // CONFIG_EXT_INTRA
 
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index a7d6693..dda15a3 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -866,7 +866,11 @@
 #endif  // CONFIG_INTRA_INTERP
 
   (void)cm;
-  if (bsize < BLOCK_8X8) return;
+
+  mbmi->angle_delta[0] = 0;
+  mbmi->angle_delta[1] = 0;
+
+  if (!av1_use_angle_delta(bsize)) return;
 
   if (av1_is_directional_mode(mbmi->mode, bsize)) {
     mbmi->angle_delta[0] =
@@ -1160,6 +1164,9 @@
 #endif  // CONFIG_CFL
 
 #if CONFIG_CB4X4
+  } else {
+    // Avoid decoding angle_info if there is no chroma prediction
+    mbmi->uv_mode = DC_PRED;
   }
 #endif
 
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 0385221..016722a 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1451,7 +1451,7 @@
 #endif  // CONFIG_INTRA_INTERP
 
   (void)ec_ctx;
-  if (bsize < BLOCK_8X8) return;
+  if (!av1_use_angle_delta(bsize)) return;
 
   if (av1_is_directional_mode(mbmi->mode, bsize)) {
     write_uniform(w, 2 * MAX_ANGLE_DELTA + 1,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 6822b0d..4695f26 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2458,7 +2458,8 @@
   model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
                   &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
 #if CONFIG_EXT_INTRA
-  if (av1_is_directional_mode(mbmi->mode, bsize)) {
+  if (av1_is_directional_mode(mbmi->mode, bsize) &&
+      av1_use_angle_delta(bsize)) {
     mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                     MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
   }
@@ -3579,8 +3580,8 @@
                              uint8_t *directional_mode_skip_mask) {
   memset(directional_mode_skip_mask, 0,
          INTRA_MODES * sizeof(*directional_mode_skip_mask));
-  // Sub-8x8 blocks do not use extra directions.
-  if (bsize < BLOCK_8X8) return;
+  // Check if angle_delta is used
+  if (!av1_use_angle_delta(bsize)) return;
   uint64_t hist[DIRECTIONAL_MODES];
   memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
   src += src_stride;
@@ -3634,8 +3635,8 @@
                                     uint8_t *directional_mode_skip_mask) {
   memset(directional_mode_skip_mask, 0,
          INTRA_MODES * sizeof(*directional_mode_skip_mask));
-  // Sub-8x8 blocks do not use extra directions.
-  if (bsize < BLOCK_8X8) return;
+  // Check if angle_delta is used
+  if (!av1_use_angle_delta(bsize)) return;
   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
   uint64_t hist[DIRECTIONAL_MODES];
   memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
@@ -3791,7 +3792,7 @@
 #if CONFIG_EXT_INTRA
     is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
     if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
-    if (is_directional_mode) {
+    if (is_directional_mode && av1_use_angle_delta(bsize)) {
       this_rd_stats.rate = INT_MAX;
       rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
                               bmode_costs[mbmi->mode], best_rd, &best_model_rd);
@@ -3837,8 +3838,10 @@
         this_rate +=
             cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
 #endif  // CONFIG_INTRA_INTERP
-      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
-                                      MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+      if (av1_use_angle_delta(bsize)) {
+        this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
+                                        MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+      }
     }
 #endif  // CONFIG_EXT_INTRA
     this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
@@ -4921,7 +4924,7 @@
     mbmi->uv_mode = mode;
 #if CONFIG_EXT_INTRA
     mbmi->angle_delta[1] = 0;
-    if (is_directional_mode) {
+    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
       const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
                                 write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
       if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
@@ -4942,7 +4945,7 @@
         tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
 
 #if CONFIG_EXT_INTRA
-    if (is_directional_mode) {
+    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
       this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                       MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
     }
@@ -9013,7 +9016,8 @@
   rate2 += write_uniform_cost(
       FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
 #if CONFIG_EXT_INTRA
-  if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
+  if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+      av1_use_angle_delta(bsize)) {
     rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
   }
@@ -9760,7 +9764,7 @@
       struct macroblockd_plane *const pd = &xd->plane[1];
 #if CONFIG_EXT_INTRA
       is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
-      if (is_directional_mode) {
+      if (is_directional_mode && av1_use_angle_delta(bsize)) {
         int rate_dummy;
         int64_t model_rd = INT64_MAX;
         if (!angle_stats_ready) {
@@ -9873,10 +9877,13 @@
         if (av1_is_intra_filter_switchable(p_angle))
           rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
 #endif  // CONFIG_INTRA_INTERP
-        rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
-                                    MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+        if (av1_use_angle_delta(bsize)) {
+          rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
+                                      MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+        }
       }
-      if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+      if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+          av1_use_angle_delta(bsize)) {
         rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                     MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
       }