Added new experiment EXT_INTRA_MOD2.

Hardware (HW) simplifications for intra prediction.
Changed from 256-point interpolation to 32-point interpolation.
Changed the values in dr_intra_derivative to be scaled by 64
instead of 256 and adjusted the angles to be more evenly
spread out. The values are also kept within 10 bits.

AWCY results:
objective-1-fast-1, key-frame only, cpu-used=0
--disable-ext-partition --disable-ext-partition-types
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0476 |     N/A |  0.2705 |  -0.0741 | -0.1357 | -0.1960 |        N/A

Change-Id: I1e1712ab71dc47a3e898b1799ba9e648153735eb
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index a412b95..9d2fdbd 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c
@@ -180,6 +180,41 @@
 }
 
 #if CONFIG_EXT_INTRA
+#if CONFIG_EXT_INTRA_MOD2
+const int16_t dr_intra_derivative[90] = {
+  // Angles are spread more evenly; every entry fits in 10 bits (max 1023).
+  // Entries equal to 0 are placeholders that are never used.
+  // Value               Approximate angle in degrees (also the array index)
+  0,    0, 0,        // 0, ... (all unused)
+  1023, 0, 0,        // 3, ...
+  547,  0, 0,        // 6, ...
+  372,  0, 0, 0, 0,  // 9, ...
+  273,  0, 0,        // 14, ...
+  215,  0, 0,        // 17, ...
+  178,  0, 0,        // 20, ...
+  151,  0, 0,        // 23, ... (113 & 203 are base angles)
+  132,  0, 0,        // 26, ...
+  116,  0, 0,        // 29, ...
+  102,  0, 0, 0,     // 32, ...
+  90,   0, 0,        // 36, ...
+  80,   0, 0,        // 39, ...
+  71,   0, 0,        // 42, ...
+  64,   0, 0,        // 45, ... (45 & 135 are base angles)
+  57,   0, 0,        // 48, ...
+  51,   0, 0,        // 51, ...
+  45,   0, 0, 0,     // 54, ...
+  40,   0, 0,        // 58, ...
+  35,   0, 0,        // 61, ...
+  31,   0, 0,        // 64, ...
+  27,   0, 0,        // 67, ... (67 & 157 are base angles)
+  23,   0, 0,        // 70, ...
+  19,   0, 0,        // 73, ...
+  15,   0, 0, 0, 0,  // 76, ...
+  11,   0, 0,        // 81, ...
+  7,    0, 0,        // 84, ...
+  3,    0, 0,        // 87, ...
+};
+#else
 const int16_t dr_intra_derivative[90] = {
   1,    14666, 7330, 4884, 3660, 2926, 2435, 2084, 1821, 1616, 1451, 1317, 1204,
   1108, 1026,  955,  892,  837,  787,  743,  703,  666,  633,  603,  574,  548,
@@ -189,4 +224,5 @@
   119,  113,   108,  103,  98,   93,   88,   83,   78,   73,   68,   63,   59,
   54,   49,    45,   40,   35,   31,   26,   22,   17,   13,   8,    4,
 };
+#endif  // CONFIG_EXT_INTRA_MOD2
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 33c71e8..0b41123 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -773,12 +773,20 @@
   const int upsample_above = 0;
 #endif  // !CONFIG_INTRA_EDGE_UPSAMPLE
   const int max_base_x = ((bw + bh) - 1) << upsample_above;
+#if CONFIG_EXT_INTRA_MOD2
+  const int frac_bits = 6 - upsample_above;
+#else
   const int frac_bits = 8 - upsample_above;
+#endif
   const int base_inc = 1 << upsample_above;
   x = dx;
   for (r = 0; r < bh; ++r, dst += stride, x += dx) {
     base = x >> frac_bits;
+#if CONFIG_EXT_INTRA_MOD2
+    shift = ((x << upsample_above) & 0x3F) >> 1;
+#else
     shift = (x << upsample_above) & 0xFF;
+#endif
 
     if (base >= max_base_x) {
       for (int i = r; i < bh; ++i) {
@@ -790,8 +798,13 @@
 
     for (c = 0; c < bw; ++c, base += base_inc) {
       if (base < max_base_x) {
+#if CONFIG_EXT_INTRA_MOD2
+        val = above[base] * (32 - shift) + above[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         val = above[base] * (256 - shift) + above[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
         dst[c] = clip_pixel(val);
       } else {
         dst[c] = above[max_base_x];
@@ -817,24 +830,45 @@
   const int upsample_left = 0;
 #endif  // !CONFIG_INTRA_EDGE_UPSAMPLE
   const int min_base_x = -(1 << upsample_above);
+#if CONFIG_EXT_INTRA_MOD2
+  const int frac_bits_x = 6 - upsample_above;
+  const int frac_bits_y = 6 - upsample_left;
+#else
   const int frac_bits_x = 8 - upsample_above;
   const int frac_bits_y = 8 - upsample_left;
+#endif
   const int base_inc_x = 1 << upsample_above;
   x = -dx;
   for (r = 0; r < bh; ++r, x -= dx, dst += stride) {
     base1 = x >> frac_bits_x;
+#if CONFIG_EXT_INTRA_MOD2
+    y = (r << 6) - dy;
+#else
     y = (r << 8) - dy;
+#endif
     for (c = 0; c < bw; ++c, base1 += base_inc_x, y -= dy) {
       if (base1 >= min_base_x) {
+#if CONFIG_EXT_INTRA_MOD2
+        shift1 = ((x * (1 << upsample_above)) & 0x3F) >> 1;
+        val = above[base1] * (32 - shift1) + above[base1 + 1] * shift1;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         shift1 = (x * (1 << upsample_above)) & 0xFF;
         val = above[base1] * (256 - shift1) + above[base1 + 1] * shift1;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
       } else {
         base2 = y >> frac_bits_y;
         assert(base2 >= -(1 << upsample_left));
+#if CONFIG_EXT_INTRA_MOD2
+        shift2 = ((y * (1 << upsample_left)) & 0x3F) >> 1;
+        val = left[base2] * (32 - shift2) + left[base2 + 1] * shift2;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         shift2 = (y * (1 << upsample_left)) & 0xFF;
         val = left[base2] * (256 - shift2) + left[base2 + 1] * shift2;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
       }
       dst[c] = clip_pixel(val);
     }
@@ -860,17 +894,30 @@
   const int upsample_left = 0;
 #endif  // !CONFIG_INTRA_EDGE_UPSAMPLE
   const int max_base_y = (bw + bh - 1) << upsample_left;
+#if CONFIG_EXT_INTRA_MOD2
+  const int frac_bits = 6 - upsample_left;
+#else
   const int frac_bits = 8 - upsample_left;
+#endif
   const int base_inc = 1 << upsample_left;
   y = dy;
   for (c = 0; c < bw; ++c, y += dy) {
     base = y >> frac_bits;
+#if CONFIG_EXT_INTRA_MOD2
+    shift = ((y << upsample_left) & 0x3F) >> 1;
+#else
     shift = (y << upsample_left) & 0xFF;
+#endif
 
     for (r = 0; r < bh; ++r, base += base_inc) {
       if (base < max_base_y) {
+#if CONFIG_EXT_INTRA_MOD2
+        val = left[base] * (32 - shift) + left[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         val = left[base] * (256 - shift) + left[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
         dst[r * stride + c] = clip_pixel(val);
       } else {
         for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
@@ -967,12 +1014,20 @@
   const int upsample_above = 0;
 #endif  // !CONFIG_INTRA_EDGE_UPSAMPLE
   const int max_base_x = ((bw + bh) - 1) << upsample_above;
+#if CONFIG_EXT_INTRA_MOD2
+  const int frac_bits = 6 - upsample_above;
+#else
   const int frac_bits = 8 - upsample_above;
+#endif
   const int base_inc = 1 << upsample_above;
   x = dx;
   for (r = 0; r < bh; ++r, dst += stride, x += dx) {
     base = x >> frac_bits;
+#if CONFIG_EXT_INTRA_MOD2
+    shift = ((x << upsample_above) & 0x3F) >> 1;
+#else
     shift = (x << upsample_above) & 0xFF;
+#endif
 
     if (base >= max_base_x) {
       for (int i = r; i < bh; ++i) {
@@ -984,8 +1039,13 @@
 
     for (c = 0; c < bw; ++c, base += base_inc) {
       if (base < max_base_x) {
+#if CONFIG_EXT_INTRA_MOD2
+        val = above[base] * (32 - shift) + above[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         val = above[base] * (256 - shift) + above[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
         dst[c] = clip_pixel_highbd(val, bd);
       } else {
         dst[c] = above[max_base_x];
@@ -1012,24 +1072,49 @@
   const int upsample_left = 0;
 #endif  // !CONFIG_INTRA_EDGE_UPSAMPLE
   const int min_base_x = -(1 << upsample_above);
+#if CONFIG_EXT_INTRA_MOD2
+  const int frac_bits_x = 6 - upsample_above;
+  const int frac_bits_y = 6 - upsample_left;
+#else
   const int frac_bits_x = 8 - upsample_above;
   const int frac_bits_y = 8 - upsample_left;
+#endif
   for (r = 0; r < bh; ++r) {
     for (c = 0; c < bw; ++c) {
       y = r + 1;
+#if CONFIG_EXT_INTRA_MOD2
+      x = (c << 6) - y * dx;
+#else
       x = (c << 8) - y * dx;
+#endif
       base = x >> frac_bits_x;
       if (base >= min_base_x) {
+#if CONFIG_EXT_INTRA_MOD2
+        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
+        val = above[base] * (32 - shift) + above[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         shift = (x * (1 << upsample_above)) & 0xFF;
         val = above[base] * (256 - shift) + above[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
       } else {
         x = c + 1;
+#if CONFIG_EXT_INTRA_MOD2
+        y = (r << 6) - x * dy;
+#else
         y = (r << 8) - x * dy;
+#endif
         base = y >> frac_bits_y;
+#if CONFIG_EXT_INTRA_MOD2
+        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
+        val = left[base] * (32 - shift) + left[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         shift = (y * (1 << upsample_left)) & 0xFF;
         val = left[base] * (256 - shift) + left[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
       }
       dst[c] = clip_pixel_highbd(val, bd);
     }
@@ -1056,17 +1141,30 @@
   const int upsample_left = 0;
 #endif  // !CONFIG_INTRA_EDGE_UPSAMPLE
   const int max_base_y = (bw + bh - 1) << upsample_left;
+#if CONFIG_EXT_INTRA_MOD2
+  const int frac_bits = 6 - upsample_left;
+#else
   const int frac_bits = 8 - upsample_left;
+#endif
   const int base_inc = 1 << upsample_left;
   y = dy;
   for (c = 0; c < bw; ++c, y += dy) {
     base = y >> frac_bits;
+#if CONFIG_EXT_INTRA_MOD2
+    shift = ((y << upsample_left) & 0x3F) >> 1;
+#else
     shift = (y << upsample_left) & 0xFF;
+#endif
 
     for (r = 0; r < bh; ++r, base += base_inc) {
       if (base < max_base_y) {
+#if CONFIG_EXT_INTRA_MOD2
+        val = left[base] * (32 - shift) + left[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 5);
+#else
         val = left[base] * (256 - shift) + left[base + 1] * shift;
         val = ROUND_POWER_OF_TWO(val, 8);
+#endif
         dst[r * stride + c] = clip_pixel_highbd(val, bd);
       } else {
         for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 8564940..43d9e71 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -134,6 +134,7 @@
 set(CONFIG_EXT_DELTA_Q 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_EXT_INTRA 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_EXT_INTRA_MOD 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_INTRA_MOD2 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_EXT_PARTITION 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_EXT_PARTITION_TYPES 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_EXT_PARTITION_TYPES_AB 0 CACHE NUMBER "AV1 experiment flag.")
diff --git a/configure b/configure
index 0ffd515..a13efcf 100755
--- a/configure
+++ b/configure
@@ -259,6 +259,7 @@
     filter_intra
     intra_edge
     ext_intra_mod
+    ext_intra_mod2
     intrabc
     new_quant
     loop_restoration