New filter_intra implementation + entropy coding

Use a 4x2 processing unit.
Reduce the number of modes from 6 to 5 by dropping FILTER_D117_PRED.

Change-Id: I3c12e18084636de0e279c9102a8b212342faf4c7
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 7d6eb71..d113c5f 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -195,7 +195,7 @@
 } FILTER_INTRA_MODE_INFO;
 
 static const PREDICTION_MODE fimode_to_intradir[FILTER_INTRA_MODES] = {
-  DC_PRED, V_PRED, H_PRED, D117_PRED, D153_PRED, DC_PRED
+  DC_PRED, V_PRED, H_PRED, D153_PRED, DC_PRED
 };
 
 #define DISABLE_SUB8X8_FILTER_INTRA 0
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 55155c6..932e1ec 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1560,8 +1560,8 @@
 
 #if CONFIG_FILTER_INTRA
 static const aom_cdf_prob default_filter_intra_mode_cdf[2][CDF_SIZE(
-    FILTER_INTRA_MODES)] = { { AOM_CDF6(22207, 23158, 24144, 24278, 30434) },
-                             { AOM_CDF6(32768, 32768, 32768, 32768, 32768) } };
+    FILTER_INTRA_MODES)] = { { AOM_CDF5(14259, 17304, 20463, 29377) },
+                             { AOM_CDF5(32768, 32768, 32768, 32768) } };
 
 static const aom_cdf_prob default_filter_intra_cdfs[TX_SIZES_ALL][CDF_SIZE(2)] =
     { { AOM_CDF2(10985) }, { AOM_CDF2(10985) }, { AOM_CDF2(16645) },
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 049a347..c7a7f7f 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -428,6 +428,8 @@
 #if CONFIG_FILTER_INTRA
   unsigned int filter_intra_mode[PLANE_TYPES][FILTER_INTRA_MODES];
   unsigned int filter_intra_tx[TX_SIZES_ALL][2];
+  unsigned int filter_intra_mode_ctx[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS]
+                                    [FILTER_INTRA_MODES];
 #endif  // CONFIG_FILTER_INTRA
 #if CONFIG_LPF_SB
   unsigned int lpf_reuse[LPF_REUSE_CONTEXT][2];
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 8238e91..d949d74 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -567,7 +567,6 @@
   FILTER_DC_PRED,
   FILTER_V_PRED,
   FILTER_H_PRED,
-  FILTER_D117_PRED,
   FILTER_D153_PRED,
   FILTER_PAETH_PRED,
   FILTER_INTRA_MODES,
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 5abb102..4d5fc19 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -1107,7 +1107,7 @@
 #endif  // CONFIG_EXT_INTRA
 
 #if CONFIG_FILTER_INTRA
-#if FILTER_INTRA_PROC_UNIT_SIZE == 2
+#if FILTER_INTRA_PROC_UNIT_SIZE == 22
 static int filter_intra_taps_2x2procunit[FILTER_INTRA_MODES][4][5] = {
   {
       { -3, 5, 0, 6, 0 },
@@ -1128,12 +1128,6 @@
       { -2, 0, 2, 0, 8 },
   },
   {
-      { -1, 6, 0, 3, 0 },
-      { 0, 1, 6, 1, 0 },
-      { -1, 5, 0, 1, 3 },
-      { 0, 2, 4, 1, 1 },
-  },
-  {
       { -1, 4, 0, 5, 0 },
       { -1, 2, 4, 3, 0 },
       { -1, 2, 0, 2, 5 },
@@ -1146,9 +1140,153 @@
       { -4, 0, 6, 0, 6 },
   },
 };
-#elif FILTER_INTRA_PROC_UNIT_SIZE == 4
+#elif FILTER_INTRA_PROC_UNIT_SIZE == 42
+static int filter_intra_taps_4x2procunit[FILTER_INTRA_MODES][8][7] = {
+  {
+      { -6, 10, 0, 0, 0, 12, 0 },
+      { -5, 2, 10, 0, 0, 9, 0 },
+      { -3, 1, 1, 10, 0, 7, 0 },
+      { -3, 1, 1, 2, 10, 5, 0 },
+      { -4, 6, 0, 0, 0, 2, 12 },
+      { -3, 2, 6, 0, 0, 2, 9 },
+      { -3, 2, 2, 6, 0, 2, 7 },
+      { -3, 1, 2, 2, 6, 3, 5 },
+  },
+  {
+      { -10, 16, 0, 0, 0, 10, 0 },
+      { -6, 0, 16, 0, 0, 6, 0 },
+      { -4, 0, 0, 16, 0, 4, 0 },
+      { -2, 0, 0, 0, 16, 2, 0 },
+      { -10, 16, 0, 0, 0, 0, 10 },
+      { -6, 0, 16, 0, 0, 0, 6 },
+      { -4, 0, 0, 16, 0, 0, 4 },
+      { -2, 0, 0, 0, 16, 0, 2 },
+  },
+  {
+      { -8, 8, 0, 0, 0, 16, 0 },
+      { -8, 0, 8, 0, 0, 16, 0 },
+      { -8, 0, 0, 8, 0, 16, 0 },
+      { -8, 0, 0, 0, 8, 16, 0 },
+      { -4, 4, 0, 0, 0, 0, 16 },
+      { -4, 0, 4, 0, 0, 0, 16 },
+      { -4, 0, 0, 4, 0, 0, 16 },
+      { -4, 0, 0, 0, 4, 0, 16 },
+  },
+  {
+      { -2, 8, 0, 0, 0, 10, 0 },
+      { -1, 3, 8, 0, 0, 6, 0 },
+      { -1, 2, 3, 8, 0, 4, 0 },
+      { 0, 1, 2, 3, 8, 2, 0 },
+      { -1, 4, 0, 0, 0, 3, 10 },
+      { -1, 3, 4, 0, 0, 4, 6 },
+      { -1, 2, 3, 4, 0, 4, 4 },
+      { -1, 2, 2, 3, 4, 3, 3 },
+  },
+  {
+      { -12, 14, 0, 0, 0, 14, 0 },
+      { -10, 0, 14, 0, 0, 12, 0 },
+      { -9, 0, 0, 14, 0, 11, 0 },
+      { -8, 0, 0, 0, 14, 10, 0 },
+      { -10, 12, 0, 0, 0, 0, 14 },
+      { -9, 1, 12, 0, 0, 0, 12 },
+      { -8, 0, 0, 12, 0, 1, 11 },
+      { -7, 0, 0, 1, 12, 1, 9 },
+  },
+};
+#elif FILTER_INTRA_PROC_UNIT_SIZE == 44
 static int filter_intra_taps_4x4procunit[FILTER_INTRA_MODES][16][9] = {
-#if FILTER_INTRA_SCALE_BITS == 4
+#if FILTER_INTRA_SCALE_BITS == 5
+  {
+      { -12, 20, 0, 0, 0, 24, 0, 0, 0 },
+      { -9, 3, 20, 0, 0, 18, 0, 0, 0 },
+      { -7, 2, 3, 20, 0, 14, 0, 0, 0 },
+      { -5, 2, 2, 3, 20, 10, 0, 0, 0 },
+      { -8, 13, 0, 0, 0, 3, 24, 0, 0 },
+      { -7, 4, 12, 0, 0, 5, 18, 0, 0 },
+      { -6, 3, 4, 12, 0, 5, 14, 0, 0 },
+      { -5, 3, 3, 4, 12, 5, 10, 0, 0 },
+      { -5, 8, 0, 0, 0, 2, 3, 24, 0 },
+      { -5, 4, 8, 0, 0, 3, 4, 18, 0 },
+      { -5, 3, 4, 8, 0, 4, 5, 13, 0 },
+      { -5, 3, 3, 4, 8, 4, 5, 10, 0 },
+      { -3, 5, 0, 0, 0, 1, 2, 3, 24 },
+      { -4, 3, 5, 0, 0, 2, 3, 5, 18 },
+      { -4, 3, 3, 5, 0, 3, 4, 5, 13 },
+      { -4, 3, 3, 3, 5, 3, 4, 5, 10 },
+  },
+  {
+      { -20, 32, 0, 0, 0, 20, 0, 0, 0 },
+      { -13, 0, 32, 0, 0, 13, 0, 0, 0 },
+      { -8, 0, 0, 32, 0, 8, 0, 0, 0 },
+      { -5, 0, 0, 0, 32, 5, 0, 0, 0 },
+      { -20, 32, 0, 0, 0, 0, 20, 0, 0 },
+      { -13, 0, 32, 0, 0, 0, 13, 0, 0 },
+      { -8, 0, 0, 32, 0, 0, 8, 0, 0 },
+      { -5, 0, 0, 0, 32, 0, 5, 0, 0 },
+      { -20, 32, 0, 0, 0, 0, 0, 20, 0 },
+      { -13, 0, 32, 0, 0, 0, 0, 13, 0 },
+      { -8, 0, 0, 32, 0, 0, 0, 8, 0 },
+      { -5, 0, 0, 0, 32, 0, 0, 5, 0 },
+      { -20, 32, 0, 0, 0, 0, 0, 0, 20 },
+      { -13, 0, 32, 0, 0, 0, 0, 0, 13 },
+      { -8, 0, 0, 32, 0, 0, 0, 0, 8 },
+      { -5, 0, 0, 0, 32, 0, 0, 0, 5 },
+  },
+  {
+      { -16, 16, 0, 0, 0, 32, 0, 0, 0 },
+      { -16, 0, 16, 0, 0, 32, 0, 0, 0 },
+      { -16, 0, 0, 16, 0, 32, 0, 0, 0 },
+      { -16, 0, 0, 0, 16, 32, 0, 0, 0 },
+      { -8, 8, 0, 0, 0, 0, 32, 0, 0 },
+      { -8, 0, 8, 0, 0, 0, 32, 0, 0 },
+      { -8, 0, 0, 8, 0, 0, 32, 0, 0 },
+      { -8, 0, 0, 0, 8, 0, 32, 0, 0 },
+      { -4, 4, 0, 0, 0, 0, 0, 32, 0 },
+      { -4, 0, 4, 0, 0, 0, 0, 32, 0 },
+      { -4, 0, 0, 4, 0, 0, 0, 32, 0 },
+      { -4, 0, 0, 0, 4, 0, 0, 32, 0 },
+      { -2, 2, 0, 0, 0, 0, 0, 0, 32 },
+      { -2, 0, 2, 0, 0, 0, 0, 0, 32 },
+      { -2, 0, 0, 2, 0, 0, 0, 0, 32 },
+      { -2, 0, 0, 0, 2, 0, 0, 0, 32 },
+  },
+  {
+      { -4, 16, 0, 0, 0, 20, 0, 0, 0 },
+      { -3, 6, 16, 0, 0, 13, 0, 0, 0 },
+      { -2, 4, 6, 16, 0, 8, 0, 0, 0 },
+      { -1, 2, 4, 6, 16, 5, 0, 0, 0 },
+      { -2, 8, 0, 0, 0, 6, 20, 0, 0 },
+      { -2, 6, 8, 0, 0, 7, 13, 0, 0 },
+      { -2, 5, 6, 8, 0, 7, 8, 0, 0 },
+      { -2, 4, 5, 6, 8, 6, 5, 0, 0 },
+      { -1, 4, 0, 0, 0, 3, 6, 20, 0 },
+      { -1, 4, 4, 0, 0, 5, 8, 12, 0 },
+      { -1, 4, 4, 4, 0, 6, 7, 8, 0 },
+      { -1, 4, 4, 4, 4, 6, 6, 5, 0 },
+      { -1, 2, 0, 0, 0, 2, 3, 6, 20 },
+      { -1, 3, 2, 0, 0, 3, 5, 7, 13 },
+      { -1, 3, 3, 2, 0, 4, 6, 7, 8 },
+      { -1, 4, 3, 3, 2, 5, 5, 6, 5 },
+  },
+  {
+      { -24, 28, 0, 0, 0, 28, 0, 0, 0 },
+      { -21, 1, 28, 0, 0, 24, 0, 0, 0 },
+      { -18, 0, 1, 28, 0, 21, 0, 0, 0 },
+      { -16, 0, 0, 1, 28, 19, 0, 0, 0 },
+      { -21, 24, 0, 0, 0, 1, 28, 0, 0 },
+      { -19, 1, 24, 0, 0, 1, 25, 0, 0 },
+      { -17, 1, 1, 25, 0, 1, 21, 0, 0 },
+      { -15, 1, 1, 1, 24, 1, 19, 0, 0 },
+      { -18, 21, 0, 0, 0, 0, 1, 28, 0 },
+      { -17, 1, 21, 0, 0, 1, 1, 25, 0 },
+      { -15, 1, 1, 22, 0, 1, 1, 21, 0 },
+      { -14, 1, 1, 1, 22, 1, 1, 19, 0 },
+      { -16, 19, 0, 0, 0, 0, 0, 1, 28 },
+      { -15, 1, 19, 0, 0, 1, 1, 1, 24 },
+      { -14, 1, 1, 19, 0, 1, 1, 1, 22 },
+      { -13, 1, 1, 2, 19, 1, 1, 1, 19 },
+  },
+#elif FILTER_INTRA_SCALE_BITS == 4
   {
       { -6, 10, 0, 0, 0, 12, 0, 0, 0 },
       { -5, 2, 10, 0, 0, 9, 0, 0, 0 },
@@ -1157,15 +1295,15 @@
       { -4, 6, 0, 0, 0, 2, 12, 0, 0 },
       { -3, 2, 6, 0, 0, 2, 9, 0, 0 },
       { -3, 2, 2, 6, 0, 2, 7, 0, 0 },
-      { -2, 0, 2, 2, 6, 3, 5, 0, 0 },
+      { -3, 1, 2, 2, 6, 3, 5, 0, 0 },
       { -2, 4, 0, 0, 0, 1, 1, 12, 0 },
       { -3, 2, 4, 0, 0, 2, 2, 9, 0 },
-      { -2, 0, 2, 4, 0, 2, 3, 7, 0 },
-      { -2, 0, 0, 2, 4, 3, 3, 6, 0 },
+      { -3, 2, 2, 4, 0, 2, 2, 7, 0 },
+      { -2, 1, 2, 2, 4, 2, 2, 5, 0 },
       { -1, 2, 0, 0, 0, 1, 1, 1, 12 },
-      { -2, 2, 3, 0, 0, 0, 2, 2, 9 },
-      { -1, 0, 2, 3, 0, 0, 2, 3, 7 },
-      { -1, 0, 0, 2, 3, 0, 3, 3, 6 },
+      { -2, 2, 2, 0, 0, 1, 2, 2, 9 },
+      { -2, 1, 2, 2, 0, 1, 2, 3, 7 },
+      { -2, 1, 1, 2, 2, 2, 2, 3, 5 },
   },
   {
       { -10, 16, 0, 0, 0, 10, 0, 0, 0 },
@@ -1204,24 +1342,6 @@
       { -1, 0, 0, 0, 1, 0, 0, 0, 16 },
   },
   {
-      { -2, 12, 0, 0, 0, 6, 0, 0, 0 },
-      { -1, 3, 12, 0, 0, 2, 0, 0, 0 },
-      { 0, 1, 2, 12, 0, 1, 0, 0, 0 },
-      { 0, 0, 1, 3, 12, 0, 0, 0, 0 },
-      { -2, 9, 0, 0, 0, 3, 6, 0, 0 },
-      { -1, 4, 9, 0, 0, 2, 2, 0, 0 },
-      { -1, 2, 4, 9, 0, 1, 1, 0, 0 },
-      { 0, 1, 2, 4, 9, 0, 0, 0, 0 },
-      { -1, 7, 0, 0, 0, 2, 2, 6, 0 },
-      { -1, 4, 7, 0, 0, 2, 2, 2, 0 },
-      { 0, 2, 4, 7, 0, 1, 1, 1, 0 },
-      { 0, 1, 2, 4, 7, 1, 1, 0, 0 },
-      { -1, 5, 0, 0, 0, 1, 2, 3, 6 },
-      { 0, 4, 5, 0, 0, 1, 2, 2, 2 },
-      { 0, 3, 4, 5, 0, 2, 1, 1, 0 },
-      { 0, 2, 3, 4, 5, 1, 1, 0, 0 },
-  },
-  {
       { -2, 8, 0, 0, 0, 10, 0, 0, 0 },
       { -1, 3, 8, 0, 0, 6, 0, 0, 0 },
       { -1, 2, 3, 8, 0, 4, 0, 0, 0 },
@@ -1229,15 +1349,15 @@
       { -1, 4, 0, 0, 0, 3, 10, 0, 0 },
       { -1, 3, 4, 0, 0, 4, 6, 0, 0 },
       { -1, 2, 3, 4, 0, 4, 4, 0, 0 },
-      { 0, 2, 2, 3, 4, 3, 2, 0, 0 },
+      { -1, 2, 2, 3, 4, 3, 3, 0, 0 },
       { -1, 2, 0, 0, 0, 2, 3, 10, 0 },
       { -1, 2, 2, 0, 0, 3, 4, 6, 0 },
-      { 0, 2, 2, 2, 0, 3, 3, 4, 0 },
-      { 0, 2, 3, 2, 0, 3, 3, 3, 0 },
+      { -1, 2, 2, 2, 0, 3, 4, 4, 0 },
+      { -1, 2, 2, 2, 2, 3, 3, 3, 0 },
       { 0, 1, 0, 0, 0, 1, 1, 3, 10 },
       { 0, 1, 1, 0, 0, 2, 2, 4, 6 },
-      { 0, 2, 1, 0, 0, 2, 3, 4, 4 },
-      { 0, 2, 2, 0, 0, 3, 3, 3, 3 },
+      { -1, 2, 1, 1, 0, 2, 3, 4, 4 },
+      { -1, 2, 2, 2, 1, 2, 3, 3, 2 },
   },
   {
       { -12, 14, 0, 0, 0, 14, 0, 0, 0 },
@@ -1255,7 +1375,7 @@
       { -8, 10, 0, 0, 0, 0, 0, 0, 14 },
       { -7, 1, 9, 0, 0, 0, 0, 1, 12 },
       { -7, 1, 1, 9, 0, 0, 0, 1, 11 },
-      { -6, 0, 1, 1, 10, 0, 0, 1, 9 },
+      { -6, 0, 1, 1, 9, 0, 1, 1, 9 },
   },
 #else
   {
@@ -1265,16 +1385,16 @@
       { -1, 0, 1, 1, 5, 2, 0, 0, 0 },
       { -2, 3, 0, 0, 0, 1, 6, 0, 0 },
       { -2, 1, 3, 0, 0, 1, 5, 0, 0 },
-      { -1, 0, 1, 3, 0, 1, 4, 0, 0 },
-      { -1, 0, 0, 1, 3, 2, 3, 0, 0 },
+      { -1, 1, 1, 3, 0, 1, 3, 0, 0 },
+      { -1, 1, 1, 1, 3, 1, 2, 0, 0 },
       { -1, 2, 0, 0, 0, 0, 1, 6, 0 },
-      { -1, 1, 2, 0, 0, 0, 1, 5, 0 },
-      { -1, 0, 0, 2, 0, 1, 2, 4, 0 },
-      { -1, 0, 0, 1, 3, 0, 2, 3, 0 },
-      { -1, 1, 0, 0, 0, 1, 0, 1, 6 },
-      { -1, 1, 2, 0, 0, 0, 0, 1, 5 },
-      { -1, 0, 1, 2, 0, 0, 0, 2, 4 },
-      { 0, 0, 0, 1, 2, 0, 0, 2, 3 },
+      { -1, 1, 2, 0, 0, 1, 1, 4, 0 },
+      { -1, 1, 1, 2, 0, 1, 1, 3, 0 },
+      { -1, 1, 1, 1, 2, 1, 1, 2, 0 },
+      { -1, 1, 0, 0, 0, 0, 1, 1, 6 },
+      { -1, 1, 1, 0, 0, 1, 1, 1, 4 },
+      { -1, 1, 1, 1, 0, 1, 1, 1, 3 },
+      { -1, 1, 1, 1, 1, 1, 1, 1, 2 },
   },
   {
       { -5, 8, 0, 0, 0, 5, 0, 0, 0 },
@@ -1313,24 +1433,6 @@
       { -1, 0, 0, 0, 1, 0, 0, 0, 8 },
   },
   {
-      { -1, 6, 0, 0, 0, 3, 0, 0, 0 },
-      { 0, 1, 6, 0, 0, 1, 0, 0, 0 },
-      { 0, 1, 1, 6, 0, 0, 0, 0, 0 },
-      { 0, 0, 1, 1, 6, 0, 0, 0, 0 },
-      { -1, 5, 0, 0, 0, 1, 3, 0, 0 },
-      { 0, 2, 4, 0, 0, 1, 1, 0, 0 },
-      { 0, 1, 2, 4, 0, 1, 0, 0, 0 },
-      { 0, 0, 1, 2, 5, 0, 0, 0, 0 },
-      { 0, 3, 0, 0, 0, 1, 1, 3, 0 },
-      { 0, 2, 3, 0, 0, 1, 1, 1, 0 },
-      { 0, 1, 2, 3, 0, 1, 1, 0, 0 },
-      { 0, 1, 1, 2, 4, 0, 0, 0, 0 },
-      { 0, 3, 0, 0, 0, 1, 0, 1, 3 },
-      { 0, 2, 3, 0, 0, 1, 0, 1, 1 },
-      { 0, 1, 2, 3, 0, 1, 1, 0, 0 },
-      { 0, 1, 2, 2, 3, 0, 0, 0, 0 },
-  },
-  {
       { -1, 4, 0, 0, 0, 5, 0, 0, 0 },
       { -1, 2, 4, 0, 0, 3, 0, 0, 0 },
       { 0, 1, 1, 4, 0, 2, 0, 0, 0 },
@@ -1338,15 +1440,15 @@
       { -1, 2, 0, 0, 0, 2, 5, 0, 0 },
       { -1, 2, 2, 0, 0, 2, 3, 0, 0 },
       { 0, 1, 1, 2, 0, 2, 2, 0, 0 },
-      { 0, 1, 0, 2, 2, 2, 1, 0, 0 },
+      { 0, 1, 1, 2, 2, 1, 1, 0, 0 },
       { 0, 1, 0, 0, 0, 1, 1, 5, 0 },
       { 0, 1, 1, 0, 0, 1, 2, 3, 0 },
-      { 0, 1, 1, 0, 0, 2, 2, 2, 0 },
-      { 0, 1, 1, 0, 0, 2, 2, 2, 0 },
-      { 0, 1, 0, 0, 0, 0, 0, 2, 5 },
-      { 0, 1, 1, 0, 0, 1, 0, 2, 3 },
-      { 0, 1, 1, 0, 0, 2, 0, 2, 2 },
-      { 0, 1, 1, 0, 0, 2, 2, 2, 0 },
+      { 0, 1, 1, 1, 0, 1, 2, 2, 0 },
+      { 0, 1, 1, 1, 1, 1, 2, 1, 0 },
+      { 0, 0, 0, 0, 0, 0, 1, 2, 5 },
+      { 0, 1, 0, 0, 0, 1, 1, 2, 3 },
+      { 0, 1, 1, 0, 0, 1, 1, 2, 2 },
+      { 0, 1, 1, 1, 1, 1, 1, 1, 1 },
   },
   {
       { -6, 7, 0, 0, 0, 7, 0, 0, 0 },
@@ -1369,147 +1471,8 @@
 #endif
 };
 #else
-static int filter_intra_taps_3[TX_SIZES_ALL][FILTER_INTRA_MODES][3] = {
-  {
-      { 5, 7, -4 },
-      { 8, 4, -4 },
-      { 3, 8, -3 },
-      { 6, 3, -1 },
-      { 3, 5, 0 },
-      { 7, 7, -6 },
-  },
-  {
-      { 5, 6, -3 },
-      { 8, 5, -5 },
-      { 4, 8, -4 },
-      { 6, 3, -1 },
-      { 4, 5, -1 },
-      { 7, 7, -6 },
-  },
-  {
-      { 4, 7, -3 },
-      { 8, 6, -6 },
-      { 3, 10, -5 },
-      { 6, 5, -3 },
-      { 5, 6, -3 },
-      { 8, 8, -8 },
-  },
-  {
-      { 5, 7, -4 },
-      { 7, 5, -4 },
-      { 4, 8, -4 },
-      { 7, 4, -3 },
-      { 2, 6, 0 },
-      { 7, 8, -7 },
-  },
-#if CONFIG_TX64X64
-  {
-      { 5, 7, -4 },
-      { 7, 5, -4 },
-      { 4, 8, -4 },
-      { 7, 4, -3 },
-      { 2, 6, 0 },
-      { 7, 8, -7 },
-  },
-#endif  // CONFIG_TX64X64
-  {
-      { 5, 7, -4 },
-      { 8, 4, -4 },
-      { 3, 8, -3 },
-      { 6, 3, -1 },
-      { 3, 5, 0 },
-      { 7, 7, -6 },
-  },
-  {
-      { 5, 7, -4 },
-      { 8, 4, -4 },
-      { 3, 8, -3 },
-      { 6, 3, -1 },
-      { 3, 5, 0 },
-      { 7, 7, -6 },
-  },
-  {
-      { 5, 6, -3 },
-      { 8, 5, -5 },
-      { 4, 8, -4 },
-      { 6, 3, -1 },
-      { 4, 5, -1 },
-      { 7, 7, -6 },
-  },
-  {
-      { 5, 6, -3 },
-      { 8, 5, -5 },
-      { 4, 8, -4 },
-      { 6, 3, -1 },
-      { 4, 5, -1 },
-      { 7, 7, -6 },
-  },
-  {
-      { 4, 7, -3 },
-      { 8, 6, -6 },
-      { 3, 10, -5 },
-      { 6, 5, -3 },
-      { 5, 6, -3 },
-      { 8, 8, -8 },
-  },
-  {
-      { 4, 7, -3 },
-      { 8, 6, -6 },
-      { 3, 10, -5 },
-      { 6, 5, -3 },
-      { 5, 6, -3 },
-      { 8, 8, -8 },
-  },
-#if CONFIG_TX64X64
-  {
-      { 5, 7, -4 },
-      { 7, 5, -4 },
-      { 4, 8, -4 },
-      { 7, 4, -3 },
-      { 2, 6, 0 },
-      { 7, 8, -7 },
-  },
-  {
-      { 5, 7, -4 },
-      { 7, 5, -4 },
-      { 4, 8, -4 },
-      { 7, 4, -3 },
-      { 2, 6, 0 },
-      { 7, 8, -7 },
-  },
-#endif  // CONFIG_TX64X64
-  {
-      { 5, 7, -4 },
-      { 8, 4, -4 },
-      { 3, 8, -3 },
-      { 6, 3, -1 },
-      { 3, 5, 0 },
-      { 7, 7, -6 },
-  },
-  {
-      { 5, 7, -4 },
-      { 8, 4, -4 },
-      { 3, 8, -3 },
-      { 6, 3, -1 },
-      { 3, 5, 0 },
-      { 7, 7, -6 },
-  },
-  {
-      { 5, 6, -3 },
-      { 8, 5, -5 },
-      { 4, 8, -4 },
-      { 6, 3, -1 },
-      { 4, 5, -1 },
-      { 7, 7, -6 },
-  },
-  {
-      { 5, 6, -3 },
-      { 8, 5, -5 },
-      { 4, 8, -4 },
-      { 6, 3, -1 },
-      { 4, 5, -1 },
-      { 7, 7, -6 },
-  },
+static int filter_intra_taps_3[FILTER_INTRA_MODES][3] = {
+  { 5, 6, -3 }, { 8, 5, -5 }, { 4, 8, -4 }, { 4, 5, -1 }, { 7, 7, -6 },
 };
 #endif
 
@@ -1527,7 +1490,7 @@
 
   for (c = 0; c < bw + 1; ++c) buffer[0][c] = (int)above[c - 1];
 
-#if FILTER_INTRA_PROC_UNIT_SIZE == 2
+#if FILTER_INTRA_PROC_UNIT_SIZE == 22
   for (r = 1; r < bh + 1; r += 2)
     for (c = 1; c < bw + 1; c += 2) {
       const int p0 = buffer[r - 1][c - 1];
@@ -1549,7 +1512,33 @@
                 buffer[r + r_offset][c + c_offset], FILTER_INTRA_SCALE_BITS));
       }
     }
-#elif FILTER_INTRA_PROC_UNIT_SIZE == 4
+#elif FILTER_INTRA_PROC_UNIT_SIZE == 42
+  for (r = 1; r < bh + 1; r += 2)
+    for (c = 1; c < bw + 1; c += 4) {
+      const int p0 = buffer[r - 1][c - 1];
+      const int p1 = buffer[r - 1][c];
+      const int p2 = buffer[r - 1][c + 1];
+      const int p3 = buffer[r - 1][c + 2];
+      const int p4 = buffer[r - 1][c + 3];
+      const int p5 = buffer[r][c - 1];
+      const int p6 = buffer[r + 1][c - 1];
+      for (int k = 0; k < 8; ++k) {
+        int r_offset = k >> 2;
+        int c_offset = k & 0x03;
+        buffer[r + r_offset][c + c_offset] =
+            filter_intra_taps_4x2procunit[mode][k][0] * p0 +
+            filter_intra_taps_4x2procunit[mode][k][1] * p1 +
+            filter_intra_taps_4x2procunit[mode][k][2] * p2 +
+            filter_intra_taps_4x2procunit[mode][k][3] * p3 +
+            filter_intra_taps_4x2procunit[mode][k][4] * p4 +
+            filter_intra_taps_4x2procunit[mode][k][5] * p5 +
+            filter_intra_taps_4x2procunit[mode][k][6] * p6;
+        buffer[r + r_offset][c + c_offset] =
+            clip_pixel(ROUND_POWER_OF_TWO_SIGNED(
+                buffer[r + r_offset][c + c_offset], FILTER_INTRA_SCALE_BITS));
+      }
+    }
+#elif FILTER_INTRA_PROC_UNIT_SIZE == 44
   for (r = 1; r < bh + 1; r += 4)
     for (c = 1; c < bw + 1; c += 4) {
       const int p0 = buffer[r - 1][c - 1];
@@ -1581,9 +1570,9 @@
     }
 #else
   int ipred;
-  const int c0 = filter_intra_taps_3[tx_size][mode][0];
-  const int c1 = filter_intra_taps_3[tx_size][mode][1];
-  const int c2 = filter_intra_taps_3[tx_size][mode][2];
+  const int c0 = filter_intra_taps_3[mode][0];
+  const int c1 = filter_intra_taps_3[mode][1];
+  const int c2 = filter_intra_taps_3[mode][2];
   for (r = 1; r < bh + 1; ++r)
     for (c = 1; c < bw + 1; ++c) {
       ipred = c0 * buffer[r - 1][c] + c1 * buffer[r][c - 1] +
@@ -1618,13 +1607,6 @@
                                FILTER_H_PRED);
 }
 
-void av1_d117_filter_predictor_c(uint8_t *dst, ptrdiff_t stride,
-                                 TX_SIZE tx_size, const uint8_t *above,
-                                 const uint8_t *left) {
-  filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
-                               FILTER_D117_PRED);
-}
-
 void av1_d153_filter_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left) {
@@ -1652,9 +1634,6 @@
     case FILTER_H_PRED:
       av1_h_filter_predictor(dst, stride, tx_size, above, left);
       break;
-    case FILTER_D117_PRED:
-      av1_d117_filter_predictor(dst, stride, tx_size, above, left);
-      break;
     case FILTER_D153_PRED:
       av1_d153_filter_predictor(dst, stride, tx_size, above, left);
       break;
@@ -1681,7 +1660,7 @@
 
   for (c = 0; c < bw + 1; ++c) buffer[0][c] = (int)above[c - 1];
 
-#if FILTER_INTRA_PROC_UNIT_SIZE == 2
+#if FILTER_INTRA_PROC_UNIT_SIZE == 22
   for (r = 1; r < bh + 1; r += 2)
     for (c = 1; c < bw + 1; c += 2) {
       const int p0 = buffer[r - 1][c - 1];
@@ -1704,7 +1683,34 @@
             bd);
       }
     }
-#elif FILTER_INTRA_PROC_UNIT_SIZE == 4
+#elif FILTER_INTRA_PROC_UNIT_SIZE == 42
+  for (r = 1; r < bh + 1; r += 2)
+    for (c = 1; c < bw + 1; c += 4) {
+      const int p0 = buffer[r - 1][c - 1];
+      const int p1 = buffer[r - 1][c];
+      const int p2 = buffer[r - 1][c + 1];
+      const int p3 = buffer[r - 1][c + 2];
+      const int p4 = buffer[r - 1][c + 3];
+      const int p5 = buffer[r][c - 1];
+      const int p6 = buffer[r + 1][c - 1];
+      for (int k = 0; k < 8; ++k) {
+        int r_offset = k >> 2;
+        int c_offset = k & 0x03;
+        buffer[r + r_offset][c + c_offset] =
+            filter_intra_taps_4x2procunit[mode][k][0] * p0 +
+            filter_intra_taps_4x2procunit[mode][k][1] * p1 +
+            filter_intra_taps_4x2procunit[mode][k][2] * p2 +
+            filter_intra_taps_4x2procunit[mode][k][3] * p3 +
+            filter_intra_taps_4x2procunit[mode][k][4] * p4 +
+            filter_intra_taps_4x2procunit[mode][k][5] * p5 +
+            filter_intra_taps_4x2procunit[mode][k][6] * p6;
+        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
+            ROUND_POWER_OF_TWO_SIGNED(buffer[r + r_offset][c + c_offset],
+                                      FILTER_INTRA_SCALE_BITS),
+            bd);
+      }
+    }
+#elif FILTER_INTRA_PROC_UNIT_SIZE == 44
   for (r = 1; r < bh + 1; r += 4)
     for (c = 1; c < bw + 1; c += 4) {
       const int p0 = buffer[r - 1][c - 1];
@@ -1737,9 +1743,9 @@
     }
 #else
   int ipred;
-  const int c0 = filter_intra_taps_3[tx_size][mode][0];
-  const int c1 = filter_intra_taps_3[tx_size][mode][1];
-  const int c2 = filter_intra_taps_3[tx_size][mode][2];
+  const int c0 = filter_intra_taps_3[mode][0];
+  const int c1 = filter_intra_taps_3[mode][1];
+  const int c2 = filter_intra_taps_3[mode][2];
   for (r = 1; r < bh + 1; ++r)
     for (c = 1; c < bw + 1; ++c) {
       ipred = c0 * buffer[r - 1][c] + c1 * buffer[r][c - 1] +
@@ -1778,13 +1784,6 @@
                                       FILTER_H_PRED, bd);
 }
 
-void av1_highbd_d117_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
-                                        TX_SIZE tx_size, const uint16_t *above,
-                                        const uint16_t *left, int bd) {
-  highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
-                                      FILTER_D117_PRED, bd);
-}
-
 void av1_highbd_d153_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
                                         TX_SIZE tx_size, const uint16_t *above,
                                         const uint16_t *left, int bd) {
@@ -1814,9 +1813,6 @@
     case FILTER_H_PRED:
       av1_highbd_h_filter_predictor(dst, stride, tx_size, above, left, bd);
       break;
-    case FILTER_D117_PRED:
-      av1_highbd_d117_filter_predictor(dst, stride, tx_size, above, left, bd);
-      break;
     case FILTER_D153_PRED:
       av1_highbd_d153_filter_predictor(dst, stride, tx_size, above, left, bd);
       break;
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
index c507a42..bad3b97 100644
--- a/av1/common/reconintra.h
+++ b/av1/common/reconintra.h
@@ -42,8 +42,10 @@
 };
 
 #if CONFIG_FILTER_INTRA
-#define FILTER_INTRA_PROC_UNIT_SIZE 2
-#if FILTER_INTRA_PROC_UNIT_SIZE == 4
+#define FILTER_INTRA_PROC_UNIT_SIZE 42  // ij means a i(cols)xj(rows) unit
+#if FILTER_INTRA_PROC_UNIT_SIZE == 44
+#define FILTER_INTRA_SCALE_BITS 5
+#elif FILTER_INTRA_PROC_UNIT_SIZE == 42
 #define FILTER_INTRA_SCALE_BITS 4
 #else
 #define FILTER_INTRA_SCALE_BITS 3
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 657630c..ed34076 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4518,14 +4518,28 @@
     const int use_filter_intra_mode =
         mbmi->filter_intra_mode_info.use_filter_intra_mode[0];
 #if CONFIG_ENTROPY_STATS
-    ++counts->filter_intra_mode[0][mbmi->filter_intra_mode_info
-                                       .filter_intra_mode[0]];
+    if (use_filter_intra_mode) {
+#if CONFIG_KF_CTX
+      // above_ctx/left_ctx only exist under CONFIG_KF_CTX, so the
+      // context-conditioned counter must be compiled under the same flag.
+      const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, 0);
+      const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, 0);
+      const int above_ctx = intra_mode_context[above];
+      const int left_ctx = intra_mode_context[left];
+      ++counts->filter_intra_mode_ctx[above_ctx][left_ctx]
+                                     [mbmi->filter_intra_mode_info
+                                          .filter_intra_mode[0]];
+#endif  // CONFIG_KF_CTX
+      ++counts->filter_intra_mode[0][mbmi->filter_intra_mode_info
+                                         .filter_intra_mode[0]];
+    }
     ++counts->filter_intra_tx[mbmi->tx_size][use_filter_intra_mode];
 #endif  // CONFIG_ENTROPY_STATS
     if (allow_update_cdf) {
-      update_cdf(fc->filter_intra_mode_cdf[0],
-                 mbmi->filter_intra_mode_info.filter_intra_mode[0],
-                 FILTER_INTRA_MODES);
+      if (use_filter_intra_mode)
+        update_cdf(fc->filter_intra_mode_cdf[0],
+                   mbmi->filter_intra_mode_info.filter_intra_mode[0],
+                   FILTER_INTRA_MODES);
       update_cdf(fc->filter_intra_cdfs[mbmi->tx_size], use_filter_intra_mode,
                  2);
     }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 8b4e23d..1a56a93 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3056,8 +3056,7 @@
                                     int *rate, int *rate_tokenonly,
                                     int64_t *distortion, int *skippable,
                                     BLOCK_SIZE bsize, int mode_cost,
-                                    int64_t *best_rd, int64_t *best_model_rd,
-                                    uint16_t skip_mask) {
+                                    int64_t *best_rd, int64_t *best_model_rd) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO *const mic = xd->mi[0];
   MB_MODE_INFO *mbmi = &mic->mbmi;
@@ -3076,7 +3075,6 @@
     int this_rate;
     int64_t this_rd, this_model_rd;
     RD_STATS tokenonly_rd_stats;
-    if (skip_mask & (1 << fimode_to_intradir[mode])) continue;
     mbmi->filter_intra_mode_info.filter_intra_mode[0] = mode;
     this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
     if (*best_model_rd != INT64_MAX &&
@@ -3388,7 +3386,6 @@
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA
   int beat_best_rd = 0;
-  uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
 #endif  // CONFIG_FILTER_INTRA
   const int *bmode_costs;
   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
@@ -3521,11 +3518,6 @@
       this_rate += x->intrabc_cost[0];
 #endif  // CONFIG_INTRABC
     this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
-#if CONFIG_FILTER_INTRA
-    if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
-      filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
-    }
-#endif  // CONFIG_FILTER_INTRA
 
     if (this_rd < best_rd) {
       best_mbmi = *mbmi;
@@ -3552,8 +3544,7 @@
       !xd->lossless[mbmi->segment_id]) {
     if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
                                  skippable, bsize, bmode_costs[DC_PRED],
-                                 &best_rd, &best_model_rd,
-                                 filter_intra_mode_skip_mask)) {
+                                 &best_rd, &best_model_rd)) {
       best_mbmi = *mbmi;
     }
   }
diff --git a/tools/aom_entropy_optimizer.c b/tools/aom_entropy_optimizer.c
index 3108a0d..3f73e50 100644
--- a/tools/aom_entropy_optimizer.c
+++ b/tools/aom_entropy_optimizer.c
@@ -662,6 +662,15 @@
   optimize_cdf_table(&fc.filter_intra_tx[0][0], probsfile, 2, cts_each_dim,
                      "static const aom_cdf_prob "
                      "default_filter_intra_cdfs[TX_SIZES_ALL][CDF_SIZE(2)]");
+
+  cts_each_dim[0] = KF_MODE_CONTEXTS;
+  cts_each_dim[1] = KF_MODE_CONTEXTS;
+  cts_each_dim[2] = FILTER_INTRA_MODES;
+  optimize_cdf_table(&fc.filter_intra_mode_ctx[0][0][0], probsfile, 3,
+                     cts_each_dim,
+                     "static const aom_cdf_prob "
+                     "default_filter_intra_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_"
+                     "CONTEXTS][CDF_SIZE(FILTER_INTRA_MODES)]");
 #endif
 
 #if CONFIG_LV_MAP