Fix inefficient coding of filter_intra modes

Include a few RDO fixes that properly account for the cost of the extra
flag in the right place. Also, entropy coding of the flag is now
context-coded on tx_size and uses the CDF framework.

Change-Id: I52f98ace7e253ccc08917a4a7b517515d1d58a98
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 23b60c5..58f9422 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -959,14 +959,13 @@
 }
 
 #if CONFIG_FILTER_INTRA
-static void write_filter_intra_mode_info(const AV1_COMMON *const cm,
-                                         const MACROBLOCKD *xd,
+static void write_filter_intra_mode_info(const MACROBLOCKD *xd,
                                          const MB_MODE_INFO *const mbmi,
                                          aom_writer *w) {
   if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0 &&
       av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
-    aom_write(w, mbmi->filter_intra_mode_info.use_filter_intra_mode[0],
-              cm->fc->filter_intra_probs[0]);
+    aom_write_symbol(w, mbmi->filter_intra_mode_info.use_filter_intra_mode[0],
+                     xd->tile_ctx->filter_intra_cdfs[mbmi->tx_size], 2);
     if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
       const FILTER_INTRA_MODE mode =
           mbmi->filter_intra_mode_info.filter_intra_mode[0];
@@ -1492,7 +1491,7 @@
     if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
       write_palette_mode_info(cm, xd, mi, w);
 #if CONFIG_FILTER_INTRA
-    write_filter_intra_mode_info(cm, xd, mbmi, w);
+    write_filter_intra_mode_info(xd, mbmi, w);
 #endif  // CONFIG_FILTER_INTRA
   } else {
     int16_t mode_ctx;
@@ -1809,7 +1808,7 @@
   if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
     write_palette_mode_info(cm, xd, mi, w);
 #if CONFIG_FILTER_INTRA
-  write_filter_intra_mode_info(cm, xd, mbmi, w);
+  write_filter_intra_mode_info(xd, mbmi, w);
 #endif  // CONFIG_FILTER_INTRA
 
 #if !CONFIG_TXK_SEL
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index bd79d1f..cc640a4 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -230,6 +230,7 @@
   int intra_uv_mode_cost[INTRA_MODES][UV_INTRA_MODES];
   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
 #if CONFIG_FILTER_INTRA
+  int filter_intra_cost[TX_SIZES_ALL][2];
   int filter_intra_mode_cost[PLANE_TYPES][FILTER_INTRA_MODES];
 #endif
   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e03e22e..51c4371 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4498,7 +4498,13 @@
 #if CONFIG_ENTROPY_STATS
     const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, 0);
     const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, 0);
+#if CONFIG_KF_CTX
+    int above_ctx = intra_mode_context[above];
+    int left_ctx = intra_mode_context[left];
+    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
+#else
     ++counts->kf_y_mode[above][left][y_mode];
+#endif
 #endif  // CONFIG_ENTROPY_STATS
     if (allow_update_cdf)
       update_cdf(get_y_mode_cdf(fc, mi, above_mi, left_mi, 0), y_mode,
@@ -4516,15 +4522,18 @@
       av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
     const int use_filter_intra_mode =
         mbmi->filter_intra_mode_info.use_filter_intra_mode[0];
-    ++counts->filter_intra[0][use_filter_intra_mode];
 #if CONFIG_ENTROPY_STATS
     ++counts->filter_intra_mode[0][mbmi->filter_intra_mode_info
                                        .filter_intra_mode[0]];
+    ++counts->filter_intra_tx[mbmi->tx_size][use_filter_intra_mode];
 #endif  // CONFIG_ENTROPY_STATS
-    if (allow_update_cdf)
+    if (allow_update_cdf) {
       update_cdf(fc->filter_intra_mode_cdf[0],
                  mbmi->filter_intra_mode_info.filter_intra_mode[0],
                  FILTER_INTRA_MODES);
+      update_cdf(fc->filter_intra_cdfs[mbmi->tx_size], use_filter_intra_mode,
+                 2);
+    }
   }
 #endif  // CONFIG_FILTER_INTRA
 #if CONFIG_EXT_INTRA && CONFIG_EXT_INTRA_MOD
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index b941dc8..da21286 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -918,6 +918,7 @@
 
 static void set_tile_info(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
+  (void)cm;
 #if CONFIG_DEPENDENT_HORZTILES
   int tile_row, tile_col, num_tiles_in_tg;
   int tg_row_start, tg_col_start;
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 6191136..1ef57f6 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -143,6 +143,9 @@
 #if CONFIG_FILTER_INTRA
   av1_cost_tokens_from_cdf(x->filter_intra_mode_cost[0],
                            fc->filter_intra_mode_cdf[0], NULL);
+  for (i = 0; i < TX_SIZES_ALL; ++i)
+    av1_cost_tokens_from_cdf(x->filter_intra_cost[i], fc->filter_intra_cdfs[i],
+                             NULL);
 #endif
 
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 08f052d..02408b6 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2878,13 +2878,13 @@
   }
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA
-  if (mbmi->mode == DC_PRED) {
-    const aom_prob prob = cpi->common.fc->filter_intra_probs[0];
+  if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
     if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
       const int mode = mbmi->filter_intra_mode_info.filter_intra_mode[0];
-      mode_cost += av1_cost_bit(prob, 1) + x->filter_intra_mode_cost[0][mode];
+      mode_cost += x->filter_intra_cost[mbmi->tx_size][1] +
+                   x->filter_intra_mode_cost[0][mode];
     } else {
-      mode_cost += av1_cost_bit(prob, 0);
+      mode_cost += x->filter_intra_cost[mbmi->tx_size][0];
     }
   }
 #endif  // CONFIG_FILTER_INTRA
@@ -3153,7 +3153,7 @@
     super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
     if (tokenonly_rd_stats.rate == INT_MAX) continue;
     this_rate = tokenonly_rd_stats.rate +
-                av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +
+                x->filter_intra_cost[mbmi->tx_size][1] +
                 x->filter_intra_mode_cost[0][mode] + mode_cost;
     this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
 
@@ -3567,7 +3567,7 @@
     }
 #if CONFIG_FILTER_INTRA
     if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size))
-      this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0);
+      this_rate += x->filter_intra_cost[mbmi->tx_size][0];
 #endif  // CONFIG_FILTER_INTRA
 #if CONFIG_EXT_INTRA
     if (is_directional_mode) {
@@ -9760,11 +9760,10 @@
         int64_t best_rd_tmp = INT64_MAX;
         if (rate_y != INT_MAX &&
             av1_filter_intra_allowed_txsize(best_tx_size)) {
-          best_rd_tmp = RDCOST(
-              x->rdmult,
-              rate_y + av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0) +
-                  intra_mode_cost[mbmi->mode],
-              distortion_y);
+          best_rd_tmp = RDCOST(x->rdmult,
+                               rate_y + x->filter_intra_cost[mbmi->tx_size][0] +
+                                   intra_mode_cost[mbmi->mode],
+                               distortion_y);
         }
 
         mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
@@ -9777,11 +9776,10 @@
           super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
           if (rd_stats_y_fi.rate == INT_MAX) continue;
 
-          this_rate_tmp =
-              rd_stats_y_fi.rate +
-              av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +
-              x->filter_intra_mode_cost[0][fi_mode] +
-              intra_mode_cost[mbmi->mode];
+          this_rate_tmp = rd_stats_y_fi.rate +
+                          x->filter_intra_cost[mbmi->tx_size][1] +
+                          x->filter_intra_mode_cost[0][fi_mode] +
+                          intra_mode_cost[mbmi->mode];
           this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
 
           if (this_rd_tmp < best_rd_tmp) {
@@ -9877,10 +9875,11 @@
       }
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA
-      if (mbmi->mode == DC_PRED) {
+      if (mbmi->mode == DC_PRED &&
+          av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
         rate2 +=
-            av1_cost_bit(cm->fc->filter_intra_probs[0],
-                         mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
+            x->filter_intra_cost[mbmi->tx_size][mbmi->filter_intra_mode_info
+                                                    .use_filter_intra_mode[0]];
         if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
           rate2 += x->filter_intra_mode_cost[0][mbmi->filter_intra_mode_info
                                                     .filter_intra_mode[0]];