Allintra: Turn off several intra coding tools automatically

Add a command line option to automatically determine whether to
turn off several intra coding tools to get faster encoding speed.
By default the feature is off.
To turn it on, add --auto-intra-tools-off=1.

This feature takes effect only in all intra mode with "--deltaq-mode=3",
i.e., the perceptual quality mode.

When the flag is set, the encoder analyzes the encoding condition
and the reconstructed image quality as compared to the source in
the preprocessing stage performed in deltaq-mode.

If it is in the high quality range, and the reconstructed image
quality is good enough, we turn off several intra coding tools,
based on the observation that in the high quality range,
the perceptual quality difference is less likely perceivable even
without a few intra coding tools that have an impact on the
compression efficiency.

With this change, in high quality range, the encoding speed is
up to 80% faster than the baseline on some images.
40% to 50% faster is usually seen.
There is no impact in low quality range.

Change-Id: I21566f14fbc7b226632bbcf48d955f8108cbbdc2
diff --git a/aom/aomcx.h b/aom/aomcx.h
index b8462e5..c34e02e 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1397,6 +1397,13 @@
    * int* parameter
    */
   AOME_GET_LOOPFILTER_LEVEL = 150,
+
+  /*!\brief Codec control to automatically turn off several intra coding tools
+   * - 0 = do not use the feature
+   * - 1 = enable the automatic decision to turn off several intra tools
+   */
+  AV1E_SET_AUTO_INTRA_TOOLS_OFF = 151,
+
   // Any new encoder control IDs should be added above.
   // Maximum allowed encoder control ID is 229.
   // No encoder control ID should be added below.
@@ -1965,6 +1972,9 @@
 AOM_CTRL_USE_TYPE(AOME_GET_LOOPFILTER_LEVEL, int *)
 #define AOM_CTRL_AOME_GET_LOOPFILTER_LEVEL
 
+AOM_CTRL_USE_TYPE(AV1E_SET_AUTO_INTRA_TOOLS_OFF, unsigned int)
+#define AOM_CTRL_AV1E_SET_AUTO_INTRA_TOOLS_OFF
+
 /*!\endcond */
 /*! @} - end defgroup aom_encoder */
 #ifdef __cplusplus
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 65a2684..e8beb60 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -235,6 +235,7 @@
                                         AV1E_SET_ENABLE_DIRECTIONAL_INTRA,
                                         AV1E_SET_ENABLE_TX_SIZE_SEARCH,
                                         AV1E_SET_LOOPFILTER_CONTROL,
+                                        AV1E_SET_AUTO_INTRA_TOOLS_OFF,
                                         0 };
 
 const arg_def_t *main_args[] = { &g_av1_codec_arg_defs.help,
@@ -436,6 +437,7 @@
   &g_av1_codec_arg_defs.enable_directional_intra,
   &g_av1_codec_arg_defs.enable_tx_size_search,
   &g_av1_codec_arg_defs.loopfilter_control,
+  &g_av1_codec_arg_defs.auto_intra_tools_off,
   NULL,
 };
 
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index 3b8c075..1c1b8ad 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -641,6 +641,11 @@
       "loopfilter for all frames (default), 2: Disable loopfilter for "
       "non-reference frames, 3: Disable loopfilter for frames with low motion"),
 
+  .auto_intra_tools_off = ARG_DEF(
+      NULL, "auto-intra-tools-off", 1,
+      "Automatically turn off several intra coding tools for allintra mode. "
+      "Only in effect if --deltaq-mode=3."),
+
   .two_pass_input =
       ARG_DEF(NULL, "two-pass-input", 1,
               "The input file for the second pass for three-pass encoding."),
diff --git a/av1/arg_defs.h b/av1/arg_defs.h
index 3685cf5..e556ab7 100644
--- a/av1/arg_defs.h
+++ b/av1/arg_defs.h
@@ -226,6 +226,7 @@
   arg_def_t two_pass_width;
   arg_def_t two_pass_height;
   arg_def_t second_pass_log;
+  arg_def_t auto_intra_tools_off;
 #endif  // CONFIG_AV1_ENCODER
 } av1_codec_arg_definitions_t;
 
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 972b563..57bfa08 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -170,6 +170,18 @@
   // the name of the second pass output file when passes > 2
   const char *two_pass_output;
   const char *second_pass_log;
+  // Automatically determine whether to disable several intra tools
+  // when "--deltaq-mode=3" is true.
+  // Default as 0.
+  // When set to 1, the encoder will analyze the reconstruction quality
+  // as compared to the source image in the preprocessing pass.
+  // If the reconstruction quality is considered high enough, we disable
+  // the following intra coding tools, for better encoding speed:
+  // "--enable_smooth_intra",
+  // "--enable_paeth_intra",
+  // "--enable_cfl_intra",
+  // "--enable_diagonal_intra".
+  int auto_intra_tools_off;
 };
 
 #if CONFIG_REALTIME_ONLY
@@ -322,6 +334,7 @@
   LOOPFILTER_ALL,  // loopfilter_control
   NULL,            // two_pass_output
   NULL,            // second_pass_log
+  0,               // auto_intra_tools_off
 };
 #else
 static const struct av1_extracfg default_extra_cfg = {
@@ -461,6 +474,7 @@
   LOOPFILTER_ALL,  // loopfilter_control
   NULL,            // two_pass_output
   NULL,            // second_pass_log
+  0,               // auto_intra_tools_off
 };
 #endif
 
@@ -1319,6 +1333,7 @@
   intra_mode_cfg->enable_directional_intra =
       extra_cfg->enable_directional_intra;
   intra_mode_cfg->enable_diagonal_intra = extra_cfg->enable_diagonal_intra;
+  intra_mode_cfg->auto_intra_tools_off = extra_cfg->auto_intra_tools_off;
 
   // Set transform size/type configuration.
   txfm_cfg->enable_tx64 = extra_cfg->enable_tx64;
@@ -2435,6 +2450,13 @@
   return res;
 }
 
+static aom_codec_err_t ctrl_set_auto_intra_tools_off(aom_codec_alg_priv_t *ctx,
+                                                     va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;  // Local copy to modify.
+  extra_cfg.auto_intra_tools_off = CAST(AV1E_SET_AUTO_INTRA_TOOLS_OFF, args);
+  return update_extra_cfg(ctx, &extra_cfg);  // Result is returned as-is.
+}
+
 static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
   aom_codec_err_t res = AOM_CODEC_OK;
 
@@ -4016,6 +4038,7 @@
   { AV1E_SET_EXTERNAL_PARTITION, ctrl_set_external_partition },
   { AV1E_SET_ENABLE_TX_SIZE_SEARCH, ctrl_set_enable_tx_size_search },
   { AV1E_SET_LOOPFILTER_CONTROL, ctrl_set_loopfilter_control },
+  { AV1E_SET_AUTO_INTRA_TOOLS_OFF, ctrl_set_auto_intra_tools_off },
 
   // Getters
   { AOME_GET_LAST_QUANTIZER, ctrl_get_quantizer },
diff --git a/av1/encoder/allintra_vis.c b/av1/encoder/allintra_vis.c
index c253088..9fad7bb 100644
--- a/av1/encoder/allintra_vis.c
+++ b/av1/encoder/allintra_vis.c
@@ -15,7 +15,9 @@
 
 #include "av1/common/reconinter.h"
 #include "av1/encoder/allintra_vis.h"
+#include "av1/encoder/encoder.h"
 #include "av1/encoder/hybrid_fwd_txfm.h"
+#include "av1/encoder/model_rd.h"
 #include "av1/encoder/rdopt_utils.h"
 
 // Process the wiener variance in 16x16 block basis.
@@ -332,6 +334,32 @@
   return norm_factor;
 }
 
+// Disables several intra tools when the frame is in the high quality range.
+static void automatic_intra_tools_off(AV1_COMP *cpi,
+                                      const double sum_rec_distortion,
+                                      const double sum_est_rate) {
+  if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;
+
+  const int high_quality_qindex = 128;
+  const double high_quality_bpp = 2.0;
+  const double high_quality_dist_per_pix = 4.0;
+
+  AV1_COMMON *const cm = &cpi->common;
+  const int qindex = cm->quant_params.base_qindex;
+  // Count pixels as double: width * height * 100 may overflow in int math.
+  const double num_pixels = (double)cm->width * cm->height;
+  const double dist_per_pix = sum_rec_distortion / num_pixels;
+  // The estimated bpp is not accurate; scale it down by an empirical 100.
+  const double estimate_bpp = sum_est_rate / (num_pixels * 100);
+  if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp &&
+      dist_per_pix < high_quality_dist_per_pix) {
+    cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
+    cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
+    cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
+    cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
+  }
+}
+
 void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   uint8_t *buffer = cpi->source->y_buffer;
@@ -373,6 +401,9 @@
   cpi->norm_wiener_variance = 0;
   int mb_step = mi_size_wide[bsize];
 
+  double sum_rec_distortion = 0.0;
+  double sum_est_rate = 0.0;
+  double sum_est_dist = 0.0;
   for (mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
     for (mi_col = 0; mi_col < cpi->frame_info.mi_cols; mi_col += mb_step) {
       PREDICTION_MODE best_mode = DC_PRED;
@@ -491,6 +522,15 @@
         }
       }
 
+      sum_rec_distortion += weber_stats->distortion;
+      int est_block_rate = 0;
+      int64_t est_block_dist = 0;
+      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
+                                      pix_num, &est_block_rate,
+                                      &est_block_dist);
+      sum_est_rate += est_block_rate;
+      sum_est_dist += est_block_dist;
+
       weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
       weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
       weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
@@ -504,6 +544,9 @@
     }
   }
 
+  // Determine whether to turn off several intra coding tools.
+  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);
+
   BLOCK_SIZE norm_block_size = BLOCK_16X16;
   cpi->norm_wiener_variance =
       pick_norm_factor_and_block_size(cpi, &norm_block_size);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 96df304..7208c36 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -328,6 +328,20 @@
    * enabled.
    */
   bool enable_angle_delta;
+  /*!
+   * Flag to indicate whether to automatically turn off several intra coding
+   * tools.
+   * This flag is only used when "--deltaq-mode=3" is true.
+   * When set to 1, the encoder will analyze the reconstruction quality
+   * as compared to the source image in the preprocessing pass.
+   * If the reconstruction quality is considered high enough, we disable
+   * the following intra coding tools, for better encoding speed:
+   * "--enable_smooth_intra",
+   * "--enable_paeth_intra",
+   * "--enable_cfl_intra",
+   * "--enable_diagonal_intra".
+   */
+  bool auto_intra_tools_off;
 } IntraModeCfg;
 
 /*!