Non RD: Add hybrid intra search.

Add a speed feature for it, and enable it for speed 8.

Add tests for periodic key frames.

Change-Id: I142003f61925461189a127f324632d1cbef5feac
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index a681e4e..826c631 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -203,6 +203,12 @@
   return xd->tile_ctx->comp_inter_cdf[av1_get_reference_mode_context(xd)];
 }
 
+// Returns the skip CDF stored in the tile context of xd, selected by the
+// context index that av1_get_skip_context() computes from xd.
+static INLINE aom_cdf_prob *av1_get_skip_cdf(const MACROBLOCKD *xd) {
+  return xd->tile_ctx->skip_cdfs[av1_get_skip_context(xd)];
+}
+
 int av1_get_comp_reference_type_context(const MACROBLOCKD *xd);
 
 // == Uni-directional contexts ==
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 0b7ebcf..cd7f51a 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -668,6 +668,15 @@
          sizeof(x->mbmi_ext->global_mvs));
 }
 
+static void hybrid_intra_mode_search(AV1_COMP *cpi, MACROBLOCK *const x,
+                                     RD_STATS *rd_cost, BLOCK_SIZE bsize,
+                                     PICK_MODE_CONTEXT *ctx) {
+  if (cpi->sf.hybrid_intra_pickmode && bsize >= BLOCK_16X16)
+    av1_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);  // non-RD search
+  else  // feature off or block below 16x16: RD search without an RD bound
+    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
+}
+
 static AOM_INLINE void pick_sb_modes(AV1_COMP *const cpi,
                                      TileDataEnc *tile_data,
                                      MACROBLOCK *const x, int mi_row,
@@ -783,7 +792,15 @@
 #if CONFIG_COLLECT_COMPONENT_TIMING
     start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
 #endif
-    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
+    switch (pick_mode_type) {
+      case PICK_MODE_RD:
+        av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
+        break;
+      case PICK_MODE_NONRD:
+        hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
+        break;
+      default: assert(0 && "Unknown pick mode type.");
+    }
 #if CONFIG_COLLECT_COMPONENT_TIMING
     end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
 #endif
@@ -795,8 +812,7 @@
       av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                          rd_cost, bsize, ctx, best_rd.rdcost);
     } else {
-      // TODO(kyslov): do the same for pick_intra_mode and
-      //               pick_inter_mode_sb_seg_skip
+      // TODO(kyslov): do the same for pick_inter_mode_sb_seg_skip
       switch (pick_mode_type) {
         case PICK_MODE_RD:
           av1_rd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx,
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 39598da..f65b860 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1361,6 +1361,93 @@
   pd->dst.buf = dst_buf_base;
 }
 
+// Non-RD luma intra mode decision for a single block.
+//
+// Evaluates the first four entries of intra_mode_list on plane 0 at a single
+// transform size, with estimate_block_intra() reporting rate and distortion
+// through args.rdc, and keeps the candidate with the lowest RD cost. The
+// winning stats are returned through rd_cost, the winning mode is left in
+// xd->mi[0]->mode, and store_coding_context() is then called on ctx.
+void av1_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
+                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
+  AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mi = xd->mi[0];
+  RD_STATS this_rdc, best_rdc;
+  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
+  // The same transform size is used for every candidate: the smaller of
+  // max_txsize_lookup[bsize] and the biggest size for the tx search mode.
+  const TX_SIZE intra_tx_size =
+      AOMMIN(max_txsize_lookup[bsize],
+             tx_mode_to_biggest_tx_size[x->tx_mode_search_type]);
+  const MB_MODE_INFO *above_mi = xd->above_mbmi;
+  const MB_MODE_INFO *left_mi = xd->left_mbmi;
+  const PREDICTION_MODE A = av1_above_block_mode(above_mi);
+  const PREDICTION_MODE L = av1_left_block_mode(left_mi);
+  // Mode costs are indexed by the above and left neighbours' modes.
+  int *bmode_costs = x->y_mode_costs[A][L];
+  PREDICTION_MODE best_mode = DC_PRED;
+
+  av1_invalid_rd_stats(&best_rdc);
+  av1_invalid_rd_stats(&this_rdc);
+
+  mi->ref_frame[0] = INTRA_FRAME;
+  mi->ref_frame[1] = NONE_FRAME;
+  mi->use_intrabc = 0;
+  mi->skip_mode = 0;
+
+  // Initialize interp_filter here so we do not have to check for inter block
+  // modes in get_pred_context_switchable_interp()
+  mi->interp_filters = av1_broadcast_interp_filter(SWITCHABLE_FILTERS);
+
+  mi->mv[0].as_int = INVALID_MV;
+  mi->uv_mode = DC_PRED;
+  memset(xd->tx_type_map, DCT_DCT,
+         sizeof(xd->tx_type_map[0]) * ctx->num_4x4_blk);
+  av1_zero(x->blk_skip);
+
+  // Change the limit of this loop to add other intra prediction
+  // mode tests.
+  for (int i = 0; i < 4; ++i) {
+    PREDICTION_MODE this_mode = intra_mode_list[i];
+    this_rdc.dist = this_rdc.rate = 0;
+    args.mode = this_mode;
+    args.skippable = 1;
+    args.rdc = &this_rdc;
+    mi->tx_size = intra_tx_size;
+    init_mbmi(mi, this_mode, INTRA_FRAME, NONE_FRAME, cm);
+    av1_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
+                                           &args);
+    // NOTE(review): raw skip CDF entries are passed to av1_cost_symbol();
+    // confirm that cdf[1] / cdf[0] are the intended probabilities.
+    if (args.skippable) {
+      // The accumulated rate is replaced, not incremented, in this branch.
+      this_rdc.rate = av1_cost_symbol(av1_get_skip_cdf(xd)[1]);
+    } else {
+      this_rdc.rate += av1_cost_symbol(av1_get_skip_cdf(xd)[0]);
+    }
+    this_rdc.rate += bmode_costs[this_mode];
+    this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
+
+    if (this_rdc.rdcost < best_rdc.rdcost) {
+      best_rdc = this_rdc;
+      best_mode = this_mode;
+    }
+  }
+
+  // init_mbmi() is called with every candidate and presumably rewrites
+  // mi->mode (TODO confirm), so the winner is tracked in best_mode and
+  // written back only once the search has finished.
+  mi->mode = best_mode;
+  *rd_cost = best_rdc;
+
+#if CONFIG_INTERNAL_STATS
+  store_coding_context(x, ctx, mi->mode);
+#else
+  store_coding_context(x, ctx);
+#endif  // CONFIG_INTERNAL_STATS
+}
+
 void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
                                   MACROBLOCK *x, RD_STATS *rd_cost,
                                   BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index e953c8c..a0294ea 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -123,6 +123,11 @@
                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                                int64_t best_rd_so_far);
 
+// Non-RD intra mode selection for one block (defined in nonrd_pickmode.c).
+// The stats of the chosen mode are returned through rd_cost.
+void av1_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
+                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
+
 void av1_nonrd_pick_inter_mode_sb(struct AV1_COMP *cpi,
                                   struct TileDataEnc *tile_data,
                                   struct macroblock *x,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6569085..51d8393 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -538,6 +538,7 @@
   sf->use_nonrd_filter_search = 1;
   sf->nonrd_use_blockyrd_interp_filter = 0;
   sf->nonrd_reduce_golden_mode_search = 0;
+  sf->hybrid_intra_pickmode = 0;
 
   if (speed >= 1) {
     sf->gm_erroradv_type = GM_ERRORADV_TR_1;
@@ -722,7 +723,7 @@
     sf->nonrd_use_blockyrd_interp_filter = 0;
     sf->use_nonrd_altref_frame = 0;
     sf->nonrd_reduce_golden_mode_search = 1;
-
+    sf->hybrid_intra_pickmode = 1;
 // TODO(kyslov) Enable when better model is available
 // It gives +5% speedup and 11% overall BDRate degradation
 // So, can not enable now until better CurvFit is there
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index eac4697..7a77d35 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -880,6 +880,10 @@
   // colors to remaining colors) and terminate the search if current number of
   // palette colors is not the winner.
   int prune_palette_search_level;
+
+  // Hybrid intra search for intra-only frames: RD search for bsize < 16x16,
+  // non-RD search otherwise. Only consulted on the non-RD pick mode path.
+  int hybrid_intra_pickmode;
 } SPEED_FEATURES;
 
 struct AV1_COMP;
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index b9a89f4..053c055 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -84,6 +84,30 @@
         << " The datarate for the file is greater than target by too much!";
   }
 
+  virtual void BasicRateTargetingCBRPeriodicKeyFrameTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    // Periodic key frames: allow at most 50 frames between key frames.
+    cfg_.kf_max_dist = 50;
+
+    ::libaom_test::I420VideoSource video("pixel_capture_w320h240.yuv", 320, 240,
+                                         30, 1, 0, 310);
+    const int bitrate_array[2] = { 150, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
+        << " The datarate for the file is greater than target by too much!";
+  }
+
   virtual void BasicRateTargetingAQModeOnOffCBRTest() {
     if (GET_PARAM(4) > 0) return;
     cfg_.rc_buf_initial_sz = 500;
@@ -211,11 +235,16 @@
   BasicRateTargetingVBRTest();
 }
 
-// Check basic rate targeting for CBR,
+// Check basic rate targeting for CBR.
 TEST_P(DatarateTestLarge, BasicRateTargetingCBR) {
   BasicRateTargetingCBRTest();
 }
 
+// Check CBR rate targeting with periodic key frames.
+TEST_P(DatarateTestLarge, PeriodicKeyFrameCBR) {
+  BasicRateTargetingCBRPeriodicKeyFrameTest();
+}
+
 // Check basic rate targeting for CBR.
 TEST_P(DatarateTestLarge, BasicRateTargeting444CBR) {
   BasicRateTargeting444CBRTest();
@@ -291,11 +320,16 @@
   BasicRateTargetingVBRTest();
 }
 
-// Check basic rate targeting for CBR,
+// Check basic rate targeting for CBR.
 TEST_P(DatarateTestRealtime, BasicRateTargetingCBR) {
   BasicRateTargetingCBRTest();
 }
 
+// Check CBR rate targeting with periodic key frames.
+TEST_P(DatarateTestRealtime, PeriodicKeyFrameCBR) {
+  BasicRateTargetingCBRPeriodicKeyFrameTest();
+}
+
 // Check basic rate targeting for CBR.
 TEST_P(DatarateTestRealtime, BasicRateTargeting444CBR) {
   BasicRateTargeting444CBRTest();