Speed up VMAF calculations when using libvmaf_rc

by reducing the number of vmaf_context initializations. 4% encoding
time savings in the 1080p test (30f, sp1).

40% User time and 96% Sys time savings compared to encoding with libvmaf.
However, the Real time is slower because multithreading is not enabled.

Performance counter stats for './aomenc_old basketballdrive_1080p50.y4m
 --limit=30 -o output --tune=vmaf --cpu-used=1':
libvmaf
     441.762382891 seconds time elapsed
    1422.326568000 seconds user
      51.411417000 seconds sys
libvmaf_rc
     843.705030306 seconds time elapsed
     841.818651000 seconds user
       1.819919000 seconds sys

Change-Id: I5e29aff63ccd3fdfe268ef299b06df5a7126842c
(cherry picked from commit b2a3ecb3d95c036f9a9810ea716080cf4c2c9f34)
diff --git a/aom_dsp/vmaf.c b/aom_dsp/vmaf.c
index 35970f3..636aa64 100644
--- a/aom_dsp/vmaf.c
+++ b/aom_dsp/vmaf.c
@@ -169,7 +169,7 @@
 #endif
 
 #if CONFIG_USE_VMAF_RC
-void aom_init_vmaf_rc(VmafModel **vmaf_model, const char *model_path) {
+void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path) {
   if (*vmaf_model != NULL) return;
   VmafModelConfig model_cfg;
   model_cfg.flags = VMAF_MODEL_FLAG_DISABLE_CLIP;
@@ -181,7 +181,7 @@
   }
 }
 
-void aom_close_vmaf_rc(VmafModel *vmaf_model) {
+void aom_close_vmaf_model_rc(VmafModel *vmaf_model) {
   vmaf_model_destroy(vmaf_model);
 }
 
@@ -211,38 +211,53 @@
   }
 }
 
-void aom_calc_vmaf_rc(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source,
-                      const YV12_BUFFER_CONFIG *distorted, int bit_depth,
-                      int cal_vmaf_neg, double *vmaf) {
+void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model,
+                              bool cal_vmaf_neg) {
   VmafConfiguration cfg;
   cfg.log_level = VMAF_LOG_LEVEL_NONE;
   cfg.n_threads = 0;
   cfg.n_subsample = 0;
   cfg.cpumask = 0;
 
-  VmafContext *vmaf_context;
-  if (vmaf_init(&vmaf_context, cfg)) {
+  if (vmaf_init(vmaf_context, cfg)) {
     vmaf_fatal_error("Failed to init VMAF context.");
   }
 
-  if (vmaf_use_features_from_model(vmaf_context, vmaf_model)) {
+  if (vmaf_use_features_from_model(*vmaf_context, vmaf_model)) {
     vmaf_fatal_error("Failed to load feature extractors from VMAF model.");
   }
 
   if (cal_vmaf_neg) {
     VmafFeatureDictionary *vif_feature = NULL;
     vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit", "1.0");
-    if (vmaf_use_feature(vmaf_context, "float_vif", vif_feature)) {
+    if (vmaf_use_feature(*vmaf_context, "float_vif", vif_feature)) {
       vmaf_fatal_error("Failed to use feature float_vif.");
     }
 
     VmafFeatureDictionary *adm_feature = NULL;
     vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit", "1.0");
-    if (vmaf_use_feature(vmaf_context, "float_adm", adm_feature)) {
+    if (vmaf_use_feature(*vmaf_context, "float_adm", adm_feature)) {
       vmaf_fatal_error("Failed to use feature float_adm.");
     }
   }
 
+  VmafFeatureDictionary *motion_force_zero = NULL;
+  vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero", "true");
+  if (vmaf_use_feature(*vmaf_context, "float_motion", motion_force_zero)) {
+    vmaf_fatal_error("Failed to use feature float_motion.");
+  }
+}
+
+void aom_close_vmaf_context_rc(VmafContext *vmaf_context) {
+  if (vmaf_close(vmaf_context)) {
+    vmaf_fatal_error("Failed to close VMAF context.");
+  }
+}
+
+void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model,
+                               const YV12_BUFFER_CONFIG *source,
+                               const YV12_BUFFER_CONFIG *distorted,
+                               int bit_depth, int frame_index, double *vmaf) {
   VmafPicture ref, dist;
   if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width,
                          source->y_height) ||
@@ -252,18 +267,15 @@
   }
   copy_picture(bit_depth, source, &ref);
   copy_picture(bit_depth, distorted, &dist);
-  if (vmaf_read_pictures(vmaf_context, &ref, &dist, /*picture index=*/0)) {
+  if (vmaf_read_pictures(vmaf_context, &ref, &dist,
+                         /*picture index=*/frame_index)) {
     vmaf_fatal_error("Failed to read VMAF pictures.");
   }
 
   vmaf_picture_unref(&ref);
   vmaf_picture_unref(&dist);
 
-  vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, 0);
-
-  if (vmaf_close(vmaf_context)) {
-    vmaf_fatal_error("Failed to close VMAF context.");
-  }
+  vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, frame_index);
 }
 
 #endif  // CONFIG_USE_VMAF_RC
diff --git a/aom_dsp/vmaf.h b/aom_dsp/vmaf.h
index 607d4bd..65ba199 100644
--- a/aom_dsp/vmaf.h
+++ b/aom_dsp/vmaf.h
@@ -12,6 +12,7 @@
 #ifndef AOM_AOM_DSP_VMAF_H_
 #define AOM_AOM_DSP_VMAF_H_
 
+#include <stdbool.h>
 #include "aom_scale/yv12config.h"
 
 #if CONFIG_USE_VMAF_RC
@@ -47,13 +48,17 @@
 } TuneVMAFInfo;
 
 #if CONFIG_USE_VMAF_RC
-void aom_init_vmaf_rc(VmafModel **vmaf_model, const char *model_path);
+void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model,
+                              bool cal_vmaf_neg);
+void aom_close_vmaf_context_rc(VmafContext *vmaf_context);
 
-void aom_calc_vmaf_rc(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source,
-                      const YV12_BUFFER_CONFIG *distorted, int bit_depth,
-                      int cal_vmaf_neg, double *vmaf);
+void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path);
+void aom_close_vmaf_model_rc(VmafModel *vmaf_model);
 
-void aom_close_vmaf_rc(VmafModel *vmaf_model);
+void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model,
+                               const YV12_BUFFER_CONFIG *source,
+                               const YV12_BUFFER_CONFIG *distorted,
+                               int bit_depth, int frame_index, double *vmaf);
 #else
 void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source,
                    const YV12_BUFFER_CONFIG *distorted, int bit_depth,
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 838229a..bb8fb9a 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2453,8 +2453,8 @@
   }
 
 #if CONFIG_USE_VMAF_RC
-  aom_init_vmaf_rc(&cpi->vmaf_info.vmaf_model,
-                   cpi->oxcf.tune_cfg.vmaf_model_path);
+  aom_init_vmaf_model_rc(&cpi->vmaf_info.vmaf_model,
+                         cpi->oxcf.tune_cfg.vmaf_model_path);
 #endif
 
   // Handle fixed keyframe intervals
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 4f84710..b60ae8d 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -240,7 +240,7 @@
   cpi->vmaf_info.rdmult_scaling_factors = NULL;
 
 #if CONFIG_USE_VMAF_RC
-  aom_close_vmaf_rc(cpi->vmaf_info.vmaf_model);
+  aom_close_vmaf_model_rc(cpi->vmaf_info.vmaf_model);
 #endif
 #endif
 
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c
index ec63379..08c3d4e 100644
--- a/av1/encoder/tune_vmaf.c
+++ b/av1/encoder/tune_vmaf.c
@@ -158,16 +158,20 @@
   return var;
 }
 
-static double cal_approx_vmaf(const AV1_COMP *const cpi, double source_variance,
+static double cal_approx_vmaf(const AV1_COMP *const cpi,
+#if CONFIG_USE_VMAF_RC
+                              VmafContext *vmaf_context, int *vmaf_cal_index,
+#endif
+                              double source_variance,
                               YV12_BUFFER_CONFIG *const source,
                               YV12_BUFFER_CONFIG *const sharpened) {
   const int bit_depth = cpi->td.mb.e_mbd.bd;
   double new_vmaf;
 
 #if CONFIG_USE_VMAF_RC
-  aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, source, sharpened, bit_depth,
-                   cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN,
-                   &new_vmaf);
+  aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source,
+                            sharpened, bit_depth, *vmaf_cal_index, &new_vmaf);
+  (*vmaf_cal_index)++;
 #else
   aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, sharpened,
                 bit_depth, &new_vmaf);
@@ -178,11 +182,14 @@
 }
 
 static double find_best_frame_unsharp_amount_loop(
-    const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
-    YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
-    double best_vmaf, const double baseline_variance,
-    const double unsharp_amount_start, const double step_size,
-    const int max_loop_count, const double max_amount) {
+    const AV1_COMP *const cpi,
+#if CONFIG_USE_VMAF_RC
+    VmafContext *vmaf_context, int *vmaf_cal_index,
+#endif
+    YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
+    YV12_BUFFER_CONFIG *const sharpened, double best_vmaf,
+    const double baseline_variance, const double unsharp_amount_start,
+    const double step_size, const int max_loop_count, const double max_amount) {
   const double min_amount = 0.0;
   int loop_count = 0;
   double approx_vmaf = best_vmaf;
@@ -192,7 +199,11 @@
     unsharp_amount += step_size;
     if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;
     unsharp(cpi, source, blurred, sharpened, unsharp_amount);
-    approx_vmaf = cal_approx_vmaf(cpi, baseline_variance, source, sharpened);
+    approx_vmaf = cal_approx_vmaf(cpi,
+#if CONFIG_USE_VMAF_RC
+                                  vmaf_context, vmaf_cal_index,
+#endif
+                                  baseline_variance, source, sharpened);
 
     loop_count++;
   } while (approx_vmaf > best_vmaf && loop_count < max_loop_count);
@@ -211,7 +222,13 @@
   const AV1_COMMON *const cm = &cpi->common;
   const int width = source->y_width;
   const int height = source->y_height;
-
+#if CONFIG_USE_VMAF_RC
+  VmafContext *vmaf_context;
+  aom_init_vmaf_context_rc(
+      &vmaf_context, cpi->vmaf_info.vmaf_model,
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
+  int vmaf_cal_index = 0;
+#endif
   YV12_BUFFER_CONFIG sharpened;
   memset(&sharpened, 0, sizeof(sharpened));
   aom_alloc_frame_buffer(
@@ -222,29 +239,52 @@
   double unsharp_amount;
   if (unsharp_amount_start <= step_size) {
     unsharp_amount = find_best_frame_unsharp_amount_loop(
-        cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0,
-        step_size, max_loop_count, max_filter_amount);
+        cpi,
+#if CONFIG_USE_VMAF_RC
+        vmaf_context, &vmaf_cal_index,
+#endif
+        source, blurred, &sharpened, 0.0, baseline_variance, 0.0, step_size,
+        max_loop_count, max_filter_amount);
   } else {
     double a0 = unsharp_amount_start - step_size, a1 = unsharp_amount_start;
     double v0, v1;
     unsharp(cpi, source, blurred, &sharpened, a0);
-    v0 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
+    v0 = cal_approx_vmaf(cpi,
+#if CONFIG_USE_VMAF_RC
+                         vmaf_context, &vmaf_cal_index,
+#endif
+                         baseline_variance, source, &sharpened);
     unsharp(cpi, source, blurred, &sharpened, a1);
-    v1 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
+    v1 = cal_approx_vmaf(cpi,
+#if CONFIG_USE_VMAF_RC
+                         vmaf_context, &vmaf_cal_index,
+#endif
+                         baseline_variance, source, &sharpened);
     if (fabs(v0 - v1) < 0.01) {
       unsharp_amount = a0;
     } else if (v0 > v1) {
       unsharp_amount = find_best_frame_unsharp_amount_loop(
-          cpi, source, blurred, &sharpened, v0, baseline_variance, a0,
-          -step_size, max_loop_count, max_filter_amount);
+          cpi,
+#if CONFIG_USE_VMAF_RC
+          vmaf_context, &vmaf_cal_index,
+#endif
+          source, blurred, &sharpened, v0, baseline_variance, a0, -step_size,
+          max_loop_count, max_filter_amount);
     } else {
       unsharp_amount = find_best_frame_unsharp_amount_loop(
-          cpi, source, blurred, &sharpened, v1, baseline_variance, a1,
-          step_size, max_loop_count, max_filter_amount);
+          cpi,
+#if CONFIG_USE_VMAF_RC
+          vmaf_context, &vmaf_cal_index,
+#endif
+          source, blurred, &sharpened, v1, baseline_variance, a1, step_size,
+          max_loop_count, max_filter_amount);
     }
   }
 
   aom_free_frame_buffer(&sharpened);
+#if CONFIG_USE_VMAF_RC
+  aom_close_vmaf_context_rc(vmaf_context);
+#endif
   return unsharp_amount;
 }
 
@@ -588,6 +628,11 @@
                          cpi->oxcf.border_in_pixels,
                          cm->features.byte_alignment);
   aom_yv12_copy_frame(&resized_source, &recon, 1);
+
+  VmafContext *vmaf_context;
+  aom_init_vmaf_context_rc(
+      &vmaf_context, cpi->vmaf_info.vmaf_model,
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
 #else
   double *scores = aom_malloc(sizeof(*scores) * (num_rows * num_cols));
   memset(scores, 0, sizeof(*scores) * (num_rows * num_cols));
@@ -639,9 +684,9 @@
       }
 
       double vmaf;
-      aom_calc_vmaf_rc(
-          cpi->vmaf_info.vmaf_model, &resized_source, &recon, bit_depth,
-          cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN, &vmaf);
+      aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model,
+                                &resized_source, &recon, bit_depth, index,
+                                &vmaf);
 
       // Restore recon buf
       if (cpi->common.seq_params.use_highbitdepth) {
@@ -678,7 +723,9 @@
 
   aom_free_frame_buffer(&resized_source);
   aom_free_frame_buffer(&blurred);
-#if !CONFIG_USE_VMAF_RC
+#if CONFIG_USE_VMAF_RC
+  aom_close_vmaf_context_rc(vmaf_context);
+#else
   aom_free(scores);
 #endif
   aom_clear_system_state();
@@ -879,31 +926,34 @@
 
 #if CONFIG_USE_VMAF_RC
 static double cal_approx_score(const AV1_COMP *const cpi,
+                               VmafContext *vmaf_context, int vmaf_cal_index,
                                YV12_BUFFER_CONFIG *const ref,
                                YV12_BUFFER_CONFIG *const sharpened) {
   double score;
   const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
-  aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, sharpened, bit_depth, 1,
-                   &score);
+  aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, ref,
+                            sharpened, bit_depth, vmaf_cal_index, &score);
   return score;
 }
 
 static double find_best_frame_unsharp_amount_loop_neg(
-    const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,
-    YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
-    YV12_BUFFER_CONFIG *const sharpened, double best_score,
-    const double unsharp_amount_start, const double step_size,
-    const int max_loop_count, const double max_amount) {
+    const AV1_COMP *const cpi, VmafContext *vmaf_context,
+    YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const source,
+    YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
+    double best_score, const double unsharp_amount_start,
+    const double step_size, const int max_loop_count, const double max_amount) {
   const double min_amount = 0.0;
   int loop_count = 0;
   double approx_score = best_score;
   double unsharp_amount = unsharp_amount_start;
+  int vmaf_cal_index = 2;
   do {
     best_score = approx_score;
     unsharp_amount += step_size;
     if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;
     unsharp(cpi, source, blurred, sharpened, unsharp_amount);
-    approx_score = cal_approx_score(cpi, ref, sharpened);
+    approx_score =
+        cal_approx_score(cpi, vmaf_context, vmaf_cal_index++, ref, sharpened);
 
     loop_count++;
   } while (approx_score > best_score && loop_count < max_loop_count);
@@ -913,17 +963,18 @@
 }
 
 static double find_best_frame_unsharp_amount_neg(
-    const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,
-    YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
-    const double unsharp_amount_start, const double step_size,
-    const int max_loop_count, const double max_filter_amount) {
+    const AV1_COMP *const cpi, VmafContext *vmaf_context,
+    YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const source,
+    YV12_BUFFER_CONFIG *const blurred, const double unsharp_amount_start,
+    const double step_size, const int max_loop_count,
+    const double max_filter_amount) {
   const AV1_COMMON *const cm = &cpi->common;
   const int width = source->y_width;
   const int height = source->y_height;
 
   double best_score = 0.0;
-  aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, source, cpi->td.mb.e_mbd.bd,
-                   1, &best_score);
+  aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, ref,
+                            source, cpi->td.mb.e_mbd.bd, 1, &best_score);
   YV12_BUFFER_CONFIG sharpened;
   memset(&sharpened, 0, sizeof(sharpened));
   aom_alloc_frame_buffer(
@@ -931,8 +982,8 @@
       cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
 
   const double unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
-      cpi, ref, source, blurred, &sharpened, best_score, unsharp_amount_start,
-      step_size, max_loop_count, max_filter_amount);
+      cpi, vmaf_context, ref, source, blurred, &sharpened, best_score,
+      unsharp_amount_start, step_size, max_loop_count, max_filter_amount);
 
   aom_free_frame_buffer(&sharpened);
   return unsharp_amount;
@@ -943,9 +994,13 @@
                            YV12_BUFFER_CONFIG *recon) {
   const int bit_depth = cpi->td.mb.e_mbd.bd;
 #if CONFIG_USE_VMAF_RC
-  aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, source, recon, bit_depth,
-                   cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN,
-                   &cpi->vmaf_info.last_frame_vmaf);
+  VmafContext *vmaf_context;
+  aom_init_vmaf_context_rc(
+      &vmaf_context, cpi->vmaf_info.vmaf_model,
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
+  aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source,
+                            recon, bit_depth, 0,
+                            &cpi->vmaf_info.last_frame_vmaf);
 #else
   aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, recon, bit_depth,
                 &cpi->vmaf_info.last_frame_vmaf);
@@ -973,9 +1028,10 @@
 
     gaussian_blur(bit_depth, recon, &blurred);
     cpi->vmaf_info.best_unsharp_amount = find_best_frame_unsharp_amount_neg(
-        cpi, source, recon, &blurred, 0.0, 0.025, 20, 1.01);
+        cpi, vmaf_context, source, recon, &blurred, 0.0, 0.025, 20, 1.01);
 
     aom_free_frame_buffer(&blurred);
   }
+  aom_close_vmaf_context_rc(vmaf_context);
 #endif
 }