Determine denom automatically in superres mode 3

The maximum superres denominator is now determined
based on frequency analysis. Superresolution is now used
starting from the thresholds specified in qthresh mode,
up to the maximum level determined by the analysis.

With --superres-mode=3 --superres-kf-qthresh=48 in a
single-frame (image coding) test:
hdres: -0.690% PSNR, -0.871% SSIM.
midres: +0.006% PSNR, -0.508% SSIM.

Change-Id: Ib88376b39079bcc6e22f0b3048232e47462bbf8e
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 75d1d49..9acc690 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -265,53 +265,65 @@
   }
 }
 
-// Compute the horizontal frequency component energy in a frame
-// by calculuating the 16x4 Horizontal DCT. This will be subsequently
-// used to decide the superresolution factors.
+// Compute the horizontal frequency components' energy in a frame
+// by calculating the 16x4 Horizontal DCT. This is to be used to
+// decide the superresolution parameters.
 void analyze_hor_freq(const AV1_COMP *cpi, double *energy) {
-  uint64_t freq_energy[16] = { 0 };
+  uint64_t freq_energy[8] = { 0 };
   const YV12_BUFFER_CONFIG *buf = cpi->source;
   const int bd = cpi->td.mb.e_mbd.bd;
   const int width = buf->y_crop_width;
   const int height = buf->y_crop_height;
-  int32_t coeff[16 * 4];
+  DECLARE_ALIGNED(16, int32_t, coeff[16 * 4]);
   int n = 0;
+  memset(freq_energy, 0, sizeof(freq_energy));
   if (buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     const int16_t *src16 = (const int16_t *)CONVERT_TO_SHORTPTR(buf->y_buffer);
     for (int i = 0; i < height - 4; i += 4) {
       for (int j = 0; j < width - 16; j += 16) {
         av1_fwd_txfm2d_16x4(src16 + i * buf->y_stride + j, coeff, buf->y_stride,
                             H_DCT, bd);
-        for (int k = 4; k < 16; ++k) {
-          const int64_t this_energy =
-              coeff[k] * coeff[k] + coeff[k + 16] * coeff[k + 16] +
-              coeff[k + 32] * coeff[k + 32] + coeff[k + 48] * coeff[k + 48];
-          freq_energy[k] += ROUND_POWER_OF_TWO(this_energy, 2 * (bd - 8));
+        for (int k = 8; k < 16; ++k) {
+          const uint64_t this_energy =
+              ((int64_t)coeff[k] * coeff[k]) +
+              ((int64_t)coeff[k + 16] * coeff[k + 16]) +
+              ((int64_t)coeff[k + 32] * coeff[k + 32]) +
+              ((int64_t)coeff[k + 48] * coeff[k + 48]);
+          freq_energy[k - 8] +=
+              ROUND_POWER_OF_TWO(this_energy, 2 + 2 * (bd - 8));
         }
         n++;
       }
     }
   } else {
     assert(bd == 8);
-    int16_t src16[16 * 4];
+    DECLARE_ALIGNED(16, int16_t, src16[16 * 4]);
     for (int i = 0; i < height - 4; i += 4) {
       for (int j = 0; j < width - 16; j += 16) {
         for (int ii = 0; ii < 4; ++ii)
           for (int jj = 0; jj < 16; ++jj)
             src16[ii * 16 + jj] =
                 buf->y_buffer[(i + ii) * buf->y_stride + (j + jj)];
-        av1_fwd_txfm2d_16x4(src16, coeff, buf->y_stride, H_DCT, bd);
-        for (int k = 4; k < 16; ++k) {
-          const int64_t this_energy =
-              coeff[k] * coeff[k] + coeff[k + 16] * coeff[k + 16] +
-              coeff[k + 32] * coeff[k + 32] + coeff[k + 48] * coeff[k + 48];
-          freq_energy[k] += this_energy;
+        av1_fwd_txfm2d_16x4(src16, coeff, 16, H_DCT, bd);
+        for (int k = 8; k < 16; ++k) {
+          const uint64_t this_energy =
+              ((int64_t)coeff[k] * coeff[k]) +
+              ((int64_t)coeff[k + 16] * coeff[k + 16]) +
+              ((int64_t)coeff[k + 32] * coeff[k + 32]) +
+              ((int64_t)coeff[k + 48] * coeff[k + 48]);
+          freq_energy[k - 8] += ROUND_POWER_OF_TWO(this_energy, 2);
         }
         n++;
       }
     }
   }
-  for (int k = 4; k < 16; ++k) energy[k] = (double)freq_energy[k] / (4 * n);
+  if (n) {
+    for (int k = 0; k < 8; ++k) energy[k] = (double)freq_energy[k] / n;
+    // Convert to cumulative energy
+    for (int k = 6; k >= 0; --k) energy[k] += energy[k + 1];
+  } else {
+    for (int k = 0; k < 8; ++k) energy[k] = 1e+20;
+  }
 }
 
 static void set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv,
@@ -4100,7 +4112,7 @@
     cm->rst_info[i].frame_restoration_type = RESTORE_NONE;
 
   av1_alloc_restoration_buffers(cm);
-  alloc_util_frame_buffers(cpi);  // TODO(afergs): Remove? Gets called anyways.
+  alloc_util_frame_buffers(cpi);
   init_motion_estimation(cpi);
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -4149,15 +4161,24 @@
   return new_denom;
 }
 
-static void get_superres_characteristics(const AV1_COMP *cpi,
-                                         uint8_t *max_denom, int *qthresh) {
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  const AV1_COMMON *cm = &cpi->common;
-  // TODO(debargha): Determine the parameters below automatically based on
-  // frequency analysis of the source
-  *max_denom = SCALE_NUMERATOR << 1;
-  *qthresh = (frame_is_intra_only(cm)) ? oxcf->superres_kf_qthresh
-                                       : oxcf->superres_qthresh;
+#define ENERGY_BY_Q2_THRESH 0.015
+
+static uint8_t get_superres_denom_from_qindex_energy(int qindex, double *energy,
+                                                     double thresh) {
+  const double q = av1_convert_qindex_to_q(qindex, AOM_BITS_8);
+  const double threshq2 = thresh * q * q;
+  int k;
+  for (k = 8; k > 0; --k) {
+    if (energy[k - 1] > threshq2) break;
+  }
+  return 2 * SCALE_NUMERATOR - k;
+}
+
+static uint8_t get_superres_denom_for_qindex(const AV1_COMP *cpi, int qindex) {
+  double energy[8];
+  analyze_hor_freq(cpi, energy);
+  return get_superres_denom_from_qindex_energy(qindex, energy,
+                                               ENERGY_BY_Q2_THRESH);
 }
 
 static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
@@ -4192,12 +4213,15 @@
       const int q = av1_rc_pick_q_and_bounds(
           cpi, cpi->oxcf.width, cpi->oxcf.height, &bottom_index, &top_index);
 
-      int qthresh;
-      uint8_t max_denom;
-      get_superres_characteristics(cpi, &max_denom, &qthresh);
+      const int qthresh = (frame_is_intra_only(&cpi->common))
+                              ? oxcf->superres_kf_qthresh
+                              : oxcf->superres_qthresh;
       if (q < qthresh) {
         new_denom = SCALE_NUMERATOR;
       } else {
+        // TODO(debargha): Experiment with the variant below.
+        // new_denom = get_superres_denom_for_qindex(cpi, q);
+        uint8_t max_denom = get_superres_denom_for_qindex(cpi, MAXQ);
         if (max_denom == SCALE_NUMERATOR) {
           new_denom = max_denom;
           break;