optimize_txb: Separate out the fixed-eob condition

This speeds up the encoder by 4%.

Change-Id: I00630c735d54b565de710e0b290c06f8fcf1bd93
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 1125bcd..fd6f618 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -232,8 +232,7 @@
 static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx,
                                 const int is_eob,
                                 const LV_MAP_COEFF_COST *const txb_costs,
-                                const TxbInfo *const txb_info,
-                                int has_nz_tail) {
+                                const TxbInfo *const txb_info) {
   const int16_t *const scan = txb_info->scan_order->scan;
   const int coeff_idx = scan[scan_idx];
   const tran_low_t qc = txb_info->qcoeff[coeff_idx];
@@ -289,24 +288,32 @@
     stats->rate_low = low_qc_cost;
     stats->rd_low = RDCOST(txb_info->rdmult, stats->rate_low, stats->dist_low);
   }
-  if ((has_nz_tail < 2) && !is_eob) {
-    (void)levels;
-    const int coeff_ctx_temp =
-        get_nz_map_ctx(levels, coeff_idx, txb_info->bwl, txb_info->height,
-                       scan_idx, 1, txb_info->tx_size, txb_info->tx_type);
-    const int qc_eob_cost =
-        get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs, coeff_ctx_temp);
-    int64_t rd_eob = RDCOST(txb_info->rdmult, qc_eob_cost, stats->dist);
-    if (stats->low_qc != 0) {
-      const int low_qc_eob_cost = get_coeff_cost(
-          stats->low_qc, scan_idx, 1, txb_info, txb_costs, coeff_ctx_temp);
-      int64_t rd_eob_low =
-          RDCOST(txb_info->rdmult, low_qc_eob_cost, stats->dist_low);
-      rd_eob = (rd_eob > rd_eob_low) ? rd_eob_low : rd_eob;
-    }
+}
 
-    stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - rd_eob;
+static void get_dist_cost_stats_with_eob(
+    LevelDownStats *const stats, const int scan_idx,
+    const LV_MAP_COEFF_COST *const txb_costs, const TxbInfo *const txb_info) {
+  const int is_eob = 0;
+  get_dist_cost_stats(stats, scan_idx, is_eob, txb_costs, txb_info);
+
+  const int16_t *const scan = txb_info->scan_order->scan;
+  const int coeff_idx = scan[scan_idx];
+  const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+  const int coeff_ctx_temp = get_nz_map_ctx(
+      txb_info->levels, coeff_idx, txb_info->bwl, txb_info->height, scan_idx, 1,
+      txb_info->tx_size, txb_info->tx_type);
+  const int qc_eob_cost =
+      get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs, coeff_ctx_temp);
+  int64_t rd_eob = RDCOST(txb_info->rdmult, qc_eob_cost, stats->dist);
+  if (stats->low_qc != 0) {
+    const int low_qc_eob_cost = get_coeff_cost(
+        stats->low_qc, scan_idx, 1, txb_info, txb_costs, coeff_ctx_temp);
+    int64_t rd_eob_low =
+        RDCOST(txb_info->rdmult, low_qc_eob_cost, stats->dist_low);
+    rd_eob = (rd_eob > rd_eob_low) ? rd_eob_low : rd_eob;
   }
+
+  stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - rd_eob;
 }
 
 static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc,
@@ -749,71 +756,106 @@
   int64_t accu_dist = 0;
   int64_t prev_eob_rd_cost = INT64_MAX;
   int64_t cur_eob_rd_cost = 0;
-  int8_t has_nz_tail = 0;
 
-  for (int si = init_eob - 1; si >= 0; --si) {
+  {
+    const int si = init_eob - 1;
+    const int coeff_idx = scan[si];
+    LevelDownStats stats;
+    get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info);
+    if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
+      update = 1;
+      update_coeff(coeff_idx, stats.low_qc, txb_info);
+      accu_rate += stats.rate_low;
+      accu_dist += stats.dist_low;
+    } else {
+      accu_rate += stats.rate;
+      accu_dist += stats.dist;
+    }
+  }
+
+  int si = init_eob - 2;
+  int8_t has_nz_tail = 0;
+  // eob is not fixed
+  for (; si >= 0 && has_nz_tail < 2; --si) {
+    assert(si != init_eob - 1);
     const int coeff_idx = scan[si];
     tran_low_t qc = txb_info->qcoeff[coeff_idx];
 
     if (qc == 0) {
-      assert(si != init_eob - 1);
       const int coeff_ctx =
           get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
                                txb_info->tx_size, txb_info->tx_type);
       accu_rate += txb_costs->base_cost[coeff_ctx][0];
     } else {
       LevelDownStats stats;
-      get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info,
-                          has_nz_tail);
+      get_dist_cost_stats_with_eob(&stats, si, txb_costs, txb_info);
+      // check if it is better to make this the last significant coefficient
+      int cur_eob_rate = get_eob_cost(si + 1, seg_eob, txb_eob_costs, txb_costs,
+                                      txb_info->tx_type);
+      cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0);
+      prev_eob_rd_cost =
+          RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd;
+      if (cur_eob_rd_cost <= prev_eob_rd_cost) {
+        update = 1;
+        for (int j = si + 1; j < txb_info->eob; j++) {
+          const int coeff_pos_j = scan[j];
+          update_coeff(coeff_pos_j, 0, txb_info);
+        }
+        txb_info->eob = si + 1;
 
-      if (has_nz_tail < 2) {
-        if (si == init_eob - 1) {
-          if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
-            update = 1;
-            update_coeff(coeff_idx, stats.low_qc, txb_info);
-            accu_rate += stats.rate_low;
-            accu_dist += stats.dist_low;
-          } else {
-            accu_rate += stats.rate;
-            accu_dist += stats.dist;
-          }
-          continue;
+        // rerun cost calculation due to change of eob
+        accu_rate = cur_eob_rate;
+        accu_dist = 0;
+        get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info);
+        if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
+          update = 1;
+          update_coeff(coeff_idx, stats.low_qc, txb_info);
+          accu_rate += stats.rate_low;
+          accu_dist += stats.dist_low;
         } else {
-          // check if it is better to make this the last significant coefficient
-          int cur_eob_rate = get_eob_cost(si + 1, seg_eob, txb_eob_costs,
-                                          txb_costs, txb_info->tx_type);
-          cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0);
-          prev_eob_rd_cost =
-              RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd;
-          if (cur_eob_rd_cost <= prev_eob_rd_cost) {
+          accu_rate += stats.rate;
+          accu_dist += stats.dist;
+        }
+
+        // reset non zero tail when new eob is found
+        has_nz_tail = 0;
+      } else {
+        int bUpdCoeff = 0;
+        if (stats.rd_low < stats.rd) {
+          if ((si < txb_info->eob - 1)) {
+            bUpdCoeff = 1;
             update = 1;
-            for (int j = si + 1; j < txb_info->eob; j++) {
-              const int coeff_pos_j = scan[j];
-              update_coeff(coeff_pos_j, 0, txb_info);
-            }
-            txb_info->eob = si + 1;
-
-            // rerun cost calculation due to change of eob
-            accu_rate = cur_eob_rate;
-            accu_dist = 0;
-            get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info,
-                                has_nz_tail);
-            if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
-              update = 1;
-              update_coeff(coeff_idx, stats.low_qc, txb_info);
-              accu_rate += stats.rate_low;
-              accu_dist += stats.dist_low;
-            } else {
-              accu_rate += stats.rate;
-              accu_dist += stats.dist;
-            }
-
-            // reset non zero tail when new eob is found
-            has_nz_tail = 0;
-            continue;
           }
+        } else {
+          ++has_nz_tail;
+        }
+
+        if (bUpdCoeff) {
+          update_coeff(coeff_idx, stats.low_qc, txb_info);
+          accu_rate += stats.rate_low;
+          accu_dist += stats.dist_low;
+        } else {
+          accu_rate += stats.rate;
+          accu_dist += stats.dist;
         }
       }
+    }
+  }  // for (si)
+
+  // eob is fixed
+  for (; si >= 0; --si) {
+    assert(si != init_eob - 1);
+    const int coeff_idx = scan[si];
+    tran_low_t qc = txb_info->qcoeff[coeff_idx];
+
+    if (qc == 0) {
+      const int coeff_ctx =
+          get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
+                               txb_info->tx_size, txb_info->tx_type);
+      accu_rate += txb_costs->base_cost[coeff_ctx][0];
+    } else {
+      LevelDownStats stats;
+      get_dist_cost_stats(&stats, si, 0, txb_costs, txb_info);
 
       int bUpdCoeff = 0;
       if (stats.rd_low < stats.rd) {
@@ -821,10 +863,7 @@
           bUpdCoeff = 1;
           update = 1;
         }
-      } else {
-        ++has_nz_tail;
       }
-
       if (bUpdCoeff) {
         update_coeff(coeff_idx, stats.low_qc, txb_info);
         accu_rate += stats.rate_low;