Reconstruct dqcoeff inline with coeff decoding

Reconstruct dqcoeff inline with transform coefficient level
decoding process. This saves one pass through 0 to eob scan. The
txb coeff decoding speed is up by 5%.

Change-Id: Icd88a9dca4595a180d514ce4b067ecf9dd90a609
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c
index 9d400b7..d70340a 100644
--- a/av1/decoder/decodetxb.c
+++ b/av1/decoder/decodetxb.c
@@ -109,8 +109,6 @@
   const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
   const int16_t *scan = scan_order->scan;
 
-  unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2] =
-      (counts) ? &counts->nz_map[txs_ctx][plane_type] : NULL;
   int16_t dummy;
   int16_t max_eob_pt = get_eob_pos_token(seg_eob, &dummy);
 
@@ -165,14 +163,14 @@
 
   for (int i = 0; i < *eob; ++i) {
     c = *eob - 1 - i;
+    int pos = scan[c];
 #if CONFIG_LV_MAP_MULTI
-    (void)nz_map_count;
     int coeff_ctx =
         get_nz_map_ctx(levels, c, scan, bwl, height, tx_type, c == *eob - 1);
     int level = av1_read_record_symbol(
         counts, r, ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], 4,
         ACCT_STR);
-    levels[get_paded_idx(scan[c], bwl)] = level;
+    levels[get_paded_idx(pos, bwl)] = level;
     if (level) *max_scan_line = AOMMAX(*max_scan_line, scan[c]);
     // printf("base_cdf: %d %d %2d\n", txs_ctx, plane_type, coeff_ctx);
     // printf("base_cdf: %d %d %2d : %3d %3d %3d\n", txs_ctx, plane_type,
@@ -182,6 +180,7 @@
     //            ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx][2]>>7);
     if (level < 3) {
       cul_level += level;
+      tcoeffs[pos] = (level * dequant[!!c]) >> shift;
     } else if (update_eob < 0) {
       update_eob = c;
     }
@@ -197,11 +196,6 @@
       is_nz = 1;
     }
 
-    // set non-zero coefficient map.
-    levels[get_paded_idx(scan[c], bwl)] = is_nz;
-
-    if (counts) ++(*nz_map_count)[coeff_ctx][is_nz];
-
 #if USE_CAUSAL_BASE_CTX
     if (is_nz) {
       int k;
@@ -215,13 +209,20 @@
         // semantic: is_k = 1 if level > (k+1)
         if (is_k == 0) {
           cul_level += k + 1;
+          tcoeffs[pos] = ((k + 1) * dequant[!!c]) >> shift;
           break;
         }
       }
-      levels[get_paded_idx(scan[c], bwl)] = k + 1;
-      *max_scan_line = AOMMAX(*max_scan_line, scan[c]);
+      levels[get_paded_idx(pos, bwl)] = k + 1;
+      *max_scan_line = AOMMAX(*max_scan_line, pos);
       if (update_eob < 0 && k == NUM_BASE_LEVELS) update_eob = c;
     }
+#else
+    // set non-zero coefficient map.
+    unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2] =
+        (counts) ? &counts->nz_map[txs_ctx][plane_type] : NULL;
+    levels[get_paded_idx(pos, bwl)] = is_nz;
+    if (counts) ++(*nz_map_count)[coeff_ctx][is_nz];
 #endif
 #endif
   }
@@ -260,8 +261,9 @@
   // Loop to decode all signs in the transform block,
   // starting with the sign of the DC (if applicable)
   for (c = 0; c < *eob; ++c) {
-    int8_t *const sign = &signs[scan[c]];
-    if (levels[get_paded_idx(scan[c], bwl)] == 0) continue;
+    int pos = scan[c];
+    int8_t *const sign = &signs[pos];
+    if (levels[get_paded_idx(pos, bwl)] == 0) continue;
     if (c == 0) {
       int dc_sign_ctx = txb_ctx->dc_sign_ctx;
 #if LV_MAP_PROB
@@ -275,18 +277,20 @@
     } else {
       *sign = av1_read_record_bit(counts, r, ACCT_STR);
     }
+    if (*sign) tcoeffs[pos] = -tcoeffs[scan[c]];
   }
 
   if (update_eob >= 0) {
     av1_get_br_level_counts(levels, width, height, level_counts);
     for (c = update_eob; c >= 0; --c) {
-      uint8_t *const level = &levels[get_paded_idx(scan[c], bwl)];
+      int pos = scan[c];
+      uint8_t *const level = &levels[get_paded_idx(pos, bwl)];
       int idx;
       int ctx;
 
       if (*level <= NUM_BASE_LEVELS) continue;
 
-      ctx = get_br_ctx(levels, scan[c], bwl, level_counts[scan[c]]);
+      ctx = get_br_ctx(levels, pos, bwl, level_counts[pos]);
 
 #if CONFIG_LV_MAP_MULTI
       for (idx = 0; idx < COEFF_BASE_RANGE / (BR_CDF_SIZE - 1); ++idx) {
@@ -298,6 +302,9 @@
       }
       if (*level <= NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
         cul_level += *level;
+        tran_low_t t = (*level * dequant[!!c]) >> shift;
+        if (signs[pos]) t = -t;
+        tcoeffs[pos] = t;
         continue;
       }
 #else
@@ -327,6 +334,9 @@
 
           *level = NUM_BASE_LEVELS + 1 + br_base + br_offset;
           cul_level += *level;
+          tran_low_t t = (*level * dequant[!!c]) >> shift;
+          if (signs[pos]) t = -t;
+          tcoeffs[pos] = t;
           break;
         }
         if (counts) ++counts->coeff_br[txs_ctx][plane_type][idx][ctx][0];
@@ -337,24 +347,14 @@
       // decode 0-th order Golomb code
       *level = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
       // Save golomb in tcoeffs because adding it to level may incur overflow
-      tcoeffs[scan[c]] = read_golomb(xd, r, counts);
-      cul_level += *level + tcoeffs[scan[c]];
+      tran_low_t t = *level + read_golomb(xd, r, counts);
+      cul_level += t;
+      t = (t * dequant[!!c]) >> shift;
+      if (signs[pos]) t = -t;
+      tcoeffs[pos] = t;
     }
   }
 
-  for (c = 0; c < *eob; ++c) {
-    const int16_t dqv = (c == 0) ? dequant[0] : dequant[1];
-    const int level = levels[get_paded_idx(scan[c], bwl)];
-    // printf("level: %3d\n", level);
-    // printf("activity: %d %d %2d %3d\n", txs_ctx, plane_type, base_cul_level,
-    // level);
-    const tran_low_t t = ((level + tcoeffs[scan[c]]) * dqv) >> shift;
-#if CONFIG_SYMBOLRATE
-    av1_record_coeff(counts, level);
-#endif
-    tcoeffs[scan[c]] = signs[scan[c]] ? -t : t;
-  }
-
   cul_level = AOMMIN(63, cul_level);
 
   // DC value