Properly schedule the transform block recursion order

This commit replaces the offset-based block index calculation with an
incremental one. It does not change the coding statistics.

Change-Id: I3789294eb45416bd0823e773ec30f05ed41ba0dc
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 1de931e..0459603 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -856,7 +856,8 @@
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
       pack_txb_tokens(w, tp, tok_end, xd, mbmi, plane, plane_bsize, bit_depth,
-                      block + i * step, offsetr, offsetc, sub_txs);
+                      block, offsetr, offsetc, sub_txs);
+      block += step;
     }
   }
 }
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 8b62a22..f7f9021 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -891,8 +891,9 @@
 
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
-      encode_block_inter(plane, block + i * step, offsetr, offsetc, plane_bsize,
-                         sub_txs, arg);
+      encode_block_inter(plane, block, offsetr, offsetc, plane_bsize, sub_txs,
+                         arg);
+      block += step;
     }
   }
 }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index d9409d5..217a365 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3134,18 +3134,23 @@
     assert(tx_size < TX_SIZES);
 #endif  // CONFIG_EXT_TX
     for (i = 0; i < 4 && this_cost_valid; ++i) {
-      int offsetr = (i >> 1) * bsl;
-      int offsetc = (i & 0x01) * bsl;
-      select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
-                      block + i * sub_step, sub_txs, depth + 1, plane_bsize, ta,
-                      tl, tx_above, tx_left, &this_rd_stats,
-                      ref_best_rd - tmp_rd, &this_cost_valid);
+      int offsetr = blk_row + (i >> 1) * bsl;
+      int offsetc = blk_col + (i & 0x01) * bsl;
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+
+      select_tx_block(cpi, x, offsetr, offsetc, plane, block, sub_txs,
+                      depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
+                      &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid);
+
       sum_rate += this_rd_stats.rate;
       sum_dist += this_rd_stats.dist;
       sum_bsse += this_rd_stats.sse;
       all_skip &= this_rd_stats.skip;
+
       tmp_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (this_rd < tmp_rd) break;
+      block += sub_step;
     }
     if (this_cost_valid) sum_rd = tmp_rd;
   }
@@ -3482,11 +3487,14 @@
     assert(bsl > 0);
 
     for (i = 0; i < 4; ++i) {
-      int offsetr = (i >> 1) * bsl;
-      int offsetc = (i & 0x01) * bsl;
-      tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
-                  block + i * step, sub_txs, plane_bsize, above_ctx, left_ctx,
-                  rd_stats);
+      int offsetr = blk_row + (i >> 1) * bsl;
+      int offsetc = blk_col + (i & 0x01) * bsl;
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+
+      tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs, plane_bsize,
+                  above_ctx, left_ctx, rd_stats);
+      block += step;
     }
   }
 }
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 82cf39a..212f5d7 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -610,7 +610,8 @@
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
       tokenize_vartx(td, t, dry_run, sub_txs, plane_bsize, offsetr, offsetc,
-                     block + i * step, plane, arg);
+                     block, plane, arg);
+      block += step;
     }
   }
 }