Modify av1_foreach_transformed_block_in_plane()

In this CL, av1_foreach_transformed_block_in_plane() is
modified to handle separately the case where the plane block size
equals the transform block size. In this case, the visitor
function is invoked directly, exactly once, with zero offsets, and
the function returns early. An assert is added at the end of the
general path to ensure that the visitor function is called at
least once whenever av1_foreach_transformed_block_in_plane() is
called. This is a bit-exact change.

For AVIF still-image encode, the encode time reduction is as follows:

             Encode Time
cpu-used     Reduction(%)
    6           0.309
    7           0.822
    8           0.643
    9           1.087

Change-Id: Ia7b7398c9b13b2a50554612c4c7417fcc9e826bb
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 8dee801..c0535b8 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -556,6 +556,13 @@
   // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
   // transform size varies per plane, look it up in a common way.
   const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
+  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
+  // Call visit() directly with zero offsets if the current block size is the
+  // same as the transform block size.
+  if (plane_bsize == tx_bsize) {
+    visit(plane, 0, 0, 0, plane_bsize, tx_size, arg);
+    return;
+  }
   const uint8_t txw_unit = tx_size_wide_unit[tx_size];
   const uint8_t txh_unit = tx_size_high_unit[tx_size];
   const int step = txw_unit * txh_unit;
@@ -588,6 +595,8 @@
       }
     }
   }
+  // Check if visit() is invoked at least once.
+  assert(i >= 1);
 }
 
 typedef struct encode_block_pass1_args {