Make intra coding processed in 64x64 unit

Process the intra block coding in units of at most 64x64. This simplifies the hardware design for 128x128 level block coding.

Change-Id: I6ef1dd8549bdb53b2d2394c6aa7d61cea93686bf

commit: 5b70174f93e8abde41ab766cf89322423b4a8ec3 [log] [tgz]
author: Jingning Han <jingning@google.com> Wed Jul 19 14:39:07 2017 -0700
committer: Jingning Han <jingning@google.com> Thu Jul 20 04:47:02 2017 +0000
tree: 499a493dea135edf36e22a6b47578c9e1c41ffc8
parent: c2b797fa2122b0e9842d52f02f050f9e7920fd91 [diff]
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index 7ec08e8..7a37f6f 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c

@@ -145,13 +145,27 @@
   const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
   const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
 
+  int blk_row, blk_col;
+
+  const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd);
+  int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
+  int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
+  mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
+  mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
+
   // Keep track of the row and column of the blocks we use so that we know
   // if we are in the unrestricted motion border.
-  for (r = 0; r < max_blocks_high; r += txh_unit) {
+  for (r = 0; r < max_blocks_high; r += mu_blocks_high) {
     // Skip visiting the sub blocks that are wholly within the UMV.
-    for (c = 0; c < max_blocks_wide; c += txw_unit) {
-      visit(plane, i, r, c, plane_bsize, tx_size, arg);
-      i += step;
+    for (c = 0; c < max_blocks_wide; c += mu_blocks_wide) {
+      const int unit_height = AOMMIN(mu_blocks_high + r, max_blocks_high);
+      const int unit_width = AOMMIN(mu_blocks_wide + c, max_blocks_wide);
+      for (blk_row = r; blk_row < unit_height; blk_row += txh_unit) {
+        for (blk_col = c; blk_col < unit_width; blk_col += txw_unit) {
+          visit(plane, i, blk_row, blk_col, plane_bsize, tx_size, arg);
+          i += step;
+        }
+      }
     }
   }
 }

diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index d424d63..50c7466 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c

@@ -1988,11 +1988,26 @@
                                pd->subsampling_y))
         continue;
 #endif
+      int blk_row, blk_col;
+      const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd);
+      int mu_blocks_wide =
+          block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
+      int mu_blocks_high =
+          block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
+      mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
+      mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
 
-      for (row = 0; row < max_blocks_high; row += stepr)
-        for (col = 0; col < max_blocks_wide; col += stepc)
-          predict_and_reconstruct_intra_block(cm, xd, r, mbmi, plane, row, col,
-                                              tx_size);
+      for (row = 0; row < max_blocks_high; row += mu_blocks_high) {
+        for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) {
+          const int unit_height = AOMMIN(mu_blocks_high + row, max_blocks_high);
+          const int unit_width = AOMMIN(mu_blocks_wide + col, max_blocks_wide);
+
+          for (blk_row = row; blk_row < unit_height; blk_row += stepr)
+            for (blk_col = col; blk_col < unit_width; blk_col += stepc)
+              predict_and_reconstruct_intra_block(cm, xd, r, mbmi, plane,
+                                                  blk_row, blk_col, tx_size);
+        }
+      }
     }
   } else {
     int ref;

diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index e6fb5b6..c9996c6 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c

@@ -2672,13 +2672,23 @@
         const TX_SIZE tx = av1_get_tx_size(plane, xd);
         const int bkw = tx_size_wide_unit[tx];
         const int bkh = tx_size_high_unit[tx];
-        for (row = 0; row < num_4x4_h; row += bkh) {
-          for (col = 0; col < num_4x4_w; col += bkw) {
+        int blk_row, blk_col;
+
+        for (row = 0; row < num_4x4_h; row += mu_blocks_high) {
+          for (col = 0; col < num_4x4_w; col += mu_blocks_wide) {
+            const int unit_height = AOMMIN(mu_blocks_high + row, num_4x4_h);
+            const int unit_width = AOMMIN(mu_blocks_wide + col, num_4x4_w);
+
+            for (blk_row = row; blk_row < unit_height; blk_row += bkh) {
+              for (blk_col = col; blk_col < unit_width; blk_col += bkw) {
 #if !CONFIG_PVQ
-            pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx, &token_stats);
+                pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx,
+                               &token_stats);
 #else
-            pack_pvq_tokens(w, x, xd, plane, bsize, tx);
+                pack_pvq_tokens(w, x, xd, plane, bsize, tx);
 #endif
+              }
+            }
           }
         }
 #endif  // CONFIG_LV_MAP
commit	5b70174f93e8abde41ab766cf89322423b4a8ec3	[log] [tgz]
author	Jingning Han <jingning@google.com>	Wed Jul 19 14:39:07 2017 -0700
committer	Jingning Han <jingning@google.com>	Thu Jul 20 04:47:02 2017 +0000
tree	499a493dea135edf36e22a6b47578c9e1c41ffc8
parent	c2b797fa2122b0e9842d52f02f050f9e7920fd91 [diff]