Rewrite filter_selectively_horiz for parallel loopfiltering Added loop filter mask checking, and made the caller function ready for implementation of parallel loopfiltering in horizontal direction. Next, we need to go through the loopfilter functions (both c and optimized versions), and provide 16-byte wide loopfiltering for each filter type. Change-Id: Ifef47e7ef9086ebc2fd6ca7ede8f27c9bbf79e66

commit: b45438181c1ec6e448715ba6f8f74c79680de0a4 [log] [tgz]
author: Yunqing Wang <yunqingwang@google.com> Fri Nov 08 16:12:04 2013 -0800
committer: Yunqing Wang <yunqingwang@google.com> Mon Nov 11 17:06:01 2013 -0800
tree: 8e539e507cd608ac1f4af3cb1c6003ecfe65dd0b
parent: 49cf335e7f076e90ff45a51b374d7d11576efbd5 [diff]
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 408fb55..62a890e 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c

@@ -383,7 +383,6 @@
                                      unsigned int mask_8x8,
                                      unsigned int mask_4x4,
                                      unsigned int mask_4x4_int,
-                                     int only_4x4_1,
                                      const loop_filter_info_n *lfi_n,
                                      const uint8_t *lfl) {
   unsigned int mask;
@@ -392,37 +391,101 @@
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= count) {
     const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
     count = 1;
     if (mask & 1) {
-      if (!only_4x4_1) {
-        if (mask_16x16 & 1) {
-          if ((mask_16x16 & 3) == 3) {
-            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                         lfi->hev_thr, 2);
-            count = 2;
-          } else {
-            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                         lfi->hev_thr, 1);
-          }
-          assert(!(mask_8x8 & 1));
-          assert(!(mask_4x4 & 1));
-          assert(!(mask_4x4_int & 1));
-        } else if (mask_8x8 & 1) {
+      if (mask_16x16 & 1) {
+        if ((mask_16x16 & 3) == 3) {
+          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, 2);
+          count = 2;
+        } else {
+          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, 1);
+        }
+        assert(!(mask_8x8 & 1));
+        assert(!(mask_4x4 & 1));
+        assert(!(mask_4x4_int & 1));
+      } else if (mask_8x8 & 1) {
+        if ((mask_8x8 & 3) == 3) {
+          // Next block's thresholds
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+          // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
           vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
                                             lfi->hev_thr, 1);
-          assert(!(mask_16x16 & 1));
-          assert(!(mask_4x4 & 1));
-        } else if (mask_4x4 & 1) {
-          vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
-                                          lfi->hev_thr, 1);
-          assert(!(mask_16x16 & 1));
-          assert(!(mask_8x8 & 1));
-        }
-      }
+          vp9_mbloop_filter_horizontal_edge(s + 8, pitch, lfin->mblim,
+                                            lfin->lim, lfin->hev_thr, 1);
 
-      if (mask_4x4_int & 1)
+          if ((mask_4x4_int & 3) == 3) {
+            // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
+            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, 1);
+            vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                            lfin->mblim, lfin->lim,
+                                            lfin->hev_thr, 1);
+          } else {
+            if (mask_4x4_int & 1)
+              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                              lfi->lim, lfi->hev_thr, 1);
+            else if (mask_4x4_int & 2)
+              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                              lfin->mblim, lfin->lim,
+                                              lfin->hev_thr, 1);
+          }
+          count = 2;
+        } else {
+          vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                            lfi->hev_thr, 1);
+
+          if (mask_4x4_int & 1)
+            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, 1);
+        }
+        assert(!(mask_16x16 & 1));
+        assert(!(mask_4x4 & 1));
+      } else if (mask_4x4 & 1) {
+        if ((mask_4x4 & 3) == 3) {
+          // Next block's thresholds
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+          // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
+          vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                            lfi->hev_thr, 1);
+          vp9_loop_filter_horizontal_edge(s + 8, pitch, lfin->mblim, lfin->lim,
+                                            lfin->hev_thr, 1);
+
+          if ((mask_4x4_int & 3) == 3) {
+            // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
+            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, 1);
+            vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                            lfin->mblim, lfin->lim,
+                                            lfin->hev_thr, 1);
+          } else {
+            if (mask_4x4_int & 1)
+              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                              lfi->lim, lfi->hev_thr, 1);
+            else if (mask_4x4_int & 2)
+              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                              lfin->mblim, lfin->lim,
+                                              lfin->hev_thr, 1);
+          }
+          count = 2;
+        } else {
+        vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                        lfi->hev_thr, 1);
+
+        if (mask_4x4_int & 1)
+          vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                          lfi->lim, lfi->hev_thr, 1);
+        }
+        assert(!(mask_16x16 & 1));
+        assert(!(mask_8x8 & 1));
+      } else if (mask_4x4_int & 1) {
         vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
                                         lfi->lim, lfi->hev_thr, 1);
+      }
     }
     s += 8 * count;
     lfl += count;
@@ -913,11 +976,25 @@
     const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
     const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
 
+    unsigned int mask_16x16_r;
+    unsigned int mask_8x8_r;
+    unsigned int mask_4x4_r;
+
+    if (mi_row + r == 0) {
+      mask_16x16_r = 0;
+      mask_8x8_r = 0;
+      mask_4x4_r = 0;
+    } else {
+      mask_16x16_r = mask_16x16[r];
+      mask_8x8_r = mask_8x8[r];
+      mask_4x4_r = mask_4x4[r];
+    }
+
     filter_selectively_horiz(dst->buf, dst->stride,
-                             mask_16x16[r],
-                             mask_8x8[r],
-                             mask_4x4[r],
-                             mask_4x4_int_r, mi_row + r == 0,
+                             mask_16x16_r,
+                             mask_8x8_r,
+                             mask_4x4_r,
+                             mask_4x4_int_r,
                              &cm->lf_info, lfl[r]);
     dst->buf += 8 * dst->stride;
   }
@@ -969,12 +1046,25 @@
     mask_4x4 = lfm->above_y[TX_4X4];
 
     for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
+      unsigned int mask_16x16_r;
+      unsigned int mask_8x8_r;
+      unsigned int mask_4x4_r;
+
+      if (mi_row + r == 0) {
+        mask_16x16_r = 0;
+        mask_8x8_r = 0;
+        mask_4x4_r = 0;
+      } else {
+        mask_16x16_r = mask_16x16 & 0xff;
+        mask_8x8_r = mask_8x8 & 0xff;
+        mask_4x4_r = mask_4x4 & 0xff;
+      }
+
       filter_selectively_horiz(dst->buf, dst->stride,
-                               mask_16x16 & 0xff,
-                               mask_8x8 & 0xff,
-                               mask_4x4 & 0xff,
+                               mask_16x16_r,
+                               mask_8x8_r,
+                               mask_4x4_r,
                                mask_4x4_int_row[r],
-                               mi_row + r == 0,
                                &cm->lf_info, lfm->lfl_y[r]);
 
       dst->buf += 8 * dst->stride;
@@ -1021,13 +1111,25 @@
       const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
       const unsigned int mask_4x4_int_r = skip_border_4x4_r ?
           0 : (mask_4x4_int_row[r]);
+      unsigned int mask_16x16_r;
+      unsigned int mask_8x8_r;
+      unsigned int mask_4x4_r;
+
+      if (mi_row + r == 0) {
+        mask_16x16_r = 0;
+        mask_8x8_r = 0;
+        mask_4x4_r = 0;
+      } else {
+        mask_16x16_r = mask_16x16 & 0xf;
+        mask_8x8_r = mask_8x8 & 0xf;
+        mask_4x4_r = mask_4x4 & 0xf;
+      }
 
       filter_selectively_horiz(dst->buf, dst->stride,
-                               mask_16x16 & 0xf,
-                               mask_8x8 & 0xf,
-                               mask_4x4 & 0xf,
+                               mask_16x16_r,
+                               mask_8x8_r,
+                               mask_4x4_r,
                                mask_4x4_int_r,
-                               mi_row + r == 0,
                                &cm->lf_info, lfm->lfl_uv[r]);
 
       dst->buf += 8 * dst->stride;
commit	b45438181c1ec6e448715ba6f8f74c79680de0a4	[log] [tgz]
author	Yunqing Wang <yunqingwang@google.com>	Fri Nov 08 16:12:04 2013 -0800
committer	Yunqing Wang <yunqingwang@google.com>	Mon Nov 11 17:06:01 2013 -0800
tree	8e539e507cd608ac1f4af3cb1c6003ecfe65dd0b
parent	49cf335e7f076e90ff45a51b374d7d11576efbd5 [diff]