diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index 0451fa1..ff24a3e 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -113,6 +113,10 @@
                                                       -1, -1, -1, 0,  1,  2,
                                                       3,  -1, -1, -1, -1, -1,
                                                       -1, -1, -1, -1 };
+const int mask_id_table_vert_border[BLOCK_SIZES_ALL] = { 0,  47, 49, 19, 51, 53,
+                                                         33, 55, 57, 42, 59, 60,
+                                                         46, -1, -1, -1, 61, 62,
+                                                         63, 64, 65, 66 };
 
 const FilterMask left_mask_univariant_reordered[67] = {
   // TX_4X4
@@ -1407,13 +1411,13 @@
 
         if ((mask_16x16 & two_block_mask) == two_block_mask) {
           if (plane) {
-            aom_highbd_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
-                                             lfi->hev_thr, lfin->mblim,
-                                             lfin->lim, lfin->hev_thr, bd);
+            aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
+                                               lfi->hev_thr, lfin->mblim,
+                                               lfin->lim, lfin->hev_thr, bd);
           } else {
-            aom_highbd_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
-                                              lfi->hev_thr, lfin->mblim,
-                                              lfin->lim, lfin->hev_thr, bd);
+            aom_highbd_lpf_horizontal_14_dual_c(s, pitch, lfi->mblim, lfi->lim,
+                                                lfi->hev_thr, lfin->mblim,
+                                                lfin->lim, lfin->hev_thr, bd);
           }
           count = 2;
         } else {
@@ -1426,13 +1430,13 @@
 
         if ((mask_8x8 & two_block_mask) == two_block_mask) {
           if (plane) {
-            aom_highbd_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
-                                             lfi->hev_thr, lfin->mblim,
-                                             lfin->lim, lfin->hev_thr, bd);
+            aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
+                                               lfi->hev_thr, lfin->mblim,
+                                               lfin->lim, lfin->hev_thr, bd);
           } else {
-            aom_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
-                                             lfi->hev_thr, lfin->mblim,
-                                             lfin->lim, lfin->hev_thr, bd);
+            aom_highbd_lpf_horizontal_8_dual_c(s, pitch, lfi->mblim, lfi->lim,
+                                               lfi->hev_thr, lfin->mblim,
+                                               lfin->lim, lfin->hev_thr, bd);
           }
           count = 2;
         } else {
@@ -1441,9 +1445,9 @@
         }
       } else if (mask_4x4 & 1) {
         if ((mask_4x4 & two_block_mask) == two_block_mask) {
-          aom_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
-                                           lfi->hev_thr, lfin->mblim, lfin->lim,
-                                           lfin->hev_thr, bd);
+          aom_highbd_lpf_horizontal_4_dual_c(s, pitch, lfi->mblim, lfi->lim,
+                                             lfi->hev_thr, lfin->mblim,
+                                             lfin->lim, lfin->hev_thr, bd);
           count = 2;
         } else {
           aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
@@ -2394,31 +2398,6 @@
     else if (plane == 2 && !(cm->lf.filter_level_v))
       continue;
 
-#if LOOP_FILTER_BITMASK
-    // filter all vertical edges every superblock (could be 128x128 or 64x64)
-    for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
-      for (mi_col = col_start; mi_col < col_end;
-           mi_col += cm->seq_params.mib_size) {
-        av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
-                             mi_col, plane, plane + 1);
-
-        av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x,
-                          pd[plane].subsampling_y, stop, col_end);
-        av1_filter_block_plane_ver(cm, &pd[plane], plane, mi_row, mi_col);
-      }
-    }
-
-    // filter all horizontal edges every superblock
-    for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
-      for (mi_col = col_start; mi_col < col_end;
-           mi_col += cm->seq_params.mib_size) {
-        av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
-                             mi_col, plane, plane + 1);
-
-        av1_filter_block_plane_hor(cm, &pd[plane], plane, mi_row, mi_col);
-      }
-    }
-#else
     if (cm->lf.combine_vert_horz_lf) {
       // filter all vertical and horizontal edges in every 128x128 super block
       for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
@@ -2464,7 +2443,6 @@
         }
       }
     }
-#endif  // LOOP_FILTER_BITMASK
   }
 }
 
diff --git a/av1/common/av1_loopfilter.h b/av1/common/av1_loopfilter.h
index afe0c44..4ea4e16 100644
--- a/av1/common/av1_loopfilter.h
+++ b/av1/common/av1_loopfilter.h
@@ -175,40 +175,6 @@
                                      int mi_row, int mi_col);
 int get_index_shift(int mi_col, int mi_row, int *index);
 
-static const FilterMask left_txform_mask[TX_SIZES] = {
-  { { 0x0000000000000001ULL,  // TX_4X4,
-      0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
-  { { 0x0000000000010001ULL,  // TX_8X8,
-      0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
-  { { 0x0001000100010001ULL,  // TX_16X16,
-      0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
-  { { 0x0001000100010001ULL,  // TX_32X32,
-      0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
-  { { 0x0001000100010001ULL,  // TX_64X64,
-      0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL } },
-};
-
-static const uint64_t above_txform_mask[2][TX_SIZES] = {
-  {
-      0x0000000000000001ULL,  // TX_4X4
-      0x0000000000000003ULL,  // TX_8X8
-      0x000000000000000fULL,  // TX_16X16
-      0x00000000000000ffULL,  // TX_32X32
-      0x000000000000ffffULL,  // TX_64X64
-  },
-  {
-      0x0000000000000001ULL,  // TX_4X4
-      0x0000000000000005ULL,  // TX_8X8
-      0x0000000000000055ULL,  // TX_16X16
-      0x0000000000005555ULL,  // TX_32X32
-      0x0000000055555555ULL,  // TX_64X64
-  },
-};
-
 extern const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL];
 
 extern const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL];
@@ -217,9 +183,14 @@
 
 extern const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL];
 
+// Maps a block size to its entry id in left_mask_univariant_reordered,
+// i.e. the entry for a block of size mxn with transform size TX_mxn.
+extern const int mask_id_table_vert_border[BLOCK_SIZES_ALL];
+
 extern const FilterMask left_mask_univariant_reordered[67];
 
 extern const FilterMask above_mask_univariant_reordered[67];
+
 #endif
 
 #ifdef __cplusplus
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 869c06e..aaddb9d 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -86,7 +86,7 @@
 // TODO(chengchen): Temporal flag serve as experimental flag for WIP
 // bitmask construction.
 // Shall be removed when bitmask code is completely checkedin
-#define LOOP_FILTER_BITMASK 0
+#define LOOP_FILTER_BITMASK 1
 
 #define PROFILE_BITS 3
 // The following three profiles are currently defined.
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index d083d24..a00e6e8 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -5516,19 +5516,17 @@
 
   if (!cm->allow_intrabc && !cm->single_tile_decoding) {
     if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
-#if LOOP_FILTER_BITMASK
-      av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb, 1, 0,
-                            num_planes, 0);
-#else
       if (pbi->num_workers > 1) {
         av1_loop_filter_frame_mt(get_frame_new_buffer(cm), cm, &pbi->mb, 0,
                                  num_planes, 0, pbi->tile_workers,
                                  pbi->num_workers, &pbi->lf_row_sync);
       } else {
-        av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb, 0,
-                              num_planes, 0);
-      }
+        av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
+#if LOOP_FILTER_BITMASK
+                              1,
 #endif
+                              0, num_planes, 0);
+      }
     }
 
     const int do_loop_restoration =
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index a2da2df..cd44731 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4227,16 +4227,16 @@
   }
 
   if (lf->filter_level[0] || lf->filter_level[1]) {
-#if LOOP_FILTER_BITMASK
-    av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, 0, num_planes, 0);
-#else
     if (cpi->num_workers > 1)
       av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd, 0, num_planes, 0,
                                cpi->workers, cpi->num_workers,
                                &cpi->lf_row_sync);
     else
-      av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, num_planes, 0);
+      av1_loop_filter_frame(cm->frame_to_show, cm, xd,
+#if LOOP_FILTER_BITMASK
+                            0,
 #endif
+                            0, num_planes, 0);
   }
 
   if (!no_restoration)
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index c5508e2..c99d0a2 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -67,20 +67,18 @@
     case 2: cm->lf.filter_level_v = filter_level[0]; break;
   }
 
-      // TODO(any): please enable multi-thread and remove the flag when loop
-      // filter mask is compatible with multi-thread.
-#if LOOP_FILTER_BITMASK
-  av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, 0, plane,
-                        plane + 1, partial_frame);
-#else
+  // TODO(any): please enable multi-thread and remove the flag when loop
+  // filter mask is compatible with multi-thread.
   if (cpi->num_workers > 1)
     av1_loop_filter_frame_mt(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
                              plane + 1, partial_frame, cpi->workers,
                              cpi->num_workers, &cpi->lf_row_sync);
   else
-    av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
-                          plane + 1, partial_frame);
+    av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd,
+#if LOOP_FILTER_BITMASK
+                          0,
 #endif
+                          plane, plane + 1, partial_frame);
 
   filt_err = aom_get_sse_plane(sd, cm->frame_to_show, plane,
                                cm->seq_params.use_highbitdepth);
