Remove av1_loop_filter_frame()

Replace it with the very similar av1_loop_filter_frame_mt().
Adapt av1_loop_filter_frame_mt() to handle single-threaded filtering.
Remove unused av1_filter_block_plane_horz/vert_test().

Change-Id: Ib4cb9353929521f00a62af52cbb9cd71eacc85c8
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index f95bc38..705345c 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -737,172 +737,3 @@
     }
   }
 }
-
-void av1_filter_block_plane_vert_test(const AV1_COMMON *const cm,
-                                      const MACROBLOCKD *const xd,
-                                      const int plane,
-                                      const MACROBLOCKD_PLANE *const plane_ptr,
-                                      const uint32_t mi_row,
-                                      const uint32_t mi_col) {
-  const uint32_t scale_horz = plane_ptr->subsampling_x;
-  const uint32_t scale_vert = plane_ptr->subsampling_y;
-  const int y_range = cm->mi_params.mi_rows >> scale_vert;
-  const int x_range = cm->mi_params.mi_cols >> scale_horz;
-  for (int y = 0; y < y_range; y++) {
-    for (int x = 0; x < x_range;) {
-      // inner loop always filter vertical edges in a MI block. If MI size
-      // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
-      // If 4x4 transform is used, it will then filter the internal edge
-      //  aligned with a 4x4 block
-      const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
-      const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
-      uint32_t advance_units;
-      TX_SIZE tx_size;
-      AV1_DEBLOCKING_PARAMETERS params;
-      memset(&params, 0, sizeof(params));
-
-      tx_size =
-          set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd,
-                             VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
-      if (tx_size == TX_INVALID) {
-        params.filter_length = 0;
-        tx_size = TX_4X4;
-      }
-
-      // advance the destination pointer
-      advance_units = tx_size_wide_unit[tx_size];
-      x += advance_units;
-    }
-  }
-}
-
-void av1_filter_block_plane_horz_test(const AV1_COMMON *const cm,
-                                      const MACROBLOCKD *const xd,
-                                      const int plane,
-                                      const MACROBLOCKD_PLANE *const plane_ptr,
-                                      const uint32_t mi_row,
-                                      const uint32_t mi_col) {
-  const uint32_t scale_horz = plane_ptr->subsampling_x;
-  const uint32_t scale_vert = plane_ptr->subsampling_y;
-  const int y_range = cm->mi_params.mi_rows >> scale_vert;
-  const int x_range = cm->mi_params.mi_cols >> scale_horz;
-  for (int x = 0; x < x_range; x++) {
-    for (int y = 0; y < y_range;) {
-      // inner loop always filter vertical edges in a MI block. If MI size
-      // is 8x8, it will first filter the vertical edge aligned with a 8x8
-      // block. If 4x4 transform is used, it will then filter the internal
-      // edge aligned with a 4x4 block
-      const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
-      const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
-      uint32_t advance_units;
-      TX_SIZE tx_size;
-      AV1_DEBLOCKING_PARAMETERS params;
-      memset(&params, 0, sizeof(params));
-
-      tx_size = set_lpf_parameters(
-          &params, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE,
-          curr_x, curr_y, plane, plane_ptr);
-      if (tx_size == TX_INVALID) {
-        params.filter_length = 0;
-        tx_size = TX_4X4;
-      }
-
-      // advance the destination pointer
-      advance_units = tx_size_high_unit[tx_size];
-      y += advance_units;
-    }
-  }
-}
-
-static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
-                             MACROBLOCKD *xd, int start, int stop,
-                             int plane_start, int plane_end, int is_realtime) {
-  struct macroblockd_plane *pd = xd->plane;
-  const int col_start = 0;
-  const int col_end = cm->mi_params.mi_cols;
-  int mi_row, mi_col;
-  int plane;
-
-  for (plane = plane_start; plane < plane_end; plane++) {
-    if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
-      break;
-    else if (plane == 1 && !(cm->lf.filter_level_u))
-      continue;
-    else if (plane == 2 && !(cm->lf.filter_level_v))
-      continue;
-    // filter all vertical and horizontal edges in every 128x128 super block
-    for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
-      for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
-        // filter vertical edges
-        av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, mi_row,
-                             mi_col, plane, plane + 1);
-#if CONFIG_AV1_HIGHBITDEPTH
-        (void)is_realtime;
-        av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col);
-#else
-        if (is_realtime && !plane) {
-          av1_filter_block_plane_vert_rt(cm, xd, plane, &pd[plane], mi_row,
-                                         mi_col);
-
-        } else {
-          av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
-                                      mi_col);
-        }
-#endif
-        // filter horizontal edges
-        if (mi_col - MAX_MIB_SIZE >= 0) {
-          av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
-                               mi_row, mi_col - MAX_MIB_SIZE, plane, plane + 1);
-#if CONFIG_AV1_HIGHBITDEPTH
-          (void)is_realtime;
-          av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
-                                      mi_col - MAX_MIB_SIZE);
-#else
-          if (is_realtime && !plane) {
-            av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
-                                           mi_col - MAX_MIB_SIZE);
-          } else {
-            av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
-                                        mi_col - MAX_MIB_SIZE);
-          }
-#endif
-        }
-      }
-      // filter horizontal edges
-      av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, mi_row,
-                           mi_col - MAX_MIB_SIZE, plane, plane + 1);
-#if CONFIG_AV1_HIGHBITDEPTH
-      (void)is_realtime;
-      av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
-                                  mi_col - MAX_MIB_SIZE);
-#else
-      if (is_realtime && !plane) {
-        av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
-                                       mi_col - MAX_MIB_SIZE);
-
-      } else {
-        av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
-                                    mi_col - MAX_MIB_SIZE);
-      }
-#endif
-    }
-  }
-}
-
-void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                           MACROBLOCKD *xd, int plane_start, int plane_end,
-                           int partial_frame, int is_realtime) {
-  int start_mi_row, end_mi_row, mi_rows_to_filter;
-
-  start_mi_row = 0;
-  mi_rows_to_filter = cm->mi_params.mi_rows;
-  if (partial_frame && cm->mi_params.mi_rows > 8) {
-    start_mi_row = cm->mi_params.mi_rows >> 1;
-    start_mi_row &= 0xfffffff8;
-    mi_rows_to_filter = AOMMAX(cm->mi_params.mi_rows / 8, 8);
-  }
-  end_mi_row = start_mi_row + mi_rows_to_filter;
-  av1_loop_filter_frame_init(cm, plane_start, plane_end);
-  loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row, plane_start,
-                   plane_end, is_realtime);
-}
diff --git a/av1/common/av1_loopfilter.h b/av1/common/av1_loopfilter.h
index 977b126..1bd00b4 100644
--- a/av1/common/av1_loopfilter.h
+++ b/av1/common/av1_loopfilter.h
@@ -90,15 +90,6 @@
 void av1_loop_filter_frame_init(struct AV1Common *cm, int plane_start,
                                 int plane_end);
 
-/*!\brief Apply AV1 loop filter
- *
- * \ingroup in_loop_filter
- * \callgraph
- */
-void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
-                           struct macroblockd *xd, int plane_start,
-                           int plane_end, int partial_frame, int is_realtime);
-
 void av1_filter_block_plane_vert(const struct AV1Common *const cm,
                                  const MACROBLOCKD *const xd, const int plane,
                                  const MACROBLOCKD_PLANE *const plane_ptr,
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
index 070e641..7b182e1 100644
--- a/av1/common/thread_common.c
+++ b/av1/common/thread_common.c
@@ -261,23 +261,20 @@
 #endif  // CONFIG_MULTITHREAD
 }
 
-static void enqueue_lf_jobs(AV1LfSync *lf_sync, AV1_COMMON *cm, int start,
-                            int stop, int plane_start, int plane_end,
-                            int is_realtime) {
+static void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
+                            const int planes_to_lf[3], int is_realtime) {
   int mi_row, plane, dir;
   AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
   lf_sync->jobs_enqueued = 0;
   lf_sync->jobs_dequeued = 0;
 
-  for (dir = 0; dir < 2; dir++) {
-    for (plane = plane_start; plane < plane_end; plane++) {
-      if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
-        break;
-      else if (plane == 1 && !(cm->lf.filter_level_u))
-        continue;
-      else if (plane == 2 && !(cm->lf.filter_level_v))
-        continue;
-      for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+  // Launch all vertical jobs first, as they are blocking the horizontal ones.
+  // Launch top row jobs for all planes first, in case the output can be
+  // partially reconstructed row by row.
+  for (dir = 0; dir < 2; ++dir) {
+    for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+      for (plane = 0; plane < 3; ++plane) {
+        if (!planes_to_lf[plane]) continue;
         lf_job_queue->mi_row = mi_row;
         lf_job_queue->plane = plane;
         lf_job_queue->dir = dir;
@@ -308,82 +305,67 @@
   return cur_job_info;
 }
 
-// Implement row loopfiltering for each thread.
+// One job of row loopfiltering.
 static INLINE void thread_loop_filter_rows(
     const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
-    struct macroblockd_plane *planes, MACROBLOCKD *xd,
-    AV1LfSync *const lf_sync) {
+    struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
+    int dir, int is_realtime, AV1LfSync *const lf_sync) {
   const int sb_cols =
       ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, MAX_MIB_SIZE_LOG2) >>
       MAX_MIB_SIZE_LOG2;
-  int mi_row, mi_col, plane, dir, is_realtime;
-  int r, c;
+  const int r = mi_row >> MAX_MIB_SIZE_LOG2;
+  int mi_col, c;
 
-  while (1) {
-    AV1LfMTInfo *cur_job_info = get_lf_job_info(lf_sync);
+  if (dir == 0) {
+    for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += MAX_MIB_SIZE) {
+      c = mi_col >> MAX_MIB_SIZE_LOG2;
 
-    if (cur_job_info != NULL) {
-      mi_row = cur_job_info->mi_row;
-      plane = cur_job_info->plane;
-      dir = cur_job_info->dir;
-      r = mi_row >> MAX_MIB_SIZE_LOG2;
-      is_realtime = cur_job_info->is_realtime && !plane;
-
-      if (dir == 0) {
-        for (mi_col = 0; mi_col < cm->mi_params.mi_cols;
-             mi_col += MAX_MIB_SIZE) {
-          c = mi_col >> MAX_MIB_SIZE_LOG2;
-
-          av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
-                               mi_row, mi_col, plane, plane + 1);
+      av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
+                           mi_row, mi_col, plane, plane + 1);
 #if CONFIG_AV1_HIGHBITDEPTH
-          (void)is_realtime;
-          av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
-                                      mi_col);
+      (void)is_realtime;
+      av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
+                                  mi_col);
 #else
-          if (is_realtime) {
-            av1_filter_block_plane_vert_rt(cm, xd, plane, &planes[plane],
-                                           mi_row, mi_col);
+      if (is_realtime) {
+        av1_filter_block_plane_vert_rt(cm, xd, plane, &planes[plane], mi_row,
+                                       mi_col);
 
-          } else {
-            av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
-                                        mi_col);
-          }
-#endif
-          sync_write(lf_sync, r, c, sb_cols, plane);
-        }
-      } else if (dir == 1) {
-        for (mi_col = 0; mi_col < cm->mi_params.mi_cols;
-             mi_col += MAX_MIB_SIZE) {
-          c = mi_col >> MAX_MIB_SIZE_LOG2;
-
-          // Wait for vertical edge filtering of the top-right block to be
-          // completed
-          sync_read(lf_sync, r, c, plane);
-
-          // Wait for vertical edge filtering of the right block to be
-          // completed
-          sync_read(lf_sync, r + 1, c, plane);
-
-          av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
-                               mi_row, mi_col, plane, plane + 1);
-#if CONFIG_AV1_HIGHBITDEPTH
-          (void)is_realtime;
-          av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
-                                      mi_col);
-#else
-          if (is_realtime) {
-            av1_filter_block_plane_horz_rt(cm, xd, plane, &planes[plane],
-                                           mi_row, mi_col);
-          } else {
-            av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
-                                        mi_col);
-          }
-#endif
-        }
+      } else {
+        av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
+                                    mi_col);
       }
-    } else {
-      break;
+#endif
+      if (lf_sync != NULL) sync_write(lf_sync, r, c, sb_cols, plane);
+    }
+  } else if (dir == 1) {
+    for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += MAX_MIB_SIZE) {
+      c = mi_col >> MAX_MIB_SIZE_LOG2;
+
+      if (lf_sync != NULL) {
+        // Wait for vertical edge filtering of the top-right block to be
+        // completed
+        sync_read(lf_sync, r, c, plane);
+
+        // Wait for vertical edge filtering of the right block to be completed
+        sync_read(lf_sync, r + 1, c, plane);
+      }
+
+      av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
+                           mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+      (void)is_realtime;
+      av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
+                                  mi_col);
+#else
+      if (is_realtime) {
+        av1_filter_block_plane_horz_rt(cm, xd, plane, &planes[plane], mi_row,
+                                       mi_col);
+      } else {
+        av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
+                                    mi_col);
+      }
+#endif
     }
   }
 }
@@ -392,22 +374,27 @@
 static int loop_filter_row_worker(void *arg1, void *arg2) {
   AV1LfSync *const lf_sync = (AV1LfSync *)arg1;
   LFWorkerData *const lf_data = (LFWorkerData *)arg2;
-  thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
-                          lf_data->xd, lf_sync);
+  AV1LfMTInfo *cur_job_info;
+  while ((cur_job_info = get_lf_job_info(lf_sync)) != NULL) {
+    const int is_realtime = cur_job_info->is_realtime && !cur_job_info->plane;
+    thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+                            lf_data->xd, cur_job_info->mi_row,
+                            cur_job_info->plane, cur_job_info->dir, is_realtime,
+                            lf_sync);
+  }
   return 1;
 }
 
 static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                                 MACROBLOCKD *xd, int start, int stop,
-                                int plane_start, int plane_end,
-                                AVxWorker *workers, int nworkers,
-                                AV1LfSync *lf_sync, int is_realtime) {
+                                const int planes_to_lf[3], AVxWorker *workers,
+                                int num_workers, AV1LfSync *lf_sync,
+                                int is_realtime) {
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
   // Number of superblock rows and cols
   const int sb_rows =
       ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, MAX_MIB_SIZE_LOG2) >>
       MAX_MIB_SIZE_LOG2;
-  const int num_workers = nworkers;
   int i;
 
   if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
@@ -422,8 +409,7 @@
            sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
   }
 
-  enqueue_lf_jobs(lf_sync, cm, start, stop, plane_start, plane_end,
-                  is_realtime);
+  enqueue_lf_jobs(lf_sync, start, stop, planes_to_lf, is_realtime);
 
   // Set up loopfilter thread data.
   for (i = num_workers - 1; i >= 0; --i) {
@@ -451,12 +437,40 @@
   }
 }
 
+static void loop_filter_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
+                             MACROBLOCKD *xd, int start, int stop,
+                             const int planes_to_lf[3], int is_realtime) {
+  // Filter top rows of all planes first, in case the output can be partially
+  // reconstructed row by row.
+  int mi_row, plane, dir;
+  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+    for (plane = 0; plane < 3; ++plane) {
+      if (!planes_to_lf[plane]) continue;
+      for (dir = 0; dir < 2; ++dir) {
+        thread_loop_filter_rows(frame, cm, xd->plane, xd, mi_row, plane, dir,
+                                is_realtime && !plane, /*lf_sync=*/NULL);
+      }
+    }
+  }
+}
+
 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                               MACROBLOCKD *xd, int plane_start, int plane_end,
                               int partial_frame, AVxWorker *workers,
                               int num_workers, AV1LfSync *lf_sync,
                               int is_realtime) {
   int start_mi_row, end_mi_row, mi_rows_to_filter;
+  int planes_to_lf[3];
+
+  // For each luma and chroma plane, whether to filter it or not.
+  planes_to_lf[0] = (cm->lf.filter_level[0] || cm->lf.filter_level[1]) &&
+                    plane_start <= 0 && 0 < plane_end;
+  planes_to_lf[1] = cm->lf.filter_level_u && plane_start <= 1 && 1 < plane_end;
+  planes_to_lf[2] = cm->lf.filter_level_v && plane_start <= 2 && 2 < plane_end;
+  // If the luma plane is purposely not filtered, the chroma planes neither.
+  if (!planes_to_lf[0] && plane_start <= 0 && 0 < plane_end) return;
+  // Early exit.
+  if (!planes_to_lf[0] && !planes_to_lf[1] && !planes_to_lf[2]) return;
 
   start_mi_row = 0;
   mi_rows_to_filter = cm->mi_params.mi_rows;
@@ -467,8 +481,16 @@
   }
   end_mi_row = start_mi_row + mi_rows_to_filter;
   av1_loop_filter_frame_init(cm, plane_start, plane_end);
-  loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, plane_start,
-                      plane_end, workers, num_workers, lf_sync, is_realtime);
+
+  if (num_workers > 1) {
+    // Enqueue and execute loopfiltering jobs.
+    loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf,
+                        workers, num_workers, lf_sync, is_realtime);
+  } else {
+    // Directly filter in the main thread.
+    loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf,
+                     is_realtime);
+  }
 }
 
 #if !CONFIG_REALTIME_ONLY
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 51c95e2..309a458 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -5230,14 +5230,9 @@
 
   if (!cm->features.allow_intrabc && !tiles->single_tile_decoding) {
     if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
-      if (pbi->num_workers > 1) {
-        av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &pbi->dcb.xd, 0,
-                                 num_planes, 0, pbi->tile_workers,
-                                 pbi->num_workers, &pbi->lf_row_sync, 0);
-      } else {
-        av1_loop_filter_frame(&cm->cur_frame->buf, cm, &pbi->dcb.xd, 0,
-                              num_planes, 0, 0);
-      }
+      av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &pbi->dcb.xd, 0,
+                               num_planes, 0, pbi->tile_workers,
+                               pbi->num_workers, &pbi->lf_row_sync, 0);
     }
 
     const int do_cdef =
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 5e33e48..3d9afeb 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2228,13 +2228,9 @@
 
   if ((lf->filter_level[0] || lf->filter_level[1]) &&
       !cpi->sf.rt_sf.skip_loopfilter_non_reference) {
-    if (num_workers > 1)
-      av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, xd, 0, num_planes, 0,
-                               mt_info->workers, num_workers,
-                               &mt_info->lf_row_sync, is_realtime);
-    else
-      av1_loop_filter_frame(&cm->cur_frame->buf, cm, xd, 0, num_planes, 0,
-                            is_realtime);
+    av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, xd, 0, num_planes, 0,
+                             mt_info->workers, num_workers,
+                             &mt_info->lf_row_sync, is_realtime);
   }
 #if CONFIG_COLLECT_COMPONENT_TIMING
   end_timing(cpi, loop_filter_time);
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index c44038c..7608749 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -69,15 +69,9 @@
     case 2: cm->lf.filter_level_v = filter_level[0]; break;
   }
 
-  // TODO(any): please enable multi-thread and remove the flag when loop
-  // filter mask is compatible with multi-thread.
-  if (num_workers > 1)
-    av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd, plane,
-                             plane + 1, partial_frame, mt_info->workers,
-                             num_workers, &mt_info->lf_row_sync, 0);
-  else
-    av1_loop_filter_frame(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd, plane,
-                          plane + 1, partial_frame, 0);
+  av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd, plane,
+                           plane + 1, partial_frame, mt_info->workers,
+                           num_workers, &mt_info->lf_row_sync, 0);
 
   filt_err = aom_get_sse_plane(sd, &cm->cur_frame->buf, plane,
                                cm->seq_params->use_highbitdepth);