Dual deblocking filter strength thresholds

A new experiment for the deblocking filter that separates the vertical
and horizontal filter strengths. It is based on the assumption that the
non-flatness characteristics of the vertical and horizontal directions
may differ, so selecting a different filter strength for each direction
can improve deblocking performance.

The process of finding the proper filter strengths:
1. Search over the filter levels under the constraint that
   (vertical == horizontal), and find the best level.
2. Fix the vertical level at the best level found in step 1 and vary
   the horizontal level to find its best value.
3. Fix the horizontal level selected in step 2 and vary the vertical
   level to find its best value.

The experiment works together with UV_LVL and shares its config flag.
The search for separate horizontal and vertical filter strengths
currently applies only to the Y plane.

The experimental flag should be changed to filter_level later.

Change-Id: I164eec8d3ccb3da7ff109c5c55f4b52c1536ddf1
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index 2cbc0b0..7da2ae9 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -595,6 +595,9 @@
 #if CONFIG_EXT_DELTA_Q
 static uint8_t get_filter_level(const AV1_COMMON *cm,
                                 const loop_filter_info_n *lfi_n,
+#if CONFIG_UV_LVL
+                                const int dir_idx,
+#endif
                                 const MB_MODE_INFO *mbmi) {
 #if CONFIG_SUPERTX
   const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
@@ -606,8 +609,14 @@
   const int segment_id = mbmi->segment_id;
 #endif  // CONFIG_SUPERTX
   if (cm->delta_lf_present_flag) {
+#if CONFIG_UV_LVL
+    int lvl_seg =
+        clamp(mbmi->current_delta_lf_from_base + cm->lf.filter_level[dir_idx],
+              0, MAX_LOOP_FILTER);
+#else
     int lvl_seg = clamp(mbmi->current_delta_lf_from_base + cm->lf.filter_level,
                         0, MAX_LOOP_FILTER);
+#endif
     const int scale = 1 << (lvl_seg >> 5);
     if (segfeature_active(&cm->seg, segment_id, SEG_LVL_ALT_LF)) {
       const int data = get_segdata(&cm->seg, segment_id, SEG_LVL_ALT_LF);
@@ -624,7 +633,12 @@
     }
     return lvl_seg;
   } else {
+#if CONFIG_UV_LVL
+    return lfi_n
+        ->lvl[segment_id][dir_idx][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
+#else
     return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
+#endif
   }
 }
 #else
@@ -658,12 +672,13 @@
     memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
 }
 
-void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl) {
+void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl,
+                                int default_filt_lvl_r) {
   int seg_id;
   // n_shift is the multiplier for lf_deltas
   // the multiplier is 1 for when filter_lvl is between 0 and 31;
   // 2 when filter_lvl is between 32 and 63
-  const int scale = 1 << (default_filt_lvl >> 5);
+  int scale = 1 << (default_filt_lvl >> 5);
   loop_filter_info_n *const lfi = &cm->lf_info;
   struct loopfilter *const lf = &cm->lf;
   const struct segmentation *const seg = &cm->seg;
@@ -689,6 +704,26 @@
       memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
     } else {
       int ref, mode;
+#if CONFIG_UV_LVL
+      for (int dir = 0; dir < 2; ++dir) {
+        lvl_seg = (dir == 0) ? default_filt_lvl : default_filt_lvl_r;
+        scale = 1 << (lvl_seg >> 5);
+
+        const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+        lfi->lvl[seg_id][dir][INTRA_FRAME][0] =
+            clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+
+        for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
+          for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+            const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
+                                  lf->mode_deltas[mode] * scale;
+            lfi->lvl[seg_id][dir][ref][mode] =
+                clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+          }
+        }
+      }
+#else
+      (void)default_filt_lvl_r;
       const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
       lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
 
@@ -699,6 +734,7 @@
           lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
         }
       }
+#endif
     }
   }
 }
@@ -1394,7 +1430,11 @@
   const TX_SIZE tx_size_uv_above =
       txsize_vert_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+  const int filter_level = get_filter_level(cm, lfi_n, 0, mbmi);
+#else
   const int filter_level = get_filter_level(cm, lfi_n, mbmi);
+#endif
 #else
   const int filter_level = get_filter_level(lfi_n, mbmi);
   (void)cm;
@@ -1488,7 +1528,11 @@
   const BLOCK_SIZE block_size = mbmi->sb_type;
 #endif
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+  const int filter_level = get_filter_level(cm, lfi_n, 0, mbmi);
+#else
   const int filter_level = get_filter_level(cm, lfi_n, mbmi);
+#endif
 #else
   const int filter_level = get_filter_level(lfi_n, mbmi);
   (void)cm;
@@ -2093,7 +2137,12 @@
 
 // Filter level can vary per MI
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+    if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, 0, mbmi)))
+      continue;
+#else
     if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, mbmi))) continue;
+#endif
 #else
     if (!(lfl_r[c_step] = get_filter_level(&cm->lf_info, mbmi))) continue;
 #endif
@@ -2786,7 +2835,12 @@
                                plane_ptr, scale_horz, scale_vert);
 
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+    const uint32_t curr_level =
+        get_filter_level(cm, &cm->lf_info, edge_dir, mbmi);
+#else
     const uint32_t curr_level = get_filter_level(cm, &cm->lf_info, mbmi);
+#endif
 #else
     const uint32_t curr_level = get_filter_level(&cm->lf_info, mbmi);
 #endif  // CONFIG_EXT_DELTA_Q
@@ -2818,8 +2872,13 @@
                                      plane_ptr, scale_horz, scale_vert);
 
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+          const uint32_t pv_lvl =
+              get_filter_level(cm, &cm->lf_info, edge_dir, &mi_prev->mbmi);
+#else
           const uint32_t pv_lvl =
               get_filter_level(cm, &cm->lf_info, &mi_prev->mbmi);
+#endif
 #else
           const uint32_t pv_lvl =
               get_filter_level(&cm->lf_info, &mi_prev->mbmi);
@@ -3392,13 +3451,25 @@
 }
 
 void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                           MACROBLOCKD *xd, int frame_filter_level, int y_only,
-                           int partial_frame) {
+                           MACROBLOCKD *xd, int frame_filter_level,
+#if CONFIG_UV_LVL
+                           int frame_filter_level_r,
+#endif
+                           int y_only, int partial_frame) {
   int start_mi_row, end_mi_row, mi_rows_to_filter;
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+  int orig_filter_level[2] = { cm->lf.filter_level[0], cm->lf.filter_level[1] };
+#else
   int orig_filter_level = cm->lf.filter_level;
 #endif
+#endif
+
+#if CONFIG_UV_LVL
+  if (!frame_filter_level && !frame_filter_level_r) return;
+#else
   if (!frame_filter_level) return;
+#endif
   start_mi_row = 0;
   mi_rows_to_filter = cm->mi_rows;
   if (partial_frame && cm->mi_rows > 8) {
@@ -3407,14 +3478,29 @@
     mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
   }
   end_mi_row = start_mi_row + mi_rows_to_filter;
-  av1_loop_filter_frame_init(cm, frame_filter_level);
+#if CONFIG_UV_LVL
+  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r);
+#else
+  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
+#endif
+
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+  cm->lf.filter_level[0] = frame_filter_level;
+  cm->lf.filter_level[1] = frame_filter_level_r;
+#else
   cm->lf.filter_level = frame_filter_level;
 #endif
+#endif
   av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
 #if CONFIG_EXT_DELTA_Q
+#if CONFIG_UV_LVL
+  cm->lf.filter_level[0] = orig_filter_level[0];
+  cm->lf.filter_level[1] = orig_filter_level[1];
+#else
   cm->lf.filter_level = orig_filter_level;
 #endif
+#endif
 }
 
 void av1_loop_filter_data_reset(
diff --git a/av1/common/av1_loopfilter.h b/av1/common/av1_loopfilter.h
index 043081e..39ff2f4 100644
--- a/av1/common/av1_loopfilter.h
+++ b/av1/common/av1_loopfilter.h
@@ -36,10 +36,12 @@
 };
 
 struct loopfilter {
-  int filter_level;
 #if CONFIG_UV_LVL
+  int filter_level[2];
   int filter_level_u;
   int filter_level_v;
+#else
+  int filter_level;
 #endif
 
   int sharpness_level;
@@ -69,7 +71,11 @@
 
 typedef struct {
   loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
+#if CONFIG_UV_LVL
+  uint8_t lvl[MAX_SEGMENTS][2][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
+#else
   uint8_t lvl[MAX_SEGMENTS][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
+#endif
 } loop_filter_info_n;
 
 // This structure holds bit masks for all 8x8 blocks in a 64x64 region.
@@ -132,10 +138,14 @@
 // This should be called before av1_loop_filter_rows(),
 // av1_loop_filter_frame()
 // calls this function directly.
-void av1_loop_filter_frame_init(struct AV1Common *cm, int default_filt_lvl);
+void av1_loop_filter_frame_init(struct AV1Common *cm, int default_filt_lvl,
+                                int default_filt_lvl_r);
 
 void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
                            struct macroblockd *mbd, int filter_level,
+#if CONFIG_UV_LVL
+                           int filter_level_r,
+#endif
                            int y_only, int partial_frame);
 
 // Apply the loop filter to [start, stop) macro block rows in frame_buffer.
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
index d96a71a..3ee85e9 100644
--- a/av1/common/thread_common.c
+++ b/av1/common/thread_common.c
@@ -416,8 +416,11 @@
 
 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                               struct macroblockd_plane planes[MAX_MB_PLANE],
-                              int frame_filter_level, int y_only,
-                              int partial_frame, AVxWorker *workers,
+                              int frame_filter_level,
+#if CONFIG_UV_LVL
+                              int frame_filter_level_r,
+#endif
+                              int y_only, int partial_frame, AVxWorker *workers,
                               int num_workers, AV1LfSync *lf_sync) {
   int start_mi_row, end_mi_row, mi_rows_to_filter;
 
@@ -431,8 +434,11 @@
     mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
   }
   end_mi_row = start_mi_row + mi_rows_to_filter;
-  av1_loop_filter_frame_init(cm, frame_filter_level);
-
+#if CONFIG_UV_LVL
+  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r);
+#else
+  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
+#endif  // CONFIG_UV_LVL
   loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only,
                       workers, num_workers, lf_sync);
 }
diff --git a/av1/common/thread_common.h b/av1/common/thread_common.h
index 7b57ae8..bbb35b8 100644
--- a/av1/common/thread_common.h
+++ b/av1/common/thread_common.h
@@ -50,8 +50,11 @@
 // Multi-threaded loopfilter that uses the tile threads.
 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
                               struct macroblockd_plane planes[MAX_MB_PLANE],
-                              int frame_filter_level, int y_only,
-                              int partial_frame, AVxWorker *workers,
+                              int frame_filter_level,
+#if CONFIG_UV_LVL
+                              int frame_filter_level_r,
+#endif
+                              int y_only, int partial_frame, AVxWorker *workers,
                               int num_workers, AV1LfSync *lf_sync);
 
 void av1_accumulate_frame_counts(struct FRAME_COUNTS *acc_counts,
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index fc270f6..5f9a8da 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -2842,12 +2842,15 @@
 
 static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
   struct loopfilter *lf = &cm->lf;
-  lf->filter_level = aom_rb_read_literal(rb, 6);
 #if CONFIG_UV_LVL
-  if (lf->filter_level > 0) {
+  lf->filter_level[0] = aom_rb_read_literal(rb, 6);
+  lf->filter_level[1] = aom_rb_read_literal(rb, 6);
+  if (lf->filter_level[0] || lf->filter_level[1]) {
     lf->filter_level_u = aom_rb_read_literal(rb, 6);
     lf->filter_level_v = aom_rb_read_literal(rb, 6);
   }
+#else
+  lf->filter_level = aom_rb_read_literal(rb, 6);
 #endif
   lf->sharpness_level = aom_rb_read_literal(rb, 3);
 
@@ -3808,13 +3811,13 @@
 #if CONFIG_VAR_TX || CONFIG_CB4X4
 // Loopfilter the whole frame.
 #if CONFIG_UV_LVL
-  if (cm->lf.filter_level > 0) {
+  if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
     av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
-                          cm->lf.filter_level, 0, 0);
+                          cm->lf.filter_level[0], cm->lf.filter_level[1], 0, 0);
     av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
-                          cm->lf.filter_level_u, 1, 0);
+                          cm->lf.filter_level_u, cm->lf.filter_level_u, 1, 0);
     av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
-                          cm->lf.filter_level_v, 2, 0);
+                          cm->lf.filter_level_v, cm->lf.filter_level_v, 2, 0);
   }
 #else
   av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
@@ -4340,7 +4343,12 @@
     ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
     unlock_buffer_pool(pool);
 
+#if CONFIG_UV_LVL
+    cm->lf.filter_level[0] = 0;
+    cm->lf.filter_level[1] = 0;
+#else
     cm->lf.filter_level = 0;
+#endif
     cm->show_frame = 1;
     pbi->refresh_frame_flags = 0;
 
@@ -5265,9 +5273,17 @@
     aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
                        "Decode failed. Frame data header is corrupted.");
 
-  if (cm->lf.filter_level && !cm->skip_loop_filter) {
-    av1_loop_filter_frame_init(cm, cm->lf.filter_level);
+#if CONFIG_UV_LVL
+  if ((cm->lf.filter_level[0] || cm->lf.filter_level[1]) &&
+      !cm->skip_loop_filter) {
+    av1_loop_filter_frame_init(cm, cm->lf.filter_level[0],
+                               cm->lf.filter_level[1]);
   }
+#else
+  if (cm->lf.filter_level && !cm->skip_loop_filter) {
+    av1_loop_filter_frame_init(cm, cm->lf.filter_level, cm->lf.filter_level);
+  }
+#endif
 
   // If encoded in frame parallel mode, frame context is ready after decoding
   // the frame header.
@@ -5303,11 +5319,18 @@
     *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
     if (!xd->corrupted) {
       if (!cm->skip_loop_filter) {
-        // If multiple threads are used to decode tiles, then we use those
-        // threads to do parallel loopfiltering.
+// If multiple threads are used to decode tiles, then we use those
+// threads to do parallel loopfiltering.
+#if CONFIG_UV_LVL
+        av1_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+                                 cm->lf.filter_level[0], cm->lf.filter_level[1],
+                                 0, 0, pbi->tile_workers, pbi->num_tile_workers,
+                                 &pbi->lf_row_sync);
+#else
         av1_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
                                  0, 0, pbi->tile_workers, pbi->num_tile_workers,
                                  &pbi->lf_row_sync);
+#endif  // CONFIG_UV_LVL
       }
     } else {
       aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
diff --git a/av1/decoder/dthread.c b/av1/decoder/dthread.c
index 50f8ed1..b263169 100644
--- a/av1/decoder/dthread.c
+++ b/av1/decoder/dthread.c
@@ -181,7 +181,12 @@
   memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
          (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
   dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level;
+#if CONFIG_UV_LVL
+  dst_cm->lf.filter_level[0] = src_cm->lf.filter_level[0];
+  dst_cm->lf.filter_level[1] = src_cm->lf.filter_level[1];
+#else
   dst_cm->lf.filter_level = src_cm->lf.filter_level;
+#endif
   memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, TOTAL_REFS_PER_FRAME);
   memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
   dst_cm->seg = src_cm->seg;
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 4773be3..4663e82 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3418,13 +3418,16 @@
   int i;
   struct loopfilter *lf = &cm->lf;
 
-  // Encode the loop filter level and type
-  aom_wb_write_literal(wb, lf->filter_level, 6);
+// Encode the loop filter level and type
 #if CONFIG_UV_LVL
-  if (lf->filter_level > 0) {
+  aom_wb_write_literal(wb, lf->filter_level[0], 6);
+  aom_wb_write_literal(wb, lf->filter_level[1], 6);
+  if (lf->filter_level[0] || lf->filter_level[1]) {
     aom_wb_write_literal(wb, lf->filter_level_u, 6);
     aom_wb_write_literal(wb, lf->filter_level_v, 6);
   }
+#else
+  aom_wb_write_literal(wb, lf->filter_level, 6);
 #endif
   aom_wb_write_literal(wb, lf->sharpness_level, 3);
 
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 76b230f..d37b3ff 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4134,7 +4134,12 @@
 #endif  // CONFIG_EXT_TILE
 
   if (no_loopfilter) {
+#if CONFIG_UV_LVL
+    lf->filter_level[0] = 0;
+    lf->filter_level[1] = 0;
+#else
     lf->filter_level = 0;
+#endif
   } else {
     struct aom_usec_timer timer;
 
@@ -4148,12 +4153,21 @@
     cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
   }
 
-  if (lf->filter_level > 0) {
+#if CONFIG_UV_LVL
+  if (lf->filter_level[0] || lf->filter_level[1])
+#else
+  if (lf->filter_level > 0)
+#endif
+  {
 #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
 #if CONFIG_UV_LVL
-    av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
-    av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_u, 1, 0);
-    av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_v, 2, 0);
+    av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level[0],
+                          lf->filter_level[1], 0, 0);
+    av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_u,
+                          lf->filter_level_u, 1, 0);
+    av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_v,
+                          lf->filter_level_v, 2, 0);
+
 #else
     av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
 #endif  // CONFIG_UV_LVL
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index da15d6a..73cf256 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -51,7 +51,7 @@
                                 int partial_frame
 #if CONFIG_UV_LVL
                                 ,
-                                int plane
+                                int plane, int dir
 #endif
                                 ) {
   AV1_COMMON *const cm = &cpi->common;
@@ -60,8 +60,12 @@
 #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
 #if CONFIG_UV_LVL
   assert(plane >= 0 && plane <= 2);
-  av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
-                        plane, partial_frame);
+  int filter_level[2] = { filt_level, filt_level };
+  if (plane == 0 && dir == 0) filter_level[1] = cm->lf.filter_level[1];
+  if (plane == 0 && dir == 1) filter_level[0] = cm->lf.filter_level[0];
+
+  av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd,
+                        filter_level[0], filter_level[1], plane, partial_frame);
 #else
   av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1,
                         partial_frame);
@@ -100,7 +104,7 @@
                             int partial_frame, double *best_cost_ret
 #if CONFIG_UV_LVL
                             ,
-                            int plane
+                            int plane, int dir
 #endif
                             ) {
   const AV1_COMMON *const cm = &cpi->common;
@@ -117,7 +121,7 @@
 #if CONFIG_UV_LVL
   int lvl;
   switch (plane) {
-    case 0: lvl = lf->filter_level; break;
+    case 0: lvl = (dir == 1) ? lf->filter_level[1] : lf->filter_level[0]; break;
     case 1: lvl = lf->filter_level_u; break;
     case 2: lvl = lf->filter_level_v; break;
     default: assert(plane >= 0 && plane <= 2); return 0;
@@ -141,7 +145,7 @@
 #endif  // CONFIG_UV_LVL
 
 #if CONFIG_UV_LVL
-  best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane);
+  best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane, dir);
 #else
   best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame);
 #endif  // CONFIG_UV_LVL
@@ -166,7 +170,7 @@
       if (ss_err[filt_low] < 0) {
 #if CONFIG_UV_LVL
         ss_err[filt_low] =
-            try_filter_frame(sd, cpi, filt_low, partial_frame, plane);
+            try_filter_frame(sd, cpi, filt_low, partial_frame, plane, dir);
 #else
         ss_err[filt_low] = try_filter_frame(sd, cpi, filt_low, partial_frame);
 #endif  // CONFIG_UV_LVL
@@ -187,7 +191,7 @@
       if (ss_err[filt_high] < 0) {
 #if CONFIG_UV_LVL
         ss_err[filt_high] =
-            try_filter_frame(sd, cpi, filt_high, partial_frame, plane);
+            try_filter_frame(sd, cpi, filt_high, partial_frame, plane, dir);
 #else
         ss_err[filt_high] = try_filter_frame(sd, cpi, filt_high, partial_frame);
 #endif  // CONFIG_UV_LVL
@@ -224,8 +228,13 @@
 
   lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
 
-  if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
+  if (method == LPF_PICK_MINIMAL_LPF) {
+#if CONFIG_UV_LVL
+    lf->filter_level[0] = 0;
+    lf->filter_level[1] = 0;
+#else
     lf->filter_level = 0;
+#endif
   } else if (method >= LPF_PICK_FROM_Q) {
     const int min_filter_level = 0;
     const int max_filter_level = av1_get_max_filter_level(cpi);
@@ -254,15 +263,25 @@
     int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
 #endif  // CONFIG_HIGHBITDEPTH
     if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
+#if CONFIG_UV_LVL
+    lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level);
+    lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level);
+#else
     lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
+#endif
   } else {
 #if CONFIG_UV_LVL
-    lf->filter_level = av1_search_filter_level(
-        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0);
+    lf->filter_level[0] = lf->filter_level[1] = av1_search_filter_level(
+        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0, 2);
+    lf->filter_level[0] = av1_search_filter_level(
+        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0, 0);
+    lf->filter_level[1] = av1_search_filter_level(
+        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0, 1);
+
     lf->filter_level_u = av1_search_filter_level(
-        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 1);
+        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 1, 0);
     lf->filter_level_v = av1_search_filter_level(
-        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 2);
+        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 2, 0);
 #else
     lf->filter_level = av1_search_filter_level(
         sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL);
diff --git a/av1/encoder/picklpf.h b/av1/encoder/picklpf.h
index bd248d1..4950996 100644
--- a/av1/encoder/picklpf.h
+++ b/av1/encoder/picklpf.h
@@ -23,7 +23,7 @@
 int av1_get_max_filter_level(const AV1_COMP *cpi);
 #if CONFIG_UV_LVL
 int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
-                            int partial_frame, double *err, int plane);
+                            int partial_frame, double *err, int plane, int dir);
 #else
 int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
                             int partial_frame, double *err);