Multi-thread recon frame padding

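The frame border extension done by 'aom_extend_frame_borders' is now
multi-threaded in the encoder: CDEF / loop-restoration workers extend
the borders of the rows they have just processed (decoder passes 0).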

Change-Id: Icb45d34234468f68cc5219914367e02590bb27aa
diff --git a/aom_scale/aom_scale_rtcd.pl b/aom_scale/aom_scale_rtcd.pl
index eef6f16..e32cec4 100644
--- a/aom_scale/aom_scale_rtcd.pl
+++ b/aom_scale/aom_scale_rtcd.pl
@@ -45,6 +45,8 @@
 add_proto qw/void aom_yv12_partial_copy_v/, "const struct yv12_buffer_config *src_bc, int hstart1, int hend1, int vstart1, int vend1, struct yv12_buffer_config *dst_bc, int hstart2, int vstart2";
 add_proto qw/void aom_yv12_partial_coloc_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
 
+add_proto qw/void aom_extend_frame_borders_plane_row/, "const struct yv12_buffer_config *ybf, int plane, int v_start, int v_end";
+
 add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
 specialize qw/aom_extend_frame_borders dspr2/;
 
diff --git a/aom_scale/generic/yv12extend.c b/aom_scale/generic/yv12extend.c
index 5d797c8..997ff54 100644
--- a/aom_scale/generic/yv12extend.c
+++ b/aom_scale/generic/yv12extend.c
@@ -21,19 +21,20 @@
 
 static void extend_plane(uint8_t *const src, int src_stride, int width,
                          int height, int extend_top, int extend_left,
-                         int extend_bottom, int extend_right) {
+                         int extend_bottom, int extend_right, int v_start,
+                         int v_end) {
   assert(src != NULL);
   int i;
   const int linesize = extend_left + extend_right + width;
   assert(linesize <= src_stride);
 
   /* copy the left and right most columns out */
-  uint8_t *src_ptr1 = src;
-  uint8_t *src_ptr2 = src + width - 1;
-  uint8_t *dst_ptr1 = src - extend_left;
+  uint8_t *src_ptr1 = src + v_start * src_stride;
+  uint8_t *src_ptr2 = src + v_start * src_stride + width - 1;
+  uint8_t *dst_ptr1 = src + v_start * src_stride - extend_left;
   uint8_t *dst_ptr2 = src_ptr2 + 1;
 
-  for (i = 0; i < height; ++i) {
+  for (i = v_start; i < v_end; ++i) {
     memset(dst_ptr1, src_ptr1[0], extend_left);
     memset(dst_ptr2, src_ptr2[0], extend_right);
     src_ptr1 += src_stride;
@@ -65,19 +66,20 @@
 #if CONFIG_AV1_HIGHBITDEPTH
 static void extend_plane_high(uint8_t *const src8, int src_stride, int width,
                               int height, int extend_top, int extend_left,
-                              int extend_bottom, int extend_right) {
+                              int extend_bottom, int extend_right, int v_start,
+                              int v_end) {
   int i;
   const int linesize = extend_left + extend_right + width;
   assert(linesize <= src_stride);
   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
 
   /* copy the left and right most columns out */
-  uint16_t *src_ptr1 = src;
-  uint16_t *src_ptr2 = src + width - 1;
-  uint16_t *dst_ptr1 = src - extend_left;
+  uint16_t *src_ptr1 = src + v_start * src_stride;
+  uint16_t *src_ptr2 = src + v_start * src_stride + width - 1;
+  uint16_t *dst_ptr1 = src + v_start * src_stride - extend_left;
   uint16_t *dst_ptr2 = src_ptr2 + 1;
 
-  for (i = 0; i < height; ++i) {
+  for (i = v_start; i < v_end; ++i) {
     aom_memset16(dst_ptr1, src_ptr1[0], extend_left);
     aom_memset16(dst_ptr2, src_ptr2[0], extend_right);
     src_ptr1 += src_stride;
@@ -107,6 +109,41 @@
 }
 #endif  // CONFIG_AV1_HIGHBITDEPTH
 
+void aom_extend_frame_borders_plane_row_c(const YV12_BUFFER_CONFIG *ybf,
+                                          int plane, int v_start, int v_end) {
+  const int ext_size = ybf->border;
+  const int ss_x = ybf->subsampling_x;
+  const int ss_y = ybf->subsampling_y;
+  // Extend the borders of rows [v_start, v_end) of the given plane.
+  assert(ybf->y_height - ybf->y_crop_height < 16);
+  assert(ybf->y_width - ybf->y_crop_width < 16);
+  assert(ybf->y_height - ybf->y_crop_height >= 0);
+  assert(ybf->y_width - ybf->y_crop_width >= 0);
+  // Planes other than 0 use the border shifted by the subsampling factors.
+  const int is_uv = plane > 0;  // 0 for plane 0, 1 for any other plane
+  const int top = ext_size >> (is_uv ? ss_y : 0);
+  const int left = ext_size >> (is_uv ? ss_x : 0);
+  const int bottom = top + ybf->heights[is_uv] - ybf->crop_heights[is_uv];
+  const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
+  const int extend_top_border = (v_start == 0);  // band starts at row 0
+  const int extend_bottom_border = (v_end == ybf->crop_heights[is_uv]);
+  // A zero top/bottom extent is passed unless the band has the first/last row.
+#if CONFIG_AV1_HIGHBITDEPTH
+  if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    extend_plane_high(ybf->buffers[plane], ybf->strides[is_uv],
+                      ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
+                      extend_top_border ? top : 0, left,
+                      extend_bottom_border ? bottom : 0, right, v_start, v_end);
+    return;
+  }
+#endif
+  // Frame is not flagged high bitdepth (or support is compiled out).
+  extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
+               ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
+               extend_top_border ? top : 0, left,
+               extend_bottom_border ? bottom : 0, right, v_start, v_end);
+}
+
 void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
                                      const int num_planes) {
   assert(ybf->border % 2 == 0);
@@ -124,7 +161,8 @@
           ybf->buffers[plane], ybf->strides[is_uv], ybf->crop_widths[is_uv],
           ybf->crop_heights[is_uv], plane_border, plane_border,
           plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
-          plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
+          plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv], 0,
+          ybf->crop_heights[is_uv]);
     }
     return;
   }
@@ -137,7 +175,8 @@
                  ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
                  plane_border, plane_border,
                  plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
-                 plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
+                 plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv], 0,
+                 ybf->crop_heights[is_uv]);
   }
 }
 
@@ -161,7 +200,7 @@
       const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
       extend_plane_high(ybf->buffers[plane], ybf->strides[is_uv],
                         ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top,
-                        left, bottom, right);
+                        left, bottom, right, 0, ybf->crop_heights[is_uv]);
     }
     return;
   }
@@ -175,7 +214,7 @@
     const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
     extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
                  ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top, left,
-                 bottom, right);
+                 bottom, right, 0, ybf->crop_heights[is_uv]);
   }
 }
 
@@ -199,17 +238,17 @@
   assert(ybf->y_width - ybf->y_crop_width >= 0);
 #if CONFIG_AV1_HIGHBITDEPTH
   if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    extend_plane_high(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
-                      ybf->y_crop_height, ext_size, ext_size,
-                      ext_size + ybf->y_height - ybf->y_crop_height,
-                      ext_size + ybf->y_width - ybf->y_crop_width);
+    extend_plane_high(
+        ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height,
+        ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height,
+        ext_size + ybf->y_width - ybf->y_crop_width, 0, ybf->y_crop_height);
     return;
   }
 #endif
-  extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
-               ybf->y_crop_height, ext_size, ext_size,
-               ext_size + ybf->y_height - ybf->y_crop_height,
-               ext_size + ybf->y_width - ybf->y_crop_width);
+  extend_plane(
+      ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height,
+      ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height,
+      ext_size + ybf->y_width - ybf->y_crop_width, 0, ybf->y_crop_height);
 }
 
 #if CONFIG_AV1_HIGHBITDEPTH
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
index 54e9443..b951ad3 100644
--- a/av1/common/thread_common.c
+++ b/av1/common/thread_common.c
@@ -845,6 +845,12 @@
       copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, ctxt[plane].tile_rect.left,
                        ctxt[plane].tile_rect.right, cur_job_info->v_copy_start,
                        cur_job_info->v_copy_end);
+
+      if (lrworkerdata->do_extend_border) {
+        aom_extend_frame_borders_plane_row(lr_ctxt->frame, plane,
+                                           cur_job_info->v_copy_start,
+                                           cur_job_info->v_copy_end);
+      }
     } else {
       break;
     }
@@ -854,7 +860,8 @@
 
 static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
                                            AVxWorker *workers, int nworkers,
-                                           AV1LrSync *lr_sync, AV1_COMMON *cm) {
+                                           AV1LrSync *lr_sync, AV1_COMMON *cm,
+                                           int do_extend_border) {
   FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
 
   const int num_planes = av1_num_planes(cm);
@@ -897,6 +904,7 @@
   for (i = num_workers - 1; i >= 0; --i) {
     AVxWorker *const worker = &workers[i];
     lr_sync->lrworkerdata[i].lr_ctxt = (void *)lr_ctxt;
+    lr_sync->lrworkerdata[i].do_extend_border = do_extend_border;
     worker->hook = loop_restoration_row_worker;
     worker->data1 = lr_sync;
     worker->data2 = &lr_sync->lrworkerdata[i];
@@ -918,7 +926,8 @@
 void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
                                           AV1_COMMON *cm, int optimized_lr,
                                           AVxWorker *workers, int num_workers,
-                                          AV1LrSync *lr_sync, void *lr_ctxt) {
+                                          AV1LrSync *lr_sync, void *lr_ctxt,
+                                          int do_extend_border) {
   assert(!cm->features.all_lossless);
 
   const int num_planes = av1_num_planes(cm);
@@ -929,7 +938,7 @@
                                          optimized_lr, num_planes);
 
   foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync,
-                                 cm);
+                                 cm, do_extend_border);
 }
 
 // Initializes cdef_sync parameters.
@@ -1002,13 +1011,27 @@
 static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
   AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1;
   AV1CdefWorkerData *const cdef_worker = (AV1CdefWorkerData *)arg2;
-  const int nvfb =
-      (cdef_worker->cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+  AV1_COMMON *cm = cdef_worker->cm;
+  const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
   int cur_fbr;
+  const int num_planes = av1_num_planes(cm);
   while (get_cdef_row_next_job(cdef_sync, &cur_fbr, nvfb)) {
-    av1_cdef_fb_row(cdef_worker->cm, cdef_worker->xd, cdef_worker->linebuf,
-                    cdef_worker->colbuf, cdef_worker->srcbuf, cur_fbr,
+    MACROBLOCKD *xd = cdef_worker->xd;
+    av1_cdef_fb_row(cm, xd, cdef_worker->linebuf, cdef_worker->colbuf,
+                    cdef_worker->srcbuf, cur_fbr,
                     cdef_worker->cdef_init_fb_row_fn, cdef_sync);
+    if (cdef_worker->do_extend_border) {
+      for (int plane = 0; plane < num_planes; ++plane) {
+        const YV12_BUFFER_CONFIG *ybf = &cm->cur_frame->buf;
+        const int is_uv = plane > 0;
+        const int mi_high = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
+        const int unit_height = MI_SIZE_64X64 << mi_high;
+        const int v_start = cur_fbr * unit_height;
+        const int v_end =
+            AOMMIN(v_start + unit_height, ybf->crop_heights[is_uv]);
+        aom_extend_frame_borders_plane_row(ybf, plane, v_start, v_end);
+      }
+    }
   }
   return 1;
 }
@@ -1017,7 +1040,8 @@
 static void prepare_cdef_frame_workers(
     AV1_COMMON *const cm, MACROBLOCKD *xd, AV1CdefWorkerData *const cdef_worker,
     AVxWorkerHook hook, AVxWorker *const workers, AV1CdefSync *const cdef_sync,
-    int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn) {
+    int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
+    int do_extend_border) {
   const int num_planes = av1_num_planes(cm);
 
   cdef_worker[0].srcbuf = cm->cdef_info.srcbuf;
@@ -1028,6 +1052,7 @@
     cdef_worker[i].cm = cm;
     cdef_worker[i].xd = xd;
     cdef_worker[i].cdef_init_fb_row_fn = cdef_init_fb_row_fn;
+    cdef_worker[i].do_extend_border = do_extend_border;
     for (int plane = 0; plane < num_planes; plane++)
       cdef_worker[i].linebuf[plane] = cm->cdef_info.linebuf[plane];
 
@@ -1111,8 +1136,8 @@
 void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
                        AV1CdefWorkerData *const cdef_worker,
                        AVxWorker *const workers, AV1CdefSync *const cdef_sync,
-                       int num_workers,
-                       cdef_init_fb_row_t cdef_init_fb_row_fn) {
+                       int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
+                       int do_extend_border) {
   YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf;
   const int num_planes = av1_num_planes(cm);
 
@@ -1122,7 +1147,7 @@
   reset_cdef_job_info(cdef_sync);
   prepare_cdef_frame_workers(cm, xd, cdef_worker, cdef_sb_row_worker_hook,
                              workers, cdef_sync, num_workers,
-                             cdef_init_fb_row_fn);
+                             cdef_init_fb_row_fn, do_extend_border);
   launch_cdef_workers(workers, num_workers);
   sync_cdef_workers(workers, cm, num_workers);
 }
diff --git a/av1/common/thread_common.h b/av1/common/thread_common.h
index 7c284fa..b1e622f 100644
--- a/av1/common/thread_common.h
+++ b/av1/common/thread_common.h
@@ -70,6 +70,7 @@
   int32_t *rst_tmpbuf;
   void *rlbs;
   void *lr_ctxt;
+  int do_extend_border;
 } LRWorkerData;
 
 // Looprestoration row synchronization
@@ -106,6 +107,7 @@
   uint16_t *srcbuf;
   uint16_t *linebuf[MAX_MB_PLANE];
   cdef_init_fb_row_t cdef_init_fb_row_fn;
+  int do_extend_border;
 } AV1CdefWorkerData;
 
 typedef struct AV1CdefRowSync {
@@ -135,7 +137,8 @@
 void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
                        AV1CdefWorkerData *const cdef_worker,
                        AVxWorker *const workers, AV1CdefSync *const cdef_sync,
-                       int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn);
+                       int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
+                       int do_extend_border);
 void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
                              const MACROBLOCKD *const xd,
                              CdefBlockInfo *const fb_info,
@@ -163,7 +166,7 @@
                                           struct AV1Common *cm,
                                           int optimized_lr, AVxWorker *workers,
                                           int num_workers, AV1LrSync *lr_sync,
-                                          void *lr_ctxt);
+                                          void *lr_ctxt, int do_extend_border);
 void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers);
 void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
                                 int num_workers, int num_rows_lr,
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 8e7ffce..34dd438 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -5280,6 +5280,9 @@
         cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
         cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
         cm->rst_info[2].frame_restoration_type != RESTORE_NONE;
+    // Frame border extension is not required in the decoder
+    // as it happens in extend_mc_border().
+    int do_extend_border_mt = 0;
     if (!optimized_loop_restoration) {
       if (do_loop_restoration)
         av1_loop_restoration_save_boundary_lines(&pbi->common.cur_frame->buf,
@@ -5289,7 +5292,8 @@
         if (pbi->num_workers > 1) {
           av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
                             pbi->tile_workers, &pbi->cdef_sync,
-                            pbi->num_workers, av1_cdef_init_fb_row_mt);
+                            pbi->num_workers, av1_cdef_init_fb_row_mt,
+                            do_extend_border_mt);
         } else {
           av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
                          av1_cdef_init_fb_row);
@@ -5305,7 +5309,7 @@
           av1_loop_restoration_filter_frame_mt(
               (YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
               pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
-              &pbi->lr_ctxt);
+              &pbi->lr_ctxt, do_extend_border_mt);
         } else {
           av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
                                             cm, optimized_loop_restoration,
@@ -5320,7 +5324,7 @@
           av1_loop_restoration_filter_frame_mt(
               (YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
               pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
-              &pbi->lr_ctxt);
+              &pbi->lr_ctxt, do_extend_border_mt);
         } else {
           av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
                                             cm, optimized_loop_restoration,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index e5f1672..ea862ed 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2159,6 +2159,28 @@
   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
 }
 
+static INLINE int extend_borders_mt(const AV1_COMP *cpi,
+                                    MULTI_THREADED_MODULES stage, int plane) {
+  const AV1_COMMON *const cm = &cpi->common;
+  if (cpi->mt_info.num_mod_workers[stage] < 2) return 0;  // needs >= 2 workers
+  switch (stage) {  // nonzero: callers treat 'plane' as extended in 'stage'
+    // TODO(deepa.kg@ittiam.com): When cdef and loop-restoration are disabled,
+    // multi-thread frame border extension along with loop filter frame.
+    // As loop-filtering of a superblock row modifies the pixels of the
+    // above superblock row, border extension requires that loop filtering
+    // of the current and above superblock row is complete.
+    case MOD_LPF: return 0;  // not supported yet, see the TODO above
+    case MOD_CDEF:
+      return is_cdef_used(cm) && !cpi->rtc_ref.non_reference_frame &&
+             !is_restoration_used(cm) && !av1_superres_scaled(cm);
+    case MOD_LR:
+      return is_restoration_used(cm) &&
+             (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE);
+    default: assert(0);
+  }
+  return 0;  // reached only when the assert above is compiled out
+}
+
 /*!\brief Select and apply cdef filters and switchable restoration filters
  *
  * \ingroup high_level_algo
@@ -2192,9 +2214,13 @@
     // Apply the filter
     if (!cpi->rtc_ref.non_reference_frame) {
       if (num_workers > 1) {
+        // Extension of frame borders is multi-threaded along with cdef.
+        const int do_extend_border =
+            extend_borders_mt(cpi, MOD_CDEF, /* plane */ 0);
         av1_cdef_frame_mt(cm, xd, cpi->mt_info.cdef_worker,
                           cpi->mt_info.workers, &cpi->mt_info.cdef_sync,
-                          num_workers, av1_cdef_init_fb_row_mt);
+                          num_workers, av1_cdef_init_fb_row_mt,
+                          do_extend_border);
       } else {
         av1_cdef_frame(&cm->cur_frame->buf, cm, xd, av1_cdef_init_fb_row);
       }
@@ -2223,13 +2249,17 @@
     if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
         cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
         cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
-      if (num_workers > 1)
+      if (num_workers > 1) {
+        // Extension of frame borders is multi-threaded along with loop
+        // restoration filter.
+        const int do_extend_border = 1;
         av1_loop_restoration_filter_frame_mt(
             &cm->cur_frame->buf, cm, 0, mt_info->workers, num_workers,
-            &mt_info->lr_row_sync, &cpi->lr_ctxt);
-      else
+            &mt_info->lr_row_sync, &cpi->lr_ctxt, do_extend_border);
+      } else {
         av1_loop_restoration_filter_frame(&cm->cur_frame->buf, cm, 0,
                                           &cpi->lr_ctxt);
+      }
     }
   } else {
     cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
@@ -2258,8 +2288,7 @@
 
   const int use_loopfilter =
       !cm->features.coded_lossless && !cm->tiles.large_scale;
-  const int use_cdef = cm->seq_params->enable_cdef &&
-                       !cm->features.coded_lossless && !cm->tiles.large_scale;
+  const int use_cdef = is_cdef_used(cm);
   const int use_restoration = is_restoration_used(cm);
   // lpf_opt_level = 1 : Enables dual/quad loop-filtering.
   // lpf_opt_level is set to 1 if transform size search depth in inter blocks
@@ -3085,8 +3114,15 @@
   }
 
   // TODO(debargha): Fix mv search range on encoder side
-  // aom_extend_frame_inner_borders(&cm->cur_frame->buf, av1_num_planes(cm));
-  aom_extend_frame_borders(&cm->cur_frame->buf, av1_num_planes(cm));
+  for (int plane = 0; plane < av1_num_planes(cm); ++plane) {
+    const int extend_border_done = extend_borders_mt(cpi, MOD_CDEF, plane) ||
+                                   extend_borders_mt(cpi, MOD_LR, plane);
+    if (extend_border_done == 0) {
+      const YV12_BUFFER_CONFIG *ybf = &cm->cur_frame->buf;
+      aom_extend_frame_borders_plane_row(ybf, plane, 0,
+                                         ybf->crop_heights[plane > 0]);
+    }
+  }
 
 #ifdef OUTPUT_YUV_REC
   aom_write_one_yuv_frame(cm, &cm->cur_frame->buf);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index f35843f..19e4a9c 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -4072,6 +4072,12 @@
            cpi->common.height != resize_pending_params->height));
 }
 
+// CDEF is used when enabled, not coded lossless and not large-scale tile.
+static INLINE int is_cdef_used(const AV1_COMMON *const cm) {
+  return cm->seq_params->enable_cdef && !cm->features.coded_lossless &&
+         !cm->tiles.large_scale;
+}
+
 // Check if loop restoration filter is used.
 static INLINE int is_restoration_used(const AV1_COMMON *const cm) {
   return cm->seq_params->enable_restoration && !cm->features.all_lossless &&