Add row MT support for cdef frame

This CL adds support for row-level multithreading of CDEF
filtering module.

Change-Id: Ia390cf3452eb8cbcad734178d976a249140d8626
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 174b1f9..2d5a195 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -115,15 +115,18 @@
   if (ctx->frame_worker != NULL) {
     AVxWorker *const worker = ctx->frame_worker;
     FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+    AV1Decoder *const pbi = frame_worker_data->pbi;
     aom_get_worker_interface()->end(worker);
-    aom_free(frame_worker_data->pbi->common.tpl_mvs);
-    frame_worker_data->pbi->common.tpl_mvs = NULL;
+    aom_free(pbi->common.tpl_mvs);
+    pbi->common.tpl_mvs = NULL;
     av1_remove_common(&frame_worker_data->pbi->common);
-    av1_free_cdef_linebuf(&frame_worker_data->pbi->common);
+    av1_free_cdef_buffers(&pbi->common, &pbi->cdef_worker, &pbi->cdef_sync,
+                          pbi->num_workers);
+    av1_free_cdef_sync(&pbi->cdef_sync);
 #if !CONFIG_REALTIME_ONLY
-    av1_free_restoration_buffers(&frame_worker_data->pbi->common);
+    av1_free_restoration_buffers(&pbi->common);
 #endif
-    av1_decoder_remove(frame_worker_data->pbi);
+    av1_decoder_remove(pbi);
     aom_free(frame_worker_data);
 #if CONFIG_MULTITHREAD
     pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex);
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 6de10f5..30ef7e3 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -20,6 +20,7 @@
 #include "av1/common/cdef_block.h"
 #include "av1/common/entropymode.h"
 #include "av1/common/entropymv.h"
+#include "av1/common/thread_common.h"
 
 int av1_get_MBs(int width, int height) {
   const int aligned_width = ALIGN_POWER_OF_TWO(width, 3);
@@ -52,25 +53,81 @@
   }
 }
 
-void av1_free_cdef_linebuf(AV1_COMMON *const cm) {
-  if (cm->cdef_info.srcbuf != NULL) aom_free(cm->cdef_info.srcbuf);
-  cm->cdef_info.srcbuf = NULL;
+static INLINE void free_cdef_bufs(uint16_t **colbuf, uint16_t **srcbuf) {
+  aom_free(*srcbuf);
+  *srcbuf = NULL;
   for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
-    if (cm->cdef_info.linebuf[plane] != NULL)
-      aom_free(cm->cdef_info.linebuf[plane]);
-    cm->cdef_info.linebuf[plane] = NULL;
-
-    if (cm->cdef_info.colbuf[plane] != NULL)
-      aom_free(cm->cdef_info.colbuf[plane]);
-    cm->cdef_info.colbuf[plane] = NULL;
+    aom_free(colbuf[plane]);
+    colbuf[plane] = NULL;
   }
 }
 
-void av1_alloc_cdef_linebuf(AV1_COMMON *const cm) {
+void av1_free_cdef_buffers(AV1_COMMON *const cm,
+                           AV1CdefWorkerData **cdef_worker,
+                           AV1CdefSync *cdef_sync, int num_workers) {
+  CdefInfo *cdef_info = &cm->cdef_info;
+  const int num_mi_rows = cdef_info->allocated_mi_rows;
+  AV1CdefRowSync **cdef_row_mt = &cdef_sync->cdef_row_mt;
+
+  for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+    aom_free(cdef_info->linebuf[plane]);
+    cdef_info->linebuf[plane] = NULL;
+  }
+  // De-allocation of column buffer & source buffer (worker_0).
+  free_cdef_bufs(cdef_info->colbuf, &cdef_info->srcbuf);
+
+  if (num_workers < 2) return;
+  if (*cdef_worker != NULL) {
+    for (int idx = num_workers - 1; idx >= 1; idx--) {
+      // De-allocation of column buffer & source buffer for remaining workers.
+      free_cdef_bufs((*cdef_worker)[idx].colbuf, &(*cdef_worker)[idx].srcbuf);
+    }
+    aom_free(*cdef_worker);
+    *cdef_worker = NULL;
+  }
+
+  if (*cdef_row_mt != NULL) {
+#if CONFIG_MULTITHREAD
+    for (int row_idx = 0; row_idx < num_mi_rows; row_idx++) {
+      pthread_mutex_destroy((*cdef_row_mt)[row_idx].row_mutex_);
+      pthread_cond_destroy((*cdef_row_mt)[row_idx].row_cond_);
+      aom_free((*cdef_row_mt)[row_idx].row_mutex_);
+      aom_free((*cdef_row_mt)[row_idx].row_cond_);
+    }
+#else
+    (void)num_mi_rows;
+#endif  // CONFIG_MULTITHREAD
+    aom_free(*cdef_row_mt);
+    *cdef_row_mt = NULL;
+  }
+}
+
+static INLINE void alloc_cdef_bufs(AV1_COMMON *const cm, uint16_t **colbuf,
+                                   uint16_t **srcbuf, const int num_planes) {
+  if (*srcbuf == NULL)
+    CHECK_MEM_ERROR(cm, *srcbuf,
+                    aom_memalign(16, sizeof(**srcbuf) * CDEF_INBUF_SIZE));
+
+  for (int plane = 0; plane < num_planes; plane++) {
+    const int shift = plane == AOM_PLANE_Y ? 0 : cm->seq_params.subsampling_x;
+    const int block_height =
+        (CDEF_BLOCKSIZE << (MI_SIZE_LOG2 - shift)) * 2 * CDEF_VBORDER;
+
+    if (colbuf[plane] == NULL)
+      CHECK_MEM_ERROR(
+          cm, colbuf[plane],
+          aom_malloc(sizeof(*colbuf[plane]) * block_height * CDEF_HBORDER));
+  }
+}
+
+void av1_alloc_cdef_buffers(AV1_COMMON *const cm,
+                            AV1CdefWorkerData **cdef_worker,
+                            AV1CdefSync *cdef_sync, int num_workers) {
   const int num_planes = av1_num_planes(cm);
   const int luma_stride =
       ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols << MI_SIZE_LOG2, 4);
   CdefInfo *cdef_info = &cm->cdef_info;
+  AV1CdefRowSync **cdef_row_mt = &cdef_sync->cdef_row_mt;
   // Check for configuration change
   const int is_sub_sampling_changed =
       (cdef_info->allocated_subsampling_x != cm->seq_params.subsampling_x ||
@@ -82,22 +139,33 @@
   const int is_large_scale_tile_changed =
       cdef_info->prev_large_scale_tile_flag != cm->tiles.large_scale;
   const int is_num_planes_changed = cdef_info->prev_num_planes != num_planes;
+  const int num_mi_rows =
+      (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+  const int is_num_workers_changed =
+      cdef_info->allocated_num_workers != num_workers;
   // num-bufs=3 represents ping-pong buffers for top linebuf,
   // followed by bottom linebuf.
   // ping-pong is to avoid top linebuf over-write by consecutive row.
   int num_bufs = 3;
+  if (num_workers > 1)
+    num_bufs = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
 
+  // TODO(vishnu): Simplify the below conditional logic based on linebuf_size.
   if (is_frame_scaled || is_sub_sampling_changed || is_cdef_flag_changed ||
-      is_large_scale_tile_changed || is_num_planes_changed)
-    av1_free_cdef_linebuf(cm);
+      is_large_scale_tile_changed || is_num_planes_changed ||
+      is_num_workers_changed)
+    av1_free_cdef_buffers(cm, cdef_worker, cdef_sync,
+                          cdef_info->allocated_num_workers);
 
   // Store configuration to check change in configuration
   cdef_info->allocated_mi_cols = cm->mi_params.mi_cols;
+  cdef_info->allocated_mi_rows = num_mi_rows;
   cdef_info->allocated_subsampling_x = cm->seq_params.subsampling_x;
   cdef_info->allocated_subsampling_y = cm->seq_params.subsampling_y;
   cdef_info->prev_cdef_enable_flag = cm->seq_params.enable_cdef;
   cdef_info->prev_large_scale_tile_flag = cm->tiles.large_scale;
   cdef_info->prev_num_planes = num_planes;
+  cdef_info->allocated_num_workers = num_workers;
 
   if (!cm->seq_params.enable_cdef && cm->tiles.large_scale) return;
 
@@ -112,20 +180,37 @@
     }
   }
 
-  if (cm->cdef_info.srcbuf == NULL)
-    CHECK_MEM_ERROR(
-        cm, cm->cdef_info.srcbuf,
-        aom_memalign(16, sizeof(*cm->cdef_info.srcbuf) * CDEF_INBUF_SIZE));
+  // Memory allocation of column buffer & source buffer (worker_0).
+  alloc_cdef_bufs(cm, cdef_info->colbuf, &cdef_info->srcbuf, num_planes);
 
-  for (int plane = 0; plane < num_planes; plane++) {
-    const int shift = plane == AOM_PLANE_Y ? 0 : cm->seq_params.subsampling_x;
-    const int block_height =
-        (CDEF_BLOCKSIZE << (MI_SIZE_LOG2 - shift)) * 2 * CDEF_VBORDER;
+  if (num_workers < 2) return;
 
-    if (cm->cdef_info.colbuf[plane] == NULL)
-      CHECK_MEM_ERROR(cm, cm->cdef_info.colbuf[plane],
-                      aom_malloc(sizeof(*cm->cdef_info.colbuf[plane]) *
-                                 block_height * CDEF_HBORDER));
+  if (*cdef_worker == NULL)
+    CHECK_MEM_ERROR(cm, *cdef_worker,
+                    aom_calloc(num_workers, sizeof(**cdef_worker)));
+
+  for (int idx = num_workers - 1; idx >= 1; idx--) {
+    // Memory allocation of column buffer & source buffer for remaining workers.
+    alloc_cdef_bufs(cm, (*cdef_worker)[idx].colbuf, &(*cdef_worker)[idx].srcbuf,
+                    num_planes);
+  }
+
+  if (*cdef_row_mt == NULL) {
+    CHECK_MEM_ERROR(cm, *cdef_row_mt,
+                    aom_malloc(sizeof(**cdef_row_mt) * num_mi_rows));
+#if CONFIG_MULTITHREAD
+    for (int row_idx = 0; row_idx < num_mi_rows; row_idx++) {
+      CHECK_MEM_ERROR(cm, (*cdef_row_mt)[row_idx].row_mutex_,
+                      aom_malloc(sizeof(*(*cdef_row_mt)[row_idx].row_mutex_)));
+      pthread_mutex_init((*cdef_row_mt)[row_idx].row_mutex_, NULL);
+
+      CHECK_MEM_ERROR(cm, (*cdef_row_mt)[row_idx].row_cond_,
+                      aom_malloc(sizeof(*(*cdef_row_mt)[row_idx].row_cond_)));
+      pthread_cond_init((*cdef_row_mt)[row_idx].row_cond_, NULL);
+
+      (*cdef_row_mt)[row_idx].is_row_done = 0;
+    }
+#endif  // CONFIG_MULTITHREAD
   }
 }
 
diff --git a/av1/common/alloccommon.h b/av1/common/alloccommon.h
index 3d74007..0b43889 100644
--- a/av1/common/alloccommon.h
+++ b/av1/common/alloccommon.h
@@ -24,6 +24,8 @@
 struct BufferPool;
 struct CommonContexts;
 struct CommonModeInfoParams;
+struct AV1CdefWorker;
+struct AV1CdefSyncData;
 
 void av1_remove_common(struct AV1Common *cm);
 
@@ -36,8 +38,12 @@
 void av1_free_context_buffers(struct AV1Common *cm);
 
 void av1_free_ref_frame_buffers(struct BufferPool *pool);
-void av1_alloc_cdef_linebuf(struct AV1Common *const cm);
-void av1_free_cdef_linebuf(struct AV1Common *const cm);
+void av1_alloc_cdef_buffers(struct AV1Common *const cm,
+                            struct AV1CdefWorker **cdef_worker,
+                            struct AV1CdefSyncData *cdef_sync, int num_workers);
+void av1_free_cdef_buffers(struct AV1Common *const cm,
+                           struct AV1CdefWorker **cdef_worker,
+                           struct AV1CdefSyncData *cdef_sync, int num_workers);
 #if !CONFIG_REALTIME_ONLY
 void av1_alloc_restoration_buffers(struct AV1Common *cm);
 void av1_free_restoration_buffers(struct AV1Common *cm);
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index e8831c7..233c9fa 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -202,11 +202,13 @@
                                                 chroma */
   int cdef_bits;                 /*!< Number of CDEF strength values in bits */
   int allocated_mi_cols;         /*!< Number of cols in the frame in 4 pixel */
+  int allocated_mi_rows;         /*!< Number of rows in the frame in 4 pixel */
   int allocated_subsampling_x;   /*!< Chroma subsampling for x */
   int allocated_subsampling_y;   /*!< Chroma subsampling for y */
   uint8_t prev_cdef_enable_flag; /*!< CDEF on/off flag */
   unsigned int prev_large_scale_tile_flag; /*!< Large scale tile on/off flag */
   int prev_num_planes;                     /*!< Number of planes */
+  int allocated_num_workers;               /*!< Number of CDEF workers */
 } CdefInfo;
 
 /*!\cond */
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index 7630386..e8e381f 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -21,36 +21,6 @@
 #include "av1/common/cdef_block.h"
 #include "av1/common/reconinter.h"
 
-enum { TOP, LEFT, BOTTOM, RIGHT, BOUNDARIES } UENUM1BYTE(BOUNDARY);
-
-/*!\brief Parameters related to CDEF Block */
-typedef struct {
-  uint16_t *src;
-  uint16_t *top_linebuf[MAX_MB_PLANE];
-  uint16_t *bot_linebuf[MAX_MB_PLANE];
-  uint8_t *dst;
-  cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
-
-  int xdec;
-  int ydec;
-  int mi_wide_l2;
-  int mi_high_l2;
-  int frame_boundary[BOUNDARIES];
-
-  int damping;
-  int coeff_shift;
-  int level;
-  int sec_strength;
-  int cdef_count;
-  int is_zero_level;
-  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS];
-  int var[CDEF_NBLOCKS][CDEF_NBLOCKS];
-
-  int dst_stride;
-  int coffset;
-  int roffset;
-} CdefBlockInfo;
-
 static int is_8x8_block_skip(MB_MODE_INFO **grid, int mi_row, int mi_col,
                              int mi_stride) {
   MB_MODE_INFO **mbmi = grid + mi_row * mi_stride + mi_col;
@@ -117,9 +87,9 @@
   }
 }
 
-static void copy_sb8_16(AV1_COMMON *cm, uint16_t *dst, int dstride,
-                        const uint8_t *src, int src_voffset, int src_hoffset,
-                        int sstride, int vsize, int hsize) {
+void av1_cdef_copy_sb8_16(AV1_COMMON *cm, uint16_t *dst, int dstride,
+                          const uint8_t *src, int src_voffset, int src_hoffset,
+                          int sstride, int vsize, int hsize) {
   if (cm->seq_params.use_highbitdepth) {
     const uint16_t *base =
         &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
@@ -192,10 +162,10 @@
 
   /* Copy in the pixels we need from the current superblock for
   deringing.*/
-  copy_sb8_16(cm, &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
-              CDEF_BSTRIDE, fb_info->dst, fb_info->roffset,
-              fb_info->coffset + cstart, fb_info->dst_stride, vsize,
-              cend - cstart);
+  av1_cdef_copy_sb8_16(
+      cm, &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
+      CDEF_BSTRIDE, fb_info->dst, fb_info->roffset, fb_info->coffset + cstart,
+      fb_info->dst_stride, vsize, cend - cstart);
 
   /* Copy in the pixels we need for the current superblock from bottom buffer.*/
   if (fbr < nvfb - 1) {
@@ -340,7 +310,7 @@
     *cdef_left = 0;
     return;
   }
-  for (uint8_t plane = 0; plane < num_planes; plane++) {
+  for (int plane = 0; plane < num_planes; plane++) {
     cdef_init_fb_col(xd, &cm->cdef_info, fb_info, mbmi_cdef_strength, fbc, fbr,
                      plane);
     if (fb_info->is_zero_level ||
@@ -356,10 +326,12 @@
   *cdef_left = 1;
 }
 
-static INLINE void cdef_init_fb_row(AV1_COMMON *cm, MACROBLOCKD *const xd,
-                                    CdefBlockInfo *fb_info,
-                                    uint16_t **const linebuf,
-                                    uint16_t *const src, int fbr) {
+// Initializes row-level parameters for CDEF frame.
+void av1_cdef_init_fb_row(AV1_COMMON *cm, MACROBLOCKD *const xd,
+                          CdefBlockInfo *fb_info, uint16_t **const linebuf,
+                          uint16_t *const src,
+                          struct AV1CdefSyncData *cdef_sync, int fbr) {
+  (void)cdef_sync;
   const int num_planes = av1_num_planes(cm);
   const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
   const int luma_stride =
@@ -387,7 +359,7 @@
   av1_zero(fb_info->dir);
   av1_zero(fb_info->var);
 
-  for (uint8_t plane = 0; plane < num_planes; plane++) {
+  for (int plane = 0; plane < num_planes; plane++) {
     const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
     const int offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
     const int stride = luma_stride >> xd->plane[plane].subsampling_x;
@@ -398,27 +370,28 @@
     fb_info->bot_linebuf[plane] = &linebuf[plane][(CDEF_VBORDER << 1) * stride];
 
     if (fbr != nvfb - 1)  // top line buffer copy
-      copy_sb8_16(cm, top_linebuf, stride, xd->plane[plane].dst.buf,
-                  offset - CDEF_VBORDER, 0, xd->plane[plane].dst.stride,
-                  CDEF_VBORDER, stride);
+      av1_cdef_copy_sb8_16(cm, top_linebuf, stride, xd->plane[plane].dst.buf,
+                           offset - CDEF_VBORDER, 0,
+                           xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
     fb_info->top_linebuf[plane] =
         &linebuf[plane][(!ping_pong) * CDEF_VBORDER * stride];
 
     if (fbr != nvfb - 1)  // bottom line buffer copy
-      copy_sb8_16(cm, fb_info->bot_linebuf[plane], stride,
-                  xd->plane[plane].dst.buf, offset, 0,
-                  xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
+      av1_cdef_copy_sb8_16(cm, fb_info->bot_linebuf[plane], stride,
+                           xd->plane[plane].dst.buf, offset, 0,
+                           xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
   }
 }
 
-static void cdef_fb_row(AV1_COMMON *cm, MACROBLOCKD *xd,
-                        uint16_t **const linebuf, uint16_t **const colbuf,
-                        uint16_t *const src, int fbr) {
+void av1_cdef_fb_row(AV1_COMMON *cm, MACROBLOCKD *xd, uint16_t **const linebuf,
+                     uint16_t **const colbuf, uint16_t *const src, int fbr,
+                     cdef_init_fb_row_t cdef_init_fb_row_fn,
+                     struct AV1CdefSyncData *cdef_sync) {
   CdefBlockInfo fb_info;
   int cdef_left = 1;
   const int nhfb = (cm->mi_params.mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
 
-  cdef_init_fb_row(cm, xd, &fb_info, linebuf, src, fbr);
+  cdef_init_fb_row_fn(cm, xd, &fb_info, linebuf, src, cdef_sync, fbr);
   for (int fbc = 0; fbc < nhfb; fbc++) {
     fb_info.frame_boundary[LEFT] = (MI_SIZE_64X64 * fbc == 0) ? 1 : 0;
     if (fbc != nhfb - 1)
@@ -437,8 +410,8 @@
 //   xd: Pointer to common current coding block structure.
 // Returns:
 //   Nothing will be returned.
-void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                    MACROBLOCKD *xd) {
+void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
+                    cdef_init_fb_row_t cdef_init_fb_row_fn) {
   const int num_planes = av1_num_planes(cm);
   const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
 
@@ -446,6 +419,6 @@
                        num_planes);
 
   for (int fbr = 0; fbr < nvfb; fbr++)
-    cdef_fb_row(cm, xd, cm->cdef_info.linebuf, cm->cdef_info.colbuf,
-                cm->cdef_info.srcbuf, fbr);
+    av1_cdef_fb_row(cm, xd, cm->cdef_info.linebuf, cm->cdef_info.colbuf,
+                    cm->cdef_info.srcbuf, fbr, cdef_init_fb_row_fn, NULL);
 }
diff --git a/av1/common/cdef.h b/av1/common/cdef.h
index 4d6e600..81c8469 100644
--- a/av1/common/cdef.h
+++ b/av1/common/cdef.h
@@ -23,6 +23,40 @@
 #include "av1/common/av1_common_int.h"
 #include "av1/common/cdef_block.h"
 
+enum { TOP, LEFT, BOTTOM, RIGHT, BOUNDARIES } UENUM1BYTE(BOUNDARY);
+
+struct AV1CdefSyncData;
+
+/*!\brief Parameters related to CDEF Block */
+typedef struct {
+  uint16_t *src;                       /*!< CDEF intermediate buffer */
+  uint16_t *top_linebuf[MAX_MB_PLANE]; /*!< CDEF top line buffer */
+  uint16_t *bot_linebuf[MAX_MB_PLANE]; /*!< CDEF bottom line buffer */
+  uint8_t *dst;                        /*!< CDEF destination buffer */
+  cdef_list
+      dlist[MI_SIZE_64X64 * MI_SIZE_64X64]; /*!< CDEF 8x8 block positions */
+
+  int xdec;                       /*!< Sub-sampling X */
+  int ydec;                       /*!< Sub-sampling X */
+  int mi_wide_l2;                 /*!< Pixels per mi unit in width */
+  int mi_high_l2;                 /*!< Pixels per mi unit in height */
+  int frame_boundary[BOUNDARIES]; /*!< frame boundaries */
+
+  int damping;     /*!< CDEF damping factor */
+  int coeff_shift; /*!< Bit-depth based shift for calculating filter strength */
+  int level;       /*!< CDEF filtering level */
+  int sec_strength;  /*!< CDEF secondary strength */
+  int cdef_count;    /*!< Number of CDEF sub-blocks in superblock */
+  int is_zero_level; /*!< CDEF filtering level ON/OFF */
+  int dir[CDEF_NBLOCKS]
+         [CDEF_NBLOCKS]; /*!< CDEF filter direction for all 8x8 sub-blocks*/
+  int var[CDEF_NBLOCKS][CDEF_NBLOCKS]; /*!< variance for all 8x8 sub-blocks */
+
+  int dst_stride; /*!< CDEF destination buffer stride */
+  int coffset;    /*!< current superblock offset in a row */
+  int roffset;    /*!< current row offset */
+} CdefBlockInfo;
+
 static INLINE int sign(int i) { return i < 0 ? -1 : 1; }
 
 static INLINE int constrain(int diff, int threshold, int damping) {
@@ -41,19 +75,35 @@
                              int mi_row, int mi_col, cdef_list *dlist,
                              BLOCK_SIZE bsize);
 
+typedef void (*cdef_init_fb_row_t)(AV1_COMMON *cm, MACROBLOCKD *const xd,
+                                   CdefBlockInfo *fb_info,
+                                   uint16_t **const linebuf,
+                                   uint16_t *const src,
+                                   struct AV1CdefSyncData *cdef_sync, int fbr);
+
 /*!\brief Function for applying CDEF to a frame
  *
  * \ingroup in_loop_cdef
  * This function applies CDEF to a frame.
  *
- * \param[in, out]  frame       Compressed frame buffer
- * \param[in, out]  cm          Pointer to top level common structure
- * \param[in]       xd          Pointer to common current coding block structure
+ * \param[in, out]  frame     Compressed frame buffer
+ * \param[in, out]  cm        Pointer to top level common structure
+ * \param[in]       xd        Pointer to common current coding block structure
+ * \param[in]       cdef_init_fb_row_fn   Function Pointer
  *
  * \return Nothing is returned. Instead, the filtered frame is output in
  * \c frame.
  */
-void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
+void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
+                    cdef_init_fb_row_t cdef_init_fb_row_fn);
+void av1_cdef_fb_row(AV1_COMMON *cm, MACROBLOCKD *xd, uint16_t **const linebuf,
+                     uint16_t **const colbuf, uint16_t *const src, int fbr,
+                     cdef_init_fb_row_t cdef_init_fb_row_fn,
+                     struct AV1CdefSyncData *cdef_sync);
+void av1_cdef_init_fb_row(AV1_COMMON *cm, MACROBLOCKD *const xd,
+                          CdefBlockInfo *fb_info, uint16_t **const linebuf,
+                          uint16_t *const src,
+                          struct AV1CdefSyncData *cdef_sync, int fbr);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
index 638dc4c..2f84355 100644
--- a/av1/common/thread_common.c
+++ b/av1/common/thread_common.c
@@ -152,6 +152,59 @@
   }
 }
 
+void av1_alloc_cdef_sync(AV1_COMMON *cm, AV1CdefSync *cdef_sync,
+                         int num_workers) {
+  if (num_workers < 1) return;
+#if CONFIG_MULTITHREAD
+  if (cdef_sync->mutex_ == NULL) {
+    CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
+                    aom_malloc(sizeof(*(cdef_sync->mutex_))));
+    if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+  }
+#else
+  (void)cm;
+  (void)cdef_sync;
+#endif  // CONFIG_MULTITHREAD
+}
+
+void av1_free_cdef_sync(AV1CdefSync *cdef_sync) {
+  if (cdef_sync == NULL) return;
+#if CONFIG_MULTITHREAD
+  if (cdef_sync->mutex_ != NULL) {
+    pthread_mutex_destroy(cdef_sync->mutex_);
+    aom_free(cdef_sync->mutex_);
+  }
+#endif  // CONFIG_MULTITHREAD
+}
+
+static INLINE void cdef_row_mt_sync_read(AV1CdefSync *cdef_sync, int row) {
+  if (!row) return;
+#if CONFIG_MULTITHREAD
+  AV1CdefRowSync *cdef_row_mt = cdef_sync->cdef_row_mt;
+  pthread_mutex_lock(cdef_row_mt[row - 1].row_mutex_);
+  while (cdef_row_mt[row - 1].is_row_done != 1)
+    pthread_cond_wait(cdef_row_mt[row - 1].row_cond_,
+                      cdef_row_mt[row - 1].row_mutex_);
+  cdef_row_mt[row - 1].is_row_done = 0;
+  pthread_mutex_unlock(cdef_row_mt[row - 1].row_mutex_);
+#else
+  (void)cdef_sync;
+#endif  // CONFIG_MULTITHREAD
+}
+
+static INLINE void cdef_row_mt_sync_write(AV1CdefSync *cdef_sync, int row) {
+#if CONFIG_MULTITHREAD
+  AV1CdefRowSync *cdef_row_mt = cdef_sync->cdef_row_mt;
+  pthread_mutex_lock(cdef_row_mt[row].row_mutex_);
+  pthread_cond_signal(cdef_row_mt[row].row_cond_);
+  cdef_row_mt[row].is_row_done = 1;
+  pthread_mutex_unlock(cdef_row_mt[row].row_mutex_);
+#else
+  (void)cdef_sync;
+  (void)row;
+#endif  // CONFIG_MULTITHREAD
+}
+
 static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c,
                              int plane) {
 #if CONFIG_MULTITHREAD
@@ -932,3 +985,196 @@
                                  cm);
 }
 #endif
+
+// Initializes cdef_sync parameters.
+static AOM_INLINE void reset_cdef_job_info(AV1CdefSync *cdef_sync) {
+  cdef_sync->end_of_frame = 0;
+  cdef_sync->fbr = 0;
+  cdef_sync->fbc = 0;
+}
+
+static AOM_INLINE void launch_cdef_workers(AVxWorker *const workers,
+                                           int num_workers) {
+  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+  for (int i = num_workers - 1; i >= 0; i--) {
+    AVxWorker *const worker = &workers[i];
+    if (i == 0)
+      winterface->execute(worker);
+    else
+      winterface->launch(worker);
+  }
+}
+
+static AOM_INLINE void sync_cdef_workers(AVxWorker *const workers,
+                                         AV1_COMMON *const cm,
+                                         int num_workers) {
+  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+  int had_error = 0;
+
+  // Wait for completion of Cdef frame.
+  for (int i = num_workers - 1; i >= 0; i--) {
+    AVxWorker *const worker = &workers[i];
+    had_error |= !winterface->sync(worker);
+  }
+  if (had_error)
+    aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+                       "Failed to process cdef frame");
+}
+
+// Updates the row index of the next job to be processed.
+// Also updates end_of_frame flag when the processing of all rows is complete.
+static void update_cdef_row_next_job_info(AV1CdefSync *cdef_sync, int nvfb) {
+  cdef_sync->fbr++;
+  if (cdef_sync->fbr == nvfb) {
+    cdef_sync->end_of_frame = 1;
+  }
+}
+
+// Checks if a job is available. If job is available,
+// populates next job information and returns 1, else returns 0.
+static AOM_INLINE int get_cdef_row_next_job(AV1CdefSync *cdef_sync,
+                                            int *cur_fbr, const int nvfb) {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_lock(cdef_sync->mutex_);
+#endif  // CONFIG_MULTITHREAD
+  int do_next_row = 0;
+  // Populates information needed for current job and update the row
+  // index of the next row to be processed.
+  if (cdef_sync->end_of_frame == 0) {
+    do_next_row = 1;
+    *cur_fbr = cdef_sync->fbr;
+    update_cdef_row_next_job_info(cdef_sync, nvfb);
+  }
+#if CONFIG_MULTITHREAD
+  pthread_mutex_unlock(cdef_sync->mutex_);
+#endif  // CONFIG_MULTITHREAD
+  return do_next_row;
+}
+
+// Hook function for each thread in CDEF multi-threading.
+static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
+  AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1;
+  AV1CdefWorkerData *cdef_worker = (AV1CdefWorkerData *)arg2;
+  const int nvfb =
+      (cdef_worker->cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+  int cur_fbr;
+  while (get_cdef_row_next_job(cdef_sync, &cur_fbr, nvfb)) {
+    av1_cdef_fb_row(cdef_worker->cm, cdef_worker->xd, cdef_worker->linebuf,
+                    cdef_worker->colbuf, cdef_worker->srcbuf, cur_fbr,
+                    cdef_worker->cdef_init_fb_row_fn, cdef_sync);
+  }
+  return 1;
+}
+
+// Assigns CDEF hook function and thread data to each worker.
+static void prepare_cdef_frame_workers(AV1_COMMON *cm, MACROBLOCKD *xd,
+                                       AV1CdefWorkerData *cdef_worker,
+                                       AVxWorkerHook hook, AVxWorker *workers,
+                                       AV1CdefSync *cdef_sync, int num_workers,
+                                       cdef_init_fb_row_t cdef_init_fb_row_fn) {
+  const int num_planes = av1_num_planes(cm);
+
+  cdef_worker[0].srcbuf = cm->cdef_info.srcbuf;
+  for (int plane = 0; plane < num_planes; plane++)
+    cdef_worker[0].colbuf[plane] = cm->cdef_info.colbuf[plane];
+  for (int i = num_workers - 1; i >= 0; i--) {
+    AVxWorker *worker = &workers[i];
+    cdef_worker[i].cm = cm;
+    cdef_worker[i].xd = xd;
+    cdef_worker[i].cdef_init_fb_row_fn = cdef_init_fb_row_fn;
+    for (int plane = 0; plane < num_planes; plane++)
+      cdef_worker[i].linebuf[plane] = cm->cdef_info.linebuf[plane];
+
+    worker->hook = hook;
+    worker->data1 = cdef_sync;
+    worker->data2 = &cdef_worker[i];
+  }
+}
+
+// Initializes row-level parameters for CDEF frame.
+void av1_cdef_init_fb_row_mt(AV1_COMMON *cm, MACROBLOCKD *const xd,
+                             CdefBlockInfo *fb_info, uint16_t **const linebuf,
+                             uint16_t *const src,
+                             struct AV1CdefSyncData *cdef_sync, int fbr) {
+  const int num_planes = av1_num_planes(cm);
+  const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+  const int luma_stride =
+      ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols << MI_SIZE_LOG2, 4);
+
+  // for the current filter block, it's top left corner mi structure (mi_tl)
+  // is first accessed to check whether the top and left boundaries are
+  // frame boundaries. Then bottom-left and top-right mi structures are
+  // accessed to check whether the bottom and right boundaries
+  // (respectively) are frame boundaries.
+  //
+  // Note that we can't just check the bottom-right mi structure - eg. if
+  // we're at the right-hand edge of the frame but not the bottom, then
+  // the bottom-right mi is NULL but the bottom-left is not.
+  fb_info->frame_boundary[TOP] = (MI_SIZE_64X64 * fbr == 0) ? 1 : 0;
+  if (fbr != nvfb - 1)
+    fb_info->frame_boundary[BOTTOM] =
+        (MI_SIZE_64X64 * (fbr + 1) == cm->mi_params.mi_rows) ? 1 : 0;
+  else
+    fb_info->frame_boundary[BOTTOM] = 1;
+
+  fb_info->src = src;
+  fb_info->damping = cm->cdef_info.cdef_damping;
+  fb_info->coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
+  av1_zero(fb_info->dir);
+  av1_zero(fb_info->var);
+
+  for (int plane = 0; plane < num_planes; plane++) {
+    const int stride = luma_stride >> xd->plane[plane].subsampling_x;
+    uint16_t *top_linebuf = &linebuf[plane][0];
+    uint16_t *bot_linebuf = &linebuf[plane][nvfb * CDEF_VBORDER * stride];
+    {
+      const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
+      const int top_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
+      const int bot_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
+
+      if (fbr != nvfb - 1)  // if (fbr != 0)  // top line buffer copy
+        av1_cdef_copy_sb8_16(
+            cm, &top_linebuf[(fbr + 1) * CDEF_VBORDER * stride], stride,
+            xd->plane[plane].dst.buf, top_offset - CDEF_VBORDER, 0,
+            xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
+      if (fbr != nvfb - 1)  // bottom line buffer copy
+        av1_cdef_copy_sb8_16(cm, &bot_linebuf[fbr * CDEF_VBORDER * stride],
+                             stride, xd->plane[plane].dst.buf, bot_offset, 0,
+                             xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
+    }
+
+    fb_info->top_linebuf[plane] = &linebuf[plane][fbr * CDEF_VBORDER * stride];
+    fb_info->bot_linebuf[plane] =
+        &linebuf[plane]
+                [nvfb * CDEF_VBORDER * stride + (fbr * CDEF_VBORDER * stride)];
+  }
+
+  cdef_row_mt_sync_write(cdef_sync, fbr);
+  cdef_row_mt_sync_read(cdef_sync, fbr);
+}
+
+// Implements multi-threading for CDEF.
+// Perform CDEF on input frame.
+// Inputs:
+//   frame: Pointer to input frame buffer.
+//   cm: Pointer to common structure.
+//   xd: Pointer to common current coding block structure.
+// Returns:
+//   Nothing will be returned.
+void av1_cdef_frame_mt(AV1_COMMON *cm, MACROBLOCKD *xd,
+                       AV1CdefWorkerData *cdef_worker, AVxWorker *workers,
+                       AV1CdefSync *cdef_sync, int num_workers,
+                       cdef_init_fb_row_t cdef_init_fb_row_fn) {
+  YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf;
+  const int num_planes = av1_num_planes(cm);
+
+  av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
+                       num_planes);
+
+  reset_cdef_job_info(cdef_sync);
+  prepare_cdef_frame_workers(cm, xd, cdef_worker, cdef_sb_row_worker_hook,
+                             workers, cdef_sync, num_workers,
+                             cdef_init_fb_row_fn);
+  launch_cdef_workers(workers, num_workers);
+  sync_cdef_workers(workers, cm, num_workers);
+}
diff --git a/av1/common/thread_common.h b/av1/common/thread_common.h
index 97b8abc..d27baff 100644
--- a/av1/common/thread_common.h
+++ b/av1/common/thread_common.h
@@ -15,6 +15,7 @@
 #include "config/aom_config.h"
 
 #include "av1/common/av1_loopfilter.h"
+#include "av1/common/cdef.h"
 #include "aom_util/aom_thread.h"
 
 #ifdef __cplusplus
@@ -97,6 +98,54 @@
   int jobs_dequeued;
 } AV1LrSync;
 
+typedef struct AV1CdefWorker {
+  AV1_COMMON *cm;
+  MACROBLOCKD *xd;
+  uint16_t *colbuf[MAX_MB_PLANE];
+  uint16_t *srcbuf;
+  uint16_t *linebuf[MAX_MB_PLANE];
+  cdef_init_fb_row_t cdef_init_fb_row_fn;
+} AV1CdefWorkerData;
+
+typedef struct AV1CdefRowSync {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t *row_mutex_;
+  pthread_cond_t *row_cond_;
+#endif  // CONFIG_MULTITHREAD
+  int is_row_done;
+} AV1CdefRowSync;
+
+// Data related to CDEF search multi-thread synchronization.
+typedef struct AV1CdefSyncData {
+#if CONFIG_MULTITHREAD
+  // Mutex lock used while dispatching jobs.
+  pthread_mutex_t *mutex_;
+#endif  // CONFIG_MULTITHREAD
+  // Data related to CDEF row mt sync information
+  AV1CdefRowSync *cdef_row_mt;
+  // Flag to indicate all blocks are processed and end of frame is reached
+  int end_of_frame;
+  // Row index in units of 64x64 block
+  int fbr;
+  // Column index in units of 64x64 block
+  int fbc;
+} AV1CdefSync;
+
+void av1_cdef_frame_mt(AV1_COMMON *cm, MACROBLOCKD *xd,
+                       AV1CdefWorkerData *cdef_worker, AVxWorker *workers,
+                       AV1CdefSync *cdef_sync, int num_workers,
+                       cdef_init_fb_row_t cdef_init_fb_row_fn);
+void av1_cdef_init_fb_row_mt(AV1_COMMON *cm, MACROBLOCKD *const xd,
+                             CdefBlockInfo *fb_info, uint16_t **const linebuf,
+                             uint16_t *const src,
+                             struct AV1CdefSyncData *cdef_sync, int fbr);
+void av1_cdef_copy_sb8_16(AV1_COMMON *cm, uint16_t *dst, int dstride,
+                          const uint8_t *src, int src_voffset, int src_hoffset,
+                          int sstride, int vsize, int hsize);
+void av1_alloc_cdef_sync(AV1_COMMON *cm, AV1CdefSync *cdef_sync,
+                         int num_workers);
+void av1_free_cdef_sync(AV1CdefSync *cdef_sync);
+
 // Deallocate loopfilter synchronization related mutex and data.
 void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
 
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 8ba19c5..93c4eb5 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -5232,7 +5232,6 @@
 static AOM_INLINE void setup_frame_info(AV1Decoder *pbi) {
   AV1_COMMON *const cm = &pbi->common;
 
-  av1_alloc_cdef_linebuf(cm);
 #if !CONFIG_REALTIME_ONLY
   if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
       cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
@@ -5282,6 +5281,10 @@
     return;
   }
 
+  av1_alloc_cdef_buffers(cm, &pbi->cdef_worker, &pbi->cdef_sync,
+                         pbi->num_workers);
+  av1_alloc_cdef_sync(cm, &pbi->cdef_sync, pbi->num_workers);
+
   if (!cm->features.allow_intrabc && !tiles->single_tile_decoding) {
     if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
       if (pbi->num_workers > 1) {
@@ -5318,7 +5321,14 @@
                                                  cm, 0);
 
       if (do_cdef) {
-        av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd);
+        if (pbi->num_workers > 1) {
+          av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
+                            pbi->tile_workers, &pbi->cdef_sync,
+                            pbi->num_workers, av1_cdef_init_fb_row_mt);
+        } else {
+          av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
+                         av1_cdef_init_fb_row);
+        }
       }
 
       superres_post_decode(pbi);
@@ -5356,7 +5366,14 @@
 #else
     if (!optimized_loop_restoration) {
       if (do_cdef) {
-        av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd);
+        if (pbi->num_workers > 1) {
+          av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
+                            pbi->tile_workers, &pbi->cdef_sync,
+                            pbi->num_workers, av1_cdef_init_fb_row_mt);
+        } else {
+          av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
+                         av1_cdef_init_fb_row);
+        }
       }
     }
 #endif  // !CONFIG_REALTIME_ONLY
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 61147f9..a8840e6 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -229,6 +229,8 @@
   AV1LfSync lf_row_sync;
   AV1LrSync lr_row_sync;
   AV1LrStruct lr_ctxt;
+  AV1CdefSync cdef_sync;
+  AV1CdefWorkerData *cdef_worker;
   AVxWorker *tile_workers;
   int num_workers;
   DecWorkerData *thread_data;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 517bb87..4826ee8 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2102,7 +2102,10 @@
     aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
                        "Failed to allocate frame buffer");
 
-  if (!is_stat_generation_stage(cpi)) av1_alloc_cdef_linebuf(cm);
+  if (!is_stat_generation_stage(cpi))
+    av1_alloc_cdef_buffers(cm, &cpi->mt_info.cdef_worker,
+                           &cpi->mt_info.cdef_sync,
+                           cpi->mt_info.num_mod_workers[MOD_CDEF]);
 
 #if !CONFIG_REALTIME_ONLY
   const int use_restoration = cm->seq_params.enable_restoration &&
@@ -2159,13 +2162,21 @@
 #if CONFIG_COLLECT_COMPONENT_TIMING
     start_timing(cpi, cdef_time);
 #endif
+    const int num_workers = cpi->mt_info.num_mod_workers[MOD_CDEF];
     // Find CDEF parameters
     av1_cdef_search(&cpi->mt_info, &cm->cur_frame->buf, cpi->source, cm, xd,
                     cpi->sf.lpf_sf.cdef_pick_method, cpi->td.mb.rdmult);
 
     // Apply the filter
-    if (!cpi->sf.rt_sf.skip_loopfilter_non_reference)
-      av1_cdef_frame(&cm->cur_frame->buf, cm, xd);
+    if (!cpi->sf.rt_sf.skip_loopfilter_non_reference) {
+      if (num_workers > 1) {
+        av1_cdef_frame_mt(cm, xd, cpi->mt_info.cdef_worker,
+                          cpi->mt_info.workers, &cpi->mt_info.cdef_sync,
+                          num_workers, av1_cdef_init_fb_row_mt);
+      } else {
+        av1_cdef_frame(&cm->cur_frame->buf, cm, xd, av1_cdef_init_fb_row);
+      }
+    }
 #if CONFIG_COLLECT_COMPONENT_TIMING
     end_timing(cpi, cdef_time);
 #endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 6b2c029..48ad327b 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -166,6 +166,7 @@
   MOD_ENC,          // Encode stage
   MOD_LPF,          // Deblocking loop filter
   MOD_CDEF_SEARCH,  // CDEF search
+  MOD_CDEF,         // CDEF frame
   MOD_LR,           // Loop restoration filtering
   NUM_MT_MODULES
 } MULTI_THREADED_MODULES;
@@ -1453,6 +1454,11 @@
    * CDEF search multi-threading object.
    */
   AV1CdefSync cdef_sync;
+
+  /*!
+   * CDEF row multi-threading data.
+   */
+  AV1CdefWorkerData *cdef_worker;
 } MultiThreadInfo;
 
 /*!\cond */
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 1d9b772..30b9121 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -253,7 +253,10 @@
   av1_free_restoration_buffers(cm);
 #endif
 
-  if (!is_stat_generation_stage(cpi)) av1_free_cdef_linebuf(cm);
+  if (!is_stat_generation_stage(cpi))
+    av1_free_cdef_buffers(cm, &cpi->mt_info.cdef_worker,
+                          &cpi->mt_info.cdef_sync,
+                          cpi->mt_info.num_mod_workers[MOD_CDEF]);
 
   aom_free_frame_buffer(&cpi->trial_frame_rst);
   aom_free_frame_buffer(&cpi->scaled_source);
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 6cdc2c5..14b185c 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -1826,6 +1826,9 @@
 
 // Initializes cdef_sync parameters.
 static AOM_INLINE void cdef_reset_job_info(AV1CdefSync *cdef_sync) {
+#if CONFIG_MULTITHREAD
+  if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+#endif  // CONFIG_MULTITHREAD
   cdef_sync->end_of_frame = 0;
   cdef_sync->fbr = 0;
   cdef_sync->fbc = 0;
@@ -1961,6 +1964,7 @@
     case MOD_CDEF_SEARCH:
       num_mod_workers = compute_num_cdef_workers(cpi);
       break;
+    case MOD_CDEF: num_mod_workers = compute_num_cdef_workers(cpi); break;
     case MOD_LR: num_mod_workers = compute_num_lr_workers(cpi); break;
     default: assert(0); break;
   }
diff --git a/av1/encoder/pickcdef.h b/av1/encoder/pickcdef.h
index 7fe1edb..b8a244c 100644
--- a/av1/encoder/pickcdef.h
+++ b/av1/encoder/pickcdef.h
@@ -58,20 +58,6 @@
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col);
 
-// Data related to CDEF search multi-thread synchronization.
-typedef struct AV1CdefSyncData {
-#if CONFIG_MULTITHREAD
-  // Mutex lock used while dispatching jobs.
-  pthread_mutex_t *mutex_;
-#endif  // CONFIG_MULTITHREAD
-  // Flag to indicate all blocks are processed and end of frame is reached
-  int end_of_frame;
-  // Row index in units of 64x64 block
-  int fbr;
-  // Column index in units of 64x64 block
-  int fbc;
-} AV1CdefSync;
-
 /*! \brief CDEF search context.
  */
 typedef struct {