Dynamic allocation of PC_TREE and PICK_MODE_CONTEXT

Dynamically allocate PC_TREE and PICK_MODE_CONTEXT nodes only
when a tree node is being tested, and free the nodes of non-optimal
partitions once rate-distortion optimization (RDO) has completed.
This reduces memory usage in mid-quality 360p coding by ~27%.

Change-Id: I75177d9823c0a12420e42aaedfc7292ba967105f
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index 0403405..e61b730 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -1449,6 +1449,8 @@
   MB_MODE_INFO **mi = mi_params->mi_grid_base + offset;
   const BLOCK_SIZE subsize = mi[0]->sb_type;
 
+  assert(bsize < BLOCK_SIZES_ALL);
+
   if (subsize == bsize) return PARTITION_NONE;
 
   const int bhigh = mi_size_high[bsize];
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 9b5b1cb..4cf58c1 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -16,229 +16,6 @@
   BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_128X128,
 };
 
-typedef struct {
-  tran_low_t *coeff_buf[MAX_MB_PLANE];
-  tran_low_t *qcoeff_buf[MAX_MB_PLANE];
-  tran_low_t *dqcoeff_buf[MAX_MB_PLANE];
-} PC_TREE_SHARED_BUFFERS;
-
-static AOM_INLINE void alloc_mode_context(AV1_COMMON *cm, int num_pix,
-                                          PICK_MODE_CONTEXT *ctx,
-                                          PC_TREE_SHARED_BUFFERS *shared_bufs) {
-  const int num_planes = av1_num_planes(cm);
-  int i;
-  const int num_blk = num_pix / 16;
-  ctx->num_4x4_blk = num_blk;
-
-  CHECK_MEM_ERROR(cm, ctx->blk_skip,
-                  aom_calloc(num_blk, sizeof(*ctx->blk_skip)));
-  CHECK_MEM_ERROR(cm, ctx->tx_type_map,
-                  aom_calloc(num_blk, sizeof(*ctx->tx_type_map)));
-  for (i = 0; i < num_planes; ++i) {
-    ctx->coeff[i] = shared_bufs->coeff_buf[i];
-    ctx->qcoeff[i] = shared_bufs->qcoeff_buf[i];
-    ctx->dqcoeff[i] = shared_bufs->dqcoeff_buf[i];
-    CHECK_MEM_ERROR(cm, ctx->eobs[i],
-                    aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
-    CHECK_MEM_ERROR(
-        cm, ctx->txb_entropy_ctx[i],
-        aom_memalign(32, num_blk * sizeof(*ctx->txb_entropy_ctx[i])));
-  }
-
-  if (num_pix <= MAX_PALETTE_SQUARE) {
-    for (i = 0; i < 2; ++i) {
-      CHECK_MEM_ERROR(
-          cm, ctx->color_index_map[i],
-          aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
-    }
-  }
-}
-
-static AOM_INLINE void free_mode_context(PICK_MODE_CONTEXT *ctx,
-                                         const int num_planes) {
-  int i;
-  aom_free(ctx->blk_skip);
-  ctx->blk_skip = 0;
-  aom_free(ctx->tx_type_map);
-  ctx->tx_type_map = 0;
-  for (i = 0; i < num_planes; ++i) {
-    ctx->coeff[i] = 0;
-    ctx->qcoeff[i] = 0;
-    ctx->dqcoeff[i] = 0;
-    aom_free(ctx->eobs[i]);
-    ctx->eobs[i] = 0;
-    aom_free(ctx->txb_entropy_ctx[i]);
-    ctx->txb_entropy_ctx[i] = 0;
-  }
-
-  for (i = 0; i < 2; ++i) {
-    aom_free(ctx->color_index_map[i]);
-    ctx->color_index_map[i] = 0;
-  }
-}
-
-static AOM_INLINE void alloc_tree_contexts(
-    AV1_COMMON *cm, PC_TREE *tree, int num_pix, int is_leaf,
-    PC_TREE_SHARED_BUFFERS *shared_bufs) {
-  alloc_mode_context(cm, num_pix, &tree->none, shared_bufs);
-
-  if (is_leaf) return;
-
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontal[0], shared_bufs);
-  alloc_mode_context(cm, num_pix / 2, &tree->vertical[0], shared_bufs);
-
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontal[1], shared_bufs);
-  alloc_mode_context(cm, num_pix / 2, &tree->vertical[1], shared_bufs);
-
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontala[0], shared_bufs);
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontala[1], shared_bufs);
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontala[2], shared_bufs);
-
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontalb[0], shared_bufs);
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[1], shared_bufs);
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[2], shared_bufs);
-
-  alloc_mode_context(cm, num_pix / 4, &tree->verticala[0], shared_bufs);
-  alloc_mode_context(cm, num_pix / 4, &tree->verticala[1], shared_bufs);
-  alloc_mode_context(cm, num_pix / 2, &tree->verticala[2], shared_bufs);
-
-  alloc_mode_context(cm, num_pix / 2, &tree->verticalb[0], shared_bufs);
-  alloc_mode_context(cm, num_pix / 4, &tree->verticalb[1], shared_bufs);
-  alloc_mode_context(cm, num_pix / 4, &tree->verticalb[2], shared_bufs);
-
-  for (int i = 0; i < 4; ++i) {
-    alloc_mode_context(cm, num_pix / 4, &tree->horizontal4[i], shared_bufs);
-    alloc_mode_context(cm, num_pix / 4, &tree->vertical4[i], shared_bufs);
-  }
-}
-
-static AOM_INLINE void free_tree_contexts(PC_TREE *tree, const int num_planes) {
-  int i;
-  for (i = 0; i < 3; i++) {
-    free_mode_context(&tree->horizontala[i], num_planes);
-    free_mode_context(&tree->horizontalb[i], num_planes);
-    free_mode_context(&tree->verticala[i], num_planes);
-    free_mode_context(&tree->verticalb[i], num_planes);
-  }
-  for (i = 0; i < 4; ++i) {
-    free_mode_context(&tree->horizontal4[i], num_planes);
-    free_mode_context(&tree->vertical4[i], num_planes);
-  }
-  free_mode_context(&tree->none, num_planes);
-  free_mode_context(&tree->horizontal[0], num_planes);
-  free_mode_context(&tree->horizontal[1], num_planes);
-  free_mode_context(&tree->vertical[0], num_planes);
-  free_mode_context(&tree->vertical[1], num_planes);
-}
-
-// This function will compute the number of pc_tree nodes to be allocated
-// or freed as per the super block size of BLOCK_128X128 or BLOCK_64X64
-static AOM_INLINE int get_pc_tree_nodes(const int is_sb_size_128,
-                                        int stat_generation_stage) {
-  const int tree_nodes_inc = is_sb_size_128 ? 1024 : 0;
-  const int tree_nodes =
-      stat_generation_stage ? 1 : (tree_nodes_inc + 256 + 64 + 16 + 4 + 1);
-  return tree_nodes;
-}
-
-// This function sets up a tree of contexts such that at each square
-// partition level. There are contexts for none, horizontal, vertical, and
-// split.  Along with a block_size value and a selected block_size which
-// represents the state of our search.
-void av1_setup_pc_tree(AV1_COMP *const cpi, ThreadData *td) {
-  AV1_COMMON *const cm = &cpi->common;
-  int i, j, stat_generation_stage = is_stat_generation_stage(cpi);
-  const int is_sb_size_128 = cm->seq_params.sb_size == BLOCK_128X128;
-  const int tree_nodes =
-      get_pc_tree_nodes(is_sb_size_128, stat_generation_stage);
-  int pc_tree_index = 0;
-  PC_TREE *this_pc;
-  PC_TREE_SHARED_BUFFERS shared_bufs;
-  int square_index = 1;
-  int nodes;
-
-  aom_free(td->pc_tree);
-  CHECK_MEM_ERROR(cm, td->pc_tree,
-                  aom_calloc(tree_nodes, sizeof(*td->pc_tree)));
-  this_pc = &td->pc_tree[0];
-
-  for (i = 0; i < 3; i++) {
-    const int max_num_pix = MAX_SB_SIZE * MAX_SB_SIZE;
-    CHECK_MEM_ERROR(cm, td->tree_coeff_buf[i],
-                    aom_memalign(32, max_num_pix * sizeof(tran_low_t)));
-    CHECK_MEM_ERROR(cm, td->tree_qcoeff_buf[i],
-                    aom_memalign(32, max_num_pix * sizeof(tran_low_t)));
-    CHECK_MEM_ERROR(cm, td->tree_dqcoeff_buf[i],
-                    aom_memalign(32, max_num_pix * sizeof(tran_low_t)));
-    shared_bufs.coeff_buf[i] = td->tree_coeff_buf[i];
-    shared_bufs.qcoeff_buf[i] = td->tree_qcoeff_buf[i];
-    shared_bufs.dqcoeff_buf[i] = td->tree_dqcoeff_buf[i];
-  }
-
-  if (!stat_generation_stage) {
-    const int leaf_factor = is_sb_size_128 ? 4 : 1;
-    const int leaf_nodes = 256 * leaf_factor;
-
-    // Sets up all the leaf nodes in the tree.
-    for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
-      PC_TREE *const tree = &td->pc_tree[pc_tree_index];
-      tree->block_size = square[0];
-      alloc_tree_contexts(cm, tree, 16, 1, &shared_bufs);
-    }
-
-    // Each node has 4 leaf nodes, fill each block_size level of the tree
-    // from leafs to the root.
-    for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
-      for (i = 0; i < nodes; ++i) {
-        PC_TREE *const tree = &td->pc_tree[pc_tree_index];
-        alloc_tree_contexts(cm, tree, 16 << (2 * square_index), 0,
-                            &shared_bufs);
-        tree->block_size = square[square_index];
-        for (j = 0; j < 4; j++) tree->split[j] = this_pc++;
-        ++pc_tree_index;
-      }
-      ++square_index;
-    }
-  } else {
-    // Allocation for firstpass/LAP stage
-    // TODO(Mufaddal): refactor square_index to use a common block_size macro
-    // from firstpass.c
-    PC_TREE *const tree = &td->pc_tree[pc_tree_index];
-    square_index = 2;
-    alloc_tree_contexts(cm, tree, 16 << (2 * square_index), 1, &shared_bufs);
-    tree->block_size = square[square_index];
-  }
-
-  // Set up the root node for the applicable superblock size
-  td->pc_root = &td->pc_tree[tree_nodes - 1];
-#if CONFIG_INTERNAL_STATS
-  td->pc_root->none.best_mode_index = THR_INVALID;
-#endif  // CONFIG_INTERNAL_STATS
-}
-
-void av1_free_pc_tree(const AV1_COMP *const cpi, ThreadData *td,
-                      const int num_planes, BLOCK_SIZE sb_size) {
-  int stat_generation_stage = is_stat_generation_stage(cpi);
-  if (td->pc_tree != NULL) {
-    const int is_sb_size_128 = sb_size == BLOCK_128X128;
-    const int tree_nodes =
-        get_pc_tree_nodes(is_sb_size_128, stat_generation_stage);
-    for (int i = 0; i < tree_nodes; ++i) {
-      free_tree_contexts(&td->pc_tree[i], num_planes);
-    }
-    for (int i = 0; i < 3; ++i) {
-      aom_free(td->tree_coeff_buf[i]);
-      aom_free(td->tree_qcoeff_buf[i]);
-      aom_free(td->tree_dqcoeff_buf[i]);
-      td->tree_coeff_buf[i] = NULL;
-      td->tree_qcoeff_buf[i] = NULL;
-      td->tree_dqcoeff_buf[i] = NULL;
-    }
-    aom_free(td->pc_tree);
-    td->pc_tree = NULL;
-  }
-}
-
 void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
                            PICK_MODE_CONTEXT *src_ctx) {
   dst_ctx->mic = src_ctx->mic;
@@ -263,6 +40,237 @@
   dst_ctx->rd_mode_is_ready = src_ctx->rd_mode_is_ready;
 
   memcpy(dst_ctx->pred_mv, src_ctx->pred_mv, sizeof(MV) * REF_FRAMES);
+}
 
-  dst_ctx->partition = src_ctx->partition;
+void av1_setup_shared_coeff_buffer(AV1_COMMON *cm,
+                                   PC_TREE_SHARED_BUFFERS *shared_bufs) {
+  for (int i = 0; i < 3; i++) {
+    const int max_num_pix = MAX_SB_SIZE * MAX_SB_SIZE;
+    CHECK_MEM_ERROR(cm, shared_bufs->coeff_buf[i],
+                    aom_memalign(32, max_num_pix * sizeof(tran_low_t)));
+    CHECK_MEM_ERROR(cm, shared_bufs->qcoeff_buf[i],
+                    aom_memalign(32, max_num_pix * sizeof(tran_low_t)));
+    CHECK_MEM_ERROR(cm, shared_bufs->dqcoeff_buf[i],
+                    aom_memalign(32, max_num_pix * sizeof(tran_low_t)));
+  }
+}
+
+void av1_free_shared_coeff_buffer(PC_TREE_SHARED_BUFFERS *shared_bufs) {
+  for (int i = 0; i < 3; i++) {
+    aom_free(shared_bufs->coeff_buf[i]);
+    aom_free(shared_bufs->qcoeff_buf[i]);
+    aom_free(shared_bufs->dqcoeff_buf[i]);
+    shared_bufs->coeff_buf[i] = NULL;
+    shared_bufs->qcoeff_buf[i] = NULL;
+    shared_bufs->dqcoeff_buf[i] = NULL;
+  }
+}
+
+PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, BLOCK_SIZE bsize,
+                                 PC_TREE_SHARED_BUFFERS *shared_bufs) {
+  PICK_MODE_CONTEXT *ctx = NULL;
+  struct aom_internal_error_info error;
+
+  AOM_CHECK_MEM_ERROR(&error, ctx, aom_calloc(1, sizeof(*ctx)));
+  ctx->rd_mode_is_ready = 0;
+
+  const int num_planes = av1_num_planes(cm);
+  const int num_pix = block_size_wide[bsize] * block_size_high[bsize];
+  const int num_blk = num_pix / 16;
+
+  AOM_CHECK_MEM_ERROR(&error, ctx->blk_skip,
+                      aom_calloc(num_blk, sizeof(*ctx->blk_skip)));
+  AOM_CHECK_MEM_ERROR(&error, ctx->tx_type_map,
+                      aom_calloc(num_blk, sizeof(*ctx->tx_type_map)));
+  ctx->num_4x4_blk = num_blk;
+
+  for (int i = 0; i < num_planes; ++i) {
+    ctx->coeff[i] = shared_bufs->coeff_buf[i];
+    ctx->qcoeff[i] = shared_bufs->qcoeff_buf[i];
+    ctx->dqcoeff[i] = shared_bufs->dqcoeff_buf[i];
+    AOM_CHECK_MEM_ERROR(&error, ctx->eobs[i],
+                        aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
+    AOM_CHECK_MEM_ERROR(
+        &error, ctx->txb_entropy_ctx[i],
+        aom_memalign(32, num_blk * sizeof(*ctx->txb_entropy_ctx[i])));
+  }
+
+  if (num_pix <= MAX_PALETTE_SQUARE) {
+    for (int i = 0; i < 2; ++i) {
+      AOM_CHECK_MEM_ERROR(
+          &error, ctx->color_index_map[i],
+          aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
+    }
+  }
+
+  return ctx;
+}
+
+void av1_free_pmc(PICK_MODE_CONTEXT *ctx, int num_planes) {
+  if (ctx == NULL) return;
+
+  aom_free(ctx->blk_skip);
+  ctx->blk_skip = NULL;
+  aom_free(ctx->tx_type_map);
+  for (int i = 0; i < num_planes; ++i) {
+    ctx->coeff[i] = NULL;
+    ctx->qcoeff[i] = NULL;
+    ctx->dqcoeff[i] = NULL;
+    aom_free(ctx->eobs[i]);
+    ctx->eobs[i] = NULL;
+    aom_free(ctx->txb_entropy_ctx[i]);
+    ctx->txb_entropy_ctx[i] = NULL;
+  }
+
+  for (int i = 0; i < 2; ++i) {
+    aom_free(ctx->color_index_map[i]);
+    ctx->color_index_map[i] = NULL;
+  }
+
+  aom_free(ctx);
+}
+
+PC_TREE *av1_alloc_pc_tree_node(BLOCK_SIZE bsize) {
+  PC_TREE *pc_tree = NULL;
+  struct aom_internal_error_info error;
+
+  AOM_CHECK_MEM_ERROR(&error, pc_tree, aom_calloc(1, sizeof(*pc_tree)));
+
+  pc_tree->partitioning = PARTITION_NONE;
+  pc_tree->block_size = bsize;
+  pc_tree->index = 0;
+
+  pc_tree->none = NULL;
+  for (int i = 0; i < 2; ++i) {
+    pc_tree->horizontal[i] = NULL;
+    pc_tree->vertical[i] = NULL;
+  }
+  for (int i = 0; i < 3; ++i) {
+    pc_tree->horizontala[i] = NULL;
+    pc_tree->horizontalb[i] = NULL;
+    pc_tree->verticala[i] = NULL;
+    pc_tree->verticalb[i] = NULL;
+  }
+  for (int i = 0; i < 4; ++i) {
+    pc_tree->horizontal4[i] = NULL;
+    pc_tree->vertical4[i] = NULL;
+    pc_tree->split[i] = NULL;
+  }
+
+  return pc_tree;
+}
+
+#define FREE_PMC_NODE(CTX)         \
+  do {                             \
+    av1_free_pmc(CTX, num_planes); \
+    CTX = NULL;                    \
+  } while (0)
+
+void av1_free_pc_tree_recursive(PC_TREE *pc_tree, int num_planes, int keep_best,
+                                int keep_none) {
+  if (pc_tree == NULL) return;
+
+  const PARTITION_TYPE partition = pc_tree->partitioning;
+
+  if (!keep_none && (!keep_best || (partition != PARTITION_NONE)))
+    FREE_PMC_NODE(pc_tree->none);
+
+  for (int i = 0; i < 2; ++i) {
+    if (!keep_best || (partition != PARTITION_HORZ))
+      FREE_PMC_NODE(pc_tree->horizontal[i]);
+    if (!keep_best || (partition != PARTITION_VERT))
+      FREE_PMC_NODE(pc_tree->vertical[i]);
+  }
+  for (int i = 0; i < 3; ++i) {
+    if (!keep_best || (partition != PARTITION_HORZ_A))
+      FREE_PMC_NODE(pc_tree->horizontala[i]);
+    if (!keep_best || (partition != PARTITION_HORZ_B))
+      FREE_PMC_NODE(pc_tree->horizontalb[i]);
+    if (!keep_best || (partition != PARTITION_VERT_A))
+      FREE_PMC_NODE(pc_tree->verticala[i]);
+    if (!keep_best || (partition != PARTITION_VERT_B))
+      FREE_PMC_NODE(pc_tree->verticalb[i]);
+  }
+  for (int i = 0; i < 4; ++i) {
+    if (!keep_best || (partition != PARTITION_HORZ_4))
+      FREE_PMC_NODE(pc_tree->horizontal4[i]);
+    if (!keep_best || (partition != PARTITION_VERT_4))
+      FREE_PMC_NODE(pc_tree->vertical4[i]);
+  }
+
+  if (!keep_best || (partition != PARTITION_SPLIT)) {
+    for (int i = 0; i < 4; ++i) {
+      if (pc_tree->split[i] != NULL) {
+        av1_free_pc_tree_recursive(pc_tree->split[i], num_planes, 0, 0);
+        pc_tree->split[i] = NULL;
+      }
+    }
+  }
+
+  if (!keep_best && !keep_none) aom_free(pc_tree);
+}
+
+static AOM_INLINE int get_pc_tree_nodes(const int is_sb_size_128,
+                                        int stat_generation_stage) {
+  const int tree_nodes_inc = is_sb_size_128 ? 1024 : 0;
+  const int tree_nodes =
+      stat_generation_stage ? 1 : (tree_nodes_inc + 256 + 64 + 16 + 4 + 1);
+  return tree_nodes;
+}
+
+void av1_setup_sms_tree(AV1_COMP *const cpi, ThreadData *td) {
+  AV1_COMMON *const cm = &cpi->common;
+  const int stat_generation_stage = is_stat_generation_stage(cpi);
+  const int is_sb_size_128 = cm->seq_params.sb_size == BLOCK_128X128;
+  const int tree_nodes =
+      get_pc_tree_nodes(is_sb_size_128, stat_generation_stage);
+  int sms_tree_index = 0;
+  SIMPLE_MOTION_DATA_TREE *this_sms;
+  int square_index = 1;
+  int nodes;
+
+  aom_free(td->sms_tree);
+  CHECK_MEM_ERROR(cm, td->sms_tree,
+                  aom_calloc(tree_nodes, sizeof(*td->sms_tree)));
+  this_sms = &td->sms_tree[0];
+
+  if (!stat_generation_stage) {
+    const int leaf_factor = is_sb_size_128 ? 4 : 1;
+    const int leaf_nodes = 256 * leaf_factor;
+
+    // Sets up all the leaf nodes in the tree.
+    for (sms_tree_index = 0; sms_tree_index < leaf_nodes; ++sms_tree_index) {
+      SIMPLE_MOTION_DATA_TREE *const tree = &td->sms_tree[sms_tree_index];
+      tree->block_size = square[0];
+    }
+
+    // Each node has 4 leaf nodes, fill each block_size level of the tree
+    // from leafs to the root.
+    for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
+      for (int i = 0; i < nodes; ++i) {
+        SIMPLE_MOTION_DATA_TREE *const tree = &td->sms_tree[sms_tree_index];
+        tree->block_size = square[square_index];
+        for (int j = 0; j < 4; j++) tree->split[j] = this_sms++;
+        ++sms_tree_index;
+      }
+      ++square_index;
+    }
+  } else {
+    // Allocation for firstpass/LAP stage
+    // TODO(Mufaddal): refactor square_index to use a common block_size macro
+    // from firstpass.c
+    SIMPLE_MOTION_DATA_TREE *const tree = &td->sms_tree[sms_tree_index];
+    square_index = 2;
+    tree->block_size = square[square_index];
+  }
+
+  // Set up the root node for the largest superblock size
+  td->sms_root = &td->sms_tree[tree_nodes - 1];
+}
+
+void av1_free_sms_tree(ThreadData *td) {
+  if (td->sms_tree != NULL) {
+    aom_free(td->sms_tree);
+    td->sms_tree = NULL;
+  }
 }
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index a399794..cb9bdfc 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -25,6 +25,12 @@
 struct AV1Common;
 struct ThreadData;
 
+typedef struct {
+  tran_low_t *coeff_buf[MAX_MB_PLANE];
+  tran_low_t *qcoeff_buf[MAX_MB_PLANE];
+  tran_low_t *dqcoeff_buf[MAX_MB_PLANE];
+} PC_TREE_SHARED_BUFFERS;
+
 // Structure to hold snapshot of coding context during the mode picking process
 typedef struct {
   MB_MODE_INFO mic;
@@ -58,23 +64,28 @@
   // motion vector cache for adaptive motion search control in partition
   // search loop
   MV pred_mv[REF_FRAMES];
-  PARTITION_TYPE partition;
 } PICK_MODE_CONTEXT;
 
 typedef struct PC_TREE {
   PARTITION_TYPE partitioning;
   BLOCK_SIZE block_size;
-  PICK_MODE_CONTEXT none;
-  PICK_MODE_CONTEXT horizontal[2];
-  PICK_MODE_CONTEXT vertical[2];
-  PICK_MODE_CONTEXT horizontala[3];
-  PICK_MODE_CONTEXT horizontalb[3];
-  PICK_MODE_CONTEXT verticala[3];
-  PICK_MODE_CONTEXT verticalb[3];
-  PICK_MODE_CONTEXT horizontal4[4];
-  PICK_MODE_CONTEXT vertical4[4];
+  PICK_MODE_CONTEXT *none;
+  PICK_MODE_CONTEXT *horizontal[2];
+  PICK_MODE_CONTEXT *vertical[2];
+  PICK_MODE_CONTEXT *horizontala[3];
+  PICK_MODE_CONTEXT *horizontalb[3];
+  PICK_MODE_CONTEXT *verticala[3];
+  PICK_MODE_CONTEXT *verticalb[3];
+  PICK_MODE_CONTEXT *horizontal4[4];
+  PICK_MODE_CONTEXT *vertical4[4];
   struct PC_TREE *split[4];
   int index;
+} PC_TREE;
+
+typedef struct SIMPLE_MOTION_DATA_TREE {
+  BLOCK_SIZE block_size;
+  PARTITION_TYPE partitioning;
+  struct SIMPLE_MOTION_DATA_TREE *split[4];
 
   // Simple motion search_features
   FULLPEL_MV start_mvs[REF_FRAMES];
@@ -82,14 +93,25 @@
   unsigned int sms_rect_feat[8];
   int sms_none_valid;
   int sms_rect_valid;
-} PC_TREE;
+} SIMPLE_MOTION_DATA_TREE;
 
-void av1_setup_pc_tree(struct AV1_COMP *const cpi, struct ThreadData *td);
-void av1_free_pc_tree(const struct AV1_COMP *const cpi, struct ThreadData *td,
-                      const int num_planes, BLOCK_SIZE sb_size);
+void av1_setup_shared_coeff_buffer(AV1_COMMON *cm,
+                                   PC_TREE_SHARED_BUFFERS *shared_bufs);
+void av1_free_shared_coeff_buffer(PC_TREE_SHARED_BUFFERS *shared_bufs);
+
+PC_TREE *av1_alloc_pc_tree_node(BLOCK_SIZE bsize);
+void av1_free_pc_tree_recursive(PC_TREE *tree, int num_planes, int keep_best,
+                                int keep_none);
+
+PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, BLOCK_SIZE bsize,
+                                 PC_TREE_SHARED_BUFFERS *shared_bufs);
+void av1_free_pmc(PICK_MODE_CONTEXT *ctx, int num_planes);
 void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
                            PICK_MODE_CONTEXT *src_ctx);
 
+void av1_setup_sms_tree(struct AV1_COMP *const cpi, struct ThreadData *td);
+void av1_free_sms_tree(struct ThreadData *td);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 53b47d4..87af91d 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1711,6 +1711,7 @@
   BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
 
   if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
+  if (subsize == BLOCK_INVALID) return;
 
   if (!dry_run && ctx >= 0) {
     const int has_rows = (mi_row + hbs) < mi_params->mi_rows;
@@ -1732,22 +1733,22 @@
   switch (partition) {
     case PARTITION_NONE:
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->none, rate);
+               partition, pc_tree->none, rate);
       break;
     case PARTITION_VERT:
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->vertical[0], rate);
+               partition, pc_tree->vertical[0], rate);
       if (mi_col + hbs < mi_params->mi_cols) {
         encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
-                 partition, &pc_tree->vertical[1], rate);
+                 partition, pc_tree->vertical[1], rate);
       }
       break;
     case PARTITION_HORZ:
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontal[0], rate);
+               partition, pc_tree->horizontal[0], rate);
       if (mi_row + hbs < mi_params->mi_rows) {
         encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
-                 partition, &pc_tree->horizontal[1], rate);
+                 partition, pc_tree->horizontal[1], rate);
       }
       break;
     case PARTITION_SPLIT:
@@ -1763,36 +1764,36 @@
 
     case PARTITION_HORZ_A:
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
-               partition, &pc_tree->horizontala[0], rate);
+               partition, pc_tree->horizontala[0], rate);
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
-               partition, &pc_tree->horizontala[1], rate);
+               partition, pc_tree->horizontala[1], rate);
       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontala[2], rate);
+               partition, pc_tree->horizontala[2], rate);
       break;
     case PARTITION_HORZ_B:
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontalb[0], rate);
+               partition, pc_tree->horizontalb[0], rate);
       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
-               partition, &pc_tree->horizontalb[1], rate);
+               partition, pc_tree->horizontalb[1], rate);
       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
-               bsize2, partition, &pc_tree->horizontalb[2], rate);
+               bsize2, partition, pc_tree->horizontalb[2], rate);
       break;
     case PARTITION_VERT_A:
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
-               partition, &pc_tree->verticala[0], rate);
+               partition, pc_tree->verticala[0], rate);
       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
-               partition, &pc_tree->verticala[1], rate);
+               partition, pc_tree->verticala[1], rate);
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
-               partition, &pc_tree->verticala[2], rate);
+               partition, pc_tree->verticala[2], rate);
 
       break;
     case PARTITION_VERT_B:
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->verticalb[0], rate);
+               partition, pc_tree->verticalb[0], rate);
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
-               partition, &pc_tree->verticalb[1], rate);
+               partition, pc_tree->verticalb[1], rate);
       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
-               bsize2, partition, &pc_tree->verticalb[2], rate);
+               bsize2, partition, pc_tree->verticalb[2], rate);
       break;
     case PARTITION_HORZ_4:
       for (i = 0; i < 4; ++i) {
@@ -1800,7 +1801,7 @@
         if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
 
         encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize,
-                 partition, &pc_tree->horizontal4[i], rate);
+                 partition, pc_tree->horizontal4[i], rate);
       }
       break;
     case PARTITION_VERT_4:
@@ -1808,7 +1809,7 @@
         int this_mi_col = mi_col + i * quarter_step;
         if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
         encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize,
-                 partition, &pc_tree->vertical4[i], rate);
+                 partition, pc_tree->vertical4[i], rate);
       }
       break;
     default: assert(0 && "Invalid partition type."); break;
@@ -1889,7 +1890,6 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   const int bs = mi_size_wide[bsize];
   const int hbs = bs / 2;
-  int i;
   const int pl = (bsize >= BLOCK_8X8)
                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
                      : 0;
@@ -1902,7 +1902,11 @@
   BLOCK_SIZE sub_subsize = BLOCK_4X4;
   int splits_below = 0;
   BLOCK_SIZE bs_type = mib[0]->sb_type;
-  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
+
+  if (pc_tree->none == NULL) {
+    pc_tree->none = av1_alloc_pmc(cm, bsize, &td->shared_coeff_buf);
+  }
+  PICK_MODE_CONTEXT *ctx_none = pc_tree->none;
 
   if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
 
@@ -1939,7 +1943,7 @@
     if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
       sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
       splits_below = 1;
-      for (i = 0; i < 4; i++) {
+      for (int i = 0; i < 4; i++) {
         int jj = i >> 1, ii = i & 0x01;
         MB_MODE_INFO *this_mi = mib[jj * hbs * mi_params->mi_stride + ii * hbs];
         if (this_mi && this_mi->sb_type >= sub_subsize) {
@@ -1968,25 +1972,33 @@
     }
   }
 
+  for (int i = 0; i < 4; ++i) {
+    pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
+    pc_tree->split[i]->index = i;
+  }
   switch (partition) {
     case PARTITION_NONE:
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                     PARTITION_NONE, bsize, ctx_none, invalid_rdc, PICK_MODE_RD);
       break;
     case PARTITION_HORZ:
+      for (int i = 0; i < 2; ++i) {
+        pc_tree->horizontal[i] =
+            av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+      }
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
-                    PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
+                    PARTITION_HORZ, subsize, pc_tree->horizontal[0],
                     invalid_rdc, PICK_MODE_RD);
       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
           mi_row + hbs < mi_params->mi_rows) {
         RD_STATS tmp_rdc;
-        const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
+        const PICK_MODE_CONTEXT *const ctx_h = pc_tree->horizontal[0];
         av1_init_rd_stats(&tmp_rdc);
         update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
         encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
                           NULL);
         pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
-                      PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
+                      PARTITION_HORZ, subsize, pc_tree->horizontal[1],
                       invalid_rdc, PICK_MODE_RD);
         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
           av1_invalid_rd_stats(&last_part_rdc);
@@ -1998,20 +2010,24 @@
       }
       break;
     case PARTITION_VERT:
+      for (int i = 0; i < 2; ++i) {
+        pc_tree->vertical[i] =
+            av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+      }
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
-                    PARTITION_VERT, subsize, &pc_tree->vertical[0], invalid_rdc,
+                    PARTITION_VERT, subsize, pc_tree->vertical[0], invalid_rdc,
                     PICK_MODE_RD);
       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
           mi_col + hbs < mi_params->mi_cols) {
         RD_STATS tmp_rdc;
-        const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
+        const PICK_MODE_CONTEXT *const ctx_v = pc_tree->vertical[0];
         av1_init_rd_stats(&tmp_rdc);
         update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
         encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
                           NULL);
         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
                       PARTITION_VERT, subsize,
-                      &pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc,
+                      pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc,
                       PICK_MODE_RD);
         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
           av1_invalid_rd_stats(&last_part_rdc);
@@ -2031,7 +2047,7 @@
       last_part_rdc.rate = 0;
       last_part_rdc.dist = 0;
       last_part_rdc.rdcost = 0;
-      for (i = 0; i < 4; i++) {
+      for (int i = 0; i < 4; i++) {
         int x_idx = (i & 1) * hbs;
         int y_idx = (i >> 1) * hbs;
         int jj = i >> 1, ii = i & 0x01;
@@ -2084,7 +2100,7 @@
     pc_tree->partitioning = PARTITION_SPLIT;
 
     // Split partition.
-    for (i = 0; i < 4; i++) {
+    for (int i = 0; i < 4; i++) {
       int x_idx = (i & 1) * hbs;
       int y_idx = (i >> 1) * hbs;
       RD_STATS tmp_rdc;
@@ -2095,8 +2111,11 @@
 
       save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
       pc_tree->split[i]->partitioning = PARTITION_NONE;
+      if (pc_tree->split[i]->none == NULL)
+        pc_tree->split[i]->none =
+            av1_alloc_pmc(cm, split_subsize, &td->shared_coeff_buf);
       pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
-                    PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none,
+                    PARTITION_SPLIT, split_subsize, pc_tree->split[i]->none,
                     invalid_rdc, PICK_MODE_RD);
 
       restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
@@ -2225,6 +2244,7 @@
 
   switch (partition) {
     case PARTITION_NONE:
+      pc_tree->none = av1_alloc_pmc(cm, bsize, &td->shared_coeff_buf);
       if (cpi->sf.rt_sf.nonrd_check_partition_split && do_slipt_check(bsize) &&
           !frame_is_intra_only(cm)) {
         RD_STATS split_rdc, none_rdc, block_rdc;
@@ -2236,7 +2256,19 @@
         save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
         subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
+        // The split check below uses pc_tree->split[i] and its none context.
+        // Neither is preallocated any more, so create them on demand.
+        for (int i = 0; i < 4; ++i) {
+          if (pc_tree->split[i] == NULL) {
+            pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
+            pc_tree->split[i]->index = i;
+          }
+          if (pc_tree->split[i]->none == NULL) {
+            pc_tree->split[i]->none =
+                av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+          }
+        }
         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
-                      PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
+                      PARTITION_NONE, bsize, pc_tree->none, invalid_rd,
                       PICK_MODE_NONRD);
         none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
         none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
@@ -2256,12 +2276,12 @@
           pc_tree->split[i]->partitioning = PARTITION_NONE;
           pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
                         &block_rdc, PARTITION_NONE, subsize,
-                        &pc_tree->split[i]->none, invalid_rd, PICK_MODE_NONRD);
+                        pc_tree->split[i]->none, invalid_rd, PICK_MODE_NONRD);
           split_rdc.rate += block_rdc.rate;
           split_rdc.dist += block_rdc.dist;
 
           encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
-                   subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+                   subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL);
         }
         split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
         split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
@@ -2271,7 +2291,7 @@
           mib[0]->sb_type = bsize;
           pc_tree->partitioning = PARTITION_NONE;
           encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
-                   &pc_tree->none, NULL);
+                   pc_tree->none, NULL);
         } else {
           mib[0]->sb_type = subsize;
           pc_tree->partitioning = PARTITION_SPLIT;
@@ -2283,48 +2303,60 @@
               continue;
 
             encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
-                     subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+                     subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL);
           }
         }
 
       } else {
         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
-                      PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
+                      PARTITION_NONE, bsize, pc_tree->none, invalid_rd,
                       PICK_MODE_NONRD);
         encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
-                 &pc_tree->none, NULL);
+                 pc_tree->none, NULL);
       }
       break;
     case PARTITION_VERT:
+      for (int i = 0; i < 2; ++i) {
+        pc_tree->vertical[i] =
+            av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+      }
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
-                    PARTITION_VERT, subsize, &pc_tree->vertical[0], invalid_rd,
+                    PARTITION_VERT, subsize, pc_tree->vertical[0], invalid_rd,
                     PICK_MODE_NONRD);
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
-               PARTITION_VERT, &pc_tree->vertical[0], NULL);
+               PARTITION_VERT, pc_tree->vertical[0], NULL);
       if (mi_col + hbs < mi_params->mi_cols && bsize > BLOCK_8X8) {
         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &dummy_cost,
-                      PARTITION_VERT, subsize, &pc_tree->vertical[1],
-                      invalid_rd, PICK_MODE_NONRD);
+                      PARTITION_VERT, subsize, pc_tree->vertical[1], invalid_rd,
+                      PICK_MODE_NONRD);
         encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize,
-                 PARTITION_VERT, &pc_tree->vertical[1], NULL);
+                 PARTITION_VERT, pc_tree->vertical[1], NULL);
       }
       break;
     case PARTITION_HORZ:
+      for (int i = 0; i < 2; ++i) {
+        pc_tree->horizontal[i] =
+            av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+      }
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
-                    PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
-                    invalid_rd, PICK_MODE_NONRD);
+                    PARTITION_HORZ, subsize, pc_tree->horizontal[0], invalid_rd,
+                    PICK_MODE_NONRD);
       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
-               PARTITION_HORZ, &pc_tree->horizontal[0], NULL);
+               PARTITION_HORZ, pc_tree->horizontal[0], NULL);
 
       if (mi_row + hbs < mi_params->mi_rows && bsize > BLOCK_8X8) {
         pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &dummy_cost,
-                      PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
+                      PARTITION_HORZ, subsize, pc_tree->horizontal[1],
                       invalid_rd, PICK_MODE_NONRD);
         encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize,
-                 PARTITION_HORZ, &pc_tree->horizontal[1], NULL);
+                 PARTITION_HORZ, pc_tree->horizontal[1], NULL);
       }
       break;
     case PARTITION_SPLIT:
+      for (int i = 0; i < 4; ++i) {
+        pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
+        pc_tree->split[i]->index = i;
+      }
       if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
           is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
           !frame_is_intra_only(cm) && bsize <= BLOCK_32X32) {
@@ -2338,14 +2370,15 @@
         xd->left_txfm_context =
             xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
         pc_tree->partitioning = PARTITION_NONE;
+        pc_tree->none = av1_alloc_pmc(cm, bsize, &td->shared_coeff_buf);
         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
-                      PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
+                      PARTITION_NONE, bsize, pc_tree->none, invalid_rd,
                       PICK_MODE_NONRD);
         none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
         none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
         restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
         if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode != 2 ||
-            none_rdc.skip != 1 || pc_tree->none.mic.mode == NEWMV) {
+            none_rdc.skip != 1 || pc_tree->none->mic.mode == NEWMV) {
           av1_init_rd_stats(&split_rdc);
           for (int i = 0; i < 4; i++) {
             RD_STATS block_rdc;
@@ -2359,16 +2392,18 @@
                 cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
             xd->left_txfm_context = xd->left_txfm_context_buffer +
                                     ((mi_row + y_idx) & MAX_MIB_MASK);
+            if (pc_tree->split[i]->none == NULL)
+              pc_tree->split[i]->none =
+                  av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
             pc_tree->split[i]->partitioning = PARTITION_NONE;
             pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
                           &block_rdc, PARTITION_NONE, subsize,
-                          &pc_tree->split[i]->none, invalid_rd,
-                          PICK_MODE_NONRD);
+                          pc_tree->split[i]->none, invalid_rd, PICK_MODE_NONRD);
             split_rdc.rate += block_rdc.rate;
             split_rdc.dist += block_rdc.dist;
 
             encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
-                     subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+                     subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL);
           }
           restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
           split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
@@ -2378,7 +2413,7 @@
           mib[0]->sb_type = bsize;
           pc_tree->partitioning = PARTITION_NONE;
           encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
-                   &pc_tree->none, NULL);
+                   pc_tree->none, NULL);
         } else {
           mib[0]->sb_type = subsize;
           pc_tree->partitioning = PARTITION_SPLIT;
@@ -2389,8 +2424,11 @@
                 (mi_col + x_idx >= mi_params->mi_cols))
               continue;
 
+            if (pc_tree->split[i]->none == NULL)
+              pc_tree->split[i]->none =
+                  av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
             encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
-                     subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+                     subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL);
           }
         }
       } else {
@@ -2548,7 +2586,7 @@
 static bool rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
                                TileDataEnc *tile_data, TOKENEXTRA **tp,
                                PC_TREE *pc_tree, RD_STATS *best_rdc,
-                               PICK_MODE_CONTEXT ctxs[3],
+                               PICK_MODE_CONTEXT *ctxs[3],
                                PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                                BLOCK_SIZE bsize, PARTITION_TYPE partition,
                                int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
@@ -2562,15 +2600,15 @@
   sum_rdc.rate = x->partition_cost[pl][partition];
   sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
   if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0,
-                       *best_rdc, &sum_rdc, partition, ctx, &ctxs[0]))
+                       *best_rdc, &sum_rdc, partition, ctx, ctxs[0]))
     return false;
 
   if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1,
-                       *best_rdc, &sum_rdc, partition, &ctxs[0], &ctxs[1]))
+                       *best_rdc, &sum_rdc, partition, ctxs[0], ctxs[1]))
     return false;
 
   if (!rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2,
-                       *best_rdc, &sum_rdc, partition, &ctxs[1], &ctxs[2]))
+                       *best_rdc, &sum_rdc, partition, ctxs[1], ctxs[2]))
     return false;
 
   av1_rd_cost_update(x->rdmult, &sum_rdc);
@@ -2583,14 +2621,17 @@
   return true;
 }
 
-static AOM_INLINE void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
-  pc_tree->partitioning = PARTITION_NONE;
-  pc_tree->none.rd_stats.skip = 0;
+// Resets the partitioning of sms_tree to PARTITION_NONE. For bsize >=
+// BLOCK_8X8 the same reset is applied recursively to the four split
+// children, each at the PARTITION_SPLIT subsize of bsize.
+static AOM_INLINE void reset_simple_motion_tree_partition(
+    SIMPLE_MOTION_DATA_TREE *sms_tree, BLOCK_SIZE bsize) {
+  sms_tree->partitioning = PARTITION_NONE;
 
   if (bsize >= BLOCK_8X8) {
     BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
     for (int idx = 0; idx < 4; ++idx)
-      reset_partition(pc_tree->split[idx], subsize);
+      reset_simple_motion_tree_partition(sms_tree->split[idx], subsize);
   }
 }
 
@@ -2634,10 +2672,13 @@
                                 ? rect_part_win_info->horz_win
                                 : rect_part_win_info->vert_win;
   num_win += (sub_part_win) ? 1 : 0;
-  num_win +=
-      (pc_tree->split[split_idx1]->partitioning == PARTITION_NONE) ? 1 : 0;
-  num_win +=
-      (pc_tree->split[split_idx2]->partitioning == PARTITION_NONE) ? 1 : 0;
+  // A split child counts as a win when its partitioning is PARTITION_NONE; a
+  // NULL (unallocated) child is counted the same way.
+  const int split_indices[2] = { split_idx1, split_idx2 };
+  for (int k = 0; k < 2; ++k) {
+    const PC_TREE *const child = pc_tree->split[split_indices[k]];
+    if (child == NULL || child->partitioning == PARTITION_NONE) num_win += 1;
+  }
   if (num_win < num_win_thresh) {
     return 0;
   }
@@ -2671,14 +2717,13 @@
 //
 // Output:
 //     a bool value indicating whether a valid partition is found
-static bool rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
-                              TileDataEnc *tile_data, TOKENEXTRA **tp,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize,
-                              BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part,
-                              RD_STATS *rd_cost, RD_STATS best_rdc,
-                              PC_TREE *pc_tree, int64_t *none_rd,
-                              SB_MULTI_PASS_MODE multi_pass_mode,
-                              RD_RECT_PART_WIN_INFO *rect_part_win_info) {
+static bool rd_pick_partition(
+    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
+    TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize,
+    BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part, RD_STATS *rd_cost,
+    RD_STATS best_rdc, PC_TREE *pc_tree, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int64_t *none_rd, SB_MULTI_PASS_MODE multi_pass_mode,
+    RD_RECT_PART_WIN_INFO *rect_part_win_info) {
   const AV1_COMMON *const cm = &cpi->common;
   const CommonModeInfoParams *const mi_params = &cm->mi_params;
   const int num_planes = av1_num_planes(cm);
@@ -2688,7 +2733,6 @@
   const int mi_step = mi_size_wide[bsize] / 2;
   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
   const TOKENEXTRA *const tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
   int tmp_partition_cost[PARTITION_TYPES];
   BLOCK_SIZE subsize;
   RD_STATS this_rdc, sum_rdc;
@@ -2717,6 +2761,8 @@
     { true, true }, { true, true }, { true, true }, { true, true }
   };
 
+  sms_tree->partitioning = PARTITION_NONE;
+
   bool found_best_partition = false;
   if (best_rdc.rdcost < 0) {
     av1_invalid_rd_stats(rd_cost);
@@ -2849,7 +2895,7 @@
 
   if (try_split_only) {
     av1_simple_motion_search_based_split(
-        cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_none_allowed,
+        cpi, x, sms_tree, mi_row, mi_col, bsize, &partition_none_allowed,
         &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
         &do_square_split);
   }
@@ -2864,7 +2910,7 @@
 
   if (try_prune_rect) {
     av1_simple_motion_search_prune_rect(
-        cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_horz_allowed,
+        cpi, x, sms_tree, mi_row, mi_col, bsize, &partition_horz_allowed,
         &partition_vert_allowed, &prune_horz, &prune_vert);
   }
 
@@ -2926,6 +2972,9 @@
   (void)pb_simple_motion_pred_sse;
 
   // PARTITION_NONE
+  if (pc_tree->none == NULL)
+    pc_tree->none = av1_alloc_pmc(cm, bsize, &td->shared_coeff_buf);
+  PICK_MODE_CONTEXT *ctx_none = pc_tree->none;
   if (is_le_min_sq_part && has_rows && has_cols) partition_none_allowed = 1;
   assert(terminate_partition_search == 0);
   int64_t part_none_rd = INT64_MAX;
@@ -2991,7 +3040,9 @@
 
         best_rdc = this_rdc;
         found_best_partition = true;
-        if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
+        if (bsize_at_least_8x8) {
+          pc_tree->partitioning = PARTITION_NONE;
+        }
 
         if (!frame_is_intra_only(cm) &&
             (do_square_split || do_rectangular_split) &&
@@ -3026,7 +3077,7 @@
             this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 &&
             this_rdc.rate < INT_MAX && this_rdc.rate >= 0 &&
             (do_square_split || do_rectangular_split)) {
-          av1_simple_motion_search_early_term_none(cpi, x, pc_tree, mi_row,
+          av1_simple_motion_search_early_term_none(cpi, x, sms_tree, mi_row,
                                                    mi_col, bsize, &this_rdc,
                                                    &terminate_partition_search);
         }
@@ -3041,9 +3092,14 @@
 
   // PARTITION_SPLIT
   int64_t part_split_rd = INT64_MAX;
+  subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
   if ((!terminate_partition_search && do_square_split) || is_gt_max_sq_part) {
+    for (int i = 0; i < 4; ++i) {
+      if (pc_tree->split[i] == NULL)
+        pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
+      pc_tree->split[i]->index = i;
+    }
     av1_init_rd_stats(&sum_rdc);
-    subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
     sum_rdc.rate = partition_cost[PARTITION_SPLIT];
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
@@ -3080,7 +3136,7 @@
       if (!rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
                              mi_col + x_idx, subsize, max_sq_part, min_sq_part,
                              &this_rdc, best_remain_rdcost, pc_tree->split[idx],
-                             p_split_rd, multi_pass_mode,
+                             sms_tree->split[idx], p_split_rd, multi_pass_mode,
                              &split_part_rect_win[idx])) {
         av1_invalid_rd_stats(&sum_rdc);
         break;
@@ -3094,7 +3150,7 @@
       av1_rd_cost_update(x->rdmult, &sum_rdc);
       if (idx <= 1 && (bsize <= BLOCK_8X8 ||
                        pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
-        const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
+        const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none->mic;
         const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
         // Neither palette mode nor cfl predicted
         if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
@@ -3138,7 +3194,7 @@
       !frame_is_intra_only(cm) && !terminate_partition_search &&
       do_rectangular_split &&
       (partition_horz_allowed || partition_vert_allowed)) {
-    av1_ml_early_term_after_split(cpi, x, pc_tree, bsize, best_rdc.rdcost,
+    av1_ml_early_term_after_split(cpi, x, sms_tree, bsize, best_rdc.rdcost,
                                   part_none_rd, part_split_rd, split_rd, mi_row,
                                   mi_col, &terminate_partition_search);
   }
@@ -3159,6 +3215,12 @@
       !is_gt_max_sq_part) {
     av1_init_rd_stats(&sum_rdc);
     subsize = get_partition_subsize(bsize, PARTITION_HORZ);
+    for (int i = 0; i < 2; ++i) {
+      if (pc_tree->horizontal[i] == NULL) {
+        pc_tree->horizontal[i] =
+            av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+      }
+    }
     if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
     sum_rdc.rate = partition_cost[PARTITION_HORZ];
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
@@ -3173,7 +3235,7 @@
     }
 #endif
     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ,
-                  subsize, &pc_tree->horizontal[0], best_remain_rdcost,
+                  subsize, pc_tree->horizontal[0], best_remain_rdcost,
                   PICK_MODE_RD);
     av1_rd_cost_update(x->rdmult, &this_rdc);
 
@@ -3187,8 +3249,8 @@
     horz_rd[0] = this_rdc.rdcost;
 
     if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
-      const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
-      const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
+      const PICK_MODE_CONTEXT *const ctx_h = pc_tree->horizontal[0];
+      const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0]->mic;
       const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
       // Neither palette mode nor cfl predicted
       if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
@@ -3203,7 +3265,7 @@
                                &best_remain_rdcost);
 
       pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
-                    PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
+                    PARTITION_HORZ, subsize, pc_tree->horizontal[1],
                     best_remain_rdcost, PICK_MODE_RD);
       av1_rd_cost_update(x->rdmult, &this_rdc);
       horz_rd[1] = this_rdc.rdcost;
@@ -3249,6 +3311,12 @@
       !is_gt_max_sq_part) {
     av1_init_rd_stats(&sum_rdc);
     subsize = get_partition_subsize(bsize, PARTITION_VERT);
+    for (int i = 0; i < 2; ++i) {
+      if (pc_tree->vertical[i] == NULL) {
+        pc_tree->vertical[i] =
+            av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+      }
+    }
 
     if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
 
@@ -3265,7 +3333,7 @@
     }
 #endif
     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT,
-                  subsize, &pc_tree->vertical[0], best_remain_rdcost,
+                  subsize, pc_tree->vertical[0], best_remain_rdcost,
                   PICK_MODE_RD);
     av1_rd_cost_update(x->rdmult, &this_rdc);
 
@@ -3278,13 +3346,13 @@
     }
     vert_rd[0] = this_rdc.rdcost;
     if (sum_rdc.rdcost < best_rdc.rdcost && has_cols) {
-      const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
+      const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0]->mic;
       const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
       // Neither palette mode nor cfl predicted
       if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
         if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
       }
-      update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
+      update_state(cpi, td, pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
       encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
 
       if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
@@ -3292,7 +3360,7 @@
       av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
                                &best_remain_rdcost);
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
-                    PARTITION_VERT, subsize, &pc_tree->vertical[1],
+                    PARTITION_VERT, subsize, pc_tree->vertical[1],
                     best_remain_rdcost, PICK_MODE_RD);
       av1_rd_cost_update(x->rdmult, &this_rdc);
       vert_rd[1] = this_rdc.rdcost;
@@ -3452,18 +3520,22 @@
   if (!terminate_partition_search && partition_horz_allowed &&
       horza_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
-    pc_tree->horizontala[0].rd_mode_is_ready = 0;
-    pc_tree->horizontala[1].rd_mode_is_ready = 0;
-    pc_tree->horizontala[2].rd_mode_is_ready = 0;
+
+    pc_tree->horizontala[0] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+    pc_tree->horizontala[1] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+    pc_tree->horizontala[2] = av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+
+    pc_tree->horizontala[0]->rd_mode_is_ready = 0;
+    pc_tree->horizontala[1]->rd_mode_is_ready = 0;
+    pc_tree->horizontala[2]->rd_mode_is_ready = 0;
     if (split_ctx_is_ready[0]) {
-      av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none);
-      pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A;
-      pc_tree->horizontala[0].rd_mode_is_ready = 1;
+      av1_copy_tree_context(pc_tree->horizontala[0], pc_tree->split[0]->none);
+      pc_tree->horizontala[0]->mic.partition = PARTITION_HORZ_A;
+      pc_tree->horizontala[0]->rd_mode_is_ready = 1;
       if (split_ctx_is_ready[1]) {
-        av1_copy_tree_context(&pc_tree->horizontala[1],
-                              &pc_tree->split[1]->none);
-        pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A;
-        pc_tree->horizontala[1].rd_mode_is_ready = 1;
+        av1_copy_tree_context(pc_tree->horizontala[1], pc_tree->split[1]->none);
+        pc_tree->horizontala[1]->mic.partition = PARTITION_HORZ_A;
+        pc_tree->horizontala[1]->rd_mode_is_ready = 1;
       }
     }
 #if CONFIG_COLLECT_PARTITION_STATS
@@ -3505,13 +3577,18 @@
   if (!terminate_partition_search && partition_horz_allowed &&
       horzb_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
-    pc_tree->horizontalb[0].rd_mode_is_ready = 0;
-    pc_tree->horizontalb[1].rd_mode_is_ready = 0;
-    pc_tree->horizontalb[2].rd_mode_is_ready = 0;
+
+    pc_tree->horizontalb[0] = av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+    pc_tree->horizontalb[1] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+    pc_tree->horizontalb[2] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+
+    pc_tree->horizontalb[0]->rd_mode_is_ready = 0;
+    pc_tree->horizontalb[1]->rd_mode_is_ready = 0;
+    pc_tree->horizontalb[2]->rd_mode_is_ready = 0;
     if (horz_ctx_is_ready) {
-      av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]);
-      pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
-      pc_tree->horizontalb[0].rd_mode_is_ready = 1;
+      av1_copy_tree_context(pc_tree->horizontalb[0], pc_tree->horizontal[0]);
+      pc_tree->horizontalb[0]->mic.partition = PARTITION_HORZ_B;
+      pc_tree->horizontalb[0]->rd_mode_is_ready = 1;
     }
 #if CONFIG_COLLECT_PARTITION_STATS
     {
@@ -3553,13 +3630,18 @@
   if (!terminate_partition_search && partition_vert_allowed &&
       verta_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
-    pc_tree->verticala[0].rd_mode_is_ready = 0;
-    pc_tree->verticala[1].rd_mode_is_ready = 0;
-    pc_tree->verticala[2].rd_mode_is_ready = 0;
+
+    pc_tree->verticala[0] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+    pc_tree->verticala[1] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+    pc_tree->verticala[2] = av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+
+    pc_tree->verticala[0]->rd_mode_is_ready = 0;
+    pc_tree->verticala[1]->rd_mode_is_ready = 0;
+    pc_tree->verticala[2]->rd_mode_is_ready = 0;
     if (split_ctx_is_ready[0]) {
-      av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none);
-      pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
-      pc_tree->verticala[0].rd_mode_is_ready = 1;
+      av1_copy_tree_context(pc_tree->verticala[0], pc_tree->split[0]->none);
+      pc_tree->verticala[0]->mic.partition = PARTITION_VERT_A;
+      pc_tree->verticala[0]->rd_mode_is_ready = 1;
     }
 #if CONFIG_COLLECT_PARTITION_STATS
     {
@@ -3600,13 +3682,18 @@
   if (!terminate_partition_search && partition_vert_allowed &&
       vertb_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
-    pc_tree->verticalb[0].rd_mode_is_ready = 0;
-    pc_tree->verticalb[1].rd_mode_is_ready = 0;
-    pc_tree->verticalb[2].rd_mode_is_ready = 0;
+
+    pc_tree->verticalb[0] = av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+    pc_tree->verticalb[1] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+    pc_tree->verticalb[2] = av1_alloc_pmc(cm, bsize2, &td->shared_coeff_buf);
+
+    pc_tree->verticalb[0]->rd_mode_is_ready = 0;
+    pc_tree->verticalb[1]->rd_mode_is_ready = 0;
+    pc_tree->verticalb[2]->rd_mode_is_ready = 0;
     if (vert_ctx_is_ready) {
-      av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]);
-      pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
-      pc_tree->verticalb[0].rd_mode_is_ready = 1;
+      av1_copy_tree_context(pc_tree->verticalb[0], pc_tree->vertical[0]);
+      pc_tree->verticalb[0]->mic.partition = PARTITION_VERT_B;
+      pc_tree->verticalb[0]->rd_mode_is_ready = 1;
     }
 #if CONFIG_COLLECT_PARTITION_STATS
     {
@@ -3713,6 +3800,11 @@
     sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
+    for (int i = 0; i < 4; ++i) {
+      pc_tree->horizontal4[i] =
+          av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+    }
+
 #if CONFIG_COLLECT_PARTITION_STATS
     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
       partition_attempts[PARTITION_HORZ_4] += 1;
@@ -3725,7 +3817,7 @@
 
       if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
 
-      PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
+      PICK_MODE_CONTEXT *ctx_this = pc_tree->horizontal4[i];
 
       ctx_this->rd_mode_is_ready = 0;
       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
@@ -3769,6 +3861,9 @@
     sum_rdc.rate = partition_cost[PARTITION_VERT_4];
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
+    for (int i = 0; i < 4; ++i)
+      pc_tree->vertical4[i] = av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf);
+
 #if CONFIG_COLLECT_PARTITION_STATS
     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
       partition_attempts[PARTITION_VERT_4] += 1;
@@ -3781,7 +3876,7 @@
 
       if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
 
-      PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
+      PICK_MODE_CONTEXT *ctx_this = pc_tree->vertical4[i];
 
       ctx_this->rd_mode_is_ready = 0;
       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
@@ -3811,6 +3906,7 @@
     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
   }
 
+  sms_tree->partitioning = pc_tree->partitioning;
   if (bsize == cm->seq_params.sb_size && !found_best_partition) {
     // Did not find a valid partition, go back and search again, with less
     // constraint on which partition types to search.
@@ -3861,6 +3957,8 @@
   }
 #endif
 
+  sms_tree->partitioning = pc_tree->partitioning;
+  bool pc_tree_freed = false;
   if (found_best_partition && pc_tree->index != 3) {
     if (bsize == cm->seq_params.sb_size) {
       const int emit_output = multi_pass_mode != SB_DRY_PASS;
@@ -3869,12 +3967,20 @@
       x->cb_offset = 0;
       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize,
                 pc_tree, NULL);
+      // Superblock root: release the tree with both trailing flags cleared.
+      av1_free_pc_tree_recursive(pc_tree, num_planes, 0, 0);
+      pc_tree_freed = true;
     } else {
       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
                 pc_tree, NULL);
     }
   }
 
+  // All other paths free with both trailing flags set.
+  // NOTE(review): the flags presumably preserve contexts the caller still
+  // reads; confirm against av1_free_pc_tree_recursive().
+  if (!pc_tree_freed) av1_free_pc_tree_recursive(pc_tree, num_planes, 1, 1);
+
   if (bsize == cm->seq_params.sb_size) {
     assert(best_rdc.rate < INT_MAX);
     assert(best_rdc.dist < INT64_MAX);
@@ -4427,8 +4530,7 @@
 }
 
 static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
-                                       TileDataEnc *tile_data,
-                                       PC_TREE *const pc_root, TOKENEXTRA **tp,
+                                       TileDataEnc *tile_data, TOKENEXTRA **tp,
                                        const int mi_row, const int mi_col,
                                        const int seg_skip) {
   AV1_COMMON *const cm = &cpi->common;
@@ -4462,8 +4564,11 @@
          cpi->partition_search_skippable_frame ||
          sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
   td->mb.cb_offset = 0;
+
+  PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
   nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                       pc_root);
+  av1_free_pc_tree_recursive(pc_root, av1_num_planes(cm), 0, 0);
 }
 
 // Memset the mbmis at the current superblock to 0
@@ -4667,8 +4772,8 @@
 // This function initializes the stats for encode_rd_sb.
 static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                      const TileDataEnc *tile_data,
-                                     PC_TREE *pc_root, RD_STATS *rd_cost,
-                                     int mi_row, int mi_col,
+                                     SIMPLE_MOTION_DATA_TREE *sms_root,
+                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                      int gather_tpl_data) {
   const AV1_COMMON *cm = &cpi->common;
   const TileInfo *tile_info = &tile_data->tile_info;
@@ -4682,7 +4787,7 @@
        sf->part_sf.ml_early_term_after_part_split_level) &&
       !frame_is_intra_only(cm);
   if (use_simple_motion_search) {
-    init_simple_motion_search_mvs(pc_root);
+    init_simple_motion_search_mvs(sms_root);
   }
 
 #if !CONFIG_REALTIME_ONLY
@@ -4715,8 +4820,7 @@
 }
 
 static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
-                                    TileDataEnc *tile_data,
-                                    PC_TREE *const pc_root, TOKENEXTRA **tp,
+                                    TileDataEnc *tile_data, TOKENEXTRA **tp,
                                     const int mi_row, const int mi_col,
                                     const int seg_skip) {
   AV1_COMMON *const cm = &cpi->common;
@@ -4726,6 +4830,7 @@
   MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                       get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+  const int num_planes = av1_num_planes(cm);
   int dummy_rate;
   int64_t dummy_dist;
   RD_STATS dummy_rdc;
@@ -4734,13 +4839,17 @@
   (void)seg_skip;
 #endif  // CONFIG_REALTIME_ONLY
 
-  init_encode_rd_sb(cpi, td, tile_data, pc_root, &dummy_rdc, mi_row, mi_col, 1);
+  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;
+  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
+                    1);
 
   if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
     set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, sb_size);
     av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
+    PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                      &dummy_rate, &dummy_dist, 1, pc_root);
+    av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0);
   }
 #if !CONFIG_REALTIME_ONLY
   else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
@@ -4748,22 +4857,26 @@
     const BLOCK_SIZE bsize =
         seg_skip ? sb_size : sf->part_sf.always_this_block_size;
     set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
+    PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                      &dummy_rate, &dummy_dist, 1, pc_root);
+    av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0);
   } else if (cpi->partition_search_skippable_frame) {
     set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
     const BLOCK_SIZE bsize =
         get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
     set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
+    PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                      &dummy_rate, &dummy_dist, 1, pc_root);
+    av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0);
   } else {
     // No stats for overlay frames. Exclude key frame.
     x->valid_cost_b =
         get_tpl_stats_b(cpi, sb_size, mi_row, mi_col, x->intra_cost_b,
                         x->inter_cost_b, x->mv_b, &x->cost_stride);
 
-    reset_partition(pc_root, sb_size);
+    reset_simple_motion_tree_partition(sms_root, sb_size);
 
 #if CONFIG_COLLECT_COMPONENT_TIMING
     start_timing(cpi, rd_pick_partition_time);
@@ -4783,28 +4896,31 @@
     const int num_passes = cpi->oxcf.sb_multipass_unit_test ? 2 : 1;
 
     if (num_passes == 1) {
+      PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                         max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
-                        pc_root, NULL, SB_SINGLE_PASS, NULL);
+                        pc_root, sms_root, NULL, SB_SINGLE_PASS, NULL);
     } else {
       // First pass
       SB_FIRST_PASS_STATS sb_fp_stats;
       backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
+      PC_TREE *const pc_root_p0 = av1_alloc_pc_tree_node(sb_size);
       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                         max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
-                        pc_root, NULL, SB_DRY_PASS, NULL);
+                        pc_root_p0, sms_root, NULL, SB_DRY_PASS, NULL);
 
       // Second pass
-      init_encode_rd_sb(cpi, td, tile_data, pc_root, &dummy_rdc, mi_row, mi_col,
-                        0);
+      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
+                        mi_col, 0);
       reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
-      reset_partition(pc_root, sb_size);
+      reset_simple_motion_tree_partition(sms_root, sb_size);
 
       restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
 
+      PC_TREE *const pc_root_p1 = av1_alloc_pc_tree_node(sb_size);
       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                         max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
-                        pc_root, NULL, SB_WET_PASS, NULL);
+                        pc_root_p1, sms_root, NULL, SB_WET_PASS, NULL);
     }
     // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
     x->valid_cost_b = 0;
@@ -4935,9 +5051,6 @@
     x->color_sensitivity[1] = 0;
     x->content_state_sb = 0;
 
-    PC_TREE *const pc_root = td->pc_root;
-    pc_root->index = 0;
-
     xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
     td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
     x->source_variance = UINT_MAX;
@@ -4955,10 +5068,9 @@
     }
 
     if (use_nonrd_mode) {
-      encode_nonrd_sb(cpi, td, tile_data, pc_root, tp, mi_row, mi_col,
-                      seg_skip);
+      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
     } else {
-      encode_rd_sb(cpi, td, tile_data, pc_root, tp, mi_row, mi_col, seg_skip);
+      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
     }
 
     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 9ec8e60..a174a58 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -804,7 +804,6 @@
 
 static void dealloc_compressor_data(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
 
   dealloc_context_buffers_ext(&cpi->mbmi_ext_info);
 
@@ -884,7 +883,8 @@
   aom_free(cpi->tplist[0][0]);
   cpi->tplist[0][0] = NULL;
 
-  av1_free_pc_tree(cpi, &cpi->td, num_planes, cm->seq_params.sb_size);
+  av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf);
+  av1_free_sms_tree(&cpi->td);
 
   aom_free(cpi->td.mb.palette_buffer);
   av1_release_compound_type_rd_buffers(&cpi->td.mb.comp_rd_buffer);
@@ -1140,7 +1140,8 @@
                                sizeof(*cpi->tplist[0][0])));
   }
 
-  av1_setup_pc_tree(cpi, &cpi->td);
+  av1_setup_shared_coeff_buffer(&cpi->common, &cpi->td.shared_coeff_buf);
+  av1_setup_sms_tree(cpi, &cpi->td);
 }
 
 void av1_new_framerate(AV1_COMP *cpi, double framerate) {
@@ -2781,7 +2782,6 @@
 void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
   AV1_COMMON *const cm = &cpi->common;
   SequenceHeader *const seq_params = &cm->seq_params;
-  const int num_planes = av1_num_planes(cm);
   RATE_CONTROL *const rc = &cpi->rc;
   MACROBLOCK *const x = &cpi->td.mb;
   AV1LevelParams *const level_params = &cpi->level_params;
@@ -2939,7 +2939,8 @@
     if (cm->width > cpi->initial_width || cm->height > cpi->initial_height ||
         seq_params->sb_size != sb_size) {
       av1_free_context_buffers(cm);
-      av1_free_pc_tree(cpi, &cpi->td, num_planes, (BLOCK_SIZE)sb_size);
+      av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf);
+      av1_free_sms_tree(&cpi->td);
       alloc_compressor_data(cpi);
       realloc_segmentation_maps(cpi);
       cpi->initial_width = cpi->initial_height = 0;
@@ -3500,7 +3501,6 @@
   if (!cpi) return;
 
   cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
 
   if (cm->current_frame.frame_number > 0) {
 #if CONFIG_ENTROPY_STATS
@@ -3632,9 +3632,9 @@
       }
       aom_free(thread_data->td->mask_buf);
       aom_free(thread_data->td->counts);
-      av1_free_pc_tree(cpi, thread_data->td, num_planes,
-                       cm->seq_params.sb_size);
       aom_free(thread_data->td->mbmi_ext);
+      av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf);
+      av1_free_sms_tree(thread_data->td);
       aom_free(thread_data->td);
     }
   }
@@ -4336,7 +4336,6 @@
 // Returns 1 if the assigned width or height was <= 0.
 int av1_set_size_literal(AV1_COMP *cpi, int width, int height) {
   AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
   av1_check_initial_width(cpi, cm->seq_params.use_highbitdepth,
                           cm->seq_params.subsampling_x,
                           cm->seq_params.subsampling_y);
@@ -4349,7 +4348,8 @@
   if (cpi->initial_width && cpi->initial_height &&
       (cm->width > cpi->initial_width || cm->height > cpi->initial_height)) {
     av1_free_context_buffers(cm);
-    av1_free_pc_tree(cpi, &cpi->td, num_planes, cm->seq_params.sb_size);
+    av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf);
+    av1_free_sms_tree(&cpi->td);
     alloc_compressor_data(cpi);
     realloc_segmentation_maps(cpi);
     cpi->initial_width = cpi->initial_height = 0;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 9b41653..88f024f 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -724,11 +724,9 @@
   MACROBLOCK mb;
   RD_COUNTS rd_counts;
   FRAME_COUNTS *counts;
-  PC_TREE *pc_tree;
-  PC_TREE *pc_root;
-  tran_low_t *tree_coeff_buf[MAX_MB_PLANE];
-  tran_low_t *tree_qcoeff_buf[MAX_MB_PLANE];
-  tran_low_t *tree_dqcoeff_buf[MAX_MB_PLANE];
+  PC_TREE_SHARED_BUFFERS shared_coeff_buf;
+  SIMPLE_MOTION_DATA_TREE *sms_tree;
+  SIMPLE_MOTION_DATA_TREE *sms_root;
   InterModesInfo *inter_modes_info;
   uint32_t *hash_value_buffer[2][2];
   int32_t *wsrc_buf;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 693270b..406ae61 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -424,9 +424,9 @@
                       aom_memalign(32, sizeof(*thread_data->td)));
       av1_zero(*thread_data->td);
 
-      // Set up pc_tree.
-      thread_data->td->pc_tree = NULL;
-      av1_setup_pc_tree(cpi, thread_data->td);
+      // Set up sms_tree and the shared coefficient buffers.
+      av1_setup_sms_tree(cpi, thread_data->td);
+      av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
 
       CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
                       (uint8_t *)aom_memalign(
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 0955510..3bc5162 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -854,7 +854,8 @@
   const SequenceHeader *const seq_params = &cm->seq_params;
   const int num_planes = av1_num_planes(cm);
   MACROBLOCKD *const xd = &x->e_mbd;
-  const PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;
+  PICK_MODE_CONTEXT *ctx =
+      av1_alloc_pmc(cm, BLOCK_16X16, &cpi->td.shared_coeff_buf);
   MV last_mv = kZeroMv;
   const int qindex = find_fp_qindex(seq_params->bit_depth);
   // Detect if the key frame is screen content type.
@@ -1000,6 +1001,7 @@
     x->plane[2].src.buf += uv_mb_height * x->plane[1].src.stride -
                            uv_mb_height * mi_params->mb_cols;
   }
+  av1_free_pmc(ctx, num_planes);
   const double raw_err_stdev =
       raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts);
   aom_free(raw_motion_err_list);
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index cc820ba..96992d8 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -31,8 +31,9 @@
 
 #if !CONFIG_REALTIME_ONLY
 static AOM_INLINE void simple_motion_search_prune_part_features(
-    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
-    int mi_col, BLOCK_SIZE bsize, float *features, int features_to_get);
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, float *features,
+    int features_to_get);
 #endif
 
 static INLINE int convert_bsize_to_idx(BLOCK_SIZE bsize) {
@@ -249,8 +250,8 @@
 }
 
 void av1_simple_motion_search_based_split(
-    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
-    int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed,
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed,
     int *partition_horz_allowed, int *partition_vert_allowed,
     int *do_rectangular_split, int *do_square_split) {
   aom_clear_system_state();
@@ -277,7 +278,7 @@
       av1_simple_motion_search_no_split_thresh[agg][res_idx][bsize_idx];
 
   float features[FEATURE_SIZE_SMS_SPLIT] = { 0.0f };
-  simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col,
+  simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col,
                                            bsize, features,
                                            FEATURE_SMS_SPLIT_MODEL_FLAG);
   for (int idx = 0; idx < FEATURE_SIZE_SMS_SPLIT; idx++) {
@@ -306,12 +307,12 @@
 // the refs and returns the ref with the smallest sse. Returns -1 if none of the
 // ref in the list is available. Also stores the best sse and var in best_sse,
-// best_var, respectively. If save_mv is 0, don't update mv_ref_fulls in
-// pc_tree. If save_mv is 1, update mv_ref_fulls under pc_tree and the
+// best_var, respectively. If save_mv is 0, don't update start_mvs in
+// sms_tree. If save_mv is 1, update start_mvs under sms_tree and the
 // subtrees.
 static int simple_motion_search_get_best_ref(
-    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
-    int mi_col, BLOCK_SIZE bsize, const int *const refs, int num_refs,
-    int use_subpixel, int save_mv, unsigned int *best_sse,
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, const int *const refs,
+    int num_refs, int use_subpixel, int save_mv, unsigned int *best_sse,
     unsigned int *best_var) {
   const AV1_COMMON *const cm = &cpi->common;
   int best_ref = -1;
@@ -336,7 +337,7 @@
     const int ref = refs[ref_idx];
 
     if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref]) {
-      const FULLPEL_MV *start_mvs = pc_tree->start_mvs;
+      const FULLPEL_MV *start_mvs = sms_tree->start_mvs;
       unsigned int curr_sse = 0, curr_var = 0;
       int_mv best_mv =
           av1_simple_motion_search(cpi, x, mi_row, mi_col, bsize, ref,
@@ -351,14 +352,14 @@
       }
 
       if (save_mv) {
-        pc_tree->start_mvs[ref].row = best_mv.as_mv.row / 8;
-        pc_tree->start_mvs[ref].col = best_mv.as_mv.col / 8;
+        sms_tree->start_mvs[ref].row = best_mv.as_mv.row / 8;
+        sms_tree->start_mvs[ref].col = best_mv.as_mv.col / 8;
 
         if (bsize >= BLOCK_8X8) {
           for (int r_idx = 0; r_idx < 4; r_idx++) {
             // Propagate the new motion vectors to a lower level
-            PC_TREE *sub_tree = pc_tree->split[r_idx];
-            sub_tree->start_mvs[ref] = pc_tree->start_mvs[ref];
+            SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx];
+            sub_tree->start_mvs[ref] = sms_tree->start_mvs[ref];
           }
         }
       }
@@ -369,10 +370,10 @@
 }
 
 // Collects features using simple_motion_search and store them in features. The
-// features are also cached in PC_TREE. By default, the features collected are
-// the sse and var from the subblocks flagged by features_to_get. Furthermore,
-// if features is not NULL, then 7 more features are appended to the end of
-// features:
+// features are also cached in SIMPLE_MOTION_DATA_TREE. By default, the features
+// collected are the sse and var from the subblocks flagged by features_to_get.
+// Furthermore, if features is not NULL, then 7 more features are appended to
+// the end of features:
 //  - log(1.0 + dc_q ** 2)
 //  - whether an above macroblock exists
 //  - width of above macroblock
@@ -381,8 +382,9 @@
 //  - width of left macroblock
 //  - height of left macroblock
 static AOM_INLINE void simple_motion_search_prune_part_features(
-    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
-    int mi_col, BLOCK_SIZE bsize, float *features, int features_to_get) {
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, float *features,
+    int features_to_get) {
   const int w_mi = mi_size_wide[bsize];
   const int h_mi = mi_size_high[bsize];
   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
@@ -396,12 +398,12 @@
   const int use_subpixel = 1;
 
   // Doing whole block first to update the mv
-  if (!pc_tree->sms_none_valid && features_to_get & FEATURE_SMS_NONE_FLAG) {
-    simple_motion_search_get_best_ref(cpi, x, pc_tree, mi_row, mi_col, bsize,
+  if (!sms_tree->sms_none_valid && features_to_get & FEATURE_SMS_NONE_FLAG) {
+    simple_motion_search_get_best_ref(cpi, x, sms_tree, mi_row, mi_col, bsize,
                                       ref_list, num_refs, use_subpixel, 1,
-                                      &pc_tree->sms_none_feat[0],
-                                      &pc_tree->sms_none_feat[1]);
-    pc_tree->sms_none_valid = 1;
+                                      &sms_tree->sms_none_feat[0],
+                                      &sms_tree->sms_none_feat[1]);
+    sms_tree->sms_none_valid = 1;
   }
 
   // Split subblocks
@@ -410,7 +412,7 @@
     for (int r_idx = 0; r_idx < 4; r_idx++) {
       const int sub_mi_col = mi_col + (r_idx & 1) * w_mi / 2;
       const int sub_mi_row = mi_row + (r_idx >> 1) * h_mi / 2;
-      PC_TREE *sub_tree = pc_tree->split[r_idx];
+      SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx];
 
       if (!sub_tree->sms_none_valid) {
         simple_motion_search_get_best_ref(
@@ -423,7 +425,7 @@
   }
 
   // Rectangular subblocks
-  if (!pc_tree->sms_rect_valid && features_to_get & FEATURE_SMS_RECT_FLAG) {
+  if (!sms_tree->sms_rect_valid && features_to_get & FEATURE_SMS_RECT_FLAG) {
     // Horz subblock
     BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
     for (int r_idx = 0; r_idx < 2; r_idx++) {
@@ -431,9 +433,9 @@
       const int sub_mi_row = mi_row + r_idx * h_mi / 2;
 
       simple_motion_search_get_best_ref(
-          cpi, x, pc_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs,
-          use_subpixel, 0, &pc_tree->sms_rect_feat[2 * r_idx],
-          &pc_tree->sms_rect_feat[2 * r_idx + 1]);
+          cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs,
+          use_subpixel, 0, &sms_tree->sms_rect_feat[2 * r_idx],
+          &sms_tree->sms_rect_feat[2 * r_idx + 1]);
     }
 
     // Vert subblock
@@ -443,11 +445,11 @@
       const int sub_mi_row = mi_row + 0;
 
       simple_motion_search_get_best_ref(
-          cpi, x, pc_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs,
-          use_subpixel, 0, &pc_tree->sms_rect_feat[4 + 2 * r_idx],
-          &pc_tree->sms_rect_feat[4 + 2 * r_idx + 1]);
+          cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs,
+          use_subpixel, 0, &sms_tree->sms_rect_feat[4 + 2 * r_idx],
+          &sms_tree->sms_rect_feat[4 + 2 * r_idx + 1]);
     }
-    pc_tree->sms_rect_valid = 1;
+    sms_tree->sms_rect_valid = 1;
   }
 
   if (!features) return;
@@ -456,13 +458,13 @@
   int f_idx = 0;
   if (features_to_get & FEATURE_SMS_NONE_FLAG) {
     for (int sub_idx = 0; sub_idx < 2; sub_idx++) {
-      features[f_idx++] = logf(1.0f + pc_tree->sms_none_feat[sub_idx]);
+      features[f_idx++] = logf(1.0f + sms_tree->sms_none_feat[sub_idx]);
     }
   }
 
   if (features_to_get & FEATURE_SMS_SPLIT_FLAG) {
     for (int sub_idx = 0; sub_idx < 4; sub_idx++) {
-      PC_TREE *sub_tree = pc_tree->split[sub_idx];
+      SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[sub_idx];
       features[f_idx++] = logf(1.0f + sub_tree->sms_none_feat[0]);
       features[f_idx++] = logf(1.0f + sub_tree->sms_none_feat[1]);
     }
@@ -470,9 +472,10 @@
 
   if (features_to_get & FEATURE_SMS_RECT_FLAG) {
     for (int sub_idx = 0; sub_idx < 8; sub_idx++) {
-      features[f_idx++] = logf(1.0f + pc_tree->sms_rect_feat[sub_idx]);
+      features[f_idx++] = logf(1.0f + sms_tree->sms_rect_feat[sub_idx]);
     }
   }
+  aom_clear_system_state();
 
   const MACROBLOCKD *xd = &x->e_mbd;
   set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize);
@@ -494,12 +497,10 @@
   features[f_idx++] = (float)mi_size_high_log2[left_bsize];
 }
 
-void av1_simple_motion_search_prune_rect(AV1_COMP *const cpi, MACROBLOCK *x,
-                                         PC_TREE *pc_tree, int mi_row,
-                                         int mi_col, BLOCK_SIZE bsize,
-                                         int *partition_horz_allowed,
-                                         int *partition_vert_allowed,
-                                         int *prune_horz, int *prune_vert) {
+void av1_simple_motion_search_prune_rect(
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, int *partition_horz_allowed,
+    int *partition_vert_allowed, int *prune_horz, int *prune_vert) {
   aom_clear_system_state();
   const AV1_COMMON *const cm = &cpi->common;
   const int bsize_idx = convert_bsize_to_idx(bsize);
@@ -525,7 +526,7 @@
 
   // Get features
   float features[FEATURE_SIZE_SMS_PRUNE_PART] = { 0.0f };
-  simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col,
+  simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col,
                                            bsize, features,
                                            FEATURE_SMS_PRUNE_PART_FLAG);
   for (int f_idx = 0; f_idx < FEATURE_SIZE_SMS_PRUNE_PART; f_idx++) {
@@ -560,16 +561,14 @@
 //  - The frame is not intra only
 //  - The current bsize is > BLOCK_8X8
 //  - blk_row + blk_height/2 < total_rows and blk_col + blk_width/2 < total_cols
-void av1_simple_motion_search_early_term_none(AV1_COMP *const cpi,
-                                              MACROBLOCK *x, PC_TREE *pc_tree,
-                                              int mi_row, int mi_col,
-                                              BLOCK_SIZE bsize,
-                                              const RD_STATS *none_rdc,
-                                              int *early_terminate) {
+void av1_simple_motion_search_early_term_none(
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, const RD_STATS *none_rdc,
+    int *early_terminate) {
   // TODO(chiyotsai@google.com): There are other features we can extract from
   // PARTITION_NONE. Play with this later.
   float features[FEATURE_SIZE_SMS_TERM_NONE] = { 0.0f };
-  simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col,
+  simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col,
                                            bsize, features,
                                            FEATURE_SMS_PRUNE_PART_FLAG);
   int f_idx = FEATURE_SIZE_SMS_PRUNE_PART;
@@ -774,24 +773,24 @@
 }
 
 // Get the minimum partition block width and height(in log scale) under a
-// PC_TREE.
-static AOM_INLINE void get_min_bsize(const PC_TREE *pc_tree, int *min_bw,
-                                     int *min_bh) {
-  if (!pc_tree) return;
+// SIMPLE_MOTION_DATA_TREE.
+static AOM_INLINE void get_min_bsize(const SIMPLE_MOTION_DATA_TREE *sms_tree,
+                                     int *min_bw, int *min_bh) {
+  if (!sms_tree) return;
 
-  const BLOCK_SIZE bsize = pc_tree->block_size;
+  const BLOCK_SIZE bsize = sms_tree->block_size;
   if (bsize == BLOCK_4X4) {
     *min_bw = 0;
     *min_bh = 0;
     return;
   }
 
-  PARTITION_TYPE part_type = pc_tree->partitioning;
+  PARTITION_TYPE part_type = sms_tree->partitioning;
   if (part_type == PARTITION_INVALID) return;
 
   if (part_type == PARTITION_SPLIT) {
     for (int i = 0; i < 4; ++i) {
-      get_min_bsize(pc_tree->split[i], min_bw, min_bh);
+      get_min_bsize(sms_tree->split[i], min_bw, min_bh);
     }
   } else {
     if (part_type == PARTITION_HORZ_A || part_type == PARTITION_HORZ_B ||
@@ -815,9 +814,9 @@
 
 #define FEATURES 31
 void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x,
-                                   PC_TREE *const pc_tree, BLOCK_SIZE bsize,
-                                   int64_t best_rd, int64_t part_none_rd,
-                                   int64_t part_split_rd,
+                                   SIMPLE_MOTION_DATA_TREE *const sms_tree,
+                                   BLOCK_SIZE bsize, int64_t best_rd,
+                                   int64_t part_none_rd, int64_t part_split_rd,
                                    int64_t *split_block_rd, int mi_row,
                                    int mi_col,
                                    int *const terminate_partition_search) {
@@ -874,26 +873,26 @@
     add_rd_feature(split_block_rd[i], best_rd, features, &f_idx);
     int min_bw = MAX_SB_SIZE_LOG2;
     int min_bh = MAX_SB_SIZE_LOG2;
-    get_min_bsize(pc_tree->split[i], &min_bw, &min_bh);
+    get_min_bsize(sms_tree->split[i], &min_bw, &min_bh);
     features[f_idx++] = (float)min_bw;
     features[f_idx++] = (float)min_bh;
   }
 
-  simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col,
+  simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col,
                                            bsize, NULL,
                                            FEATURE_SMS_PRUNE_PART_FLAG);
 
-  features[f_idx++] = logf(1.0f + (float)pc_tree->sms_none_feat[1]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->sms_none_feat[1]);
 
-  features[f_idx++] = logf(1.0f + (float)pc_tree->split[0]->sms_none_feat[1]);
-  features[f_idx++] = logf(1.0f + (float)pc_tree->split[1]->sms_none_feat[1]);
-  features[f_idx++] = logf(1.0f + (float)pc_tree->split[2]->sms_none_feat[1]);
-  features[f_idx++] = logf(1.0f + (float)pc_tree->split[3]->sms_none_feat[1]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->split[0]->sms_none_feat[1]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->split[1]->sms_none_feat[1]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->split[2]->sms_none_feat[1]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->split[3]->sms_none_feat[1]);
 
-  features[f_idx++] = logf(1.0f + (float)pc_tree->sms_rect_feat[1]);
-  features[f_idx++] = logf(1.0f + (float)pc_tree->sms_rect_feat[3]);
-  features[f_idx++] = logf(1.0f + (float)pc_tree->sms_rect_feat[5]);
-  features[f_idx++] = logf(1.0f + (float)pc_tree->sms_rect_feat[7]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->sms_rect_feat[1]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->sms_rect_feat[3]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->sms_rect_feat[5]);
+  features[f_idx++] = logf(1.0f + (float)sms_tree->sms_rect_feat[7]);
 
   assert(f_idx == FEATURES);
 
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h
index f9b4d8b..c149ec8 100644
--- a/av1/encoder/partition_strategy.h
+++ b/av1/encoder/partition_strategy.h
@@ -45,20 +45,18 @@
 // the variance of residues. Then use the features to determine whether we want
 // to go straight to splitting without trying PARTITION_NONE
 void av1_simple_motion_search_based_split(
-    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
-    int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed,
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed,
     int *partition_horz_allowed, int *partition_vert_allowed,
     int *do_rectangular_split, int *do_square_split);
 
 // Performs a simple_motion_search with two reference frames and extract
 // the variance of residues. Then use the features to determine whether we want
 // to prune some partitions.
-void av1_simple_motion_search_prune_rect(AV1_COMP *const cpi, MACROBLOCK *x,
-                                         PC_TREE *pc_tree, int mi_row,
-                                         int mi_col, BLOCK_SIZE bsize,
-                                         int *partition_horz_allowed,
-                                         int *partition_vert_allowed,
-                                         int *prune_horz, int *prune_vert);
+void av1_simple_motion_search_prune_rect(
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, int *partition_horz_allowed,
+    int *partition_vert_allowed, int *prune_horz, int *prune_vert);
 
 #if !CONFIG_REALTIME_ONLY
 // Early terminates PARTITION_NONE using simple_motion_search features and the
@@ -67,12 +65,10 @@
 //  - The frame is not intra only
 //  - The current bsize is > BLOCK_8X8
 //  - blk_row + blk_height/2 < total_rows and blk_col + blk_width/2 < total_cols
-void av1_simple_motion_search_early_term_none(AV1_COMP *const cpi,
-                                              MACROBLOCK *x, PC_TREE *pc_tree,
-                                              int mi_row, int mi_col,
-                                              BLOCK_SIZE bsize,
-                                              const RD_STATS *none_rdc,
-                                              int *early_terminate);
+void av1_simple_motion_search_early_term_none(
+    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
+    int mi_row, int mi_col, BLOCK_SIZE bsize, const RD_STATS *none_rdc,
+    int *early_terminate);
 
 // Get the features for selecting the max and min partition size. Currently this
 // performs simple_motion_search on 16X16 subblocks of the current superblock,
@@ -87,9 +83,9 @@
 
 // Attempts an early termination after PARTITION_SPLIT.
 void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x,
-                                   PC_TREE *const pc_tree, BLOCK_SIZE bsize,
-                                   int64_t best_rd, int64_t part_none_rd,
-                                   int64_t part_split_rd,
+                                   SIMPLE_MOTION_DATA_TREE *const sms_tree,
+                                   BLOCK_SIZE bsize, int64_t best_rd,
+                                   int64_t part_none_rd, int64_t part_split_rd,
                                    int64_t *split_block_rd, int mi_row,
                                    int mi_col,
                                    int *const terminate_partition_search);
@@ -176,19 +172,19 @@
   av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
 }
 
-static INLINE void init_simple_motion_search_mvs(PC_TREE *pc_tree) {
-  av1_zero(pc_tree->start_mvs);
+static INLINE void init_simple_motion_search_mvs(
+    SIMPLE_MOTION_DATA_TREE *sms_tree) {
+  av1_zero(sms_tree->start_mvs);
+  av1_zero(sms_tree->sms_none_feat);
+  av1_zero(sms_tree->sms_rect_feat);
+  av1_zero(sms_tree->sms_none_valid);
+  av1_zero(sms_tree->sms_rect_valid);
 
-  av1_zero(pc_tree->sms_none_feat);
-  av1_zero(pc_tree->sms_rect_feat);
-  av1_zero(pc_tree->sms_none_valid);
-  av1_zero(pc_tree->sms_rect_valid);
-
-  if (pc_tree->block_size >= BLOCK_8X8) {
-    init_simple_motion_search_mvs(pc_tree->split[0]);
-    init_simple_motion_search_mvs(pc_tree->split[1]);
-    init_simple_motion_search_mvs(pc_tree->split[2]);
-    init_simple_motion_search_mvs(pc_tree->split[3]);
+  if (sms_tree->block_size >= BLOCK_8X8) {
+    init_simple_motion_search_mvs(sms_tree->split[0]);
+    init_simple_motion_search_mvs(sms_tree->split[1]);
+    init_simple_motion_search_mvs(sms_tree->split[2]);
+    init_simple_motion_search_mvs(sms_tree->split[3]);
   }
 }