Merge "Add filter_intra experiment flag" into nextgenv2
diff --git a/aom_dsp/bitwriter.h b/aom_dsp/bitwriter.h
index d256932..4b083fa 100644
--- a/aom_dsp/bitwriter.h
+++ b/aom_dsp/bitwriter.h
@@ -107,19 +107,8 @@
   s.cum_prob = symb > 0 ? cdf[symb - 1] : 0;
   s.prob = cdf[symb] - s.cum_prob;
   buf_rans_write(w, &s);
-#else
-  (void)w;
-  (void)symb;
-  (void)cdf;
-  (void)nsymbs;
-  assert(0 && "Unsupported bitwriter operation");
-#endif
-}
-
-static INLINE void aom_write_tree_cdf(aom_writer *w, int symb,
-                                      const uint16_t *cdf, int nsymbs) {
-#if CONFIG_DAALA_EC
-  daala_write_tree_cdf(w, symb, cdf, nsymbs);
+#elif CONFIG_DAALA_EC
+  daala_write_symbol(w, symb, cdf, nsymbs);
 #else
   (void)w;
   (void)symb;
diff --git a/aom_dsp/daalaboolwriter.h b/aom_dsp/daalaboolwriter.h
index c71b12e..c80cd4f 100644
--- a/aom_dsp/daalaboolwriter.h
+++ b/aom_dsp/daalaboolwriter.h
@@ -78,8 +78,8 @@
   } while (len);
 }
 
-static INLINE void daala_write_tree_cdf(daala_writer *w, int symb,
-                                        const uint16_t *cdf, int nsymbs) {
+static INLINE void daala_write_symbol(daala_writer *w, int symb,
+                                      const uint16_t *cdf, int nsymbs) {
   od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
 }
 
diff --git a/aom_dsp/entcode.c b/aom_dsp/entcode.c
index 7563f9f..49284b0 100644
--- a/aom_dsp/entcode.c
+++ b/aom_dsp/entcode.c
@@ -30,21 +30,30 @@
 
 /*CDFs for uniform probability distributions of small sizes (2 through 16,
    inclusive).*/
+// clang-format off
 const uint16_t OD_UNIFORM_CDFS_Q15[135] = {
-  16384, 32768, 10923, 21845, 32768, 8192,  16384, 24576, 32768, 6554,  13107,
-  19661, 26214, 32768, 5461,  10923, 16384, 21845, 27307, 32768, 4681,  9362,
-  14043, 18725, 23406, 28087, 32768, 4096,  8192,  12288, 16384, 20480, 24576,
-  28672, 32768, 3641,  7282,  10923, 14564, 18204, 21845, 25486, 29127, 32768,
-  3277,  6554,  9830,  13107, 16384, 19661, 22938, 26214, 29491, 32768, 2979,
-  5958,  8937,  11916, 14895, 17873, 20852, 23831, 26810, 29789, 32768, 2731,
-  5461,  8192,  10923, 13653, 16384, 19115, 21845, 24576, 27307, 30037, 32768,
-  2521,  5041,  7562,  10082, 12603, 15124, 17644, 20165, 22686, 25206, 27727,
-  30247, 32768, 2341,  4681,  7022,  9362,  11703, 14043, 16384, 18725, 21065,
-  23406, 25746, 28087, 30427, 32768, 2185,  4369,  6554,  8738,  10923, 13107,
-  15292, 17476, 19661, 21845, 24030, 26214, 28399, 30583, 32768, 2048,  4096,
-  6144,  8192,  10240, 12288, 14336, 16384, 18432, 20480, 22528, 24576, 26624,
-  28672, 30720, 32768
+  16384, 32768,
+  10923, 21845, 32768,
+  8192,  16384, 24576, 32768,
+  6554,  13107, 19661, 26214, 32768,
+  5461,  10923, 16384, 21845, 27307, 32768,
+  4681,   9362, 14043, 18725, 23406, 28087, 32768,
+  4096,   8192, 12288, 16384, 20480, 24576, 28672, 32768,
+  3641,   7282, 10923, 14564, 18204, 21845, 25486, 29127, 32768,
+  3277,   6554,  9830, 13107, 16384, 19661, 22938, 26214, 29491, 32768,
+  2979,   5958,  8937, 11916, 14895, 17873, 20852, 23831, 26810, 29789, 32768,
+  2731,   5461,  8192, 10923, 13653, 16384, 19115, 21845, 24576, 27307, 30037,
+  32768,
+  2521,   5041,  7562, 10082, 12603, 15124, 17644, 20165, 22686, 25206, 27727,
+  30247, 32768,
+  2341,   4681,  7022,  9362, 11703, 14043, 16384, 18725, 21065, 23406, 25746,
+  28087, 30427, 32768,
+  2185,   4369,  6554,  8738, 10923, 13107, 15292, 17476, 19661, 21845, 24030,
+  26214, 28399, 30583, 32768,
+  2048,   4096,  6144,  8192, 10240, 12288, 14336, 16384, 18432, 20480, 22528,
+  24576, 26624, 28672, 30720, 32768
 };
+// clang-format on
 
 /*Given the current total integer number of bits used and the current value of
    rng, computes the fraction number of bits used to OD_BITRES precision.
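
A note on the reformatted OD_UNIFORM_CDFS_Q15 table above: each row is the Q15 CDF of a uniform distribution over n symbols (n = 2 through 16), so entry i is essentially round(i * 32768 / n) with the final entry pinned at 32768. A minimal standalone sketch that regenerates the rows is below; it is not part of the patch, and the exact rounding rule is inferred from the listed values.

#include <stdint.h>
#include <stdio.h>

/* Print the Q15 CDF rows for uniform distributions over 2..16 symbols.
   Entry i (1-based) is (i * 32768 + n / 2) / n, which matches the
   OD_UNIFORM_CDFS_Q15 values above and ends every row at 32768. */
int main(void) {
  unsigned n, i;
  for (n = 2; n <= 16; n++) {
    for (i = 1; i <= n; i++) {
      const uint32_t cdf = (i * 32768u + n / 2) / n;
      printf("%u%s", (unsigned)cdf, i == n ? "\n" : ", ");
    }
  }
  return 0;
}
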
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
index a4e1d1d..880b885 100644
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -518,7 +518,7 @@
   Return: The number of bits.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-int od_ec_dec_tell(od_ec_dec *dec) {
+int od_ec_dec_tell(const od_ec_dec *dec) {
   return ((dec->end - dec->eptr) + (dec->bptr - dec->buf)) * 8 - dec->cnt -
          dec->nend_bits + dec->tell_offs;
 }
@@ -529,6 +529,6 @@
   Return: The number of bits scaled by 2**OD_BITRES.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) {
+uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) {
   return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng);
 }
diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h
index 3c8e210..2e4481c 100644
--- a/aom_dsp/entdec.h
+++ b/aom_dsp/entdec.h
@@ -133,8 +133,10 @@
                                                unsigned ftb OD_ACC_STR)
     OD_ARG_NONNULL(1);
 
-OD_WARN_UNUSED_RESULT int od_ec_dec_tell(od_ec_dec *dec) OD_ARG_NONNULL(1);
-uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec)
+    OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec)
+    OD_ARG_NONNULL(1);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c
index 019c6fb..3e9cb62 100644
--- a/aom_dsp/entenc.c
+++ b/aom_dsp/entenc.c
@@ -647,7 +647,7 @@
   Return: The number of bits.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-int od_ec_enc_tell(od_ec_enc *enc) {
+int od_ec_enc_tell(const od_ec_enc *enc) {
   /*The 10 here counteracts the offset of -9 baked into cnt, and adds 1 extra
      bit, which we reserve for terminating the stream.*/
   return (enc->offs + enc->end_offs) * 8 + enc->cnt + enc->nend_bits + 10;
@@ -662,7 +662,7 @@
   Return: The number of bits scaled by 2**OD_BITRES.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-uint32_t od_ec_enc_tell_frac(od_ec_enc *enc) {
+uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) {
   return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng);
 }
 
diff --git a/aom_dsp/entenc.h b/aom_dsp/entenc.h
index 396da2c..32163f7 100644
--- a/aom_dsp/entenc.h
+++ b/aom_dsp/entenc.h
@@ -101,8 +101,9 @@
                                                     uint32_t *nbytes)
     OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
 
-OD_WARN_UNUSED_RESULT int od_ec_enc_tell(od_ec_enc *enc) OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(od_ec_enc *enc)
+OD_WARN_UNUSED_RESULT int od_ec_enc_tell(const od_ec_enc *enc)
+    OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc)
     OD_ARG_NONNULL(1);
 
 void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
diff --git a/aomenc.c b/aomenc.c
index 9ba9301..63ef753 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -198,6 +198,17 @@
 #if CONFIG_AOM_HIGHBITDEPTH
 static const arg_def_t test16bitinternalarg = ARG_DEF(
     NULL, "test-16bit-internal", 0, "Force use of 16 bit internal buffer");
+
+static const struct arg_enum_list bitdepth_enum[] = {
+  { "8", AOM_BITS_8 }, { "10", AOM_BITS_10 }, { "12", AOM_BITS_12 }, { NULL, 0 }
+};
+
+static const arg_def_t bitdeptharg = ARG_DEF_ENUM(
+    "b", "bit-depth", 1,
+    "Bit depth for codec (8 for version <=1, 10 or 12 for version 2)",
+    bitdepth_enum);
+static const arg_def_t inbitdeptharg =
+    ARG_DEF(NULL, "input-bit-depth", 1, "Bit depth of input");
 #endif
 
 static const arg_def_t *main_args[] = { &debugmode,
@@ -270,6 +281,7 @@
                                           &error_resilient,
 #if CONFIG_AOM_HIGHBITDEPTH
                                           &test16bitinternalarg,
+                                          &bitdeptharg,
 #endif
                                           &lag_in_frames,
                                           NULL };
@@ -414,19 +426,6 @@
     ARG_DEF_ENUM(NULL, "color-space", 1, "The color space of input content:",
                  color_space_enum);
 
-#if CONFIG_AOM_HIGHBITDEPTH
-static const struct arg_enum_list bitdepth_enum[] = {
-  { "8", AOM_BITS_8 }, { "10", AOM_BITS_10 }, { "12", AOM_BITS_12 }, { NULL, 0 }
-};
-
-static const arg_def_t bitdeptharg = ARG_DEF_ENUM(
-    "b", "bit-depth", 1,
-    "Bit depth for codec (8 for version <=1, 10 or 12 for version 2)",
-    bitdepth_enum);
-static const arg_def_t inbitdeptharg =
-    ARG_DEF(NULL, "input-bit-depth", 1, "Bit depth of input");
-#endif
-
 static const struct arg_enum_list tune_content_enum[] = {
   { "default", AOM_CONTENT_DEFAULT },
   { "screen", AOM_CONTENT_SCREEN },
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 84843d8..157f00f 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1284,6 +1284,9 @@
 };
 /* clang-format on */
 
+int av1_ext_tx_ind[TX_TYPES];
+int av1_ext_tx_inv[TX_TYPES];
+
 static const aom_prob
     default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1] = {
       { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
@@ -1380,8 +1383,23 @@
 #if CONFIG_LOOP_RESTORATION
   av1_copy(fc->switchable_restore_prob, default_switchable_restore_prob);
 #endif  // CONFIG_LOOP_RESTORATION
+#if CONFIG_DAALA_EC
+  av1_tree_to_cdf_1D(av1_switchable_interp_tree, fc->switchable_interp_prob,
+                     fc->switchable_interp_cdf, SWITCHABLE_FILTER_CONTEXTS);
+  av1_tree_to_cdf_2D(av1_ext_tx_tree, fc->intra_ext_tx_prob,
+                     fc->intra_ext_tx_cdf, EXT_TX_SIZES, TX_TYPES);
+  av1_tree_to_cdf_1D(av1_ext_tx_tree, fc->inter_ext_tx_prob,
+                     fc->inter_ext_tx_cdf, EXT_TX_SIZES);
+  av1_tree_to_cdf_1D(av1_partition_tree, fc->partition_prob, fc->partition_cdf,
+                     PARTITION_CONTEXTS);
+#endif
 }
 
+#if CONFIG_DAALA_EC
+int av1_switchable_interp_ind[SWITCHABLE_FILTERS];
+int av1_switchable_interp_inv[SWITCHABLE_FILTERS];
+#endif
+
 #if CONFIG_EXT_INTERP
 const aom_tree_index av1_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)] =
     {
@@ -1569,14 +1587,23 @@
   }
 #else
   for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
-    for (j = 0; j < TX_TYPES; ++j)
+    for (j = 0; j < TX_TYPES; ++j) {
       aom_tree_merge_probs(av1_ext_tx_tree, pre_fc->intra_ext_tx_prob[i][j],
                            counts->intra_ext_tx[i][j],
                            fc->intra_ext_tx_prob[i][j]);
+#if CONFIG_DAALA_EC
+      av1_tree_to_cdf(av1_ext_tx_tree, fc->intra_ext_tx_prob[i][j],
+                      fc->intra_ext_tx_cdf[i][j]);
+#endif
+    }
   }
   for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
     aom_tree_merge_probs(av1_ext_tx_tree, pre_fc->inter_ext_tx_prob[i],
                          counts->inter_ext_tx[i], fc->inter_ext_tx_prob[i]);
+#if CONFIG_DAALA_EC
+    av1_tree_to_cdf(av1_ext_tx_tree, fc->inter_ext_tx_prob[i],
+                    fc->inter_ext_tx_cdf[i]);
+#endif
   }
 #endif  // CONFIG_EXT_TX
 
@@ -1603,9 +1630,14 @@
     aom_tree_merge_probs(av1_ext_partition_tree, pre_fc->partition_prob[i],
                          counts->partition[i], fc->partition_prob[i]);
 #else
-  for (i = 0; i < PARTITION_CONTEXTS; i++)
+  for (i = 0; i < PARTITION_CONTEXTS; i++) {
     aom_tree_merge_probs(av1_partition_tree, pre_fc->partition_prob[i],
                          counts->partition[i], fc->partition_prob[i]);
+#if CONFIG_DAALA_EC
+    av1_tree_to_cdf(av1_partition_tree, fc->partition_prob[i],
+                    fc->partition_cdf[i]);
+#endif
+  }
 #endif  // CONFIG_EXT_PARTITION_TYPES
 
 #if CONFIG_EXT_INTRA
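
The av1_tree_to_cdf_1D/2D calls added above turn the per-node binary split probabilities of each coding tree into one Q15 CDF per context, so the Daala entropy coder can code the whole symbol in a single call instead of walking the tree bit by bit. As a rough illustration only (the av1_tree_to_cdf implementation itself is not shown in this patch), the final accumulation step could look like the sketch below, assuming the per-symbol probabilities have already been gathered in the tree's in-order leaf order.

#include <stdint.h>

/* Hypothetical helper, not the library function: fold per-symbol
   probabilities (in in-order leaf order) into a Q15 CDF whose last entry
   is 32768, the shape stored by the *_cdf[] tables added in this patch. */
static void leaf_probs_to_cdf_q15(const double *leaf_prob, int nsymbs,
                                  uint16_t *cdf) {
  double acc = 0.0;
  int i;
  for (i = 0; i < nsymbs; ++i) {
    acc += leaf_prob[i];
    cdf[i] = (uint16_t)(acc * 32768.0 + 0.5);
    /* Keep the CDF strictly increasing so every symbol retains a nonzero
       coding range. */
    if (i > 0 && cdf[i] <= cdf[i - 1]) cdf[i] = (uint16_t)(cdf[i - 1] + 1);
  }
  cdf[nsymbs - 1] = 32768;
}
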
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 1caf319..77c73bf 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -55,6 +55,9 @@
 #else
   aom_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
 #endif
+#if CONFIG_DAALA_EC
+  uint16_t partition_cdf[PARTITION_CONTEXTS][PARTITION_TYPES];
+#endif
   av1_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
 #if CONFIG_ANS || CONFIG_DAALA_EC
   coeff_cdf_model coef_cdfs[TX_SIZES][PLANE_TYPES];
@@ -131,6 +134,12 @@
 #if CONFIG_LOOP_RESTORATION
   aom_prob switchable_restore_prob[RESTORE_SWITCHABLE_TYPES - 1];
 #endif  // CONFIG_LOOP_RESTORATION
+#if CONFIG_DAALA_EC
+  uint16_t switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS]
+                                [SWITCHABLE_FILTERS];
+  uint16_t intra_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+  uint16_t inter_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES];
+#endif
 } FRAME_CONTEXT;
 
 typedef struct FRAME_COUNTS {
@@ -278,11 +287,19 @@
 extern const aom_tree_index
     av1_switchable_restore_tree[TREE_SIZE(RESTORE_SWITCHABLE_TYPES)];
 #endif  // CONFIG_LOOP_RESTORATION
+#if CONFIG_DAALA_EC
+extern int av1_switchable_interp_ind[SWITCHABLE_FILTERS];
+extern int av1_switchable_interp_inv[SWITCHABLE_FILTERS];
+#endif
 
 void av1_setup_past_independence(struct AV1Common *cm);
 
 void av1_adapt_intra_frame_probs(struct AV1Common *cm);
 void av1_adapt_inter_frame_probs(struct AV1Common *cm);
+#if CONFIG_DAALA_EC
+extern int av1_ext_tx_ind[TX_TYPES];
+extern int av1_ext_tx_inv[TX_TYPES];
+#endif
 
 static INLINE int av1_ceil_log2(int n) {
   int i = 1, p = 2;
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 000757f..bf91a17 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -107,9 +107,14 @@
 
 static void read_switchable_interp_probs(FRAME_CONTEXT *fc, aom_reader *r) {
   int i, j;
-  for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
+  for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
     for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
       av1_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
+#if CONFIG_DAALA_EC
+    av1_tree_to_cdf(av1_switchable_interp_tree, fc->switchable_interp_prob[j],
+                    fc->switchable_interp_cdf[j]);
+#endif
+  }
 }
 
 static void read_inter_mode_probs(FRAME_CONTEXT *fc, aom_reader *r) {
@@ -1414,7 +1419,12 @@
     else
       p = (PARTITION_TYPE)aom_read_tree(r, av1_ext_partition_tree, probs);
 #else
+#if CONFIG_DAALA_EC
+    p = (PARTITION_TYPE)aom_read_tree_cdf(r, cm->fc->partition_cdf[ctx],
+                                          PARTITION_TYPES);
+#else
     p = (PARTITION_TYPE)aom_read_tree(r, av1_partition_tree, probs);
+#endif
 #endif  // CONFIG_EXT_PARTITION_TYPES
   else if (!has_rows && has_cols)
     p = aom_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
@@ -3514,15 +3524,24 @@
   int i, j, k;
   if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
     for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
-      for (j = 0; j < TX_TYPES; ++j)
+      for (j = 0; j < TX_TYPES; ++j) {
         for (k = 0; k < TX_TYPES - 1; ++k)
           av1_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k]);
+#if CONFIG_DAALA_EC
+        av1_tree_to_cdf(av1_ext_tx_tree, fc->intra_ext_tx_prob[i][j],
+                        fc->intra_ext_tx_cdf[i][j]);
+#endif
+      }
     }
   }
   if (aom_read(r, GROUP_DIFF_UPDATE_PROB)) {
     for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
       for (k = 0; k < TX_TYPES - 1; ++k)
         av1_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k]);
+#if CONFIG_DAALA_EC
+      av1_tree_to_cdf(av1_ext_tx_tree, fc->inter_ext_tx_prob[i],
+                      fc->inter_ext_tx_cdf[i]);
+#endif
     }
   }
 }
@@ -3666,9 +3685,14 @@
     for (i = 0; i < EXT_PARTITION_TYPES - 1; ++i)
       av1_diff_update_prob(&r, &fc->partition_prob[j][i]);
 #else
-  for (j = 0; j < PARTITION_CONTEXTS; ++j)
+  for (j = 0; j < PARTITION_CONTEXTS; ++j) {
     for (i = 0; i < PARTITION_TYPES - 1; ++i)
       av1_diff_update_prob(&r, &fc->partition_prob[j][i]);
+#if CONFIG_DAALA_EC
+    av1_tree_to_cdf(av1_partition_tree, fc->partition_prob[j],
+                    fc->partition_cdf[j]);
+#endif
+  }
 #endif  // CONFIG_EXT_PARTITION_TYPES
 
 #if CONFIG_EXT_INTRA
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 3ede7e0..87ff351 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -867,8 +867,14 @@
     const int ctx = av1_get_pred_context_switchable_interp(xd);
 #endif
     FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_DAALA_EC
+    const InterpFilter type =
+        (InterpFilter)av1_switchable_interp_inv[aom_read_tree_cdf(
+            r, cm->fc->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS)];
+#else
     const InterpFilter type = (InterpFilter)aom_read_tree(
         r, av1_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx]);
+#endif
     if (counts) ++counts->switchable_interp[ctx][type];
     return type;
   }
@@ -1763,14 +1769,24 @@
         !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
       FRAME_COUNTS *counts = xd->counts;
       if (inter_block) {
+#if CONFIG_DAALA_EC
+        mbmi->tx_type = av1_ext_tx_inv[aom_read_tree_cdf(
+            r, cm->fc->inter_ext_tx_cdf[mbmi->tx_size], TX_TYPES)];
+#else
         mbmi->tx_type = aom_read_tree(r, av1_ext_tx_tree,
                                       cm->fc->inter_ext_tx_prob[mbmi->tx_size]);
+#endif
         if (counts) ++counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
       } else {
         const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
+#if CONFIG_DAALA_EC
+        mbmi->tx_type = av1_ext_tx_inv[aom_read_tree_cdf(
+            r, cm->fc->intra_ext_tx_cdf[mbmi->tx_size][tx_type_nom], TX_TYPES)];
+#else
         mbmi->tx_type = aom_read_tree(
             r, av1_ext_tx_tree,
             cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+#endif
         if (counts)
           ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
       }
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index e6c8f90..61b9fc0 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -47,6 +47,12 @@
     av1_init_wedge_masks();
 #endif  // CONFIG_EXT_INTER
     init_done = 1;
+#if CONFIG_DAALA_EC
+    av1_indices_from_tree(av1_switchable_interp_ind, av1_switchable_interp_inv,
+                          SWITCHABLE_FILTERS, av1_switchable_interp_tree);
+    av1_indices_from_tree(av1_ext_tx_ind, av1_ext_tx_inv, TX_TYPES,
+                          av1_ext_tx_tree);
+#endif
   }
 }
 
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index e31fa67..cca104b 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -189,6 +189,19 @@
   av1_tokens_from_tree(switchable_restore_encodings,
                        av1_switchable_restore_tree);
 #endif  // CONFIG_LOOP_RESTORATION
+
+#if CONFIG_DAALA_EC
+  /* This hack is necessary when CONFIG_EXT_INTERP is enabled because the five
+      SWITCHABLE_FILTERS are not consecutive, e.g., 0, 1, 2, 3, 4, when doing
+      an in-order traversal of the av1_switchable_interp_tree structure. */
+  av1_indices_from_tree(av1_switchable_interp_ind, av1_switchable_interp_inv,
+                        SWITCHABLE_FILTERS, av1_switchable_interp_tree);
+  /* This hack is necessary because the four TX_TYPES are not consecutive,
+      e.g., 0, 1, 2, 3, when doing an in-order traversal of the av1_ext_tx_tree
+      structure. */
+  av1_indices_from_tree(av1_ext_tx_ind, av1_ext_tx_inv, TX_TYPES,
+                        av1_ext_tx_tree);
+#endif
 }
 
 static void write_intra_mode(aom_writer *w, PREDICTION_MODE mode,
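
On the av1_indices_from_tree calls above: the ind/inv arrays translate between a token's enum value and its position among the tree's leaves when the tree is walked in order, which is the order the CDF entries follow. The sketch below shows the idea; it assumes the usual aom_tree convention that non-positive entries are leaves whose negation is the token value, and it is not the actual av1_indices_from_tree implementation.

#include <stdint.h>

typedef int16_t tree_index; /* stand-in for aom_tree_index */

/* Hypothetical sketch: visit the tree in order and hand out consecutive
   slots to the leaves, so ind[token] gives a token's CDF slot and
   inv[slot] recovers the token from a decoded slot. */
static int index_leaves(const tree_index *tree, tree_index node, int *ind,
                        int *inv, int next) {
  int k;
  for (k = 0; k < 2; ++k) {
    const tree_index child = tree[node + k];
    if (child <= 0) { /* leaf: -child is the token value */
      ind[-child] = next;
      inv[next] = -child;
      ++next;
    } else { /* internal node: recurse into the subtree */
      next = index_leaves(tree, child, ind, inv, next);
    }
  }
  return next;
}

/* Usage sketch: index_leaves(tree, 0, ind, inv, 0) fills both maps; the
   encoder then writes ind[token] with aom_write_symbol() and the decoder
   maps the result back through inv[], as the bitstream.c and decodemv.c
   changes in this patch do. */
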
@@ -486,10 +499,16 @@
 static void update_switchable_interp_probs(AV1_COMMON *cm, aom_writer *w,
                                            FRAME_COUNTS *counts) {
   int j;
-  for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
+  for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
     prob_diff_update(av1_switchable_interp_tree,
                      cm->fc->switchable_interp_prob[j],
                      counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
+#if CONFIG_DAALA_EC
+    av1_tree_to_cdf(av1_switchable_interp_tree,
+                    cm->fc->switchable_interp_prob[j],
+                    cm->fc->switchable_interp_cdf[j]);
+#endif
+  }
 }
 
 #if CONFIG_EXT_TX
@@ -562,9 +581,14 @@
   aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
   if (do_update) {
     for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
-      for (j = 0; j < TX_TYPES; ++j)
+      for (j = 0; j < TX_TYPES; ++j) {
         prob_diff_update(av1_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
                          cm->counts.intra_ext_tx[i][j], TX_TYPES, w);
+#if CONFIG_DAALA_EC
+        av1_tree_to_cdf(av1_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
+                        cm->fc->intra_ext_tx_cdf[i][j]);
+#endif
+      }
     }
   }
   savings = 0;
@@ -579,6 +603,10 @@
     for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
       prob_diff_update(av1_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
                        cm->counts.inter_ext_tx[i], TX_TYPES, w);
+#if CONFIG_DAALA_EC
+      av1_tree_to_cdf(av1_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
+                      cm->fc->inter_ext_tx_cdf[i]);
+#endif
     }
   }
 }
@@ -963,9 +991,14 @@
 #else
     {
       const int ctx = av1_get_pred_context_switchable_interp(xd);
+#if CONFIG_DAALA_EC
+      aom_write_symbol(w, av1_switchable_interp_ind[mbmi->interp_filter],
+                       cm->fc->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS);
+#else
       av1_write_token(w, av1_switchable_interp_tree,
                       cm->fc->switchable_interp_prob[ctx],
                       &switchable_interp_encodings[mbmi->interp_filter]);
+#endif
       ++cpi->interp_filter_selected[0][mbmi->interp_filter];
     }
 #endif
@@ -1428,16 +1461,29 @@
 #endif  // CONFIG_SUPERTX
         !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
       if (is_inter) {
+#if CONFIG_DAALA_EC
+        aom_write_symbol(w, av1_ext_tx_ind[mbmi->tx_type],
+                         cm->fc->inter_ext_tx_cdf[mbmi->tx_size], TX_TYPES);
+#else
         av1_write_token(w, av1_ext_tx_tree,
                         cm->fc->inter_ext_tx_prob[mbmi->tx_size],
                         &ext_tx_encodings[mbmi->tx_type]);
+#endif
       } else {
+#if CONFIG_DAALA_EC
+        aom_write_symbol(
+            w, av1_ext_tx_ind[mbmi->tx_type],
+            cm->fc->intra_ext_tx_cdf[mbmi->tx_size]
+                                    [intra_mode_to_tx_type_context[mbmi->mode]],
+            TX_TYPES);
+#else
         av1_write_token(
             w, av1_ext_tx_tree,
             cm->fc
                 ->intra_ext_tx_prob[mbmi->tx_size]
                                    [intra_mode_to_tx_type_context[mbmi->mode]],
             &ext_tx_encodings[mbmi->tx_type]);
+#endif
       }
     } else {
       if (!mbmi->skip) {
@@ -1699,7 +1745,11 @@
       av1_write_token(w, av1_ext_partition_tree, probs,
                       &ext_partition_encodings[p]);
 #else
+#if CONFIG_DAALA_EC
+    aom_write_symbol(w, p, cm->fc->partition_cdf[ctx], PARTITION_TYPES);
+#else
     av1_write_token(w, av1_partition_tree, probs, &partition_encodings[p]);
+#endif
 #endif  // CONFIG_EXT_PARTITION_TYPES
   } else if (!has_rows && has_cols) {
     assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
@@ -3401,9 +3451,14 @@
     prob_diff_update(av1_ext_partition_tree, fc->partition_prob[i],
                      counts->partition[i], EXT_PARTITION_TYPES, header_bc);
 #else
-  for (i = 0; i < PARTITION_CONTEXTS; ++i)
+  for (i = 0; i < PARTITION_CONTEXTS; ++i) {
     prob_diff_update(av1_partition_tree, fc->partition_prob[i],
                      counts->partition[i], PARTITION_TYPES, header_bc);
+#if CONFIG_DAALA_EC
+    av1_tree_to_cdf(av1_partition_tree, cm->fc->partition_prob[i],
+                    cm->fc->partition_cdf[i]);
+#endif
+  }
 #endif  // CONFIG_EXT_PARTITION_TYPES
 
 #if CONFIG_EXT_INTRA
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 3b3f93a..a75dbd1 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -129,6 +129,10 @@
   int *nmvsadcost[2];
   int *nmvsadcost_hp[2];
   int **mvsadcost;
+#if CONFIG_MOTION_VAR
+  int32_t *wsrc_buf;
+  int32_t *mask_buf;
+#endif  // CONFIG_MOTION_VAR
 
 #if CONFIG_PALETTE
   PALETTE_BUFFER *palette_buffer;
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 1dc6b99..bd90739 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -3031,11 +3031,13 @@
 }
 
 int av1_find_best_obmc_sub_pixel_tree_up(
-    AV1_COMP *cpi, MACROBLOCK *x, const int32_t *wsrc, const int32_t *mask,
-    int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
-    int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
-    int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
-    unsigned int *sse1, int is_second, int use_upsampled_ref) {
+    AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
+    const MV *ref_mv, int allow_hp, int error_per_bit,
+    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+    int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
+    int is_second, int use_upsampled_ref) {
+  const int32_t *wsrc = x->wsrc_buf;
+  const int32_t *mask = x->mask_buf;
   const int *const z = wsrc;
   const int *const src_address = z;
   MACROBLOCKD *xd = &x->e_mbd;
@@ -3356,11 +3358,12 @@
 }
 
 int av1_obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
-                                const int32_t *wsrc, const int32_t *mask,
                                 MV *mvp_full, int step_param, int sadpb,
                                 int further_steps, int do_refine,
                                 const aom_variance_fn_ptr_t *fn_ptr,
                                 const MV *ref_mv, MV *dst_mv, int is_second) {
+  const int32_t *wsrc = x->wsrc_buf;
+  const int32_t *mask = x->mask_buf;
   MV temp_mv;
   int thissme, n, num00 = 0;
   int bestsme =
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index f1eccc2..8c42825 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -142,17 +142,16 @@
 
 #if CONFIG_MOTION_VAR
 int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                                const int32_t *wsrc, const int32_t *mask,
                                 MV *mvp_full, int step_param, int sadpb,
                                 int further_steps, int do_refine,
                                 const aom_variance_fn_ptr_t *fn_ptr,
                                 const MV *ref_mv, MV *dst_mv, int is_second);
 int av1_find_best_obmc_sub_pixel_tree_up(
-    struct AV1_COMP *cpi, MACROBLOCK *x, const int32_t *wsrc,
-    const int32_t *mask, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv,
-    int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp,
-    int forced_stop, int iters_per_step, int *mvjcost, int *mvcost[2],
-    int *distortion, unsigned int *sse1, int is_second, int use_upsampled_ref);
+    struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
+    const MV *ref_mv, int allow_hp, int error_per_bit,
+    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+    int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
+    int is_second, int use_upsampled_ref);
 #endif  // CONFIG_MOTION_VAR
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index e594a32..b776548 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2836,21 +2836,23 @@
   *sse = 0;
   *skippable = 1;
 
-  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
-    txfm_rd_in_plane(x, cpi, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
-                     plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
-    if (pnrate == INT_MAX) {
-      is_cost_valid = 0;
-      break;
-    }
-    *rate += pnrate;
-    *distortion += pndist;
-    *sse += pnsse;
-    *skippable &= pnskip;
-    if (RDCOST(x->rdmult, x->rddiv, *rate, *distortion) > ref_best_rd &&
-        RDCOST(x->rdmult, x->rddiv, 0, *sse) > ref_best_rd) {
-      is_cost_valid = 0;
-      break;
+  if (is_cost_valid) {
+    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+      txfm_rd_in_plane(x, cpi, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
+                       plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
+      if (pnrate == INT_MAX) {
+        is_cost_valid = 0;
+        break;
+      }
+      *rate += pnrate;
+      *distortion += pndist;
+      *sse += pnsse;
+      *skippable &= pnskip;
+      if (RDCOST(x->rdmult, x->rddiv, *rate, *distortion) > ref_best_rd &&
+          RDCOST(x->rdmult, x->rddiv, 0, *sse) > ref_best_rd) {
+        is_cost_valid = 0;
+        break;
+      }
     }
   }
 
@@ -5935,16 +5937,36 @@
 
   av1_set_mv_search_range(x, &ref_mv);
 
-  mvp_full = pred_mv[x->mv_best_ref_index[ref]];
+#if CONFIG_MOTION_VAR
+  if (mbmi->motion_mode != SIMPLE_TRANSLATION)
+    mvp_full = mbmi->mv[0].as_mv;
+  else
+#endif  // CONFIG_MOTION_VAR
+    mvp_full = pred_mv[x->mv_best_ref_index[ref]];
 
   mvp_full.col >>= 3;
   mvp_full.row >>= 3;
 
   x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
 
-  bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
-                                  cond_cost_list(cpi, cost_list), &ref_mv,
-                                  INT_MAX, 1);
+#if CONFIG_MOTION_VAR
+  switch (mbmi->motion_mode) {
+    case SIMPLE_TRANSLATION:
+#endif  // CONFIG_MOTION_VAR
+      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
+                                      sadpb, cond_cost_list(cpi, cost_list),
+                                      &ref_mv, INT_MAX, 1);
+#if CONFIG_MOTION_VAR
+      break;
+    case OBMC_CAUSAL:
+      bestsme = av1_obmc_full_pixel_diamond(
+          cpi, x, &mvp_full, step_param, sadpb,
+          MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
+          &(x->best_mv.as_mv), 0);
+      break;
+    default: assert(0 && "Invalid motion mode!");
+  }
+#endif  // CONFIG_MOTION_VAR
 
   x->mv_col_min = tmp_col_min;
   x->mv_col_max = tmp_col_max;
@@ -5953,68 +5975,92 @@
 
   if (bestsme < INT_MAX) {
     int dis; /* TODO: use dis in distortion calculation later. */
-    if (cpi->sf.use_upsampled_references) {
-      int best_mv_var;
-      const int try_second = x->second_best_mv.as_int != INVALID_MV &&
-                             x->second_best_mv.as_int != x->best_mv.as_int;
-      const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
-      const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
-      // Use up-sampled reference frames.
-      struct macroblockd_plane *const pd = &xd->plane[0];
-      struct buf_2d backup_pred = pd->pre[ref_idx];
-      const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+#if CONFIG_MOTION_VAR
+    switch (mbmi->motion_mode) {
+      case SIMPLE_TRANSLATION:
+#endif  // CONFIG_MOTION_VAR
+        if (cpi->sf.use_upsampled_references) {
+          int best_mv_var;
+          const int try_second = x->second_best_mv.as_int != INVALID_MV &&
+                                 x->second_best_mv.as_int != x->best_mv.as_int;
+          const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+          const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+          // Use up-sampled reference frames.
+          struct macroblockd_plane *const pd = &xd->plane[0];
+          struct buf_2d backup_pred = pd->pre[ref_idx];
+          const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
 
-      // Set pred for Y plane
-      setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer,
-                       upsampled_ref->y_crop_width,
-                       upsampled_ref->y_crop_height, upsampled_ref->y_stride,
-                       (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
-                       pd->subsampling_y);
+          // Set pred for Y plane
+          setup_pred_plane(
+              &pd->pre[ref_idx], upsampled_ref->y_buffer,
+              upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
+              upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
+              pd->subsampling_x, pd->subsampling_y);
 
-      best_mv_var = cpi->find_fractional_mv_step(
-          x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
-          &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
-          cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
-          x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, 1);
-
-      if (try_second) {
-        const int minc = AOMMAX(x->mv_col_min * 8, ref_mv.col - MV_MAX);
-        const int maxc = AOMMIN(x->mv_col_max * 8, ref_mv.col + MV_MAX);
-        const int minr = AOMMAX(x->mv_row_min * 8, ref_mv.row - MV_MAX);
-        const int maxr = AOMMIN(x->mv_row_max * 8, ref_mv.row + MV_MAX);
-        int this_var;
-        MV best_mv = x->best_mv.as_mv;
-
-        x->best_mv = x->second_best_mv;
-        if (x->best_mv.as_mv.row * 8 <= maxr &&
-            x->best_mv.as_mv.row * 8 >= minr &&
-            x->best_mv.as_mv.col * 8 <= maxc &&
-            x->best_mv.as_mv.col * 8 >= minc) {
-          this_var = cpi->find_fractional_mv_step(
+          best_mv_var = cpi->find_fractional_mv_step(
               x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
               &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
               cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
               x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,
               1);
-          if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
-          x->best_mv.as_mv = best_mv;
-        }
-      }
 
-      // Restore the reference frames.
-      pd->pre[ref_idx] = backup_pred;
-    } else {
-      cpi->find_fractional_mv_step(
-          x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
-          &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
-          cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
-          x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0, 0);
+          if (try_second) {
+            const int minc = AOMMAX(x->mv_col_min * 8, ref_mv.col - MV_MAX);
+            const int maxc = AOMMIN(x->mv_col_max * 8, ref_mv.col + MV_MAX);
+            const int minr = AOMMAX(x->mv_row_min * 8, ref_mv.row - MV_MAX);
+            const int maxr = AOMMIN(x->mv_row_max * 8, ref_mv.row + MV_MAX);
+            int this_var;
+            MV best_mv = x->best_mv.as_mv;
+
+            x->best_mv = x->second_best_mv;
+            if (x->best_mv.as_mv.row * 8 <= maxr &&
+                x->best_mv.as_mv.row * 8 >= minr &&
+                x->best_mv.as_mv.col * 8 <= maxc &&
+                x->best_mv.as_mv.col * 8 >= minc) {
+              this_var = cpi->find_fractional_mv_step(
+                  x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
+                  &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+                  cpi->sf.mv.subpel_iters_per_step,
+                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
+                  &dis, &x->pred_sse[ref], NULL, pw, ph, 1);
+              if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
+              x->best_mv.as_mv = best_mv;
+            }
+          }
+
+          // Restore the reference frames.
+          pd->pre[ref_idx] = backup_pred;
+        } else {
+          cpi->find_fractional_mv_step(
+              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
+              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
+              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0,
+              0);
+        }
+#if CONFIG_MOTION_VAR
+        break;
+      case OBMC_CAUSAL:
+        av1_find_best_obmc_sub_pixel_tree_up(
+            cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
+            cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
+            cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
+            x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
+            cpi->sf.use_upsampled_references);
+        break;
+      default: assert(0 && "Invalid motion mode!");
     }
+#endif  // CONFIG_MOTION_VAR
   }
   *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
                              x->mvcost, MV_COST_WEIGHT);
 
-  if (cpi->sf.adaptive_motion_search) x->pred_mv[ref] = x->best_mv.as_mv;
+#if CONFIG_MOTION_VAR
+  if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
+#else
+  if (cpi->sf.adaptive_motion_search)
+#endif  // CONFIG_MOTION_VAR
+    x->pred_mv[ref] = x->best_mv.as_mv;
 
   if (scaled_ref_frame) {
     int i;
@@ -6033,138 +6079,6 @@
   }
 }
 
-#if CONFIG_MOTION_VAR
-static void single_motion_search_obmc(AV1_COMP *cpi, MACROBLOCK *x,
-                                      BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                      const int32_t *wsrc, const int32_t *mask,
-#if CONFIG_EXT_INTER
-                                      int ref_idx, int mv_idx,
-#endif  // CONFIG_EXT_INTER
-                                      int_mv *tmp_mv, int_mv pred_mv,
-                                      int *rate_mv) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  const AV1_COMMON *cm = &cpi->common;
-  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
-  int bestsme = INT_MAX;
-  int step_param;
-  int sadpb = x->sadperbit16;
-  MV mvp_full;
-#if CONFIG_EXT_INTER
-  int ref = mbmi->ref_frame[ref_idx];
-  MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
-#else
-  int ref = mbmi->ref_frame[0];
-  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
-  int ref_idx = 0;
-#endif  // CONFIG_EXT_INTER
-
-  int tmp_col_min = x->mv_col_min;
-  int tmp_col_max = x->mv_col_max;
-  int tmp_row_min = x->mv_row_min;
-  int tmp_row_max = x->mv_row_max;
-
-  const YV12_BUFFER_CONFIG *scaled_ref_frame =
-      av1_get_scaled_ref_frame(cpi, ref);
-
-#if CONFIG_REF_MV
-  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
-#endif
-
-  if (scaled_ref_frame) {
-    int i;
-    // Swap out the reference frame for a version that's been scaled to
-    // match the resolution of the current frame, allowing the existing
-    // motion search code to be used without additional modifications.
-    for (i = 0; i < MAX_MB_PLANE; i++)
-      backup_yv12[i] = xd->plane[i].pre[ref_idx];
-
-    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
-  }
-
-  // Work out the size of the first step in the mv step search.
-  // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
-  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
-    // Take wtd average of the step_params based on the last frame's
-    // max mv magnitude and that based on the best ref mvs of the current
-    // block for the given reference.
-    step_param =
-        (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
-        2;
-  } else {
-    step_param = cpi->mv_step_param;
-  }
-
-  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
-    int boffset =
-        2 * (b_width_log2_lookup[cm->sb_size] -
-             AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
-    step_param = AOMMAX(step_param, boffset);
-  }
-
-  if (cpi->sf.adaptive_motion_search) {
-    int bwl = b_width_log2_lookup[bsize];
-    int bhl = b_height_log2_lookup[bsize];
-    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
-
-    if (tlevel < 5) step_param += 2;
-
-    // prev_mv_sad is not setup for dynamically scaled frames.
-    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
-      int i;
-      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
-        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
-          x->pred_mv[ref].row = 0;
-          x->pred_mv[ref].col = 0;
-          tmp_mv->as_int = INVALID_MV;
-
-          if (scaled_ref_frame) {
-            int i;
-            for (i = 0; i < MAX_MB_PLANE; ++i)
-              xd->plane[i].pre[ref_idx] = backup_yv12[i];
-          }
-          return;
-        }
-      }
-    }
-  }
-
-  av1_set_mv_search_range(x, &ref_mv);
-
-  mvp_full = pred_mv.as_mv;
-  mvp_full.col >>= 3;
-  mvp_full.row >>= 3;
-
-  bestsme = av1_obmc_full_pixel_diamond(
-      cpi, x, wsrc, mask, &mvp_full, step_param, sadpb,
-      MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
-      &tmp_mv->as_mv, ref_idx);
-
-  x->mv_col_min = tmp_col_min;
-  x->mv_col_max = tmp_col_max;
-  x->mv_row_min = tmp_row_min;
-  x->mv_row_max = tmp_row_max;
-
-  if (bestsme < INT_MAX) {
-    int dis;
-    av1_find_best_obmc_sub_pixel_tree_up(
-        cpi, x, wsrc, mask, mi_row, mi_col, &tmp_mv->as_mv, &ref_mv,
-        cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
-        cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
-        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], ref_idx,
-        cpi->sf.use_upsampled_references);
-  }
-  *rate_mv = av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
-                             x->mvcost, MV_COST_WEIGHT);
-
-  if (scaled_ref_frame) {
-    int i;
-    for (i = 0; i < MAX_MB_PLANE; i++)
-      xd->plane[i].pre[ref_idx] = backup_yv12[i];
-  }
-}
-#endif  // CONFIG_MOTION_VAR
-
 #if CONFIG_EXT_INTER
 static void do_masked_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
                                     const uint8_t *mask, int mask_stride,
@@ -6702,8 +6616,8 @@
     int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
     int mi_col,
 #if CONFIG_MOTION_VAR
-    uint8_t *dst_buf1[3], int dst_stride1[3], uint8_t *dst_buf2[3],
-    int dst_stride2[3], const int32_t *const wsrc, const int32_t *const mask2d,
+    uint8_t *above_pred_buf[3], int above_pred_stride[3],
+    uint8_t *left_pred_buf[3], int left_pred_stride[3],
 #endif  // CONFIG_MOTION_VAR
 #if CONFIG_EXT_INTER
     int_mv single_newmvs[2][TOTAL_REFS_PER_FRAME],
@@ -6825,6 +6739,7 @@
       return INT64_MAX;
   }
 
+  mbmi->motion_mode = SIMPLE_TRANSLATION;
   if (have_newmv_in_inter_mode(this_mode)) {
     if (is_comp_pred) {
 #if CONFIG_EXT_INTER
@@ -7596,18 +7511,16 @@
       mbmi->motion_mode = OBMC_CAUSAL;
 #endif  // CONFIG_EXT_INTER
       if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
-        int_mv tmp_mv;
-        int_mv pred_mv;
         int tmp_rate_mv = 0;
 
-        pred_mv.as_int = mbmi->mv[0].as_int;
-        single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col, wsrc, mask2d,
+        single_motion_search(cpi, x, bsize, mi_row, mi_col,
 #if CONFIG_EXT_INTER
-                                  0, mv_idx,
+                             0, mv_idx,
 #endif  // CONFIG_EXT_INTER
-                                  &tmp_mv, pred_mv, &tmp_rate_mv);
-        mbmi->mv[0].as_int = tmp_mv.as_int;
-        if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
+                             &tmp_rate_mv);
+        mbmi->mv[0].as_int = x->best_mv.as_int;
+        if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
+                                refs[0])) {
           tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
         }
 #if CONFIG_EXT_INTER
@@ -7635,8 +7548,9 @@
         av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
 #endif  // CONFIG_EXT_INTER
       }
-      av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1,
-                                      dst_stride1, dst_buf2, dst_stride2);
+      av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, above_pred_buf,
+                                      above_pred_stride, left_pred_buf,
+                                      left_pred_stride);
       model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                       &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
     }
@@ -8238,8 +8152,7 @@
                                       const MACROBLOCKD *xd, int mi_row,
                                       int mi_col, const uint8_t *above,
                                       int above_stride, const uint8_t *left,
-                                      int left_stride, int32_t *mask_buf,
-                                      int32_t *wsrc_buf);
+                                      int left_stride);
 #endif  // CONFIG_MOTION_VAR
 
 void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
@@ -8468,9 +8381,10 @@
   av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
                                      dst_width2, dst_height2, dst_stride2);
   av1_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+  x->mask_buf = mask2d_buf;
+  x->wsrc_buf = weighted_src_buf;
   calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, dst_buf1[0],
-                            dst_stride1[0], dst_buf2[0], dst_stride2[0],
-                            mask2d_buf, weighted_src_buf);
+                            dst_stride1[0], dst_buf2[0], dst_stride2[0]);
 #endif  // CONFIG_MOTION_VAR
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -9000,8 +8914,7 @@
           cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv,
           &disable_skip, frame_mv, mi_row, mi_col,
 #if CONFIG_MOTION_VAR
-          dst_buf1, dst_stride1, dst_buf2, dst_stride2, weighted_src_buf,
-          mask2d_buf,
+          dst_buf1, dst_stride1, dst_buf2, dst_stride2,
 #endif  // CONFIG_MOTION_VAR
 #if CONFIG_EXT_INTER
           single_newmvs, single_newmvs_rate, &compmode_interintra_cost,
@@ -9108,8 +9021,7 @@
                 cpi, x, bsize, &tmp_rate, &tmp_dist, &tmp_skip, &tmp_rate_y,
                 &tmp_rate_uv, &dummy_disable_skip, frame_mv, mi_row, mi_col,
 #if CONFIG_MOTION_VAR
-                dst_buf1, dst_stride1, dst_buf2, dst_stride2, weighted_src_buf,
-                mask2d_buf,
+                dst_buf1, dst_stride1, dst_buf2, dst_stride2,
 #endif  // CONFIG_MOTION_VAR
 #if CONFIG_EXT_INTER
                 dummy_single_newmvs, dummy_single_newmvs_rate,
@@ -10854,12 +10766,13 @@
                                       const MACROBLOCKD *xd, int mi_row,
                                       int mi_col, const uint8_t *above,
                                       int above_stride, const uint8_t *left,
-                                      int left_stride, int32_t *mask_buf,
-                                      int32_t *wsrc_buf) {
+                                      int left_stride) {
   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   int row, col, i;
   const int bw = 8 * xd->n8_w;
   const int bh = 8 * xd->n8_h;
+  int32_t *mask_buf = x->mask_buf;
+  int32_t *wsrc_buf = x->wsrc_buf;
   const int wsrc_stride = bw;
   const int mask_stride = bw;
   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;