Merge "Cleaning up vp9_rc_compute_frame_size_bounds()."
diff --git a/build/make/Makefile b/build/make/Makefile
index dd7fb4a..0c5ff64 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -147,6 +147,15 @@
 	$(if $(quiet),@echo "    [CXX] $@")
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<
 
+$(BUILD_PFX)%.cpp.d: %.cpp
+	$(if $(quiet),@echo "    [DEP] $@")
+	$(qexec)mkdir -p $(dir $@)
+	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@
+
+$(BUILD_PFX)%.cpp.o: %.cpp
+	$(if $(quiet),@echo "    [CXX] $@")
+	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<
+
 $(BUILD_PFX)%.asm.d: %.asm
 	$(if $(quiet),@echo "    [DEP] $@")
 	$(qexec)mkdir -p $(dir $@)
@@ -218,7 +227,7 @@
 
 find_file1=$(word 1,$(wildcard $(subst //,/,$(addsuffix /$(1),$(2)))))
 find_file=$(foreach f,$(1),$(call find_file1,$(strip $(f)),$(strip $(2))) )
-obj_pats=.c=.c.o $(AS_SFX)=$(AS_SFX).o .cc=.cc.o
+obj_pats=.c=.c.o $(AS_SFX)=$(AS_SFX).o .cc=.cc.o .cpp=.cpp.o
 objs=$(addprefix $(BUILD_PFX),$(foreach p,$(obj_pats),$(filter %.o,$(1:$(p))) ))
 
 install_map_templates=$(eval $(call install_map_template,$(1),$(2)))
diff --git a/configure b/configure
index ff350cc..01c421d 100755
--- a/configure
+++ b/configure
@@ -704,13 +704,11 @@
         enabled postproc || die "postproc_visualizer requires postproc to be enabled"
     fi
 
-    # Enable WebM IO by default.
-    soft_enable webm_io
-
     # Enable unit tests by default if we have a working C++ compiler.
     case "$toolchain" in
         *-vs*)
             soft_enable unit_tests
+            soft_enable webm_io
         ;;
         *-android-*)
             # GTestLog must be modified to use Android logging utilities.
@@ -726,13 +724,21 @@
             check_cxx "$@" <<EOF && soft_enable unit_tests
 int z;
 EOF
+            check_cxx "$@" <<EOF && soft_enable webm_io
+int z;
+EOF
         ;;
         *)
             enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
 int z;
 EOF
+            check_cxx "$@" <<EOF && soft_enable webm_io
+int z;
+EOF
         ;;
     esac
+    # libwebm is C++, so link via $CXX to pull in the C++ standard library.
+    enabled webm_io && LD=${CXX}
 }
 
 
diff --git a/examples.mk b/examples.mk
index fa5d66c..91b9801 100644
--- a/examples.mk
+++ b/examples.mk
@@ -15,6 +15,16 @@
                 third_party/libyuv/source/scale.c  \
                 third_party/libyuv/source/cpu_id.c
 
+LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
+                      third_party/libwebm/mkvmuxerutil.cpp \
+                      third_party/libwebm/mkvwriter.cpp \
+                      third_party/libwebm/mkvmuxer.hpp \
+                      third_party/libwebm/mkvmuxertypes.hpp \
+                      third_party/libwebm/mkvmuxerutil.hpp \
+                      third_party/libwebm/mkvparser.hpp \
+                      third_party/libwebm/mkvwriter.hpp \
+                      third_party/libwebm/webmids.hpp
+
 # List of examples to build. UTILS are tools meant for distribution
 # while EXAMPLES demonstrate specific portions of the API.
 UTILS-$(CONFIG_DECODERS)    += vpxdec.c
@@ -53,10 +63,8 @@
 vpxenc.SRCS                 += vpxstats.c vpxstats.h
 vpxenc.SRCS                 += $(LIBYUV_SRCS)
 ifeq ($(CONFIG_WEBM_IO),yes)
-  vpxenc.SRCS                 += third_party/libmkv/EbmlIDs.h
-  vpxenc.SRCS                 += third_party/libmkv/EbmlWriter.c
-  vpxenc.SRCS                 += third_party/libmkv/EbmlWriter.h
-  vpxenc.SRCS                 += webmenc.c webmenc.h
+  vpxenc.SRCS                 += $(LIBWEBM_MUXER_SRCS)
+  vpxenc.SRCS                 += webmenc.cc webmenc.h
 endif
 vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 vpxenc.DESCRIPTION           = Full featured encoder
diff --git a/examples/vpx_temporal_scalable_patterns.c b/examples/vpx_temporal_scalable_patterns.c
index 5cb4ee9..07dd318 100644
--- a/examples/vpx_temporal_scalable_patterns.c
+++ b/examples/vpx_temporal_scalable_patterns.c
@@ -526,7 +526,7 @@
   // Enable error resilient mode.
   cfg.g_error_resilient = 1;
   cfg.g_lag_in_frames   = 0;
-  cfg.kf_mode = VPX_KF_DISABLED;
+  cfg.kf_mode = VPX_KF_AUTO;
 
   // Disable automatic keyframe placement.
   cfg.kf_min_dist = cfg.kf_max_dist = 3000;
diff --git a/third_party/googletest/README.webm b/third_party/googletest/README.libvpx
similarity index 100%
rename from third_party/googletest/README.webm
rename to third_party/googletest/README.libvpx
diff --git a/third_party/libwebm/README.webm b/third_party/libwebm/README.libvpx
similarity index 100%
rename from third_party/libwebm/README.webm
rename to third_party/libwebm/README.libvpx
diff --git a/third_party/libyuv/README.webm b/third_party/libyuv/README.libvpx
similarity index 100%
rename from third_party/libyuv/README.webm
rename to third_party/libyuv/README.libvpx
diff --git a/third_party/nestegg/README.webm b/third_party/nestegg/README.libvpx
similarity index 100%
rename from third_party/nestegg/README.webm
rename to third_party/nestegg/README.libvpx
diff --git a/third_party/x86inc/README.webm b/third_party/x86inc/README.libvpx
similarity index 100%
rename from third_party/x86inc/README.webm
rename to third_party/x86inc/README.libvpx
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ef37c0e..32c5997 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1761,8 +1761,11 @@
 
 }
 
+#ifndef M_LOG2_E
 #define M_LOG2_E 0.693147180559945309417
+#endif
 #define log2f(x) (log (x) / (float) M_LOG2_E)
+
 static void cal_mvsadcosts(int *mvsadcost[2])
 {
     int i = 1;
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 4c896b1..6ca6087 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1235,6 +1235,8 @@
 
         0,                  /* rc_dropframe_thresh */
         0,                  /* rc_resize_allowed */
+        1,                  /* rc_scaled_width */
+        1,                  /* rc_scaled_height */
         60,                 /* rc_resize_down_thresold */
         30,                 /* rc_resize_up_thresold */
 
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 0b4c4cb..d75d258 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -41,19 +41,11 @@
 
 static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
 
-static const mem_req_t vp8_mem_req_segs[] =
-{
-    {VP8_SEG_ALG_PRIV,    0, 8, VPX_CODEC_MEM_ZERO, vp8_priv_sz},
-    {VP8_SEG_MAX, 0, 0, 0, NULL}
-};
-
 struct vpx_codec_alg_priv
 {
     vpx_codec_priv_t        base;
-    vpx_codec_mmap_t        mmaps[NELEMENTS(vp8_mem_req_segs)-1];
     vpx_codec_dec_cfg_t     cfg;
     vp8_stream_info_t       si;
-    int                     defer_alloc;
     int                     decoder_init;
     int                     postproc_cfg_set;
     vp8_postproc_cfg_t      postproc_cfg;
@@ -84,19 +76,14 @@
     return sizeof(vpx_codec_alg_priv_t);
 }
 
-static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
+static void vp8_init_ctx(vpx_codec_ctx_t *ctx)
 {
-    int i;
-
-    ctx->priv = mmap->base;
+    ctx->priv =
+        (vpx_codec_priv_t *)vpx_memalign(8, sizeof(vpx_codec_alg_priv_t));
+    vpx_memset(ctx->priv, 0, sizeof(vpx_codec_alg_priv_t));
     ctx->priv->sz = sizeof(*ctx->priv);
     ctx->priv->iface = ctx->iface;
-    ctx->priv->alg_priv = mmap->base;
-
-    for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++)
-        ctx->priv->alg_priv->mmaps[i].id = vp8_mem_req_segs[i].id;
-
-    ctx->priv->alg_priv->mmaps[0] = *mmap;
+    ctx->priv->alg_priv = (vpx_codec_alg_priv_t *)ctx->priv;
     ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
     ctx->priv->alg_priv->decrypt_cb = NULL;
     ctx->priv->alg_priv->decrypt_state = NULL;
@@ -110,11 +97,6 @@
     }
 }
 
-static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
-{
-    /* nothing to clean up */
-}
-
 static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
                                 vpx_codec_priv_enc_mr_cfg_t *data)
 {
@@ -129,17 +111,7 @@
      */
     if (!ctx->priv)
     {
-        vpx_codec_mmap_t mmap;
-
-        mmap.id = vp8_mem_req_segs[0].id;
-        mmap.sz = sizeof(vpx_codec_alg_priv_t);
-        mmap.align = vp8_mem_req_segs[0].align;
-        mmap.flags = vp8_mem_req_segs[0].flags;
-
-        res = vpx_mmap_alloc(&mmap);
-        if (res != VPX_CODEC_OK) return res;
-
-        vp8_init_ctx(ctx, &mmap);
+        vp8_init_ctx(ctx);
 
         /* initialize number of fragments to zero */
         ctx->priv->alg_priv->fragments.count = 0;
@@ -148,7 +120,6 @@
                 (ctx->priv->alg_priv->base.init_flags &
                     VPX_CODEC_USE_INPUT_FRAGMENTS);
 
-        ctx->priv->alg_priv->defer_alloc = 1;
         /*post processing level initialized to do nothing */
     }
 
@@ -175,15 +146,9 @@
 
 static vpx_codec_err_t vp8_destroy(vpx_codec_alg_priv_t *ctx)
 {
-    int i;
-
     vp8_remove_decoder_instances(&ctx->yv12_frame_buffers);
 
-    for (i = NELEMENTS(ctx->mmaps) - 1; i >= 0; i--)
-    {
-        if (ctx->mmaps[i].dtor)
-            ctx->mmaps[i].dtor(&ctx->mmaps[i]);
-    }
+    vpx_free(ctx);
 
     return VPX_CODEC_OK;
 }
@@ -389,72 +354,35 @@
     if ((ctx->si.h != h) || (ctx->si.w != w))
         resolution_change = 1;
 
-    /* Perform deferred allocations, if required */
-    if (!res && ctx->defer_alloc)
-    {
-        int i;
-
-        for (i = 1; !res && i < NELEMENTS(ctx->mmaps); i++)
-        {
-            vpx_codec_dec_cfg_t cfg;
-
-            cfg.w = ctx->si.w;
-            cfg.h = ctx->si.h;
-            ctx->mmaps[i].id = vp8_mem_req_segs[i].id;
-            ctx->mmaps[i].sz = vp8_mem_req_segs[i].sz;
-            ctx->mmaps[i].align = vp8_mem_req_segs[i].align;
-            ctx->mmaps[i].flags = vp8_mem_req_segs[i].flags;
-
-            if (!ctx->mmaps[i].sz)
-                ctx->mmaps[i].sz = vp8_mem_req_segs[i].calc_sz(&cfg,
-                                   ctx->base.init_flags);
-
-            res = vpx_mmap_alloc(&ctx->mmaps[i]);
-        }
-
-        if (!res)
-            vp8_finalize_mmaps(ctx);
-
-        ctx->defer_alloc = 0;
-    }
-
     /* Initialize the decoder instance on the first frame*/
     if (!res && !ctx->decoder_init)
     {
-        res = vpx_validate_mmaps(&ctx->si, ctx->mmaps,
-                                 vp8_mem_req_segs, NELEMENTS(vp8_mem_req_segs),
-                                 ctx->base.init_flags);
+      VP8D_CONFIG oxcf;
 
-        if (!res)
-        {
-            VP8D_CONFIG oxcf;
+      oxcf.Width = ctx->si.w;
+      oxcf.Height = ctx->si.h;
+      oxcf.Version = 9;
+      oxcf.postprocess = 0;
+      oxcf.max_threads = ctx->cfg.threads;
+      oxcf.error_concealment =
+          (ctx->base.init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT);
 
-            oxcf.Width = ctx->si.w;
-            oxcf.Height = ctx->si.h;
-            oxcf.Version = 9;
-            oxcf.postprocess = 0;
-            oxcf.max_threads = ctx->cfg.threads;
-            oxcf.error_concealment =
-                    (ctx->base.init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT);
+      /* If postprocessing was enabled by the application and a
+       * configuration has not been provided, default it.
+       */
+       if (!ctx->postproc_cfg_set
+           && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) {
+         ctx->postproc_cfg.post_proc_flag =
+             VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE;
+         ctx->postproc_cfg.deblocking_level = 4;
+         ctx->postproc_cfg.noise_level = 0;
+       }
 
-            /* If postprocessing was enabled by the application and a
-             * configuration has not been provided, default it.
-             */
-            if (!ctx->postproc_cfg_set
-                && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
-            {
-                ctx->postproc_cfg.post_proc_flag =
-                    VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE;
-                ctx->postproc_cfg.deblocking_level = 4;
-                ctx->postproc_cfg.noise_level = 0;
-            }
+       res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf);
+       ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb;
+       ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state;
 
-            res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf);
-            ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb;
-            ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state;
-        }
-
-        ctx->decoder_init = 1;
+       ctx->decoder_init = 1;
     }
 
     if (!res)
@@ -618,89 +546,6 @@
     return img;
 }
 
-
-static
-vpx_codec_err_t vp8_xma_get_mmap(const vpx_codec_ctx_t      *ctx,
-                                 vpx_codec_mmap_t           *mmap,
-                                 vpx_codec_iter_t           *iter)
-{
-    vpx_codec_err_t     res;
-    const mem_req_t  *seg_iter = *iter;
-
-    /* Get address of next segment request */
-    do
-    {
-        if (!seg_iter)
-            seg_iter = vp8_mem_req_segs;
-        else if (seg_iter->id != VP8_SEG_MAX)
-            seg_iter++;
-
-        *iter = (vpx_codec_iter_t)seg_iter;
-
-        if (seg_iter->id != VP8_SEG_MAX)
-        {
-            mmap->id = seg_iter->id;
-            mmap->sz = seg_iter->sz;
-            mmap->align = seg_iter->align;
-            mmap->flags = seg_iter->flags;
-
-            if (!seg_iter->sz)
-                mmap->sz = seg_iter->calc_sz(ctx->config.dec, ctx->init_flags);
-
-            res = VPX_CODEC_OK;
-        }
-        else
-            res = VPX_CODEC_LIST_END;
-    }
-    while (!mmap->sz && res != VPX_CODEC_LIST_END);
-
-    return res;
-}
-
-static vpx_codec_err_t vp8_xma_set_mmap(vpx_codec_ctx_t         *ctx,
-                                        const vpx_codec_mmap_t  *mmap)
-{
-    vpx_codec_err_t res = VPX_CODEC_MEM_ERROR;
-    int i, done;
-
-    if (!ctx->priv)
-    {
-        if (mmap->id == VP8_SEG_ALG_PRIV)
-        {
-            if (!ctx->priv)
-            {
-                vp8_init_ctx(ctx, mmap);
-                res = VPX_CODEC_OK;
-            }
-        }
-    }
-
-    done = 1;
-
-    if (!res && ctx->priv->alg_priv)
-    {
-        for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++)
-        {
-            if (ctx->priv->alg_priv->mmaps[i].id == mmap->id)
-                if (!ctx->priv->alg_priv->mmaps[i].base)
-                {
-                    ctx->priv->alg_priv->mmaps[i] = *mmap;
-                    res = VPX_CODEC_OK;
-                }
-
-            done &= (ctx->priv->alg_priv->mmaps[i].base != NULL);
-        }
-    }
-
-    if (done && !res)
-    {
-        vp8_finalize_mmaps(ctx->priv->alg_priv);
-        res = ctx->iface->init(ctx, NULL);
-    }
-
-    return res;
-}
-
 static vpx_codec_err_t image2yuvconfig(const vpx_image_t   *img,
                                        YV12_BUFFER_CONFIG  *yv12)
 {
@@ -922,8 +767,8 @@
     vp8_init,         /* vpx_codec_init_fn_t       init; */
     vp8_destroy,      /* vpx_codec_destroy_fn_t    destroy; */
     vp8_ctf_maps,     /* vpx_codec_ctrl_fn_map_t  *ctrl_maps; */
-    vp8_xma_get_mmap, /* vpx_codec_get_mmap_fn_t   get_mmap; */
-    vp8_xma_set_mmap, /* vpx_codec_set_mmap_fn_t   set_mmap; */
+    NOT_IMPLEMENTED,  /* vpx_codec_get_mmap_fn_t   get_mmap; */
+    NOT_IMPLEMENTED,  /* vpx_codec_set_mmap_fn_t   set_mmap; */
     {
         vp8_peek_si,      /* vpx_codec_peek_si_fn_t    peek_si; */
         vp8_get_si,       /* vpx_codec_get_si_fn_t     get_si; */
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 77f563f..20de434 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -120,7 +120,6 @@
   // frame header, 3 reset all contexts.
   int reset_frame_context;
 
-  int frame_flags;
   // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
   // MODE_INFO (8-pixel) units.
   int MBs;
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index b455592..8a81554 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -13,7 +13,6 @@
 struct macroblock;
 struct vp9_variance_vtable;
 
-#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -758,20 +757,20 @@
 #
 # Motion search
 #
-add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv";
+add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
 specialize qw/vp9_full_search_sad sse3 sse4_1/;
 $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
 $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
 
-add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_refining_search_sad sse3/;
 $vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
 
-add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_diamond_search_sad sse3/;
 $vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
 
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_full_range_search/;
 
 add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index fd74478..56dbc99 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -361,7 +361,7 @@
     // If multiple threads are used to decode tiles, then we use those threads
     // to do parallel loopfiltering.
     if (pbi->num_tile_workers) {
-      vp9_loop_filter_frame_mt(pbi, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+      vp9_loop_filter_frame_mt(pbi, cm, cm->lf.filter_level, 0, 0);
     } else {
       vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0);
     }
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index c9dc251..b8250c2 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -49,9 +49,6 @@
 
   int decoded_key_frame;
 
-  int initial_width;
-  int initial_height;
-
   int do_loopfilter_inline;  // apply loopfilter to available rows immediately
   VP9Worker lf_worker;
 
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 9b124c9..9098063 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -40,13 +40,13 @@
   const int nsync = lf_sync->sync_range;
 
   if (r && !(c & (nsync - 1))) {
-    mutex_lock(&lf_sync->mutex_[r - 1]);
+    pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
+    mutex_lock(mutex);
 
     while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
-      pthread_cond_wait(&lf_sync->cond_[r - 1],
-                        &lf_sync->mutex_[r - 1]);
+      pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
     }
-    pthread_mutex_unlock(&lf_sync->mutex_[r - 1]);
+    pthread_mutex_unlock(mutex);
   }
 #else
   (void)lf_sync;
@@ -94,21 +94,21 @@
                                 VP9LfSync *const lf_sync, int num_lf_workers) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
   int r, c;  // SB row and col
-  LOOP_FILTER_MASK lfm;
   const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
 
   for (r = start; r < stop; r += num_lf_workers) {
     const int mi_row = r << MI_BLOCK_SIZE_LOG2;
-    MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride;
+    MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 
     for (c = 0; c < sb_cols; ++c) {
       const int mi_col = c << MI_BLOCK_SIZE_LOG2;
+      LOOP_FILTER_MASK lfm;
       int plane;
 
       sync_read(lf_sync, r, c);
 
       vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
-      vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm);
+      vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
         vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
@@ -134,9 +134,9 @@
 // threads.
 void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
                               VP9_COMMON *cm,
-                              MACROBLOCKD *xd,
                               int frame_filter_level,
                               int y_only, int partial_frame) {
+  VP9LfSync *const lf_sync = &pbi->lf_row_sync;
   // Number of superblock rows and cols
   const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
   const int tile_cols = 1 << cm->log2_tile_cols;
@@ -146,8 +146,6 @@
   // Allocate memory used in thread synchronization.
   // This always needs to be done even if frame_filter_level is 0.
   if (!cm->current_video_frame || cm->last_height != cm->height) {
-    VP9LfSync *const lf_sync = &pbi->lf_row_sync;
-
     if (cm->last_height != cm->height) {
       const int aligned_last_height =
           ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);
@@ -166,8 +164,7 @@
   vp9_loop_filter_frame_init(cm, frame_filter_level);
 
   // Initialize cur_sb_col to -1 for all SB rows.
-  vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,
-             sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
+  vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
 
   // Set up loopfilter thread data.
   // The decoder is using num_workers instead of pbi->num_tile_workers
@@ -194,7 +191,7 @@
     lf_data->stop = sb_rows;
     lf_data->y_only = y_only;   // always do all planes in decoder
 
-    lf_data->lf_sync = &pbi->lf_row_sync;
+    lf_data->lf_sync = lf_sync;
     lf_data->num_lf_workers = num_workers;
 
     // Start loopfiltering
@@ -253,8 +250,12 @@
 
 // Deallocate lf synchronization related mutex and data
 void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
-#if CONFIG_MULTITHREAD
+#if !CONFIG_MULTITHREAD
+  (void)rows;
+#endif  // !CONFIG_MULTITHREAD
+
   if (lf_sync != NULL) {
+#if CONFIG_MULTITHREAD
     int i;
 
     if (lf_sync->mutex_ != NULL) {
@@ -269,17 +270,10 @@
       }
       vpx_free(lf_sync->cond_);
     }
-
+#endif  // CONFIG_MULTITHREAD
     vpx_free(lf_sync->cur_sb_col);
     // clear the structure as the source of this call may be a resize in which
     // case this call will be followed by an _alloc() which may fail.
-    vpx_memset(lf_sync, 0, sizeof(*lf_sync));
+    vp9_zero(*lf_sync);
   }
-#else
-  (void)rows;
-  if (lf_sync != NULL) {
-    vpx_free(lf_sync->cur_sb_col);
-    vpx_memset(lf_sync, 0, sizeof(*lf_sync));
-  }
-#endif  // CONFIG_MULTITHREAD
 }
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index 005bd7b..8738cee 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -12,11 +12,9 @@
 #define VP9_DECODER_VP9_DTHREAD_H_
 
 #include "./vpx_config.h"
-#include "vp9/common/vp9_loopfilter.h"
 #include "vp9/decoder/vp9_reader.h"
 #include "vp9/decoder/vp9_thread.h"
 
-struct macroblockd;
 struct VP9Common;
 struct VP9Decoder;
 
@@ -43,16 +41,15 @@
 } VP9LfSync;
 
 // Allocate memory for loopfilter row synchronization.
-void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
+void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync,
                            int rows, int width);
 
 // Deallocate loopfilter synchronization related mutex and data.
-void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
+void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows);
 
 // Multi-threaded loopfilter that uses the tile threads.
 void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi,
                               struct VP9Common *cm,
-                              struct macroblockd *xd,
                               int frame_filter_level,
                               int y_only, int partial_frame);
 
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 7879091..e55881f 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -200,6 +200,7 @@
 
     // Rate target ratio to set q delta.
     const float rate_ratio_qdelta = 2.0;
+    const double q = vp9_convert_qindex_to_q(cm->base_qindex);
     vp9_clear_system_state();
     // Some of these parameters may be set via codec-control function later.
     cr->max_sbs_perframe = 10;
@@ -209,14 +210,12 @@
     // Set rate threshold to some fraction of target (and scaled by 256).
     cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2;
     // Distortion threshold, quadratic in Q, scale factor to be adjusted.
-    cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
-        vp9_convert_qindex_to_q(cm->base_qindex));
+    cr->thresh_dist_sb = 8 * (int)(q * q);
     if (cpi->sf.use_nonrd_pick_mode) {
       // May want to be more conservative with thresholds in non-rd mode for now
       // as rate/distortion are derived from model based on prediction residual.
       cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3;
-      cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
-          vp9_convert_qindex_to_q(cm->base_qindex));
+      cr->thresh_dist_sb = 4 * (int)(q * q);
     }
 
     cr->num_seg_blocks = 0;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 0cc50f7..70b70fe 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -243,8 +243,8 @@
   vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
 
   // R/D setup.
-  x->rddiv = cpi->RDDIV;
-  x->rdmult = cpi->RDMULT;
+  x->rddiv = cpi->rd.RDDIV;
+  x->rdmult = cpi->rd.RDMULT;
 
   // Setup segment ID.
   if (seg->enabled) {
@@ -819,6 +819,7 @@
                          int output_enabled) {
   int i, x_idx, y;
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblock_plane *const p = x->plane;
@@ -904,7 +905,7 @@
 
   if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
     for (i = 0; i < TX_MODES; i++)
-      cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
+      rd_opt->tx_select_diff[i] += ctx->tx_rd_diff[i];
   }
 
 #if CONFIG_INTERNAL_STATS
@@ -937,12 +938,12 @@
       }
     }
 
-    cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
-    cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
-    cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
+    rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
+    rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
+    rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
 
     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-      cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
+      rd_opt->filter_diff[i] += ctx->best_filter_diff[i];
   }
 }
 
@@ -1354,6 +1355,25 @@
   }
 }
 
+// Walks an 8x8 block of mode-info pointers; every non-NULL prev_mi_8x8 entry
+// is mapped to the same offset inside cm->mi and its sb_type is carried over.
+static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
+                              MODE_INFO **prev_mi_8x8) {
+  const int mis = cm->mi_stride;
+  int block_row, block_col;
+  for (block_row = 0; block_row < 8; ++block_row) {
+    for (block_col = 0; block_col < 8; ++block_col) {
+      const int idx = block_row * mis + block_col;
+      MODE_INFO *const prev_mi = prev_mi_8x8[idx];
+      if (prev_mi) {
+        const BLOCK_SIZE sb_type = prev_mi->mbmi.sb_type;
+        mi_8x8[idx] = cm->mi + (prev_mi - cm->prev_mi);
+        mi_8x8[idx]->mbmi.sb_type = sb_type;
+      }
+    }
+  }
+}
+
 static void constrain_copy_partitioning(VP9_COMP *const cpi,
                                         const TileInfo *const tile,
                                         MODE_INFO **mi_8x8,
@@ -1403,38 +1423,10 @@
     }
   } else {
     // Else this is a partial SB64, copy previous partition.
-    for (block_row = 0; block_row < 8; ++block_row) {
-      for (block_col = 0; block_col < 8; ++block_col) {
-        MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
-        const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
-        if (prev_mi) {
-          const ptrdiff_t offset = prev_mi - cm->prev_mi;
-          mi_8x8[block_row * mis + block_col] = cm->mi + offset;
-          mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
-        }
-      }
-    }
+    copy_partitioning(cm, mi_8x8, prev_mi_8x8);
   }
 }
 
-static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
-                              MODE_INFO **prev_mi_8x8) {
-  const int mis = cm->mi_stride;
-  int block_row, block_col;
-
-  for (block_row = 0; block_row < 8; ++block_row) {
-    for (block_col = 0; block_col < 8; ++block_col) {
-      MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
-      const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
-
-      if (prev_mi) {
-        const ptrdiff_t offset = prev_mi - cm->prev_mi;
-        mi_8x8[block_row * mis + block_col] = cm->mi + offset;
-        mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
-      }
-    }
-  }
-}
 
 const struct {
   int row;
@@ -1455,13 +1447,14 @@
                                            MODE_INFO **mi_8x8,
                                            int mi_row, int mi_col) {
   VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *x = &cpi->mb;
+  MACROBLOCK *const x = &cpi->mb;
   const int mis = cm->mi_stride;
-  int row8x8_remaining = tile->mi_row_end - mi_row;
-  int col8x8_remaining = tile->mi_col_end - mi_col;
-  int r, c;
+  const int row8x8_remaining = tile->mi_row_end - mi_row;
+  const int col8x8_remaining = tile->mi_col_end - mi_col;
   MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
 
+  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
   assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
 
   // In-image SB64
@@ -1473,8 +1466,8 @@
     const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
                            (mi_col * MI_SIZE);
     const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
-    const int thr_32x32 = cpi->sf.source_var_thresh;
-    const int thr_64x64 = thr_32x32 << 1;
+    const unsigned int thr_32x32 = cpi->sf.source_var_thresh;
+    const unsigned int thr_64x64 = thr_32x32 << 1;
     int i, j;
     int index;
     diff d32[4];
@@ -1540,16 +1533,13 @@
     BLOCK_SIZE bsize = BLOCK_16X16;
     int bh = num_8x8_blocks_high_lookup[bsize];
     int bw = num_8x8_blocks_wide_lookup[bsize];
-
+    int r, c;
     for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
       for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
-        int index = r * mis + c;
-        // Find a partition size that fits
-        bsize = find_partition_size(bsize,
-                                    (row8x8_remaining - r),
-                                    (col8x8_remaining - c), &bh, &bw);
+        const int index = r * mis + c;
         mi_8x8[index] = mi_upper_left + index;
-        mi_8x8[index]->mbmi.sb_type = bsize;
+        mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize,
+            row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
       }
     }
   }
@@ -2658,9 +2648,10 @@
     if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
       return ALLOW_32X32;
     } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
+      const RD_OPT *const rd_opt = &cpi->rd;
       const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
-      return cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >
-                 cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
+      return rd_opt->tx_select_threshes[frame_type][ALLOW_32X32] >
+                 rd_opt->tx_select_threshes[frame_type][TX_MODE_SELECT] ?
                      ALLOW_32X32 : TX_MODE_SELECT;
     } else {
       unsigned int total = 0;
@@ -3191,7 +3182,6 @@
                             1, &dummy_rate, &dummy_dist);
         break;
       case SOURCE_VAR_BASED_PARTITION:
-        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist);
@@ -3222,8 +3212,23 @@
 }
 // end RTC play code
 
+static int get_skip_encode_frame(const VP9_COMMON *cm) {
+  unsigned int intra_count = 0, inter_count = 0;
+  int j;
+
+  for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
+    intra_count += cm->counts.intra_inter[j][0];
+    inter_count += cm->counts.intra_inter[j][1];
+  }
+
+  return (intra_count << 2) < inter_count &&
+         cm->frame_type != KEY_FRAME &&
+         cm->show_frame;
+}
+
 static void encode_frame_internal(VP9_COMP *cpi) {
   SPEED_FEATURES *const sf = &cpi->sf;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCK *const x = &cpi->mb;
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -3234,10 +3239,10 @@
   vp9_zero(cm->counts);
   vp9_zero(cpi->coef_counts);
   vp9_zero(cpi->tx_stepdown_count);
-  vp9_zero(cpi->rd_comp_pred_diff);
-  vp9_zero(cpi->rd_filter_diff);
-  vp9_zero(cpi->rd_tx_select_diff);
-  vp9_zero(cpi->rd_tx_select_threshes);
+  vp9_zero(rd_opt->comp_pred_diff);
+  vp9_zero(rd_opt->filter_diff);
+  vp9_zero(rd_opt->tx_select_diff);
+  vp9_zero(rd_opt->tx_select_threshes);
 
   cm->tx_mode = select_tx_mode(cpi);
 
@@ -3334,19 +3339,7 @@
     cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
   }
 
-  if (sf->skip_encode_sb) {
-    int j;
-    unsigned int intra_count = 0, inter_count = 0;
-    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
-      intra_count += cm->counts.intra_inter[j][0];
-      inter_count += cm->counts.intra_inter[j][1];
-    }
-    sf->skip_encode_frame = (intra_count << 2) < inter_count &&
-                            cm->frame_type != KEY_FRAME &&
-                            cm->show_frame;
-  } else {
-    sf->skip_encode_frame = 0;
-  }
+  sf->skip_encode_frame = sf->skip_encode_sb ? get_skip_encode_frame(cm) : 0;
 
 #if 0
   // Keep record of the total distortion this time around for future use
@@ -3356,6 +3349,7 @@
 
 void vp9_encode_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
 
   // In the longer term the encoder should be generalized to match the
   // decoder such that we allow compound where one of the 3 buffers has a
@@ -3388,8 +3382,8 @@
     // that for subsequent frames.
     // It does the same analysis for transform size selection also.
     const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
-    const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type];
-    const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type];
+    const int64_t *mode_thresh = rd_opt->prediction_type_threshes[frame_type];
+    const int64_t *filter_thresh = rd_opt->filter_threshes[frame_type];
 
     /* prediction (compound, single or hybrid) mode selection */
     if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter)
@@ -3422,25 +3416,25 @@
     encode_frame_internal(cpi);
 
     for (i = 0; i < REFERENCE_MODES; ++i) {
-      const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
-      cpi->rd_prediction_type_threshes[frame_type][i] += diff;
-      cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
+      const int diff = (int) (rd_opt->comp_pred_diff[i] / cm->MBs);
+      rd_opt->prediction_type_threshes[frame_type][i] += diff;
+      rd_opt->prediction_type_threshes[frame_type][i] >>= 1;
     }
 
     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
-      const int64_t diff = cpi->rd_filter_diff[i] / cm->MBs;
-      cpi->rd_filter_threshes[frame_type][i] =
-          (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
+      const int64_t diff = rd_opt->filter_diff[i] / cm->MBs;
+      rd_opt->filter_threshes[frame_type][i] =
+          (rd_opt->filter_threshes[frame_type][i] + diff) / 2;
     }
 
     for (i = 0; i < TX_MODES; ++i) {
-      int64_t pd = cpi->rd_tx_select_diff[i];
+      int64_t pd = rd_opt->tx_select_diff[i];
       int diff;
       if (i == TX_MODE_SELECT)
         pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZES - 1), 0);
       diff = (int) (pd / cm->MBs);
-      cpi->rd_tx_select_threshes[frame_type][i] += diff;
-      cpi->rd_tx_select_threshes[frame_type][i] /= 2;
+      rd_opt->tx_select_threshes[frame_type][i] += diff;
+      rd_opt->tx_select_threshes[frame_type][i] /= 2;
     }
 
     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 5e98e4e..b0c014e 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -63,24 +63,17 @@
 }
 
 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
-typedef struct vp9_token_state vp9_token_state;
 
-struct vp9_token_state {
+typedef struct vp9_token_state {
   int           rate;
   int           error;
   int           next;
   signed char   token;
   short         qc;
-};
+} vp9_token_state;
 
 // TODO(jimbankoski): experiment to find optimal RD numbers.
-#define Y1_RD_MULT 4
-#define UV_RD_MULT 2
-
-static const int plane_rd_mult[4] = {
-  Y1_RD_MULT,
-  UV_RD_MULT,
-};
+static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
 
 #define UPDATE_RD_COST()\
 {\
@@ -105,60 +98,56 @@
   return pt;
 }
 
-static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
-                       TX_SIZE tx_size, MACROBLOCK *mb,
-                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
+static int optimize_b(MACROBLOCK *mb, int plane, int block,
+                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) {
   MACROBLOCKD *const xd = &mb->e_mbd;
-  struct macroblock_plane *p = &mb->plane[plane];
-  struct macroblockd_plane *pd = &xd->plane[plane];
+  struct macroblock_plane *const p = &mb->plane[plane];
+  struct macroblockd_plane *const pd = &xd->plane[plane];
   const int ref = is_inter_block(&xd->mi[0]->mbmi);
   vp9_token_state tokens[1025][2];
   unsigned best_index[1025][2];
-  const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
-  int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  int eob = p->eobs[block], final_eob, sz = 0;
-  const int i0 = 0;
-  int rc, x, next, i;
-  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
-  int rate0, rate1, error0, error1, t0, t1;
-  int best, band, pt;
-  PLANE_TYPE type = pd->plane_type;
-  int err_mult = plane_rd_mult[type];
+  uint8_t token_cache[1024];
+  const int16_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+  int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  const int eob = p->eobs[block];
+  const PLANE_TYPE type = pd->plane_type;
   const int default_eob = 16 << (tx_size << 1);
   const int mul = 1 + (tx_size == TX_32X32);
-  uint8_t token_cache[1024];
   const int16_t *dequant_ptr = pd->dequant;
   const uint8_t *const band_translate = get_band_translate(tx_size);
-  const scan_order *so = get_scan(xd, tx_size, type, block);
-  const int16_t *scan = so->scan;
-  const int16_t *nb = so->neighbors;
+  const scan_order *const so = get_scan(xd, tx_size, type, block);
+  const int16_t *const scan = so->scan;
+  const int16_t *const nb = so->neighbors;
+  int next = eob, sz = 0;
+  int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
+  int64_t rd_cost0, rd_cost1;
+  int rate0, rate1, error0, error1, t0, t1;
+  int best, band, pt, i, final_eob;
 
   assert((!type && !plane) || (type && plane));
   assert(eob <= default_eob);
 
   /* Now set up a Viterbi trellis to evaluate alternative roundings. */
-  rdmult = mb->rdmult * err_mult;
-  if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi))
+  if (!ref)
     rdmult = (rdmult * 9) >> 4;
-  rddiv = mb->rddiv;
+
   /* Initialize the sentinel node of the trellis. */
   tokens[eob][0].rate = 0;
   tokens[eob][0].error = 0;
   tokens[eob][0].next = default_eob;
   tokens[eob][0].token = EOB_TOKEN;
   tokens[eob][0].qc = 0;
-  *(tokens[eob] + 1) = *(tokens[eob] + 0);
-  next = eob;
+  tokens[eob][1] = tokens[eob][0];
+
   for (i = 0; i < eob; i++)
-    token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
-        qcoeff[scan[i]]].token];
+    token_cache[scan[i]] =
+        vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token];
 
-  for (i = eob; i-- > i0;) {
+  for (i = eob; i-- > 0;) {
     int base_bits, d2, dx;
-
-    rc = scan[i];
-    x = qcoeff[rc];
+    const int rc = scan[i];
+    int x = qcoeff[rc];
     /* Only add a trellis state for non-zero coefficients. */
     if (x) {
       int shortcut = 0;
@@ -172,17 +161,15 @@
       if (next < default_eob) {
         band = band_translate[i + 1];
         pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
-        rate0 +=
-          mb->token_costs[tx_size][type][ref][band][0][pt]
-                         [tokens[next][0].token];
-        rate1 +=
-          mb->token_costs[tx_size][type][ref][band][0][pt]
-                         [tokens[next][1].token];
+        rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
+                                [tokens[next][0].token];
+        rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
+                                [tokens[next][1].token];
       }
       UPDATE_RD_COST();
       /* And pick the best. */
       best = rd_cost1 < rd_cost0;
-      base_bits = *(vp9_dct_value_cost_ptr + x);
+      base_bits = vp9_dct_value_cost_ptr[x];
       dx = mul * (dqcoeff[rc] - coeff[rc]);
       d2 = dx * dx;
       tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
@@ -196,9 +183,9 @@
       rate0 = tokens[next][0].rate;
       rate1 = tokens[next][1].rate;
 
-      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
-          (abs(x)*dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
-                                         dequant_ptr[rc != 0]))
+      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+          (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
+                                               dequant_ptr[rc != 0]))
         shortcut = 1;
       else
         shortcut = 0;
@@ -235,7 +222,7 @@
       UPDATE_RD_COST();
       /* And pick the best. */
       best = rd_cost1 < rd_cost0;
-      base_bits = *(vp9_dct_value_cost_ptr + x);
+      base_bits = vp9_dct_value_cost_ptr[x];
 
       if (shortcut) {
         dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
@@ -274,26 +261,26 @@
 
   /* Now pick the best path through the whole trellis. */
   band = band_translate[i + 1];
-  pt = combine_entropy_contexts(*a, *l);
   rate0 = tokens[next][0].rate;
   rate1 = tokens[next][1].rate;
   error0 = tokens[next][0].error;
   error1 = tokens[next][1].error;
   t0 = tokens[next][0].token;
   t1 = tokens[next][1].token;
-  rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
-  rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
+  rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
+  rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
   UPDATE_RD_COST();
   best = rd_cost1 < rd_cost0;
-  final_eob = i0 - 1;
+  final_eob = -1;
   vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
   vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
   for (i = next; i < eob; i = next) {
-    x = tokens[i][best].qc;
+    const int x = tokens[i][best].qc;
+    const int rc = scan[i];
     if (x) {
       final_eob = i;
     }
-    rc = scan[i];
+
     qcoeff[rc] = x;
     dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
 
@@ -303,7 +290,7 @@
   final_eob++;
 
   mb->plane[plane].eobs[block] = final_eob;
-  *a = *l = (final_eob > 0);
+  return final_eob;
 }
 
 static INLINE void fdct32x32(int rd_transform,
@@ -393,7 +380,8 @@
     vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 
   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
-    optimize_b(plane, block, plane_bsize, tx_size, x, a, l);
+    const int ctx = combine_entropy_contexts(*a, *l);
+    *a = *l = optimize_b(x, plane, block, plane_bsize, tx_size, ctx) > 0;
   } else {
     *a = *l = p->eobs[block] > 0;
   }
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index db32ef8..874767d 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -376,15 +376,12 @@
   }
 }
 
-static unsigned int zz_motion_search(const MACROBLOCK *x) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *const src = x->plane[0].src.buf;
-  const int src_stride = x->plane[0].src.stride;
-  const uint8_t *const ref = xd->plane[0].pre[0].buf;
-  const int ref_stride = xd->plane[0].pre[0].stride;
+static unsigned int get_prediction_error(BLOCK_SIZE bsize,
+                                         const struct buf_2d *src,
+                                         const struct buf_2d *ref) {
   unsigned int sse;
-  vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type);
-  fn(src, src_stride, ref, ref_stride, &sse);
+  const vp9_variance_fn_t fn = get_block_variance_fn(bsize);
+  fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
   return sse;
 }
 
@@ -416,9 +413,7 @@
   // Center the initial step/diamond search on best mv.
   tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
                                     step_param,
-                                    x->sadperbit16, &num00, &v_fn_ptr,
-                                    x->nmvjointcost,
-                                    x->mvcost, ref_mv);
+                                    x->sadperbit16, &num00, &v_fn_ptr, ref_mv);
   if (tmp_err < INT_MAX)
     tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
   if (tmp_err < INT_MAX - new_mv_mode_penalty)
@@ -442,9 +437,7 @@
     } else {
       tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
                                         step_param + n, x->sadperbit16,
-                                        &num00, &v_fn_ptr,
-                                        x->nmvjointcost,
-                                        x->mvcost, ref_mv);
+                                        &num00, &v_fn_ptr, ref_mv);
       if (tmp_err < INT_MAX)
         tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
       if (tmp_err < INT_MAX - new_mv_mode_penalty)
@@ -632,7 +625,8 @@
         int_mv mv, tmp_mv;
 
         xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
-        motion_error = zz_motion_search(x);
+        motion_error = get_prediction_error(bsize, &x->plane[0].src,
+                                            &xd->plane[0].pre[0]);
         // Assume 0,0 motion with no mv overhead.
         mv.as_int = tmp_mv.as_int = 0;
 
@@ -668,7 +662,8 @@
           int gf_motion_error;
 
           xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
-          gf_motion_error = zz_motion_search(x);
+          gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+                                                 &xd->plane[0].pre[0]);
 
           first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
                                    &gf_motion_error);
@@ -901,39 +896,38 @@
   return fclamp(pow(error_term, power_term), 0.05, 5.0);
 }
 
-int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
-                              int section_target_bandwitdh) {
-  int q;
-  const int num_mbs = cpi->common.MBs;
-  int target_norm_bits_per_mb;
+static int get_twopass_worst_quality(const VP9_COMP *cpi,
+                                     const FIRSTPASS_STATS *stats,
+                                     int section_target_bandwidth) {
   const RATE_CONTROL *const rc = &cpi->rc;
 
-  const double section_err = fpstats->coded_error / fpstats->count;
-  const double err_per_mb = section_err / num_mbs;
-  const double speed_term = 1.0 + ((double)cpi->speed * 0.04);
+  if (section_target_bandwidth <= 0) {
+    return rc->worst_quality;  // Highest value allowed
+  } else {
+    const int num_mbs = cpi->common.MBs;
+    const double section_err = stats->coded_error / stats->count;
+    const double err_per_mb = section_err / num_mbs;
+    const double speed_term = 1.0 + 0.04 * cpi->speed;
+    const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
+                                            BPER_MB_NORMBITS) / num_mbs;
+    int q;
 
-  if (section_target_bandwitdh <= 0)
-    return rc->worst_quality;          // Highest value allowed
+    // Try and pick a max Q that will be high enough to encode the
+    // content at the given rate.
+    for (q = rc->best_quality; q < rc->worst_quality; ++q) {
+      const double factor = calc_correction_factor(err_per_mb, ERR_DIVISOR,
+                                                   0.5, 0.90, q);
+      const int bits_per_mb = vp9_rc_bits_per_mb(INTER_FRAME, q,
+                                                 factor * speed_term);
+      if (bits_per_mb <= target_norm_bits_per_mb)
+        break;
+    }
 
-  target_norm_bits_per_mb =
-      ((uint64_t)section_target_bandwitdh << BPER_MB_NORMBITS) / num_mbs;
-
-  // Try and pick a max Q that will be high enough to encode the
-  // content at the given rate.
-  for (q = rc->best_quality; q < rc->worst_quality; ++q) {
-    const double err_correction_factor = calc_correction_factor(err_per_mb,
-                                             ERR_DIVISOR, 0.5, 0.90, q);
-    const int bits_per_mb_at_this_q =
-      vp9_rc_bits_per_mb(INTER_FRAME, q, (err_correction_factor * speed_term));
-    if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
-      break;
+    // Restriction on active max q for constrained quality mode.
+    if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
+      q = MAX(q, cpi->cq_target_quality);
+    return q;
   }
-
-  // Restriction on active max q for constrained quality mode.
-  if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
-    q = MAX(q, cpi->cq_target_quality);
-
-  return q;
 }
 
 extern void vp9_new_framerate(VP9_COMP *cpi, double framerate);
@@ -981,8 +975,6 @@
                                    oxcf->target_bandwidth / 10000000.0);
   }
 
-  cpi->output_framerate = oxcf->framerate;
-
   // Calculate a minimum intra value to be used in determining the IIratio
   // scores used in the second pass. We have this minimum to make sure
   // that clips that are static but "low complexity" in the intra domain
@@ -2187,7 +2179,7 @@
   VP9_COMMON *const cm = &cpi->common;
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 ||
-       (cm->frame_flags & FRAMEFLAGS_KEY))) {
+       (cpi->frame_flags & FRAMEFLAGS_KEY))) {
     cm->frame_type = KEY_FRAME;
   } else {
     cm->frame_type = INTER_FRAME;
@@ -2242,7 +2234,7 @@
     // Special case code for first frame.
     const int section_target_bandwidth = (int)(twopass->bits_left /
                                                frames_left);
-    const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats,
+    const int tmp_q = get_twopass_worst_quality(cpi, &twopass->total_left_stats,
                                                 section_target_bandwidth);
     twopass->active_worst_quality = tmp_q;
     rc->ni_av_qi = tmp_q;
@@ -2257,7 +2249,7 @@
 
   // Keyframe and section processing.
   if (rc->frames_to_key == 0 ||
-      (cm->frame_flags & FRAMEFLAGS_KEY)) {
+      (cpi->frame_flags & FRAMEFLAGS_KEY)) {
     // Define next KF group and assign bits to it.
     this_frame_copy = this_frame;
     find_next_key_frame(cpi, &this_frame_copy);
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 7a16c8f..f7ba423 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -91,8 +91,6 @@
 
 void vp9_init_second_pass(struct VP9_COMP *cpi);
 void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
-int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
-                              int section_target_bandwitdh);
 
 // Post encode update of the rate control parameters for 2-pass
 void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 44b171f..a9da728 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -77,12 +77,12 @@
           INT_MAX);
 }
 
-static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv,
+static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
                                   int_mv *dst_mv, int mb_row, int mb_col) {
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   unsigned int err, tmp_err;
-  int_mv tmp_mv;
+  MV tmp_mv;
 
   // Try zero MV first
   // FIXME should really use something like near/nearest MV and/or MV prediction
@@ -93,24 +93,22 @@
 
   // Test last reference frame using the previous best mv as the
   // starting point (best reference) for the search
-  tmp_err = do_16x16_motion_iteration(cpi, &ref_mv->as_mv, &tmp_mv.as_mv,
-                                      mb_row, mb_col);
+  tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
   if (tmp_err < err) {
     err = tmp_err;
-    dst_mv->as_int = tmp_mv.as_int;
+    dst_mv->as_mv = tmp_mv;
   }
 
   // If the current best reference mv is not centered on 0,0 then do a 0,0
   // based search as well.
-  if (ref_mv->as_int) {
+  if (ref_mv->row != 0 || ref_mv->col != 0) {
     unsigned int tmp_err;
-    int_mv zero_ref_mv, tmp_mv;
+    MV zero_ref_mv = {0, 0}, tmp_mv;
 
-    zero_ref_mv.as_int = 0;
-    tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv.as_mv, &tmp_mv.as_mv,
+    tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
                                         mb_row, mb_col);
     if (tmp_err < err) {
-      dst_mv->as_int = tmp_mv.as_int;
+      dst_mv->as_mv = tmp_mv;
       err = tmp_err;
     }
   }
@@ -173,7 +171,7 @@
   YV12_BUFFER_CONFIG *buf,
   int mb_y_offset,
   YV12_BUFFER_CONFIG *golden_ref,
-  int_mv *prev_golden_ref_mv,
+  const MV *prev_golden_ref_mv,
   YV12_BUFFER_CONFIG *alt_ref,
   int mb_row,
   int mb_col
@@ -239,13 +237,11 @@
 
   int mb_col, mb_row, offset = 0;
   int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
-  int_mv arf_top_mv, gld_top_mv;
+  MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0};
   MODE_INFO mi_local = { { 0 } };
 
   // Set up limit values for motion vectors to prevent them extending outside
   // the UMV borders.
-  arf_top_mv.as_int = 0;
-  gld_top_mv.as_int = 0;
   x->mv_row_min     = -BORDER_MV_PIXELS_B16;
   x->mv_row_max     = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
   xd->up_available  = 0;
@@ -258,15 +254,13 @@
   mi_local.mbmi.ref_frame[1] = NONE;
 
   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
-    int_mv arf_left_mv, gld_left_mv;
+    MV arf_left_mv = arf_top_mv, gld_left_mv = gld_top_mv;
     int mb_y_in_offset  = mb_y_offset;
     int arf_y_in_offset = arf_y_offset;
     int gld_y_in_offset = gld_y_offset;
 
     // Set up limit values for motion vectors to prevent them extending outside
     // the UMV borders.
-    arf_left_mv.as_int = arf_top_mv.as_int;
-    gld_left_mv.as_int = gld_top_mv.as_int;
     x->mv_col_min      = -BORDER_MV_PIXELS_B16;
     x->mv_col_max      = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
     xd->left_available = 0;
@@ -277,11 +271,11 @@
       update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
                               golden_ref, &gld_left_mv, alt_ref,
                               mb_row, mb_col);
-      arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
-      gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
+      arf_left_mv = mb_stats->ref[ALTREF_FRAME].m.mv.as_mv;
+      gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
       if (mb_col == 0) {
-        arf_top_mv.as_int = arf_left_mv.as_int;
-        gld_top_mv.as_int = gld_left_mv.as_int;
+        arf_top_mv = arf_left_mv;
+        gld_top_mv = gld_left_mv;
       }
       xd->left_available = 1;
       mb_y_in_offset    += 16;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index f7a02a4..8a79011 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -66,7 +66,7 @@
 }
 
 static INLINE int mv_cost(const MV *mv,
-                          const int *joint_cost, int *comp_cost[2]) {
+                          const int *joint_cost, int *const comp_cost[2]) {
   return joint_cost[vp9_get_mv_joint(mv)] +
              comp_cost[0][mv->row] + comp_cost[1][mv->col];
 }
@@ -90,14 +90,13 @@
   return 0;
 }
 
-static int mvsad_err_cost(const MV *mv, const MV *ref,
-                          const int *mvjsadcost, int *mvsadcost[2],
+static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                           int error_per_bit) {
-  if (mvsadcost) {
+  if (x->nmvsadcost) {
     const MV diff = { mv->row - ref->row,
                       mv->col - ref->col };
-    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
-                                  error_per_bit, 8);
+    return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
+                                      x->nmvsadcost) * error_per_bit, 8);
   }
   return 0;
 }
@@ -170,14 +169,13 @@
   return (x & 7) << 1;
 }
 
-static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c,
-                                 int offset) {
-  return &buf[(r >> 3) * stride + (c >> 3) - offset];
+static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
+  return &buf[(r >> 3) * stride + (c >> 3)];
 }
 
 /* returns subpixel variance error function */
 #define DIST(r, c) \
-    vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \
+    vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
              src_stride, &sse)
 
 /* checks if (r, c) has better score than previous best */
@@ -270,7 +268,7 @@
                                  int *mvjcost, int *mvcost[2],
                                  int *distortion,
                                  unsigned int *sse1) {
-  const uint8_t *z = x->plane[0].src.buf;
+  const uint8_t *const z = x->plane[0].src.buf;
   const int src_stride = x->plane[0].src.stride;
   const MACROBLOCKD *xd = &x->e_mbd;
   unsigned int besterr = INT_MAX;
@@ -283,7 +281,7 @@
 
   const int y_stride = xd->plane[0].pre[0].stride;
   const int offset = bestmv->row * y_stride + bestmv->col;
-  const uint8_t *y = xd->plane[0].pre[0].buf + offset;
+  const uint8_t *const y = xd->plane[0].pre[0].buf;
 
   int rr = ref_mv->row;
   int rc = ref_mv->col;
@@ -303,7 +301,7 @@
   bestmv->col *= 8;
 
   // calculate central point error
-  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
+  besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
   *distortion = besterr;
   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
 
@@ -353,7 +351,7 @@
 #undef DIST
 /* returns subpixel variance error function */
 #define DIST(r, c) \
-    vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \
+    vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
               z, src_stride, &sse, second_pred)
 
 int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
@@ -368,7 +366,7 @@
                                       unsigned int *sse1,
                                       const uint8_t *second_pred,
                                       int w, int h) {
-  const uint8_t *z = x->plane[0].src.buf;
+  const uint8_t *const z = x->plane[0].src.buf;
   const int src_stride = x->plane[0].src.stride;
   const MACROBLOCKD *xd = &x->e_mbd;
   unsigned int besterr = INT_MAX;
@@ -382,7 +380,7 @@
   DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
   const int y_stride = xd->plane[0].pre[0].stride;
   const int offset = bestmv->row * y_stride + bestmv->col;
-  const uint8_t *y = xd->plane[0].pre[0].buf + offset;
+  const uint8_t *const y = xd->plane[0].pre[0].buf;
 
   int rr = ref_mv->row;
   int rc = ref_mv->col;
@@ -404,7 +402,7 @@
   // calculate central point error
   // TODO(yunqingwang): central pointer error was already calculated in full-
   // pixel search, and can be passed in this function.
-  vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+  vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
   besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
   *distortion = besterr;
   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -479,8 +477,7 @@
   {\
     if (thissad < bestsad) {\
       if (use_mvcost) \
-        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
-                                  mvjsadcost, mvsadcost, sad_per_bit);\
+        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\
       if (thissad < bestsad) {\
         bestsad = thissad;\
         best_site = i;\
@@ -520,9 +517,6 @@
   int k = -1;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   int best_init_s = search_param_to_steps[search_param];
-  const int *const mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   // adjust ref_mv to make sure it is within MV range
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   br = ref_mv->row;
@@ -531,8 +525,8 @@
   // Work out the start point for the search
   bestsad = vfp->sdf(what->buf, what->stride,
                      get_buf_from_mv(in_what, ref_mv), in_what->stride,
-                     0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv,
-                         mvjsadcost, mvsadcost, sad_per_bit);
+                     0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv,
+                                                  sad_per_bit);
 
   // Search all possible scales upto the search param around the center point
   // pick the scale of the point that is best as the starting scale of
@@ -880,7 +874,6 @@
 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
                             int search_param, int sad_per_bit, int *num00,
                             const vp9_variance_fn_ptr_t *fn_ptr,
-                            int *mvjcost, int *mvcost[2],
                             const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *what = x->plane[0].src.buf;
@@ -893,10 +886,6 @@
 
   unsigned int thissad;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   int tr, tc;
   int best_tr = 0;
   int best_tc = 0;
@@ -918,8 +907,7 @@
 
   // Check the starting position
   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv,
-                                 mvjsadcost, mvsadcost, sad_per_bit);
+                + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   start_row = MAX(-range, x->mv_row_min - ref_row);
   start_col = MAX(-range, x->mv_col_min - ref_col);
@@ -940,8 +928,7 @@
           if (sad_array[i] < bestsad) {
             const MV this_mv = {ref_row + tr, ref_col + tc + i};
             thissad = sad_array[i] +
-                      mvsad_err_cost(&this_mv, &fcenter_mv,
-                                      mvjsadcost, mvsadcost, sad_per_bit);
+                      mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (thissad < bestsad) {
               bestsad = thissad;
               best_tr = tr;
@@ -957,8 +944,7 @@
 
           if (thissad < bestsad) {
             const MV this_mv = {ref_row + tr, ref_col + tc + i};
-            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                      mvjsadcost, mvsadcost, sad_per_bit);
+            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
 
             if (thissad < bestsad) {
               bestsad = thissad;
@@ -979,7 +965,6 @@
                              MV *ref_mv, MV *best_mv,
                              int search_param, int sad_per_bit, int *num00,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct buf_2d *const what = &x->plane[0].src;
@@ -991,23 +976,22 @@
   const search_site *const ss = &x->ss[search_param * x->searches_per_step];
   const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-  const uint8_t *best_address;
+  const uint8_t *best_address, *in_what_ref;
   int best_sad = INT_MAX;
   int best_site = 0;
   int last_site = 0;
   int i, j, step;
 
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
-  best_address = get_buf_from_mv(in_what, ref_mv);
+  in_what_ref = get_buf_from_mv(in_what, ref_mv);
+  best_address = in_what_ref;
   *num00 = 0;
   *best_mv = *ref_mv;
 
   // Check the starting position
   best_sad = fn_ptr->sdf(what->buf, what->stride,
-                        in_what->buf, in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+                         best_address, in_what->stride, 0x7fffffff) +
+      mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   i = 1;
 
@@ -1020,8 +1004,7 @@
                              best_address + ss[i].offset, in_what->stride,
                              best_sad);
         if (sad < best_sad) {
-          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
-                                sad_per_bit);
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
           if (sad < best_sad) {
             best_sad = sad;
             best_site = i;
@@ -1046,8 +1029,7 @@
                                 best_address + ss[best_site].offset,
                                 in_what->stride, best_sad);
           if (sad < best_sad) {
-            sad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                  mvjsadcost, mvsadcost, sad_per_bit);
+            sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (sad < best_sad) {
               best_sad = sad;
               best_mv->row += ss[best_site].mv.row;
@@ -1060,7 +1042,7 @@
         break;
       };
 #endif
-    } else if (best_address == in_what->buf) {
+    } else if (best_address == in_what_ref) {
       (*num00)++;
     }
   }
@@ -1071,7 +1053,6 @@
                              MV *ref_mv, MV *best_mv, int search_param,
                              int sad_per_bit, int *num00,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv) {
   int i, j, step;
 
@@ -1098,10 +1079,6 @@
   const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
 
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   ref_row = ref_mv->row;
   ref_col = ref_mv->col;
@@ -1115,8 +1092,7 @@
 
   // Check the starting position
   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv,
-                                 mvjsadcost, mvsadcost, sad_per_bit);
+                + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   i = 1;
 
@@ -1149,9 +1125,8 @@
           if (sad_array[t] < bestsad) {
             const MV this_mv = {best_mv->row + ss[i].mv.row,
                                 best_mv->col + ss[i].mv.col};
-            sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                           mvjsadcost, mvsadcost, sad_per_bit);
-
+            sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
+                                           sad_per_bit);
             if (sad_array[t] < bestsad) {
               bestsad = sad_array[t];
               best_site = i;
@@ -1171,9 +1146,7 @@
                                              in_what_stride, bestsad);
 
           if (thissad < bestsad) {
-            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                      mvjsadcost, mvsadcost, sad_per_bit);
-
+            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (thissad < bestsad) {
               bestsad = thissad;
               best_site = i;
@@ -1231,8 +1204,7 @@
   int thissme, n, num00 = 0;
   int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
                                         step_param, sadpb, &n,
-                                        fn_ptr, x->nmvjointcost,
-                                        x->mvcost, ref_mv);
+                                        fn_ptr, ref_mv);
   if (bestsme < INT_MAX)
     bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   *dst_mv = temp_mv;
@@ -1250,8 +1222,7 @@
     } else {
       thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
                                         step_param + n, sadpb, &num00,
-                                        fn_ptr, x->nmvjointcost, x->mvcost,
-                                        ref_mv);
+                                        fn_ptr, ref_mv);
       if (thissme < INT_MAX)
         thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
 
@@ -1271,8 +1242,7 @@
     const int search_range = 8;
     MV best_mv = *dst_mv;
     thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
-                                       fn_ptr, x->nmvjointcost, x->mvcost,
-                                       ref_mv);
+                                       fn_ptr, ref_mv);
     if (thissme < INT_MAX)
       thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
     if (thissme < bestsme) {
@@ -1286,7 +1256,6 @@
 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
-                          int *mvjcost, int *mvcost[2],
                           const MV *center_mv, MV *best_mv) {
   int r, c;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1296,12 +1265,10 @@
   const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
   const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
   const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   int best_sad = fn_ptr->sdf(what->buf, what->stride,
       get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   *best_mv = *ref_mv;
 
   for (r = row_min; r < row_max; ++r) {
@@ -1309,9 +1276,7 @@
       const MV mv = {r, c};
       const int sad = fn_ptr->sdf(what->buf, what->stride,
           get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
-          mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
-                         sad_per_bit);
-
+              mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
       if (sad < best_sad) {
         best_sad = sad;
         *best_mv = mv;
@@ -1324,7 +1289,6 @@
 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
-                          int *mvjcost, int *mvcost[2],
                           const MV *center_mv, MV *best_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *const what = x->plane[0].src.buf;
@@ -1346,8 +1310,6 @@
   const int col_max = MIN(ref_col + distance, x->mv_col_max);
   unsigned int sad_array[3];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
 
   // Work out the mid point for the search
   const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
@@ -1358,8 +1320,7 @@
   // Baseline value at the centre
   bestsad = fn_ptr->sdf(what, what_stride,
                         bestaddress, in_what_stride, 0x7fffffff)
-            + mvsad_err_cost(best_mv, &fcenter_mv,
-                             mvjsadcost, mvsadcost, sad_per_bit);
+            + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   for (r = row_min; r < row_max; r++) {
     const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
@@ -1376,9 +1337,7 @@
 
         if (thissad < bestsad) {
           this_mv.col = c;
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, sad_per_bit);
-
+          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
           if (thissad < bestsad) {
             bestsad = thissad;
             best_mv->row = r;
@@ -1396,9 +1355,7 @@
 
       if (thissad < bestsad) {
         this_mv.col = c;
-        thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                   mvjsadcost, mvsadcost, sad_per_bit);
-
+        thissad  += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
         if (thissad < bestsad) {
           bestsad = thissad;
           best_mv->row = r;
@@ -1416,7 +1373,6 @@
 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
-                          int *mvjcost, int *mvcost[2],
                           const MV *center_mv, MV *best_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *const what = x->plane[0].src.buf;
@@ -1439,9 +1395,6 @@
   unsigned int sad_array[3];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
 
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   // Work out the mid point for the search
   const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
 
@@ -1451,8 +1404,7 @@
   // Baseline value at the center
   bestsad = fn_ptr->sdf(what, what_stride,
                         bestaddress, in_what_stride, 0x7fffffff)
-            + mvsad_err_cost(best_mv, &fcenter_mv,
-                             mvjsadcost, mvsadcost, sad_per_bit);
+            + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   for (r = row_min; r < row_max; r++) {
     const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
@@ -1469,9 +1421,7 @@
 
         if (thissad < bestsad) {
           this_mv.col = c;
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, sad_per_bit);
-
+          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
           if (thissad < bestsad) {
             bestsad = thissad;
             best_mv->row = r;
@@ -1494,9 +1444,7 @@
 
         if (thissad < bestsad) {
           this_mv.col = c;
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, sad_per_bit);
-
+          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
           if (thissad < bestsad) {
             bestsad = thissad;
             best_mv->row = r;
@@ -1515,9 +1463,7 @@
 
       if (thissad < bestsad) {
         this_mv.col = c;
-        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                  mvjsadcost, mvsadcost, sad_per_bit);
-
+        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
         if (thissad < bestsad) {
           bestsad = thissad;
           best_mv->row = r;
@@ -1536,20 +1482,16 @@
                               MV *ref_mv, int error_per_bit,
                               int search_range,
                               const vp9_variance_fn_ptr_t *fn_ptr,
-                              int *mvjcost, int *mvcost[2],
                               const MV *center_mv) {
   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct buf_2d *const what = &x->plane[0].src;
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
                                      get_buf_from_mv(in_what, ref_mv),
                                      in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
   for (i = 0; i < search_range; i++) {
@@ -1562,8 +1504,7 @@
         unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
             get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
         if (sad < best_sad) {
-          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
-                                error_per_bit);
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sad < best_sad) {
             best_sad = sad;
             best_site = j;
@@ -1586,19 +1527,16 @@
                               MV *ref_mv, int error_per_bit,
                               int search_range,
                               const vp9_variance_fn_ptr_t *fn_ptr,
-                              int *mvjcost, int *mvcost[2],
                               const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   const struct buf_2d *const what = &x->plane[0].src;
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
                                     in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
   for (i = 0; i < search_range; i++) {
@@ -1623,9 +1561,7 @@
         if (sads[j] < best_sad) {
           const MV mv = {ref_mv->row + neighbors[j].row,
                          ref_mv->col + neighbors[j].col};
-          sads[j] += mvsad_err_cost(&mv, &fcenter_mv,
-                                         mvjsadcost, mvsadcost, error_per_bit);
-
+          sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sads[j] < best_sad) {
             best_sad = sads[j];
             best_site = j;
@@ -1642,9 +1578,7 @@
                                          get_buf_from_mv(in_what, &mv),
                                          in_what->stride, best_sad);
           if (sad < best_sad) {
-            sad += mvsad_err_cost(&mv, &fcenter_mv,
-                                  mvjsadcost, mvsadcost, error_per_bit);
-
+            sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
             if (sad < best_sad) {
               best_sad = sad;
               best_site = j;
@@ -1672,7 +1606,6 @@
                              MV *ref_mv, int error_per_bit,
                              int search_range,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv,
                              const uint8_t *second_pred, int w, int h) {
   const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
@@ -1681,12 +1614,10 @@
   const struct buf_2d *const what = &x->plane[0].src;
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
       get_buf_from_mv(in_what, ref_mv), in_what->stride,
       second_pred, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
   for (i = 0; i < search_range; ++i) {
@@ -1701,8 +1632,7 @@
             get_buf_from_mv(in_what, &mv), in_what->stride,
             second_pred, best_sad);
         if (sad < best_sad) {
-          sad += mvsad_err_cost(&mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, error_per_bit);
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sad < best_sad) {
             best_sad = sad;
             best_site = j;
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index f7b7c5e..70d7985 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -110,14 +110,12 @@
                                     const MV *ref_mv, int sad_per_bit,
                                     int distance,
                                     const vp9_variance_fn_ptr_t *fn_ptr,
-                                    int *mvjcost, int *mvcost[2],
                                     const MV *center_mv, MV *best_mv);
 
 typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
                                         MV *ref_mv, int sad_per_bit,
                                         int distance,
                                         const vp9_variance_fn_ptr_t *fn_ptr,
-                                        int *mvjcost, int *mvcost[2],
                                         const MV *center_mv);
 
 typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
@@ -125,14 +123,12 @@
                                        int search_param, int sad_per_bit,
                                        int *num00,
                                        const vp9_variance_fn_ptr_t *fn_ptr,
-                                       int *mvjcost, int *mvcost[2],
                                        const MV *center_mv);
 
 int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv, const uint8_t *second_pred,
                              int w, int h);
 #ifdef __cplusplus
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 0ac9d5f..03f3c87 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -61,11 +61,6 @@
                                          // now so that HIGH_PRECISION is always
                                          // chosen.
 
-// Max rate target for 1080P and below encodes under normal circumstances
-// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
-#define MAX_MB_RATE 250
-#define MAXRATE_1080P 2025000
-
 // #define OUTPUT_YUV_REC
 
 #ifdef OUTPUT_YUV_SRC
@@ -386,117 +381,119 @@
 
 static void set_rd_speed_thresholds(VP9_COMP *cpi) {
   int i;
+  RD_OPT *const rd = &cpi->rd;
 
   // Set baseline threshold values
   for (i = 0; i < MAX_MODES; ++i)
-  cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
+    rd->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
 
-  cpi->rd_thresh_mult[THR_NEARESTMV] = 0;
-  cpi->rd_thresh_mult[THR_NEARESTG] = 0;
-  cpi->rd_thresh_mult[THR_NEARESTA] = 0;
+  rd->thresh_mult[THR_NEARESTMV] = 0;
+  rd->thresh_mult[THR_NEARESTG] = 0;
+  rd->thresh_mult[THR_NEARESTA] = 0;
 
-  cpi->rd_thresh_mult[THR_DC] += 1000;
+  rd->thresh_mult[THR_DC] += 1000;
 
-  cpi->rd_thresh_mult[THR_NEWMV] += 1000;
-  cpi->rd_thresh_mult[THR_NEWA] += 1000;
-  cpi->rd_thresh_mult[THR_NEWG] += 1000;
+  rd->thresh_mult[THR_NEWMV] += 1000;
+  rd->thresh_mult[THR_NEWA] += 1000;
+  rd->thresh_mult[THR_NEWG] += 1000;
 
-  cpi->rd_thresh_mult[THR_NEARMV] += 1000;
-  cpi->rd_thresh_mult[THR_NEARA] += 1000;
-  cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000;
-  cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000;
+  rd->thresh_mult[THR_NEARMV] += 1000;
+  rd->thresh_mult[THR_NEARA] += 1000;
+  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
+  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
 
-  cpi->rd_thresh_mult[THR_TM] += 1000;
+  rd->thresh_mult[THR_TM] += 1000;
 
-  cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500;
-  cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000;
-  cpi->rd_thresh_mult[THR_NEARG] += 1000;
-  cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500;
-  cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000;
+  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
+  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
+  rd->thresh_mult[THR_NEARG] += 1000;
+  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
+  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
 
-  cpi->rd_thresh_mult[THR_ZEROMV] += 2000;
-  cpi->rd_thresh_mult[THR_ZEROG] += 2000;
-  cpi->rd_thresh_mult[THR_ZEROA] += 2000;
-  cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500;
-  cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500;
+  rd->thresh_mult[THR_ZEROMV] += 2000;
+  rd->thresh_mult[THR_ZEROG] += 2000;
+  rd->thresh_mult[THR_ZEROA] += 2000;
+  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
+  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
 
-  cpi->rd_thresh_mult[THR_H_PRED] += 2000;
-  cpi->rd_thresh_mult[THR_V_PRED] += 2000;
-  cpi->rd_thresh_mult[THR_D45_PRED ] += 2500;
-  cpi->rd_thresh_mult[THR_D135_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D117_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D153_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D207_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D63_PRED] += 2500;
+  rd->thresh_mult[THR_H_PRED] += 2000;
+  rd->thresh_mult[THR_V_PRED] += 2000;
+  rd->thresh_mult[THR_D45_PRED ] += 2500;
+  rd->thresh_mult[THR_D135_PRED] += 2500;
+  rd->thresh_mult[THR_D117_PRED] += 2500;
+  rd->thresh_mult[THR_D153_PRED] += 2500;
+  rd->thresh_mult[THR_D207_PRED] += 2500;
+  rd->thresh_mult[THR_D63_PRED] += 2500;
 
   /* disable frame modes if flags not set */
   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
-    cpi->rd_thresh_mult[THR_NEWMV    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX;
-    cpi->rd_thresh_mult[THR_ZEROMV   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARMV   ] = INT_MAX;
+    rd->thresh_mult[THR_NEWMV    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
+    rd->thresh_mult[THR_ZEROMV   ] = INT_MAX;
+    rd->thresh_mult[THR_NEARMV   ] = INT_MAX;
   }
   if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
-    cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_ZEROG    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARG    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEWG     ] = INT_MAX;
+    rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
+    rd->thresh_mult[THR_ZEROG    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARG    ] = INT_MAX;
+    rd->thresh_mult[THR_NEWG     ] = INT_MAX;
   }
   if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
-    cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_ZEROA    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARA    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEWA     ] = INT_MAX;
+    rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
+    rd->thresh_mult[THR_ZEROA    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARA    ] = INT_MAX;
+    rd->thresh_mult[THR_NEWA     ] = INT_MAX;
   }
 
   if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
       (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
-    cpi->rd_thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARLA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEWLA    ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARLA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEWLA    ] = INT_MAX;
   }
   if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
       (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
-    cpi->rd_thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
   }
 }
 
 static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
   const SPEED_FEATURES *const sf = &cpi->sf;
+  RD_OPT *const rd = &cpi->rd;
   int i;
 
   for (i = 0; i < MAX_REFS; ++i)
-    cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode)  ? -500 : 0;
+    rd->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode)  ? -500 : 0;
 
-  cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500;
-  cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500;
+  rd->thresh_mult_sub8x8[THR_LAST] += 2500;
+  rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
+  rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
+  rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
+  rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
+  rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
 
   // Check for masked out split cases.
   for (i = 0; i < MAX_REFS; i++)
     if (sf->disable_split_mask & (1 << i))
-      cpi->rd_thresh_mult_sub8x8[i] = INT_MAX;
+      rd->thresh_mult_sub8x8[i] = INT_MAX;
 
   // disable mode test if frame flag is not set
   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
   if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
   if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
   if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
       (VP9_LAST_FLAG | VP9_ALT_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
   if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
       (VP9_GOLD_FLAG | VP9_ALT_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
 }
 
 static void set_speed_features(VP9_COMP *cpi) {
@@ -627,50 +624,8 @@
 }
 
 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
-  VP9_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  VP9_CONFIG *const oxcf = &cpi->oxcf;
-  int vbr_max_bits;
-
-  oxcf->framerate = framerate < 0.1 ? 30 : framerate;
-  cpi->output_framerate = cpi->oxcf.framerate;
-  rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth /
-                                     cpi->output_framerate);
-  rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth *
-                                  oxcf->two_pass_vbrmin_section / 100);
-
-  rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
-
-  // A maximum bitrate for a frame is defined.
-  // The baseline for this aligns with HW implementations that
-  // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
-  // per 16x16 MB (averaged over a frame). However this limit is extended if
-  // a very high rate is given on the command line or the the rate cannnot
-  // be acheived because of a user specificed max q (e.g. when the user
-  // specifies lossless encode.
-  //
-  vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth *
-      oxcf->two_pass_vbrmax_section) / 100);
-  rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
-                                vbr_max_bits);
-
-  // Set Maximum gf/arf interval
-  rc->max_gf_interval = 16;
-
-  // Extended interval for genuinely static scenes
-  rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
-
-  // Special conditions when alt ref frame enabled in lagged compress mode
-  if (oxcf->play_alternate && oxcf->lag_in_frames) {
-    if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
-      rc->max_gf_interval = oxcf->lag_in_frames - 1;
-
-    if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
-      rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
-  }
-
-  if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
-    rc->max_gf_interval = rc->static_scene_max_gf_interval;
+  cpi->oxcf.framerate = framerate < 0.1 ? 30 : framerate;
+  vp9_rc_update_framerate(cpi);
 }
 
 int64_t vp9_rescale(int64_t val, int64_t num, int denom) {
@@ -860,10 +815,7 @@
   cm->display_width = cpi->oxcf.width;
   cm->display_height = cpi->oxcf.height;
 
-  // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
-  cpi->oxcf.sharpness = MIN(7, cpi->oxcf.sharpness);
-
-  cpi->common.lf.sharpness_level = cpi->oxcf.sharpness;
+  cm->lf.sharpness_level = cpi->oxcf.sharpness;
 
   if (cpi->initial_width) {
     // Increasing the size of the frame beyond the first seen frame, or some
@@ -883,10 +835,6 @@
 
   cpi->speed = abs(cpi->oxcf.cpu_used);
 
-  // Limit on lag buffers as these are not currently dynamically allocated.
-  if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
-    cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
-
 #if CONFIG_MULTIPLE_ARF
   vp9_zero(cpi->alt_ref_source);
 #else
@@ -906,7 +854,9 @@
   cpi->ext_refresh_frame_context_pending = 0;
 }
 
+#ifndef M_LOG2_E
 #define M_LOG2_E 0.693147180559945309417
+#endif
 #define log2f(x) (log (x) / (float) M_LOG2_E)
 
 static void cal_nmvjointsadcost(int *mvjointsadcost) {
@@ -1275,9 +1225,9 @@
   // Default rd threshold factors for mode selection
   for (i = 0; i < BLOCK_SIZES; ++i) {
     for (j = 0; j < MAX_MODES; ++j)
-      cpi->rd_thresh_freq_fact[i][j] = 32;
+      cpi->rd.thresh_freq_fact[i][j] = 32;
     for (j = 0; j < MAX_REFS; ++j)
-      cpi->rd_thresh_freq_sub8x8[i][j] = 32;
+      cpi->rd.thresh_freq_sub8x8[i][j] = 32;
   }
 
 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
@@ -1386,8 +1336,6 @@
 
   cm->error.setjmp = 0;
 
-  vp9_zero(cpi->common.counts.uv_mode);
-
 #ifdef MODE_TEST_HIT_STATS
   vp9_zero(cpi->mode_test_hits);
 #endif
@@ -1787,7 +1735,9 @@
                        dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
     }
   }
-  vp8_yv12_extend_frame_borders(dst_fb);
+  // TODO(hkuang): Call C version explicitly
+  // as neon version only expands border size 32.
+  vp8_yv12_extend_frame_borders_c(dst_fb);
 }
 
 static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
@@ -1828,7 +1778,9 @@
     }
   }
 
-  vp8_yv12_extend_frame_borders(dst_fb);
+  // TODO(hkuang): Call C version explicitly
+  // as neon version only expands border size 32.
+  vp8_yv12_extend_frame_borders_c(dst_fb);
 }
 
 static int find_fp_qindex() {
@@ -2538,7 +2490,7 @@
 
   vp9_clear_system_state();
 
-  vp9_zero(cpi->rd_tx_select_threshes);
+  vp9_zero(cpi->rd.tx_select_threshes);
 
 #if CONFIG_VP9_POSTPROC
   if (cpi->oxcf.noise_sensitivity > 0) {
@@ -2647,22 +2599,23 @@
   output_frame_level_debug_stats(cpi);
 #endif
   if (cpi->refresh_golden_frame == 1)
-    cm->frame_flags |= FRAMEFLAGS_GOLDEN;
+    cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
   else
-    cm->frame_flags &= ~FRAMEFLAGS_GOLDEN;
+    cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
 
   if (cpi->refresh_alt_ref_frame == 1)
-    cm->frame_flags |= FRAMEFLAGS_ALTREF;
+    cpi->frame_flags |= FRAMEFLAGS_ALTREF;
   else
-    cm->frame_flags &= ~FRAMEFLAGS_ALTREF;
+    cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
 
   get_ref_frame_flags(cpi);
 
+  cm->last_frame_type = cm->frame_type;
   vp9_rc_postencode_update(cpi, *size);
 
   if (cm->frame_type == KEY_FRAME) {
     // Tell the caller that the frame was coded as a key frame
-    *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY;
+    *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
 
 #if CONFIG_MULTIPLE_ARF
     // Reset the sequence number.
@@ -2673,7 +2626,7 @@
     }
 #endif
   } else {
-    *frame_flags = cm->frame_flags&~FRAMEFLAGS_KEY;
+    *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
 
 #if CONFIG_MULTIPLE_ARF
     /* Increment position in the coded frame sequence. */
@@ -3042,7 +2995,16 @@
   }
 #endif
 
-  cm->frame_flags = *frame_flags;
+  cpi->frame_flags = *frame_flags;
+
+  if (cpi->pass == 2 &&
+      cm->current_video_frame == 0 &&
+      cpi->oxcf.allow_spatial_resampling &&
+      cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
+    // Internal scaling is triggered on the first frame.
+    vp9_set_size_literal(cpi, cpi->oxcf.scaled_frame_width,
+                         cpi->oxcf.scaled_frame_height);
+  }
 
   // Reset the frame pointers to the current frame size
   vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 18203f9..7a110ac 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -185,6 +185,7 @@
   AQ_MODE_COUNT  // This should always be the last member of the enum
 } AQ_MODE;
 
+
 typedef struct VP9_CONFIG {
   BITSTREAM_PROFILE profile;
   BIT_DEPTH bit_depth;
@@ -231,6 +232,11 @@
   int lossless;
   AQ_MODE aq_mode;  // Adaptive Quantization mode
 
+  // Internal frame size scaling.
+  int allow_spatial_resampling;
+  int scaled_frame_width;
+  int scaled_frame_height;
+
   // Enable feature to reduce the frame quantization every x frames.
   int frame_periodic_boost;
 
@@ -281,6 +287,35 @@
   vp8e_tuning tuning;
 } VP9_CONFIG;
 
+
+typedef struct RD_OPT {
+  // Thresh_mult is used to set a threshold for the rd score. A higher value
+  // means that we will accept the best mode so far more often. This number
+  // is used in combination with the current block size, and thresh_freq_fact
+  // to pick a threshold.
+  int thresh_mult[MAX_MODES];
+  int thresh_mult_sub8x8[MAX_REFS];
+
+  int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
+  int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+  int thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
+  int thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS];
+
+  int64_t comp_pred_diff[REFERENCE_MODES];
+  int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
+  int64_t tx_select_diff[TX_MODES];
+  // FIXME(rbultje) can this overflow?
+  int tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
+
+  int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+  int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
+  int64_t mask_filter;
+
+  int RDMULT;
+  int RDDIV;
+} RD_OPT;
+
 typedef struct VP9_COMP {
   QUANTS quants;
   MACROBLOCK mb;
@@ -343,31 +378,7 @@
   // Ambient reconstruction err target for force key frames
   int ambient_err;
 
-  // Thresh_mult is used to set a threshold for the rd score. A higher value
-  // means that we will accept the best mode so far more often. This number
-  // is used in combination with the current block size, and thresh_freq_fact
-  // to pick a threshold.
-  int rd_thresh_mult[MAX_MODES];
-  int rd_thresh_mult_sub8x8[MAX_REFS];
-
-  int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
-  int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
-  int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
-  int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS];
-
-  int64_t rd_comp_pred_diff[REFERENCE_MODES];
-  int64_t rd_prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
-  int64_t rd_tx_select_diff[TX_MODES];
-  // FIXME(rbultje) can this overflow?
-  int rd_tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
-
-  int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
-  int64_t rd_filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
-  int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS];
-  int64_t mask_filter_rd;
-
-  int RDMULT;
-  int RDDIV;
+  RD_OPT rd;
 
   CODING_CONTEXT coding_context;
 
@@ -376,7 +387,6 @@
   int active_arnr_frames;           // <= cpi->oxcf.arnr_max_frames
   int active_arnr_strength;         // <= cpi->oxcf.arnr_max_strength
 
-  double output_framerate;
   int64_t last_time_stamp_seen;
   int64_t last_end_time_stamp_seen;
   int64_t first_time_stamp_ever;
@@ -498,6 +508,8 @@
 
   int use_large_partition_rate;
 
+  int frame_flags;
+
 #if CONFIG_MULTIPLE_ARF
   // ARF tracking variables.
   int multi_arf_enabled;
@@ -591,7 +603,7 @@
 
 // Intra only frames, golden frames (except alt ref overlays) and
 // alt ref frames tend to be coded at a higher than ambient quality
-static INLINE int vp9_frame_is_boosted(const VP9_COMP *cpi) {
+static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
   return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
 }
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 3ac8522..e003a0f 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -23,7 +23,7 @@
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_quantize.h"
 
-static int get_max_filter_level(VP9_COMP *cpi) {
+static int get_max_filter_level(const VP9_COMP *cpi) {
   return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
                                                : MAX_LOOP_FILTER;
 }
@@ -43,15 +43,15 @@
   return filt_err;
 }
 
-static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
-                                int partial_frame) {
-  VP9_COMMON *const cm = &cpi->common;
-  struct loopfilter *const lf = &cm->lf;
+static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
+                               int partial_frame) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const struct loopfilter *const lf = &cm->lf;
   const int min_filter_level = 0;
   const int max_filter_level = get_max_filter_level(cpi);
-  int best_err;
-  int filt_best;
   int filt_direction = 0;
+  int best_err, filt_best;
+
   // Start the search at the previous frame filter level unless it is now out of
   // range.
   int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
@@ -128,7 +128,7 @@
     }
   }
 
-  lf->filter_level = filt_best;
+  return filt_best;
 }
 
 void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
@@ -150,6 +150,7 @@
       filt_guess -= 4;
     lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
   } else {
-    search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
+    lf->filter_level = search_filter_level(sd, cpi,
+                                           method == LPF_PICK_FROM_SUBIMAGE);
   }
 }
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index f3fe99c..3d398ed 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -233,8 +233,8 @@
   const int64_t intra_mode_cost = 50;
 
   unsigned char segment_id = mbmi->segment_id;
-  const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
-  const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
+  const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize];
   // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame.
   int mode_idx[MB_MODE_COUNT] = {0};
   INTERP_FILTER filter_ref = SWITCHABLE;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 9044e5b..f2afc46 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -27,6 +27,11 @@
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 
+// Max rate target for 1080P and below encodes under normal circumstances
+// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
+#define MAX_MB_RATE 250
+#define MAXRATE_1080P 2025000
+
 #define DEFAULT_KF_BOOST 2000
 #define DEFAULT_GF_BOOST 2000
 
@@ -74,14 +79,13 @@
 
   for (i = 0; i < QINDEX_RANGE; i++) {
     const double maxq = vp9_convert_qindex_to_q(i);
-
     kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15);
     kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50);
     gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32);
     gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
     afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
     afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
-    inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
+    inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55);
   }
 }
 
@@ -565,11 +569,18 @@
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
   // Limit Q range for the adaptive loop.
-  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    if (!(cm->current_video_frame == 0))
-      *top_index = (active_worst_quality + active_best_quality * 3) / 4;
+  if (cm->frame_type == KEY_FRAME &&
+      !rc->this_key_frame_forced  &&
+      !(cm->current_video_frame == 0)) {
+    int qdelta = 0;
+    vp9_clear_system_state();
+    qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                        active_worst_quality, 2.0);
+    *top_index = active_worst_quality + qdelta;
+    *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
   }
 #endif
+
   // Special case code to try and match quality with forced key frames
   if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
     q = rc->last_boosted_qindex;
@@ -725,15 +736,26 @@
   *bottom_index = active_best_quality;
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
-  // Limit Q range for the adaptive loop.
-  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    if (!(cm->current_video_frame == 0))
-      *top_index = (active_worst_quality + active_best_quality * 3) / 4;
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    *top_index = (active_worst_quality + active_best_quality) / 2;
+  {
+    int qdelta = 0;
+    vp9_clear_system_state();
+
+    // Limit Q range for the adaptive loop.
+    if (cm->frame_type == KEY_FRAME &&
+        !rc->this_key_frame_forced &&
+        !(cm->current_video_frame == 0)) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 2.0);
+    } else if (!rc->is_src_frame_alt_ref &&
+               (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 1.75);
+    }
+    *top_index = active_worst_quality + qdelta;
+    *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
   }
 #endif
+
   if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
     q = active_best_quality;
   // Special case code to try and match quality with forced key frames
@@ -907,13 +929,22 @@
   *bottom_index = active_best_quality;
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
-  // Limit Q range for the adaptive loop.
-  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    *top_index = (active_worst_quality + active_best_quality * 3) / 4;
-  } else if (!rc->is_src_frame_alt_ref &&
-             (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    *top_index = (active_worst_quality + active_best_quality) / 2;
+  {
+    int qdelta = 0;
+    vp9_clear_system_state();
+
+    // Limit Q range for the adaptive loop.
+    if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 2.0);
+    } else if (!rc->is_src_frame_alt_ref &&
+               (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
+               (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 1.75);
+    }
+    *top_index = active_worst_quality + qdelta;
+    *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
   }
 #endif
 
@@ -1053,11 +1084,11 @@
 }
 
 void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
+  const int qindex = cm->base_qindex;
 
-  cm->last_frame_type = cm->frame_type;
   // Update rate control heuristics
   rc->projected_frame_size = (int)(bytes_used << 3);
 
@@ -1068,25 +1099,24 @@
 
   // Keep a record of last Q and ambient average Q.
   if (cm->frame_type == KEY_FRAME) {
-    rc->last_q[KEY_FRAME] = cm->base_qindex;
-    rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
-        3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
+    rc->last_q[KEY_FRAME] = qindex;
+    rc->avg_frame_qindex[KEY_FRAME] =
+        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
   } else if (!rc->is_src_frame_alt_ref &&
-      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
-      !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
-    rc->last_q[2] = cm->base_qindex;
-    rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
-        3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+             !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
+    rc->last_q[2] = qindex;
+    rc->avg_frame_qindex[2] =
+        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2);
   } else {
-    rc->last_q[INTER_FRAME] = cm->base_qindex;
-    rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO(
-        3 * rc->avg_frame_qindex[INTER_FRAME] + cm->base_qindex, 2);
+    rc->last_q[INTER_FRAME] = qindex;
+    rc->avg_frame_qindex[INTER_FRAME] =
+        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
     rc->ni_frames++;
-    rc->tot_q += vp9_convert_qindex_to_q(cm->base_qindex);
-    rc->avg_q = rc->tot_q / (double)rc->ni_frames;
-
+    rc->tot_q += vp9_convert_qindex_to_q(qindex);
+    rc->avg_q = rc->tot_q / rc->ni_frames;
     // Calculate the average Q for normal inter frames (not key or GFU frames).
-    rc->ni_tot_qi += cm->base_qindex;
+    rc->ni_tot_qi += qindex;
     rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
   }
 
@@ -1095,11 +1125,11 @@
   // If all mbs in this group are skipped only update if the Q value is
   // better than that already stored.
   // This is used to help set quality in forced key frames to reduce popping
-  if ((cm->base_qindex < rc->last_boosted_qindex) ||
+  if ((qindex < rc->last_boosted_qindex) ||
       ((cpi->static_mb_pct < 100) &&
        ((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame ||
         (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
-    rc->last_boosted_qindex = cm->base_qindex;
+    rc->last_boosted_qindex = qindex;
   }
 
   update_buffer_level(cpi, rc->projected_frame_size);
@@ -1184,7 +1214,7 @@
   int target;
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 ||
-       (cm->frame_flags & FRAMEFLAGS_KEY) ||
+       (cpi->frame_flags & FRAMEFLAGS_KEY) ||
        rc->frames_to_key == 0 ||
        (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
@@ -1246,17 +1276,25 @@
 
 static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   const RATE_CONTROL *rc = &cpi->rc;
+  const VP9_CONFIG *oxcf = &cpi->oxcf;
+  const SVC *const svc = &cpi->svc;
   int target;
-
   if (cpi->common.current_video_frame == 0) {
     target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX)
       ? INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2);
   } else {
-    const int initial_boost = 32;
-    int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
-    if (rc->frames_since_key < cpi->output_framerate / 2) {
+    int kf_boost = 32;
+    double framerate = oxcf->framerate;
+    if (svc->number_temporal_layers > 1 &&
+        oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+      // Use the layer framerate for temporal layers CBR mode.
+      const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id];
+      framerate = lc->framerate;
+    }
+    kf_boost = MAX(kf_boost, (int)(2 * framerate - 16));
+    if (rc->frames_since_key <  framerate / 2) {
       kf_boost = (int)(kf_boost * rc->frames_since_key /
-                       (cpi->output_framerate / 2));
+                       (framerate / 2));
     }
     target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
   }
@@ -1268,7 +1306,7 @@
   RATE_CONTROL *const rc = &cpi->rc;
   int target = rc->av_per_frame_bandwidth;
   if ((cm->current_video_frame == 0) ||
-      (cm->frame_flags & FRAMEFLAGS_KEY) ||
+      (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       (cpi->oxcf.auto_key && (rc->frames_since_key %
                               cpi->key_frame_frequency == 0))) {
     cm->frame_type = KEY_FRAME;
@@ -1292,7 +1330,7 @@
   RATE_CONTROL *const rc = &cpi->rc;
   int target;
   if ((cm->current_video_frame == 0 ||
-      (cm->frame_flags & FRAMEFLAGS_KEY) ||
+      (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       rc->frames_to_key == 0 ||
       (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
@@ -1354,3 +1392,46 @@
 
   return target_index - qindex;
 }
+
+void vp9_rc_update_framerate(VP9_COMP *cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
+  RATE_CONTROL *const rc = &cpi->rc;
+  int vbr_max_bits;
+
+  rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / oxcf->framerate);
+  rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth *
+                                oxcf->two_pass_vbrmin_section / 100);
+
+  rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
+
+  // A maximum bitrate for a frame is defined.
+  // The baseline for this aligns with HW implementations that
+  // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
+  // per 16x16 MB (averaged over a frame). However this limit is extended if
+  // a very high rate is given on the command line or the rate cannot
+  // be achieved because of a user specified max q (e.g. when the user
+  // specifies lossless encode).
+  vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth *
+                     oxcf->two_pass_vbrmax_section) / 100);
+  rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
+                                    vbr_max_bits);
+
+  // Set Maximum gf/arf interval
+  rc->max_gf_interval = 16;
+
+  // Extended interval for genuinely static scenes
+  rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+
+  // Special conditions when alt ref frame enabled in lagged compress mode
+  if (oxcf->play_alternate && oxcf->lag_in_frames) {
+    if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
+      rc->max_gf_interval = oxcf->lag_in_frames - 1;
+
+    if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+      rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+  }
+
+  if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+    rc->max_gf_interval = rc->static_scene_max_gf_interval;
+}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 7693c2b..cf6526b 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -125,8 +125,7 @@
 
 // Post encode update of the rate control parameters based
 // on bytes used
-void vp9_rc_postencode_update(struct VP9_COMP *cpi,
-                              uint64_t bytes_used);
+void vp9_rc_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used);
 // Post encode update of the rate control parameters for dropped frames
 void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
 
@@ -175,6 +174,8 @@
 int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
                                int qindex, double rate_target_ratio);
 
+void vp9_rc_update_framerate(struct VP9_COMP *cpi);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 1b4cc51..7ef21fa 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -245,6 +245,7 @@
 
 static void set_block_thresholds(VP9_COMP *cpi) {
   const VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd = &cpi->rd;
   int i, bsize, segment_id;
 
   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
@@ -260,14 +261,14 @@
       const int thresh_max = INT_MAX / t;
 
       for (i = 0; i < MAX_MODES; ++i)
-        cpi->rd_threshes[segment_id][bsize][i] =
-            cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
+        rd->threshes[segment_id][bsize][i] =
+            rd->thresh_mult[i] < thresh_max ? rd->thresh_mult[i] * t / 4
                                             : INT_MAX;
 
       for (i = 0; i < MAX_REFS; ++i) {
-        cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
-            cpi->rd_thresh_mult_sub8x8[i] < thresh_max
-                ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
+        rd->thresh_sub8x8[segment_id][bsize][i] =
+            rd->thresh_mult_sub8x8[i] < thresh_max
+                ? rd->thresh_mult_sub8x8[i] * t / 4
                 : INT_MAX;
       }
     }
@@ -281,10 +282,10 @@
 
   vp9_clear_system_state();
 
-  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
-  cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
+  cpi->rd.RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
+  cpi->rd.RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 
-  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
+  x->errorperbit = cpi->rd.RDMULT / RD_MULT_EPB_RATIO;
   x->errorperbit += (x->errorperbit == 0);
 
   x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
@@ -1676,14 +1677,14 @@
 static int check_best_zero_mv(
     const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
     int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
-    int disable_inter_mode_mask, int this_mode, int ref_frame,
-    int second_ref_frame) {
+    int disable_inter_mode_mask, int this_mode,
+    const MV_REFERENCE_FRAME ref_frames[2]) {
   if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
       (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
-      frame_mv[this_mode][ref_frame].as_int == 0 &&
-      (second_ref_frame == NONE ||
-       frame_mv[this_mode][second_ref_frame].as_int == 0)) {
-    int rfc = mode_context[ref_frame];
+      frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
+      (ref_frames[1] == NONE ||
+       frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
+    int rfc = mode_context[ref_frames[0]];
     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
@@ -1694,15 +1695,15 @@
       if (c2 > c3) return 0;
     } else {
       assert(this_mode == ZEROMV);
-      if (second_ref_frame == NONE) {
-        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
-            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
+      if (ref_frames[1] == NONE) {
+        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
+            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
           return 0;
       } else {
-        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
-             frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
-            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
-             frame_mv[NEARMV][second_ref_frame].as_int == 0))
+        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
+             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
+            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
+             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
           return 0;
       }
     }
@@ -1780,8 +1781,7 @@
 
         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
                                 disable_inter_mode_mask,
-                                this_mode, mbmi->ref_frame[0],
-                                mbmi->ref_frame[1]))
+                                this_mode, mbmi->ref_frame))
           continue;
 
         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
@@ -1891,7 +1891,6 @@
                      x->mv_row_min, x->mv_row_max);
             thissme = cpi->full_search_sad(x, &mvp_full,
                                            sadpb, 16, v_fn_ptr,
-                                           x->nmvjointcost, x->mvcost,
                                            &bsi->ref_mv[0]->as_mv,
                                            &best_mv->as_mv);
             if (thissme < bestsme) {
@@ -2580,7 +2579,7 @@
     struct buf_2d ref_yv12[2];
     int bestsme = INT_MAX;
     int sadpb = x->sadperbit16;
-    int_mv tmp_mv;
+    MV tmp_mv;
     int search_range = 3;
 
     int tmp_col_min = x->mv_col_min;
@@ -2609,20 +2608,19 @@
     vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
 
     // Use mv result from single mode as mvp.
-    tmp_mv.as_int = frame_mv[refs[id]].as_int;
+    tmp_mv = frame_mv[refs[id]].as_mv;
 
-    tmp_mv.as_mv.col >>= 3;
-    tmp_mv.as_mv.row >>= 3;
+    tmp_mv.col >>= 3;
+    tmp_mv.row >>= 3;
 
     // Small-range full-pixel motion search
-    bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
+    bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
                                        &cpi->fn_ptr[bsize],
-                                       x->nmvjointcost, x->mvcost,
                                        &ref_mv[id].as_mv, second_pred,
                                        pw, ph);
     if (bestsme < INT_MAX)
-      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
+      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                       second_pred, &cpi->fn_ptr[bsize], 1);
 
     x->mv_col_min = tmp_col_min;
@@ -2634,7 +2632,7 @@
       int dis; /* TODO: use dis in distortion calculation later. */
       unsigned int sse;
       bestsme = cpi->find_fractional_mv_step_comp(
-          x, &tmp_mv.as_mv,
+          x, &tmp_mv,
           &ref_mv[id].as_mv,
           cpi->common.allow_high_precision_mv,
           x->errorperbit,
@@ -2649,7 +2647,7 @@
       xd->plane[0].pre[0] = scaled_first_yv12;
 
     if (bestsme < last_besterr[id]) {
-      frame_mv[refs[id]].as_int = tmp_mv.as_int;
+      frame_mv[refs[id]].as_mv = tmp_mv;
       last_besterr[id] = bestsme;
     } else {
       break;
@@ -2699,6 +2697,7 @@
                                  int64_t *psse,
                                  const int64_t ref_best_rd) {
   VP9_COMMON *cm = &cpi->common;
+  RD_OPT *rd_opt = &cpi->rd;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   const int is_comp_pred = has_second_ref(mbmi);
@@ -2796,14 +2795,13 @@
 
   // Search for best switchable filter by checking the variance of
   // pred error irrespective of whether the filter will be used
-  cpi->mask_filter_rd = 0;
+  rd_opt->mask_filter = 0;
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-    cpi->rd_filter_cache[i] = INT64_MAX;
+    rd_opt->filter_cache[i] = INT64_MAX;
 
   if (cm->interp_filter != BILINEAR) {
     *best_filter = EIGHTTAP;
-    if (x->source_variance <
-        cpi->sf.disable_filter_search_var_thresh) {
+    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
       *best_filter = EIGHTTAP;
     } else {
       int newbest;
@@ -2819,12 +2817,12 @@
 
         if (i > 0 && intpel_mv) {
           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
-          cpi->rd_filter_cache[i] = rd;
-          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
-              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+          rd_opt->filter_cache[i] = rd;
+          rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+              MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
           if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
-          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
+          rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
         } else {
           int rate_sum = 0;
           int64_t dist_sum = 0;
@@ -2844,12 +2842,12 @@
           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
 
           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
-          cpi->rd_filter_cache[i] = rd;
-          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
-              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+          rd_opt->filter_cache[i] = rd;
+          rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+              MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
           if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
-          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
+          rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
 
           if (i == 0 && intpel_mv) {
             tmp_rate_sum = rate_sum;
@@ -3126,6 +3124,7 @@
                                   PICK_MODE_CONTEXT *ctx,
                                   int64_t best_rd_so_far) {
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
@@ -3165,8 +3164,8 @@
   int best_skip2 = 0;
   int mode_skip_mask = 0;
   int mode_skip_start = cpi->sf.mode_skip_start + 1;
-  const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
-  const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+  const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
+  const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
   const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
   const int intra_y_mode_mask =
       cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
@@ -3379,11 +3378,12 @@
       }
     } else {
       if (x->in_active_map &&
-          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+        const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
-                                disable_inter_mode_mask, this_mode, ref_frame,
-                                second_ref_frame))
+                                disable_inter_mode_mask, this_mode, ref_frames))
           continue;
+      }
     }
 
     mbmi->mode = this_mode;
@@ -3611,21 +3611,21 @@
 
       /* keep record of best filter type */
       if (!mode_excluded && cm->interp_filter != BILINEAR) {
-        int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+        int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
                               SWITCHABLE_FILTERS : cm->interp_filter];
 
         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
           int64_t adj_rd;
           if (ref == INT64_MAX)
             adj_rd = 0;
-          else if (cpi->rd_filter_cache[i] == INT64_MAX)
+          else if (rd_opt->filter_cache[i] == INT64_MAX)
             // when early termination is triggered, the encoder does not have
             // access to the rate-distortion cost. it only knows that the cost
             // should be above the maximum valid value. hence it takes the known
             // maximum plus an arbitrary constant as the rate-distortion cost.
-            adj_rd = cpi->mask_filter_rd - ref + 10;
+            adj_rd = rd_opt->mask_filter - ref + 10;
           else
-            adj_rd = cpi->rd_filter_cache[i] - ref;
+            adj_rd = rd_opt->filter_cache[i] - ref;
 
           adj_rd += this_rd;
           best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
@@ -3687,7 +3687,7 @@
   // combination that wins out.
   if (cpi->sf.adaptive_rd_thresh) {
     for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
-      int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index];
+      int *const fact = &rd_opt->thresh_freq_fact[bsize][mode_index];
 
       if (mode_index == best_mode_index) {
         *fact -= (*fact >> 3);
@@ -3759,6 +3759,7 @@
                                       PICK_MODE_CONTEXT *ctx,
                                       int64_t best_rd_so_far) {
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
@@ -3880,9 +3881,9 @@
 
     // Test best rd so far against threshold for trying this mode.
     if ((best_rd <
-         ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
-          cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
-        cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
+         ((int64_t)rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] *
+          rd_opt->thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
+        rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
       continue;
 
     if (ref_frame > INTRA_FRAME &&
@@ -3909,10 +3910,11 @@
 
     // TODO(jingning, jkoleszar): scaling reference frame not supported for
     // sub8x8 blocks.
-    if (ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
+    if (ref_frame > INTRA_FRAME &&
+        vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
       continue;
 
-    if (second_ref_frame > NONE &&
+    if (second_ref_frame > INTRA_FRAME &&
         vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
       continue;
 
@@ -4011,14 +4013,13 @@
       int uv_skippable;
 
       this_rd_thresh = (ref_frame == LAST_FRAME) ?
-          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
-          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
+          rd_opt->thresh_sub8x8[segment_id][bsize][THR_LAST] :
+          rd_opt->thresh_sub8x8[segment_id][bsize][THR_ALTR];
       this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
-          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
-
-      cpi->mask_filter_rd = 0;
+      rd_opt->thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
+      rd_opt->mask_filter = 0;
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-        cpi->rd_filter_cache[i] = INT64_MAX;
+        rd_opt->filter_cache[i] = INT64_MAX;
 
       if (cm->interp_filter != BILINEAR) {
         tmp_best_filter = EIGHTTAP;
@@ -4051,14 +4052,14 @@
               continue;
             rs = vp9_get_switchable_rate(x);
             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
-            cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
-            cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
-                MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
+            rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
+            rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+                MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
                     tmp_rd + rs_rd);
             if (cm->interp_filter == SWITCHABLE)
               tmp_rd += rs_rd;
 
-            cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
+            rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
 
             newbest = (tmp_rd < tmp_best_rd);
             if (newbest) {
@@ -4292,20 +4293,20 @@
     /* keep record of best filter type */
     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
         cm->interp_filter != BILINEAR) {
-      int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+      int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
                               SWITCHABLE_FILTERS : cm->interp_filter];
       int64_t adj_rd;
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
         if (ref == INT64_MAX)
           adj_rd = 0;
-        else if (cpi->rd_filter_cache[i] == INT64_MAX)
+        else if (rd_opt->filter_cache[i] == INT64_MAX)
           // when early termination is triggered, the encoder does not have
           // access to the rate-distortion cost. it only knows that the cost
           // should be above the maximum valid value. hence it takes the known
           // maximum plus an arbitrary constant as the rate-distortion cost.
-          adj_rd = cpi->mask_filter_rd - ref + 10;
+          adj_rd = rd_opt->mask_filter - ref + 10;
         else
-          adj_rd = cpi->rd_filter_cache[i] - ref;
+          adj_rd = rd_opt->filter_cache[i] - ref;
 
         adj_rd += this_rd;
         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
@@ -4352,7 +4353,7 @@
   // combination that wins out.
   if (cpi->sf.adaptive_rd_thresh) {
     for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
-      int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index];
+      int *const fact = &rd_opt->thresh_freq_sub8x8[bsize][mode_index];
 
       if (mode_index == best_mode_index) {
         *fact -= (*fact >> 3);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index adad800..f0bd8a1 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -13,32 +13,39 @@
 #include "vp9/encoder/vp9_onyx_int.h"
 #include "vp9/encoder/vp9_speed_features.h"
 
-#define ALL_INTRA_MODES ((1 << DC_PRED) | \
-                         (1 << V_PRED) | (1 << H_PRED) | \
-                         (1 << D45_PRED) | (1 << D135_PRED) | \
-                         (1 << D117_PRED) | (1 << D153_PRED) | \
-                         (1 << D207_PRED) | (1 << D63_PRED) | \
-                         (1 << TM_PRED))
-#define INTRA_DC_ONLY   (1 << DC_PRED)
-#define INTRA_DC_TM     ((1 << TM_PRED) | (1 << DC_PRED))
-#define INTRA_DC_H_V    ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED))
-#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
+enum {
+  ALL_INTRA_MODES = (1 << DC_PRED) |
+                    (1 << V_PRED) | (1 << H_PRED) |
+                    (1 << D45_PRED) | (1 << D135_PRED) |
+                    (1 << D117_PRED) | (1 << D153_PRED) |
+                    (1 << D207_PRED) | (1 << D63_PRED) |
+                    (1 << TM_PRED),
 
-// Masks for partially or completely disabling split mode
-#define DISABLE_ALL_INTER_SPLIT   ((1 << THR_COMP_GA) | \
-                                   (1 << THR_COMP_LA) | \
-                                   (1 << THR_ALTR) | \
-                                   (1 << THR_GOLD) | \
-                                   (1 << THR_LAST))
+  INTRA_DC_ONLY   = (1 << DC_PRED),
 
-#define DISABLE_ALL_SPLIT         ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT)
+  INTRA_DC_TM     = (1 << TM_PRED) | (1 << DC_PRED),
 
-#define DISABLE_COMPOUND_SPLIT    ((1 << THR_COMP_GA) | (1 << THR_COMP_LA))
+  INTRA_DC_H_V    = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
 
-#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \
-                                   (1 << THR_COMP_LA) | \
-                                   (1 << THR_ALTR) | \
-                                   (1 << THR_GOLD))
+  INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)
+};
+
+enum {
+  DISABLE_ALL_INTER_SPLIT   = (1 << THR_COMP_GA) |
+                              (1 << THR_COMP_LA) |
+                              (1 << THR_ALTR) |
+                              (1 << THR_GOLD) |
+                              (1 << THR_LAST),
+
+  DISABLE_ALL_SPLIT         = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
+
+  DISABLE_COMPOUND_SPLIT    = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
+
+  LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) |
+                              (1 << THR_COMP_LA) |
+                              (1 << THR_ALTR) |
+                              (1 << THR_GOLD)
+};
 
 static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
                                    SPEED_FEATURES *sf, int speed) {
@@ -49,8 +56,8 @@
   if (speed >= 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check  = 1;
-    sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD
-                                                          : USE_LARGESTALL;
+    sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
+                                                      : USE_LARGESTALL;
 
     if (MIN(cm->width, cm->height) >= 720)
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -73,8 +80,8 @@
   }
 
   if (speed >= 2) {
-    sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD
-                                                          : USE_LARGESTALL;
+    sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
+                                                        : USE_LARGESTALL;
 
     if (MIN(cm->width, cm->height) >= 720)
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 72f548a..5542297 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -344,7 +344,7 @@
   int search_type_check_frequency;
 
   // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
-  int source_var_thresh;
+  unsigned int source_var_thresh;
 } SPEED_FEATURES;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index c2b6263..9b3fc6e 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -178,7 +178,6 @@
   cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
   cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
   cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
-  cpi->output_framerate = lc->framerate;
   // Reset the frames_since_key and frames_to_key counters to their values
   // before the layer restore. Keep these defined for the stream (not layer).
   if (cpi->svc.number_temporal_layers > 1) {
@@ -197,7 +196,6 @@
   lc->starting_buffer_level = oxcf->starting_buffer_level;
   lc->optimal_buffer_level = oxcf->optimal_buffer_level;
   lc->maximum_buffer_size = oxcf->maximum_buffer_size;
-  lc->framerate = cpi->output_framerate;
 }
 
 void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 62e20dc..c9e39a1 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -45,12 +45,6 @@
                                    int  ref_stride,
                                    unsigned int *sad_array);
 
-typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr,
-                                    int source_stride,
-                                    const uint8_t *ref_ptr,
-                                    int  ref_stride,
-                                    unsigned int *sad_array);
-
 typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr,
                                      int source_stride,
                                      const uint8_t* const ref_ptr[],
@@ -96,7 +90,7 @@
   vp9_variance_fn_t          svf_halfpix_v;
   vp9_variance_fn_t          svf_halfpix_hv;
   vp9_sad_multi_fn_t         sdx3f;
-  vp9_sad_multi1_fn_t        sdx8f;
+  vp9_sad_multi_fn_t         sdx8f;
   vp9_sad_multi_d_fn_t       sdx4df;
 } vp9_variance_fn_ptr_t;
 
diff --git a/vp9/encoder/x86/vp9_mcomp_x86.h b/vp9/encoder/x86/vp9_mcomp_x86.h
deleted file mode 100644
index c15039a..0000000
--- a/vp9/encoder/x86/vp9_mcomp_x86.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-#define VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_SSE3
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef  vp9_search_full_search
-#define vp9_search_full_search vp9_full_search_sadx3
-
-#undef  vp9_search_refining_search
-#define vp9_search_refining_search vp9_refining_search_sadx4
-
-#undef  vp9_search_diamond_search
-#define vp9_search_diamond_search vp9_diamond_search_sadx4
-
-#endif
-#endif
-
-#if HAVE_SSE4_1
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef  vp9_search_full_search
-#define vp9_search_full_search vp9_full_search_sadx8
-
-#endif
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 0623ad1..967431c 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -168,6 +168,11 @@
   RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
   RANGE_CHECK(cfg,        g_pass,         VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
 
+  if (cfg->rc_resize_allowed == 1) {
+    RANGE_CHECK(cfg, rc_scaled_width, 1, cfg->g_w);
+    RANGE_CHECK(cfg, rc_scaled_height, 1, cfg->g_h);
+  }
+
   RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
   RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
   if (cfg->ts_number_layers > 1) {
@@ -332,6 +337,10 @@
   oxcf->under_shoot_pct         = cfg->rc_undershoot_pct;
   oxcf->over_shoot_pct          = cfg->rc_overshoot_pct;
 
+  oxcf->allow_spatial_resampling = cfg->rc_resize_allowed;
+  oxcf->scaled_frame_width       = cfg->rc_scaled_width;
+  oxcf->scaled_frame_height      = cfg->rc_scaled_height;
+
   oxcf->maximum_buffer_size     = cfg->rc_buf_sz;
   oxcf->starting_buffer_level   = cfg->rc_buf_initial_sz;
   oxcf->optimal_buffer_level    = cfg->rc_buf_optimal_sz;
@@ -410,6 +419,9 @@
   printf("fixed_q: %d\n",  oxcf->fixed_q);
   printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
   printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
+  printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling);
+  printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width);
+  printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height);
   printf("two_pass_vbrbias: %d\n",  oxcf->two_pass_vbrbias);
   printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
   printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
@@ -1128,6 +1140,8 @@
 
       0,                  // rc_dropframe_thresh
       0,                  // rc_resize_allowed
+      1,                  // rc_scaled_width
+      1,                  // rc_scaled_height
       60,                 // rc_resize_down_thresold
       30,                 // rc_resize_up_thresold
 
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index da6c0f8..24b8d9d 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -87,8 +87,6 @@
 VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
 VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
 
-
-VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 2c882c1..571ad3f 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -396,6 +396,19 @@
      */
     unsigned int           rc_resize_allowed;
 
+    /*!\brief Internal coded frame width.
+     *
+     * If spatial resampling is enabled, this specifies the width of the
+     * encoded frame.
+     */
+    unsigned int           rc_scaled_width;
+
+    /*!\brief Internal coded frame height.
+     *
+     * If spatial resampling is enabled, this specifies the height of the
+     * encoded frame.
+     */
+    unsigned int           rc_scaled_height;
 
     /*!\brief Spatial resampling up watermark.
      *
diff --git a/vpxenc.c b/vpxenc.c
index 00d3e3e..f2b73aa 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -123,55 +123,6 @@
   return 0;
 }
 
-#if CONFIG_WEBM_IO
-/* Murmur hash derived from public domain reference implementation at
- *   http:// sites.google.com/site/murmurhash/
- */
-static unsigned int murmur(const void *key, int len, unsigned int seed) {
-  const unsigned int m = 0x5bd1e995;
-  const int r = 24;
-
-  unsigned int h = seed ^ len;
-
-  const unsigned char *data = (const unsigned char *)key;
-
-  while (len >= 4) {
-    unsigned int k;
-
-    k  = (unsigned int)data[0];
-    k |= (unsigned int)data[1] << 8;
-    k |= (unsigned int)data[2] << 16;
-    k |= (unsigned int)data[3] << 24;
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h *= m;
-    h ^= k;
-
-    data += 4;
-    len -= 4;
-  }
-
-  switch (len) {
-    case 3:
-      h ^= data[2] << 16;
-    case 2:
-      h ^= data[1] << 8;
-    case 1:
-      h ^= data[0];
-      h *= m;
-  };
-
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
-}
-#endif  // CONFIG_WEBM_IO
-
 static const arg_def_t debugmode = ARG_DEF("D", "debug", 0,
                                            "Debug mode (makes output deterministic)");
 static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
@@ -284,6 +235,10 @@
                                                     "Temporal resampling threshold (buf %)");
 static const arg_def_t resize_allowed     = ARG_DEF(NULL, "resize-allowed", 1,
                                                     "Spatial resampling enabled (bool)");
+static const arg_def_t resize_width       = ARG_DEF(NULL, "resize-width", 1,
+                                                    "Width of encoded frame");
+static const arg_def_t resize_height      = ARG_DEF(NULL, "resize-height", 1,
+                                                    "Height of encoded frame");
 static const arg_def_t resize_up_thresh   = ARG_DEF(NULL, "resize-up", 1,
                                                     "Upscale threshold (buf %)");
 static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1,
@@ -314,10 +269,10 @@
 static const arg_def_t buf_optimal_sz     = ARG_DEF(NULL, "buf-optimal-sz", 1,
                                                     "Client optimal buffer size (ms)");
 static const arg_def_t *rc_args[] = {
-  &dropframe_thresh, &resize_allowed, &resize_up_thresh, &resize_down_thresh,
-  &end_usage, &target_bitrate, &min_quantizer, &max_quantizer,
-  &undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz,
-  NULL
+  &dropframe_thresh, &resize_allowed, &resize_width, &resize_height,
+  &resize_up_thresh, &resize_down_thresh, &end_usage, &target_bitrate,
+  &min_quantizer, &max_quantizer, &undershoot_pct, &overshoot_pct, &buf_sz,
+  &buf_initial_sz, &buf_optimal_sz, NULL
 };
 
 
@@ -400,11 +355,11 @@
     NULL, "frame-parallel", 1, "Enable frame parallel decodability features");
 static const arg_def_t aq_mode = ARG_DEF(
     NULL, "aq-mode", 1,
-    "Adaptive q mode (0: off (by default), 1: variance 2: complexity, "
+    "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
     "3: cyclic refresh)");
 static const arg_def_t frame_periodic_boost = ARG_DEF(
     NULL, "frame_boost", 1,
-    "Enable frame periodic boost (0: off (by default), 1: on)");
+    "Enable frame periodic boost (0: off (default), 1: on)");
 
 static const arg_def_t *vp9_args[] = {
   &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
@@ -619,7 +574,6 @@
   FILE                     *file;
   struct rate_hist         *rate_hist;
   struct EbmlGlobal         ebml;
-  uint32_t                  hash;
   uint64_t                  psnr_sse_total;
   uint64_t                  psnr_samples_total;
   double                    psnr_totals[4];
@@ -841,7 +795,9 @@
     stream->config.stereo_fmt = STEREO_FORMAT_MONO;
     stream->config.write_webm = 1;
 #if CONFIG_WEBM_IO
-    stream->ebml.last_pts_ms = -1;
+    stream->ebml.last_pts_ns = -1;
+    stream->ebml.writer = NULL;
+    stream->ebml.segment = NULL;
 #endif
 
     /* Allows removal of the application version from the EBML tags */
@@ -931,6 +887,10 @@
       config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &resize_allowed, argi)) {
       config->cfg.rc_resize_allowed = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &resize_width, argi)) {
+      config->cfg.rc_scaled_width = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &resize_height, argi)) {
+      config->cfg.rc_scaled_height = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &resize_up_thresh, argi)) {
       config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &resize_down_thresh, argi)) {
@@ -1115,6 +1075,8 @@
   SHOW(g_lag_in_frames);
   SHOW(rc_dropframe_thresh);
   SHOW(rc_resize_allowed);
+  SHOW(rc_scaled_width);
+  SHOW(rc_scaled_height);
   SHOW(rc_resize_up_thresh);
   SHOW(rc_resize_down_thresh);
   SHOW(rc_end_usage);
@@ -1176,9 +1138,7 @@
 
 #if CONFIG_WEBM_IO
   if (stream->config.write_webm) {
-    write_webm_file_footer(&stream->ebml, stream->hash);
-    free(stream->ebml.cue_list);
-    stream->ebml.cue_list = NULL;
+    write_webm_file_footer(&stream->ebml);
   }
 #endif
 
@@ -1334,12 +1294,6 @@
         update_rate_histogram(stream->rate_hist, cfg, pkt);
 #if CONFIG_WEBM_IO
         if (stream->config.write_webm) {
-          /* Update the hash */
-          if (!stream->ebml.debug)
-            stream->hash = murmur(pkt->data.frame.buf,
-                                  (int)pkt->data.frame.sz,
-                                  stream->hash);
-
           write_webm_block(&stream->ebml, cfg, pkt);
         }
 #endif
diff --git a/webmdec.c b/webmdec.c
index 7cacdf9..a8e220c 100644
--- a/webmdec.c
+++ b/webmdec.c
@@ -86,7 +86,8 @@
   } else if (codec_id == NESTEGG_CODEC_VP9) {
     vpx_ctx->fourcc = VP9_FOURCC;
   } else {
-    fatal("Not VPx video, quitting.\n");
+    fprintf(stderr, "Not VPx video, quitting.\n");
+    goto fail;
   }
 
   webm_ctx->video_track = i;
diff --git a/webmenc.c b/webmenc.c
deleted file mode 100644
index 17bbeec..0000000
--- a/webmenc.c
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include "webmenc.h"
-
-#include <limits.h>
-#include <string.h>
-
-#include "third_party/libmkv/EbmlWriter.h"
-#include "third_party/libmkv/EbmlIDs.h"
-
-void Ebml_Write(struct EbmlGlobal *glob,
-                const void *buffer_in,
-                unsigned long len) {
-  (void) fwrite(buffer_in, 1, len, glob->stream);
-}
-
-#define WRITE_BUFFER(s) \
-for (i = len - 1; i >= 0; i--) { \
-  x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \
-  Ebml_Write(glob, &x, 1); \
-}
-
-void Ebml_Serialize(struct EbmlGlobal *glob,
-                    const void *buffer_in,
-                    int buffer_size,
-                    unsigned long len) {
-  char x;
-  int i;
-
-  /* buffer_size:
-   * 1 - int8_t;
-   * 2 - int16_t;
-   * 3 - int32_t;
-   * 4 - int64_t;
-   */
-  switch (buffer_size) {
-    case 1:
-      WRITE_BUFFER(int8_t)
-      break;
-    case 2:
-      WRITE_BUFFER(int16_t)
-      break;
-    case 4:
-      WRITE_BUFFER(int32_t)
-      break;
-    case 8:
-      WRITE_BUFFER(int64_t)
-      break;
-    default:
-      break;
-  }
-}
-#undef WRITE_BUFFER
-
-/* Need a fixed size serializer for the track ID. libmkv provides a 64 bit
- * one, but not a 32 bit one.
- */
-static void Ebml_SerializeUnsigned32(struct EbmlGlobal *glob,
-                                     unsigned int class_id,
-                                     uint64_t ui) {
-  const unsigned char sizeSerialized = 4 | 0x80;
-  Ebml_WriteID(glob, class_id);
-  Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
-  Ebml_Serialize(glob, &ui, sizeof(ui), 4);
-}
-
-static void Ebml_StartSubElement(struct EbmlGlobal *glob,
-                                 EbmlLoc *ebmlLoc,
-                                 unsigned int class_id) {
-  const uint64_t kEbmlUnknownLength = LITERALU64(0x01FFFFFF, 0xFFFFFFFF);
-  Ebml_WriteID(glob, class_id);
-  *ebmlLoc = ftello(glob->stream);
-  Ebml_Serialize(glob, &kEbmlUnknownLength, sizeof(kEbmlUnknownLength), 8);
-}
-
-static void Ebml_EndSubElement(struct EbmlGlobal *glob, EbmlLoc *ebmlLoc) {
-  off_t pos;
-  uint64_t size;
-
-  /* Save the current stream pointer. */
-  pos = ftello(glob->stream);
-
-  /* Calculate the size of this element. */
-  size = pos - *ebmlLoc - 8;
-  size |= LITERALU64(0x01000000, 0x00000000);
-
-  /* Seek back to the beginning of the element and write the new size. */
-  fseeko(glob->stream, *ebmlLoc, SEEK_SET);
-  Ebml_Serialize(glob, &size, sizeof(size), 8);
-
-  /* Reset the stream pointer. */
-  fseeko(glob->stream, pos, SEEK_SET);
-}
-
-void write_webm_seek_element(struct EbmlGlobal *ebml,
-                             unsigned int id,
-                             off_t pos) {
-  uint64_t offset = pos - ebml->position_reference;
-  EbmlLoc start;
-  Ebml_StartSubElement(ebml, &start, Seek);
-  Ebml_SerializeBinary(ebml, SeekID, id);
-  Ebml_SerializeUnsigned64(ebml, SeekPosition, offset);
-  Ebml_EndSubElement(ebml, &start);
-}
-
-void write_webm_seek_info(struct EbmlGlobal *ebml) {
-  off_t pos;
-  EbmlLoc start;
-  EbmlLoc startInfo;
-  uint64_t frame_time;
-  char version_string[64];
-
-  /* Save the current stream pointer. */
-  pos = ftello(ebml->stream);
-
-  if (ebml->seek_info_pos)
-    fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET);
-  else
-    ebml->seek_info_pos = pos;
-
-  Ebml_StartSubElement(ebml, &start, SeekHead);
-  write_webm_seek_element(ebml, Tracks, ebml->track_pos);
-  write_webm_seek_element(ebml, Cues, ebml->cue_pos);
-  write_webm_seek_element(ebml, Info, ebml->segment_info_pos);
-  Ebml_EndSubElement(ebml, &start);
-
-  /* Create and write the Segment Info. */
-  if (ebml->debug) {
-    strcpy(version_string, "vpxenc");
-  } else {
-    strcpy(version_string, "vpxenc ");
-    strncat(version_string,
-            vpx_codec_version_str(),
-            sizeof(version_string) - 1 - strlen(version_string));
-  }
-
-  frame_time = (uint64_t)1000 * ebml->framerate.den
-               / ebml->framerate.num;
-  ebml->segment_info_pos = ftello(ebml->stream);
-  Ebml_StartSubElement(ebml, &startInfo, Info);
-  Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
-  Ebml_SerializeFloat(ebml, Segment_Duration,
-                      (double)(ebml->last_pts_ms + frame_time));
-  Ebml_SerializeString(ebml, 0x4D80, version_string);
-  Ebml_SerializeString(ebml, 0x5741, version_string);
-  Ebml_EndSubElement(ebml, &startInfo);
-}
-
-void write_webm_file_header(struct EbmlGlobal *glob,
-                            const vpx_codec_enc_cfg_t *cfg,
-                            const struct vpx_rational *fps,
-                            stereo_format_t stereo_fmt,
-                            unsigned int fourcc) {
-  EbmlLoc start;
-  EbmlLoc trackStart;
-  EbmlLoc videoStart;
-  unsigned int trackNumber = 1;
-  uint64_t trackID = 0;
-  unsigned int pixelWidth = cfg->g_w;
-  unsigned int pixelHeight = cfg->g_h;
-
-  /* Write the EBML header. */
-  Ebml_StartSubElement(glob, &start, EBML);
-  Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
-  Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1);
-  Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4);
-  Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8);
-  Ebml_SerializeString(glob, DocType, "webm");
-  Ebml_SerializeUnsigned(glob, DocTypeVersion, 2);
-  Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2);
-  Ebml_EndSubElement(glob, &start);
-
-  /* Open and begin writing the segment element. */
-  Ebml_StartSubElement(glob, &glob->startSegment, Segment);
-  glob->position_reference = ftello(glob->stream);
-  glob->framerate = *fps;
-  write_webm_seek_info(glob);
-
-  /* Open and write the Tracks element. */
-  glob->track_pos = ftello(glob->stream);
-  Ebml_StartSubElement(glob, &trackStart, Tracks);
-
-  /* Open and write the Track entry. */
-  Ebml_StartSubElement(glob, &start, TrackEntry);
-  Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
-  glob->track_id_pos = ftello(glob->stream);
-  Ebml_SerializeUnsigned32(glob, TrackUID, trackID);
-  Ebml_SerializeUnsigned(glob, TrackType, 1);
-  Ebml_SerializeString(glob, CodecID,
-                       fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9");
-  Ebml_StartSubElement(glob, &videoStart, Video);
-  Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
-  Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
-  Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
-  Ebml_EndSubElement(glob, &videoStart);
-
-  /* Close Track entry. */
-  Ebml_EndSubElement(glob, &start);
-
-  /* Close Tracks element. */
-  Ebml_EndSubElement(glob, &trackStart);
-
-  /* Segment element remains open. */
-}
-
-void write_webm_block(struct EbmlGlobal *glob,
-                      const vpx_codec_enc_cfg_t *cfg,
-                      const vpx_codec_cx_pkt_t *pkt) {
-  unsigned int block_length;
-  unsigned char track_number;
-  uint16_t block_timecode = 0;
-  unsigned char flags;
-  int64_t pts_ms;
-  int start_cluster = 0, is_keyframe;
-
-  /* Calculate the PTS of this frame in milliseconds. */
-  pts_ms = pkt->data.frame.pts * 1000
-           * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
-
-  if (pts_ms <= glob->last_pts_ms)
-    pts_ms = glob->last_pts_ms + 1;
-
-  glob->last_pts_ms = pts_ms;
-
-  /* Calculate the relative time of this block. */
-  if (pts_ms - glob->cluster_timecode > SHRT_MAX)
-    start_cluster = 1;
-  else
-    block_timecode = (uint16_t)pts_ms - glob->cluster_timecode;
-
-  is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
-  if (start_cluster || is_keyframe) {
-    if (glob->cluster_open)
-      Ebml_EndSubElement(glob, &glob->startCluster);
-
-    /* Open the new cluster. */
-    block_timecode = 0;
-    glob->cluster_open = 1;
-    glob->cluster_timecode = (uint32_t)pts_ms;
-    glob->cluster_pos = ftello(glob->stream);
-    Ebml_StartSubElement(glob, &glob->startCluster, Cluster);
-    Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
-
-    /* Save a cue point if this is a keyframe. */
-    if (is_keyframe) {
-      struct cue_entry *cue, *new_cue_list;
-
-      new_cue_list = realloc(glob->cue_list,
-                             (glob->cues + 1) * sizeof(struct cue_entry));
-      if (new_cue_list)
-        glob->cue_list = new_cue_list;
-      else
-        fatal("Failed to realloc cue list.");
-
-      cue = &glob->cue_list[glob->cues];
-      cue->time = glob->cluster_timecode;
-      cue->loc = glob->cluster_pos;
-      glob->cues++;
-    }
-  }
-
-  /* Write the Simple Block. */
-  Ebml_WriteID(glob, SimpleBlock);
-
-  block_length = (unsigned int)pkt->data.frame.sz + 4;
-  block_length |= 0x10000000;
-  Ebml_Serialize(glob, &block_length, sizeof(block_length), 4);
-
-  track_number = 1;
-  track_number |= 0x80;
-  Ebml_Write(glob, &track_number, 1);
-
-  Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2);
-
-  flags = 0;
-  if (is_keyframe)
-    flags |= 0x80;
-  if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
-    flags |= 0x08;
-  Ebml_Write(glob, &flags, 1);
-
-  Ebml_Write(glob, pkt->data.frame.buf, (unsigned int)pkt->data.frame.sz);
-}
-
-void write_webm_file_footer(struct EbmlGlobal *glob, int hash) {
-  EbmlLoc start_cues;
-  EbmlLoc start_cue_point;
-  EbmlLoc start_cue_tracks;
-  unsigned int i;
-
-  if (glob->cluster_open)
-    Ebml_EndSubElement(glob, &glob->startCluster);
-
-  glob->cue_pos = ftello(glob->stream);
-  Ebml_StartSubElement(glob, &start_cues, Cues);
-
-  for (i = 0; i < glob->cues; i++) {
-    struct cue_entry *cue = &glob->cue_list[i];
-    Ebml_StartSubElement(glob, &start_cue_point, CuePoint);
-    Ebml_SerializeUnsigned(glob, CueTime, cue->time);
-
-    Ebml_StartSubElement(glob, &start_cue_tracks, CueTrackPositions);
-    Ebml_SerializeUnsigned(glob, CueTrack, 1);
-    Ebml_SerializeUnsigned64(glob, CueClusterPosition,
-                             cue->loc - glob->position_reference);
-    Ebml_EndSubElement(glob, &start_cue_tracks);
-
-    Ebml_EndSubElement(glob, &start_cue_point);
-  }
-
-  Ebml_EndSubElement(glob, &start_cues);
-
-  /* Close the Segment. */
-  Ebml_EndSubElement(glob, &glob->startSegment);
-
-  /* Patch up the seek info block. */
-  write_webm_seek_info(glob);
-
-  /* Patch up the track id. */
-  fseeko(glob->stream, glob->track_id_pos, SEEK_SET);
-  Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash);
-
-  fseeko(glob->stream, 0, SEEK_END);
-}
diff --git a/webmenc.cc b/webmenc.cc
new file mode 100644
index 0000000..a0e542b
--- /dev/null
+++ b/webmenc.cc
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./webmenc.h"
+
+#include <string>
+
+#include "third_party/libwebm/mkvmuxer.hpp"
+#include "third_party/libwebm/mkvmuxerutil.hpp"
+#include "third_party/libwebm/mkvwriter.hpp"
+
+namespace {
+const uint64_t kDebugTrackUid = 0xDEADBEEF;  // Track UID used in debug mode.
+const int kVideoTrackNumber = 1;  // Number requested for the video track.
+}  // namespace
+
+void write_webm_file_header(struct EbmlGlobal *glob,
+                            const vpx_codec_enc_cfg_t *cfg,
+                            const struct vpx_rational *fps,  // Unused here.
+                            stereo_format_t stereo_fmt,
+                            unsigned int fourcc) {  // Creates muxer state.
+  mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(glob->stream);
+  mkvmuxer::Segment *const segment = new mkvmuxer::Segment();
+  segment->Init(writer);
+  segment->set_mode(mkvmuxer::Segment::kFile);
+  segment->OutputCues(true);
+
+  mkvmuxer::SegmentInfo *const info = segment->GetSegmentInfo();
+  const uint64_t kTimecodeScale = 1000000;  // Presumably ns per tick.
+  info->set_timecode_scale(kTimecodeScale);
+  std::string version = "vpxenc";
+  if (!glob->debug) {  // Debug output omits the library version.
+    version.append(std::string(" ") + vpx_codec_version_str());
+  }
+  info->set_writing_app(version.c_str());
+
+  const uint64_t video_track_id =
+      segment->AddVideoTrack(static_cast<int>(cfg->g_w),
+                             static_cast<int>(cfg->g_h),
+                             kVideoTrackNumber);
+  mkvmuxer::VideoTrack* const video_track =
+      static_cast<mkvmuxer::VideoTrack*>(
+          segment->GetTrackByNumber(video_track_id));  // Not NULL-checked.
+  video_track->SetStereoMode(stereo_fmt);
+  video_track->set_codec_id(fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9");
+  if (glob->debug) {  // Use the fixed debug UID.
+    video_track->set_uid(kDebugTrackUid);
+  }
+  glob->writer = writer;  // Both released by write_webm_file_footer().
+  glob->segment = segment;
+}
+
+void write_webm_block(struct EbmlGlobal *glob,
+                      const vpx_codec_enc_cfg_t *cfg,
+                      const vpx_codec_cx_pkt_t *pkt) {  // Adds one frame.
+  mkvmuxer::Segment *const segment =
+      reinterpret_cast<mkvmuxer::Segment*>(glob->segment);
+  int64_t pts_ns = pkt->data.frame.pts * 1000000000ll *  // Presumably ns.
+                   cfg->g_timebase.num / cfg->g_timebase.den;
+  if (pts_ns <= glob->last_pts_ns)  // Keep timestamps strictly increasing.
+    pts_ns = glob->last_pts_ns + 1000000;  // 1 ms if units are ns.
+  glob->last_pts_ns = pts_ns;
+
+  segment->AddFrame(static_cast<uint8_t*>(pkt->data.frame.buf),
+                    pkt->data.frame.sz,
+                    kVideoTrackNumber,
+                    pts_ns,
+                    pkt->data.frame.flags & VPX_FRAME_IS_KEY);
+}
+
+void write_webm_file_footer(struct EbmlGlobal *glob) {  // Finalize and free.
+  mkvmuxer::MkvWriter *const writer =
+      reinterpret_cast<mkvmuxer::MkvWriter*>(glob->writer);
+  mkvmuxer::Segment *const segment =
+      reinterpret_cast<mkvmuxer::Segment*>(glob->segment);
+  segment->Finalize();
+  delete segment;  // Allocated in write_webm_file_header().
+  delete writer;
+  glob->writer = NULL;  // Clear so the deleted pointers are not reused.
+  glob->segment = NULL;
+}
diff --git a/webmenc.h b/webmenc.h
index 362aa89..0ac606b 100644
--- a/webmenc.h
+++ b/webmenc.h
@@ -13,13 +13,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#if defined(_MSC_VER)
-/* MSVS doesn't define off_t */
-typedef __int64 off_t;
-#else
-#include <stdint.h>
-#endif
-
 #include "tools_common.h"
 #include "vpx/vpx_encoder.h"
 
@@ -27,40 +20,13 @@
 extern "C" {
 #endif
 
-typedef off_t EbmlLoc;
-
-struct cue_entry {
-  unsigned int time;
-  uint64_t loc;
-};
-
+/* TODO(vigneshv): Rename this struct */
 struct EbmlGlobal {
   int debug;
-
   FILE *stream;
-  int64_t last_pts_ms;
-  vpx_rational_t framerate;
-
-  /* These pointers are to the start of an element */
-  off_t position_reference;
-  off_t seek_info_pos;
-  off_t segment_info_pos;
-  off_t track_pos;
-  off_t cue_pos;
-  off_t cluster_pos;
-
-  /* This pointer is to a specific element to be serialized */
-  off_t track_id_pos;
-
-  /* These pointers are to the size field of the element */
-  EbmlLoc startSegment;
-  EbmlLoc startCluster;
-
-  uint32_t cluster_timecode;
-  int cluster_open;
-
-  struct cue_entry *cue_list;
-  unsigned int cues;
+  int64_t last_pts_ns;  /* Timestamp of the previous block. */
+  void *writer;  /* mkvmuxer::MkvWriter, set by write_webm_file_header(). */
+  void *segment;  /* mkvmuxer::Segment, set by write_webm_file_header(). */
 };
 
 /* Stereo 3D packed frame format */
@@ -72,10 +38,6 @@
   STEREO_FORMAT_RIGHT_LEFT = 11
 } stereo_format_t;
 
-void write_webm_seek_element(struct EbmlGlobal *ebml,
-                             unsigned int id,
-                             off_t pos);
-
 void write_webm_file_header(struct EbmlGlobal *glob,
                             const vpx_codec_enc_cfg_t *cfg,
                             const struct vpx_rational *fps,
@@ -86,7 +48,7 @@
                       const vpx_codec_enc_cfg_t *cfg,
                       const vpx_codec_cx_pkt_t *pkt);
 
-void write_webm_file_footer(struct EbmlGlobal *glob, int hash);
+void write_webm_file_footer(struct EbmlGlobal *glob);
 
 #ifdef __cplusplus
 }  // extern "C"