Merge "Removed the reset of mode_info from previous frame"
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index 359157c..a6315b9 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -174,6 +174,10 @@
                         Include=".\\$f"
                     # Separate file names with Condition?
                     tag_content ObjectFileName "\$(IntDir)$objf"
+                    # Check for AVX and turn it on to avoid warnings.
+                    if [[ $f =~ avx.?\.c$ ]]; then
+                        tag_content AdditionalOptions "/arch:AVX"
+                    fi
                     close_tag ClCompile
                 elif [ "$pat" == "h" ] ; then
                     tag ClInclude \
diff --git a/examples.mk b/examples.mk
index 24b5c37..e4abcf7 100644
--- a/examples.mk
+++ b/examples.mk
@@ -58,6 +58,8 @@
 vp9_spatial_scalable_encoder.SRCS += args.c args.h
 vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h
 vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h
+vp9_spatial_scalable_encoder.SRCS += video_common.h
+vp9_spatial_scalable_encoder.SRCS += video_writer.h video_writer.c
 vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
 vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
 
@@ -135,6 +137,10 @@
 error_resilient.DESCRIPTION      = Error Resiliency Feature
 
 EXAMPLES-$(CONFIG_VP8_ENCODER)     += vp8_set_maps.c
+vp8_set_maps.SRCS                  += ivfenc.h ivfenc.c
+vp8_set_maps.SRCS                  += tools_common.h tools_common.c
+vp8_set_maps.SRCS                  += video_common.h
+vp8_set_maps.SRCS                  += video_writer.h video_writer.c
 vp8_set_maps.GUID                   = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
 vp8_set_maps.DESCRIPTION            = VP8 set active and ROI maps
 EXAMPLES-$(CONFIG_VP8_ENCODER)     += vp8cx_set_ref.c
diff --git a/examples/decode_to_md5.c b/examples/decode_to_md5.c
index 077513c..aabac60 100644
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -82,9 +82,9 @@
   int frame_cnt = 0;
   FILE *outfile = NULL;
   vpx_codec_ctx_t codec;
-  vpx_codec_iface_t *iface = NULL;
   VpxVideoReader *reader = NULL;
   const VpxVideoInfo *info = NULL;
+  const VpxInterface *decoder = NULL;
 
   exec_name = argv[0];
 
@@ -100,13 +100,13 @@
 
   info = vpx_video_reader_get_info(reader);
 
-  iface = get_codec_interface(info->codec_fourcc);
-  if (!iface)
+  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
+  if (!decoder)
     die("Unknown input codec.");
 
-  printf("Using %s\n", vpx_codec_iface_name(iface));
+  printf("Using %s\n", vpx_codec_iface_name(decoder->interface()));
 
-  if (vpx_codec_dec_init(&codec, iface, NULL, 0))
+  if (vpx_codec_dec_init(&codec, decoder->interface(), NULL, 0))
     die_codec(&codec, "Failed to initialize decoder");
 
   while (vpx_video_reader_read_frame(reader)) {
diff --git a/examples/decode_with_drops.c b/examples/decode_with_drops.c
index e8fc076..c6f7d43 100644
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -76,7 +76,7 @@
   int frame_cnt = 0;
   FILE *outfile = NULL;
   vpx_codec_ctx_t codec;
-  vpx_codec_iface_t *iface = NULL;
+  const VpxInterface *decoder = NULL;
   VpxVideoReader *reader = NULL;
   const VpxVideoInfo *info = NULL;
   int n = 0;
@@ -104,13 +104,13 @@
 
   info = vpx_video_reader_get_info(reader);
 
-  iface = get_codec_interface(info->codec_fourcc);
-  if (!iface)
+  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
+  if (!decoder)
     die("Unknown input codec.");
 
-  printf("Using %s\n", vpx_codec_iface_name(iface));
+  printf("Using %s\n", vpx_codec_iface_name(decoder->interface()));
 
-  if (vpx_codec_dec_init(&codec, iface, NULL, 0))
+  if (vpx_codec_dec_init(&codec, decoder->interface(), NULL, 0))
     die_codec(&codec, "Failed to initialize decoder.");
 
   while (vpx_video_reader_read_frame(reader)) {
diff --git a/examples/postproc.c b/examples/postproc.c
index 7281f1e..2912fe6 100644
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -64,8 +64,8 @@
   FILE *outfile = NULL;
   vpx_codec_ctx_t codec;
   vpx_codec_err_t res;
-  vpx_codec_iface_t *iface = NULL;
   VpxVideoReader *reader = NULL;
+  const VpxInterface *decoder = NULL;
   const VpxVideoInfo *info = NULL;
 
   exec_name = argv[0];
@@ -82,17 +82,16 @@
 
   info = vpx_video_reader_get_info(reader);
 
-  iface = get_codec_interface(info->codec_fourcc);
-  if (!iface)
+  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
+  if (!decoder)
     die("Unknown input codec.");
 
-  printf("Using %s\n", vpx_codec_iface_name(iface));
+  printf("Using %s\n", vpx_codec_iface_name(decoder->interface()));
 
-  res = vpx_codec_dec_init(&codec, iface, NULL, VPX_CODEC_USE_POSTPROC);
-  if (res == VPX_CODEC_INCAPABLE) {
-    printf("NOTICE: Postproc not supported.\n");
-    res = vpx_codec_dec_init(&codec, iface, NULL, 0);
-  }
+  res = vpx_codec_dec_init(&codec, decoder->interface(), NULL,
+                           VPX_CODEC_USE_POSTPROC);
+  if (res == VPX_CODEC_INCAPABLE)
+    die_codec(&codec, "Postproc not supported by this decoder.");
 
   if (res)
     die_codec(&codec, "Failed to initialize decoder.");
diff --git a/examples/simple_decoder.c b/examples/simple_decoder.c
index 4dc9308..b0ca77d 100644
--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -101,8 +101,8 @@
   int frame_cnt = 0;
   FILE *outfile = NULL;
   vpx_codec_ctx_t codec;
-  vpx_codec_iface_t *iface = NULL;
   VpxVideoReader *reader = NULL;
+  const VpxInterface *decoder = NULL;
   const VpxVideoInfo *info = NULL;
 
   exec_name = argv[0];
@@ -119,13 +119,13 @@
 
   info = vpx_video_reader_get_info(reader);
 
-  iface = get_codec_interface(info->codec_fourcc);
-  if (!iface)
+  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
+  if (!decoder)
     die("Unknown input codec.");
 
-  printf("Using %s\n", vpx_codec_iface_name(iface));
+  printf("Using %s\n", vpx_codec_iface_name(decoder->interface()));
 
-  if (vpx_codec_dec_init(&codec, iface, NULL, 0))
+  if (vpx_codec_dec_init(&codec, decoder->interface(), NULL, 0))
     die_codec(&codec, "Failed to initialize decoder.");
 
   while (vpx_video_reader_read_frame(reader)) {
diff --git a/examples/simple_encoder.c b/examples/simple_encoder.c
index 5076054..e419e81 100644
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -86,21 +86,46 @@
 #include <string.h>
 
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 
 #include "./tools_common.h"
 #include "./video_writer.h"
 
-#define interface (vpx_codec_vp8_cx())
-
 static const char *exec_name;
 
 void usage_exit() {
-  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
+  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
+          exec_name);
   exit(EXIT_FAILURE);
 }
 
+static void encode_frame(vpx_codec_ctx_t *codec,
+                         vpx_image_t *img,
+                         int frame_index,
+                         VpxVideoWriter *writer) {
+  vpx_codec_iter_t iter = NULL;
+  const vpx_codec_cx_pkt_t *pkt = NULL;
+  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0,
+                                               VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK)
+    die_codec(codec, "Failed to encode frame");
+
+  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
+    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
+      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
+      if (!vpx_video_writer_write_frame(writer,
+                                        pkt->data.frame.buf,
+                                        pkt->data.frame.sz,
+                                        pkt->data.frame.pts)) {
+        die_codec(codec, "Failed to write compressed frame");
+      }
+
+      printf(keyframe ? "K" : ".");
+      fflush(stdout);
+    }
+  }
+}
+
 int main(int argc, char **argv) {
   FILE *infile = NULL;
   vpx_codec_ctx_t codec;
@@ -110,17 +135,27 @@
   vpx_codec_err_t res;
   VpxVideoInfo info = {0};
   VpxVideoWriter *writer = NULL;
+  const VpxInterface *encoder = NULL;
   const int fps = 30;        // TODO(dkovalev) add command line argument
   const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
+  const char *const codec_arg = argv[1];
+  const char *const width_arg = argv[2];
+  const char *const height_arg = argv[3];
+  const char *const infile_arg = argv[4];
+  const char *const outfile_arg = argv[5];
 
   exec_name = argv[0];
 
-  if (argc != 5)
+  if (argc != 6)
     die("Invalid number of arguments");
 
-  info.codec_fourcc = VP8_FOURCC;
-  info.frame_width = strtol(argv[1], NULL, 0);
-  info.frame_height = strtol(argv[2], NULL, 0);
+  encoder = get_vpx_encoder_by_name(codec_arg);
+  if (!encoder)
+     die("Unsupported codec.");
+
+  info.codec_fourcc = encoder->fourcc;
+  info.frame_width = strtol(width_arg, NULL, 0);
+  info.frame_height = strtol(height_arg, NULL, 0);
   info.time_base.numerator = 1;
   info.time_base.denominator = fps;
 
@@ -136,9 +171,9 @@
     die("Failed to allocate image.");
   }
 
-  printf("Using %s\n", vpx_codec_iface_name(interface));
+  printf("Using %s\n", vpx_codec_iface_name(encoder->interface()));
 
-  res = vpx_codec_enc_config_default(interface, &cfg, 0);
+  res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0);
   if (res)
     die_codec(&codec, "Failed to get default codec config.");
 
@@ -148,40 +183,20 @@
   cfg.g_timebase.den = info.time_base.denominator;
   cfg.rc_target_bitrate = bitrate;
 
-  writer = vpx_video_writer_open(argv[4], kContainerIVF, &info);
+  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
   if (!writer)
-    die("Failed to open %s for writing.", argv[4]);
+    die("Failed to open %s for writing.", outfile_arg);
 
-  if (!(infile = fopen(argv[3], "rb")))
-    die("Failed to open %s for reading.", argv[3]);
+  if (!(infile = fopen(infile_arg, "rb")))
+    die("Failed to open %s for reading.", infile_arg);
 
-  if (vpx_codec_enc_init(&codec, interface, &cfg, 0))
+  if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0))
     die_codec(&codec, "Failed to initialize encoder");
 
-  while (vpx_img_read(&raw, infile)) {
-    vpx_codec_iter_t iter = NULL;
-    const vpx_codec_cx_pkt_t *pkt = NULL;
+  while (vpx_img_read(&raw, infile))
+    encode_frame(&codec, &raw, frame_count++, writer);
+  encode_frame(&codec, NULL, -1, writer);  // flush the encoder
 
-    ++frame_count;
-
-    res = vpx_codec_encode(&codec, &raw, frame_count, 1, 0,
-                           VPX_DL_GOOD_QUALITY);
-    if (res != VPX_CODEC_OK)
-      die_codec(&codec, "Failed to encode frame");
-
-    while ((pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
-      if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
-        const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
-        if (!vpx_video_writer_write_frame(writer,
-                                          pkt->data.frame.buf,
-                                          pkt->data.frame.sz,
-                                          pkt->data.frame.pts))
-          die_codec(&codec, "Failed to write compressed frame.");
-        printf(keyframe ? "K" : ".");
-        fflush(stdout);
-      }
-    }
-  }
   printf("\n");
   fclose(infile);
   printf("Processed %d frames.\n", frame_count);
diff --git a/examples/twopass_encoder.c b/examples/twopass_encoder.c
index 93b6150..f16db66 100644
--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -53,18 +53,16 @@
 #include <string.h>
 
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 
 #include "./tools_common.h"
 #include "./video_writer.h"
 
-#define interface (vpx_codec_vp8_cx())
-
 static const char *exec_name;
 
 void usage_exit() {
-  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
+  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
+          exec_name);
   exit(EXIT_FAILURE);
 }
 
@@ -130,18 +128,29 @@
   vpx_codec_err_t res;
   vpx_fixed_buf_t stats = {0};
   VpxVideoInfo info = {0};
+  const VpxInterface *encoder = NULL;
   int pass;
   const int fps = 30;        // TODO(dkovalev) add command line argument
   const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
+  const char *const codec_arg = argv[1];
+  const char *const width_arg = argv[2];
+  const char *const height_arg = argv[3];
+  const char *const infile_arg = argv[4];
+  const char *const outfile_arg = argv[5];
+  exec_name = argv[0];
 
-  if (argc != 5)
+  if (argc != 6)
     die("Invalid number of arguments.");
 
-  info.codec_fourcc = VP8_FOURCC;
+  encoder = get_vpx_encoder_by_name(codec_arg);
+  if (!encoder)
+    die("Unsupported codec.");
+
+  info.codec_fourcc = encoder->fourcc;
   info.time_base.numerator = 1;
   info.time_base.denominator = fps;
-  info.frame_width = strtol(argv[1], NULL, 0);
-  info.frame_height = strtol(argv[2], NULL, 0);
+  info.frame_width = strtol(width_arg, NULL, 0);
+  info.frame_height = strtol(height_arg, NULL, 0);
 
   if (info.frame_width <= 0 ||
       info.frame_height <= 0 ||
@@ -155,13 +164,13 @@
     die("Failed to allocate image", info.frame_width, info.frame_height);
   }
 
-  writer = vpx_video_writer_open(argv[4], kContainerIVF, &info);
+  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
   if (!writer)
-    die("Failed to open %s for writing", argv[4]);
+    die("Failed to open %s for writing", outfile_arg);
 
-  printf("Using %s\n", vpx_codec_iface_name(interface));
+  printf("Using %s\n", vpx_codec_iface_name(encoder->interface()));
 
-  res = vpx_codec_enc_config_default(interface, &cfg, 0);
+  res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0);
   if (res)
     die_codec(&codec, "Failed to get default codec config.");
 
@@ -181,10 +190,10 @@
       cfg.rc_twopass_stats_in = stats;
     }
 
-    if (!(infile = fopen(argv[3], "rb")))
-      die("Failed to open %s for reading", argv[3]);
+    if (!(infile = fopen(infile_arg, "rb")))
+      die("Failed to open %s for reading", infile_arg);
 
-    if (vpx_codec_enc_init(&codec, interface, &cfg, 0))
+    if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0))
       die_codec(&codec, "Failed to initialize encoder");
 
     while (vpx_img_read(&raw, infile)) {
diff --git a/examples/vp8_set_maps.c b/examples/vp8_set_maps.c
index 4c0e8a0..ec9bc65 100644
--- a/examples/vp8_set_maps.c
+++ b/examples/vp8_set_maps.c
@@ -44,253 +44,197 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdarg.h>
 #include <string.h>
+
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"
-#define interface (vpx_codec_vp8_cx())
-#define fourcc    0x30385056
+#include "vpx/vpx_encoder.h"
 
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
+#include "./tools_common.h"
+#include "./video_writer.h"
 
-static void mem_put_le16(char *mem, unsigned int val) {
-    mem[0] = val;
-    mem[1] = val>>8;
+static const char *exec_name;
+
+void usage_exit() {
+  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
+  exit(EXIT_FAILURE);
 }
 
-static void mem_put_le32(char *mem, unsigned int val) {
-    mem[0] = val;
-    mem[1] = val>>8;
-    mem[2] = val>>16;
-    mem[3] = val>>24;
+static void set_roi_map(const vpx_codec_enc_cfg_t *cfg,
+                        vpx_codec_ctx_t *codec) {
+  int i;
+  vpx_roi_map_t roi = {0};
+
+  roi.rows = cfg->g_h / 16;
+  roi.cols = cfg->g_w / 16;
+
+  roi.delta_q[0] = 0;
+  roi.delta_q[1] = -2;
+  roi.delta_q[2] = -4;
+  roi.delta_q[3] = -6;
+
+  roi.delta_lf[0] = 0;
+  roi.delta_lf[1] = 1;
+  roi.delta_lf[2] = 2;
+  roi.delta_lf[3] = 3;
+
+  roi.static_threshold[0] = 1500;
+  roi.static_threshold[1] = 1000;
+  roi.static_threshold[2] = 500;
+  roi.static_threshold[3] = 0;
+
+  roi.roi_map = (uint8_t *)malloc(roi.rows * roi.cols);
+  for (i = 0; i < roi.rows * roi.cols; ++i)
+    roi.roi_map[i] = i % 4;
+
+  if (vpx_codec_control(codec, VP8E_SET_ROI_MAP, &roi))
+    die_codec(codec, "Failed to set ROI map");
+
+  free(roi.roi_map);
 }
 
-static void die(const char *fmt, ...) {
-    va_list ap;
+static void set_active_map(const vpx_codec_enc_cfg_t *cfg,
+                           vpx_codec_ctx_t *codec) {
+  int i;
+  vpx_active_map_t map = {0};
 
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    if(fmt[strlen(fmt)-1] != '\n')
-        printf("\n");
-    exit(EXIT_FAILURE);
+  map.rows = cfg->g_h / 16;
+  map.cols = cfg->g_w / 16;
+
+  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
+  for (i = 0; i < map.rows * map.cols; ++i)
+    map.active_map[i] = i % 2;
+
+  if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map))
+    die_codec(codec, "Failed to set active map");
+
+  free(map.active_map);
 }
 
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-    const char *detail = vpx_codec_error_detail(ctx);
+static void unset_active_map(const vpx_codec_enc_cfg_t *cfg,
+                             vpx_codec_ctx_t *codec) {
+  vpx_active_map_t map = {0};
 
-    printf("%s: %s\n", s, vpx_codec_error(ctx));
-    if(detail)
-        printf("    %s\n",detail);
-    exit(EXIT_FAILURE);
+  map.rows = cfg->g_h / 16;
+  map.cols = cfg->g_w / 16;
+  map.active_map = NULL;
+
+  if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map))
+    die_codec(codec, "Failed to set active map");
 }
 
-static int read_frame(FILE *f, vpx_image_t *img) {
-    size_t nbytes, to_read;
-    int    res = 1;
+static void encode_frame(vpx_codec_ctx_t *codec,
+                         vpx_image_t *img,
+                         int frame_index,
+                         VpxVideoWriter *writer) {
+  vpx_codec_iter_t iter = NULL;
+  const vpx_codec_cx_pkt_t *pkt = NULL;
+  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0,
+                                               VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK)
+    die_codec(codec, "Failed to encode frame");
 
-    to_read = img->w*img->h*3/2;
-    nbytes = fread(img->planes[0], 1, to_read, f);
-    if(nbytes != to_read) {
-        res = 0;
-        if(nbytes > 0)
-            printf("Warning: Read partial frame. Check your width & height!\n");
+  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
+    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
+      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
+      if (!vpx_video_writer_write_frame(writer,
+                                        pkt->data.frame.buf,
+                                        pkt->data.frame.sz,
+                                        pkt->data.frame.pts)) {
+        die_codec(codec, "Failed to write compressed frame");
+      }
+
+      printf(keyframe ? "K" : ".");
+      fflush(stdout);
     }
-    return res;
-}
-
-static void write_ivf_file_header(FILE *outfile,
-                                  const vpx_codec_enc_cfg_t *cfg,
-                                  int frame_cnt) {
-    char header[32];
-
-    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
-        return;
-    header[0] = 'D';
-    header[1] = 'K';
-    header[2] = 'I';
-    header[3] = 'F';
-    mem_put_le16(header+4,  0);                   /* version */
-    mem_put_le16(header+6,  32);                  /* headersize */
-    mem_put_le32(header+8,  fourcc);              /* headersize */
-    mem_put_le16(header+12, cfg->g_w);            /* width */
-    mem_put_le16(header+14, cfg->g_h);            /* height */
-    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
-    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
-    mem_put_le32(header+24, frame_cnt);           /* length */
-    mem_put_le32(header+28, 0);                   /* unused */
-
-    (void) fwrite(header, 1, 32, outfile);
-}
-
-
-static void write_ivf_frame_header(FILE *outfile,
-                                   const vpx_codec_cx_pkt_t *pkt)
-{
-    char             header[12];
-    vpx_codec_pts_t  pts;
-
-    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
-        return;
-
-    pts = pkt->data.frame.pts;
-    mem_put_le32(header, pkt->data.frame.sz);
-    mem_put_le32(header+4, pts&0xFFFFFFFF);
-    mem_put_le32(header+8, pts >> 32);
-
-    (void) fwrite(header, 1, 12, outfile);
+  }
 }
 
 int main(int argc, char **argv) {
-    FILE                *infile, *outfile;
-    vpx_codec_ctx_t      codec;
-    vpx_codec_enc_cfg_t  cfg;
-    int                  frame_cnt = 0;
-    vpx_image_t          raw;
-    vpx_codec_err_t      res;
-    long                 width;
-    long                 height;
-    int                  frame_avail;
-    int                  got_data;
-    int                  flags = 0;
+  FILE *infile = NULL;
+  vpx_codec_ctx_t codec = {0};
+  vpx_codec_enc_cfg_t cfg = {0};
+  int frame_count = 0;
+  vpx_image_t raw = {0};
+  vpx_codec_err_t res;
+  VpxVideoInfo info = {0};
+  VpxVideoWriter *writer = NULL;
+  const VpxInterface *encoder = NULL;
+  const int fps = 2;        // TODO(dkovalev) add command line argument
+  const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
 
-    /* Open files */
-    if(argc!=5)
-        die("Usage: %s <width> <height> <infile> <outfile>\n", argv[0]);
-    width = strtol(argv[1], NULL, 0);
-    height = strtol(argv[2], NULL, 0);
-    if(width < 16 || width%2 || height <16 || height%2)
-        die("Invalid resolution: %ldx%ld", width, height);
-    if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1))
-        die("Faile to allocate image", width, height);
-    if(!(outfile = fopen(argv[4], "wb")))
-        die("Failed to open %s for writing", argv[4]);
+  exec_name = argv[0];
 
-    printf("Using %s\n",vpx_codec_iface_name(interface));
+  if (argc != 5)
+    die("Invalid number of arguments");
 
-    /* Populate encoder configuration */
-    res = vpx_codec_enc_config_default(interface, &cfg, 0);
-    if(res) {
-        printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-        return EXIT_FAILURE;
+  encoder = get_vpx_encoder_by_name("vp8");  // only vp8 for now
+  if (!encoder)
+    die("Unsupported codec.");
+
+  info.codec_fourcc = encoder->fourcc;
+  info.frame_width = strtol(argv[1], NULL, 0);
+  info.frame_height = strtol(argv[2], NULL, 0);
+  info.time_base.numerator = 1;
+  info.time_base.denominator = fps;
+
+  if (info.frame_width <= 0 ||
+      info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 ||
+      (info.frame_height % 2) != 0) {
+    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
+  }
+
+  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
+                                             info.frame_height, 1)) {
+    die("Failed to allocate image.");
+  }
+
+  printf("Using %s\n", vpx_codec_iface_name(encoder->interface()));
+
+  res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0);
+  if (res)
+    die_codec(&codec, "Failed to get default codec config.");
+
+  cfg.g_w = info.frame_width;
+  cfg.g_h = info.frame_height;
+  cfg.g_timebase.num = info.time_base.numerator;
+  cfg.g_timebase.den = info.time_base.denominator;
+  cfg.rc_target_bitrate = bitrate;
+
+  writer = vpx_video_writer_open(argv[4], kContainerIVF, &info);
+  if (!writer)
+    die("Failed to open %s for writing.", argv[4]);
+
+  if (!(infile = fopen(argv[3], "rb")))
+    die("Failed to open %s for reading.", argv[3]);
+
+  if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0))
+    die_codec(&codec, "Failed to initialize encoder");
+
+  while (vpx_img_read(&raw, infile)) {
+    ++frame_count;
+
+    if (frame_count == 22) {
+      set_roi_map(&cfg, &codec);
+    } else if (frame_count == 33) {
+      set_active_map(&cfg, &codec);
+    } else if (frame_count == 44) {
+      unset_active_map(&cfg, &codec);
     }
 
-    /* Update the default configuration with our settings */
-    cfg.rc_target_bitrate = width * height * cfg.rc_target_bitrate
-                            / cfg.g_w / cfg.g_h;
-    cfg.g_w = width;
-    cfg.g_h = height;
+    encode_frame(&codec, &raw, frame_count, writer);
+  }
+  encode_frame(&codec, NULL, -1, writer);
+  printf("\n");
+  fclose(infile);
+  printf("Processed %d frames.\n", frame_count);
 
-    write_ivf_file_header(outfile, &cfg, 0);
+  vpx_img_free(&raw);
+  if (vpx_codec_destroy(&codec))
+    die_codec(&codec, "Failed to destroy codec.");
 
+  vpx_video_writer_close(writer);
 
-        /* Open input file for this encoding pass */
-        if(!(infile = fopen(argv[3], "rb")))
-            die("Failed to open %s for reading", argv[3]);
-
-        /* Initialize codec */
-        if(vpx_codec_enc_init(&codec, interface, &cfg, 0))
-            die_codec(&codec, "Failed to initialize encoder");
-
-        frame_avail = 1;
-        got_data = 0;
-        while(frame_avail || got_data) {
-            vpx_codec_iter_t iter = NULL;
-            const vpx_codec_cx_pkt_t *pkt;
-
-            if(frame_cnt + 1 == 22) {
-                vpx_roi_map_t  roi;
-                unsigned int   i;
-
-                roi.rows = cfg.g_h/16;
-                roi.cols = cfg.g_w/16;
-
-                roi.delta_q[0] = 0;
-                roi.delta_q[1] = -2;
-                roi.delta_q[2] = -4;
-                roi.delta_q[3] = -6;
-
-                roi.delta_lf[0] = 0;
-                roi.delta_lf[1] = 1;
-                roi.delta_lf[2] = 2;
-                roi.delta_lf[3] = 3;
-
-                roi.static_threshold[0] = 1500;
-                roi.static_threshold[1] = 1000;
-                roi.static_threshold[2] =  500;
-                roi.static_threshold[3] =    0;
-
-                /* generate an ROI map for example */
-                roi.roi_map = malloc(roi.rows * roi.cols);
-                for(i=0;i<roi.rows*roi.cols;i++)
-                    roi.roi_map[i] = i & 3;
-
-                if(vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
-                    die_codec(&codec, "Failed to set ROI map");
-
-                free(roi.roi_map);
-            } else if(frame_cnt + 1 == 33) {
-                vpx_active_map_t  active;
-                unsigned int      i;
-
-                active.rows = cfg.g_h/16;
-                active.cols = cfg.g_w/16;
-
-                /* generate active map for example */
-                active.active_map = malloc(active.rows * active.cols);
-                for(i=0;i<active.rows*active.cols;i++)
-                    active.active_map[i] = i & 1;
-
-                if(vpx_codec_control(&codec, VP8E_SET_ACTIVEMAP, &active))
-                    die_codec(&codec, "Failed to set active map");
-
-                free(active.active_map);
-            } else if(frame_cnt + 1 == 44) {
-                vpx_active_map_t  active;
-
-                active.rows = cfg.g_h/16;
-                active.cols = cfg.g_w/16;
-
-                /* pass in null map to disable active_map*/
-                active.active_map = NULL;
-
-                if(vpx_codec_control(&codec, VP8E_SET_ACTIVEMAP, &active))
-                    die_codec(&codec, "Failed to set active map");
-            }
-            frame_avail = read_frame(infile, &raw);
-            if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
-                                1, flags, VPX_DL_REALTIME))
-                die_codec(&codec, "Failed to encode frame");
-            got_data = 0;
-            while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
-                got_data = 1;
-                switch(pkt->kind) {
-                case VPX_CODEC_CX_FRAME_PKT:
-                    write_ivf_frame_header(outfile, pkt);
-                    (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-                                  outfile);
-                    break;
-                default:
-                    break;
-                }
-                printf(pkt->kind == VPX_CODEC_CX_FRAME_PKT
-                       && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
-                fflush(stdout);
-            }
-            frame_cnt++;
-        }
-        printf("\n");
-        fclose(infile);
-
-    printf("Processed %d frames.\n",frame_cnt-1);
-    vpx_img_free(&raw);
-    if(vpx_codec_destroy(&codec))
-        die_codec(&codec, "Failed to destroy codec");
-
-    /* Try to rewrite the file header with the actual frame count */
-    if(!fseek(outfile, 0, SEEK_SET))
-        write_ivf_file_header(outfile, &cfg, frame_cnt-1);
-    fclose(outfile);
-    return EXIT_SUCCESS;
+  return EXIT_SUCCESS;
 }
diff --git a/examples/vpx_temporal_scalable_patterns.c b/examples/vpx_temporal_scalable_patterns.c
index 11d331b..28bb7ff 100644
--- a/examples/vpx_temporal_scalable_patterns.c
+++ b/examples/vpx_temporal_scalable_patterns.c
@@ -12,6 +12,7 @@
 //  encoding scheme based on temporal scalability for video applications
 //  that benefit from a scalable bitstream.
 
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -31,6 +32,86 @@
 
 static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
 
+// For rate control encoding stats.
+struct RateControlMetrics {
+  // Number of input frames per layer.
+  int layer_input_frames[VPX_TS_MAX_LAYERS];
+  // Total (cumulative) number of encoded frames per layer.
+  int layer_tot_enc_frames[VPX_TS_MAX_LAYERS];
+  // Number of encoded non-key frames per layer.
+  int layer_enc_frames[VPX_TS_MAX_LAYERS];
+  // Framerate per layer (cumulative).
+  float layer_framerate[VPX_TS_MAX_LAYERS];
+  // Target average frame size per layer (per-frame-bandwidth per layer).
+  float layer_pfb[VPX_TS_MAX_LAYERS];
+  // Actual average frame size per layer.
+  float layer_avg_frame_size[VPX_TS_MAX_LAYERS];
+  // Average rate mismatch per layer (|target - actual| / target).
+  float layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
+  // Actual encoding bitrate per layer (cumulative).
+  float layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
+};
+
+static void set_rate_control_metrics(struct RateControlMetrics *rc,
+                                     vpx_codec_enc_cfg_t *cfg) {
+  int i = 0;
+  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
+  // per-frame-bandwidth, for the rate control encoding stats below.
+  float framerate = cfg->g_timebase.den / cfg->g_timebase.num;
+  rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
+  rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
+      rc->layer_framerate[0];
+  for (i = 0; i < cfg->ts_number_layers; ++i) {
+    if (i > 0) {
+      rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
+      rc->layer_pfb[i] = 1000.0 *
+          (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
+          (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
+    }
+    rc->layer_input_frames[i] = 0;
+    rc->layer_enc_frames[i] = 0;
+    rc->layer_tot_enc_frames[i] = 0;
+    rc->layer_encoding_bitrate[i] = 0.0;
+    rc->layer_avg_frame_size[i] = 0.0;
+    rc->layer_avg_rate_mismatch[i] = 0.0;
+  }
+}
+
+static void printout_rate_control_summary(struct RateControlMetrics *rc,
+                                          vpx_codec_enc_cfg_t *cfg,
+                                          int frame_cnt) {
+  int i = 0;
+  int check_num_frames = 0;
+  printf("Total number of processed frames: %d\n\n", frame_cnt -1);
+  printf("Rate control layer stats for %d layer(s):\n\n",
+      cfg->ts_number_layers);
+  for (i = 0; i < cfg->ts_number_layers; ++i) {
+    const int num_dropped = (i > 0) ?
+        (rc->layer_input_frames[i] - rc->layer_enc_frames[i]) :
+        (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1);
+    rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] *
+        rc->layer_encoding_bitrate[i] / rc->layer_tot_enc_frames[i];
+    rc->layer_avg_frame_size[i] = rc->layer_avg_frame_size[i] /
+        rc->layer_enc_frames[i];
+    rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
+        rc->layer_enc_frames[i];
+    printf("For layer#: %d \n", i);
+    printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
+           rc->layer_encoding_bitrate[i]);
+    printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
+           rc->layer_avg_frame_size[i]);
+    printf("Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]);
+    printf("Number of input frames, encoded (non-key) frames, "
+        "and perc dropped frames: %d %d %f \n", rc->layer_input_frames[i],
+        rc->layer_enc_frames[i],
+        100.0 * num_dropped / rc->layer_input_frames[i]);
+    check_num_frames += rc->layer_input_frames[i];
+    printf("\n");
+  }
+  if ((frame_cnt - 1) != check_num_frames)
+    die("Error: Number of input frames not equal to output! \n");
+}
+
 // Temporal scaling parameters:
 // NOTE: The 3 prediction frames cannot be used interchangeably due to
 // differences in the way they are handled throughout the code. The
@@ -355,41 +436,27 @@
   int pts = 0;  // PTS starts at 0.
   int frame_duration = 1;  // 1 timebase tick per frame.
   int layering_mode = 0;
-  int frames_in_layer[VPX_TS_MAX_LAYERS] = {0};
   int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
   int flag_periodicity = 1;
   int max_intra_size_pct;
   vpx_svc_layer_id_t layer_id = {0, 0};
-  char *codec_type;
-  vpx_codec_iface_t *(*interface)(void);
-  unsigned int fourcc;
-  struct VpxInputContext input_ctx = {0};
+  const VpxInterface *encoder = NULL;
+  FILE *infile = NULL;
+  struct RateControlMetrics rc;
 
   exec_name = argv[0];
   // Check usage and arguments.
-  if (argc < 10) {
+  if (argc < 11) {
     die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1> \n",
-        argv[0]);
+        "<rate_num> <rate_den>  <frame_drop_threshold> <mode> "
+        "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]);
   }
 
-  codec_type = argv[3];
-  if (strncmp(codec_type, "vp9", 3) == 0) {
-#if CONFIG_VP9_ENCODER
-    interface = vpx_codec_vp9_cx;
-    fourcc = VP9_FOURCC;
-#else
-    die("Encoder vp9 selected but not configured");
-#endif
-  } else  {
-#if CONFIG_VP8_ENCODER
-    interface = vpx_codec_vp8_cx;
-    fourcc = VP8_FOURCC;
-#else
-    die("Encoder vp8 selected but not configured");
-#endif
-  }
-  printf("Using %s\n", vpx_codec_iface_name(interface()));
+  encoder = get_vpx_encoder_by_name(argv[3]);
+  if (!encoder)
+    die("Unsupported codec.");
+
+  printf("Using %s\n", vpx_codec_iface_name(encoder->interface()));
 
   width = strtol(argv[4], NULL, 0);
   height = strtol(argv[5], NULL, 0);
@@ -397,12 +464,12 @@
     die("Invalid resolution: %d x %d", width, height);
   }
 
-  layering_mode = strtol(argv[8], NULL, 0);
-  if (layering_mode < 0 || layering_mode > 11) {
-    die("Invalid mode (0..11) %s", argv[8]);
+  layering_mode = strtol(argv[9], NULL, 0);
+  if (layering_mode < 0 || layering_mode > 12) {
+    die("Invalid mode (0..12) %s", argv[9]);
   }
 
-  if (argc != 9 + mode_to_num_layers[layering_mode]) {
+  if (argc != 10 + mode_to_num_layers[layering_mode]) {
     die("Invalid number of arguments");
   }
 
@@ -411,7 +478,7 @@
   }
 
   // Populate encoder configuration.
-  res = vpx_codec_enc_config_default(interface(), &cfg, 0);
+  res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0);
   if (res) {
     printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
     return EXIT_FAILURE;
@@ -425,18 +492,18 @@
   cfg.g_timebase.num = strtol(argv[6], NULL, 0);
   cfg.g_timebase.den = strtol(argv[7], NULL, 0);
 
-  for (i = 9; i < 9 + mode_to_num_layers[layering_mode]; ++i) {
-    cfg.ts_target_bitrate[i - 9] = strtol(argv[i], NULL, 0);
+  for (i = 10; i < 10 + mode_to_num_layers[layering_mode]; ++i) {
+    cfg.ts_target_bitrate[i - 10] = strtol(argv[i], NULL, 0);
   }
 
   // Real time parameters.
-  cfg.rc_dropframe_thresh = 0;
+  cfg.rc_dropframe_thresh = strtol(argv[8], NULL, 0);
   cfg.rc_end_usage = VPX_CBR;
   cfg.rc_resize_allowed = 0;
   cfg.rc_min_quantizer = 2;
   cfg.rc_max_quantizer = 56;
-  cfg.rc_undershoot_pct = 100;
-  cfg.rc_overshoot_pct = 15;
+  cfg.rc_undershoot_pct = 50;
+  cfg.rc_overshoot_pct = 50;
   cfg.rc_buf_initial_sz = 500;
   cfg.rc_buf_optimal_sz = 600;
   cfg.rc_buf_sz = 1000;
@@ -457,9 +524,10 @@
                              layer_flags,
                              &flag_periodicity);
 
+  set_rate_control_metrics(&rc, &cfg);
+
   // Open input file.
-  input_ctx.filename = argv[1];
-  if (!(input_ctx.file = fopen(input_ctx.filename, "rb"))) {
+  if (!(infile = fopen(argv[1], "rb"))) {
     die("Failed to open %s for reading", argv[1]);
   }
 
@@ -467,7 +535,7 @@
   for (i = 0; i < cfg.ts_number_layers; ++i) {
     char file_name[PATH_MAX];
     VpxVideoInfo info;
-    info.codec_fourcc = fourcc;
+    info.codec_fourcc = encoder->fourcc;
     info.frame_width = cfg.g_w;
     info.frame_height = cfg.g_h;
     info.time_base.numerator = cfg.g_timebase.num;
@@ -482,12 +550,12 @@
   cfg.ss_number_layers = 1;
 
   // Initialize codec.
-  if (vpx_codec_enc_init(&codec, interface(), &cfg, 0))
+  if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0))
     die_codec(&codec, "Failed to initialize encoder");
 
   vpx_codec_control(&codec, VP8E_SET_CPUUSED, -6);
   vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1);
-  if (strncmp(codec_type, "vp9", 3) == 0) {
+  if (strncmp(encoder->name, "vp9", 3) == 0) {
     vpx_codec_control(&codec, VP8E_SET_CPUUSED, 3);
     vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0);
     if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) {
@@ -508,9 +576,13 @@
     layer_id.spatial_layer_id = 0;
     layer_id.temporal_layer_id =
         cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
-    vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+    if (strncmp(encoder->name, "vp9", 3) == 0) {
+      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+    }
     flags = layer_flags[frame_cnt % flag_periodicity];
-    frame_avail = !read_yuv_frame(&input_ctx, &raw);
+    frame_avail = vpx_img_read(&raw, infile);
+    if (frame_avail)
+      ++rc.layer_input_frames[layer_id.temporal_layer_id];
     if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags,
         VPX_DL_REALTIME)) {
       die_codec(&codec, "Failed to encode frame");
@@ -528,7 +600,17 @@
               i < cfg.ts_number_layers; ++i) {
             vpx_video_writer_write_frame(outfile[i], pkt->data.frame.buf,
                                          pkt->data.frame.sz, pts);
-            ++frames_in_layer[i];
+            ++rc.layer_tot_enc_frames[i];
+            rc.layer_encoding_bitrate[i] += 8.0 * pkt->data.frame.sz;
+            // Keep count of rate control stats per layer (for non-key frames).
+            if (i == cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity] &&
+                !(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
+              rc.layer_avg_frame_size[i] += 8.0 * pkt->data.frame.sz;
+              rc.layer_avg_rate_mismatch[i] +=
+                  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[i]) /
+                  rc.layer_pfb[i];
+              ++rc.layer_enc_frames[i];
+            }
           }
           break;
           default:
@@ -538,8 +620,9 @@
     ++frame_cnt;
     pts += frame_duration;
   }
-  fclose(input_ctx.file);
-  printf("Processed %d frames: \n", frame_cnt - 1);
+  fclose(infile);
+  printout_rate_control_summary(&rc, &cfg, frame_cnt);
+
   if (vpx_codec_destroy(&codec))
     die_codec(&codec, "Failed to destroy codec");
 
diff --git a/test/codec_factory.h b/test/codec_factory.h
index 80e87c8..7f9398c 100644
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -24,6 +24,8 @@
 #include "test/encode_test_driver.h"
 namespace libvpx_test {
 
+const int kCodecFactoryParam = 0;
+
 class CodecFactory {
  public:
   CodecFactory() {}
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 31b8239..5ae76d7 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -198,6 +198,7 @@
     last_pts_ = 0;
     bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
     frame_number_ = 0;
+    tot_frame_number_ = 0;
     first_drop_ = 0;
     num_drops_ = 0;
     // For testing up to 3 layers.
@@ -294,11 +295,22 @@
 
 
   virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    int layer = SetLayerId(frame_number_, cfg_.ts_number_layers);
-
     // Time since last timestamp = duration.
     vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
 
+    if (duration > 1) {
+      // If first drop not set and we have a drop set it to this time.
+      if (!first_drop_)
+        first_drop_ = last_pts_ + 1;
+      // Update the number of frame drops.
+      num_drops_ += static_cast<int>(duration - 1);
+      // Update counter for total number of frames (#frames input to encoder).
+      // Needed for setting the proper layer_id below.
+      tot_frame_number_ += static_cast<int>(duration - 1);
+    }
+
+    int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers);
+
     // Add to the buffer the bits we'd expect from a constant bitrate server.
     bits_in_buffer_model_ += static_cast<int64_t>(
         duration * timebase_ * cfg_.rc_target_bitrate * 1000);
@@ -315,18 +327,10 @@
       bits_total_[i] += frame_size_in_bits;
     }
 
-    // If first drop not set and we have a drop set it to this time.
-    if (!first_drop_ && duration > 1)
-      first_drop_ = last_pts_ + 1;
-
-    // Update the number of frame drops.
-    if (duration > 1) {
-      num_drops_ += static_cast<int>(duration - 1);
-    }
-
     // Update the most recent pts.
     last_pts_ = pkt->data.frame.pts;
     ++frame_number_;
+    ++tot_frame_number_;
   }
 
   virtual void EndPassHook(void) {
@@ -342,7 +346,8 @@
 
   vpx_codec_pts_t last_pts_;
   double timebase_;
-  int frame_number_;
+  int frame_number_;      // Counter for number of non-dropped/encoded frames.
+  int tot_frame_number_;  // Counter for total number of input frames.
   int64_t bits_total_[3];
   double duration_;
   double effective_datarate_[3];
@@ -489,14 +494,11 @@
 }
 
 // Check basic rate targeting for 3 temporal layers.
-TEST_P(DatarateTestVP9, DISABLED_BasicRateTargeting3TemporalLayers) {
+TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayers) {
   cfg_.rc_buf_initial_sz = 500;
   cfg_.rc_buf_optimal_sz = 500;
   cfg_.rc_buf_sz = 1000;
-  // TODO(marpan): For now keep frame dropper off. Need to investigate an
-  // issue (rate-mismatch) that occcurs at speed 3 and low bitrate (200k) when
-  // frame dropper is on.
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 1;
   cfg_.rc_min_quantizer = 0;
   cfg_.rc_max_quantizer = 63;
   cfg_.rc_end_usage = VPX_CBR;
@@ -529,8 +531,53 @@
     }
   }
 }
+
+// Check basic rate targeting for 3 temporal layers, with frame dropping.
+// Only for one (low) bitrate with lower max_quantizer, and somewhat higher
+// frame drop threshold, to force frame dropping.
+TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayersFrameDropping) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  // Set frame drop threshold and rc_max_quantizer to force some frame drops.
+  cfg_.rc_dropframe_thresh = 20;
+  cfg_.rc_max_quantizer = 45;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+
+  // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1).
+  cfg_.ss_number_layers = 1;
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 200);
+  cfg_.rc_target_bitrate = 200;
+  ResetModel();
+  // 40-20-40 bitrate allocation: targets are cumulative (40%, 60%, 100%).
+  cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+  cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+  cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
+    ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+        << " The datarate for the file is lower than target by too much, "
+            "for layer: " << j;
+    ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+        << " The datarate for the file is greater than target by too much, "
+            "for layer: " << j;
+  }
+  // Expect some frame drops in this test: for this 200 frames test,
+  // expect at least 10% and not more than 50% drops.
+  ASSERT_GE(num_drops_, 20);
+  ASSERT_LE(num_drops_, 100);
+}
+
 VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES);
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9,
                           ::testing::Values(::libvpx_test::kOnePassGood),
-                          ::testing::Range(1, 5));
+                          ::testing::Range(2, 5));
 }  // namespace
diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc
index 7a93e50..e667d1d 100644
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -15,11 +15,13 @@
 
 namespace libvpx_test {
 
-vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, int size) {
+vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
   vpx_codec_err_t res_dec;
   InitOnce();
-  REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_,
-                                                  cxdata, size, NULL, 0));
+  REGISTER_STATE_CHECK(
+      res_dec = vpx_codec_decode(&decoder_,
+                                 cxdata, static_cast<unsigned int>(size),
+                                 NULL, 0));
   return res_dec;
 }
 
diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h
index 3e8ff72..2734a45 100644
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -49,7 +49,7 @@
     vpx_codec_destroy(&decoder_);
   }
 
-  vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, int size);
+  vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size);
 
   DxDataIterator GetDxData() {
     return DxDataIterator(&decoder_);
@@ -76,6 +76,15 @@
     return detail ? detail : vpx_codec_error(&decoder_);
   }
 
+  // Passes the external frame buffer information to libvpx.
+  vpx_codec_err_t SetFrameBufferFunctions(
+      vpx_get_frame_buffer_cb_fn_t cb_get,
+      vpx_release_frame_buffer_cb_fn_t cb_release, void *user_priv) {
+    InitOnce();
+    return vpx_codec_set_frame_buffer_functions(
+        &decoder_, cb_get, cb_release, user_priv);
+  }
+
  protected:
   virtual vpx_codec_iface_t* CodecInterface() const = 0;
 
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
new file mode 100644
index 0000000..2e7adc1
--- /dev/null
+++ b/test/external_frame_buffer_test.cc
@@ -0,0 +1,466 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/test_vectors.h"
+#include "test/util.h"
+#include "test/webm_video_source.h"
+
+namespace {
+
+const int kVideoNameParam = 1;
+const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
+
+struct ExternalFrameBuffer {
+  uint8_t *data;
+  size_t size;
+  int in_use;
+};
+
+// Class to manipulate a list of external frame buffers.
+class ExternalFrameBufferList {
+ public:
+  ExternalFrameBufferList()
+      : num_buffers_(0),
+        ext_fb_list_(NULL) {}
+
+  virtual ~ExternalFrameBufferList() {
+    for (int i = 0; i < num_buffers_; ++i) {
+      delete [] ext_fb_list_[i].data;
+    }
+    delete [] ext_fb_list_;
+  }
+
+  // Creates the list to hold the external buffers. Returns true on success.
+  bool CreateBufferList(int num_buffers) {
+    if (num_buffers < 0)
+      return false;
+
+    num_buffers_ = num_buffers;
+    ext_fb_list_ = new ExternalFrameBuffer[num_buffers_];
+    EXPECT_TRUE(ext_fb_list_ != NULL);
+    memset(ext_fb_list_, 0, sizeof(ext_fb_list_[0]) * num_buffers_);
+    return true;
+  }
+
+  // Searches the frame buffer list for a free frame buffer. Makes sure
+  // that the frame buffer is at least |min_size| in bytes. Marks that the
+  // frame buffer is in use by libvpx. Finally sets |fb| to point to the
+  // external frame buffer. Returns < 0 on an error.
+  int GetFreeFrameBuffer(size_t min_size, vpx_codec_frame_buffer_t *fb) {
+    EXPECT_TRUE(fb != NULL);
+    const int idx = FindFreeBufferIndex();
+    if (idx == num_buffers_)
+      return -1;
+
+    if (ext_fb_list_[idx].size < min_size) {
+      delete [] ext_fb_list_[idx].data;
+      ext_fb_list_[idx].data = new uint8_t[min_size];
+      ext_fb_list_[idx].size = min_size;
+    }
+
+    SetFrameBuffer(idx, fb);
+    return 0;
+  }
+
+  // Test function that will not allocate any data for the frame buffer.
+  // Returns < 0 on an error.
+  int GetZeroFrameBuffer(size_t min_size, vpx_codec_frame_buffer_t *fb) {
+    EXPECT_TRUE(fb != NULL);
+    const int idx = FindFreeBufferIndex();
+    if (idx == num_buffers_)
+      return -1;
+
+    if (ext_fb_list_[idx].size < min_size) {
+      delete [] ext_fb_list_[idx].data;
+      ext_fb_list_[idx].data = NULL;
+      ext_fb_list_[idx].size = min_size;
+    }
+
+    SetFrameBuffer(idx, fb);
+    return 0;
+  }
+
+  // Marks the external frame buffer that |fb| is pointing to as free.
+  // Returns < 0 on an error.
+  int ReturnFrameBuffer(vpx_codec_frame_buffer_t *fb) {
+    EXPECT_TRUE(fb != NULL);
+    ExternalFrameBuffer *const ext_fb =
+        reinterpret_cast<ExternalFrameBuffer*>(fb->priv);
+    EXPECT_TRUE(ext_fb != NULL);
+    EXPECT_EQ(1, ext_fb->in_use);
+    ext_fb->in_use = 0;
+    return 0;
+  }
+
+  // Checks that the ximage data is contained within the external frame buffer
+  // private data passed back in the ximage.
+  void CheckXImageFrameBuffer(const vpx_image_t *img) {
+    if (img->fb_priv != NULL) {
+      const struct ExternalFrameBuffer *const ext_fb =
+          reinterpret_cast<ExternalFrameBuffer*>(img->fb_priv);
+
+      ASSERT_TRUE(img->planes[0] >= ext_fb->data &&
+                  img->planes[0] < (ext_fb->data + ext_fb->size));
+    }
+  }
+
+ private:
+  // Returns the index of the first free frame buffer. Returns |num_buffers_|
+  // if there are no free frame buffers.
+  int FindFreeBufferIndex() {
+    int i;
+    // Find a free frame buffer.
+    for (i = 0; i < num_buffers_; ++i) {
+      if (!ext_fb_list_[i].in_use)
+        break;
+    }
+    return i;
+  }
+
+  // Sets |fb| to an external frame buffer. idx is the index into the frame
+  // buffer list.
+  void SetFrameBuffer(int idx, vpx_codec_frame_buffer_t *fb) {
+    ASSERT_TRUE(fb != NULL);
+    fb->data = ext_fb_list_[idx].data;
+    fb->size = ext_fb_list_[idx].size;
+    ASSERT_EQ(0, ext_fb_list_[idx].in_use);
+    ext_fb_list_[idx].in_use = 1;
+    fb->priv = &ext_fb_list_[idx];
+  }
+
+  int num_buffers_;
+  ExternalFrameBuffer *ext_fb_list_;
+};
+
+// Callback used by libvpx to request the application to return a frame
+// buffer of at least |min_size| in bytes.
+int get_vp9_frame_buffer(void *user_priv, size_t min_size,
+                         vpx_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+  return fb_list->GetFreeFrameBuffer(min_size, fb);
+}
+
+// Callback used by libvpx to tell the application that |fb| is not needed
+// anymore.
+int release_vp9_frame_buffer(void *user_priv,
+                             vpx_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+  return fb_list->ReturnFrameBuffer(fb);
+}
+
+// Callback will not allocate data for frame buffer.
+int get_vp9_zero_frame_buffer(void *user_priv, size_t min_size,
+                              vpx_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+  return fb_list->GetZeroFrameBuffer(min_size, fb);
+}
+
+// Callback will allocate one less byte than |min_size|.
+int get_vp9_one_less_byte_frame_buffer(void *user_priv, size_t min_size,
+                                       vpx_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+  return fb_list->GetFreeFrameBuffer(min_size - 1, fb);
+}
+
+// Callback will not release the external frame buffer.
+int do_not_release_vp9_frame_buffer(void *user_priv,
+                                    vpx_codec_frame_buffer_t *fb) {
+  (void)user_priv;
+  (void)fb;
+  return 0;
+}
+
+// Class for testing passing in external frame buffers to libvpx.
+class ExternalFrameBufferMD5Test
+    : public ::libvpx_test::DecoderTest,
+      public ::libvpx_test::CodecTestWithParam<const char*> {
+ protected:
+  ExternalFrameBufferMD5Test()
+      : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)),
+        md5_file_(NULL),
+        num_buffers_(0) {}
+
+  virtual ~ExternalFrameBufferMD5Test() {
+    if (md5_file_ != NULL)
+      fclose(md5_file_);
+  }
+
+  virtual void PreDecodeFrameHook(
+      const libvpx_test::CompressedVideoSource &video,
+      libvpx_test::Decoder *decoder) {
+    if (num_buffers_ > 0 && video.frame_number() == 0) {
+      // Have libvpx use frame buffers we create.
+      ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_));
+      ASSERT_EQ(VPX_CODEC_OK,
+                decoder->SetFrameBufferFunctions(
+                    GetVp9FrameBuffer, ReleaseVP9FrameBuffer, this));
+    }
+  }
+
+  void OpenMD5File(const std::string &md5_file_name_) {
+    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
+    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
+        << md5_file_name_;
+  }
+
+  virtual void DecompressedFrameHook(const vpx_image_t &img,
+                                     const unsigned int frame_number) {
+    ASSERT_TRUE(md5_file_ != NULL);
+    char expected_md5[33];  // 32 checksum characters plus the NUL.
+    char junk[128];         // Second field of the md5 line; value is unused.
+
+    // Read correct md5 checksums; the widths keep fscanf inside both buffers.
+    const int res = fscanf(md5_file_, "%32s  %127s", expected_md5, junk);
+    ASSERT_NE(EOF, res) << "Read md5 data failed";
+    expected_md5[32] = '\0';
+
+    ::libvpx_test::MD5 md5_res;
+    md5_res.Add(&img);
+    const char *const actual_md5 = md5_res.Get();
+
+    // Check md5 match.
+    ASSERT_STREQ(expected_md5, actual_md5)
+        << "Md5 checksums don't match: frame number = " << frame_number;
+  }
+
+  // Callback to get a free external frame buffer. Return value < 0 is an
+  // error.
+  static int GetVp9FrameBuffer(void *user_priv, size_t min_size,
+                               vpx_codec_frame_buffer_t *fb) {
+    ExternalFrameBufferMD5Test *const md5Test =
+        reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv);
+    return md5Test->fb_list_.GetFreeFrameBuffer(min_size, fb);
+  }
+
+  // Callback to release an external frame buffer. Return value < 0 is an
+  // error.
+  static int ReleaseVP9FrameBuffer(void *user_priv,
+                                   vpx_codec_frame_buffer_t *fb) {
+    ExternalFrameBufferMD5Test *const md5Test =
+        reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv);
+    return md5Test->fb_list_.ReturnFrameBuffer(fb);
+  }
+
+  void set_num_buffers(int num_buffers) { num_buffers_ = num_buffers; }
+  int num_buffers() const { return num_buffers_; }
+
+ private:
+  FILE *md5_file_;
+  int num_buffers_;
+  ExternalFrameBufferList fb_list_;
+};
+
+// Class for testing passing in external frame buffers to libvpx.
+class ExternalFrameBufferTest : public ::testing::Test {
+ protected:
+  ExternalFrameBufferTest()
+      : video_(NULL),
+        decoder_(NULL),
+        num_buffers_(0) {}
+
+  virtual void SetUp() {
+    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
+    ASSERT_TRUE(video_ != NULL);
+    video_->Init();
+    video_->Begin();
+
+    vpx_codec_dec_cfg_t cfg = {0};
+    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
+    ASSERT_TRUE(decoder_ != NULL);
+  }
+
+  virtual void TearDown() {
+    delete decoder_;
+    delete video_;
+  }
+
+  // Passes the external frame buffer information to libvpx.
+  vpx_codec_err_t SetFrameBufferFunctions(
+      int num_buffers,
+      vpx_get_frame_buffer_cb_fn_t cb_get,
+      vpx_release_frame_buffer_cb_fn_t cb_release) {
+    if (num_buffers > 0) {
+      num_buffers_ = num_buffers;
+      EXPECT_TRUE(fb_list_.CreateBufferList(num_buffers_));
+    }
+
+    return decoder_->SetFrameBufferFunctions(cb_get, cb_release, &fb_list_);
+  }
+
+  vpx_codec_err_t DecodeOneFrame() {
+    const vpx_codec_err_t res =
+        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+    CheckDecodedFrames();
+    if (res == VPX_CODEC_OK)
+      video_->Next();
+    return res;
+  }
+
+  vpx_codec_err_t DecodeRemainingFrames() {
+    for (; video_->cxdata() != NULL; video_->Next()) {
+      const vpx_codec_err_t res =
+          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+      if (res != VPX_CODEC_OK)
+        return res;
+      CheckDecodedFrames();
+    }
+    return VPX_CODEC_OK;
+  }
+
+ private:
+  void CheckDecodedFrames() {
+    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
+    const vpx_image_t *img = NULL;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next()) != NULL) {
+      fb_list_.CheckXImageFrameBuffer(img);
+    }
+  }
+
+  libvpx_test::WebMVideoSource *video_;
+  libvpx_test::VP9Decoder *decoder_;
+  int num_buffers_;
+  ExternalFrameBufferList fb_list_;
+};
+
+// This test runs through the set of test vectors, and decodes them.
+// Libvpx will call into the application to allocate a frame buffer when
+// needed. The md5 checksums are computed for each frame in the video file.
+// If md5 checksums match the correct md5 data, then the test is passed.
+// Otherwise, the test failed.
+TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
+  const std::string filename = GET_PARAM(kVideoNameParam);
+  libvpx_test::CompressedVideoSource *video = NULL;
+
+  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+  const int jitter_buffers = 4;
+  const int num_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
+  set_num_buffers(num_buffers);
+
+#if CONFIG_VP8_DECODER
+  // Tell compiler we are not using kVP8TestVectors.
+  (void)libvpx_test::kVP8TestVectors;
+#endif
+
+  // Open compressed video file.
+  if (filename.substr(filename.length() - 3, 3) == "ivf") {
+    video = new libvpx_test::IVFVideoSource(filename);
+  } else {
+    video = new libvpx_test::WebMVideoSource(filename);
+  }
+  ASSERT_TRUE(video != NULL);
+  video->Init();
+
+  // Construct md5 file name.
+  const std::string md5_filename = filename + ".md5";
+  OpenMD5File(md5_filename);
+
+  // Decode frame, and check the md5 matching.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+  delete video;
+}
+
+TEST_F(ExternalFrameBufferTest, MinFrameBuffers) {
+  // Minimum number of external frame buffers for VP9 is
+  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS.
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(VPX_CODEC_OK,
+            SetFrameBufferFunctions(
+                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, EightJitterBuffers) {
+  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+  // #VPX_MAXIMUM_WORK_BUFFERS + eight jitter buffers.
+  const int jitter_buffers = 8;
+  const int num_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
+  ASSERT_EQ(VPX_CODEC_OK,
+            SetFrameBufferFunctions(
+                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
+  // Minimum number of external frame buffers for VP9 is
+  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS. Most files will
+  // only use 5 frame buffers at one time.
+  const int num_buffers = 2;
+  ASSERT_EQ(VPX_CODEC_OK,
+            SetFrameBufferFunctions(
+                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+  ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame());
+  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, NoRelease) {
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(VPX_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
+                                    do_not_release_vp9_frame_buffer));
+  ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame());
+  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, NullRealloc) {
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(VPX_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_vp9_zero_frame_buffer,
+                                    release_vp9_frame_buffer));
+  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeOneFrame());
+}
+
+TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) {
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(VPX_CODEC_OK,
+            SetFrameBufferFunctions(
+                num_buffers, get_vp9_one_less_byte_frame_buffer,
+                release_vp9_frame_buffer));
+  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeOneFrame());
+}
+
+TEST_F(ExternalFrameBufferTest, NullGetFunction) {
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
+            SetFrameBufferFunctions(num_buffers, NULL,
+                                    release_vp9_frame_buffer));
+}
+
+TEST_F(ExternalFrameBufferTest, NullReleaseFunction) {
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
+            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, NULL));
+}
+
+TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame());
+  ASSERT_EQ(VPX_CODEC_ERROR,
+            SetFrameBufferFunctions(
+                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+}
+
+VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test,
+                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors));
+}  // namespace
diff --git a/test/test.mk b/test/test.mk
index a65decf..b598d6b 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -36,6 +36,7 @@
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ivf_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
 
 ## WebM Parsing
 NESTEGG_SRCS                           += ../nestegg/halloc/halloc.h
diff --git a/tools_common.c b/tools_common.c
index 5354687..f0e1606 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -15,6 +15,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+#include "vpx/vp8cx.h"
+#endif
+
 #if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
 #include "vpx/vp8dx.h"
 #endif
@@ -144,30 +148,103 @@
   return shortread;
 }
 
-vpx_codec_iface_t *get_codec_interface(unsigned int fourcc) {
-  switch (fourcc) {
-#if CONFIG_VP8_DECODER
-    case VP8_FOURCC:
-      return vpx_codec_vp8_dx();
+static const VpxInterface vpx_encoders[] = {
+#if CONFIG_VP8_ENCODER
+  {"vp8", VP8_FOURCC, &vpx_codec_vp8_cx},
 #endif
-#if CONFIG_VP9_DECODER
-    case VP9_FOURCC:
-      return vpx_codec_vp9_dx();
+
+#if CONFIG_VP9_ENCODER
+  {"vp9", VP9_FOURCC, &vpx_codec_vp9_cx},
 #endif
-    default:
-      return NULL;
+};
+
+int get_vpx_encoder_count() {
+  return sizeof(vpx_encoders) / sizeof(vpx_encoders[0]);
+}
+
+const VpxInterface *get_vpx_encoder_by_index(int i) {
+  return &vpx_encoders[i];
+}
+
+const VpxInterface *get_vpx_encoder_by_name(const char *name) {
+  int i;
+
+  for (i = 0; i < get_vpx_encoder_count(); ++i) {
+    const VpxInterface *encoder = get_vpx_encoder_by_index(i);
+    if (strcmp(encoder->name, name) == 0)
+      return encoder;
   }
+
   return NULL;
 }
 
+static const VpxInterface vpx_decoders[] = {
+#if CONFIG_VP8_DECODER
+  {"vp8", VP8_FOURCC, &vpx_codec_vp8_dx},
+#endif
+
+#if CONFIG_VP9_DECODER
+  {"vp9", VP9_FOURCC, &vpx_codec_vp9_dx},
+#endif
+};
+
+int get_vpx_decoder_count() {
+  return sizeof(vpx_decoders) / sizeof(vpx_decoders[0]);
+}
+
+const VpxInterface *get_vpx_decoder_by_index(int i) {
+  return &vpx_decoders[i];
+}
+
+const VpxInterface *get_vpx_decoder_by_name(const char *name) {
+  int i;
+
+  for (i = 0; i < get_vpx_decoder_count(); ++i) {
+     const VpxInterface *const decoder = get_vpx_decoder_by_index(i);
+     if (strcmp(decoder->name, name) == 0)
+       return decoder;
+  }
+
+  return NULL;
+}
+
+const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc) {
+  int i;
+
+  for (i = 0; i < get_vpx_decoder_count(); ++i) {
+    const VpxInterface *const decoder = get_vpx_decoder_by_index(i);
+    if (decoder->fourcc == fourcc)
+      return decoder;
+  }
+
+  return NULL;
+}
+
+// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part
+// of vpx_image_t support
+int vpx_img_plane_width(const vpx_image_t *img, int plane) {
+  if (plane > 0 && img->x_chroma_shift > 0)
+    return (img->d_w + 1) >> img->x_chroma_shift;
+  else
+    return img->d_w;
+}
+
+int vpx_img_plane_height(const vpx_image_t *img, int plane) {
+  if (plane > 0 &&  img->y_chroma_shift > 0)
+    return (img->d_h + 1) >> img->y_chroma_shift;
+  else
+    return img->d_h;
+}
+
 void vpx_img_write(const vpx_image_t *img, FILE *file) {
-  int plane, y;
+  int plane;
 
   for (plane = 0; plane < 3; ++plane) {
     const unsigned char *buf = img->planes[plane];
     const int stride = img->stride[plane];
-    const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
-    const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
+    const int w = vpx_img_plane_width(img, plane);
+    const int h = vpx_img_plane_height(img, plane);
+    int y;
 
     for (y = 0; y < h; ++y) {
       fwrite(buf, 1, w, file);
@@ -182,8 +259,8 @@
   for (plane = 0; plane < 3; ++plane) {
     unsigned char *buf = img->planes[plane];
     const int stride = img->stride[plane];
-    const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
-    const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
+    const int w = vpx_img_plane_width(img, plane);
+    const int h = vpx_img_plane_height(img, plane);
     int y;
 
     for (y = 0; y < h; ++y) {
diff --git a/tools_common.h b/tools_common.h
index 0f60c4c..2e90259 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -123,14 +123,26 @@
 
 int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame);
 
-vpx_codec_iface_t *get_codec_interface(unsigned int fourcc);
+typedef struct VpxInterface {
+  const char *const name;
+  const uint32_t fourcc;
+  vpx_codec_iface_t *(*const interface)();
+} VpxInterface;
+
+int get_vpx_encoder_count();
+const VpxInterface *get_vpx_encoder_by_index(int i);
+const VpxInterface *get_vpx_encoder_by_name(const char *name);
+
+int get_vpx_decoder_count();
+const VpxInterface *get_vpx_decoder_by_index(int i);
+const VpxInterface *get_vpx_decoder_by_name(const char *name);
+const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc);
 
 // TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part
 // of vpx_image_t support
+int vpx_img_plane_width(const vpx_image_t *img, int plane);
+int vpx_img_plane_height(const vpx_image_t *img, int plane);
 void vpx_img_write(const vpx_image_t *img, FILE *file);
-
-// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part
-// of vpx_image_t support
 int vpx_img_read(vpx_image_t *img, FILE *file);
 
 #ifdef __cplusplus
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 871b8d3..0b4c4cb 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -929,6 +929,7 @@
         vp8_get_si,       /* vpx_codec_get_si_fn_t     get_si; */
         vp8_decode,       /* vpx_codec_decode_fn_t     decode; */
         vp8_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
+        NOT_IMPLEMENTED,
     },
     { /* encoder functions */
         NOT_IMPLEMENTED,
diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
index 8cb913c..5fe2bba 100644
--- a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
+++ b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
@@ -439,6 +439,9 @@
     tst         r7, #1
     bxne        lr
 
+    orrs        r5, r5, r6                 ; Check for 0
+    orreq       r7, r7, #2                 ; Only do mbfilter branch
+
     ; mbfilter flat && mask branch
     ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's
     ; and using vibt on the q's?
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index 8cc6571..d918bed 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -98,16 +98,6 @@
     vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
 }
 
-void vp9_foreach_transformed_block_uv(const MACROBLOCKD* const xd,
-                                      BLOCK_SIZE bsize,
-                                      foreach_transformed_block_visitor visit,
-                                      void *arg) {
-  int plane;
-
-  for (plane = 1; plane < MAX_MB_PLANE; plane++)
-    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
-}
-
 void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
                       BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
                       int aoff, int loff) {
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index f10a3c8..6086323 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -89,7 +89,6 @@
 
 #define INTER_OFFSET(mode) ((mode) - NEARESTMV)
 
-
 /* For keyframes, intra block modes are predicted by the (already decoded)
    modes for the Y blocks to the left and above us; for interframes, there
    is a single probability table. */
@@ -129,7 +128,7 @@
 
   uint8_t mode_context[MAX_REF_FRAMES];
 
-  unsigned char skip_coeff;    // 0=need to decode coeffs, 1=no coefficients
+  unsigned char skip;    // 0=need to decode coeffs, 1=no coefficients
   unsigned char segment_id;    // Segment id for this block.
 
   // Flags used for prediction status of various bit-stream signals
@@ -314,11 +313,6 @@
     const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
     foreach_transformed_block_visitor visit, void *arg);
 
-
-void vp9_foreach_transformed_block_uv(
-    const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
-    foreach_transformed_block_visitor visit, void *arg);
-
 static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
                                             TX_SIZE tx_size, int block,
                                             int *x, int *y) {
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index 355ac1a..24c785f 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -58,7 +58,7 @@
 
   print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
   print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
-  print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip_coeff));
+  print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip));
   print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
   print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
   print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
diff --git a/vp9/common/vp9_frame_buffers.c b/vp9/common/vp9_frame_buffers.c
index d903ed6..dffeb8a 100644
--- a/vp9/common/vp9_frame_buffers.c
+++ b/vp9/common/vp9_frame_buffers.c
@@ -42,7 +42,7 @@
   int i;
   InternalFrameBufferList *const int_fb_list =
       (InternalFrameBufferList *)cb_priv;
-  if (int_fb_list == NULL || fb == NULL)
+  if (int_fb_list == NULL)
     return -1;
 
   // Find a free frame buffer.
@@ -73,12 +73,8 @@
 }
 
 int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
-  InternalFrameBuffer *int_fb;
+  InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv;
   (void)cb_priv;
-  if (fb == NULL)
-    return -1;
-
-  int_fb = (InternalFrameBuffer *)fb->priv;
   int_fb->in_use = 0;
   return 0;
 }
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 07d7a92..04f8934 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -496,7 +496,7 @@
   const BLOCK_SIZE block_size = mi->mbmi.sb_type;
   const TX_SIZE tx_size_y = mi->mbmi.tx_size;
   const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi);
-  const int skip = mi->mbmi.skip_coeff;
+  const int skip = mi->mbmi.skip;
   const int seg = mi->mbmi.segment_id;
   const int ref = mi->mbmi.ref_frame[0];
   const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
@@ -577,7 +577,7 @@
                          LOOP_FILTER_MASK *lfm) {
   const BLOCK_SIZE block_size = mi->mbmi.sb_type;
   const TX_SIZE tx_size_y = mi->mbmi.tx_size;
-  const int skip = mi->mbmi.skip_coeff;
+  const int skip = mi->mbmi.skip;
   const int seg = mi->mbmi.segment_id;
   const int ref = mi->mbmi.ref_frame[0];
   const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
@@ -937,8 +937,7 @@
     for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
       const MODE_INFO *mi = mi_8x8[c];
       const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
-      const int skip_this = mi[0].mbmi.skip_coeff
-                            && is_inter_block(&mi[0].mbmi);
+      const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
       // left edge of current unit is block/partition edge -> no skip
       const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
           !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index a172ba6..7baa9ee 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -700,7 +700,7 @@
         char zz[4];
         int dc_diff = !(mi[mb_index].mbmi.mode != I4X4_PRED &&
                         mi[mb_index].mbmi.mode != SPLITMV &&
-                        mi[mb_index].mbmi.skip_coeff);
+                        mi[mb_index].mbmi.skip);
 
         if (cm->frame_type == KEY_FRAME)
           snprintf(zz, sizeof(zz) - 1, "a");
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 487f00c..197bcb6 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -353,10 +353,10 @@
   const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
   const int has_above = above_mbmi != NULL;
   const int has_left = left_mbmi != NULL;
-  int above_ctx = (has_above && !above_mbmi->skip_coeff) ? above_mbmi->tx_size
-                                                         : max_tx_size;
-  int left_ctx = (has_left && !left_mbmi->skip_coeff) ? left_mbmi->tx_size
-                                                      : max_tx_size;
+  int above_ctx = (has_above && !above_mbmi->skip) ? above_mbmi->tx_size
+                                                   : max_tx_size;
+  int left_ctx = (has_left && !left_mbmi->skip) ? left_mbmi->tx_size
+                                                : max_tx_size;
   if (!has_left)
     left_ctx = above_ctx;
 
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 33ae5a8..7f9e563 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -47,8 +47,8 @@
 static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
   const MODE_INFO *const above_mi = get_above_mi(xd);
   const MODE_INFO *const left_mi = get_left_mi(xd);
-  const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip_coeff : 0;
-  const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip_coeff : 0;
+  const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0;
+  const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0;
   return above_skip + left_skip;
 }
 
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 7bdd11e..6317103 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -264,13 +264,13 @@
 specialize vp9_convolve_avg $sse2_x86inc neon dspr2
 
 prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8 sse2 ssse3 neon dspr2
+specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2
 
 prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_horiz sse2 ssse3 neon dspr2
+specialize vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2
 
 prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_vert sse2 ssse3 neon dspr2
+specialize vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2
 
 prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
 specialize vp9_convolve8_avg sse2 ssse3 neon dspr2
@@ -737,20 +737,20 @@
 #
 # Motion search
 #
-prototype int vp9_full_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, int n"
+prototype int vp9_full_search_sad "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, int n"
 specialize vp9_full_search_sad sse3 sse4_1
 vp9_full_search_sad_sse3=vp9_full_search_sadx3
 vp9_full_search_sad_sse4_1=vp9_full_search_sadx8
 
-prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
+prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
 specialize vp9_refining_search_sad sse3
 vp9_refining_search_sad_sse3=vp9_refining_search_sadx4
 
-prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
+prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
 specialize vp9_diamond_search_sad sse3
 vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
 
-prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
+prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
 specialize vp9_full_range_search
 
 prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 60018ea..a2cf910 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -139,7 +139,49 @@
                            filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
   } \
 }
+#if HAVE_AVX2
+filter8_1dfunction vp9_filter_block1d16_v8_avx2;
+filter8_1dfunction vp9_filter_block1d16_h8_avx2;
+filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
+#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
+#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
+#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
+#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
+#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
+#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
+#define vp9_filter_block1d8_v2_avx2  vp9_filter_block1d8_v2_ssse3
+#define vp9_filter_block1d8_h2_avx2  vp9_filter_block1d8_h2_ssse3
+#define vp9_filter_block1d4_v2_avx2  vp9_filter_block1d4_v2_ssse3
+#define vp9_filter_block1d4_h2_avx2  vp9_filter_block1d4_h2_ssse3
+// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
+//                                uint8_t *dst, ptrdiff_t dst_stride,
+//                                const int16_t *filter_x, int x_step_q4,
+//                                const int16_t *filter_y, int y_step_q4,
+//                                int w, int h);
+// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
+//                               uint8_t *dst, ptrdiff_t dst_stride,
+//                               const int16_t *filter_x, int x_step_q4,
+//                               const int16_t *filter_y, int y_step_q4,
+//                               int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
 
+// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
+//                          uint8_t *dst, ptrdiff_t dst_stride,
+//                          const int16_t *filter_x, int x_step_q4,
+//                          const int16_t *filter_y, int y_step_q4,
+//                          int w, int h);
+FUN_CONV_2D(, avx2);
+#endif
 #if HAVE_SSSE3
 filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
 filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
new file mode 100644
index 0000000..0ffb1bc
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -0,0 +1,542 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <immintrin.h>
+#include "vpx_ports/mem.h"
+
+// filters for 16_h8 and 16_v8
+DECLARE_ALIGNED(32, const unsigned char, filt1_global_avx2[32])= {
+  0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+  0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8};
+
+DECLARE_ALIGNED(32, const unsigned char, filt2_global_avx2[32])= {
+  2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
+  2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10};
+
+DECLARE_ALIGNED(32, const unsigned char, filt3_global_avx2[32])= {
+  4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,
+  4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12};
+
+DECLARE_ALIGNED(32, const unsigned char, filt4_global_avx2[32])= {
+  6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,
+  6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14};
+
+
+void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
+                                  unsigned int src_pixels_per_line,
+                                  unsigned char *output_ptr,
+                                  unsigned int  output_pitch,
+                                  unsigned int  output_height,
+                                  int16_t *filter) {
+  __m128i filtersReg;
+  __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
+  __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
+  __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
+  __m256i srcReg32b1, srcReg32b2, filtersReg32;
+  unsigned int i;
+  unsigned int src_stride, dst_stride;
+
+  // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+  addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
+  filtersReg = _mm_loadu_si128((__m128i *)filter);
+  // converting the 16 bit (short) to 8 bit (byte) and have the same data
+  // in both lanes of 128 bit register.
+  filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+  // have the same data in both lanes of a 256 bit register
+#if defined (__GNUC__)
+#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \
+(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0))))
+  filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg);
+#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0))
+  filtersReg32 = _mm_broadcastsi128_si256(filtersReg);
+#else
+  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+#else
+  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+
+  // duplicate only the first 16 bits (first and second byte)
+  // across 256 bit register
+  firstFilters = _mm256_shuffle_epi8(filtersReg32,
+                 _mm256_set1_epi16(0x100u));
+  // duplicate only the second 16 bits (third and fourth byte)
+  // across 256 bit register
+  secondFilters = _mm256_shuffle_epi8(filtersReg32,
+                  _mm256_set1_epi16(0x302u));
+  // duplicate only the third 16 bits (fifth and sixth byte)
+  // across 256 bit register
+  thirdFilters = _mm256_shuffle_epi8(filtersReg32,
+                 _mm256_set1_epi16(0x504u));
+  // duplicate only the fourth 16 bits (seventh and eighth byte)
+  // across 256 bit register
+  forthFilters = _mm256_shuffle_epi8(filtersReg32,
+                 _mm256_set1_epi16(0x706u));
+
+  filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+  filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+  filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2);
+  filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2);
+
+  // multiply the size of the source and destination stride by two
+  src_stride = src_pixels_per_line << 1;
+  dst_stride = output_pitch << 1;
+  for (i = output_height; i > 1; i-=2) {
+    // load the 2 strides of source
+    srcReg32b1 = _mm256_castsi128_si256(
+                 _mm_loadu_si128((__m128i *)(src_ptr-3)));
+    srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
+                 _mm_loadu_si128((__m128i *)
+                 (src_ptr+src_pixels_per_line-3)), 1);
+
+    // filter the source buffer
+    srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
+    srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
+    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+
+    // add and saturate the results together
+    srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
+
+    // filter the source buffer
+    srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
+    srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
+
+    // add and saturate the results together
+    srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1,
+                       _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
+
+    // reading 2 strides of the next 16 bytes
+    // (part of it was being read by earlier read)
+    srcReg32b2 = _mm256_castsi128_si256(
+                 _mm_loadu_si128((__m128i *)(src_ptr+5)));
+    srcReg32b2 = _mm256_inserti128_si256(srcReg32b2,
+                 _mm_loadu_si128((__m128i *)
+                 (src_ptr+src_pixels_per_line+5)), 1);
+
+    // add and saturate the results together
+    srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1,
+                       _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
+
+    // filter the source buffer
+    srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
+    srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
+    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+
+    // add and saturate the results together
+    srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
+
+    // filter the source buffer
+    srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
+    srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
+
+    // add and saturate the results together
+    srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1,
+                       _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
+    srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1,
+                       _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
+
+
+    srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64);
+
+    srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64);
+
+    // shift by 7 bit each 16 bit
+    srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7);
+    srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7);
+
+    // shrink each 16 bits to 8 bits; the first lane contains the first
+    // convolve result and the second lane contains the second convolve
+    // result
+    srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1,
+                                           srcRegFilt32b2_1);
+
+    src_ptr+=src_stride;
+
+    // save 16 bytes
+    _mm_store_si128((__m128i*)output_ptr,
+    _mm256_castsi256_si128(srcRegFilt32b1_1));
+
+    // save the next 16 bytes
+    _mm_store_si128((__m128i*)(output_ptr+output_pitch),
+    _mm256_extractf128_si256(srcRegFilt32b1_1, 1));
+    output_ptr+=dst_stride;
+  }
+
+  // if the number of strides is odd.
+  // process only 16 bytes
+  if (i > 0) {
+    __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
+    __m128i srcRegFilt2, srcRegFilt3;
+
+    srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3));
+
+    // filter the source buffer
+    srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1,
+                    _mm256_castsi256_si128(filt1Reg));
+    srcRegFilt2 = _mm_shuffle_epi8(srcReg1,
+                  _mm256_castsi256_si128(filt2Reg));
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1,
+                    _mm256_castsi256_si128(firstFilters));
+    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+                  _mm256_castsi256_si128(secondFilters));
+
+    // add and saturate the results together
+    srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
+
+    // filter the source buffer
+    srcRegFilt3= _mm_shuffle_epi8(srcReg1,
+                 _mm256_castsi256_si128(filt4Reg));
+    srcRegFilt2= _mm_shuffle_epi8(srcReg1,
+                 _mm256_castsi256_si128(filt3Reg));
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
+                  _mm256_castsi256_si128(forthFilters));
+    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+                  _mm256_castsi256_si128(thirdFilters));
+
+    // add and saturate the results together
+    srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+                    _mm_min_epi16(srcRegFilt3, srcRegFilt2));
+
+    // reading the next 16 bytes
+    // (part of it was being read by earlier read)
+    srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5));
+
+    // add and saturate the results together
+    srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+                    _mm_max_epi16(srcRegFilt3, srcRegFilt2));
+
+    // filter the source buffer
+    srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2,
+                    _mm256_castsi256_si128(filt1Reg));
+    srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
+                  _mm256_castsi256_si128(filt2Reg));
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1,
+                    _mm256_castsi256_si128(firstFilters));
+    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+                  _mm256_castsi256_si128(secondFilters));
+
+    // add and saturate the results together
+    srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
+
+    // filter the source buffer
+    srcRegFilt3 = _mm_shuffle_epi8(srcReg2,
+                  _mm256_castsi256_si128(filt4Reg));
+    srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
+                  _mm256_castsi256_si128(filt3Reg));
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
+                  _mm256_castsi256_si128(forthFilters));
+    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
+                  _mm256_castsi256_si128(thirdFilters));
+
+    // add and saturate the results together
+    srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+                    _mm_min_epi16(srcRegFilt3, srcRegFilt2));
+    srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+                    _mm_max_epi16(srcRegFilt3, srcRegFilt2));
+
+
+    srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
+                    _mm256_castsi256_si128(addFilterReg64));
+
+    srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
+                    _mm256_castsi256_si128(addFilterReg64));
+
+    // shift by 7 bit each 16 bit
+    srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7);
+    srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7);
+
+    // shrink each 16 bits to 8 bits; the low 8 bytes hold the result for
+    // the first 8 pixels and the high 8 bytes hold the result for the
+    // next 8 pixels
+    srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1);
+
+    // save 16 bytes
+    _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1);
+  }
+}
+
+void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
+                                  unsigned int src_pitch,
+                                  unsigned char *output_ptr,
+                                  unsigned int out_pitch,
+                                  unsigned int output_height,
+                                  int16_t *filter) {
+  __m128i filtersReg;
+  __m256i addFilterReg64;
+  __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
+  __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
+  __m256i srcReg32b11, srcReg32b12, srcReg32b13, filtersReg32;
+  __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
+  unsigned int i;
+  unsigned int src_stride, dst_stride;
+  // Vertical 8-tap filter over 16-byte-wide rows; two output rows per pass.
+  // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
+  addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
+  filtersReg = _mm_loadu_si128((__m128i *)filter);
+  // convert the 16 bit (short) taps to 8 bit (byte) with signed saturation;
+  // both 64 bit halves of the 128 bit register then hold the same 8 taps.
+  filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
+  // have the same data in both lanes of a 256 bit register
+#if defined (__GNUC__)
+#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \
+(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0))))
+  filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg);
+#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0))
+  filtersReg32 = _mm_broadcastsi128_si256(filtersReg);
+#else
+  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+#else
+  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
+#endif
+
+  // duplicate only the first 16 bits (first and second byte)
+  // across 256 bit register
+  firstFilters = _mm256_shuffle_epi8(filtersReg32,
+                 _mm256_set1_epi16(0x100u));
+  // duplicate only the second 16 bits (third and fourth byte)
+  // across 256 bit register
+  secondFilters = _mm256_shuffle_epi8(filtersReg32,
+                  _mm256_set1_epi16(0x302u));
+  // duplicate only the third 16 bits (fifth and sixth byte)
+  // across 256 bit register
+  thirdFilters = _mm256_shuffle_epi8(filtersReg32,
+                 _mm256_set1_epi16(0x504u));
+  // duplicate only the fourth 16 bits (seventh and eighth byte)
+  // across 256 bit register
+  forthFilters = _mm256_shuffle_epi8(filtersReg32,
+                 _mm256_set1_epi16(0x706u));
+
+  // double the source and destination strides: each pass emits two rows
+  src_stride = src_pitch << 1;
+  dst_stride = out_pitch << 1;
+
+  // load 16 bytes 7 times in stride of src_pitch
+  srcReg32b1 = _mm256_castsi128_si256(
+               _mm_loadu_si128((__m128i *)(src_ptr)));
+  srcReg32b2 = _mm256_castsi128_si256(
+               _mm_loadu_si128((__m128i *)(src_ptr+src_pitch)));
+  srcReg32b3 = _mm256_castsi128_si256(
+               _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2)));
+  srcReg32b4 = _mm256_castsi128_si256(
+               _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3)));
+  srcReg32b5 = _mm256_castsi128_si256(
+               _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4)));
+  srcReg32b6 = _mm256_castsi128_si256(
+               _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5)));
+  srcReg32b7 = _mm256_castsi128_si256(
+               _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6)));
+
+  // pair consecutive rows: row n in the low lane, row n+1 in the high lane
+  srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
+               _mm256_castsi256_si128(srcReg32b2), 1);
+  srcReg32b2 = _mm256_inserti128_si256(srcReg32b2,
+               _mm256_castsi256_si128(srcReg32b3), 1);
+  srcReg32b3 = _mm256_inserti128_si256(srcReg32b3,
+               _mm256_castsi256_si128(srcReg32b4), 1);
+  srcReg32b4 = _mm256_inserti128_si256(srcReg32b4,
+               _mm256_castsi256_si128(srcReg32b5), 1);
+  srcReg32b5 = _mm256_inserti128_si256(srcReg32b5,
+               _mm256_castsi256_si128(srcReg32b6), 1);
+  srcReg32b6 = _mm256_inserti128_si256(srcReg32b6,
+               _mm256_castsi256_si128(srcReg32b7), 1);
+
+  // interleave the bytes of every two consecutive registers except the last
+  srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
+  srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2);
+
+  // low 8 columns of rows 2,3 (low lane) and rows 3,4 (high lane)
+  srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
+
+  // high 8 columns of rows 2,3 (low lane) and rows 3,4 (high lane)
+  srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4);
+
+  // low 8 columns of rows 4,5 (low lane) and rows 5,6 (high lane)
+  srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
+
+  // high 8 columns of rows 4,5 (low lane) and rows 5,6 (high lane)
+  srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6);
+
+
+  for (i = output_height; i > 1; i-=2) {
+     // load the next two rows (7 and 8) and pair every two consecutive
+     // rows in the same 256 bit register
+     srcReg32b8 = _mm256_castsi128_si256(
+     _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)));
+     srcReg32b7 = _mm256_inserti128_si256(srcReg32b7,
+     _mm256_castsi256_si128(srcReg32b8), 1);
+     srcReg32b9 = _mm256_castsi128_si256(
+     _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*8)));
+     srcReg32b8 = _mm256_inserti128_si256(srcReg32b8,
+     _mm256_castsi256_si128(srcReg32b9), 1);
+
+     // interleave the bytes of every two consecutive registers
+     // (srcReg32b4/srcReg32b7 are carried into the following passes)
+     srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8);
+     srcReg32b7 = _mm256_unpackhi_epi8(srcReg32b7, srcReg32b8);
+
+     // multiply 2 adjacent elements with the filter and add the result
+     srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
+     srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
+     srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
+     srcReg32b8 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
+
+     // add and saturate the results together
+     srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
+     srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b8);
+
+
+     // multiply 2 adjacent elements with the filter and add the result
+     srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
+     srcReg32b6 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
+
+     // multiply 2 adjacent elements with the filter and add the result
+     srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
+     srcReg32b13 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
+
+
+     // add the smaller of the two middle products first (saturating)
+     srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
+                   _mm256_min_epi16(srcReg32b8, srcReg32b12));
+     srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+                  _mm256_min_epi16(srcReg32b6, srcReg32b13));
+
+     // then add the larger of the two middle products (saturating)
+     srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
+                   _mm256_max_epi16(srcReg32b8, srcReg32b12));
+     srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+                  _mm256_max_epi16(srcReg32b6, srcReg32b13));
+
+     // add the rounding constant 64 to every 16 bit element
+     srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64);
+     srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64);
+
+     // shift by 7 bit each 16 bit
+     srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7);
+     srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7);
+
+     // pack each 16 bit value to 8 bits with unsigned saturation; the low
+     // lane holds the first output row and the high lane holds the second
+     // output row
+     srcReg32b1 = _mm256_packus_epi16(srcReg32b10, srcReg32b1);
+
+     src_ptr+=src_stride;
+
+     // save 16 bytes
+     _mm_store_si128((__m128i*)output_ptr,
+     _mm256_castsi256_si128(srcReg32b1));
+
+     // save the next 16 bytes (second output row)
+     _mm_store_si128((__m128i*)(output_ptr+out_pitch),
+     _mm256_extractf128_si256(srcReg32b1, 1));
+
+     output_ptr+=dst_stride;
+
+     // slide the interleaved row pairs down two rows for the next pass
+     srcReg32b10 = srcReg32b11;
+     srcReg32b1 = srcReg32b3;
+     srcReg32b11 = srcReg32b2;
+     srcReg32b3 = srcReg32b5;
+     srcReg32b2 = srcReg32b4;
+     srcReg32b5 = srcReg32b7;
+     srcReg32b7 = srcReg32b9;
+  }
+  if (i > 0) {
+    __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5;
+    __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8;
+    // load the last 16 bytes
+    srcRegFilt8 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7));
+
+    // interleave the bytes of the last two rows (6 and 7)
+    srcRegFilt4 = _mm_unpacklo_epi8(
+                  _mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
+    srcRegFilt7 = _mm_unpackhi_epi8(
+                  _mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10),
+                  _mm256_castsi256_si128(firstFilters));
+    srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4,
+                  _mm256_castsi256_si128(forthFilters));
+    srcRegFilt3 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b1),
+                  _mm256_castsi256_si128(firstFilters));
+    srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7,
+                  _mm256_castsi256_si128(forthFilters));
+
+    // add and saturate the results together
+    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
+    srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, srcRegFilt7);
+
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11),
+                  _mm256_castsi256_si128(secondFilters));
+    srcRegFilt5 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b3),
+                  _mm256_castsi256_si128(secondFilters));
+
+    // multiply 2 adjacent elements with the filter and add the result
+    srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2),
+                  _mm256_castsi256_si128(thirdFilters));
+    srcRegFilt7 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b5),
+                  _mm256_castsi256_si128(thirdFilters));
+
+    // add the smaller of the two middle products first (saturating)
+    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+                  _mm_min_epi16(srcRegFilt4, srcRegFilt6));
+    srcRegFilt3 = _mm_adds_epi16(srcRegFilt3,
+                  _mm_min_epi16(srcRegFilt5, srcRegFilt7));
+
+    // then add the larger of the two middle products (saturating)
+    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+                  _mm_max_epi16(srcRegFilt4, srcRegFilt6));
+    srcRegFilt3 = _mm_adds_epi16(srcRegFilt3,
+                  _mm_max_epi16(srcRegFilt5, srcRegFilt7));
+
+    // add the rounding constant 64 to every 16 bit element
+    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
+                  _mm256_castsi256_si128(addFilterReg64));
+    srcRegFilt3 = _mm_adds_epi16(srcRegFilt3,
+                  _mm256_castsi256_si128(addFilterReg64));
+
+    // shift by 7 bit each 16 bit
+    srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
+    srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7);
+
+    // pack each 16 bit value to 8 bits with unsigned saturation; the low
+    // 8 bytes come from srcRegFilt1 and the high 8 bytes from srcRegFilt3,
+    // giving one 16 byte output row
+    srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3);
+
+    // save 16 bytes
+    _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);
+  }
+}
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 2d07dd6..791d0f2 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -303,7 +303,7 @@
                           dst, pd->dst.stride, dst, pd->dst.stride,
                           x, y, plane);
 
-  if (!mi->mbmi.skip_coeff) {
+  if (!mi->mbmi.skip) {
     const int eob = vp9_decode_block_tokens(cm, xd, plane, block,
                                             plane_bsize, x, y, tx_size,
                                             args->r);
@@ -397,7 +397,7 @@
   // Has to be called after set_offsets
   mbmi = &xd->mi_8x8[0]->mbmi;
 
-  if (mbmi->skip_coeff) {
+  if (mbmi->skip) {
     reset_skip_context(xd, bsize);
   } else {
     if (cm->seg.enabled)
@@ -421,12 +421,12 @@
     vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
 
     // Reconstruction
-    if (!mbmi->skip_coeff) {
+    if (!mbmi->skip) {
       int eobtotal = 0;
       struct inter_args arg = { cm, xd, r, &eobtotal };
       vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
       if (!less8x8 && eobtotal == 0)
-        mbmi->skip_coeff = 1;  // skip loopfilter
+        mbmi->skip = 1;  // skip loopfilter
     }
   }
 
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index c7fb71d..856c8b5 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -146,8 +146,8 @@
   return segment_id;
 }
 
-static int read_skip_coeff(VP9_COMMON *cm, const MACROBLOCKD *xd,
-                           int segment_id, vp9_reader *r) {
+static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
+                     int segment_id, vp9_reader *r) {
   if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
     return 1;
   } else {
@@ -169,7 +169,7 @@
   const BLOCK_SIZE bsize = mbmi->sb_type;
 
   mbmi->segment_id = read_intra_segment_id(cm, xd, mi_row, mi_col, r);
-  mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r);
+  mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
   mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, bsize, 1, r);
   mbmi->ref_frame[0] = INTRA_FRAME;
   mbmi->ref_frame[1] = NONE;
@@ -520,10 +520,10 @@
   mbmi->mv[0].as_int = 0;
   mbmi->mv[1].as_int = 0;
   mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
-  mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r);
+  mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
   inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
   mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, mbmi->sb_type,
-                               !mbmi->skip_coeff || !inter_block, r);
+                               !mbmi->skip || !inter_block, r);
 
   if (inter_block)
     read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r);
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index fd34883..1d3522e 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -342,6 +342,10 @@
       cm->frame_refs[0].buf->corrupted = 1;
   }
 
+  // Check if the previous frame was a frame without any references to it.
+  if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
+    cm->release_fb_cb(cm->cb_priv,
+                      &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer);
   cm->new_fb_idx = get_free_fb(cm);
 
   if (setjmp(cm->error.jmp)) {
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index c8f334f..01ee92f 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -104,13 +104,13 @@
   }
 }
 
-static int write_skip_coeff(const VP9_COMP *cpi, int segment_id, MODE_INFO *m,
-                            vp9_writer *w) {
+static int write_skip(const VP9_COMP *cpi, int segment_id, MODE_INFO *m,
+                      vp9_writer *w) {
   const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
     return 1;
   } else {
-    const int skip = m->mbmi.skip_coeff;
+    const int skip = m->mbmi.skip;
     vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd));
     return skip;
   }
@@ -253,7 +253,7 @@
   const MV_REFERENCE_FRAME sec_rf = mi->ref_frame[1];
   const MB_PREDICTION_MODE mode = mi->mode;
   const int segment_id = mi->segment_id;
-  int skip_coeff;
+  int skip;
   const BLOCK_SIZE bsize = mi->sb_type;
   const int allow_hp = cm->allow_high_precision_mv;
 
@@ -273,14 +273,14 @@
     }
   }
 
-  skip_coeff = write_skip_coeff(cpi, segment_id, m, bc);
+  skip = write_skip(cpi, segment_id, m, bc);
 
   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
     vp9_write(bc, rf != INTRA_FRAME, vp9_get_intra_inter_prob(cm, xd));
 
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
       !(rf != INTRA_FRAME &&
-        (skip_coeff || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
+        (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
     write_selected_tx_size(cpi, m, mi->tx_size, bsize, bc);
   }
 
@@ -382,7 +382,7 @@
   if (seg->update_map)
     write_segment_id(bc, seg, m->mbmi.segment_id);
 
-  write_skip_coeff(cpi, segment_id, m, bc);
+  write_skip(cpi, segment_id, m, bc);
 
   if (m->mbmi.sb_type >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT)
     write_selected_tx_size(cpi, m, m->mbmi.tx_size, m->mbmi.sb_type, bc);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 713cc51..7cbdfce 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -172,9 +172,7 @@
   int skip_encode;
 
   // Used to store sub partition's choices.
-  int fast_ms;
   int_mv pred_mv[MAX_REF_FRAMES];
-  int subblock_ref;
 
   // TODO(jingning): Need to refactor the structure arrays that buffers the
   // coding mode decisions of each partition type.
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 15ad3bc..8023466 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -345,7 +345,6 @@
                                       int mi_row, int mi_col,
                                       int output_enabled, int projected_rate) {
   VP9_COMMON *const cm = &cpi->common;
-  int target_rate = cpi->rc.sb64_target_rate << 8;   // convert to bits << 8
 
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
   const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
@@ -362,11 +361,10 @@
   } else {
     // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
     // It is converted to bits * 256 units
-    target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / (bw * bh);
+    const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
+                            (bw * bh);
 
     if (projected_rate < (target_rate / 4)) {
-      segment = 2;
-    } else if (projected_rate < (target_rate / 2)) {
       segment = 1;
     } else {
       segment = 0;
@@ -658,14 +656,25 @@
   x->skip_recode = 0;
 
   // Set to zero to make sure we do not use the previous encoded frame stats
-  xd->mi_8x8[0]->mbmi.skip_coeff = 0;
+  xd->mi_8x8[0]->mbmi.skip = 0;
 
   x->source_variance = get_sby_perpixel_variance(cpi, x, bsize);
 
   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
     const int energy = bsize <= BLOCK_16X16 ? x->mb_energy
                                             : vp9_block_energy(cpi, x, bsize);
-    xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy);
+
+    if (cm->frame_type == KEY_FRAME ||
+        cpi->refresh_alt_ref_frame ||
+        (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+      xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy);
+    } else {
+      const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map
+                                                    : cm->last_frame_seg_map;
+      xd->mi_8x8[0]->mbmi.segment_id =
+        vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
+    }
+
     rdmult_ratio = vp9_vaq_rdmult_ratio(energy);
     vp9_mb_init_quantizer(cpi, x);
   }
@@ -679,11 +688,12 @@
   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
     const int mi_offset = mi_row * cm->mi_cols + mi_col;
     unsigned char complexity = cpi->complexity_map[mi_offset];
-    const int is_edge = (mi_row == 0) || (mi_row == (cm->mi_rows - 1)) ||
-                        (mi_col == 0) || (mi_col == (cm->mi_cols - 1));
+    const int is_edge = (mi_row <= 1) || (mi_row >= (cm->mi_rows - 2)) ||
+                        (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2));
 
-    if (!is_edge && (complexity > 128))
+    if (!is_edge && (complexity > 128)) {
       x->rdmult = x->rdmult  + ((x->rdmult * (complexity - 128)) / 256);
+    }
   }
 
   // Find best coding mode & reconstruct the MB so it is available
@@ -707,6 +717,9 @@
       *totalrate = round(*totalrate * rdmult_ratio);
     }
   }
+  else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+    x->rdmult = orig_rdmult;
+  }
 }
 
 static void update_stats(VP9_COMP *cpi) {
@@ -1027,6 +1040,7 @@
   }
   return 0;
 }
+
 static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                          BLOCK_SIZE bsize, int output_enabled) {
   int i;
@@ -1251,9 +1265,6 @@
     x->mb_energy = vp9_block_energy(cpi, x, bsize);
   }
 
-  x->fast_ms = 0;
-  x->subblock_ref = 0;
-
   if (cpi->sf.adjust_partitioning_from_last_frame) {
     // Check if any of the sub blocks are further split.
     if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
@@ -1526,6 +1537,15 @@
   }
 }
 
+// Next square block size less than or equal to the current block size.
+static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
+  BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
+  BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
+  BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
+  BLOCK_32X32, BLOCK_32X32, BLOCK_32X32,
+  BLOCK_64X64
+};
+
 // Look at neighboring blocks and set a min and max partition size based on
 // what they chose.
 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
@@ -1592,79 +1612,13 @@
                                         row8x8_remaining, col8x8_remaining,
                                         &bh, &bw);
   *min_block_size = MIN(*min_block_size, *max_block_size);
-}
 
-static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->mb;
-
-  if (cm->frame_type == INTER_FRAME &&
-      !cpi->rc.is_src_frame_alt_ref &&
-      (bsize == BLOCK_16X16 || bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) {
-    const PICK_MODE_CONTEXT *block_context = get_block_context(x, bsize);
-    const int ref0 = block_context[0].mic.mbmi.ref_frame[0];
-    const int ref1 = block_context[1].mic.mbmi.ref_frame[0];
-    const int ref2 = block_context[2].mic.mbmi.ref_frame[0];
-    const int ref3 = block_context[3].mic.mbmi.ref_frame[0];
-
-    // Currently, only consider 4 inter reference frames.
-    if (ref0 && ref1 && ref2 && ref3) {
-      int d01, d23, d02, d13;
-
-      // Motion vectors for the four subblocks.
-      int16_t mvr0 = block_context[0].mic.mbmi.mv[0].as_mv.row;
-      int16_t mvc0 = block_context[0].mic.mbmi.mv[0].as_mv.col;
-      int16_t mvr1 = block_context[1].mic.mbmi.mv[0].as_mv.row;
-      int16_t mvc1 = block_context[1].mic.mbmi.mv[0].as_mv.col;
-      int16_t mvr2 = block_context[2].mic.mbmi.mv[0].as_mv.row;
-      int16_t mvc2 = block_context[2].mic.mbmi.mv[0].as_mv.col;
-      int16_t mvr3 = block_context[3].mic.mbmi.mv[0].as_mv.row;
-      int16_t mvc3 = block_context[3].mic.mbmi.mv[0].as_mv.col;
-
-      // Adjust sign if ref is alt_ref.
-      if (cm->ref_frame_sign_bias[ref0]) {
-        mvr0 *= -1;
-        mvc0 *= -1;
-      }
-
-      if (cm->ref_frame_sign_bias[ref1]) {
-        mvr1 *= -1;
-        mvc1 *= -1;
-      }
-
-      if (cm->ref_frame_sign_bias[ref2]) {
-        mvr2 *= -1;
-        mvc2 *= -1;
-      }
-
-      if (cm->ref_frame_sign_bias[ref3]) {
-        mvr3 *= -1;
-        mvc3 *= -1;
-      }
-
-      // Calculate mv distances.
-      d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
-      d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
-      d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
-      d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
-
-      if (d01 < FAST_MOTION_MV_THRESH && d23 < FAST_MOTION_MV_THRESH &&
-          d02 < FAST_MOTION_MV_THRESH && d13 < FAST_MOTION_MV_THRESH) {
-        // Set fast motion search level.
-        x->fast_ms = 1;
-
-        if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
-            d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
-          // Set fast motion search level.
-          x->fast_ms = 2;
-
-          if (!d01 && !d23 && !d02 && !d13) {
-            x->fast_ms = 3;
-            x->subblock_ref = ref0;
-          }
-        }
-      }
-    }
+  // When use_square_partition_only is true, make sure at least one square
+  // partition is allowed by selecting the next smaller square size as
+  // *min_block_size.
+  if (cpi->sf.use_square_partition_only &&
+      (*max_block_size - *min_block_size) < 2) {
+    *min_block_size = next_square_size[*min_block_size];
   }
 }
 
@@ -1707,8 +1661,6 @@
                                bsize >= BLOCK_8X8;
   int partition_vert_allowed = !force_horz_split && xss <= yss &&
                                bsize >= BLOCK_8X8;
-
-  int partition_split_done = 0;
   (void) *tp_orig;
 
   if (bsize < BLOCK_8X8) {
@@ -1850,18 +1802,9 @@
       if (cpi->sf.less_rectangular_check)
         do_rect &= !partition_none_allowed;
     }
-    partition_split_done = 1;
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
 
-  x->fast_ms = 0;
-  x->subblock_ref = 0;
-
-  if (partition_split_done &&
-      cpi->sf.using_small_partition_info) {
-    compute_fast_motion_search_level(cpi, bsize);
-  }
-
   // PARTITION_HORZ
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
@@ -1966,7 +1909,11 @@
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
 
-
+  // TODO(jbb): This code added so that we avoid static analysis
+  // warning related to the fact that best_rd isn't used after this
+  // point.  This code should be refactored so that the duplicate
+  // checks occur in some sub function and thus are used...
+  (void) best_rd;
   *rate = best_rate;
   *dist = best_dist;
 
@@ -1990,41 +1937,6 @@
   }
 }
 
-// Examines 64x64 block and chooses a best reference frame
-static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
-                                    int mi_row, int mi_col) {
-  VP9_COMMON * const cm = &cpi->common;
-  MACROBLOCK * const x = &cpi->mb;
-  int bsl = b_width_log2(BLOCK_64X64), bs = 1 << bsl;
-  int ms = bs / 2;
-  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
-  PARTITION_CONTEXT sl[8], sa[8];
-  int pl;
-  int r;
-  int64_t d;
-
-  save_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
-
-  // Default is non mask (all reference frames allowed.
-  cpi->ref_frame_mask = 0;
-
-  // Do RD search for 64x64.
-  if ((mi_row + (ms >> 1) < cm->mi_rows) &&
-      (mi_col + (ms >> 1) < cm->mi_cols)) {
-    cpi->set_ref_frame_mask = 1;
-    rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &r, &d, BLOCK_64X64,
-                     get_block_context(x, BLOCK_64X64), INT64_MAX);
-    pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
-                                 mi_row, mi_col, BLOCK_64X64);
-    r += x->partition_cost[pl][PARTITION_NONE];
-
-    *(get_sb_partitioning(x, BLOCK_64X64)) = BLOCK_64X64;
-    cpi->set_ref_frame_mask = 0;
-  }
-
-  restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
-}
-
 static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                           int mi_row, TOKENEXTRA **tp) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2187,7 +2099,7 @@
 
   for (y = 0; y < ymbs; y++) {
     for (x = 0; x < xmbs; x++) {
-      if (!mi_8x8[y * mis + x]->mbmi.skip_coeff)
+      if (!mi_8x8[y * mis + x]->mbmi.skip)
         return 0;
     }
   }
@@ -2318,6 +2230,7 @@
     }
   }
 }
+
 // Start RTC Exploration
 typedef enum {
   BOTH_ZERO = 0,
@@ -2345,16 +2258,19 @@
   mbmi->ref_frame[1] = INTRA_FRAME;
   mbmi->tx_size = max_txsize_lookup[bsize];
   mbmi->uv_mode = mode;
-  mbmi->skip_coeff = 0;
+  mbmi->skip = 0;
   mbmi->sb_type = bsize;
   mbmi->segment_id = 0;
 }
+
 static INLINE int get_block_row(int b32i, int b16i, int b8i) {
   return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1);
 }
+
 static INLINE int get_block_col(int b32i, int b16i, int b8i) {
   return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
 }
+
 static void rtc_use_partition(VP9_COMP *cpi,
                              const TileInfo *const tile,
                              MODE_INFO **mi_8x8,
@@ -2374,8 +2290,6 @@
   int row8x8_remaining = tile->mi_row_end - mi_row;
   int col8x8_remaining = tile->mi_col_end - mi_col;
   int b32i;
-  x->fast_ms = 0;
-  x->subblock_ref = 0;
   for (b32i = 0; b32i < 4; b32i++) {
     int b16i;
     for (b16i = 0; b16i < 4; b16i++) {
@@ -2386,10 +2300,6 @@
       int rate;
       int64_t dist;
 
-      int_mv frame_nearest_mv[MAX_REF_FRAMES];
-      int_mv frame_near_mv[MAX_REF_FRAMES];
-      struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE];
-
       // Find a partition size that fits
       bsize = find_partition_size(cpi->sf.always_this_block_size,
                                   (row8x8_remaining - block_row),
@@ -2411,10 +2321,6 @@
       } else {
         set_mode_info(&mi_8x8[index]->mbmi, bsize, mode,
                       mi_row + block_row, mi_col + block_col);
-        vp9_setup_buffer_inter(cpi, x, tile,
-                               LAST_FRAME, cpi->sf.always_this_block_size,
-                               mi_row + block_row, mi_col + block_col,
-                               frame_nearest_mv, frame_near_mv, yv12_mb);
       }
 
       for (j = 0; j < mi_height; j++)
@@ -2451,9 +2357,8 @@
 
     const int idx_str = cm->mode_info_stride * mi_row + mi_col;
     MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-
     cpi->mb.source_variance = UINT_MAX;
-    set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+
     set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
     rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                      &dummy_rate, &dummy_dist, 1);
@@ -2828,9 +2733,10 @@
   }
 
   if (!is_inter_block(mbmi)) {
-    mbmi->skip_coeff = 1;
-    vp9_encode_intra_block_y(x, MAX(bsize, BLOCK_8X8));
-    vp9_encode_intra_block_uv(x, MAX(bsize, BLOCK_8X8));
+    int plane;
+    mbmi->skip = 1;
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+      vp9_encode_intra_block_plane(x, MAX(bsize, BLOCK_8X8), plane);
     if (output_enabled)
       sum_intra_stats(&cm->counts, mi);
   } else {
@@ -2847,11 +2753,11 @@
   if (!is_inter_block(mbmi)) {
     vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
   } else if (!x->skip) {
-    mbmi->skip_coeff = 1;
+    mbmi->skip = 1;
     vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
     vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
   } else {
-    mbmi->skip_coeff = 1;
+    mbmi->skip = 1;
     if (output_enabled)
       cm->counts.skip[vp9_get_skip_context(xd)][1]++;
     reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
@@ -2861,7 +2767,7 @@
     if (cm->tx_mode == TX_MODE_SELECT &&
         mbmi->sb_type >= BLOCK_8X8  &&
         !(is_inter_block(mbmi) &&
-            (mbmi->skip_coeff ||
+            (mbmi->skip ||
              vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
       ++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd),
                       &cm->counts.tx)[mbmi->tx_size];
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 2c65351..8770107 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -32,7 +32,7 @@
 struct encode_b_args {
   MACROBLOCK *x;
   struct optimize_ctx *ctx;
-  unsigned char *skip_coeff;
+  unsigned char *skip;
 };
 
 void vp9_subtract_block_c(int rows, int cols,
@@ -439,7 +439,7 @@
   }
 
   if (p->eobs[block])
-    *(args->skip_coeff) = 0;
+    *(args->skip) = 0;
 
   if (x->skip_encode || p->eobs[block] == 0)
     return;
@@ -489,7 +489,7 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
-  struct encode_b_args arg = {x, &ctx, &mbmi->skip_coeff};
+  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
 
   vp9_subtract_sby(x, bsize);
   if (x->optimize)
@@ -503,7 +503,7 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
-  struct encode_b_args arg = {x, &ctx, &mbmi->skip_coeff};
+  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
 
   if (!x->skip_recode)
     vp9_subtract_sb(x, bsize);
@@ -648,29 +648,24 @@
       assert(0);
   }
   if (*eob)
-    *(args->skip_coeff) = 0;
+    *(args->skip) = 0;
 }
 
 void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
                             BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                            unsigned char *skip_coeff) {
-  struct encode_b_args arg = {x, NULL, skip_coeff};
+                            unsigned char *skip) {
+  struct encode_b_args arg = {x, NULL, skip};
   encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
 }
 
 
-void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) {
+void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   const MACROBLOCKD *const xd = &x->e_mbd;
-  struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip_coeff};
+  struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip};
 
-  vp9_foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_intra,
+  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
                                          &arg);
 }
-void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip_coeff};
-  vp9_foreach_transformed_block_uv(xd, bsize, encode_block_intra, &arg);
-}
 
 int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
   MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
@@ -680,6 +675,6 @@
   mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
                                                                  : TX_8X8)
                                    : TX_4X4;
-  vp9_encode_intra_block_y(x, mbmi->sb_type);
+  vp9_encode_intra_block_plane(x, mbmi->sb_type, 0);
   return vp9_get_mb_ss(x->plane[0].src_diff);
 }
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index cd7c46b..515935f 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -32,10 +32,9 @@
 
 void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
                             BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                            unsigned char *skip_coeff);
+                            unsigned char *skip);
 
-void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 
 int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
 
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 3e04c2f..bf9dd3e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1566,13 +1566,7 @@
       (i >= MIN_GF_INTERVAL) &&
       // for real scene cuts (not forced kfs) dont allow arf very near kf.
       (rc->next_key_frame_forced ||
-        (i <= (rc->frames_to_key - MIN_GF_INTERVAL))) &&
-      ((next_frame.pcnt_inter > 0.75) ||
-       (next_frame.pcnt_second_ref > 0.5)) &&
-      ((mv_in_out_accumulator / (double)i > -0.2) ||
-       (mv_in_out_accumulator > -2.0)) &&
-      (boost_score > 100)) {
-
+      (i <= (rc->frames_to_key - MIN_GF_INTERVAL)))) {
     // Alternative boost calculation for alt ref
     rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost,
                                    &b_boost);
@@ -1926,8 +1920,6 @@
 
   double kf_mod_err = 0.0;
   double kf_group_err = 0.0;
-  double kf_group_intra_err = 0.0;
-  double kf_group_coded_err = 0.0;
   double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
 
   RATE_CONTROL *const rc = &cpi->rc;
@@ -1965,12 +1957,6 @@
     // Accumulate kf group error
     kf_group_err += calculate_modified_err(cpi, this_frame);
 
-    // These figures keep intra and coded error counts for all frames including
-    // key frames in the group. The effect of the key frame itself can be
-    // subtracted out using the first_frame data collected above.
-    kf_group_intra_err += this_frame->intra_error;
-    kf_group_coded_err += this_frame->coded_error;
-
     // load a the next frame's stats
     last_frame = *this_frame;
     input_stats(twopass, this_frame);
@@ -2030,15 +2016,11 @@
     reset_fpf_position(twopass, start_position);
 
     kf_group_err = 0;
-    kf_group_intra_err = 0;
-    kf_group_coded_err = 0;
 
     // Rescan to get the correct error data for the forced kf group
     for (i = 0; i < rc->frames_to_key; i++) {
       // Accumulate kf group errors
       kf_group_err += calculate_modified_err(cpi, &tmp_frame);
-      kf_group_intra_err += tmp_frame.intra_error;
-      kf_group_coded_err += tmp_frame.coded_error;
 
       // Load the next frame's stats.
       input_stats(twopass, &tmp_frame);
@@ -2054,12 +2036,6 @@
   if (twopass->stats_in >= twopass->stats_in_end) {
     // Accumulate kf group error
     kf_group_err += calculate_modified_err(cpi, this_frame);
-
-    // These figures keep intra and coded error counts for all frames including
-    // key frames in the group. The effect of the key frame itself can be
-    // subtracted out using the first_frame data collected above.
-    kf_group_intra_err += this_frame->intra_error;
-    kf_group_coded_err += this_frame->coded_error;
   }
 
   // Calculate the number of bits that should be assigned to the kf group.
@@ -2089,7 +2065,6 @@
   // frames use inter blocks.
   decay_accumulator = 1.0;
   boost_score = 0.0;
-  loop_decay_rate = 1.00;       // Starting decay rate
 
   // Scan through the kf group collating various stats.
   for (i = 0; i < rc->frames_to_key; i++) {
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index c500986..7eacda2 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -29,7 +29,6 @@
   MACROBLOCK   *const x  = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
-  unsigned int best_err;
 
   const int tmp_col_min = x->mv_col_min;
   const int tmp_col_max = x->mv_col_max;
@@ -48,27 +47,22 @@
   ref_full.row = ref_mv->row >> 3;
 
   /*cpi->sf.search_method == HEX*/
-  best_err = vp9_hex_search(x, &ref_full, step_param, x->errorperbit,
-                            0, &v_fn_ptr, 0, ref_mv, dst_mv);
+  vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, &v_fn_ptr, 0,
+                 ref_mv, dst_mv);
 
   // Try sub-pixel MC
   // if (bestsme > error_thresh && bestsme < INT_MAX)
   {
     int distortion;
     unsigned int sse;
-    best_err = cpi->find_fractional_mv_step(
-        x, dst_mv, ref_mv,
-        cpi->common.allow_high_precision_mv,
-        x->errorperbit, &v_fn_ptr,
-        0, cpi->sf.subpel_iters_per_step, NULL, NULL,
-        & distortion, &sse);
+    cpi->find_fractional_mv_step(
+        x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
+        &v_fn_ptr, 0, cpi->sf.subpel_iters_per_step, NULL, NULL, &distortion,
+        &sse);
   }
 
   vp9_set_mbmode_and_mvs(xd, NEWMV, dst_mv);
   vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
-  best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                          xd->plane[0].dst.buf, xd->plane[0].dst.stride,
-                          INT_MAX);
 
   /* restore UMV window */
   x->mv_col_min = tmp_col_min;
@@ -76,7 +70,9 @@
   x->mv_row_min = tmp_row_min;
   x->mv_row_max = tmp_row_max;
 
-  return best_err;
+  return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+          xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+          INT_MAX);
 }
 
 static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv,
@@ -355,7 +351,7 @@
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
       // If any of the blocks in the sequence failed then the MB
       // goes in segment 0
-      if (arf_not_zz[mi_row/2*cm->mb_cols + mi_col/2]) {
+      if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) {
         ncnt[0]++;
         cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0;
       } else {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 198e11c..f5c0e9b 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -349,6 +349,10 @@
     tr = br;
     tc = bc;
   }
+  // These lines ensure static analysis doesn't warn that
+  // tr and tc aren't used after the above point.
+  (void) tr;
+  (void) tc;
 
   bestmv->row = br;
   bestmv->col = bc;
@@ -452,6 +456,11 @@
     tr = br;
     tc = bc;
   }
+  // These lines ensure static analysis doesn't warn that
+  // tr and tc aren't used after the above point.
+  (void) tr;
+  (void) tc;
+
   bestmv->row = br;
   bestmv->col = bc;
 
@@ -850,8 +859,9 @@
 
 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
                             int search_param, int sad_per_bit, int *num00,
-                            vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
-                            int *mvcost[2], const MV *center_mv) {
+                            const vp9_variance_fn_ptr_t *fn_ptr,
+                            int *mvjcost, int *mvcost[2],
+                            const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *what = x->plane[0].src.buf;
   const int what_stride = x->plane[0].src.stride;
@@ -965,8 +975,9 @@
 int vp9_diamond_search_sad_c(const MACROBLOCK *x,
                              MV *ref_mv, MV *best_mv,
                              int search_param, int sad_per_bit, int *num00,
-                             vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
-                             int *mvcost[2], const MV *center_mv) {
+                             const vp9_variance_fn_ptr_t *fn_ptr,
+                             int *mvjcost, int *mvcost[2],
+                             const MV *center_mv) {
   int i, j, step;
 
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1099,7 +1110,7 @@
 int vp9_diamond_search_sadx4(const MACROBLOCK *x,
                              MV *ref_mv, MV *best_mv, int search_param,
                              int sad_per_bit, int *num00,
-                             vp9_variance_fn_ptr_t *fn_ptr,
+                             const vp9_variance_fn_ptr_t *fn_ptr,
                              int *mvjcost, int *mvcost[2],
                              const MV *center_mv) {
   int i, j, step;
@@ -1278,142 +1289,118 @@
 
 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
                            MV *mvp_full, int step_param,
-                           int sadpb, int further_steps,
-                           int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
-                           const MV *ref_mv, int_mv *dst_mv) {
-  int_mv temp_mv;
-  int thissme, n, num00;
-  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv.as_mv,
-                                        step_param, sadpb, &num00,
+                           int sadpb, int further_steps, int do_refine,
+                           const vp9_variance_fn_ptr_t *fn_ptr,
+                           const MV *ref_mv, MV *dst_mv) {
+  MV temp_mv;
+  int thissme, n, num00 = 0;
+  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+                                        step_param, sadpb, &n,
                                         fn_ptr, x->nmvjointcost,
                                         x->mvcost, ref_mv);
-  dst_mv->as_int = temp_mv.as_int;
+  *dst_mv = temp_mv;
 
-  n = num00;
-  num00 = 0;
-
-  /* If there won't be more n-step search, check to see if refining search is
-   * needed. */
+  // If there won't be more n-step search, check to see if refining search is
+  // needed.
   if (n > further_steps)
     do_refine = 0;
 
   while (n < further_steps) {
-    n++;
+    ++n;
 
     if (num00) {
       num00--;
     } else {
-      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv.as_mv,
+      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
                                         step_param + n, sadpb, &num00,
                                         fn_ptr, x->nmvjointcost, x->mvcost,
                                         ref_mv);
 
-      /* check to see if refining search is needed. */
-      if (num00 > (further_steps - n))
+      // check to see if refining search is needed.
+      if (num00 > further_steps - n)
         do_refine = 0;
 
       if (thissme < bestsme) {
         bestsme = thissme;
-        dst_mv->as_int = temp_mv.as_int;
+        *dst_mv = temp_mv;
       }
     }
   }
 
-  /* final 1-away diamond refining search */
-  if (do_refine == 1) {
-    int search_range = 8;
-    int_mv best_mv;
-    best_mv.as_int = dst_mv->as_int;
-    thissme = cpi->refining_search_sad(x, &best_mv.as_mv, sadpb, search_range,
+  // final 1-away diamond refining search
+  if (do_refine) {
+    const int search_range = 8;
+    MV best_mv = *dst_mv;
+    thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
                                        fn_ptr, x->nmvjointcost, x->mvcost,
                                        ref_mv);
-
     if (thissme < bestsme) {
       bestsme = thissme;
-      dst_mv->as_int = best_mv.as_int;
+      *dst_mv = best_mv;
     }
   }
+
   return bestsme;
 }
 
-int vp9_full_search_sad_c(const MACROBLOCK *x, MV *ref_mv,
+int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
-                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
-                          int *mvcost[2],
-                          const MV *center_mv, int n) {
+                          const vp9_variance_fn_ptr_t *fn_ptr,
+                          int *mvjcost, int *mvcost[2],
+                          const MV *center_mv, int block) {
+  int r, c;
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *const what = x->plane[0].src.buf;
   const int what_stride = x->plane[0].src.stride;
   const uint8_t *const in_what = xd->plane[0].pre[0].buf;
   const int in_what_stride = xd->plane[0].pre[0].stride;
-  MV *best_mv = &xd->mi_8x8[0]->bmi[n].as_mv[0].as_mv;
-  MV this_mv;
-  int bestsad = INT_MAX;
-  int r, c;
-  int thissad;
-  int ref_row = ref_mv->row;
-  int ref_col = ref_mv->col;
-  // Apply further limits to prevent us looking using vectors that stretch
-  // beyond the UMV border
-  const int row_min = MAX(ref_row - distance, x->mv_row_min);
-  const int row_max = MIN(ref_row + distance, x->mv_row_max);
-  const int col_min = MAX(ref_col - distance, x->mv_col_min);
-  const int col_max = MIN(ref_col + distance, x->mv_col_max);
-  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
   const int *mvjsadcost = x->nmvjointsadcost;
   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+  const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
+                                         ref_mv->col];
+  int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride,
+                             0x7fffffff) +
+      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+  MV *best_mv = &xd->mi_8x8[0]->bmi[block].as_mv[0].as_mv;
+  *best_mv = *ref_mv;
 
-  // Work out the mid point for the search
-  const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
+  for (r = row_min; r < row_max; ++r) {
+    for (c = col_min; c < col_max; ++c) {
+      const MV this_mv = {r, c};
+      const uint8_t *check_here = &in_what[r * in_what_stride + c];
+      const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
+                                  best_sad) +
+          mvsad_err_cost(&this_mv, &fcenter_mv,
+                         mvjsadcost, mvsadcost, sad_per_bit);
 
-  best_mv->row = ref_row;
-  best_mv->col = ref_col;
-
-  // Baseline value at the centre
-  bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
-                        in_what_stride, 0x7fffffff)
-                           + mvsad_err_cost(best_mv, &fcenter_mv,
-                                            mvjsadcost, mvsadcost, sad_per_bit);
-
-  for (r = row_min; r < row_max; r++) {
-    const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
-    this_mv.row = r;
-
-    for (c = col_min; c < col_max; c++) {
-      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
-                            bestsad);
-
-      this_mv.col = c;
-      thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                mvjsadcost, mvsadcost, sad_per_bit);
-
-      if (thissad < bestsad) {
-        bestsad = thissad;
-        best_mv->row = r;
-        best_mv->col = c;
-        bestaddress = check_here;
+      if (sad < best_sad) {
+        best_sad = sad;
+        *best_mv = this_mv;
+        best_address = check_here;
       }
-
-      check_here++;
     }
   }
 
-  this_mv.row = best_mv->row * 8;
-  this_mv.col = best_mv->col * 8;
-
-  if (bestsad < INT_MAX)
-    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
-                      (unsigned int *)(&thissad)) +
-                      mv_err_cost(&this_mv, center_mv,
-                                  mvjcost, mvcost, x->errorperbit);
-  else
+  if (best_sad < INT_MAX) {
+    unsigned int unused;
+    const MV mv = {best_mv->row * 8, best_mv->col * 8};
+    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &unused)
+                + mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit);
+  } else {
     return INT_MAX;
+  }
 }
 
-int vp9_full_search_sadx3(const MACROBLOCK *x, MV *ref_mv,
+int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
-                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
-                          int *mvcost[2], const MV *center_mv, int n) {
+                          const vp9_variance_fn_ptr_t *fn_ptr,
+                          int *mvjcost, int *mvcost[2],
+                          const MV *center_mv, int n) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *const what = x->plane[0].src.buf;
   const int what_stride = x->plane[0].src.stride;
@@ -1515,9 +1502,9 @@
     return INT_MAX;
 }
 
-int vp9_full_search_sadx8(const MACROBLOCK *x, MV *ref_mv,
+int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
-                          vp9_variance_fn_ptr_t *fn_ptr,
+                          const vp9_variance_fn_ptr_t *fn_ptr,
                           int *mvjcost, int *mvcost[2],
                           const MV *center_mv, int n) {
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1651,7 +1638,8 @@
 
 int vp9_refining_search_sad_c(const MACROBLOCK *x,
                               MV *ref_mv, int error_per_bit,
-                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+                              int search_range,
+                              const vp9_variance_fn_ptr_t *fn_ptr,
                               int *mvjcost, int *mvcost[2],
                               const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1723,7 +1711,8 @@
 
 int vp9_refining_search_sadx4(const MACROBLOCK *x,
                               MV *ref_mv, int error_per_bit,
-                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+                              int search_range,
+                              const vp9_variance_fn_ptr_t *fn_ptr,
                               int *mvjcost, int *mvcost[2],
                               const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1836,8 +1825,10 @@
 // mode.
 int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
-                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2], const MV *center_mv,
+                             int search_range,
+                             const vp9_variance_fn_ptr_t *fn_ptr,
+                             int *mvjcost, int *mvcost[2],
+                             const MV *center_mv,
                              const uint8_t *second_pred, int w, int h) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 28b46b5..4414f3d 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -45,8 +45,8 @@
 int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x,
                            MV *mvp_full, int step_param,
                            int sadpb, int further_steps, int do_refine,
-                           vp9_variance_fn_ptr_t *fn_ptr,
-                           const MV *ref_mv, int_mv *dst_mv);
+                           const vp9_variance_fn_ptr_t *fn_ptr,
+                           const MV *ref_mv, MV *dst_mv);
 
 int vp9_hex_search(const MACROBLOCK *x,
                    MV *ref_mv,
@@ -107,15 +107,16 @@
 extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_tree;
 
 typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
-                                    MV *ref_mv, int sad_per_bit,
-                                    int distance, vp9_variance_fn_ptr_t *fn_ptr,
+                                    const MV *ref_mv, int sad_per_bit,
+                                    int distance,
+                                    const vp9_variance_fn_ptr_t *fn_ptr,
                                     int *mvjcost, int *mvcost[2],
                                     const MV *center_mv, int n);
 
 typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
                                         MV *ref_mv, int sad_per_bit,
                                         int distance,
-                                        vp9_variance_fn_ptr_t *fn_ptr,
+                                        const vp9_variance_fn_ptr_t *fn_ptr,
                                         int *mvjcost, int *mvcost[2],
                                         const MV *center_mv);
 
@@ -123,13 +124,14 @@
                                        MV *ref_mv, MV *best_mv,
                                        int search_param, int sad_per_bit,
                                        int *num00,
-                                       vp9_variance_fn_ptr_t *fn_ptr,
+                                       const vp9_variance_fn_ptr_t *fn_ptr,
                                        int *mvjcost, int *mvcost[2],
                                        const MV *center_mv);
 
 int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
-                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+                             int search_range,
+                             const vp9_variance_fn_ptr_t *fn_ptr,
                              int *mvjcost, int *mvcost[2],
                              const MV *center_mv, const uint8_t *second_pred,
                              int w, int h);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index db6349c..84a0e60 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -96,7 +96,7 @@
 void vp9_init_quantizer(VP9_COMP *cpi);
 
 static const double in_frame_q_adj_ratio[MAX_SEGMENTS] =
-  {1.0, 1.5, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+  {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
 
 static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
   switch (mode) {
@@ -267,7 +267,6 @@
     // Clear down the complexity map used for rd
     vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols);
 
-    // Enable segmentation
     vp9_enable_segmentation((VP9_PTR)cpi);
     vp9_clearall_segfeatures(seg);
 
@@ -278,7 +277,7 @@
     vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q);
 
     // Use some of the segments for in frame Q adjustment
-    for (segment = 1; segment < 3; segment++) {
+    for (segment = 1; segment < 2; segment++) {
       qindex_delta =
         vp9_compute_qdelta_by_rate(cpi, cm->base_qindex,
                                    in_frame_q_adj_ratio[segment]);
@@ -496,6 +495,18 @@
   sf->thresh_mult[THR_D207_PRED] += 2500;
   sf->thresh_mult[THR_D63_PRED] += 2500;
 
+  // disable using golden frame modes if golden frames are not being used
+  if (cpi->rc.frames_till_gf_update_due == INT_MAX) {
+    sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
+    sf->thresh_mult[THR_ZEROG    ] = INT_MAX;
+    sf->thresh_mult[THR_NEARG    ] = INT_MAX;
+    sf->thresh_mult[THR_NEWG     ] = INT_MAX;
+    sf->thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
+    sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+    sf->thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
+    sf->thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
+  }
+
   /* disable frame modes if flags not set */
   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
     sf->thresh_mult[THR_NEWMV    ] = INT_MAX;
@@ -911,7 +922,6 @@
   sf->use_uv_intra_rd_estimate = 0;
   sf->use_fast_lpf_pick = 0;
   sf->use_fast_coef_updates = 0;
-  sf->using_small_partition_info = 0;
   sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
   sf->use_pick_mode = 0;
   sf->encode_breakout_thresh = 0;
@@ -1474,7 +1484,7 @@
 
   if (cpi->initial_width) {
     // Increasing the size of the frame beyond the first seen frame, or some
-    // otherwise signalled maximum size, is not supported.
+    // otherwise signaled maximum size, is not supported.
     // TODO(jkoleszar): exit gracefully.
     assert(cm->width <= cpi->initial_width);
     assert(cm->height <= cpi->initial_height);
@@ -1489,14 +1499,13 @@
   cpi->speed = cpi->oxcf.cpu_used;
 
   if (cpi->oxcf.lag_in_frames == 0) {
-    // force to allowlag to 0 if lag_in_frames is 0;
+    // Force allow_lag to 0 if lag_in_frames is 0.
     cpi->oxcf.allow_lag = 0;
   } else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) {
-     // Limit on lag buffers as these are not currently dynamically allocated
+     // Limit on lag buffers as these are not currently dynamically allocated.
     cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
   }
 
-  // YX Temp
 #if CONFIG_MULTIPLE_ARF
   vp9_zero(cpi->alt_ref_source);
 #else
@@ -3713,8 +3722,9 @@
   xd->interp_kernel = vp9_get_interp_kernel(
       DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER);
 
-  if (cpi->oxcf.aq_mode == VARIANCE_AQ)
+  if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
     vp9_vaq_init();
+  }
 
   if (cpi->use_svc) {
     SvcEncode(cpi, size, dest, frame_flags);
@@ -3958,11 +3968,11 @@
     cm->width = width;
     if (cm->width * 5 < cpi->initial_width) {
       cm->width = cpi->initial_width / 5 + 1;
-      printf("Warning: Desired width too small, changed to %d \n", cm->width);
+      printf("Warning: Desired width too small, changed to %d\n", cm->width);
     }
     if (cm->width > cpi->initial_width) {
       cm->width = cpi->initial_width;
-      printf("Warning: Desired width too large, changed to %d \n", cm->width);
+      printf("Warning: Desired width too large, changed to %d\n", cm->width);
     }
   }
 
@@ -3970,11 +3980,11 @@
     cm->height = height;
     if (cm->height * 5 < cpi->initial_height) {
       cm->height = cpi->initial_height / 5 + 1;
-      printf("Warning: Desired height too small, changed to %d \n", cm->height);
+      printf("Warning: Desired height too small, changed to %d\n", cm->height);
     }
     if (cm->height > cpi->initial_height) {
       cm->height = cpi->initial_height;
-      printf("Warning: Desired height too large, changed to %d \n", cm->height);
+      printf("Warning: Desired height too large, changed to %d\n", cm->height);
     }
   }
 
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 88a0419..1ab1814 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -346,11 +346,6 @@
   // inter modes or to enable it always.
   int disable_split_mask;
 
-  // TODO(jbb): Remove this and everything that uses it. It's only valid if
-  // we were doing small to large partition checks. We currently do the
-  // reverse.
-  int using_small_partition_info;
-
   // TODO(jingning): combine the related motion search speed features
   // This allows us to use motion search at other sizes as a starting
   // point for this motion search and limits the search range around it.
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 512b6bf..48948fc 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -98,11 +98,8 @@
   mvp_full.col >>= 3;
   mvp_full.row >>= 3;
 
-  bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
-                                   sadpb, further_steps, 1,
-                                   &cpi->fn_ptr[bsize],
-                                   &ref_mv.as_mv, tmp_mv);
-
+  vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 1,
+                         &cpi->fn_ptr[bsize], &ref_mv.as_mv, &tmp_mv->as_mv);
   x->mv_col_min = tmp_col_min;
   x->mv_col_max = tmp_col_max;
   x->mv_row_min = tmp_row_min;
@@ -172,7 +169,7 @@
                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
   mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
                         EIGHTTAP : cpi->common.interp_filter;
-  mbmi->skip_coeff = 0;
+  mbmi->skip = 0;
   mbmi->segment_id = 0;
 
   for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
@@ -199,7 +196,7 @@
     clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
 
     for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
-      int rate = cost[this_mode - NEARESTMV];
+      int rate = cost[INTER_OFFSET(this_mode)];
       int64_t dist;
 
       if (this_mode == NEWMV) {
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 9afa064..bc2c3a6 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -499,8 +499,6 @@
   // (at buffer = critical level).
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   const RATE_CONTROL *rc = &cpi->rc;
-  // int active_worst_quality = rc->active_worst_quality;
-  // Maximum limit for down adjustment, ~20%.
   // Buffer level below which we push active_worst to worst_quality.
   int critical_level = oxcf->optimal_buffer_level >> 2;
   int adjustment = 0;
@@ -516,6 +514,7 @@
                                rc->avg_frame_qindex[KEY_FRAME] * 3 / 2);
   if (rc->buffer_level > oxcf->optimal_buffer_level) {
     // Adjust down.
+    // Maximum limit for down adjustment, ~30%.
     int max_adjustment_down = active_worst_quality / 3;
     if (max_adjustment_down) {
       buff_lvl_step = (int)((oxcf->maximum_buffer_size -
@@ -846,7 +845,6 @@
   int q;
 
   if (frame_is_intra_only(cm)) {
-    active_best_quality = rc->best_quality;
 #if !CONFIG_MULTIPLE_ARF
     // Handle the special case for key frames forced when we have75 reached
     // the maximum key frame interval. Here force the Q to a range
@@ -1315,7 +1313,8 @@
   int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
                              FRAME_OVERHEAD_BITS);
   int target = rc->av_per_frame_bandwidth;
-  if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+  if (cpi->svc.number_temporal_layers > 1 &&
+      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
     // Note that for layers, av_per_frame_bandwidth is the cumulative
     // per-frame-bandwidth. For the target size of this frame, use the
     // layer average frame size (i.e., non-cumulative per-frame-bw).
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index cae7884..f0a861d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -68,7 +68,7 @@
   int64_t this_rd;
   int64_t best_rd;
   int skip;
-  const int16_t *scan, *nb;
+  const scan_order *so;
 };
 
 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
@@ -635,7 +635,7 @@
 
   args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                            args->t_left + y_idx, tx_size,
-                           args->scan, args->nb);
+                           args->so->scan, args->so->neighbors);
 }
 
 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -650,8 +650,7 @@
     return;
 
   if (!is_inter_block(mbmi))
-    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size,
-                           &mbmi->skip_coeff);
+    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
   else
     vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 
@@ -710,49 +709,40 @@
   }
 }
 
-static void init_rdcost_stack(MACROBLOCK *x, const int64_t ref_rdcost,
-                              struct rdcost_block_args *arg) {
-  vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
-  arg->x = x;
-  arg->best_rd = ref_rdcost;
-}
-
 static void txfm_rd_in_plane(MACROBLOCK *x,
                              int *rate, int64_t *distortion,
                              int *skippable, int64_t *sse,
                              int64_t ref_best_rd, int plane,
                              BLOCK_SIZE bsize, TX_SIZE tx_size) {
-  struct rdcost_block_args rd_stack;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
   const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
   const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
-  const scan_order *so;
+  struct rdcost_block_args args = { 0 };
+  args.x = x;
+  args.best_rd = ref_best_rd;
 
-  init_rdcost_stack(x, ref_best_rd, &rd_stack);
   if (plane == 0)
     xd->mi_8x8[0]->mbmi.tx_size = tx_size;
 
-  vp9_get_entropy_contexts(tx_size, rd_stack.t_above, rd_stack.t_left,
+  vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left,
                            pd->above_context, pd->left_context,
                            num_4x4_w, num_4x4_h);
 
-  so = get_scan(xd, tx_size, pd->plane_type, 0);
-  rd_stack.scan = so->scan;
-  rd_stack.nb = so->neighbors;
+  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
 
   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
-                                         block_rd_txfm, &rd_stack);
-  if (rd_stack.skip) {
+                                         block_rd_txfm, &args);
+  if (args.skip) {
     *rate       = INT_MAX;
     *distortion = INT64_MAX;
     *sse        = INT64_MAX;
     *skippable  = 0;
   } else {
-    *distortion = rd_stack.this_dist;
-    *rate       = rd_stack.this_rate;
-    *sse        = rd_stack.this_sse;
+    *distortion = args.this_dist;
+    *rate       = args.this_rate;
+    *sse        = args.this_sse;
     *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
   }
 }
@@ -787,7 +777,10 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
   vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
-  int64_t rd[TX_SIZES][2];
+  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
+                             {INT64_MAX, INT64_MAX},
+                             {INT64_MAX, INT64_MAX},
+                             {INT64_MAX, INT64_MAX}};
   int n, m;
   int s0, s1;
   const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
@@ -862,7 +855,10 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
   vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
-  int64_t rd[TX_SIZES][2];
+  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
+                             {INT64_MAX, INT64_MAX},
+                             {INT64_MAX, INT64_MAX},
+                             {INT64_MAX, INT64_MAX}};
   int n, m;
   int s0, s1;
   double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
@@ -1604,13 +1600,11 @@
   int mvthresh;
 } BEST_SEG_INFO;
 
-static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
-  int r = 0;
-  r |= (mv->as_mv.row >> 3) < x->mv_row_min;
-  r |= (mv->as_mv.row >> 3) > x->mv_row_max;
-  r |= (mv->as_mv.col >> 3) < x->mv_col_min;
-  r |= (mv->as_mv.col >> 3) > x->mv_col_max;
-  return r;
+static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
+  return (mv->row >> 3) < x->mv_row_min ||
+         (mv->row >> 3) > x->mv_row_max ||
+         (mv->col >> 3) < x->mv_col_min ||
+         (mv->col >> 3) > x->mv_col_max;
 }
 
 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
@@ -1825,7 +1819,7 @@
             bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                              sadpb, further_steps, 0, v_fn_ptr,
                                              &bsi->ref_mv->as_mv,
-                                             new_mv);
+                                             &new_mv->as_mv);
           }
 
           // Should we do a full search (best quality only)
@@ -1924,10 +1918,9 @@
         }
 
         // Trap vectors that reach beyond the UMV borders
-        if (mv_check_bounds(x, &mode_mv[this_mode]))
-          continue;
-        if (has_second_rf &&
-            mv_check_bounds(x, &second_mode_mv[this_mode]))
+        if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) ||
+            (has_second_rf &&
+             mv_check_bounds(x, &second_mode_mv[this_mode].as_mv)))
           continue;
 
         if (filter_idx > 0) {
@@ -2380,24 +2373,16 @@
 
   vp9_set_mv_search_range(x, &ref_mv.as_mv);
 
-  // Adjust search parameters based on small partitions' result.
-  if (x->fast_ms) {
-    // adjust search range
-    step_param = 6;
-    if (x->fast_ms > 1)
-      step_param = 8;
+  // Work out the size of the first step in the mv step search.
+  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+  if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+    // Take wtd average of the step_params based on the last frame's
+    // max mv magnitude and that based on the best ref mvs of the current
+    // block for the given reference.
+    step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
+                  cpi->mv_step_param) >> 1;
   } else {
-    // Work out the size of the first step in the mv step search.
-    // 0 here is maximum length first step. 1 is MAX >> 1 etc.
-    if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
-      // Take wtd average of the step_params based on the last frame's
-      // max mv magnitude and that based on the best ref mvs of the current
-      // block for the given reference.
-      step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
-                    cpi->mv_step_param) >> 1;
-    } else {
-      step_param = cpi->mv_step_param;
-    }
+    step_param = cpi->mv_step_param;
   }
 
   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
@@ -2455,7 +2440,7 @@
     bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                      sadpb, further_steps, 1,
                                      &cpi->fn_ptr[bsize],
-                                     &ref_mv.as_mv, tmp_mv);
+                                     &ref_mv.as_mv, &tmp_mv->as_mv);
   }
 
   x->mv_col_min = tmp_col_min;
@@ -2750,7 +2735,7 @@
     if (this_mode != NEWMV)
       clamp_mv2(&cur_mv[i].as_mv, xd);
 
-    if (mv_check_bounds(x, &cur_mv[i]))
+    if (mv_check_bounds(x, &cur_mv[i].as_mv))
       return INT64_MAX;
     mbmi->mv[i].as_int = cur_mv[i].as_int;
   }
@@ -3260,12 +3245,6 @@
          vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
       continue;
 
-    // Skip some checking based on small partitions' result.
-    if (x->fast_ms > 1 && !ref_frame)
-      continue;
-    if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
-      continue;
-
     mbmi->ref_frame[0] = ref_frame;
     mbmi->ref_frame[1] = second_ref_frame;
 
@@ -4126,11 +4105,6 @@
         if (tmp_rd == INT64_MAX)
           continue;
       } else {
-        if (cm->interp_filter == SWITCHABLE) {
-          int rs = get_switchable_rate(x);
-          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
-        }
-        tmp_rd = tmp_best_rdu;
         total_sse = tmp_best_sse;
         rate = tmp_best_rate;
         rate_y = tmp_best_ratey;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index ca11dda..0040477 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -29,7 +29,6 @@
 #include "vpx_scale/vpx_scale.h"
 
 #define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
-#define ALT_REF_SUBPEL_ENABLED 1  // dis/enable subpel in MC AltRef filtering
 
 static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                                             uint8_t *y_mb_ptr,
@@ -160,11 +159,9 @@
 
   /*cpi->sf.search_method == HEX*/
   // Ignore mv costing by sending NULL pointer instead of cost arrays
-  bestsme = vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
-                           &cpi->fn_ptr[BLOCK_16X16],
-                           0, &best_ref_mv1, ref_mv);
+  vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
+                 &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv);
 
-#if ALT_REF_SUBPEL_ENABLED
   // Try sub-pixel MC?
   // if (bestsme > error_thresh && bestsme < INT_MAX)
   {
@@ -180,7 +177,6 @@
                                            NULL, NULL,
                                            &distortion, &sse);
   }
-#endif
 
   // Restore input state
   x->plane[0].src = src;
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index ed1301a..510ef78 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -160,7 +160,6 @@
   VP9_COMP *cpi;
   MACROBLOCKD *xd;
   TOKENEXTRA **tp;
-  TX_SIZE tx_size;
   uint8_t *token_cache;
 };
 
@@ -188,6 +187,18 @@
   ++counts[token];
 }
 
+static INLINE void add_token_no_extra(TOKENEXTRA **t,
+                                      const vp9_prob *context_tree,
+                                      uint8_t token,
+                                      uint8_t skip_eob_node,
+                                      unsigned int *counts) {
+  (*t)->token = token;
+  (*t)->context_tree = context_tree;
+  (*t)->skip_eob_node = skip_eob_node;
+  (*t)++;
+  ++counts[token];
+}
+
 static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
                        TX_SIZE tx_size, void *arg) {
   struct tokenize_b_args* const args = arg;
@@ -199,7 +210,7 @@
   struct macroblockd_plane *pd = &xd->plane[plane];
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   int pt; /* near block/prev token context index */
-  int c = 0;
+  int c;
   TOKENEXTRA *t = *tp;        /* store tokens starting here */
   int eob = p->eobs[block];
   const PLANE_TYPE type = pd->plane_type;
@@ -207,9 +218,14 @@
   const int segment_id = mbmi->segment_id;
   const int16_t *scan, *nb;
   const scan_order *so;
-  vp9_coeff_count *const counts = cpi->coef_counts[tx_size];
-  vp9_coeff_probs_model *const coef_probs = cpi->common.fc.coef_probs[tx_size];
   const int ref = is_inter_block(mbmi);
+  unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
+      cpi->coef_counts[tx_size][type][ref];
+  vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+      cpi->common.fc.coef_probs[tx_size][type][ref];
+  unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
+      cpi->common.counts.eob_branch[tx_size][type][ref];
+
   const uint8_t *const band = get_band_translate(tx_size);
   const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
 
@@ -228,11 +244,9 @@
     v = qcoeff_ptr[scan[c]];
 
     while (!v) {
-      add_token(&t, coef_probs[type][ref][band[c]][pt], 0, ZERO_TOKEN, skip_eob,
-                counts[type][ref][band[c]][pt]);
-
-      cpi->common.counts.eob_branch[tx_size][type][ref][band[c]][pt] +=
-          !skip_eob;
+      add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob,
+                         counts[band[c]][pt]);
+      eob_branch[band[c]][pt] += !skip_eob;
 
       skip_eob = 1;
       token_cache[scan[c]] = 0;
@@ -240,12 +254,12 @@
       pt = get_coef_context(nb, token_cache, c);
       v = qcoeff_ptr[scan[c]];
     }
-    add_token(&t, coef_probs[type][ref][band[c]][pt],
+
+    add_token(&t, coef_probs[band[c]][pt],
               vp9_dct_value_tokens_ptr[v].extra,
               vp9_dct_value_tokens_ptr[v].token, skip_eob,
-              counts[type][ref][band[c]][pt]);
-
-    cpi->common.counts.eob_branch[tx_size][type][ref][band[c]][pt] += !skip_eob;
+              counts[band[c]][pt]);
+    eob_branch[band[c]][pt] += !skip_eob;
 
     token_cache[scan[c]] =
         vp9_pt_energy_class[vp9_dct_value_tokens_ptr[v].token];
@@ -253,9 +267,9 @@
     pt = get_coef_context(nb, token_cache, c);
   }
   if (c < seg_eob) {
-    add_token(&t, coef_probs[type][ref][band[c]][pt], 0, EOB_TOKEN, 0,
-              counts[type][ref][band[c]][pt]);
-    ++cpi->common.counts.eob_branch[tx_size][type][ref][band[c]][pt];
+    add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
+                       counts[band[c]][pt]);
+    ++eob_branch[band[c]][pt];
   }
 
   *tp = t;
@@ -299,8 +313,8 @@
   const int ctx = vp9_get_skip_context(xd);
   const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                               SEG_LVL_SKIP);
-  struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache};
-  if (mbmi->skip_coeff) {
+  struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache};
+  if (mbmi->skip) {
     if (!dry_run)
       cm->counts.skip[ctx][1] += skip_inc;
     reset_skip_context(xd, bsize);
diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_vaq.c
index 1f9cb87..acd7c41 100644
--- a/vp9/encoder/vp9_vaq.c
+++ b/vp9/encoder/vp9_vaq.c
@@ -19,8 +19,8 @@
 #include "vp9/encoder/vp9_segmentation.h"
 #include "vp9/common/vp9_systemdependent.h"
 
-#define ENERGY_MIN (-3)
-#define ENERGY_MAX (3)
+#define ENERGY_MIN (-1)
+#define ENERGY_MAX (1)
 #define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN +  1)
 #define ENERGY_IN_BOUNDS(energy)\
   assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)
@@ -65,7 +65,7 @@
 
   vp9_clear_system_state();  // __asm emms;
 
-  base_ratio = 1.8;
+  base_ratio = 1.5;
 
   for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
     Q_RATIO(i) = pow(base_ratio, i/3.0);
@@ -80,30 +80,34 @@
                                         cm->y_dc_delta_q);
   int i;
 
-  vp9_enable_segmentation((VP9_PTR)cpi);
-  vp9_clearall_segfeatures(seg);
+  if (cm->frame_type == KEY_FRAME ||
+      cpi->refresh_alt_ref_frame ||
+      (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+    vp9_enable_segmentation((VP9_PTR)cpi);
+    vp9_clearall_segfeatures(seg);
 
-  seg->abs_delta = SEGMENT_DELTADATA;
+    seg->abs_delta = SEGMENT_DELTADATA;
 
-  vp9_clear_system_state();  // __asm emms;
+    vp9_clear_system_state();  // __asm emms;
 
-  for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
-    int qindex_delta, segment_rdmult;
+    for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
+      int qindex_delta, segment_rdmult;
 
-    if (Q_RATIO(i) == 1) {
-      // No need to enable SEG_LVL_ALT_Q for this segment
-      RDMULT_RATIO(i) = 1;
-      continue;
+      if (Q_RATIO(i) == 1) {
+        // No need to enable SEG_LVL_ALT_Q for this segment
+        RDMULT_RATIO(i) = 1;
+        continue;
+      }
+
+      qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i));
+      vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta);
+      vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q);
+
+      segment_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + qindex_delta +
+                                           cm->y_dc_delta_q);
+
+      RDMULT_RATIO(i) = (double) segment_rdmult / base_rdmult;
     }
-
-    qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i));
-    vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta);
-    vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q);
-
-    segment_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + qindex_delta +
-                                         cm->y_dc_delta_q);
-
-    RDMULT_RATIO(i) = (double) segment_rdmult / base_rdmult;
   }
 }
 
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 85e83b8..48d6a7c 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -79,6 +79,7 @@
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_bilinear_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_subpixel_8t_intrin_avx2.c
 ifeq ($(CONFIG_VP9_POSTPROC),yes)
 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index ece6d52..8072f78 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -178,7 +178,7 @@
 
   RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
   if (cfg->ts_number_layers > 1) {
-    int i;
+    unsigned int i;
     for (i = 1; i < cfg->ts_number_layers; ++i) {
       if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i-1]) {
         ERROR("ts_target_bitrate entries are not increasing");
@@ -222,7 +222,7 @@
     int              n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
     FIRSTPASS_STATS *stats;
 
-    if (!cfg->rc_twopass_stats_in.buf)
+    if (cfg->rc_twopass_stats_in.buf == NULL)
       ERROR("rc_twopass_stats_in.buf not set.");
 
     if (cfg->rc_twopass_stats_in.sz % packet_sz)
@@ -419,7 +419,7 @@
 
   res = validate_config(ctx, cfg, &ctx->vp8_cfg);
 
-  if (!res) {
+  if (res == VPX_CODEC_OK) {
     ctx->cfg = *cfg;
     set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
     vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -439,8 +439,7 @@
 
 #define MAP(id, var) case id: *(RECAST(id, arg)) = var; break
 
-  if (!arg)
-    return VPX_CODEC_INVALID_PARAM;
+  if (arg == NULL) return VPX_CODEC_INVALID_PARAM;
 
   switch (ctrl_id) {
       MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi));
@@ -482,7 +481,7 @@
 
   res = validate_config(ctx, &ctx->cfg, &xcfg);
 
-  if (!res) {
+  if (res == VPX_CODEC_OK) {
     ctx->vp8_cfg = xcfg;
     set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
     vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -501,12 +500,10 @@
 
   VP9_PTR optr;
 
-  if (!ctx->priv) {
+  if (ctx->priv == NULL) {
     priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
 
-    if (!priv) {
-      return VPX_CODEC_MEM_ERROR;
-    }
+    if (priv == NULL) return VPX_CODEC_MEM_ERROR;
 
     ctx->priv = &priv->base;
     ctx->priv->sz = sizeof(*ctx->priv);
@@ -543,21 +540,19 @@
 
     priv->cx_data = malloc(priv->cx_data_sz);
 
-    if (!priv->cx_data) {
-      return VPX_CODEC_MEM_ERROR;
-    }
+    if (priv->cx_data == NULL) return VPX_CODEC_MEM_ERROR;
 
     vp9_initialize_enc();
 
     res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
 
-    if (!res) {
+    if (res == VPX_CODEC_OK) {
       set_vp9e_config(&ctx->priv->alg_priv->oxcf,
                       ctx->priv->alg_priv->cfg,
                       ctx->priv->alg_priv->vp8_cfg);
       optr = vp9_create_compressor(&ctx->priv->alg_priv->oxcf);
 
-      if (!optr)
+      if (optr == NULL)
         res = VPX_CODEC_MEM_ERROR;
       else
         ctx->priv->alg_priv->cpi = optr;
@@ -725,7 +720,7 @@
   }
 
   /* Initialize the encoder instance on the first frame. */
-  if (!res && ctx->cpi) {
+  if (res == VPX_CODEC_OK && ctx->cpi != NULL) {
     unsigned int lib_flags;
     YV12_BUFFER_CONFIG sd;
     int64_t dst_time_stamp, dst_end_time_stamp;
@@ -785,8 +780,8 @@
         VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
 
         /* Pack invisible frames with the next visible frame */
-        if (!cpi->common.show_frame) {
-          if (!ctx->pending_cx_data)
+        if (cpi->common.show_frame == 0) {
+          if (ctx->pending_cx_data == 0)
             ctx->pending_cx_data = cx_data;
           ctx->pending_cx_data_sz += size;
           ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
@@ -811,7 +806,7 @@
         if (lib_flags & FRAMEFLAGS_KEY)
           pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
 
-        if (!cpi->common.show_frame) {
+        if (cpi->common.show_frame == 0) {
           pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
 
           // This timestamp should be as close as possible to the
@@ -1021,12 +1016,7 @@
     res = vp9_set_internal_size(ctx->cpi,
                                 (VPX_SCALING)scalemode->h_scaling_mode,
                                 (VPX_SCALING)scalemode->v_scaling_mode);
-
-    if (!res) {
-      return VPX_CODEC_OK;
-    } else {
-      return VPX_CODEC_INVALID_PARAM;
-    }
+    return (res == 0) ? VPX_CODEC_OK : VPX_CODEC_INVALID_PARAM;
   } else {
     return VPX_CODEC_INVALID_PARAM;
   }
@@ -1055,7 +1045,7 @@
   cpi->svc.temporal_layer_id = data->temporal_layer_id;
   // Checks on valid layer_id input.
   if (cpi->svc.temporal_layer_id < 0 ||
-      cpi->svc.temporal_layer_id >= ctx->cfg.ts_number_layers) {
+      cpi->svc.temporal_layer_id >= (int)ctx->cfg.ts_number_layers) {
     return VPX_CODEC_INVALID_PARAM;
   }
   if (cpi->svc.spatial_layer_id < 0 ||
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 881a7d1..1941fc0 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -60,6 +60,11 @@
   int                     img_setup;
   int                     img_avail;
   int                     invert_tile_order;
+
+  // External frame buffer info to save for VP9 common.
+  void *ext_priv;  // Private data associated with the external frame buffers.
+  vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
+  vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
 };
 
 static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si,
@@ -300,13 +305,22 @@
         VP9D_COMP *const pbi = (VP9D_COMP*)optr;
         VP9_COMMON *const cm = &pbi->common;
 
-        cm->get_fb_cb = vp9_get_frame_buffer;
-        cm->release_fb_cb = vp9_release_frame_buffer;
+        // Set index to not initialized.
+        cm->new_fb_idx = -1;
 
-        if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers))
-          vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
-                             "Failed to initialize internal frame buffers");
-        cm->cb_priv = &cm->int_frame_buffers;
+        if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) {
+          cm->get_fb_cb = ctx->get_ext_fb_cb;
+          cm->release_fb_cb = ctx->release_ext_fb_cb;
+          cm->cb_priv = ctx->ext_priv;
+        } else {
+          cm->get_fb_cb = vp9_get_frame_buffer;
+          cm->release_fb_cb = vp9_release_frame_buffer;
+
+          if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers))
+            vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                               "Failed to initialize internal frame buffers");
+          cm->cb_priv = &cm->int_frame_buffers;
+        }
 
         ctx->pbi = optr;
       }
@@ -347,7 +361,11 @@
 
     if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp,
                                        &time_end_stamp, &flags)) {
+      VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi;
+      VP9_COMMON *const cm = &pbi->common;
       yuvconfig2image(&ctx->img, &sd, user_priv);
+
+      ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
       ctx->img_avail = 1;
     }
   }
@@ -467,6 +485,24 @@
   return img;
 }
 
+static vpx_codec_err_t vp9_set_fb_fn(
+    vpx_codec_alg_priv_t *ctx,
+    vpx_get_frame_buffer_cb_fn_t cb_get,
+    vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
+  if (cb_get == NULL || cb_release == NULL) {
+    return VPX_CODEC_INVALID_PARAM;
+  } else if (ctx->pbi == NULL) {
+    // Only accept the frame buffer functions while the decoder has not been
+    // initialized yet; once it has, changes are rejected below.
+    ctx->get_ext_fb_cb = cb_get;
+    ctx->release_ext_fb_cb = cb_release;
+    ctx->ext_priv = cb_priv;
+    return VPX_CODEC_OK;
+  }
+
+  return VPX_CODEC_ERROR;
+}
+
 static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx,
                                         vpx_codec_mmap_t *mmap,
                                         vpx_codec_iter_t *iter) {
@@ -700,7 +736,8 @@
 CODEC_INTERFACE(vpx_codec_vp9_dx) = {
   "WebM Project VP9 Decoder" VERSION_STRING,
   VPX_CODEC_INTERNAL_ABI_VERSION,
-  VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC,
+  VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC |
+      VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER,
   /* vpx_codec_caps_t          caps; */
   vp9_init,         /* vpx_codec_init_fn_t       init; */
   vp9_destroy,      /* vpx_codec_destroy_fn_t    destroy; */
@@ -712,6 +749,7 @@
     vp9_get_si,       /* vpx_codec_get_si_fn_t     get_si; */
     vp9_decode,       /* vpx_codec_decode_fn_t     decode; */
     vp9_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
+    vp9_set_fb_fn,    /* vpx_codec_set_fb_fn_t     set_fb_fn; */
   },
   { // NOLINT
     /* encoder functions */
diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c
index 50f45c2..9f526b0a 100644
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -18,9 +18,11 @@
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+
 #include "./args.h"
-#include "./ivfenc.h"
 #include "./tools_common.h"
+#include "./video_writer.h"
+
 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
@@ -73,10 +75,10 @@
 static const uint32_t default_kf_dist = 100;
 
 typedef struct {
-  char *output_filename;
+  const char *input_filename;
+  const char *output_filename;
   uint32_t frames_to_code;
   uint32_t frames_to_skip;
-  struct VpxInputContext input_ctx;
 } AppInput;
 
 static const char *exec_name;
@@ -92,8 +94,10 @@
 static void parse_command_line(int argc, const char **argv_,
                                AppInput *app_input, SvcContext *svc_ctx,
                                vpx_codec_enc_cfg_t *enc_cfg) {
-  struct arg arg;
-  char **argv, **argi, **argj;
+  struct arg arg = {0};
+  char **argv = NULL;
+  char **argi = NULL;
+  char **argj = NULL;
   vpx_codec_err_t res;
 
   // initialize SvcContext with parameters that will be passed to vpx_svc_init
@@ -160,7 +164,7 @@
   if (argv[0] == NULL || argv[1] == 0) {
     usage_exit();
   }
-  app_input->input_ctx.filename = argv[0];
+  app_input->input_filename = argv[0];
   app_input->output_filename = argv[1];
   free(argv);
 
@@ -183,7 +187,8 @@
 
 int main(int argc, const char **argv) {
   AppInput app_input = {0};
-  FILE *outfile;
+  VpxVideoWriter *writer = NULL;
+  VpxVideoInfo info = {0};
   vpx_codec_ctx_t codec;
   vpx_codec_enc_cfg_t enc_cfg;
   SvcContext svc_ctx;
@@ -193,6 +198,7 @@
   vpx_codec_err_t res;
   int pts = 0;            /* PTS starts at 0 */
   int frame_duration = 1; /* 1 timebase tick per frame */
+  FILE *infile = NULL;
 
   memset(&svc_ctx, 0, sizeof(svc_ctx));
   svc_ctx.log_print = 1;
@@ -203,27 +209,36 @@
   if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32))
     die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
 
-  if (!(app_input.input_ctx.file = fopen(app_input.input_ctx.filename, "rb")))
-    die("Failed to open %s for reading\n", app_input.input_ctx.filename);
-
-  if (!(outfile = fopen(app_input.output_filename, "wb")))
-    die("Failed to open %s for writing\n", app_input.output_filename);
+  if (!(infile = fopen(app_input.input_filename, "rb")))
+    die("Failed to open %s for reading\n", app_input.input_filename);
 
   // Initialize codec
   if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
       VPX_CODEC_OK)
     die("Failed to initialize encoder\n");
 
-  ivf_write_file_header(outfile, &enc_cfg, VP9_FOURCC, 0);
+  info.codec_fourcc = VP9_FOURCC;
+  info.time_base.numerator = enc_cfg.g_timebase.num;
+  info.time_base.denominator = enc_cfg.g_timebase.den;
+  if (vpx_svc_get_layer_resolution(&svc_ctx, svc_ctx.spatial_layers - 1,
+                                   (unsigned int *)&info.frame_width,
+                                   (unsigned int *)&info.frame_height) !=
+      VPX_CODEC_OK) {
+    die("Failed to get output resolution");
+  }
+  writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
+                                 &info);
+  if (!writer)
+    die("Failed to open %s for writing\n", app_input.output_filename);
 
   // skip initial frames
-  for (i = 0; i < app_input.frames_to_skip; ++i) {
-    read_yuv_frame(&app_input.input_ctx, &raw);
-  }
+  for (i = 0; i < app_input.frames_to_skip; ++i)
+    vpx_img_read(&raw, infile);
 
   // Encode frames
   while (frame_cnt < app_input.frames_to_code) {
-    if (read_yuv_frame(&app_input.input_ctx, &raw)) break;
+    if (!vpx_img_read(&raw, infile))
+      break;
 
     res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
                          VPX_DL_REALTIME);
@@ -232,9 +247,10 @@
       die_codec(&codec, "Failed to encode frame");
     }
     if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
-      ivf_write_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx));
-      (void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1,
-                   vpx_svc_get_frame_size(&svc_ctx), outfile);
+      vpx_video_writer_write_frame(writer,
+                                   vpx_svc_get_buffer(&svc_ctx),
+                                   vpx_svc_get_frame_size(&svc_ctx),
+                                   pts);
     }
     ++frame_cnt;
     pts += frame_duration;
@@ -242,22 +258,11 @@
 
   printf("Processed %d frames\n", frame_cnt);
 
-  fclose(app_input.input_ctx.file);
+  fclose(infile);
   if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
 
-  // rewrite the output file headers with the actual frame count, and
-  // resolution of the highest layer
-  if (!fseek(outfile, 0, SEEK_SET)) {
-    // get resolution of highest layer
-    if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(&svc_ctx,
-                                                     svc_ctx.spatial_layers - 1,
-                                                     &enc_cfg.g_w,
-                                                     &enc_cfg.g_h)) {
-      die("Failed to get output resolution");
-    }
-    ivf_write_file_header(outfile, &enc_cfg, VP9_FOURCC, frame_cnt);
-  }
-  fclose(outfile);
+  vpx_video_writer_close(writer);
+
   vpx_img_free(&raw);
 
   // display average size, psnr
diff --git a/vpx/exports_dec b/vpx/exports_dec
index ed121f7..3ce1499 100644
--- a/vpx/exports_dec
+++ b/vpx/exports_dec
@@ -6,4 +6,5 @@
 text vpx_codec_peek_stream_info
 text vpx_codec_register_put_frame_cb
 text vpx_codec_register_put_slice_cb
+text vpx_codec_set_frame_buffer_functions
 text vpx_codec_set_mem_map
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 0f42a1d..51ca65e 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -59,7 +59,7 @@
  * types, removing or reassigning enums, adding/removing/rearranging
  * fields to structures
  */
-#define VPX_CODEC_INTERNAL_ABI_VERSION (4) /**<\hideinitializer*/
+#define VPX_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/
 
 typedef struct vpx_codec_alg_priv  vpx_codec_alg_priv_t;
 typedef struct vpx_codec_priv_enc_mr_cfg vpx_codec_priv_enc_mr_cfg_t;
@@ -218,6 +218,36 @@
 typedef vpx_image_t *(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx,
                                                  vpx_codec_iter_t     *iter);
 
+/*!\brief Pass in external frame buffers for the decoder to use.
+ *
+ * Registers functions to be called when libvpx needs a frame buffer
+ * to decode the current frame and a function to be called when libvpx does
+ * not internally reference the frame buffer. This set function must
+ * be called before the first call to decode or libvpx will assume the
+ * default behavior of allocating frame buffers internally.
+ *
+ * \param[in] ctx          Pointer to this instance's context
+ * \param[in] cb_get       Pointer to the get callback function
+ * \param[in] cb_release   Pointer to the release callback function
+ * \param[in] cb_priv      Callback's private data
+ *
+ * \retval #VPX_CODEC_OK
+ *     External frame buffers will be used by libvpx.
+ * \retval #VPX_CODEC_INVALID_PARAM
+ *     One or more of the callbacks were NULL.
+ * \retval #VPX_CODEC_ERROR
+ *     Decoder context not initialized, or algorithm not capable of
+ *     using external frame buffers.
+ *
+ * \note
+ * When decoding VP9, the application may be required to pass in at least
+ * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
+ * buffers.
+ */
+typedef vpx_codec_err_t (*vpx_codec_set_fb_fn_t)(
+    vpx_codec_alg_priv_t *ctx,
+    vpx_get_frame_buffer_cb_fn_t cb_get,
+    vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
 
 /*\brief eXternal Memory Allocation memory map get iterator
  *
@@ -308,6 +338,7 @@
     vpx_codec_get_si_fn_t     get_si;      /**< \copydoc ::vpx_codec_get_si_fn_t */
     vpx_codec_decode_fn_t     decode;      /**< \copydoc ::vpx_codec_decode_fn_t */
     vpx_codec_get_frame_fn_t  get_frame;   /**< \copydoc ::vpx_codec_get_frame_fn_t */
+    vpx_codec_set_fb_fn_t     set_fb_fn;   /**< \copydoc ::vpx_codec_set_fb_fn_t */
   } dec;
   struct vpx_codec_enc_iface {
     vpx_codec_enc_cfg_map_t           *cfg_maps;      /**< \copydoc ::vpx_codec_enc_cfg_map_t */
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index a99e48f..63fdaf3 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -226,3 +226,21 @@
 
   return SAVE_STATUS(ctx, res);
 }
+
+vpx_codec_err_t vpx_codec_set_frame_buffer_functions(
+    vpx_codec_ctx_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get,
+    vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
+  /* Stays VPX_CODEC_ERROR when the context is uninitialized or the codec
+   * does not advertise VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER. */
+  vpx_codec_err_t res = VPX_CODEC_ERROR;
+
+  if (!ctx || !cb_get || !cb_release) {
+    res = VPX_CODEC_INVALID_PARAM;
+  } else if (ctx->iface && ctx->priv &&
+             (ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) {
+    res = ctx->iface->dec.set_fb_fn(ctx->priv->alg_priv, cb_get, cb_release,
+                                    cb_priv);
+  }
+
+  return SAVE_STATUS(ctx, res);
+}
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 7356bae..ba18328 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -30,6 +30,7 @@
 #endif
 
 #include "./vpx_codec.h"
+#include "./vpx_frame_buffer.h"
 
   /*!\brief Current ABI version number
    *
@@ -39,7 +40,7 @@
    * types, removing or reassigning enums, adding/removing/rearranging
    * fields to structures
    */
-#define VPX_DECODER_ABI_VERSION (2 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
+#define VPX_DECODER_ABI_VERSION (3 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
 
   /*! \brief Decoder capabilities bitfield
    *
@@ -66,6 +67,8 @@
    */
 #define VPX_CODEC_CAP_FRAME_THREADING   0x200000 /**< Can support frame-based
                                                       multi-threading */
+#define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000 /**< Can support external
+                                                          frame buffers */
 
 #define VPX_CODEC_USE_POSTPROC   0x10000 /**< Postprocess decoded frame */
 #define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /**< Conceal errors in decoded
@@ -326,6 +329,51 @@
 
   /*!@} - end defgroup cap_put_slice*/
 
+  /*!\defgroup cap_external_frame_buffer External Frame Buffer Functions
+   *
+   * The following section is required to be implemented for all decoders
+   * that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability.
+   * Calling this function for codecs that don't advertise this capability
+   * will result in an error code being returned, usually VPX_CODEC_ERROR.
+   *
+   * \note
+   * Currently this only works with VP9.
+   * @{
+   */
+
+  /*!\brief Pass in external frame buffers for the decoder to use.
+   *
+   * Registers functions to be called when libvpx needs a frame buffer
+   * to decode the current frame and a function to be called when libvpx does
+   * not internally reference the frame buffer. This set function must
+   * be called before the first call to decode or libvpx will assume the
+   * default behavior of allocating frame buffers internally.
+   *
+   * \param[in] ctx          Pointer to this instance's context
+   * \param[in] cb_get       Pointer to the get callback function
+   * \param[in] cb_release   Pointer to the release callback function
+   * \param[in] cb_priv      Callback's private data
+   *
+   * \retval #VPX_CODEC_OK
+   *     External frame buffers will be used by libvpx.
+   * \retval #VPX_CODEC_INVALID_PARAM
+   *     One or more of the callbacks were NULL.
+   * \retval #VPX_CODEC_ERROR
+   *     Decoder context not initialized, or algorithm not capable of
+   *     using external frame buffers.
+   *
+   * \note
+   * When decoding VP9, the application may be required to pass in at least
+   * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
+   * buffers.
+   */
+  vpx_codec_err_t vpx_codec_set_frame_buffer_functions(
+      vpx_codec_ctx_t *ctx,
+      vpx_get_frame_buffer_cb_fn_t cb_get,
+      vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
+
+  /*!@} - end defgroup cap_external_frame_buffer */
+
   /*!@} - end defgroup decoder*/
 #ifdef __cplusplus
 }
diff --git a/vpx/vpx_frame_buffer.h b/vpx/vpx_frame_buffer.h
index b5489b4..6803759 100644
--- a/vpx/vpx_frame_buffer.h
+++ b/vpx/vpx_frame_buffer.h
@@ -45,8 +45,9 @@
  * decoder needs a frame buffer to decode a compressed image into. This
  * function may be called more than once for every call to vpx_codec_decode.
  * The application may set fb->priv to some data which will be passed
- * back in the ximage and the release function call. On success the callback
- * must return 0. Any failure the callback must return a value less than 0.
+ * back in the ximage and the release function call. |fb| is guaranteed to
+ * not be NULL. On success the callback must return 0. On any failure the
+ * callback must return a value less than 0.
  *
  * \param[in] priv         Callback's private data
  * \param[in] new_size     Size in bytes needed by the buffer
@@ -58,8 +59,9 @@
 /*!\brief release frame buffer callback prototype
  *
  * This callback is invoked by the decoder when the frame buffer is not
- * referenced by any other buffers. On success the callback must return 0.
- * Any failure the callback must return a value less than 0.
+ * referenced by any other buffers. |fb| is guaranteed to not be NULL. On
+ * success the callback must return 0. On any failure the callback must return
+ * a value less than 0.
  *
  * \param[in] priv         Callback's private data
  * \param[in] fb           Pointer to vpx_codec_frame_buffer_t
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index d27325c..8d0f4ec 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -28,7 +28,7 @@
    * types, removing or reassigning enums, adding/removing/rearranging
    * fields to structures
    */
-#define VPX_IMAGE_ABI_VERSION (1) /**<\hideinitializer*/
+#define VPX_IMAGE_ABI_VERSION (2) /**<\hideinitializer*/
 
 
 #define VPX_IMG_FMT_PLANAR     0x100  /**< Image is a planar format */
@@ -139,6 +139,8 @@
     unsigned char *img_data;       /**< private */
     int      img_data_owner; /**< private */
     int      self_allocd;    /**< private */
+
+    void    *fb_priv; /**< Frame buffer data associated with the image. */
   } vpx_image_t; /**< alias for struct vpx_image */
 
   /**\brief Representation of a rectangle on a surface */
diff --git a/vpx_ports/vpx_ports.mk b/vpx_ports/vpx_ports.mk
index e6cb52f..869a204 100644
--- a/vpx_ports/vpx_ports.mk
+++ b/vpx_ports/vpx_ports.mk
@@ -19,7 +19,6 @@
 PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm
 PORTS_SRCS-$(BUILD_LIBVPX) += x86.h
 PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm
-PORTS_SRCS-$(BUILD_LIBVPX) += x86_cpuid.c
 endif
 
 PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 603e2b6..bc99f89 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -168,8 +168,6 @@
   return flags & mask;
 }
 
-vpx_cpu_t vpx_x86_vendor(void);
-
 #if ARCH_X86_64 && defined(_MSC_VER)
 unsigned __int64 __rdtsc(void);
 #pragma intrinsic(__rdtsc)
diff --git a/vpx_ports/x86_cpuid.c b/vpx_ports/x86_cpuid.c
deleted file mode 100644
index 02d382c..0000000
--- a/vpx_ports/x86_cpuid.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string.h>
-#include "x86.h"
-
-struct cpuid_vendors {
-  char vendor_string[12];
-  vpx_cpu_t vendor_id;
-};
-
-static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] = {
-  { "AuthenticAMD", VPX_CPU_AMD           },
-  { "AMDisbetter!", VPX_CPU_AMD_OLD       },
-  { "CentaurHauls", VPX_CPU_CENTAUR       },
-  { "CyrixInstead", VPX_CPU_CYRIX         },
-  { "GenuineIntel", VPX_CPU_INTEL         },
-  { "NexGenDriven", VPX_CPU_NEXGEN        },
-  { "Geode by NSC", VPX_CPU_NSC           },
-  { "RiseRiseRise", VPX_CPU_RISE          },
-  { "SiS SiS SiS ", VPX_CPU_SIS           },
-  { "GenuineTMx86", VPX_CPU_TRANSMETA     },
-  { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD },
-  { "UMC UMC UMC ", VPX_CPU_UMC           },
-  { "VIA VIA VIA ", VPX_CPU_VIA           },
-};
-
-vpx_cpu_t vpx_x86_vendor(void) {
-  unsigned int reg_eax;
-  unsigned int vs[3];
-  int i;
-
-  /* Get the Vendor String from the CPU */
-  cpuid(0, 0, reg_eax, vs[0], vs[2], vs[1]);
-
-  for (i = 0; i < VPX_CPU_LAST; i++) {
-    if (strncmp((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0)
-      return (cpuid_vendor_list[i].vendor_id);
-  }
-
-  return VPX_CPU_UNKNOWN;
-}
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index ab0a30a..5e95d31 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -60,7 +60,7 @@
     const int frame_size = yplane_size + 2 * uvplane_size;
 
     if (!ybf->buffer_alloc) {
-      ybf->buffer_alloc = vpx_memalign(32, frame_size);
+      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
       ybf->buffer_alloc_sz = frame_size;
     }
 
@@ -180,12 +180,12 @@
       // removed if border is totally removed.
       vpx_memset(fb->data, 0, fb->size);
 
-      ybf->buffer_alloc = yv12_align_addr(fb->data, 32);
+      ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
     } else if (frame_size > ybf->buffer_alloc_sz) {
       // Allocation to hold larger frame, or first allocation.
       if (ybf->buffer_alloc)
         vpx_free(ybf->buffer_alloc);
-      ybf->buffer_alloc = vpx_memalign(32, frame_size);
+      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
       if (!ybf->buffer_alloc)
         return -1;
 
diff --git a/vpxdec.c b/vpxdec.c
index 98d1550..6c1ce11 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -37,19 +37,6 @@
 
 static const char *exec_name;
 
-static const struct {
-  char const *name;
-  vpx_codec_iface_t *(*iface)(void);
-  uint32_t fourcc;
-} ifaces[] = {
-#if CONFIG_VP8_DECODER
-  {"vp8",  vpx_codec_vp8_dx,   VP8_FOURCC},
-#endif
-#if CONFIG_VP9_DECODER
-  {"vp9",  vpx_codec_vp9_dx,   VP9_FOURCC},
-#endif
-};
-
 struct VpxDecInputContext {
   struct VpxInputContext *vpx_input_ctx;
   struct WebmInputContext *webm_ctx;
@@ -88,6 +75,8 @@
 static const arg_def_t scalearg = ARG_DEF("S", "scale", 0,
                                             "Scale output frames uniformly");
 
+static const arg_def_t fb_arg =
+    ARG_DEF(NULL, "frame-buffers", 1, "Number of frame buffers to use");
 
 static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
                                         "Compute the MD5 sum of the decoded frame");
@@ -95,7 +84,7 @@
 static const arg_def_t *all_args[] = {
   &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
   &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile,
-  &threadsarg, &verbosearg, &scalearg,
+  &threadsarg, &verbosearg, &scalearg, &fb_arg,
   &md5arg,
   &error_concealment,
   NULL
@@ -170,10 +159,11 @@
          );
   fprintf(stderr, "\nIncluded decoders:\n\n");
 
-  for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+  for (i = 0; i < get_vpx_decoder_count(); ++i) {
+    const VpxInterface *const decoder = get_vpx_decoder_by_index(i);
     fprintf(stderr, "    %-6s - %s\n",
-            ifaces[i].name,
-            vpx_codec_iface_name(ifaces[i].iface()));
+            decoder->name, vpx_codec_iface_name(decoder->interface()));
+  }
 
   exit(EXIT_FAILURE);
 }
@@ -241,18 +231,6 @@
   }
 }
 
-static int get_image_plane_width(int plane, const vpx_image_t *img) {
-  return (plane > 0 && img->x_chroma_shift > 0) ?
-             (img->d_w + 1) >> img->x_chroma_shift :
-             img->d_w;
-}
-
-static int get_image_plane_height(int plane, const vpx_image_t *img) {
-  return (plane > 0 &&  img->y_chroma_shift > 0) ?
-             (img->d_h + 1) >> img->y_chroma_shift :
-             img->d_h;
-}
-
 static void update_image_md5(const vpx_image_t *img, const int planes[3],
                              MD5Context *md5) {
   int i, y;
@@ -261,8 +239,8 @@
     const int plane = planes[i];
     const unsigned char *buf = img->planes[plane];
     const int stride = img->stride[plane];
-    const int w = get_image_plane_width(plane, img);
-    const int h = get_image_plane_height(plane, img);
+    const int w = vpx_img_plane_width(img, plane);
+    const int h = vpx_img_plane_height(img, plane);
 
     for (y = 0; y < h; ++y) {
       MD5Update(md5, buf, w);
@@ -279,8 +257,8 @@
     const int plane = planes[i];
     const unsigned char *buf = img->planes[plane];
     const int stride = img->stride[plane];
-    const int w = get_image_plane_width(plane, img);
-    const int h = get_image_plane_height(plane, img);
+    const int w = vpx_img_plane_width(img, plane);
+    const int h = vpx_img_plane_height(img, plane);
 
     for (y = 0; y < h; ++y) {
       fwrite(buf, 1, w, file);
@@ -300,11 +278,12 @@
     int i;
 
     if (mem_get_le32(buf) < 256 * 1024 * 1024) {
-      for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) {
-        if (!vpx_codec_peek_stream_info(ifaces[i].iface(),
+      for (i = 0; i < get_vpx_decoder_count(); ++i) {
+        const VpxInterface *const decoder = get_vpx_decoder_by_index(i);
+        if (!vpx_codec_peek_stream_info(decoder->interface(),
                                         buf + 4, 32 - 4, &si)) {
           is_raw = 1;
-          input->fourcc = ifaces[i].fourcc;
+          input->fourcc = decoder->fourcc;
           input->width = si.w;
           input->height = si.h;
           input->framerate.numerator = 30;
@@ -325,6 +304,68 @@
           (float)frame_out * 1000000.0 / (float)dx_time);
 }
 
+struct ExternalFrameBuffer {  // One application-allocated frame buffer slot.
+  uint8_t* data;  // Buffer memory handed to libvpx via the get callback.
+  size_t size;    // Size of |data| in bytes.
+  int in_use;     // Set to 1 by the get callback, cleared by release.
+};
+
+struct ExternalFrameBufferList {  // Private data passed to the callbacks.
+  int num_external_frame_buffers;      // Number of entries in |ext_fb|.
+  struct ExternalFrameBuffer *ext_fb;  // Array of frame buffer slots.
+};
+
+// Callback used by libvpx to request an external frame buffer. |cb_priv| is
+// the application private data passed into the set function. |min_size| is
+// the minimum size in bytes needed to decode the next frame. |fb| receives the
+// chosen buffer. Returns 0 on success, -1 if no buffer could be provided.
+int get_vp9_frame_buffer(void *cb_priv, size_t min_size,
+                         vpx_codec_frame_buffer_t *fb) {
+  int i;
+  struct ExternalFrameBufferList *const ext_fb_list =
+      (struct ExternalFrameBufferList *)cb_priv;
+  if (ext_fb_list == NULL || ext_fb_list->ext_fb == NULL)
+    return -1;
+
+  // Find a free frame buffer.
+  for (i = 0; i < ext_fb_list->num_external_frame_buffers; ++i) {
+    if (!ext_fb_list->ext_fb[i].in_use)
+      break;
+  }
+
+  if (i == ext_fb_list->num_external_frame_buffers)
+    return -1;
+
+  if (ext_fb_list->ext_fb[i].size < min_size) {
+    free(ext_fb_list->ext_fb[i].data);
+    ext_fb_list->ext_fb[i].data = (uint8_t *)malloc(min_size);
+    // The old buffer is gone either way; never leave a stale size behind.
+    ext_fb_list->ext_fb[i].size = ext_fb_list->ext_fb[i].data ? min_size : 0;
+    if (!ext_fb_list->ext_fb[i].data)
+      return -1;
+  }
+
+  fb->data = ext_fb_list->ext_fb[i].data;
+  fb->size = ext_fb_list->ext_fb[i].size;
+  ext_fb_list->ext_fb[i].in_use = 1;
+
+  // Set the frame buffer's private data to point at the external frame buffer.
+  fb->priv = &ext_fb_list->ext_fb[i];
+  return 0;
+}
+
+// Release callback: libvpx invokes this when there are no references left to
+// the frame buffer. |fb->priv| is the ExternalFrameBuffer slot that was handed
+// out for |fb|; it is marked free again. |cb_priv| is unused. Returns 0.
+int release_vp9_frame_buffer(void *cb_priv,
+                             vpx_codec_frame_buffer_t *fb) {
+  struct ExternalFrameBuffer *const slot =
+      (struct ExternalFrameBuffer *)fb->priv;
+  (void)cb_priv;  // Unused.
+  slot->in_use = 0;
+  return 0;
+}
+
 void generate_filename(const char *pattern, char *out, size_t q_len,
                        unsigned int d_w, unsigned int d_h,
                        unsigned int frame_in) {
@@ -441,7 +482,6 @@
 int main_loop(int argc, const char **argv_) {
   vpx_codec_ctx_t       decoder;
   char                  *fn = NULL;
-  int                    i;
   uint8_t               *buf = NULL;
   size_t                 bytes_in_buffer = 0, buffer_size = 0;
   FILE                  *infile;
@@ -450,7 +490,8 @@
   int                    stop_after = 0, postproc = 0, summary = 0, quiet = 1;
   int                    arg_skip = 0;
   int                    ec_enabled = 0;
-  vpx_codec_iface_t       *iface = NULL;
+  const VpxInterface *interface = NULL;
+  const VpxInterface *fourcc_interface = NULL;
   unsigned long          dx_time = 0;
   struct arg               arg;
   char                   **argv, **argi, **argj;
@@ -470,6 +511,8 @@
   int                     do_scale = 0;
   vpx_image_t             *scaled_img = NULL;
   int                     frame_avail, got_data;
+  int                     num_external_frame_buffers = 0;
+  struct ExternalFrameBufferList ext_fb_list = {0};
 
   const char *outfile_pattern = NULL;
   char outfile_name[PATH_MAX] = {0};
@@ -493,17 +536,9 @@
     arg.argv_step = 1;
 
     if (arg_match(&arg, &codecarg, argi)) {
-      int j, k = -1;
-
-      for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
-        if (!strcmp(ifaces[j].name, arg.val))
-          k = j;
-
-      if (k >= 0)
-        iface = ifaces[k].iface();
-      else
-        die("Error: Unrecognized argument (%s) to --codec\n",
-            arg.val);
+      interface = get_vpx_decoder_by_name(arg.val);
+      if (!interface)
+        die("Error: Unrecognized argument (%s) to --codec\n", arg.val);
     } else if (arg_match(&arg, &looparg, argi)) {
       // no-op
     } else if (arg_match(&arg, &outputfile, argi))
@@ -536,6 +571,8 @@
       quiet = 0;
     else if (arg_match(&arg, &scalearg, argi))
       do_scale = 1;
+    else if (arg_match(&arg, &fb_arg, argi))
+      num_external_frame_buffers = arg_parse_uint(&arg);
 
 #if CONFIG_VP8_DECODER
     else if (arg_match(&arg, &addnoise_level, argi)) {
@@ -660,24 +697,20 @@
     }
   }
 
-  /* Try to determine the codec from the fourcc. */
-  for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
-    if (vpx_input_ctx.fourcc == ifaces[i].fourcc) {
-      vpx_codec_iface_t *vpx_iface = ifaces[i].iface();
+  fourcc_interface = get_vpx_decoder_by_fourcc(vpx_input_ctx.fourcc);
+  if (interface && fourcc_interface && interface != fourcc_interface)
+    warn("Header indicates codec: %s\n", fourcc_interface->name);
+  else
+    interface = fourcc_interface;
 
-      if (iface && iface != vpx_iface)
-        warn("Header indicates codec: %s\n", ifaces[i].name);
-      else
-        iface = vpx_iface;
-
-      break;
-    }
+  if (!interface)
+    interface = get_vpx_decoder_by_index(0);
 
   dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) |
               (ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0);
-  if (vpx_codec_dec_init(&decoder, iface ? iface :  ifaces[0].iface(), &cfg,
-                         dec_flags)) {
-    fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
+  if (vpx_codec_dec_init(&decoder, interface->interface(), &cfg, dec_flags)) {
+    fprintf(stderr, "Failed to initialize decoder: %s\n",
+            vpx_codec_error(&decoder));
     return EXIT_FAILURE;
   }
 
@@ -726,6 +759,19 @@
     arg_skip--;
   }
 
+  if (num_external_frame_buffers > 0) {
+    ext_fb_list.num_external_frame_buffers = num_external_frame_buffers;
+    ext_fb_list.ext_fb = (struct ExternalFrameBuffer *)calloc(
+        num_external_frame_buffers, sizeof(*ext_fb_list.ext_fb));
+    if (vpx_codec_set_frame_buffer_functions(
+            &decoder, get_vp9_frame_buffer, release_vp9_frame_buffer,
+            &ext_fb_list)) {
+      fprintf(stderr, "Failed to configure external frame buffers: %s\n",
+              vpx_codec_error(&decoder));
+      return EXIT_FAILURE;
+    }
+  }
+
   frame_avail = 1;
   got_data = 0;
 
@@ -898,6 +944,11 @@
 
   if (scaled_img) vpx_img_free(scaled_img);
 
+  for (i = 0; i < ext_fb_list.num_external_frame_buffers; ++i) {
+    free(ext_fb_list.ext_fb[i].data);
+  }
+  free(ext_fb_list.ext_fb);
+
   fclose(infile);
   free(argv);
 
diff --git a/vpxenc.c b/vpxenc.c
index 5e36fd9..73b3144 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -61,24 +61,6 @@
 
 static const char *exec_name;
 
-static const struct codec_item {
-  char const              *name;
-  vpx_codec_iface_t *(*iface)(void);
-  vpx_codec_iface_t *(*dx_iface)(void);
-  unsigned int             fourcc;
-} codecs[] = {
-#if CONFIG_VP8_ENCODER && CONFIG_VP8_DECODER
-  {"vp8", &vpx_codec_vp8_cx, &vpx_codec_vp8_dx, VP8_FOURCC},
-#elif CONFIG_VP8_ENCODER && !CONFIG_VP8_DECODER
-  {"vp8", &vpx_codec_vp8_cx, NULL, VP8_FOURCC},
-#endif
-#if CONFIG_VP9_ENCODER && CONFIG_VP9_DECODER
-  {"vp9", &vpx_codec_vp9_cx, &vpx_codec_vp9_dx, VP9_FOURCC},
-#elif CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER
-  {"vp9", &vpx_codec_vp9_cx, NULL, VP9_FOURCC},
-#endif
-};
-
 static void warn_or_exit_on_errorv(vpx_codec_ctx_t *ctx, int fatal,
                                    const char *s, va_list ap) {
   if (ctx->err) {
@@ -462,14 +444,13 @@
   fprintf(stderr, "\nStream timebase (--timebase):\n"
           "  The desired precision of timestamps in the output, expressed\n"
           "  in fractional seconds. Default is 1/1000.\n");
-  fprintf(stderr, "\n"
-          "Included encoders:\n"
-          "\n");
+  fprintf(stderr, "\nIncluded encoders:\n\n");
 
-  for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++)
+  for (i = 0; i < get_vpx_encoder_count(); ++i) {
+    const VpxInterface *const encoder = get_vpx_encoder_by_index(i);
     fprintf(stderr, "    %-6s - %s\n",
-            codecs[i].name,
-            vpx_codec_iface_name(codecs[i].iface()));
+            encoder->name, vpx_codec_iface_name(encoder->interface()));
+  }
 
   exit(EXIT_FAILURE);
 }
@@ -666,7 +647,7 @@
 
   /* Initialize default parameters */
   memset(global, 0, sizeof(*global));
-  global->codec = codecs;
+  global->codec = get_vpx_encoder_by_index(0);
   global->passes = 0;
   global->use_i420 = 1;
   /* Assign default deadline to good quality */
@@ -676,18 +657,9 @@
     arg.argv_step = 1;
 
     if (arg_match(&arg, &codecarg, argi)) {
-      int j, k = -1;
-
-      for (j = 0; j < sizeof(codecs) / sizeof(codecs[0]); j++)
-        if (!strcmp(codecs[j].name, arg.val))
-          k = j;
-
-      if (k >= 0)
-        global->codec = codecs + k;
-      else
-        die("Error: Unrecognized argument (%s) to --codec\n",
-            arg.val);
-
+      global->codec = get_vpx_encoder_by_name(arg.val);
+      if (!global->codec)
+        die("Error: Unrecognized argument (%s) to --codec\n", arg.val);
     } else if (arg_match(&arg, &passes, argi)) {
       global->passes = arg_parse_uint(&arg);
 
@@ -750,7 +722,7 @@
 #if CONFIG_VP9_ENCODER
     // Make default VP9 passes = 2 until there is a better quality 1-pass
     // encoder
-    global->passes = (global->codec->iface == vpx_codec_vp9_cx ? 2 : 1);
+    global->passes = strcmp(global->codec->name, "vp9") == 0 ? 2 : 1;
 #else
     global->passes = 1;
 #endif
@@ -830,7 +802,7 @@
     vpx_codec_err_t  res;
 
     /* Populate encoder configuration */
-    res = vpx_codec_enc_config_default(global->codec->iface(),
+    res = vpx_codec_enc_config_default(global->codec->interface(),
                                        &stream->config.cfg,
                                        global->usage);
     if (res)
@@ -874,15 +846,15 @@
   struct stream_config    *config = &stream->config;
   int                      eos_mark_found = 0;
 
-  /* Handle codec specific options */
+  // Handle codec specific options
   if (0) {
 #if CONFIG_VP8_ENCODER
-  } else if (global->codec->iface == vpx_codec_vp8_cx) {
+  } else if (strcmp(global->codec->name, "vp8") == 0) {
     ctrl_args = vp8_args;
     ctrl_args_map = vp8_arg_ctrl_map;
 #endif
 #if CONFIG_VP9_ENCODER
-  } else if (global->codec->iface == vpx_codec_vp9_cx) {
+  } else if (strcmp(global->codec->name, "vp9") == 0) {
     ctrl_args = vp9_args;
     ctrl_args_map = vp9_arg_ctrl_map;
 #endif
@@ -1090,7 +1062,7 @@
 
   if (stream->index == 0) {
     fprintf(stderr, "Codec: %s\n",
-            vpx_codec_iface_name(global->codec->iface()));
+            vpx_codec_iface_name(global->codec->interface()));
     fprintf(stderr, "Source file: %s Format: %s\n", input->filename,
             input->use_i420 ? "I420" : "YV12");
   }
@@ -1214,7 +1186,7 @@
   flags |= global->out_part ? VPX_CODEC_USE_OUTPUT_PARTITION : 0;
 
   /* Construct Encoder Context */
-  vpx_codec_enc_init(&stream->encoder, global->codec->iface(),
+  vpx_codec_enc_init(&stream->encoder, global->codec->interface(),
                      &stream->config.cfg, flags);
   ctx_exit_on_error(&stream->encoder, "Failed to initialize encoder");
 
@@ -1234,7 +1206,8 @@
 
 #if CONFIG_DECODERS
   if (global->test_decode != TEST_DECODE_OFF) {
-    vpx_codec_dec_init(&stream->decoder, global->codec->dx_iface(), NULL, 0);
+    const VpxInterface *decoder = get_vpx_decoder_by_name(global->codec->name);
+    vpx_codec_dec_init(&stream->decoder, decoder->interface(), NULL, 0);
   }
 #endif
 }
@@ -1420,14 +1393,14 @@
 
 static void test_decode(struct stream_state  *stream,
                         enum TestDecodeFatality fatal,
-                        const struct codec_item *codec) {
+                        const VpxInterface *codec) {
   vpx_image_t enc_img, dec_img;
 
   if (stream->mismatch_seen)
     return;
 
   /* Get the internal reference frame */
-  if (codec->fourcc == VP8_FOURCC) {
+  if (strcmp(codec->name, "vp8") == 0) {
     struct vpx_ref_frame ref_enc, ref_dec;
     int width, height;
 
diff --git a/vpxenc.h b/vpxenc.h
index 5103ee6..1e6acaa 100644
--- a/vpxenc.h
+++ b/vpxenc.h
@@ -22,9 +22,11 @@
   TEST_DECODE_WARN,
 };
 
+struct VpxInterface;
+
 /* Configuration elements common to all streams. */
 struct VpxEncoderConfig {
-  const struct codec_item *codec;
+  const struct VpxInterface *codec;
   int passes;
   int pass;
   int usage;