Merge "Add init and reset functions for RD_COST struct"

diff --git a/examples/vp8cx_set_ref.c b/examples/vp8cx_set_ref.c
index b0961a2..5e29d80 100644
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c

@@ -178,7 +178,7 @@
   }
 
   // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, writer)) {};
+  while (encode_frame(&codec, NULL, -1, writer)) {}
 
   printf("\n");
   fclose(infile);

diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index 10d5388..53ede94 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c

@@ -183,7 +183,7 @@
       enc_cfg->kf_min_dist = arg_parse_uint(&arg);
       enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
     } else if (arg_match(&arg, &scale_factors_arg, argi)) {
-      snprintf(string_options, 1024, "%s scale-factors=%s",
+      snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
                string_options, arg.val);
     } else if (arg_match(&arg, &passes_arg, argi)) {
       passes = arg_parse_uint(&arg);
@@ -198,10 +198,10 @@
     } else if (arg_match(&arg, &fpf_name_arg, argi)) {
       fpf_file_name = arg.val;
     } else if (arg_match(&arg, &min_q_arg, argi)) {
-      snprintf(string_options, 1024, "%s min-quantizers=%s",
+      snprintf(string_options, sizeof(string_options), "%s min-quantizers=%s",
                string_options, arg.val);
     } else if (arg_match(&arg, &max_q_arg, argi)) {
-      snprintf(string_options, 1024, "%s max-quantizers=%s",
+      snprintf(string_options, sizeof(string_options), "%s max-quantizers=%s",
                string_options, arg.val);
     } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
       min_bitrate = arg_parse_uint(&arg);

diff --git a/test/i420_video_source.h b/test/i420_video_source.h
index c3315f9..0a18480 100644
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h

@@ -13,104 +13,22 @@
 #include <cstdlib>
 #include <string>
 
-#include "test/video_source.h"
+#include "test/yuv_video_source.h"
 
 namespace libvpx_test {
 
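-// This class extends VideoSource to allow parsing of raw yv12
-// so that we can do actual file encodes.
+// This class extends YUVVideoSource to allow parsing of raw I420
+// so that we can do actual file encodes.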
-class I420VideoSource : public VideoSource {
+class I420VideoSource : public YUVVideoSource {
  public:
   I420VideoSource(const std::string &file_name,
                   unsigned int width, unsigned int height,
                   int rate_numerator, int rate_denominator,
                   unsigned int start, int limit)
-      : file_name_(file_name),
-        input_file_(NULL),
-        img_(NULL),
-        start_(start),
-        limit_(limit),
-        frame_(0),
-        width_(0),
-        height_(0),
-        framerate_numerator_(rate_numerator),
-        framerate_denominator_(rate_denominator) {
-    // This initializes raw_sz_, width_, height_ and allocates an img.
-    SetSize(width, height);
-  }
-
-  virtual ~I420VideoSource() {
-    vpx_img_free(img_);
-    if (input_file_)
-      fclose(input_file_);
-  }
-
-  virtual void Begin() {
-    if (input_file_)
-      fclose(input_file_);
-    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-        << file_name_;
-    if (start_) {
-      fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET);
-    }
-
-    frame_ = start_;
-    FillFrame();
-  }
-
-  virtual void Next() {
-    ++frame_;
-    FillFrame();
-  }
-
-  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL;  }
-
-  // Models a stream where Timebase = 1/FPS, so pts == frame.
-  virtual vpx_codec_pts_t pts() const { return frame_; }
-
-  virtual unsigned long duration() const { return 1; }
-
-  virtual vpx_rational_t timebase() const {
-    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
-    return t;
-  }
-
-  virtual unsigned int frame() const { return frame_; }
-
-  virtual unsigned int limit() const { return limit_; }
-
-  void SetSize(unsigned int width, unsigned int height) {
-    if (width != width_ || height != height_) {
-      vpx_img_free(img_);
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 1);
-      ASSERT_TRUE(img_ != NULL);
-      width_ = width;
-      height_ = height;
-      raw_sz_ = width * height * 3 / 2;
-    }
-  }
-
-  virtual void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
-    // Read a frame from input_file.
-    if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
-      limit_ = frame_;
-    }
-  }
-
- protected:
-  std::string file_name_;
-  FILE *input_file_;
-  vpx_image_t *img_;
-  size_t raw_sz_;
-  unsigned int start_;
-  unsigned int limit_;
-  unsigned int frame_;
-  unsigned int width_;
-  unsigned int height_;
-  int framerate_numerator_;
-  int framerate_denominator_;
+      : YUVVideoSource(file_name, VPX_IMG_FMT_I420,
+                       width, height,
+                       rate_numerator, rate_denominator,
+                       start, limit) {}
 };
 
 }  // namespace libvpx_test

diff --git a/test/svc_test.cc b/test/svc_test.cc
index fdde702..67e83e3 100644
--- a/test/svc_test.cc
+++ b/test/svc_test.cc

@@ -225,10 +225,9 @@
     EXPECT_EQ(received_frames, n);
   }
 
-  void DropLayersAndMakeItVP9Comaptible(struct vpx_fixed_buf *const inputs,
-                                        const int num_super_frames,
-                                        const int remained_spatial_layers,
-                                        const bool is_multiple_frame_contexts) {
+  void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
+                             const int num_super_frames,
+                             const int remained_spatial_layers) {
     ASSERT_TRUE(inputs != NULL);
     ASSERT_GT(num_super_frames, 0);
     ASSERT_GT(remained_spatial_layers, 0);
@@ -250,45 +249,6 @@
       if (frame_count == 0) {
         // There's no super frame but only a single frame.
         ASSERT_EQ(1, remained_spatial_layers);
-        if (is_multiple_frame_contexts) {
-          // Make a new super frame.
-          uint8_t marker = 0xc1;
-          unsigned int mask;
-          int mag;
-
-          // Choose the magnitude.
-          for (mag = 0, mask = 0xff; mag < 4; ++mag) {
-            if (inputs[i].sz < mask)
-              break;
-            mask <<= 8;
-            mask |= 0xff;
-          }
-          marker |= mag << 3;
-          int index_sz = 2 + (mag + 1) * 2;
-
-          inputs[i].buf = realloc(inputs[i].buf, inputs[i].sz + index_sz + 16);
-          ASSERT_TRUE(inputs[i].buf != NULL);
-          uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
-          frame_data[0] &= ~2;      // Set the show_frame flag to 0.
-          frame_data += inputs[i].sz;
-          // Add an one byte frame with show_existing_frame.
-          *frame_data++ = 0x88;
-
-          // Write the super frame index.
-          *frame_data++ = marker;
-
-          frame_sizes[0] = inputs[i].sz;
-          frame_sizes[1] = 1;
-          for (int j = 0; j < 2; ++j) {
-            unsigned int this_sz = frame_sizes[j];
-            for (int k = 0; k <= mag; k++) {
-              *frame_data++ = this_sz & 0xff;
-              this_sz >>= 8;
-            }
-          }
-          *frame_data++ = marker;
-          inputs[i].sz += index_sz + 1;
-        }
       } else {
         // Found a super frame.
         uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
@@ -304,16 +264,13 @@
         }
         ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "
             << "remained_spatial_layers: " << remained_spatial_layers
-            << "    super_frame: " << i
-            << "    is_multiple_frame_context: " << is_multiple_frame_contexts;
-        if (frame == frame_count - 1 && !is_multiple_frame_contexts)
+            << "    super_frame: " << i;
+        if (frame == frame_count - 1)
           continue;
 
         frame_data += frame_sizes[frame];
 
-        // We need to add one more frame for multiple frame contexts.
+        // Rewrite the super frame index so it covers only the kept frames.
-        if (is_multiple_frame_contexts)
-          ++frame;
         uint8_t marker =
             static_cast<const uint8_t*>(inputs[i].buf)[inputs[i].sz - 1];
         const uint32_t mag = ((marker >> 3) & 0x3) + 1;
@@ -323,35 +280,14 @@
         marker |= frame;
 
         // Copy existing frame sizes.
-        memmove(frame_data + (is_multiple_frame_contexts ? 2 : 1),
-                frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);
-        if (is_multiple_frame_contexts) {
-          // Add a one byte frame with flag show_existing_frame.
-          *frame_data++ = 0x88 | (remained_spatial_layers - 1);
-        }
+        memmove(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
+                new_index_sz - 2);
         // New marker.
         frame_data[0] = marker;
         frame_data += (mag * (frame + 1) + 1);
 
-        if (is_multiple_frame_contexts) {
-          // Write the frame size for the one byte frame.
-          frame_data -= mag;
-          *frame_data++ = 1;
-          for (uint32_t j = 1; j < mag; ++j) {
-            *frame_data++ = 0;
-          }
-        }
-
         *frame_data++ = marker;
         inputs[i].sz = frame_data - frame_start;
-
-        if (is_multiple_frame_contexts) {
-          // Change the show frame flag to 0 for all frames.
-          for (int j = 0; j < frame; ++j) {
-            frame_start[0] &= ~2;
-            frame_start += frame_sizes[j];
-          }
-        }
       }
     }
   }
@@ -555,7 +491,7 @@
   vpx_fixed_buf outputs[10];
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
+  DropEnhancementLayers(&outputs[0], 10, 1);
   DecodeNFrames(&outputs[0], 10);
   FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -573,13 +509,13 @@
   Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);
 
   DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 4, false);
+  DropEnhancementLayers(&outputs[0], 10, 4);
   DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 3, false);
+  DropEnhancementLayers(&outputs[0], 10, 3);
   DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, false);
+  DropEnhancementLayers(&outputs[0], 10, 2);
   DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
+  DropEnhancementLayers(&outputs[0], 10, 1);
   DecodeNFrames(&outputs[0], 10);
 
   FreeBitstreamBuffers(&outputs[0], 10);
@@ -616,9 +552,9 @@
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);
   DecodeNFrames(&outputs[0], 20);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 2, false);
+  DropEnhancementLayers(&outputs[0], 20, 2);
   DecodeNFrames(&outputs[0], 20);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 1, false);
+  DropEnhancementLayers(&outputs[0], 20, 1);
   DecodeNFrames(&outputs[0], 20);
 
   FreeBitstreamBuffers(&outputs[0], 20);
@@ -649,7 +585,6 @@
   vpx_fixed_buf outputs[10];
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
   DecodeNFrames(&outputs[0], 10);
   FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -667,7 +602,7 @@
   vpx_fixed_buf outputs[10];
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
+  DropEnhancementLayers(&outputs[0], 10, 1);
   DecodeNFrames(&outputs[0], 10);
   FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -686,7 +621,6 @@
   vpx_fixed_buf outputs[10];
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
   DecodeNFrames(&outputs[0], 10);
   FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -707,32 +641,13 @@
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]);
 
-  vpx_fixed_buf outputs_new[10];
-  for (int i = 0; i < 10; ++i) {
-    outputs_new[i].buf = malloc(outputs[i].sz + 16);
-    ASSERT_TRUE(outputs_new[i].buf != NULL);
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 3, true);
-  DecodeNFrames(&outputs_new[0], 10);
-
-  for (int i = 0; i < 10; ++i) {
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 2, true);
-  DecodeNFrames(&outputs_new[0], 10);
-
-  for (int i = 0; i < 10; ++i) {
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 1, true);
-  DecodeNFrames(&outputs_new[0], 10);
+  DecodeNFrames(&outputs[0], 10);
+  DropEnhancementLayers(&outputs[0], 10, 2);
+  DecodeNFrames(&outputs[0], 10);
+  DropEnhancementLayers(&outputs[0], 10, 1);
+  DecodeNFrames(&outputs[0], 10);
 
   FreeBitstreamBuffers(&outputs[0], 10);
-  FreeBitstreamBuffers(&outputs_new[0], 10);
 }
 
 TEST_F(SvcTest, TwoPassEncode2TemporalLayers) {
@@ -769,7 +684,6 @@
   vpx_fixed_buf outputs[10];
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
   DecodeNFrames(&outputs[0], 10);
   FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -814,7 +728,6 @@
   vpx_fixed_buf outputs[10];
   memset(&outputs[0], 0, sizeof(outputs));
   Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
 
   vpx_fixed_buf base_layer[5];
   for (int i = 0; i < 5; ++i)

diff --git a/test/test-data.mk b/test/test-data.mk
index 404c166..e4dae3a 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk

@@ -7,12 +7,15 @@
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv
 
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
@@ -644,10 +647,12 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-17-show-existing-frame.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-18-resize.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-18-resize.ivf.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
 

diff --git a/test/test-data.sha1 b/test/test-data.sha1
index e6114ab..69d1d2f 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1

@@ -17,12 +17,15 @@
 a432f96ff0a787268e2f94a8092ab161a18d1b06  park_joy_90p_10_420.y4m
 0b194cc312c3a2e84d156a221b0a5eb615dfddc5  park_joy_90p_10_422.y4m
 ff0e0a21dc2adc95b8c1b37902713700655ced17  park_joy_90p_10_444.y4m
+c934da6fb8cc54ee2a8c17c54cf6076dac37ead0  park_joy_90p_10_440.yuv
 614c32ae1eca391e867c70d19974f0d62664dd99  park_joy_90p_12_420.y4m
 c92825f1ea25c5c37855083a69faac6ac4641a9e  park_joy_90p_12_422.y4m
 b592189b885b6cc85db55cc98512a197d73d3b34  park_joy_90p_12_444.y4m
+82c1bfcca368c2f22bad7d693d690d5499ecdd11  park_joy_90p_12_440.yuv
 4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c  park_joy_90p_8_420.y4m
 7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947  park_joy_90p_8_422.y4m
 bdb7856e6bc93599bdda05c2e773a9f22b6c6d03  park_joy_90p_8_444.y4m
+81e1f3843748438b8f2e71db484eb22daf72e939  park_joy_90p_8_440.yuv
 b1f1c3ec79114b9a0651af24ce634afb44a9a419  rush_hour_444.y4m
 5184c46ddca8b1fadd16742e8500115bc8f749da  vp80-00-comprehensive-001.ivf
 65bf1bbbced81b97bd030f376d1b7f61a224793f  vp80-00-comprehensive-002.ivf
@@ -691,6 +694,8 @@
 368dccdde5288c13c25695d2eacdc7402cadf613  vp90-2-19-skip.webm.md5
 ffe460282df2b0e7d4603c2158653ad96f574b02  vp90-2-19-skip-01.webm
 bd21bc9eda4a4a36b221d71ede3a139fc3c7bd85  vp90-2-19-skip-01.webm.md5
+178f5bd239e38cc1cc2657a7a5e1a9f52ad2d3fe  vp90-2-19-skip-02.webm
+9020d5e260bd7df08e2b3d4b86f8623cee3daea2  vp90-2-19-skip-02.webm.md5
 b03c408cf23158638da18dbc3323b99a1635c68a  invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
 0a3884edb3fd8f9d9b500223e650f7de257b67d8  invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
 5e67e24e7f53fd189e565513cef8519b1bd6c712  invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf

diff --git a/test/test.mk b/test/test.mk
index 1bd732d..30c13a1 100644
--- a/test/test.mk
+++ b/test/test.mk

@@ -23,6 +23,7 @@
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h
 
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
@@ -38,6 +39,7 @@
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
 
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h

diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index cccebf8..7efa8c0 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc

@@ -182,7 +182,8 @@
   "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
   "vp90-2-16-intra-only.webm", "vp90-2-17-show-existing-frame.webm",
   "vp90-2-18-resize.ivf", "vp90-2-19-skip.webm",
-  "vp90-2-19-skip-01.webm", "vp91-2-04-yuv444.webm",
+  "vp90-2-19-skip-01.webm", "vp90-2-19-skip-02.webm",
+  "vp91-2-04-yuv444.webm",
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER

diff --git a/test/vp8_denoiser_sse2_test.cc b/test/vp8_denoiser_sse2_test.cc
index d04a505..d4abdad 100644
--- a/test/vp8_denoiser_sse2_test.cc
+++ b/test/vp8_denoiser_sse2_test.cc

@@ -28,19 +28,18 @@
 namespace {
 
 const int kNumPixels = 16 * 16;
-class VP8DenoiserTest
-    : public ::testing::TestWithParam<int> {
+class VP8DenoiserTest : public ::testing::TestWithParam<int> {
  public:
   virtual ~VP8DenoiserTest() {}
 
   virtual void SetUp() {
-    increase_denoising = GetParam();
+    increase_denoising_ = GetParam();
   }
 
   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 
  protected:
-  int increase_denoising;
+  int increase_denoising_;
 };
 
 TEST_P(VP8DenoiserTest, BitexactCheck) {
@@ -63,8 +62,8 @@
 
   for (int i = 0; i < count_test_block; ++i) {
     // Generate random motion magnitude, 20% of which exceed the threshold.
-    uint8_t motion_magnitude_random
-              = rnd.Rand8() % (uint8_t)(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+    const int motion_magnitude_ran =
+        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);
 
     // Initialize a test block with random number in range [0, 255].
     for (int j = 0; j < kNumPixels; ++j) {
@@ -72,20 +71,20 @@
       sig_block_sse2[j] = sig_block_c[j] = rnd.Rand8();
       // The pixels in mc_avg_block are generated by adding a random
       // number in range [-19, 19] to corresponding pixels in sig_block.
-      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0? -1 : 1) *
-             (rnd.Rand8()%20);
+      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0 ? -1 : 1) *
+             (rnd.Rand8() % 20);
       // Clip.
-      mc_avg_block[j] = (temp < 0? 0 : (temp > 255? 255 : temp));
+      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
     }
 
-    // Test denosiser on Y component.
+    // Test denoiser on Y component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(mc_avg_block, stride,
-                               avg_block_c, stride, sig_block_c, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(
+        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
+        motion_magnitude_ran, increase_denoising_));
 
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(mc_avg_block, stride,
-                               avg_block_sse2, stride, sig_block_sse2, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(
+        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
+        motion_magnitude_ran, increase_denoising_));
 
     // Check bitexactness.
     for (int h = 0; h < 16; ++h) {
@@ -94,14 +93,14 @@
       }
     }
 
-    // Test denosiser on UV component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(mc_avg_block, stride,
-                               avg_block_c, stride, sig_block_c, stride,
-                               motion_magnitude_random, increase_denoising));
+    // Test denoiser on UV component.
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(
+        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
+        motion_magnitude_ran, increase_denoising_));
 
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(mc_avg_block, stride,
-                               avg_block_sse2, stride, sig_block_sse2, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(
+        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
+        motion_magnitude_ran, increase_denoising_));
 
     // Check bitexactness.
     for (int h = 0; h < 16; ++h) {
@@ -113,7 +112,5 @@
 }
 
 // Test for all block size.
-INSTANTIATE_TEST_CASE_P(
-    SSE2, VP8DenoiserTest,
-    ::testing::Values(0, 1));
+INSTANTIATE_TEST_CASE_P(SSE2, VP8DenoiserTest, ::testing::Values(0, 1));
 }  // namespace

diff --git a/test/vp9_denoiser_sse2_test.cc b/test/vp9_denoiser_sse2_test.cc
index edebc83..0ecba07 100644
--- a/test/vp9_denoiser_sse2_test.cc
+++ b/test/vp9_denoiser_sse2_test.cc

@@ -29,19 +29,18 @@
 namespace {
 
 const int kNumPixels = 64 * 64;
-class VP9DenoiserTest
-    : public ::testing::TestWithParam<int> {
+class VP9DenoiserTest : public ::testing::TestWithParam<BLOCK_SIZE> {
  public:
   virtual ~VP9DenoiserTest() {}
 
   virtual void SetUp() {
-    bs = (BLOCK_SIZE)GetParam();
+    bs_ = GetParam();
   }
 
   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 
  protected:
-  BLOCK_SIZE bs;
+  BLOCK_SIZE bs_;
 };
 
 TEST_P(VP9DenoiserTest, BitexactCheck) {
@@ -60,8 +59,8 @@
 
   for (int i = 0; i < count_test_block; ++i) {
     // Generate random motion magnitude, 20% of which exceed the threshold.
-    uint8_t motion_magnitude_random
-              = rnd.Rand8() % (uint8_t)(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+    const int motion_magnitude_random =
+        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);
 
     // Initialize a test block with random number in range [0, 255].
     for (int j = 0; j < kNumPixels; ++j) {
@@ -69,23 +68,23 @@
       sig_block[j] = rnd.Rand8();
       // The pixels in mc_avg_block are generated by adding a random
       // number in range [-19, 19] to corresponding pixels in sig_block.
-      temp = sig_block[j] + (rnd.Rand8() % 2 == 0? -1 : 1) *
-             (rnd.Rand8()%20);
+      temp = sig_block[j] + ((rnd.Rand8() % 2 == 0) ? -1 : 1) *
+             (rnd.Rand8() % 20);
       // Clip.
-      mc_avg_block[j] = (temp < 0? 0 : (temp > 255? 255 : temp));
+      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
     }
 
-    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(sig_block, 64,
-                             mc_avg_block, 64, avg_block_c, 64,
-                             0, bs, motion_magnitude_random));
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(
+        sig_block, 64, mc_avg_block, 64, avg_block_c,
+        64, 0, bs_, motion_magnitude_random));
 
-    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(sig_block, 64,
-                             mc_avg_block, 64, avg_block_sse2, 64,
-                             0, bs, motion_magnitude_random));
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(
+        sig_block, 64, mc_avg_block, 64, avg_block_sse2,
+        64, 0, bs_, motion_magnitude_random));
 
     // Test bitexactness.
-    for (int h = 0; h < (4 << b_height_log2_lookup[bs]); ++h) {
-      for (int w = 0; w < (4 << b_width_log2_lookup[bs]); ++w) {
+    for (int h = 0; h < (4 << b_height_log2_lookup[bs_]); ++h) {
+      for (int w = 0; w < (4 << b_width_log2_lookup[bs_]); ++w) {
         EXPECT_EQ(avg_block_c[h * 64 + w], avg_block_sse2[h * 64 + w]);
       }
     }

diff --git a/test/vp9_end_to_end_test.cc b/test/vp9_end_to_end_test.cc
new file mode 100644
index 0000000..a8f6793
--- /dev/null
+++ b/test/vp9_end_to_end_test.cc

@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const unsigned int kWidth  = 160;
+const unsigned int kHeight = 90;
+const unsigned int kFramerate = 50;
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+const int kCpuUsed = 2;
+const double psnr_threshold = 35.0;
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  vpx_img_fmt fmt;
+  vpx_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+const TestVideoParam TestVectors[] = {
+  {"park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
+  {"park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422, VPX_BITS_8, 1},
+  {"park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1},
+  {"park_joy_90p_8_440.yuv", 8, VPX_IMG_FMT_I440, VPX_BITS_8, 1},
+#if CONFIG_VP9_HIGHBITDEPTH
+  {"park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2},
+  {"park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3},
+  {"park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3},
+  {"park_joy_90p_10_440.yuv", 10, VPX_IMG_FMT_I44016, VPX_BITS_10, 3},
+  {"park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2},
+  {"park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3},
+  {"park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3},
+  {"park_joy_90p_12_440.yuv", 12, VPX_IMG_FMT_I44016, VPX_BITS_12, 3},
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+
+int is_extension_y4m(const char *filename) {
+  const char *dot = strrchr(filename, '.');
+  if (!dot || dot == filename)
+    return 0;
+  else
+    return !strcmp(dot, ".y4m");
+}
+
+class EndToEndTestLarge
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, \
+                                                 TestVideoParam> {
+ protected:
+  EndToEndTestLarge()
+      : EncoderTest(GET_PARAM(0)),
+        psnr_(0.0),
+        nframes_(0),
+        encoding_mode_(GET_PARAM(1)) {
+  }
+
+  virtual ~EndToEndTestLarge() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    if (encoding_mode_ != ::libvpx_test::kRealTime) {
+      cfg_.g_lag_in_frames = 5;
+      cfg_.rc_end_usage = VPX_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = VPX_CBR;
+    }
+    test_video_param_ = GET_PARAM(2);
+  }
+
+  virtual void BeginPassHook(unsigned int) {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, kCpuUsed);
+      if (encoding_mode_ != ::libvpx_test::kRealTime) {
+        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+      }
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_)
+      return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  TestVideoParam test_video_param_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  libvpx_test::TestMode encoding_mode_;
+};
+
+TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  libvpx_test::VideoSource *video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video = new libvpx_test::Y4mVideoSource(test_video_param_.filename,
+                                            0, kFrames);
+  } else {
+    video = new libvpx_test::YUVVideoSource(test_video_param_.filename,
+                                            test_video_param_.fmt,
+                                            kWidth, kHeight,
+                                            kFramerate, 1, 0, kFrames);
+  }
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+  const double psnr = GetAveragePsnr();
+  EXPECT_GT(psnr, psnr_threshold);
+  delete(video);
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    EndToEndTestLarge,
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
+    ::testing::ValuesIn(TestVectors));
+
+}  // namespace

diff --git a/test/yuv_video_source.h b/test/yuv_video_source.h
new file mode 100644
index 0000000..3c852b2
--- /dev/null
+++ b/test/yuv_video_source.h

@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_YUV_VIDEO_SOURCE_H_
+#define TEST_YUV_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/video_source.h"
+#include "vpx/vpx_image.h"
+
+namespace libvpx_test {
+
+// This class extends VideoSource to allow parsing of raw YUV
+// formats of various color sampling and bit-depths so that we can
+// do actual file encodes. Each frame is one raw_size_-byte read; a failed
+// read lowers limit_ to the current frame so that img() returns NULL.
+class YUVVideoSource : public VideoSource {
+ public:
+  YUVVideoSource(const std::string &file_name, vpx_img_fmt format,
+                 unsigned int width, unsigned int height,
+                 int rate_numerator, int rate_denominator,
+                 unsigned int start, int limit)
+      : file_name_(file_name),
+        input_file_(NULL),
+        img_(NULL),
+        raw_size_(0),
+        start_(start),
+        limit_(limit),
+        frame_(0),
+        width_(0),
+        height_(0),
+        format_(VPX_IMG_FMT_NONE),
+        framerate_numerator_(rate_numerator),
+        framerate_denominator_(rate_denominator) {
+    // This initializes format_, raw_size_, width_, height_ and allocates img.
+    SetSize(width, height, format);
+  }
+
+  virtual ~YUVVideoSource() {
+    vpx_img_free(img_);
+    if (input_file_)
+      fclose(input_file_);
+  }
+
+  // (Re)opens the input, seeks to frame start_ and loads it.
+  virtual void Begin() {
+    if (input_file_)
+      fclose(input_file_);
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+    if (start_)
+      fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET);
+
+    frame_ = start_;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual vpx_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual vpx_rational_t timebase() const {
+    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  virtual void SetSize(unsigned int width, unsigned int height,
+                       vpx_img_fmt format) {
+    if (width != width_ || height != height_ || format != format_) {
+      vpx_img_free(img_);
+      img_ = vpx_img_alloc(NULL, format, width, height, 1);
+      ASSERT_TRUE(img_ != NULL);
+      width_ = width;
+      height_ = height;
+      format_ = format;
+      switch (format) {
+        case VPX_IMG_FMT_I420:
+          raw_size_ = width * height * 3 / 2;
+          break;
+        case VPX_IMG_FMT_I422:
+        case VPX_IMG_FMT_I440:
+          raw_size_ = width * height * 2;
+          break;
+        case VPX_IMG_FMT_I444:
+          raw_size_ = width * height * 3;
+          break;
+        case VPX_IMG_FMT_I42016:
+          raw_size_ = width * height * 3;
+          break;
+        case VPX_IMG_FMT_I42216:
+        case VPX_IMG_FMT_I44016:
+          raw_size_ = width * height * 4;
+          break;
+        case VPX_IMG_FMT_I44416:
+          raw_size_ = width * height * 6;
+          break;
+        default:
+          ASSERT_TRUE(0);
+      }
+    }
+  }
+
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    ASSERT_TRUE(img_ != NULL);  // NULL if allocation in SetSize() failed.
+    // Read a frame from input_file.
+    if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
+      limit_ = frame_;
+    }
+  }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  vpx_image_t *img_;
+  size_t raw_size_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  vpx_img_fmt format_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+};
+
+}  // namespace libvpx_test
+
+#endif  // TEST_YUV_VIDEO_SOURCE_H_

diff --git a/tools_common.c b/tools_common.c
index 30fd352..e243a91 100644
--- a/tools_common.c
+++ b/tools_common.c

@@ -276,7 +276,7 @@
   // Note the offset is 1 less than half.
   const int offset = input_shift > 0 ? (1 << (input_shift - 1)) - 1 : 0;
   int plane;
-  if (dst->w != src->w || dst->h != src->h ||
+  if (dst->d_w != src->d_w || dst->d_h != src->d_h ||
       dst->x_chroma_shift != src->x_chroma_shift ||
       dst->y_chroma_shift != src->y_chroma_shift ||
       dst->fmt != src->fmt || input_shift < 0) {
@@ -293,12 +293,12 @@
       break;
   }
   for (plane = 0; plane < 3; plane++) {
-    int w = src->w;
-    int h = src->h;
+    int w = src->d_w;
+    int h = src->d_h;
     int x, y;
     if (plane) {
-      w >>= src->x_chroma_shift;
-      h >>= src->y_chroma_shift;
+      w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
+      h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
     }
     for (y = 0; y < h; y++) {
       uint16_t *p_src =
@@ -316,7 +316,7 @@
   // Note the offset is 1 less than half.
   const int offset = input_shift > 0 ? (1 << (input_shift - 1)) - 1 : 0;
   int plane;
-  if (dst->w != src->w || dst->h != src->h ||
+  if (dst->d_w != src->d_w || dst->d_h != src->d_h ||
       dst->x_chroma_shift != src->x_chroma_shift ||
       dst->y_chroma_shift != src->y_chroma_shift ||
       dst->fmt != src->fmt + VPX_IMG_FMT_HIGHBITDEPTH ||
@@ -334,8 +334,8 @@
       break;
   }
   for (plane = 0; plane < 3; plane++) {
-    int w = src->w;
-    int h = src->h;
+    int w = src->d_w;
+    int h = src->d_h;
     int x, y;
     if (plane) {
       w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
@@ -384,8 +384,8 @@
     int h = src->d_h;
     int x, y;
     if (plane) {
-      w >>= src->x_chroma_shift;
-      h >>= src->y_chroma_shift;
+      w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
+      h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
     }
     for (y = 0; y < h; y++) {
       uint16_t *p_src =

diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index ea3b46e..9d5556d 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c

@@ -516,9 +516,8 @@
     // Adjust rd for ZEROMV and LAST, if LAST is the closest reference frame.
     if (this_mode == ZEROMV &&
         x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME &&
-        (denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME))
-    {
-        this_rd = ((int64_t)this_rd) * rd_adj / 100;
+        (denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME)) {
+      this_rd = ((int64_t)this_rd) * rd_adj / 100;
     }
 
     check_for_encode_breakout(*sse, x);

diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 3ab8ed0..5aa274d 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c

@@ -112,22 +112,19 @@
      * structure. More memory may be required at the time the stream
      * information becomes known.
      */
-    if (!ctx->priv)
-    {
-        vp8_init_ctx(ctx);
-        priv = (vpx_codec_alg_priv_t *)ctx->priv;
+    if (!ctx->priv) {
+      vp8_init_ctx(ctx);
+      priv = (vpx_codec_alg_priv_t *)ctx->priv;
 
-        /* initialize number of fragments to zero */
-        priv->fragments.count = 0;
-        /* is input fragments enabled? */
-        priv->fragments.enabled =
-            (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS);
+      /* initialize number of fragments to zero */
+      priv->fragments.count = 0;
+      /* is input fragments enabled? */
+      priv->fragments.enabled =
+          (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS);
 
-        /*post processing level initialized to do nothing */
-    }
-    else
-    {
-        priv = (vpx_codec_alg_priv_t *)ctx->priv;
+      /*post processing level initialized to do nothing */
+    } else {
+      priv = (vpx_codec_alg_priv_t *)ctx->priv;
     }
 
     priv->yv12_frame_buffers.use_frame_threads =
@@ -138,11 +135,10 @@
 
     if (priv->yv12_frame_buffers.use_frame_threads &&
         ((ctx->priv->init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT) ||
-         (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS)))
-    {
-        /* row-based threading, error concealment, and input fragments will
-         * not be supported when using frame-based threading */
-        res = VPX_CODEC_INVALID_PARAM;
+         (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS))) {
+      /* row-based threading, error concealment, and input fragments will
+       * not be supported when using frame-based threading */
+      res = VPX_CODEC_INVALID_PARAM;
     }
 
     return res;

diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index e13445f..7094a01 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c

@@ -92,7 +92,7 @@
                                    void *arg) {
   int plane;
 
-  for (plane = 0; plane < MAX_MB_PLANE; plane++)
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
     vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
 }
 

diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index fbb3d4b..3b34050 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c

@@ -24,10 +24,7 @@
         ? cm->prev_mi[mi_row * xd->mi_stride + mi_col].src_mi
         : NULL;
   const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->src_mi->mbmi : NULL;
-
-
   const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
-
   int different_ref_found = 0;
   int context_counter = 0;
 
@@ -127,7 +124,6 @@
   }
 }
 
-
 void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
                            int_mv *mvlist, int_mv *nearest, int_mv *near) {
   int i;

diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index c28f156..f1eda91 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h

@@ -143,7 +143,6 @@
   int prev_mi_idx;
   int mi_alloc_size;
   MODE_INFO *mip_array[2];
-  MODE_INFO **mi_grid_base_array[2];
 
   MODE_INFO *mip; /* Base of allocated array */
   MODE_INFO *mi;  /* Corresponds to upper left visible macroblock */

diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index c2a9181..de389e7 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -1155,7 +1155,7 @@
   specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
 
   add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
+  specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64";
 
   add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";

diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 2690f48..dc712f0 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c

@@ -656,10 +656,8 @@
 
 void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
                          int *width, int *height) {
-  const int w = vp9_rb_read_literal(rb, 16) + 1;
-  const int h = vp9_rb_read_literal(rb, 16) + 1;
-  *width = w;
-  *height = h;
+  *width = vp9_rb_read_literal(rb, 16) + 1;
+  *height = vp9_rb_read_literal(rb, 16) + 1;
 }
 
 static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
@@ -749,7 +747,7 @@
   if (!found)
     vp9_read_frame_size(rb, &width, &height);
 
-  if (width <=0 || height <= 0)
+  if (width <= 0 || height <= 0)
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                        "Invalid frame size");
 

diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 3c8e7cc..a01fe84 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c

@@ -223,7 +223,6 @@
   fr = vp9_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d]
                                                : mvcomp->fp);
 
-
   // High precision part (if hp is not used, the default value of the hp is 1)
   hp = usehp ? vp9_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp)
              : 1;

diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 694cac7..3954fe6 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c

@@ -1013,7 +1013,11 @@
         ((cpi->svc.number_temporal_layers > 1 &&
          cpi->oxcf.rc_mode == VPX_CBR) ||
         (cpi->svc.number_spatial_layers > 1 &&
-         cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) {
+         cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
+        (is_two_pass_svc(cpi) &&
+         cpi->svc.encode_empty_frame_state == ENCODING &&
+         cpi->svc.layer_context[0].frames_from_key_frame <
+         cpi->svc.number_temporal_layers + 1))) {
       found = 0;
     }
     vp9_wb_write_bit(wb, found);
@@ -1105,8 +1109,7 @@
     // will change to show_frame flag to 0, then add an one byte frame with
     // show_existing_frame flag which tells the decoder which frame we want to
     // show.
-    if (!cm->show_frame ||
-        (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0))
+    if (!cm->show_frame)
       vp9_wb_write_bit(wb, cm->intra_only);
 
     if (!cm->error_resilient_mode)

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a867cf9..b4fe70a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c

@@ -412,29 +412,47 @@
     return 1;
   }
 
-  // Vertical split is available on all but the bottom border.
-  if (mi_row + block_height / 2 < cm->mi_rows &&
-      vt.part_variances->vert[0].variance < threshold &&
-      vt.part_variances->vert[1].variance < threshold) {
-    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
-    set_block_size(cpi, mi_row, mi_col, subsize);
-    set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
-    return 1;
+  // Only allow split for blocks above 16x16.
+  if (bsize > BLOCK_16X16) {
+    // Vertical split is available on all but the bottom border.
+    if (mi_row + block_height / 2 < cm->mi_rows &&
+        vt.part_variances->vert[0].variance < threshold &&
+        vt.part_variances->vert[1].variance < threshold) {
+      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
+      set_block_size(cpi, mi_row, mi_col, subsize);
+      set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
+      return 1;
+    }
+
+    // Horizontal split is available on all but the right border.
+    if (mi_col + block_width / 2 < cm->mi_cols &&
+        vt.part_variances->horz[0].variance < threshold &&
+        vt.part_variances->horz[1].variance < threshold) {
+      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
+      set_block_size(cpi, mi_row, mi_col, subsize);
+      set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
+      return 1;
+    }
   }
 
-  // Horizontal split is available on all but the right border.
-  if (mi_col + block_width / 2 < cm->mi_cols &&
-      vt.part_variances->horz[0].variance < threshold &&
-      vt.part_variances->horz[1].variance < threshold) {
-    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
-    set_block_size(cpi, mi_row, mi_col, subsize);
-    set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
-    return 1;
+  // This will only allow 8x8 if the 16x16 variance is very large.
+  if (bsize == BLOCK_16X16) {
+    if (mi_col + block_width / 2 < cm->mi_cols &&
+        mi_row + block_height / 2 < cm->mi_rows &&
+        vt.part_variances->none.variance < (threshold << 6)) {
+      set_block_size(cpi, mi_row, mi_col, bsize);
+      return 1;
+    }
   }
   return 0;
 }
 
-// TODO(debargha): Fix this function and make it work as expected.
+// This function chooses partitioning based on the variance
+// between source and reconstructed last, where variance is
+// computed for 8x8 downsampled inputs. Some things to check:
+// using the last source rather than reconstructed last, and
+// allowing for small downsampling (4x4 or 2x2) for selection
+// of smaller block sizes (i.e., < 16x16).
 static void choose_partitioning(VP9_COMP *cpi,
                                 const TileInfo *const tile,
                                 int mi_row, int mi_col) {
@@ -549,27 +567,11 @@
         for (j = 0; j < 4; ++j) {
           const int x16_idx = ((j & 1) << 1);
           const int y16_idx = ((j >> 1) << 1);
-          // NOTE: This is a temporary hack to disable 8x8 partitions,
-          // since it works really bad - possibly due to a bug
-#define DISABLE_8X8_VAR_BASED_PARTITION
-#ifdef DISABLE_8X8_VAR_BASED_PARTITION
-          if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
-              mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) {
-            set_block_size(cpi,
-                           (mi_row + y32_idx + y16_idx),
-                           (mi_col + x32_idx + x16_idx),
-                           BLOCK_16X16);
-          } else {
-            for (k = 0; k < 4; ++k) {
-              const int x8_idx = (k & 1);
-              const int y8_idx = (k >> 1);
-              set_block_size(cpi,
-                             (mi_row + y32_idx + y16_idx + y8_idx),
-                             (mi_col + x32_idx + x16_idx + x8_idx),
-                             BLOCK_8X8);
-            }
-          }
-#else
+          // NOTE: Since this uses 8x8 downsampling for variance calculation
+          // we cannot really select block size 8x8 (or even 8x16/16x8),
+          // since we do not have sufficient samples for variance.
+          // For now, 8x8 partition is only set if the variance of the 16x16
+          // block is very high. This is controlled in set_vt_partitioning.
           if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
                                    BLOCK_16X16,
                                    mi_row + y32_idx + y16_idx,
@@ -583,7 +585,6 @@
                              BLOCK_8X8);
             }
           }
-#endif
         }
       }
     }
@@ -3156,7 +3157,6 @@
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
     MODE_INFO *mi = cm->mi + idx_str;
-    MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
     BLOCK_SIZE bsize;
     x->in_static_area = 0;
     x->source_variance = UINT_MAX;
@@ -3194,7 +3194,7 @@
                                &dummy_rate, &dummy_dist, 1, INT64_MAX,
                                cpi->pc_root);
         } else {
-          copy_partitioning(cm, mi, prev_mi);
+          choose_partitioning(cpi, tile, mi_row, mi_col);
           nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
                               BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
                               cpi->pc_root);

diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 8096a90..1758e3f 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c

@@ -225,6 +225,9 @@
   }
   vpx_memset(&cpi->svc.scaled_frames[0], 0,
              MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
+
+  vp9_free_frame_buffer(&cpi->svc.empty_frame.img);
+  vpx_memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
 }
 
 static void save_coding_context(VP9_COMP *cpi) {
@@ -585,8 +588,6 @@
   cpi->ref_frame_flags = 0;
 
   init_buffer_indices(cpi);
-
-  set_tile_limits(cpi);
 }
 
 static void set_rc_buffer_sizes(RATE_CONTROL *rc,
@@ -2981,7 +2982,9 @@
     }
   }
   if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {
+    // Use the last frame context for the empty frame.
     cm->frame_context_idx =
+        (cpi->svc.encode_empty_frame_state == ENCODING) ? FRAME_CONTEXTS - 1 :
         cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +
         cpi->svc.temporal_layer_id;
 
@@ -3162,7 +3165,9 @@
   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
 
   cm->last_frame_type = cm->frame_type;
-  vp9_rc_postencode_update(cpi, *size);
+
+  if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
+    vp9_rc_postencode_update(cpi, *size);
 
 #if 0
   output_frame_level_debug_stats(cpi);
@@ -3186,12 +3191,8 @@
   cm->last_height = cm->height;
 
   // reset to normal state now that we are done.
-  if (!cm->show_existing_frame) {
-    if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0)
-      cm->last_show_frame = 0;
-    else
-      cm->last_show_frame = cm->show_frame;
-  }
+  if (!cm->show_existing_frame)
+    cm->last_show_frame = cm->show_frame;
 
   if (cm->show_frame) {
     vp9_swap_mi_and_prev_mi(cm);
@@ -3228,7 +3229,9 @@
                         uint8_t *dest, unsigned int *frame_flags) {
   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
-  vp9_twopass_postencode_update(cpi);
+
+  if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
+    vp9_twopass_postencode_update(cpi);
 }
 
 static void init_motion_estimation(VP9_COMP *cpi) {
@@ -3416,6 +3419,9 @@
   if (is_two_pass_svc(cpi)) {
 #if CONFIG_SPATIAL_SVC
     vp9_svc_start_frame(cpi);
+    // Use a small empty frame instead of a real frame
+    if (cpi->svc.encode_empty_frame_state == ENCODING)
+      source = &cpi->svc.empty_frame;
 #endif
     if (oxcf->pass == 2)
       vp9_restore_layer_context(cpi);
@@ -3434,6 +3440,11 @@
 
   // Should we encode an arf frame.
   arf_src_index = get_arf_src_index(cpi);
+
+  // Skip alt frame if we encode the empty frame
+  if (is_two_pass_svc(cpi) && source != NULL)
+    arf_src_index = 0;
+
   if (arf_src_index) {
     assert(arf_src_index <= rc->frames_to_key);
 
@@ -3544,7 +3555,10 @@
 
   // For two pass encodes analyse the first pass stats and determine
   // the bit allocation and other parameters for this frame / group of frames.
-  if ((oxcf->pass == 2) && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+  if ((oxcf->pass == 2) &&
+      (!cpi->use_svc ||
+       (is_two_pass_svc(cpi) &&
+        cpi->svc.encode_empty_frame_state != ENCODING))) {
     vp9_rc_get_second_pass_params(cpi);
   }
 
@@ -3773,10 +3787,18 @@
 
 #endif
 
-  if (is_two_pass_svc(cpi) && cm->show_frame) {
-    ++cpi->svc.spatial_layer_to_encode;
-    if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
-      cpi->svc.spatial_layer_to_encode = 0;
+  if (is_two_pass_svc(cpi)) {
+    if (cpi->svc.encode_empty_frame_state == ENCODING)
+      cpi->svc.encode_empty_frame_state = ENCODED;
+
+    if (cm->show_frame) {
+      ++cpi->svc.spatial_layer_to_encode;
+      if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
+        cpi->svc.spatial_layer_to_encode = 0;
+
+      // May need the empty frame after a visible frame.
+      cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
+    }
   }
   return 0;
 }
@@ -3867,10 +3889,6 @@
 
   if (width) {
     cm->width = width;
-    if (cm->width * 5 < cpi->initial_width) {
-      cm->width = cpi->initial_width / 5 + 1;
-      printf("Warning: Desired width too small, changed to %d\n", cm->width);
-    }
     if (cm->width > cpi->initial_width) {
       cm->width = cpi->initial_width;
       printf("Warning: Desired width too large, changed to %d\n", cm->width);
@@ -3879,10 +3897,6 @@
 
   if (height) {
     cm->height = height;
-    if (cm->height * 5 < cpi->initial_height) {
-      cm->height = cpi->initial_height / 5 + 1;
-      printf("Warning: Desired height too small, changed to %d\n", cm->height);
-    }
     if (cm->height > cpi->initial_height) {
       cm->height = cpi->initial_height;
       printf("Warning: Desired height too large, changed to %d\n", cm->height);

diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c9588a3..96c3e0a 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c

@@ -2405,6 +2405,9 @@
         cpi->ref_frame_flags &=
             (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
         lc->frames_from_key_frame = 0;
+        // Reset the empty frame resolution since we have a key frame.
+        cpi->svc.empty_frame_width = cm->width;
+        cpi->svc.empty_frame_height = cm->height;
       }
     } else {
       cm->frame_type = INTER_FRAME;
@@ -2478,6 +2481,7 @@
   if (rc->total_actual_bits) {
     rc->rate_error_estimate =
       (int)((rc->vbr_bits_off_target * 100) / rc->total_actual_bits);
+    rc->rate_error_estimate = clamp(rc->rate_error_estimate, -100, 100);
   } else {
     rc->rate_error_estimate = 0;
   }

diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 42f4691..b74b2dd 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c

@@ -486,8 +486,12 @@
   // var_y and sse_y are saved to be used in skipping checking
   unsigned int var_y = UINT_MAX;
   unsigned int sse_y = UINT_MAX;
+  // Reduce the intra cost penalty for small blocks (<=16x16).
+  const int reduction_fac =
+      (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
+       bsize <= BLOCK_16X16) ? 4 : 1;
   const int intra_cost_penalty = vp9_get_intra_cost_penalty(
-      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) / reduction_fac;
   const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                            intra_cost_penalty, 0);
   const int intra_mode_cost = 50;

diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index ef32fe1..65bca66 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c

@@ -1500,9 +1500,7 @@
   rc->max_gf_interval = 16;
 
   // Extended interval for genuinely static scenes
-  rc->static_scene_max_gf_interval = oxcf->key_freq >> 1;
-  if (rc->static_scene_max_gf_interval > (MAX_LAG_BUFFERS * 2))
-    rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+  rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
 
   if (is_altref_enabled(cpi)) {
     if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)

diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7565cc5..eca8e58 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c

@@ -1140,12 +1140,14 @@
   *sse = 0;
   *skippable = 1;
 
-  for (plane = 1; plane < MAX_MB_PLANE && is_cost_valid; ++plane) {
+  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
     txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                      ref_best_rd, plane, bsize, uv_tx_size,
                      cpi->sf.use_fast_coef_costing);
-    if (pnrate == INT_MAX)
+    if (pnrate == INT_MAX) {
       is_cost_valid = 0;
+      break;
+    }
     *rate += pnrate;
     *distortion += pndist;
     *sse += pnsse;
@@ -3392,6 +3394,7 @@
   }
 
   if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
+    rd_cost->rate = INT_MAX;
     rd_cost->rdcost = INT64_MAX;
     return;
   }
@@ -3562,6 +3565,7 @@
   rd_cost->rdcost = this_rd;
 
   if (this_rd >= best_rd_so_far) {
+    rd_cost->rate = INT_MAX;
     rd_cost->rdcost = INT64_MAX;
     return;
   }
@@ -4113,6 +4117,7 @@
   }
 
   if (best_rd >= best_rd_so_far) {
+    rd_cost->rate = INT_MAX;
     rd_cost->rdcost = INT64_MAX;
     return;
   }

diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 8788be6..bec77d7 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c

@@ -271,6 +271,10 @@
     sf->partition_search_type = REFERENCE_PARTITION;
     sf->use_nonrd_pick_mode = 1;
     sf->allow_skip_recode = 0;
+    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
   }
 
   if (speed >= 6) {
@@ -285,10 +289,6 @@
     sf->partition_search_type = VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;
     sf->mv.search_method = NSTEP;
-    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
-    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
-    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
-    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
 
     sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
 

diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index eed681c..1573557 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c

@@ -14,6 +14,8 @@
 #include "vp9/encoder/vp9_svc_layercontext.h"
 #include "vp9/encoder/vp9_extend.h"
 
+#define SMALL_FRAME_FB_IDX 7
+
 void vp9_init_layer_context(VP9_COMP *const cpi) {
   SVC *const svc = &cpi->svc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -28,6 +30,25 @@
     layer_end = svc->number_temporal_layers;
   } else {
     layer_end = svc->number_spatial_layers;
+
+    if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
+      if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img,
+                                   cpi->common.width, cpi->common.height,
+                                   cpi->common.subsampling_x,
+                                   cpi->common.subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                 cpi->common.use_highbitdepth,
+#endif
+                                 VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate empty frame for multiple frame "
+                           "contexts");
+
+      vpx_memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
+                 cpi->svc.empty_frame.img.buffer_alloc_sz);
+      cpi->svc.empty_frame_width = cpi->common.width;
+      cpi->svc.empty_frame_height = cpi->common.height;
+    }
   }
 
   for (layer = 0; layer < layer_end; ++layer) {
@@ -310,6 +331,47 @@
   get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
                        lc->scaling_factor_num, lc->scaling_factor_den,
                        &width, &height);
+
+  // Workaround for multiple frame contexts. In some frames we can't use prev_mi
+  // since its previous frame could be changed during decoding time. The idea is
+  // to put an empty invisible frame in front of them, so that prev_mi is not
+  // used when encoding these frames.
+  if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2 &&
+      cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE) {
+    if ((cpi->svc.number_temporal_layers > 1 &&
+         cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1) ||
+        (cpi->svc.number_spatial_layers > 1 &&
+         cpi->svc.spatial_layer_id == 0)) {
+      struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, 0);
+
+      if (buf != NULL) {
+        cpi->svc.empty_frame.ts_start = buf->ts_start;
+        cpi->svc.empty_frame.ts_end = buf->ts_end;
+        cpi->svc.encode_empty_frame_state = ENCODING;
+        cpi->common.show_frame = 0;
+        cpi->ref_frame_flags = 0;
+        cpi->common.frame_type = INTER_FRAME;
+        cpi->lst_fb_idx =
+            cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX;
+
+        // Gradually make the empty frame smaller to save bits. Make it half of
+        // its previous size because of the scaling factor restriction.
+        cpi->svc.empty_frame_width >>= 1;
+        cpi->svc.empty_frame_width = (cpi->svc.empty_frame_width + 1) & ~1;
+        if (cpi->svc.empty_frame_width < 16)
+          cpi->svc.empty_frame_width = 16;
+
+        cpi->svc.empty_frame_height >>= 1;
+        cpi->svc.empty_frame_height = (cpi->svc.empty_frame_height + 1) & ~1;
+        if (cpi->svc.empty_frame_height < 16)
+          cpi->svc.empty_frame_height = 16;
+
+        width = cpi->svc.empty_frame_width;
+        height = cpi->svc.empty_frame_height;
+      }
+    }
+  }
+
   if (vp9_set_size_literal(cpi, width, height) != 0)
     return VPX_CODEC_INVALID_PARAM;
 
@@ -317,7 +379,6 @@
   cpi->oxcf.best_allowed_q = vp9_quantizer_to_qindex(lc->min_q);
 
   vp9_change_config(cpi, &cpi->oxcf);
-
   vp9_set_high_precision_mv(cpi, 1);
 
   cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source;

diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 47a5456..e9645ce 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h

@@ -50,6 +50,16 @@
 
   int spatial_layer_to_encode;
 
+  // Workaround for multiple frame contexts
+  enum {
+    ENCODED = 0,
+    ENCODING,
+    NEED_TO_ENCODE
+  }encode_empty_frame_state;
+  struct lookahead_entry empty_frame;
+  int empty_frame_width;
+  int empty_frame_height;
+
   // Store scaled source frames to be used for temporal filter to generate
   // a alt ref frame.
   YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];

diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 9ae81e7..5599227 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c

@@ -719,6 +719,9 @@
           ++frame_used;
         }
       }
+      cm->mi = cm->mip + cm->mi_stride + 1;
+      cpi->mb.e_mbd.mi = cm->mi;
+      cpi->mb.e_mbd.mi[0].src_mi = &cpi->mb.e_mbd.mi[0];
     } else {
       // ARF is produced at the native frame size and resized when coded.
 #if CONFIG_VP9_HIGHBITDEPTH

diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index bf400d3..0a120e4 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c

@@ -50,9 +50,9 @@
                                              const __m128i l21,
                                              __m128i acc_diff) {
   // Calculate differences
-  const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
+  const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
   const __m128i v_mc_running_avg_y = _mm_loadu_si128(
-                                     (__m128i *)(&mc_running_avg_y[0]));
+                                     (const __m128i *)(&mc_running_avg_y[0]));
   __m128i v_running_avg_y;
   const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
   const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
@@ -103,9 +103,9 @@
                                               __m128i acc_diff) {
   __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
   // Calculate differences.
-  const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
+  const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
   const __m128i v_mc_running_avg_y =
-                _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0]));
+                _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0]));
   const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
   const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
   // Obtain the sign. FF if diff is negative.
@@ -357,9 +357,9 @@
   const __m128i l21 = _mm_set1_epi8(1);
   int sum_diff = 0;
 
-  for (int i = 0; i < 4; i++) {
-    for (int j = 0; j < 4; j++) {
-      acc_diff[i][j] = _mm_setzero_si128();
+  for (c = 0; c < 4; ++c) {
+    for (r = 0; r < 4; ++r) {
+      acc_diff[c][r] = _mm_setzero_si128();
     }
   }
 

diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c
new file mode 100644
index 0000000..7c1c884
--- /dev/null
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c

@@ -0,0 +1,225 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
+#include "vpx/vpx_integer.h"
+
+void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
+                         int skip_block, const int16_t* zbin_ptr,
+                         const int16_t* round_ptr, const int16_t* quant_ptr,
+                         const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
+                         int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
+                         int zbin_oq_value, uint16_t* eob_ptr,
+                         const int16_t* scan_ptr,
+                         const int16_t* iscan_ptr) {
+  __m128i zero;
+  (void)scan_ptr;  // Unused here; only iscan_ptr is read.
+
+  coeff_ptr += n_coeffs;
+  iscan_ptr += n_coeffs;
+  qcoeff_ptr += n_coeffs;
+  dqcoeff_ptr += n_coeffs;
+  n_coeffs = -n_coeffs;  // Pointers sit at the end; index runs from -n to 0.
+  zero = _mm_setzero_si128();
+  if (!skip_block) {
+    __m128i eob;
+    __m128i zbin;
+    __m128i round, quant, dequant, shift;
+    {
+      __m128i coeff0, coeff1;
+
+      // Setup global values (aligned loads: tables must be 16-byte aligned)
+      {
+        __m128i zbin_oq;
+        __m128i pw_1;
+        zbin_oq = _mm_set1_epi16(zbin_oq_value);
+        zbin = _mm_load_si128((const __m128i*)zbin_ptr);
+        round = _mm_load_si128((const __m128i*)round_ptr);
+        quant = _mm_load_si128((const __m128i*)quant_ptr);
+        zbin = _mm_add_epi16(zbin, zbin_oq);
+        pw_1 = _mm_set1_epi16(1);
+        zbin = _mm_sub_epi16(zbin, pw_1);  // zbin + zbin_oq - 1, for cmpgt
+        dequant = _mm_load_si128((const __m128i*)dequant_ptr);
+        shift = _mm_load_si128((const __m128i*)quant_shift_ptr);
+      }
+
+      {
+        __m128i coeff0_sign, coeff1_sign;
+        __m128i qcoeff0, qcoeff1;
+        __m128i qtmp0, qtmp1;
+        __m128i cmp_mask0, cmp_mask1;
+        // Do DC and first 15 AC (the first 16 coefficients)
+        coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+        coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+        // Sign mask (0 or -1 per lane), then absolute value via xor/sub
+        coeff0_sign = _mm_srai_epi16(coeff0, 15);
+        coeff1_sign = _mm_srai_epi16(coeff1, 15);
+        qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+        qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+        qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+        qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+        cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+        zbin = _mm_unpackhi_epi64(zbin, zbin);  // Switch DC to AC
+        cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+        qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+        round = _mm_unpackhi_epi64(round, round);
+        qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+        qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+        quant = _mm_unpackhi_epi64(quant, quant);
+        qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+        qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+        qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+        qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+        shift = _mm_unpackhi_epi64(shift, shift);
+        qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+        // Reinsert signs (same xor/sub, using the saved sign masks)
+        qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+        qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+        qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+        qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+        // Zero the lanes whose magnitude did not exceed the zbin threshold
+        qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+        qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+        coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+        dequant = _mm_unpackhi_epi64(dequant, dequant);
+        coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+      }
+
+      {
+        // Scan for eob: max (iscan + 1) over nonzero dequantized coeffs
+        __m128i zero_coeff0, zero_coeff1;
+        __m128i nzero_coeff0, nzero_coeff1;
+        __m128i iscan0, iscan1;
+        __m128i eob1;
+        zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+        zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+        nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+        nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        // The nonzero mask is -1, so subtracting it adds one: index -> count
+        iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+        iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+        eob = _mm_and_si128(iscan0, nzero_coeff0);
+        eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+        eob = _mm_max_epi16(eob, eob1);
+      }
+      n_coeffs += 8 * 2;
+    }
+
+    // AC only loop: 16 coeffs per pass (assumes n_coeffs % 16 == 0)
+    while (n_coeffs < 0) {
+      __m128i coeff0, coeff1;
+      {
+        __m128i coeff0_sign, coeff1_sign;
+        __m128i qcoeff0, qcoeff1;
+        __m128i qtmp0, qtmp1;
+        __m128i cmp_mask0, cmp_mask1;
+
+        coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+        coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+        // Sign mask (0 or -1 per lane), then absolute value via xor/sub
+        coeff0_sign = _mm_srai_epi16(coeff0, 15);
+        coeff1_sign = _mm_srai_epi16(coeff1, 15);
+        qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+        qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+        qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+        qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+        cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+        cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+        qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+        qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+        qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+        qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+        qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+        qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+        qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+        qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+        // Reinsert signs (same xor/sub, using the saved sign masks)
+        qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+        qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+        qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+        qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+        // Zero the lanes whose magnitude did not exceed the zbin threshold
+        qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+        qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+        _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+        coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+        coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+        _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+      }
+
+      {
+        // Scan for eob: max (iscan + 1) over nonzero dequantized coeffs
+        __m128i zero_coeff0, zero_coeff1;
+        __m128i nzero_coeff0, nzero_coeff1;
+        __m128i iscan0, iscan1;
+        __m128i eob0, eob1;
+        zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+        zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+        nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+        nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+        iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+        iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+        // The nonzero mask is -1, so subtracting it adds one: index -> count
+        iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+        iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+        eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+        eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+        eob0 = _mm_max_epi16(eob0, eob1);
+        eob = _mm_max_epi16(eob, eob0);
+      }
+      n_coeffs += 8 * 2;
+    }
+
+    // Accumulate EOB: horizontal max across the eight 16-bit lanes
+    {
+      __m128i eob_shuffled;
+      eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+      eob = _mm_max_epi16(eob, eob_shuffled);
+      eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+      eob = _mm_max_epi16(eob, eob_shuffled);
+      eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+      eob = _mm_max_epi16(eob, eob_shuffled);
+      *eob_ptr = _mm_extract_epi16(eob, 1);
+    }
+  } else {  // skip_block: zero both output buffers and report eob = 0
+    do {
+      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
+      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
+      _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+      n_coeffs += 8 * 2;
+    } while (n_coeffs < 0);
+    *eob_ptr = 0;
+  }
+}

diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 041ba27..d0ca524 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c

@@ -188,11 +188,9 @@
     }
     if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
       ERROR("Not enough ref buffers for svc alt ref frames");
-    if ((cfg->ss_number_layers > 3 ||
-         cfg->ss_number_layers * cfg->ts_number_layers > 4) &&
+    if (cfg->ss_number_layers * cfg->ts_number_layers > 3 &&
         cfg->g_error_resilient == 0)
-    ERROR("Multiple frame context are not supported for more than 3 spatial "
-          "layers or more than 4 spatial x temporal layers");
+    ERROR("Multiple frame context are not supported for more than 3 layers");
   }
 #endif
 

diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index a2e3cda..ad76722 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk

@@ -101,6 +101,7 @@
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
 
 ifeq ($(CONFIG_USE_X86INC),yes)
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm

diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 773087d..fa3409c 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c

@@ -350,7 +350,7 @@
       }
     }
 
-    for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+    for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
       if (total > 0) {
         enc_cfg->ss_target_bitrate[i] = (unsigned int)
             (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total);

diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index c6c7d08..044243d 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h

@@ -161,7 +161,9 @@
     VPX_CODEC_STATS_PKT,       /**< Two-pass statistics for this frame */
     VPX_CODEC_FPMB_STATS_PKT,  /**< first pass mb statistics for this frame */
     VPX_CODEC_PSNR_PKT,        /**< PSNR statistics for this frame */
-#if CONFIG_SPATIAL_SVC
+    // TODO(minghai): This is for testing purposes. The released library can't
+    // depend on vpx_config.h
+#if defined(CONFIG_SPATIAL_SVC) && CONFIG_SPATIAL_SVC
     VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/
     VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/
 #endif
@@ -201,7 +203,9 @@
         double       psnr[4];     /**< PSNR, total/y/u/v */
       } psnr;                       /**< data for PSNR packet */
       vpx_fixed_buf_t raw;     /**< data for arbitrary packets */
-#if CONFIG_SPATIAL_SVC
+      // TODO(minghai): This is for testing purposes. The released library
+      // can't depend on vpx_config.h
+#if defined(CONFIG_SPATIAL_SVC) && CONFIG_SPATIAL_SVC
       size_t layer_sizes[VPX_SS_MAX_LAYERS];
       struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS];
 #endif

diff --git a/vpxdec.c b/vpxdec.c
index 0469071..2afdb71 100644
--- a/vpxdec.c
+++ b/vpxdec.c

@@ -47,52 +47,49 @@
   struct WebmInputContext *webm_ctx;
 };
 
-static const arg_def_t looparg = ARG_DEF(NULL, "loops", 1,
-                                          "Number of times to decode the file");
-static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
-                                          "Codec to use");
-static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
-                                          "Output raw YV12 frames");
-static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
-                                          "Output raw I420 frames");
-static const arg_def_t flipuvarg = ARG_DEF(NULL, "flipuv", 0,
-                                           "Flip the chroma planes in the output");
-static const arg_def_t rawvideo = ARG_DEF(NULL, "rawvideo", 0,
-                                          "Output raw YUV frames");
-static const arg_def_t noblitarg = ARG_DEF(NULL, "noblit", 0,
-                                           "Don't process the decoded frames");
-static const arg_def_t progressarg = ARG_DEF(NULL, "progress", 0,
-                                             "Show progress after each frame decodes");
-static const arg_def_t limitarg = ARG_DEF(NULL, "limit", 1,
-                                          "Stop decoding after n frames");
-static const arg_def_t skiparg = ARG_DEF(NULL, "skip", 1,
-                                         "Skip the first n input frames");
-static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0,
-                                             "Postprocess decoded frames");
-static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
-                                            "Show timing summary");
-static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
-                                            "Output file name pattern (see below)");
-static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
-                                            "Max threads to use");
-static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0,
-                                            "Show version string");
-static const arg_def_t error_concealment = ARG_DEF(NULL, "error-concealment", 0,
-                                                   "Enable decoder error-concealment");
-static const arg_def_t scalearg = ARG_DEF("S", "scale", 0,
-                                            "Scale output frames uniformly");
-static const arg_def_t continuearg =
-    ARG_DEF("k", "keep-going", 0, "(debug) Continue decoding after error");
-
-static const arg_def_t fb_arg =
-    ARG_DEF(NULL, "frame-buffers", 1, "Number of frame buffers to use");
-
-static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
-                                        "Compute the MD5 sum of the decoded frame");
+static const arg_def_t looparg = ARG_DEF(
+    NULL, "loops", 1, "Number of times to decode the file");
+static const arg_def_t codecarg = ARG_DEF(
+    NULL, "codec", 1, "Codec to use");
+static const arg_def_t use_yv12 = ARG_DEF(
+    NULL, "yv12", 0, "Output raw YV12 frames");
+static const arg_def_t use_i420 = ARG_DEF(
+    NULL, "i420", 0, "Output raw I420 frames");
+static const arg_def_t flipuvarg = ARG_DEF(
+    NULL, "flipuv", 0, "Flip the chroma planes in the output");
+static const arg_def_t rawvideo = ARG_DEF(
+    NULL, "rawvideo", 0, "Output raw YUV frames");
+static const arg_def_t noblitarg = ARG_DEF(
+    NULL, "noblit", 0, "Don't process the decoded frames");
+static const arg_def_t progressarg = ARG_DEF(
+    NULL, "progress", 0, "Show progress after each frame decodes");
+static const arg_def_t limitarg = ARG_DEF(
+    NULL, "limit", 1, "Stop decoding after n frames");
+static const arg_def_t skiparg = ARG_DEF(
+    NULL, "skip", 1, "Skip the first n input frames");
+static const arg_def_t postprocarg = ARG_DEF(
+    NULL, "postproc", 0, "Postprocess decoded frames");
+static const arg_def_t summaryarg = ARG_DEF(
+    NULL, "summary", 0, "Show timing summary");
+static const arg_def_t outputfile = ARG_DEF(
+    "o", "output", 1, "Output file name pattern (see below)");
+static const arg_def_t threadsarg = ARG_DEF(
+    "t", "threads", 1, "Max threads to use");
+static const arg_def_t verbosearg = ARG_DEF(
+    "v", "verbose", 0, "Show version string");
+static const arg_def_t error_concealment = ARG_DEF(
+    NULL, "error-concealment", 0, "Enable decoder error-concealment");
+static const arg_def_t scalearg = ARG_DEF(
+    "S", "scale", 0, "Scale output frames uniformly");
+static const arg_def_t continuearg = ARG_DEF(
+    "k", "keep-going", 0, "(debug) Continue decoding after error");
+static const arg_def_t fb_arg = ARG_DEF(
+    NULL, "frame-buffers", 1, "Number of frame buffers to use");
+static const arg_def_t md5arg = ARG_DEF(
+    NULL, "md5", 0, "Compute the MD5 sum of the decoded frame");
 #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
 static const arg_def_t outbitdeptharg = ARG_DEF(
-    NULL, "output-bit-depth", 1,
-    "Output bit-depth for decoded frames");
+    NULL, "output-bit-depth", 1, "Output bit-depth for decoded frames");
 #endif
 
 static const arg_def_t *all_args[] = {

diff --git a/vpxenc.c b/vpxenc.c
index cc81f4a..0a0c071 100644
--- a/vpxenc.c
+++ b/vpxenc.c

@@ -128,50 +128,50 @@
   return 0;
 }
 
-static const arg_def_t debugmode = ARG_DEF("D", "debug", 0,
-                                           "Debug mode (makes output deterministic)");
-static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
-                                            "Output filename");
-static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
-                                          "Input file is YV12 ");
-static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
-                                          "Input file is I420 (default)");
-static const arg_def_t use_i422 = ARG_DEF(NULL, "i422", 0,
-                                          "Input file is I422");
-static const arg_def_t use_i444 = ARG_DEF(NULL, "i444", 0,
-                                          "Input file is I444");
-static const arg_def_t use_i440 = ARG_DEF(NULL, "i440", 0,
-                                          "Input file is I440");
-static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
-                                          "Codec to use");
-static const arg_def_t passes           = ARG_DEF("p", "passes", 1,
-                                                  "Number of passes (1/2)");
-static const arg_def_t pass_arg         = ARG_DEF(NULL, "pass", 1,
-                                                  "Pass to execute (1/2)");
-static const arg_def_t fpf_name         = ARG_DEF(NULL, "fpf", 1,
-                                                  "First pass statistics file name");
+static const arg_def_t debugmode = ARG_DEF(
+    "D", "debug", 0, "Debug mode (makes output deterministic)");
+static const arg_def_t outputfile = ARG_DEF(
+    "o", "output", 1, "Output filename");
+static const arg_def_t use_yv12 = ARG_DEF(
+    NULL, "yv12", 0, "Input file is YV12 ");
+static const arg_def_t use_i420 = ARG_DEF(
+    NULL, "i420", 0, "Input file is I420 (default)");
+static const arg_def_t use_i422 = ARG_DEF(
+    NULL, "i422", 0, "Input file is I422");
+static const arg_def_t use_i444 = ARG_DEF(
+    NULL, "i444", 0, "Input file is I444");
+static const arg_def_t use_i440 = ARG_DEF(
+    NULL, "i440", 0, "Input file is I440");
+static const arg_def_t codecarg = ARG_DEF(
+    NULL, "codec", 1, "Codec to use");
+static const arg_def_t passes = ARG_DEF(
+    "p", "passes", 1, "Number of passes (1/2)");
+static const arg_def_t pass_arg = ARG_DEF(
+    NULL, "pass", 1, "Pass to execute (1/2)");
+static const arg_def_t fpf_name = ARG_DEF(
+    NULL, "fpf", 1, "First pass statistics file name");
 #if CONFIG_FP_MB_STATS
-static const arg_def_t fpmbf_name         = ARG_DEF(NULL, "fpmbf", 1,
-                                      "First pass block statistics file name");
+static const arg_def_t fpmbf_name = ARG_DEF(
+    NULL, "fpmbf", 1, "First pass block statistics file name");
 #endif
-static const arg_def_t limit = ARG_DEF(NULL, "limit", 1,
-                                       "Stop encoding after n input frames");
-static const arg_def_t skip = ARG_DEF(NULL, "skip", 1,
-                                      "Skip the first n input frames");
-static const arg_def_t deadline         = ARG_DEF("d", "deadline", 1,
-                                                  "Deadline per frame (usec)");
-static const arg_def_t best_dl          = ARG_DEF(NULL, "best", 0,
-                                                  "Use Best Quality Deadline");
-static const arg_def_t good_dl          = ARG_DEF(NULL, "good", 0,
-                                                  "Use Good Quality Deadline");
-static const arg_def_t rt_dl            = ARG_DEF(NULL, "rt", 0,
-                                                  "Use Realtime Quality Deadline");
-static const arg_def_t quietarg         = ARG_DEF("q", "quiet", 0,
-                                                  "Do not print encode progress");
-static const arg_def_t verbosearg       = ARG_DEF("v", "verbose", 0,
-                                                  "Show encoder parameters");
-static const arg_def_t psnrarg          = ARG_DEF(NULL, "psnr", 0,
-                                                  "Show PSNR in status line");
+static const arg_def_t limit = ARG_DEF(
+    NULL, "limit", 1, "Stop encoding after n input frames");
+static const arg_def_t skip = ARG_DEF(
+    NULL, "skip", 1, "Skip the first n input frames");
+static const arg_def_t deadline = ARG_DEF(
+    "d", "deadline", 1, "Deadline per frame (usec)");
+static const arg_def_t best_dl = ARG_DEF(
+    NULL, "best", 0, "Use Best Quality Deadline");
+static const arg_def_t good_dl = ARG_DEF(
+    NULL, "good", 0, "Use Good Quality Deadline");
+static const arg_def_t rt_dl = ARG_DEF(
+    NULL, "rt", 0, "Use Realtime Quality Deadline");
+static const arg_def_t quietarg = ARG_DEF(
+    "q", "quiet", 0, "Do not print encode progress");
+static const arg_def_t verbosearg = ARG_DEF(
+    "v", "verbose", 0, "Show encoder parameters");
+static const arg_def_t psnrarg = ARG_DEF(
+    NULL, "psnr", 0, "Show PSNR in status line");
 
 static const struct arg_enum_list test_decode_enum[] = {
   {"off",   TEST_DECODE_OFF},
@@ -179,25 +179,25 @@
   {"warn",  TEST_DECODE_WARN},
   {NULL, 0}
 };
-static const arg_def_t recontest = ARG_DEF_ENUM(NULL, "test-decode", 1,
-                                                "Test encode/decode mismatch",
-                                                test_decode_enum);
-static const arg_def_t framerate        = ARG_DEF(NULL, "fps", 1,
-                                                  "Stream frame rate (rate/scale)");
-static const arg_def_t use_ivf          = ARG_DEF(NULL, "ivf", 0,
-                                                  "Output IVF (default is WebM if WebM IO is enabled)");
-static const arg_def_t out_part = ARG_DEF("P", "output-partitions", 0,
-                                          "Makes encoder output partitions. Requires IVF output!");
-static const arg_def_t q_hist_n         = ARG_DEF(NULL, "q-hist", 1,
-                                                  "Show quantizer histogram (n-buckets)");
-static const arg_def_t rate_hist_n         = ARG_DEF(NULL, "rate-hist", 1,
-                                                     "Show rate histogram (n-buckets)");
-static const arg_def_t disable_warnings =
-    ARG_DEF(NULL, "disable-warnings", 0,
-            "Disable warnings about potentially incorrect encode settings.");
-static const arg_def_t disable_warning_prompt =
-    ARG_DEF("y", "disable-warning-prompt", 0,
-            "Display warnings, but do not prompt user to continue.");
+static const arg_def_t recontest = ARG_DEF_ENUM(
+    NULL, "test-decode", 1, "Test encode/decode mismatch", test_decode_enum);
+static const arg_def_t framerate = ARG_DEF(
+    NULL, "fps", 1, "Stream frame rate (rate/scale)");
+static const arg_def_t use_ivf = ARG_DEF(
+    NULL, "ivf", 0, "Output IVF (default is WebM if WebM IO is enabled)");
+static const arg_def_t out_part = ARG_DEF(
+    "P", "output-partitions", 0,
+    "Makes encoder output partitions. Requires IVF output!");
+static const arg_def_t q_hist_n = ARG_DEF(
+    NULL, "q-hist", 1, "Show quantizer histogram (n-buckets)");
+static const arg_def_t rate_hist_n = ARG_DEF(
+    NULL, "rate-hist", 1, "Show rate histogram (n-buckets)");
+static const arg_def_t disable_warnings = ARG_DEF(
+    NULL, "disable-warnings", 0,
+    "Disable warnings about potentially incorrect encode settings.");
+static const arg_def_t disable_warning_prompt = ARG_DEF(
+    "y", "disable-warning-prompt", 0,
+    "Display warnings, but do not prompt user to continue.");
 
 #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
 static const arg_def_t test16bitinternalarg = ARG_DEF(
@@ -213,16 +213,14 @@
   NULL
 };
 
-static const arg_def_t usage            = ARG_DEF("u", "usage", 1,
-                                                  "Usage profile number to use");
-static const arg_def_t threads          = ARG_DEF("t", "threads", 1,
-                                                  "Max number of threads to use");
-static const arg_def_t profile          = ARG_DEF(NULL, "profile", 1,
-                                                  "Bitstream profile number to use");
-static const arg_def_t width            = ARG_DEF("w", "width", 1,
-                                                  "Frame width");
-static const arg_def_t height           = ARG_DEF("h", "height", 1,
-                                                  "Frame height");
+static const arg_def_t usage = ARG_DEF(
+    "u", "usage", 1, "Usage profile number to use");
+static const arg_def_t threads = ARG_DEF(
+    "t", "threads", 1, "Max number of threads to use");
+static const arg_def_t profile = ARG_DEF(
+    NULL, "profile", 1, "Bitstream profile number to use");
+static const arg_def_t width = ARG_DEF("w", "width", 1, "Frame width");
+static const arg_def_t height = ARG_DEF("h", "height", 1, "Frame height");
 #if CONFIG_WEBM_IO
 static const struct arg_enum_list stereo_mode_enum[] = {
   {"mono", STEREO_FORMAT_MONO},
@@ -232,15 +230,15 @@
   {"right-left", STEREO_FORMAT_RIGHT_LEFT},
   {NULL, 0}
 };
-static const arg_def_t stereo_mode      = ARG_DEF_ENUM(NULL, "stereo-mode", 1,
-                                                       "Stereo 3D video format", stereo_mode_enum);
+static const arg_def_t stereo_mode = ARG_DEF_ENUM(
+    NULL, "stereo-mode", 1, "Stereo 3D video format", stereo_mode_enum);
 #endif
-static const arg_def_t timebase         = ARG_DEF(NULL, "timebase", 1,
-                                                  "Output timestamp precision (fractional seconds)");
-static const arg_def_t error_resilient  = ARG_DEF(NULL, "error-resilient", 1,
-                                                  "Enable error resiliency features");
-static const arg_def_t lag_in_frames    = ARG_DEF(NULL, "lag-in-frames", 1,
-                                                  "Max number of frames to lag");
+static const arg_def_t timebase = ARG_DEF(
+    NULL, "timebase", 1, "Output timestamp precision (fractional seconds)");
+static const arg_def_t error_resilient = ARG_DEF(
+    NULL, "error-resilient", 1, "Enable error resiliency features");
+static const arg_def_t lag_in_frames = ARG_DEF(
+    NULL, "lag-in-frames", 1, "Max number of frames to lag");
 
 static const arg_def_t *global_args[] = {
   &use_yv12, &use_i420, &use_i422, &use_i444, &use_i440,
@@ -257,18 +255,18 @@
   &lag_in_frames, NULL
 };
 
-static const arg_def_t dropframe_thresh   = ARG_DEF(NULL, "drop-frame", 1,
-                                                    "Temporal resampling threshold (buf %)");
-static const arg_def_t resize_allowed     = ARG_DEF(NULL, "resize-allowed", 1,
-                                                    "Spatial resampling enabled (bool)");
-static const arg_def_t resize_width       = ARG_DEF(NULL, "resize-width", 1,
-                                                    "Width of encoded frame");
-static const arg_def_t resize_height      = ARG_DEF(NULL, "resize-height", 1,
-                                                    "Height of encoded frame");
-static const arg_def_t resize_up_thresh   = ARG_DEF(NULL, "resize-up", 1,
-                                                    "Upscale threshold (buf %)");
-static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1,
-                                                    "Downscale threshold (buf %)");
+static const arg_def_t dropframe_thresh = ARG_DEF(
+    NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
+static const arg_def_t resize_allowed = ARG_DEF(
+    NULL, "resize-allowed", 1, "Spatial resampling enabled (bool)");
+static const arg_def_t resize_width = ARG_DEF(
+    NULL, "resize-width", 1, "Width of encoded frame");
+static const arg_def_t resize_height = ARG_DEF(
+    NULL, "resize-height", 1, "Height of encoded frame");
+static const arg_def_t resize_up_thresh = ARG_DEF(
+    NULL, "resize-up", 1, "Upscale threshold (buf %)");
+static const arg_def_t resize_down_thresh = ARG_DEF(
+    NULL, "resize-down", 1, "Downscale threshold (buf %)");
 static const struct arg_enum_list end_usage_enum[] = {
   {"vbr", VPX_VBR},
   {"cbr", VPX_CBR},
@@ -276,24 +274,24 @@
   {"q",   VPX_Q},
   {NULL, 0}
 };
-static const arg_def_t end_usage          = ARG_DEF_ENUM(NULL, "end-usage", 1,
-                                                         "Rate control mode", end_usage_enum);
-static const arg_def_t target_bitrate     = ARG_DEF(NULL, "target-bitrate", 1,
-                                                    "Bitrate (kbps)");
-static const arg_def_t min_quantizer      = ARG_DEF(NULL, "min-q", 1,
-                                                    "Minimum (best) quantizer");
-static const arg_def_t max_quantizer      = ARG_DEF(NULL, "max-q", 1,
-                                                    "Maximum (worst) quantizer");
-static const arg_def_t undershoot_pct     = ARG_DEF(NULL, "undershoot-pct", 1,
-                                                    "Datarate undershoot (min) target (%)");
-static const arg_def_t overshoot_pct      = ARG_DEF(NULL, "overshoot-pct", 1,
-                                                    "Datarate overshoot (max) target (%)");
-static const arg_def_t buf_sz             = ARG_DEF(NULL, "buf-sz", 1,
-                                                    "Client buffer size (ms)");
-static const arg_def_t buf_initial_sz     = ARG_DEF(NULL, "buf-initial-sz", 1,
-                                                    "Client initial buffer size (ms)");
-static const arg_def_t buf_optimal_sz     = ARG_DEF(NULL, "buf-optimal-sz", 1,
-                                                    "Client optimal buffer size (ms)");
+static const arg_def_t end_usage = ARG_DEF_ENUM(
+    NULL, "end-usage", 1, "Rate control mode", end_usage_enum);
+static const arg_def_t target_bitrate = ARG_DEF(
+    NULL, "target-bitrate", 1, "Bitrate (kbps)");
+static const arg_def_t min_quantizer = ARG_DEF(
+    NULL, "min-q", 1, "Minimum (best) quantizer");
+static const arg_def_t max_quantizer = ARG_DEF(
+    NULL, "max-q", 1, "Maximum (worst) quantizer");
+static const arg_def_t undershoot_pct = ARG_DEF(
+    NULL, "undershoot-pct", 1, "Datarate undershoot (min) target (%)");
+static const arg_def_t overshoot_pct = ARG_DEF(
+    NULL, "overshoot-pct", 1, "Datarate overshoot (max) target (%)");
+static const arg_def_t buf_sz = ARG_DEF(
+    NULL, "buf-sz", 1, "Client buffer size (ms)");
+static const arg_def_t buf_initial_sz = ARG_DEF(
+    NULL, "buf-initial-sz", 1, "Client initial buffer size (ms)");
+static const arg_def_t buf_optimal_sz = ARG_DEF(
+    NULL, "buf-optimal-sz", 1, "Client optimal buffer size (ms)");
 static const arg_def_t *rc_args[] = {
   &dropframe_thresh, &resize_allowed, &resize_width, &resize_height,
   &resize_up_thresh, &resize_down_thresh, &end_usage, &target_bitrate,
@@ -302,59 +300,59 @@
 };
 
 
-static const arg_def_t bias_pct = ARG_DEF(NULL, "bias-pct", 1,
-                                          "CBR/VBR bias (0=CBR, 100=VBR)");
-static const arg_def_t minsection_pct = ARG_DEF(NULL, "minsection-pct", 1,
-                                                "GOP min bitrate (% of target)");
-static const arg_def_t maxsection_pct = ARG_DEF(NULL, "maxsection-pct", 1,
-                                                "GOP max bitrate (% of target)");
+static const arg_def_t bias_pct = ARG_DEF(
+    NULL, "bias-pct", 1, "CBR/VBR bias (0=CBR, 100=VBR)");
+static const arg_def_t minsection_pct = ARG_DEF(
+    NULL, "minsection-pct", 1, "GOP min bitrate (% of target)");
+static const arg_def_t maxsection_pct = ARG_DEF(
+    NULL, "maxsection-pct", 1, "GOP max bitrate (% of target)");
 static const arg_def_t *rc_twopass_args[] = {
   &bias_pct, &minsection_pct, &maxsection_pct, NULL
 };
 
 
-static const arg_def_t kf_min_dist = ARG_DEF(NULL, "kf-min-dist", 1,
-                                             "Minimum keyframe interval (frames)");
-static const arg_def_t kf_max_dist = ARG_DEF(NULL, "kf-max-dist", 1,
-                                             "Maximum keyframe interval (frames)");
-static const arg_def_t kf_disabled = ARG_DEF(NULL, "disable-kf", 0,
-                                             "Disable keyframe placement");
+static const arg_def_t kf_min_dist = ARG_DEF(
+    NULL, "kf-min-dist", 1, "Minimum keyframe interval (frames)");
+static const arg_def_t kf_max_dist = ARG_DEF(
+    NULL, "kf-max-dist", 1, "Maximum keyframe interval (frames)");
+static const arg_def_t kf_disabled = ARG_DEF(
+    NULL, "disable-kf", 0, "Disable keyframe placement");
 static const arg_def_t *kf_args[] = {
   &kf_min_dist, &kf_max_dist, &kf_disabled, NULL
 };
 
 
-static const arg_def_t noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1,
-                                            "Noise sensitivity (frames to blur)");
-static const arg_def_t sharpness = ARG_DEF(NULL, "sharpness", 1,
-                                           "Loop filter sharpness (0..7)");
-static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1,
-                                               "Motion detection threshold");
-static const arg_def_t cpu_used = ARG_DEF(NULL, "cpu-used", 1,
-                                          "CPU Used (-16..16)");
-static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
-                                             "Enable automatic alt reference frames");
-static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
-                                                "AltRef max frames (0..15)");
-static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1,
-                                               "AltRef filter strength (0..6)");
-static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1,
-                                           "AltRef type");
+static const arg_def_t noise_sens = ARG_DEF(
+    NULL, "noise-sensitivity", 1, "Noise sensitivity (frames to blur)");
+static const arg_def_t sharpness = ARG_DEF(
+    NULL, "sharpness", 1, "Loop filter sharpness (0..7)");
+static const arg_def_t static_thresh = ARG_DEF(
+    NULL, "static-thresh", 1, "Motion detection threshold");
+static const arg_def_t cpu_used = ARG_DEF(
+    NULL, "cpu-used", 1, "CPU Used (-16..16)");
+static const arg_def_t auto_altref = ARG_DEF(
+    NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames");
+static const arg_def_t arnr_maxframes = ARG_DEF(
+    NULL, "arnr-maxframes", 1, "AltRef max frames (0..15)");
+static const arg_def_t arnr_strength = ARG_DEF(
+    NULL, "arnr-strength", 1, "AltRef filter strength (0..6)");
+static const arg_def_t arnr_type = ARG_DEF(
+    NULL, "arnr-type", 1, "AltRef type");
 static const struct arg_enum_list tuning_enum[] = {
   {"psnr", VP8_TUNE_PSNR},
   {"ssim", VP8_TUNE_SSIM},
   {NULL, 0}
 };
-static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1,
-                                                "Material to favor", tuning_enum);
-static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1,
-                                          "Constant/Constrained Quality level");
-static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1,
-                                                    "Max I-frame bitrate (pct)");
+static const arg_def_t tune_ssim = ARG_DEF_ENUM(
+    NULL, "tune", 1, "Material to favor", tuning_enum);
+static const arg_def_t cq_level = ARG_DEF(
+    NULL, "cq-level", 1, "Constant/Constrained Quality level");
+static const arg_def_t max_intra_rate_pct = ARG_DEF(
+    NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)");
 
 #if CONFIG_VP8_ENCODER
-static const arg_def_t token_parts =
-    ARG_DEF(NULL, "token-parts", 1, "Number of token partitions to use, log2");
+static const arg_def_t token_parts = ARG_DEF(
+    NULL, "token-parts", 1, "Number of token partitions to use, log2");
 static const arg_def_t *vp8_args[] = {
   &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
   &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
@@ -372,11 +370,12 @@
 #endif
 
 #if CONFIG_VP9_ENCODER
-static const arg_def_t tile_cols =
-    ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2");
-static const arg_def_t tile_rows =
-    ARG_DEF(NULL, "tile-rows", 1, "Number of tile rows to use, log2");
-static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode");
+static const arg_def_t tile_cols = ARG_DEF(
+    NULL, "tile-columns", 1, "Number of tile columns to use, log2");
+static const arg_def_t tile_rows = ARG_DEF(
+    NULL, "tile-rows", 1, "Number of tile rows to use, log2");
+static const arg_def_t lossless = ARG_DEF(
+    NULL, "lossless", 1, "Lossless mode");
 static const arg_def_t frame_parallel_decoding = ARG_DEF(
     NULL, "frame-parallel", 1, "Enable frame parallel decodability features");
 static const arg_def_t aq_mode = ARG_DEF(
@@ -395,13 +394,12 @@
   {NULL, 0}
 };
 
-static const arg_def_t bitdeptharg   = ARG_DEF_ENUM("b", "bit-depth", 1,
-                                                    "Bit depth for codec "
-                                                    "(8 for version <=1, "
-                                                    "10 or 12 for version 2)",
-                                                    bitdepth_enum);
-static const arg_def_t inbitdeptharg = ARG_DEF(NULL, "input-bit-depth", 1,
-                                               "Bit depth of input");
+static const arg_def_t bitdeptharg = ARG_DEF_ENUM(
+    "b", "bit-depth", 1,
+    "Bit depth for codec (8 for version <=1, 10 or 12 for version 2)",
+    bitdepth_enum);
+static const arg_def_t inbitdeptharg = ARG_DEF(
+    NULL, "input-bit-depth", 1, "Bit depth of input");
 #endif
 
 static const struct arg_enum_list tune_content_enum[] = {
@@ -1161,7 +1159,6 @@
             if (j == config->arg_ctrl_cnt)
               config->arg_ctrl_cnt++;
           }
-
         }
       }
       if (!match)
@@ -1917,14 +1914,15 @@
     /* If the input file doesn't specify its w/h (raw files), try to get
      * the data from the first stream's configuration.
      */
-    if (!input.width || !input.height)
-      FOREACH_STREAM( {
-      if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
-        input.width = stream->config.cfg.g_w;
-        input.height = stream->config.cfg.g_h;
-        break;
-      }
-    });
+    if (!input.width || !input.height) {
+      FOREACH_STREAM({
+        if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
+          input.width = stream->config.cfg.g_w;
+          input.height = stream->config.cfg.g_h;
+          break;
+        }
+      });
+    }
 
     /* Update stream configurations from the input file's parameters */
     if (!input.width || !input.height)