Merge "Moving MBGRAPH_{MB, FRAME}_STATS structs to vp9_mbgraph.h."

diff --git a/examples.mk b/examples.mk
index 98e629a..0b62df9 100644
--- a/examples.mk
+++ b/examples.mk

@@ -64,11 +64,6 @@
 vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h
 vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
 vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
-UTILS-$(CONFIG_ENCODERS)    += vpx_temporal_scalable_patterns.c
-vpx_temporal_scalable_patterns.SRCS += ivfenc.c ivfenc.h
-vpx_temporal_scalable_patterns.SRCS += tools_common.c tools_common.h
-vpx_temporal_scalable_patterns.GUID   = B18C08F2-A439-4502-A78E-849BE3D60947
-vpx_temporal_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
 
 ifeq ($(CONFIG_SHARED),no)
 UTILS-$(CONFIG_VP9_ENCODER)    += resize_util.c

diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index dad2a69..db7dfdb 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc

@@ -200,102 +200,21 @@
     frame_number_ = 0;
     first_drop_ = 0;
     num_drops_ = 0;
-    // For testing up to 3 layers.
-    for (int i = 0; i < 3; ++i) {
-      bits_total_[i] = 0;
-    }
-  }
-
-  //
-  // Frame flags and layer id for temporal layers.
-  //
-
-  // For two layers, test pattern is:
-  //   1     3
-  // 0    2     .....
-  // For three layers, test pattern is:
-  //   1      3    5      7
-  //      2           6
-  // 0          4            ....
-  // LAST is always update on base/layer 0, GOLDEN is updated on layer 1.
-  // For this 3 layer example, the 2rd enhancement layer (layer 2) does not
-  // update any reference frames.
-  int SetFrameFlags(int frame_num, int num_temp_layers) {
-    int frame_flags = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        // Layer 0: predict from L and ARF, update L.
-        frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-                      VP8_EFLAG_NO_UPD_ARF;
-      } else {
-        // Layer 1: predict from L, G and ARF, and update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_UPD_ENTROPY;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        // Layer 0: predict from L and ARF; update L.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                      VP8_EFLAG_NO_REF_GF;
-      } else if ((frame_num - 2) % 4 == 0) {
-        // Layer 1: predict from L, G, ARF; update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      }  else if ((frame_num - 1) % 2 == 0) {
-        // Layer 2: predict from L, G, ARF; update none.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                      VP8_EFLAG_NO_UPD_LAST;
-      }
-    }
-    return frame_flags;
-  }
-
-  int SetLayerId(int frame_num, int num_temp_layers) {
-    int layer_id = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        layer_id = 0;
-      } else {
-        layer_id = 1;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        layer_id = 0;
-      } else if ((frame_num - 2) % 4 == 0) {
-        layer_id = 1;
-      } else if ((frame_num - 1) % 2 == 0) {
-        layer_id = 2;
-      }
-    }
-    return layer_id;
+    bits_total_ = 0;
+    duration_ = 0.0;
   }
 
   virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
+                                    ::libvpx_test::Encoder *encoder) {
     if (video->frame() == 1) {
       encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
     }
-    if (cfg_.ts_number_layers > 1) {
-      if (video->frame() == 1) {
-        encoder->Control(VP9E_SET_SVC, 1);
-      }
-      vpx_svc_layer_id_t layer_id = {0, 0};
-      layer_id.spatial_layer_id = 0;
-      frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
-      layer_id.temporal_layer_id = SetLayerId(video->frame(),
-                                              cfg_.ts_number_layers);
-      if (video->frame() > 0) {
-       encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
-      }
-    }
     const vpx_rational_t tb = video->timebase();
     timebase_ = static_cast<double>(tb.num) / tb.den;
     duration_ = 0;
   }
 
-
   virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    int layer = SetLayerId(frame_number_, cfg_.ts_number_layers);
-
     // Time since last timestamp = duration.
     vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
 
@@ -308,12 +227,7 @@
         << pkt->data.frame.pts;
 
     const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
-
-    // Update the total encoded bits. For temporal layers, update the cumulative
-    // encoded bits per layer.
-    for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) {
-      bits_total_[i] += frame_size_in_bits;
-    }
+    bits_total_ += frame_size_in_bits;
 
     // If first drop not set and we have a drop set it to this time.
     if (!first_drop_ && duration > 1)
@@ -330,22 +244,19 @@
   }
 
   virtual void EndPassHook(void) {
-    for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers);
-        layer++) {
+    if (bits_total_) {
       duration_ = (last_pts_ + 1) * timebase_;
-      if (bits_total_[layer]) {
-        // Effective file datarate:
-        effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_;
-      }
+      // Effective file datarate:
+      effective_datarate_ = ((bits_total_) / 1000.0) / duration_;
     }
   }
 
   vpx_codec_pts_t last_pts_;
   double timebase_;
   int frame_number_;
-  int64_t bits_total_[3];
+  int64_t bits_total_;
   double duration_;
-  double effective_datarate_[3];
+  double effective_datarate_;
   int set_cpu_used_;
   int64_t bits_in_buffer_model_;
   vpx_codec_pts_t first_drop_;
@@ -361,7 +272,6 @@
   cfg_.rc_min_quantizer = 0;
   cfg_.rc_max_quantizer = 63;
   cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
 
   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 140);
@@ -369,10 +279,12 @@
     cfg_.rc_target_bitrate = i;
     ResetModel();
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
-        << " The datarate for the file is lower than target by too much!";
-    ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
-        << " The datarate for the file is greater than target by too much!";
+    ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 0.85)
+        << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 1.15)
+        << " The datarate for the file missed the target!";
   }
 }
 
@@ -397,10 +309,10 @@
     ResetModel();
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
     ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
-              effective_datarate_[0] * 0.85)
+              effective_datarate_ * 0.85)
         << " The datarate for the file exceeds the target by too much!";
     ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
-              effective_datarate_[0] * 1.15)
+              effective_datarate_ * 1.15)
         << " The datarate for the file missed the target!"
         << cfg_.rc_target_bitrate << " "<< effective_datarate_;
   }
@@ -422,7 +334,6 @@
   cfg_.rc_max_quantizer = 50;
   cfg_.rc_end_usage = VPX_CBR;
   cfg_.rc_target_bitrate = 200;
-  cfg_.g_lag_in_frames = 0;
 
   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 140);
@@ -434,10 +345,10 @@
     cfg_.rc_dropframe_thresh = i;
     ResetModel();
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
-        << " The datarate for the file is lower than target by too much!";
-    ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
-        << " The datarate for the file is greater than target by too much!";
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+           << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
+           << " The datarate for the file is greater than target by too much!";
     ASSERT_LE(first_drop_, last_drop)
         << " The first dropped frame for drop_thresh " << i
         << " > first dropped frame for drop_thresh "
@@ -451,81 +362,6 @@
   }
 }
 
-// Check basic rate targeting for 2 temporal layers.
-TEST_P(DatarateTestVP9, BasicRateTargeting2TemporalLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  // 2 Temporal layers, no spatial layers: Framerate decimation (2, 1).
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 2;
-  cfg_.ts_rate_decimator[0] = 2;
-  cfg_.ts_rate_decimator[1] = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    // 60-40 bitrate allocation for 2 temporal layers.
-    cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
-          << " The datarate for the file is lower than target by too much, "
-              "for layer: " << j;
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
-          << " The datarate for the file is greater than target by too much, "
-              "for layer: " << j;
-    }
-  }
-}
-
-// Check basic rate targeting for 3 temporal layers.
-TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1).
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    // 40-20-40 bitrate allocation for 3 temporal layers.
-    cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
-          << " The datarate for the file is lower than target by too much, "
-              "for layer: " << j;
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
-          << " The datarate for the file is greater than target by too much, "
-              "for layer: " << j;
-    }
-  }
-}
 VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES);
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9,
                           ::testing::Values(::libvpx_test::kOnePassGood),

diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h
index 8017a2a..4dabcd5 100644
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h

@@ -123,11 +123,6 @@
     ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
   }
 
-  void Control(int ctrl_id, struct vpx_svc_layer_id *arg) {
-    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-  }
-
   void set_deadline(unsigned long deadline) {
     deadline_ = deadline;
   }

diff --git a/test/resize_test.cc b/test/resize_test.cc
index 1963453..8d08f1e 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc

@@ -146,16 +146,16 @@
   ResizingVideoSource video;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 
-  for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
        info != frame_info_list_.end(); ++info) {
-    const vpx_codec_pts_t pts = info->pts;
-    const unsigned int expected_w = ScaleForFrameNumber(pts, kInitialWidth);
-    const unsigned int expected_h = ScaleForFrameNumber(pts, kInitialHeight);
+    const unsigned int frame = static_cast<unsigned>(info->pts);
+    const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
+    const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
 
     EXPECT_EQ(expected_w, info->w)
-        << "Frame " << pts << "had unexpected width";
+        << "Frame " << frame << "had unexpected width";
     EXPECT_EQ(expected_h, info->h)
-        << "Frame " << pts << "had unexpected height";
+        << "Frame " << frame << "had unexpected height";
   }
 }
 
@@ -247,7 +247,7 @@
   cfg_.g_lag_in_frames = 0;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 
-  for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
        info != frame_info_list_.end(); ++info) {
     const vpx_codec_pts_t pts = info->pts;
     if (pts >= kStepDownFrame && pts < kStepUpFrame) {

diff --git a/tools/diff.py b/tools/diff.py
index a42a4dc..a96c7db 100644
--- a/tools/diff.py
+++ b/tools/diff.py

@@ -56,6 +56,9 @@
         elif line[0] == " ":
             self.left.Append(line)
             self.right.Append(line)
+        elif line[0] == "\\":
+            # Ignore newline messages from git diff.
+            pass
         else:
             assert False, ("Unrecognized character at start of diff line "
                            "%r" % line[0])

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 19e9d27..11d244f 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c

@@ -751,9 +751,6 @@
     if (!ctx->cfg.rc_target_bitrate)
         return res;
 
-    if (!ctx->cfg.rc_target_bitrate)
-        return res;
-
     if (img)
         res = validate_img(ctx, img);
 

diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm b/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm b/vp9/common/arm/neon/vp9_idct16x16_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct16x16_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm b/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm b/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct32x32_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm b/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm b/vp9/common/arm/neon/vp9_idct4x4_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct4x4_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm b/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm b/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
rename to vp9/common/arm/neon/vp9_idct8x8_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm b/vp9/common/arm/neon/vp9_iht4x4_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
rename to vp9/common/arm/neon/vp9_iht4x4_add_neon.asm


diff --git a/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm b/vp9/common/arm/neon/vp9_iht8x8_add_neon.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
rename to vp9/common/arm/neon/vp9_iht8x8_add_neon.asm


diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index f85aa2b..70b8ffa 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h

@@ -230,7 +230,7 @@
   /* Inverse transform function pointers. */
   void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
 
-  const interp_kernel *interp_kernel;
+  const InterpKernel *interp_kernel;
 
   int corrupted;
 

diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 69964da..2dccb70 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h

@@ -18,6 +18,7 @@
 #include "./vpx_config.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_systemdependent.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -59,16 +60,8 @@
   return value < low ? low : (value > high ? high : value);
 }
 
-static int get_unsigned_bits(unsigned int num_values) {
-  int cat = 0;
-  if (num_values <= 1)
-    return 0;
-  num_values--;
-  while (num_values > 0) {
-    cat++;
-    num_values >>= 1;
-  }
-  return cat;
+static INLINE int get_unsigned_bits(unsigned int num_values) {
+  return num_values > 0 ? get_msb(num_values) + 1 : 0;
 }
 
 #if CONFIG_DEBUG

diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index b105a57..3807ccc 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c

@@ -20,7 +20,7 @@
 
 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
-                           const interp_kernel *x_filters,
+                           const InterpKernel *x_filters,
                            int x0_q4, int x_step_q4, int w, int h) {
   int x, y;
   src -= SUBPEL_TAPS / 2 - 1;
@@ -42,7 +42,7 @@
 
 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
-                               const interp_kernel *x_filters,
+                               const InterpKernel *x_filters,
                                int x0_q4, int x_step_q4, int w, int h) {
   int x, y;
   src -= SUBPEL_TAPS / 2 - 1;
@@ -65,7 +65,7 @@
 
 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
-                          const interp_kernel *y_filters,
+                          const InterpKernel *y_filters,
                           int y0_q4, int y_step_q4, int w, int h) {
   int x, y;
   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
@@ -88,7 +88,7 @@
 
 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
-                              const interp_kernel *y_filters,
+                              const InterpKernel *y_filters,
                               int y0_q4, int y_step_q4, int w, int h) {
   int x, y;
   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
@@ -112,9 +112,9 @@
 
 static void convolve(const uint8_t *src, ptrdiff_t src_stride,
                      uint8_t *dst, ptrdiff_t dst_stride,
-                     const interp_kernel *const x_filters,
+                     const InterpKernel *const x_filters,
                      int x0_q4, int x_step_q4,
-                     const interp_kernel *const y_filters,
+                     const InterpKernel *const y_filters,
                      int y0_q4, int y_step_q4,
                      int w, int h) {
   // Fixed size intermediate buffer places limits on parameters.
@@ -138,14 +138,14 @@
                 y_filters, y0_q4, y_step_q4, w, h);
 }
 
-static const interp_kernel *get_filter_base(const int16_t *filter) {
+static const InterpKernel *get_filter_base(const int16_t *filter) {
   // NOTE: This assumes that the filter table is 256-byte aligned.
   // TODO(agrange) Modify to make independent of table alignment.
-  return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
+  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
 }
 
-static int get_filter_offset(const int16_t *f, const interp_kernel *base) {
-  return (const interp_kernel *)(intptr_t)f - base;
+static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
+  return (const InterpKernel *)(intptr_t)f - base;
 }
 
 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
@@ -153,7 +153,7 @@
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
                            int w, int h) {
-  const interp_kernel *const filters_x = get_filter_base(filter_x);
+  const InterpKernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
   convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
@@ -165,7 +165,7 @@
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
                                int w, int h) {
-  const interp_kernel *const filters_x = get_filter_base(filter_x);
+  const InterpKernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
   convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
@@ -177,7 +177,7 @@
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
-  const interp_kernel *const filters_y = get_filter_base(filter_y);
+  const InterpKernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
   convolve_vert(src, src_stride, dst, dst_stride, filters_y,
                 y0_q4, y_step_q4, w, h);
@@ -188,7 +188,7 @@
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
-  const interp_kernel *const filters_y = get_filter_base(filter_y);
+  const InterpKernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
   convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
                     y0_q4, y_step_q4, w, h);
@@ -199,10 +199,10 @@
                      const int16_t *filter_x, int x_step_q4,
                      const int16_t *filter_y, int y_step_q4,
                      int w, int h) {
-  const interp_kernel *const filters_x = get_filter_base(filter_x);
+  const InterpKernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
-  const interp_kernel *const filters_y = get_filter_base(filter_y);
+  const InterpKernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
 
   convolve(src, src_stride, dst, dst_stride,

diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index dbde6d5..546f603 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c

@@ -14,7 +14,7 @@
 
 #include "vp9/common/vp9_filter.h"
 
-DECLARE_ALIGNED(256, const interp_kernel,
+DECLARE_ALIGNED(256, const InterpKernel,
                 vp9_bilinear_filters[SUBPEL_SHIFTS]) = {
   { 0, 0, 0, 128,   0, 0, 0, 0 },
   { 0, 0, 0, 120,   8, 0, 0, 0 },
@@ -35,7 +35,7 @@
 };
 
 // Lagrangian interpolation filter
-DECLARE_ALIGNED(256, const interp_kernel,
+DECLARE_ALIGNED(256, const InterpKernel,
                 vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
   { 0,   0,   0, 128,   0,   0,   0,  0},
   { 0,   1,  -5, 126,   8,  -3,   1,  0},
@@ -56,7 +56,7 @@
 };
 
 // DCT based filter
-DECLARE_ALIGNED(256, const interp_kernel,
+DECLARE_ALIGNED(256, const InterpKernel,
                 vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
   {0,   0,   0, 128,   0,   0,   0, 0},
   {-1,   3,  -7, 127,   8,  -3,   1, 0},
@@ -77,7 +77,7 @@
 };
 
 // freqmultiplier = 0.5
-DECLARE_ALIGNED(256, const interp_kernel,
+DECLARE_ALIGNED(256, const InterpKernel,
                 vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
   { 0,  0,  0, 128,  0,  0,  0,  0},
   {-3, -1, 32,  64, 38,  1, -3,  0},
@@ -98,14 +98,14 @@
 };
 
 
-static const interp_kernel* vp9_filter_kernels[4] = {
+static const InterpKernel* vp9_filter_kernels[4] = {
   vp9_sub_pel_filters_8,
   vp9_sub_pel_filters_8lp,
   vp9_sub_pel_filters_8s,
   vp9_bilinear_filters
 };
 
-const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter) {
+const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter) {
   assert(filter != SWITCHABLE);
   return vp9_filter_kernels[filter];
 }

diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index b611e30..15610d7 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h

@@ -33,14 +33,14 @@
   SWITCHABLE = 4  /* should be the last one */
 } INTERP_FILTER;
 
-typedef int16_t interp_kernel[SUBPEL_TAPS];
+typedef int16_t InterpKernel[SUBPEL_TAPS];
 
-const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter);
+const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
 
-extern const interp_kernel vp9_bilinear_filters[SUBPEL_SHIFTS];
-extern const interp_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
-extern const interp_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
-extern const interp_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
+extern const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS];
+extern const InterpKernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
+extern const InterpKernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
+extern const InterpKernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
 
 // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
 // filter kernel as a 2 tap filter.

diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index f3a6b92..564e419 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h

@@ -147,12 +147,8 @@
     // END DATARATE CONTROL OPTIONS
     // ----------------------------------------------------------------
 
-    // Spatial and temporal scalability.
-    int ss_number_layers;  // Number of spatial layers.
-    unsigned int ts_number_layers;  // Number of temporal layers.
-    // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
-    unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS];
-    unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS];
+    // Spatial scalability
+    int ss_number_layers;
 
     // these parameters aren't to be used in final build don't use!!!
     int play_alternate;

diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index d554cc0..7576e7b 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c

@@ -69,7 +69,7 @@
                             const int subpel_y,
                             const struct scale_factors *sf,
                             int w, int h, int ref,
-                            const interp_kernel *kernel,
+                            const InterpKernel *kernel,
                             int xs, int ys) {
   sf->predict[subpel_x != 0][subpel_y != 0][ref](
       src, src_stride, dst, dst_stride,
@@ -81,7 +81,7 @@
                                const MV *src_mv,
                                const struct scale_factors *sf,
                                int w, int h, int ref,
-                               const interp_kernel *kernel,
+                               const InterpKernel *kernel,
                                enum mv_precision precision,
                                int x, int y) {
   const int is_q4 = precision == MV_PRECISION_Q4;
@@ -269,21 +269,15 @@
                ? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv
                              : mi_mv_pred_q4(mi, ref))
                : mi->mbmi.mv[ref].as_mv;
-
-    // TODO(jkoleszar): This clamping is done in the incorrect place for the
-    // scaling case. It needs to be done on the scaled MV, not the pre-scaling
-    // MV. Note however that it performs the subsampling aware scaling so
-    // that the result is always q4.
-    // mv_precision precision is MV_PRECISION_Q4.
-    const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
-                                               pd->subsampling_x,
-                                               pd->subsampling_y);
-
     MV32 scaled_mv;
-    int xs, ys, x0, y0, x0_16, y0_16, x1, y1, frame_width,
-        frame_height, subpel_x, subpel_y, buf_stride;
+    int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride,
+        subpel_x, subpel_y;
     uint8_t *ref_frame, *buf_ptr;
     const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf;
+    const MV mv_q4 = {
+      mv.row * (1 << (1 - pd->subsampling_y)),
+      mv.col * (1 << (1 - pd->subsampling_x))
+    };
 
     // Get reference frame pointer, width and height.
     if (plane == 0) {
@@ -327,10 +321,6 @@
     x0_16 += scaled_mv.col;
     y0_16 += scaled_mv.row;
 
-    // Get reference block bottom right coordinate.
-    x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
-    y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
-
     // Get reference block pointer.
     buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
     buf_stride = pre_buf->stride;
@@ -339,6 +329,9 @@
     // width/height is not a multiple of 8 pixels.
     if (scaled_mv.col || scaled_mv.row ||
         (frame_width & 0x7) || (frame_height & 0x7)) {
+      // Get reference block bottom right coordinate.
+      int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
+      int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
       int x_pad = 0, y_pad = 0;
 
       if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) {

diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 3345d83..bf738c2 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h

@@ -35,7 +35,7 @@
                                const MV *mv_q3,
                                const struct scale_factors *sf,
                                int w, int h, int do_avg,
-                               const interp_kernel *kernel,
+                               const InterpKernel *kernel,
                                enum mv_precision precision,
                                int x, int y);
 

diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 8a2297f..8ab5fb1 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c

@@ -23,36 +23,63 @@
   const short *filter
 );
 
-#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
-void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
-                                  uint8_t *dst, ptrdiff_t dst_stride, \
-                                  const int16_t *filter_x, int x_step_q4, \
-                                  const int16_t *filter_y, int y_step_q4, \
-                                  int w, int h) { \
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt1, opt2) \
+void vp9_convolve8_##name##_##opt1(const uint8_t *src, ptrdiff_t src_stride, \
+                                   uint8_t *dst, ptrdiff_t dst_stride, \
+                                   const int16_t *filter_x, int x_step_q4, \
+                                   const int16_t *filter_y, int y_step_q4, \
+                                   int w, int h) { \
   if (step_q4 == 16 && filter[3] != 128) { \
-    while (w >= 16) { \
-      vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
-                                               dst, dst_stride, \
-                                               h, filter); \
-      src += 16; \
-      dst += 16; \
-      w -= 16; \
-    } \
-    while (w >= 8) { \
-      vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
-                                              dst, dst_stride, \
-                                              h, filter); \
-      src += 8; \
-      dst += 8; \
-      w -= 8; \
-    } \
-    while (w >= 4) { \
-      vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
-                                              dst, dst_stride, \
-                                              h, filter); \
-      src += 4; \
-      dst += 4; \
-      w -= 4; \
+    if (filter[0] || filter[1] || filter[2]) { \
+      while (w >= 16) { \
+        vp9_filter_block1d16_##dir##8_##avg##opt1(src_start, src_stride, \
+                                                  dst, dst_stride, \
+                                                  h, filter); \
+        src += 16; \
+        dst += 16; \
+        w -= 16; \
+      } \
+      while (w >= 8) { \
+        vp9_filter_block1d8_##dir##8_##avg##opt1(src_start, src_stride, \
+                                                 dst, dst_stride, \
+                                                 h, filter); \
+        src += 8; \
+        dst += 8; \
+        w -= 8; \
+      } \
+      while (w >= 4) { \
+        vp9_filter_block1d4_##dir##8_##avg##opt1(src_start, src_stride, \
+                                                 dst, dst_stride, \
+                                                 h, filter); \
+        src += 4; \
+        dst += 4; \
+        w -= 4; \
+      } \
+    } else { \
+      while (w >= 16) { \
+        vp9_filter_block1d16_##dir##2_##avg##opt2(src, src_stride, \
+                                                  dst, dst_stride, \
+                                                  h, filter); \
+        src += 16; \
+        dst += 16; \
+        w -= 16; \
+      } \
+      while (w >= 8) { \
+        vp9_filter_block1d8_##dir##2_##avg##opt2(src, src_stride, \
+                                                 dst, dst_stride, \
+                                                 h, filter); \
+        src += 8; \
+        dst += 8; \
+        w -= 8; \
+      } \
+      while (w >= 4) { \
+        vp9_filter_block1d4_##dir##2_##avg##opt2(src, src_stride, \
+                                                 dst, dst_stride, \
+                                                 h, filter); \
+        src += 4; \
+        dst += 4; \
+        w -= 4; \
+      } \
     } \
   } \
   if (w) { \
@@ -68,17 +95,27 @@
                               const int16_t *filter_x, int x_step_q4, \
                               const int16_t *filter_y, int y_step_q4, \
                               int w, int h) { \
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
-  \
   assert(w <= 64); \
   assert(h <= 64); \
   if (x_step_q4 == 16 && y_step_q4 == 16) { \
-    vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
-                              filter_x, x_step_q4, filter_y, y_step_q4, \
-                              w, h + 7); \
-    vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
-                                    filter_x, x_step_q4, filter_y, y_step_q4, \
-                                    w, h); \
+    if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
+        filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
+      DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
+      vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+                                filter_x, x_step_q4, filter_y, y_step_q4, \
+                                w, h + 7); \
+      vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+                                      filter_x, x_step_q4, filter_y, \
+                                      y_step_q4, w, h); \
+    } else { \
+      DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \
+      vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
+                                filter_x, x_step_q4, filter_y, y_step_q4, \
+                                w, h + 1); \
+      vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
+                                      filter_x, x_step_q4, filter_y, \
+                                      y_step_q4, w, h); \
+    } \
   } else { \
     vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                            filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
@@ -99,6 +136,19 @@
 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
 
+filter8_1dfunction vp9_filter_block1d16_v2_sse2;
+filter8_1dfunction vp9_filter_block1d16_h2_sse2;
+filter8_1dfunction vp9_filter_block1d8_v2_sse2;
+filter8_1dfunction vp9_filter_block1d8_h2_sse2;
+filter8_1dfunction vp9_filter_block1d4_v2_sse2;
+filter8_1dfunction vp9_filter_block1d4_h2_sse2;
+filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
+
 // void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                                uint8_t *dst, ptrdiff_t dst_stride,
 //                                const int16_t *filter_x, int x_step_q4,
@@ -119,11 +169,11 @@
 //                                   const int16_t *filter_x, int x_step_q4,
 //                                   const int16_t *filter_y, int y_step_q4,
 //                                   int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3, sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3, sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3, sse2);
 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
-            ssse3);
+            ssse3, sse2);
 
 // void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                          uint8_t *dst, ptrdiff_t dst_stride,
@@ -153,6 +203,19 @@
 filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
 filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
 
+filter8_1dfunction vp9_filter_block1d16_v2_sse2;
+filter8_1dfunction vp9_filter_block1d16_h2_sse2;
+filter8_1dfunction vp9_filter_block1d8_v2_sse2;
+filter8_1dfunction vp9_filter_block1d8_h2_sse2;
+filter8_1dfunction vp9_filter_block1d4_v2_sse2;
+filter8_1dfunction vp9_filter_block1d4_h2_sse2;
+filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
+
 // void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                               uint8_t *dst, ptrdiff_t dst_stride,
 //                               const int16_t *filter_x, int x_step_q4,
@@ -173,10 +236,11 @@
 //                                  const int16_t *filter_x, int x_step_q4,
 //                                  const int16_t *filter_y, int y_step_q4,
 //                                  int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
-FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2, sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2, sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2, sse2);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2,
+            sse2);
 
 // void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                         uint8_t *dst, ptrdiff_t dst_stride,

diff --git a/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm b/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm
new file mode 100644
index 0000000..d94ccf2
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm

@@ -0,0 +1,448 @@
+;
+;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%macro GET_PARAM_4 0
+    mov         rdx, arg(5)                 ;filter ptr
+    mov         rsi, arg(0)                 ;src_ptr
+    mov         rdi, arg(2)                 ;output_ptr
+    mov         rcx, 0x0400040              ;64 in each of two 16-bit words
+
+    movdqa      xmm3, [rdx]                 ;load filters (aligned 16-byte load)
+    pshuflw     xmm4, xmm3, 11111111b       ;k3 in the low four words
+    psrldq      xmm3, 8                     ;shift taps 4..7 into the low qword
+    pshuflw     xmm3, xmm3, 0b              ;k4 in the low four words
+    punpcklqdq  xmm4, xmm3                  ;k3 x4 in low qword, k4 x4 in high
+
+    movq        xmm3, rcx                   ;rounding
+    pshufd      xmm3, xmm3, 0               ;rounding value in all 8 words
+
+    pxor        xmm2, xmm2                  ;zero for byte-to-word unpack
+
+    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
+    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch (rdx reused)
+    movsxd      rcx, DWORD PTR arg(4)       ;output_height (rcx reused)
+%endm
+
+%macro APPLY_FILTER_4 1
+
+    punpckldq   xmm0, xmm1                  ;both 4-pixel inputs in one register
+    punpcklbw   xmm0, xmm2                  ;unpack to word
+    pmullw      xmm0, xmm4                  ;multiply by k3 (low) and k4 (high)
+
+    movdqa      xmm1, xmm0                  ;copy the products
+    psrldq      xmm1, 8                     ;bring the k4 products down
+    paddsw      xmm0, xmm1                  ;sum of the two taps
+
+    paddsw      xmm0, xmm3                  ;rounding
+    psraw       xmm0, 7                     ;shift
+    packuswb    xmm0, xmm0                  ;pack to byte
+
+%if %1
+    movd        xmm1, [rdi]                 ;averaging variant: load output
+    pavgb       xmm0, xmm1                  ;and average with it
+%endif
+
+    movd        [rdi], xmm0                 ;store 4 pixels
+    lea         rsi, [rsi + rax]            ;next src row
+    lea         rdi, [rdi + rdx]            ;next output row
+    dec         rcx                         ;sets ZF for the caller's jnz
+%endm
+
+%macro GET_PARAM 0
+    mov         rdx, arg(5)                 ;filter ptr
+    mov         rsi, arg(0)                 ;src_ptr
+    mov         rdi, arg(2)                 ;output_ptr
+    mov         rcx, 0x0400040              ;64 in each of two 16-bit words
+
+    movdqa      xmm7, [rdx]                 ;load filters (aligned 16-byte load)
+
+    pshuflw     xmm6, xmm7, 11111111b       ;k3 in the low four words
+    pshufhw     xmm7, xmm7, 0b              ;k4 in the high four words
+    punpcklwd   xmm6, xmm6                  ;k3 in all 8 words
+    punpckhwd   xmm7, xmm7                  ;k4 in all 8 words
+
+    movq        xmm4, rcx                   ;rounding
+    pshufd      xmm4, xmm4, 0               ;rounding value in all 8 words
+
+    pxor        xmm5, xmm5                  ;zero for byte-to-word unpack
+
+    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
+    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch (rdx reused)
+    movsxd      rcx, DWORD PTR arg(4)       ;output_height (rcx reused)
+%endm
+
+%macro APPLY_FILTER_8 1
+    punpcklbw   xmm0, xmm5                  ;first input: 8 bytes to words
+    punpcklbw   xmm1, xmm5                  ;second input: 8 bytes to words
+
+    pmullw      xmm0, xmm6                  ;first input * k3
+    pmullw      xmm1, xmm7                  ;second input * k4
+    paddsw      xmm0, xmm1                  ;sum of the two taps
+    paddsw      xmm0, xmm4                  ;rounding
+    psraw       xmm0, 7                     ;shift
+    packuswb    xmm0, xmm0                  ;pack back to byte
+%if %1
+    movq        xmm1, [rdi]                 ;averaging variant: load output
+    pavgb       xmm0, xmm1                  ;and average with it
+%endif
+    movq        [rdi], xmm0                 ;store the result
+
+    lea         rsi, [rsi + rax]            ;next src row
+    lea         rdi, [rdi + rdx]            ;next output row
+    dec         rcx                         ;sets ZF for the caller's jnz
+%endm
+
+%macro APPLY_FILTER_16 1
+    punpcklbw   xmm0, xmm5                  ;first input, low 8 bytes to words
+    punpcklbw   xmm1, xmm5                  ;second input, low 8 bytes to words
+    punpckhbw   xmm2, xmm5                  ;first input, high 8 bytes to words
+    punpckhbw   xmm3, xmm5                  ;second input, high 8 bytes to words
+
+    pmullw      xmm0, xmm6                  ;first input * k3 (low half)
+    pmullw      xmm1, xmm7                  ;second input * k4 (low half)
+    pmullw      xmm2, xmm6                  ;first input * k3 (high half)
+    pmullw      xmm3, xmm7                  ;second input * k4 (high half)
+
+    paddsw      xmm0, xmm1                  ;sum of the two taps, low half
+    paddsw      xmm2, xmm3                  ;sum of the two taps, high half
+
+    paddsw      xmm0, xmm4                  ;rounding
+    paddsw      xmm2, xmm4                  ;rounding, high half
+    psraw       xmm0, 7                     ;shift
+    psraw       xmm2, 7                     ;shift, high half
+    packuswb    xmm0, xmm2                  ;pack back to byte
+%if %1
+    movdqu      xmm1, [rdi]                 ;averaging variant: load output
+    pavgb       xmm0, xmm1                  ;and average with it
+%endif
+    movdqu      [rdi], xmm0                 ;store the result
+
+    lea         rsi, [rsi + rax]            ;next src row
+    lea         rdi, [rdi + rdx]            ;next output row
+    dec         rcx                         ;sets ZF for the caller's jnz
+%endm
+
+global sym(vp9_filter_block1d4_v2_sse2) PRIVATE
+sym(vp9_filter_block1d4_v2_sse2):  ;4-wide vertical 2-tap filter
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM_4
+.loop:
+    movd        xmm0, [rsi]                 ;load src
+    movd        xmm1, [rsi + rax]           ;same columns, one row down
+
+    APPLY_FILTER_4 0
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d8_v2_sse2) PRIVATE
+sym(vp9_filter_block1d8_v2_sse2):  ;8-wide vertical 2-tap filter
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movq        xmm0, [rsi]                 ;row 0, 8 pixels
+    movq        xmm1, [rsi + rax]           ;row 1, same columns one row down
+
+    APPLY_FILTER_8 0
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d16_v2_sse2) PRIVATE
+sym(vp9_filter_block1d16_v2_sse2):  ;16-wide vertical 2-tap filter
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movdqu        xmm0, [rsi]               ;row 0, 16 pixels
+    movdqu        xmm1, [rsi + rax]         ;row 1, same columns one row down
+    movdqa        xmm2, xmm0                ;copy of row 0 for the high half
+    movdqa        xmm3, xmm1                ;copy of row 1 for the high half
+
+    APPLY_FILTER_16 0
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d4_v2_avg_sse2) PRIVATE
+sym(vp9_filter_block1d4_v2_avg_sse2):  ;4-wide vertical 2-tap, averaged into dst
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM_4
+.loop:
+    movd        xmm0, [rsi]                 ;load src
+    movd        xmm1, [rsi + rax]           ;same columns, one row down
+
+    APPLY_FILTER_4 1
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d8_v2_avg_sse2) PRIVATE
+sym(vp9_filter_block1d8_v2_avg_sse2):  ;8-wide vertical 2-tap, averaged into dst
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movq        xmm0, [rsi]                 ;row 0, 8 pixels
+    movq        xmm1, [rsi + rax]           ;row 1, same columns one row down
+
+    APPLY_FILTER_8 1
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d16_v2_avg_sse2) PRIVATE
+sym(vp9_filter_block1d16_v2_avg_sse2):  ;16-wide vertical 2-tap, averaged into dst
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movdqu        xmm0, [rsi]               ;row 0, 16 pixels
+    movdqu        xmm1, [rsi + rax]         ;row 1, same columns one row down
+    movdqa        xmm2, xmm0                ;copy of row 0 for the high half
+    movdqa        xmm3, xmm1                ;copy of row 1 for the high half
+
+    APPLY_FILTER_16 1
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d4_h2_sse2) PRIVATE
+sym(vp9_filter_block1d4_h2_sse2):  ;4-wide horizontal 2-tap filter
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM_4
+.loop:
+    movdqu      xmm0, [rsi]                 ;load src (16 bytes; 0..4 are used)
+    movdqa      xmm1, xmm0                  ;copy of the loaded pixels
+    psrldq      xmm1, 1                     ;shift so pixel x+1 lines up with x
+
+    APPLY_FILTER_4 0
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d8_h2_sse2) PRIVATE
+sym(vp9_filter_block1d8_h2_sse2):  ;8-wide horizontal 2-tap filter
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movdqu      xmm0, [rsi]                 ;load src (16 bytes; 0..8 are used)
+    movdqa      xmm1, xmm0                  ;copy of the loaded pixels
+    psrldq      xmm1, 1                     ;shift so pixel x+1 lines up with x
+
+    APPLY_FILTER_8 0
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d16_h2_sse2) PRIVATE
+sym(vp9_filter_block1d16_h2_sse2):  ;16-wide horizontal 2-tap filter
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movdqu      xmm0,   [rsi]               ;load src
+    movdqu      xmm1,   [rsi + 1]           ;same row, one pixel to the right
+    movdqa      xmm2, xmm0                  ;copy for the high 8 pixels
+    movdqa      xmm3, xmm1                  ;copy for the high 8 pixels
+
+    APPLY_FILTER_16 0
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d4_h2_avg_sse2) PRIVATE
+sym(vp9_filter_block1d4_h2_avg_sse2):  ;4-wide horizontal 2-tap, averaged into dst
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM_4
+.loop:
+    movdqu      xmm0, [rsi]                 ;load src (16 bytes; 0..4 are used)
+    movdqa      xmm1, xmm0                  ;copy of the loaded pixels
+    psrldq      xmm1, 1                     ;shift so pixel x+1 lines up with x
+
+    APPLY_FILTER_4 1
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d8_h2_avg_sse2) PRIVATE
+sym(vp9_filter_block1d8_h2_avg_sse2):  ;8-wide horizontal 2-tap, averaged into dst
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movdqu      xmm0, [rsi]                 ;load src (16 bytes; 0..8 are used)
+    movdqa      xmm1, xmm0                  ;copy of the loaded pixels
+    psrldq      xmm1, 1                     ;shift so pixel x+1 lines up with x
+
+    APPLY_FILTER_8 1
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(vp9_filter_block1d16_h2_avg_sse2) PRIVATE
+sym(vp9_filter_block1d16_h2_avg_sse2):  ;16-wide horizontal 2-tap, averaged into dst
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM 7
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    GET_PARAM
+.loop:
+    movdqu      xmm0,   [rsi]               ;load src
+    movdqu      xmm1,   [rsi + 1]           ;same row, one pixel to the right
+    movdqa      xmm2, xmm0                  ;copy for the high 8 pixels
+    movdqa      xmm3, xmm1                  ;copy for the high 8 pixels
+
+    APPLY_FILTER_16 1
+    jnz         .loop                       ;until output_height rows are done
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret

diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c
index fcca017..e67b372 100644
--- a/vp9/decoder/vp9_dsubexp.c
+++ b/vp9/decoder/vp9_dsubexp.c

@@ -19,14 +19,10 @@
   return v % 2 ? m - (v + 1) / 2 : m + v / 2;
 }
 
-static int decode_uniform(vp9_reader *r, int n) {
-  int v;
-  const int l = get_unsigned_bits(n);
-  const int m = (1 << l) - n;
-  if (!l)
-    return 0;
-
-  v = vp9_read_literal(r, l - 1);
+static int decode_uniform(vp9_reader *r) {
+  const int l = 8;  // presumably get_unsigned_bits(191) -- TODO confirm
+  const int m = (1 << l) - 191;  // alphabet size is fixed at 191, so m == 65
+  const int v = vp9_read_literal(r, l - 1);  // first l - 1 == 7 bits
   return v < m ?  v : (v << 1) - m + vp9_read_bit(r);
 }
 
@@ -78,30 +74,19 @@
   }
 }
 
-static int decode_term_subexp(vp9_reader *r, int k, int num_syms) {
-  int i = 0, mk = 0, word;
-  while (1) {
-    const int b = i ? k + i - 1 : k;
-    const int a = 1 << b;
-    if (num_syms <= mk + 3 * a) {
-      word = decode_uniform(r, num_syms - mk) + mk;
-      break;
-    } else {
-      if (vp9_read_bit(r)) {
-        i++;
-        mk += a;
-      } else {
-        word = vp9_read_literal(r, b) + mk;
-        break;
-      }
-    }
-  }
-  return word;
+static int decode_term_subexp(vp9_reader *r) {
+  if (!vp9_read_bit(r))
+    return vp9_read_literal(r, 4);  // prefix 0: 4-bit literal
+  if (!vp9_read_bit(r))
+    return vp9_read_literal(r, 4) + 16;  // prefix 10: 4-bit literal + 16
+  if (!vp9_read_bit(r))
+    return vp9_read_literal(r, 5) + 32;  // prefix 110: 5-bit literal + 32
+  return decode_uniform(r) + 64;  // prefix 111: decode_uniform() + 64
 }
 
 void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p) {
   if (vp9_read(r, DIFF_UPDATE_PROB)) {
-    const int delp = decode_term_subexp(r, SUBEXP_PARAM, 255);
+    const int delp = decode_term_subexp(r);  // decoded value, remapped below
     *p = (vp9_prob)inv_remap_prob(delp, *p);
   }
 }

diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index dc64a10..c8f334f 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c

@@ -33,11 +33,6 @@
 #include "vp9/encoder/vp9_tokenize.h"
 #include "vp9/encoder/vp9_write_bit_buffer.h"
 
-
-#if defined(SECTIONBITS_OUTPUT)
-unsigned __int64 Sectionbits[500];
-#endif
-
 #ifdef ENTROPY_STATS
 vp9_coeff_stats tree_update_hist[TX_SIZES][PLANE_TYPES];
 extern unsigned int active_section;

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ba10d15..7fb5a03 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c

@@ -42,12 +42,6 @@
 
 #define DBG_PRNT_SEGMAP 0
 
-
-// #define ENC_DEBUG
-#ifdef ENC_DEBUG
-int enc_debug = 0;
-#endif
-
 static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) {
   switch (subsize) {
     case BLOCK_64X64:
@@ -483,18 +477,18 @@
   if (frame_is_intra_only(cm)) {
 #if CONFIG_INTERNAL_STATS
     static const int kf_mode_index[] = {
-      THR_DC /*DC_PRED*/,
-      THR_V_PRED /*V_PRED*/,
-      THR_H_PRED /*H_PRED*/,
-      THR_D45_PRED /*D45_PRED*/,
+      THR_DC        /*DC_PRED*/,
+      THR_V_PRED    /*V_PRED*/,
+      THR_H_PRED    /*H_PRED*/,
+      THR_D45_PRED  /*D45_PRED*/,
       THR_D135_PRED /*D135_PRED*/,
       THR_D117_PRED /*D117_PRED*/,
       THR_D153_PRED /*D153_PRED*/,
       THR_D207_PRED /*D207_PRED*/,
-      THR_D63_PRED /*D63_PRED*/,
-      THR_TM /*TM_PRED*/,
+      THR_D63_PRED  /*D63_PRED*/,
+      THR_TM        /*TM_PRED*/,
     };
-    cpi->mode_chosen_counts[kf_mode_index[mi->mbmi.mode]]++;
+    cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]]++;
 #endif
   } else {
     // Note how often each mode chosen as best

diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index c57b01d..f0463bbd 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h

@@ -20,7 +20,7 @@
 
 void vp9_entropy_mv_init();
 
-void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer* const);
+void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w);
 
 void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref,
                    const nmv_context* mvctx, int usehp);

diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 1530464..3c38c36 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c

@@ -2336,8 +2336,26 @@
     cpi->rc.frames_to_key = cpi->key_frame_frequency;
     cpi->rc.kf_boost = KEY_FRAME_BOOST;
     cpi->rc.source_alt_ref_active = 0;
+    cpi->rc.per_frame_bandwidth = cpi->rc.av_per_frame_bandwidth * 8;
+    if (cm->current_video_frame == 0) {
+      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      cpi->rc.active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+      if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
+        cpi->rc.active_worst_quality = cpi->rc.worst_quality;
+    }
   } else {
     cm->frame_type = INTER_FRAME;
+    cpi->rc.per_frame_bandwidth = cpi->rc.av_per_frame_bandwidth;
+    if (cm->current_video_frame == 1) {
+      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      cpi->rc.active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
+      if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
+        cpi->rc.active_worst_quality = cpi->rc.worst_quality;
+    }
   }
   if (cpi->rc.frames_till_gf_update_due == 0) {
     cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
@@ -2347,10 +2365,90 @@
       cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
     cpi->refresh_golden_frame = 1;
     cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
-    cpi->rc.gfu_boost = 1000;
+    cpi->rc.gfu_boost = 2000;
   }
 }
 
+// Returns rc->active_worst_quality adjusted for the current buffer level.
+static int calc_active_worst_quality_from_buffer_level(const VP9_COMP *cpi) {
+  // Adjust active_worst_quality: If buffer is above the optimal/target level,
+  // bring active_worst_quality down depending on fullness of buffer.
+  // If buffer is below the optimal level, let the active_worst_quality go from
+  // ambient Q (at buffer = optimal level) to worst_quality level
+  // (at buffer = critical level).
+  const VP9_CONFIG *oxcf = &cpi->oxcf;
+  const RATE_CONTROL *rc = &cpi->rc;
+  int active_worst_quality = rc->active_worst_quality;
+  // Cap on the downward adjustment: one fifth (~20%) of the starting value.
+  int max_adjustment_down = active_worst_quality / 5;
+  // A quarter of optimal; at or below it worst_quality is used outright.
+  int critical_level = oxcf->optimal_buffer_level >> 2;
+  int adjustment = 0;
+  int buff_lvl_step = 0;
+  if (rc->buffer_level > oxcf->optimal_buffer_level) {
+    // Adjust down: one unit per buff_lvl_step of excess over optimal.
+    if (max_adjustment_down) {
+      buff_lvl_step = (int)((oxcf->maximum_buffer_size -
+          oxcf->optimal_buffer_level) / max_adjustment_down);
+      if (buff_lvl_step)
+        adjustment = (int)((rc->buffer_level - oxcf->optimal_buffer_level) /
+                            buff_lvl_step);
+      active_worst_quality -= adjustment;
+    }
+  } else if (rc->buffer_level > critical_level) {
+    // Adjust up from ambient Q (avg_frame_qindex[INTER_FRAME]) toward worst.
+    if (critical_level) {  // otherwise the value is left unchanged
+      buff_lvl_step = (oxcf->optimal_buffer_level - critical_level);
+      if (buff_lvl_step) {
+        adjustment = (rc->worst_quality - rc->avg_frame_qindex[INTER_FRAME]) *
+                         (oxcf->optimal_buffer_level - rc->buffer_level) /
+                             buff_lvl_step;
+      }
+      active_worst_quality = rc->avg_frame_qindex[INTER_FRAME] + adjustment;
+    }
+  } else {
+    // Set to worst_quality if buffer is at or below the critical level.
+    active_worst_quality = rc->worst_quality;
+  }
+  return active_worst_quality;
+}
+
+static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
+  const VP9_CONFIG *oxcf = &cpi->oxcf;
+  const RATE_CONTROL *rc = &cpi->rc;
+  int target = rc->av_per_frame_bandwidth;  // baseline before adjustment
+  const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
+  const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+  if (diff > 0) {  // buffer level is below optimal
+    // Cut the target by pct_low / 2 percent; pct_low <= under_shoot_pct.
+    const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
+    target -= (target * pct_low) / 200;
+  } else if (diff < 0) {  // buffer level is above optimal
+    // Raise the target by pct_high / 2 percent; pct_high <= over_shoot_pct.
+    const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
+    target += (target * pct_high) / 200;
+  }
+  return target;
+}
+
+static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
+  int per_frame_bandwidth;
+  const RATE_CONTROL *rc = &cpi->rc;
+  if (cpi->common.current_video_frame == 0) {  // very first frame
+    per_frame_bandwidth = cpi->oxcf.starting_buffer_level / 2;
+  } else {
+    int initial_boost = 32;  // lower bound for kf_boost
+    int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
+    if (rc->frames_since_key < cpi->output_framerate / 2) {
+      kf_boost = (int)(kf_boost * rc->frames_since_key /
+                       (cpi->output_framerate / 2));
+    }  // boost scaled by frames_since_key / (output_framerate / 2)
+    per_frame_bandwidth =
+        ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;  // x(1+boost/16)
+  }
+  return per_frame_bandwidth;
+}
+
 void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   if ((cm->current_video_frame == 0 ||
@@ -2363,8 +2461,13 @@
     cpi->rc.frames_to_key = cpi->key_frame_frequency;
     cpi->rc.kf_boost = KEY_FRAME_BOOST;
     cpi->rc.source_alt_ref_active = 0;
+    cpi->rc.per_frame_bandwidth = calc_iframe_target_size_one_pass_cbr(cpi);
+    cpi->rc.active_worst_quality = cpi->rc.worst_quality;
   } else {
     cm->frame_type = INTER_FRAME;
+    cpi->rc.per_frame_bandwidth = calc_pframe_target_size_one_pass_cbr(cpi);
+    cpi->rc.active_worst_quality =
+        calc_active_worst_quality_from_buffer_level(cpi);
   }
   // Don't use gf_update by default in CBR mode.
   cpi->rc.frames_till_gf_update_due = INT_MAX;

diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index ec9934a..36591bd 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c

@@ -466,7 +466,6 @@
 #undef PRE
 #undef DIST
 #undef CHECK_BETTER
-#undef SP
 
 static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
                                int range) {
@@ -496,11 +495,6 @@
     }\
   }
 
-#define get_next_chkpts(list, i, n)   \
-    list[0] = ((i) == 0 ? (n) - 1 : (i) - 1);  \
-    list[1] = (i);                             \
-    list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);
-
 #define MAX_PATTERN_SCALES         11
 #define MAX_PATTERN_CANDIDATES      8  // max number of canddiates per scale
 #define PATTERN_CANDIDATES_REF      3  // number of refinement candidates
@@ -644,7 +638,10 @@
       do {
         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
         best_site = -1;
-        get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
+        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
+        next_chkpts_indices[1] = k;
+        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
+
         if (check_bounds(x, br, bc, 1 << s)) {
           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
             this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;

diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 85f11ce..6e801c0 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c

@@ -98,10 +98,6 @@
                                     0, 0, 0};
 #endif
 
-#if defined(SECTIONBITS_OUTPUT)
-extern unsigned __int64 Sectionbits[500];
-#endif
-
 extern void vp9_init_quantizer(VP9_COMP *cpi);
 
 static const double in_frame_q_adj_ratio[MAX_SEGMENTS] =
@@ -1158,107 +1154,6 @@
   return (llval * llnum / llden);
 }
 
-// Initialize layer content data from init_config().
-static void init_layer_context(VP9_COMP *const cpi) {
-  int temporal_layer = 0;
-  cpi->svc.spatial_layer_id = 0;
-  cpi->svc.temporal_layer_id = 0;
-  for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
-      ++temporal_layer) {
-    LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
-    lc->rc.active_worst_quality = q_trans[cpi->oxcf.worst_allowed_q];
-    lc->rc.avg_frame_qindex[INTER_FRAME] = q_trans[cpi->oxcf.worst_allowed_q];
-    lc->rc.last_q[INTER_FRAME] = q_trans[cpi->oxcf.worst_allowed_q];
-    lc->rc.ni_av_qi = lc->rc.active_worst_quality;
-    lc->rc.total_actual_bits = 0;
-    lc->rc.total_target_vs_actual = 0;
-    lc->rc.ni_tot_qi = 0;
-    lc->rc.tot_q = 0.0;
-    lc->rc.ni_frames = 0;
-    lc->rc.rate_correction_factor = 1.0;
-    lc->rc.key_frame_rate_correction_factor = 1.0;
-    lc->target_bandwidth = cpi->oxcf.ts_target_bitrate[temporal_layer] *
-        1000;
-    lc->rc.buffer_level = rescale((int)(cpi->oxcf.starting_buffer_level),
-                                  lc->target_bandwidth, 1000);
-    lc->rc.bits_off_target = lc->rc.buffer_level;
-  }
-}
-
-// Update the layer context from a change_config() call.
-static void update_layer_context_change_config(VP9_COMP *const cpi,
-                                               const int target_bandwidth) {
-  int temporal_layer = 0;
-  float bitrate_alloc = 1.0;
-  for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
-      ++temporal_layer) {
-    LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
-    lc->target_bandwidth = cpi->oxcf.ts_target_bitrate[temporal_layer] * 1000;
-    bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth;
-    // Update buffer-related quantities.
-    lc->starting_buffer_level = cpi->oxcf.starting_buffer_level * bitrate_alloc;
-    lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level * bitrate_alloc;
-    lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size * bitrate_alloc;
-    lc->rc.bits_off_target = MIN(lc->rc.bits_off_target,
-                                 lc->maximum_buffer_size);
-    lc->rc.buffer_level = MIN(lc->rc.buffer_level, lc->maximum_buffer_size);
-    // Update framerate-related quantities.
-    lc->framerate = cpi->oxcf.framerate /
-        cpi->oxcf.ts_rate_decimator[temporal_layer];
-    lc->rc.av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
-    lc->rc.per_frame_bandwidth = lc->rc.av_per_frame_bandwidth;
-    lc->rc.max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
-    // Update qp-related quantities.
-    lc->rc.worst_quality = cpi->rc.worst_quality;
-    lc->rc.best_quality = cpi->rc.best_quality;
-    lc->rc.active_worst_quality = cpi->rc.active_worst_quality;
-  }
-}
-
-// Prior to encoding the frame, update framerate-related quantities
-// for the current layer.
-static void update_layer_framerate(VP9_COMP *const cpi) {
-  int temporal_layer = cpi->svc.temporal_layer_id;
-  LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
-  lc->framerate = cpi->oxcf.framerate /
-      cpi->oxcf.ts_rate_decimator[temporal_layer];
-  lc->rc.av_per_frame_bandwidth = (int)(lc->target_bandwidth /
-      lc->framerate);
-  lc->rc.per_frame_bandwidth = lc->rc.av_per_frame_bandwidth;
-  lc->rc.max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
-}
-
-// Prior to encoding the frame, set the layer context, for the current layer
-// to be encoded, to the cpi struct.
-static void restore_layer_context(VP9_COMP *const cpi) {
-  int temporal_layer = cpi->svc.temporal_layer_id;
-  LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
-  int frame_since_key = cpi->rc.frames_since_key;
-  int frame_to_key = cpi->rc.frames_to_key;
-  memcpy(&cpi->rc, &lc->rc, sizeof(RATE_CONTROL));
-  cpi->target_bandwidth = lc->target_bandwidth;
-  cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
-  cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
-  cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
-  cpi->output_framerate = lc->framerate;
-  // Reset the frames_since_key and frames_to_key counters to their values
-  // before the layer restore. Keep these defined for the stream (not layer).
-  cpi->rc.frames_since_key = frame_since_key;
-  cpi->rc.frames_to_key = frame_to_key;
-}
-
-// Save the layer context after encoding the frame.
-static void save_layer_context(VP9_COMP *const cpi) {
-  int temporal_layer = cpi->svc.temporal_layer_id;
-  LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
-  memcpy(&lc->rc, &cpi->rc, sizeof(RATE_CONTROL));
-  lc->target_bandwidth = cpi->target_bandwidth;
-  lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
-  lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
-  lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
-  lc->framerate = cpi->output_framerate;
-}
-
 static void set_tile_limits(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
 
@@ -1285,16 +1180,6 @@
   cm->subsampling_y = 0;
   vp9_alloc_compressor_data(cpi);
 
-  // Spatial scalability.
-  cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
-  // Temporal scalability.
-  cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
-
-  if (cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
-    init_layer_context(cpi);
-  }
-
   // change includes all joint functionality
   vp9_change_config(ptr, oxcf);
 
@@ -1335,6 +1220,9 @@
   cpi->gld_fb_idx = 1;
   cpi->alt_fb_idx = 2;
 
+  cpi->current_layer = 0;
+  cpi->use_svc = 0;
+
   set_tile_limits(cpi);
 
   cpi->fixed_divide[0] = 0;
@@ -1342,6 +1230,7 @@
     cpi->fixed_divide[i] = 0x80000 / i;
 }
 
+
 void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   VP9_COMP *cpi = (VP9_COMP *)(ptr);
   VP9_COMMON *const cm = &cpi->common;
@@ -1433,10 +1322,10 @@
                                             cpi->oxcf.target_bandwidth, 1000);
   // Under a configuration change, where maximum_buffer_size may change,
   // keep buffer level clipped to the maximum allowed buffer size.
-  cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target,
-                                cpi->oxcf.maximum_buffer_size);
-  cpi->rc.buffer_level = MIN(cpi->rc.buffer_level,
-                             cpi->oxcf.maximum_buffer_size);
+  if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) {
+    cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size;
+    cpi->rc.buffer_level = cpi->rc.bits_off_target;
+  }
 
   // Set up frame rate and related parameters rate control values.
   vp9_new_framerate(cpi, cpi->oxcf.framerate);
@@ -1473,11 +1362,6 @@
   }
   update_frame_size(cpi);
 
-  if (cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
-    update_layer_context_change_config(cpi, cpi->oxcf.target_bandwidth);
-  }
-
   cpi->speed = cpi->oxcf.cpu_used;
 
   if (cpi->oxcf.lag_in_frames == 0) {
@@ -1701,8 +1585,6 @@
 
   vp9_create_common(cm);
 
-  cpi->use_svc = 0;
-
   init_config((VP9_PTR)cpi, oxcf);
 
   init_pick_mode_context(cpi);
@@ -1718,6 +1600,9 @@
   cpi->alt_is_last  = 0;
   cpi->gold_is_alt  = 0;
 
+  // Spatial scalability
+  cpi->number_spatial_layers = oxcf->ss_number_layers;
+
   // Create the encoder segmentation map and set all entries to 0
   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
@@ -2078,21 +1963,6 @@
     }
 #endif
 
-#if defined(SECTIONBITS_OUTPUT)
-
-    if (0) {
-      int i;
-      FILE *f = fopen("tokenbits.stt", "a");
-
-      for (i = 0; i < 28; i++)
-        fprintf(f, "%8d", (int)(Sectionbits[i] / 256));
-
-      fprintf(f, "\n");
-      fclose(f);
-    }
-
-#endif
-
 #if 0
     {
       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
@@ -3669,12 +3539,6 @@
     adjust_frame_rate(cpi);
   }
 
-  if (cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
-    update_layer_framerate(cpi);
-    restore_layer_context(cpi);
-  }
-
   // start with a 0 size frame
   *size = 0;
 
@@ -3750,12 +3614,6 @@
     cpi->droppable = !frame_is_reference(cpi);
   }
 
-  // Save layer specific state.
-  if (cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
-    save_layer_context(cpi);
-  }
-
   vpx_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
 

diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 26a7b0a..a98c6bf 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h

@@ -408,15 +408,6 @@
   int super_fast_rtc;
 } SPEED_FEATURES;
 
-typedef struct {
-  RATE_CONTROL rc;
-  int target_bandwidth;
-  int64_t starting_buffer_level;
-  int64_t optimal_buffer_level;
-  int64_t maximum_buffer_size;
-  double framerate;
-} LAYER_CONTEXT;
-
 typedef struct VP9_COMP {
   DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
   DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -461,6 +452,9 @@
   int gld_fb_idx;
   int alt_fb_idx;
 
+  int current_layer;
+  int use_svc;
+
 #if CONFIG_MULTIPLE_ARF
   int alt_ref_fb_idx[REF_FRAMES - 3];
 #endif
@@ -677,18 +671,7 @@
   int initial_width;
   int initial_height;
 
-  int use_svc;
-
-  struct svc {
-    int spatial_layer_id;
-    int temporal_layer_id;
-    int number_spatial_layers;
-    int number_temporal_layers;
-    // Layer context used for rate control in CBR mode, only defined for
-    // temporal layers for now.
-    LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS];
-  } svc;
-
+  int number_spatial_layers;
   int enable_encode_breakout;   // Default value is 1. From first pass stats,
                                 // encode_breakout may be disabled.
 

diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 8c41724..a2eea1c 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c

@@ -18,10 +18,6 @@
 
 #include "vp9/common/vp9_seg_common.h"
 
-#ifdef ENC_DEBUG
-extern int enc_debug;
-#endif
-
 void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
                       int skip_block,
                       const int16_t *zbin_ptr, const int16_t *round_ptr,

diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 8c7463b..f8cfe49 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c

@@ -213,37 +213,10 @@
 static void calc_iframe_target_size(VP9_COMP *cpi) {
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
-  int target;
+  int target = rc->per_frame_bandwidth;
 
   vp9_clear_system_state();  // __asm emms;
 
-  // For 1-pass.
-  if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
-    if (cpi->common.current_video_frame == 0) {
-      target = oxcf->starting_buffer_level / 2;
-    } else {
-      // TODO(marpan): Add in adjustment based on Q.
-      // If this keyframe was forced, use a more recent Q estimate.
-      // int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY) ?
-      //    cpi->rc.avg_frame_qindex : cpi->rc.ni_av_qi;
-      int initial_boost = 32;
-      // Boost depends somewhat on frame rate.
-      int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
-      // Adjustment up based on q: need to fix.
-      // kf_boost = kf_boost * kfboost_qadjust(Q) / 100;
-      // Frame separation adjustment (down).
-      if (rc->frames_since_key  < cpi->output_framerate / 2) {
-        kf_boost = (int)(kf_boost * rc->frames_since_key /
-                       (cpi->output_framerate / 2));
-      }
-      kf_boost = (kf_boost < 16) ? 16 : kf_boost;
-      target = ((16 + kf_boost) * rc->per_frame_bandwidth) >> 4;
-    }
-    rc->active_worst_quality = rc->worst_quality;
-  } else {
-    target = rc->per_frame_bandwidth;
-  }
-
   if (oxcf->rc_max_intra_bitrate_pct) {
     const int max_rate = rc->per_frame_bandwidth *
         oxcf->rc_max_intra_bitrate_pct / 100;
@@ -252,28 +225,8 @@
   rc->this_frame_target = target;
 }
 
-
-// Update the buffer level for higher layers, given the encoded current layer.
-static void update_layer_buffer_level(VP9_COMP *const cpi,
-                                      int encoded_frame_size) {
-  int temporal_layer = 0;
-  int current_temporal_layer = cpi->svc.temporal_layer_id;
-  for (temporal_layer = current_temporal_layer + 1;
-      temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) {
-    LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
-    int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
-        encoded_frame_size);
-    lc->rc.bits_off_target += bits_off_for_this_layer;
-
-    // Clip buffer level to maximum buffer size for the layer.
-    lc->rc.bits_off_target = MIN(lc->rc.bits_off_target,
-                                 lc->maximum_buffer_size);
-    lc->rc.buffer_level = lc->rc.bits_off_target;
-  }
-}
-
 // Update the buffer level: leaky bucket model.
-void vp9_update_buffer_level(VP9_COMP *const cpi, int encoded_frame_size) {
+void vp9_update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
   const VP9_COMMON *const cm = &cpi->common;
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
@@ -286,18 +239,14 @@
   }
 
   // Clip the buffer level to the maximum specified buffer size.
-  rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
-  rc->buffer_level = rc->bits_off_target;
-
-  if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
-    update_layer_buffer_level(cpi, encoded_frame_size);
-  }
+  rc->buffer_level = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
 }
 
-int vp9_drop_frame(VP9_COMP *const cpi) {
+int vp9_drop_frame(VP9_COMP *cpi) {
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
 
+
   if (!oxcf->drop_frames_water_mark) {
     return 0;
   } else {
@@ -308,7 +257,7 @@
       // If buffer is below drop_mark, for now just drop every other frame
       // (starting with the next frame) until it increases back over drop_mark.
       int drop_mark = (int)(oxcf->drop_frames_water_mark *
-          oxcf->optimal_buffer_level / 100);
+                                oxcf->optimal_buffer_level / 100);
       if ((rc->buffer_level > drop_mark) &&
           (rc->decimation_factor > 0)) {
         --rc->decimation_factor;
@@ -332,69 +281,6 @@
   }
 }
 
-// Adjust active_worst_quality level based on buffer level.
-static int adjust_active_worst_quality_from_buffer_level(const VP9_CONFIG *oxcf,
-    const RATE_CONTROL *rc) {
-  // Adjust active_worst_quality: If buffer is above the optimal/target level,
-  // bring active_worst_quality down depending on fullness over buffer.
-  // If buffer is below the optimal level, let the active_worst_quality go from
-  // ambient Q (at buffer = optimal level) to worst_quality level
-  // (at buffer = critical level).
-
-  int active_worst_quality = rc->active_worst_quality;
-  // Maximum limit for down adjustment, ~20%.
-  int max_adjustment_down = active_worst_quality / 5;
-  // Buffer level below which we push active_worst to worst_quality.
-  int critical_level = oxcf->optimal_buffer_level >> 2;
-  int adjustment = 0;
-  int buff_lvl_step = 0;
-  if (rc->buffer_level > oxcf->optimal_buffer_level) {
-    // Adjust down.
-    if (max_adjustment_down) {
-      buff_lvl_step = (int)((oxcf->maximum_buffer_size -
-          oxcf->optimal_buffer_level) / max_adjustment_down);
-      if (buff_lvl_step)
-        adjustment = (int)((rc->buffer_level - oxcf->optimal_buffer_level) /
-                            buff_lvl_step);
-      active_worst_quality -= adjustment;
-    }
-  } else if (rc->buffer_level > critical_level) {
-    // Adjust up from ambient Q.
-    if (critical_level) {
-      buff_lvl_step = (oxcf->optimal_buffer_level - critical_level);
-      if (buff_lvl_step) {
-        adjustment = (rc->worst_quality - rc->avg_frame_qindex[INTER_FRAME]) *
-                         (oxcf->optimal_buffer_level - rc->buffer_level) /
-                             buff_lvl_step;
-      }
-      active_worst_quality = rc->avg_frame_qindex[INTER_FRAME] + adjustment;
-    }
-  } else {
-    // Set to worst_quality if buffer is below critical level.
-    active_worst_quality = rc->worst_quality;
-  }
-  return active_worst_quality;
-}
-
-// Adjust target frame size with respect to the buffering constraints:
-static int target_size_from_buffer_level(const VP9_CONFIG *oxcf,
-                                         const RATE_CONTROL *rc) {
-  int target = rc->this_frame_target;
-  const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
-  const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
-
-  if (diff > 0) {
-    // Lower the target bandwidth for this frame.
-    const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
-    target -= (target * pct_low) / 200;
-  } else  if (diff < 0) {
-    // Increase the target bandwidth for this frame.
-    const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
-    target += (target * pct_high) / 200;
-  }
-  return target;
-}
-
 static void calc_pframe_target_size(VP9_COMP *const cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
@@ -406,11 +292,6 @@
     // For now, use: cpi->rc.av_per_frame_bandwidth / 16:
     min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
                            FRAME_OVERHEAD_BITS);
-    rc->this_frame_target = target_size_from_buffer_level(oxcf, rc);
-    // Adjust qp-max based on buffer level.
-    rc->active_worst_quality =
-        adjust_active_worst_quality_from_buffer_level(oxcf, rc);
-
     if (rc->this_frame_target < min_frame_target)
       rc->this_frame_target = min_frame_target;
     return;
@@ -451,8 +332,7 @@
   if (cpi->common.frame_type == KEY_FRAME) {
     return cpi->rc.key_frame_rate_correction_factor;
   } else {
-    if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
-        !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
+    if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
       return cpi->rc.gf_rate_correction_factor;
     else
       return cpi->rc.rate_correction_factor;
@@ -463,8 +343,7 @@
   if (cpi->common.frame_type == KEY_FRAME) {
     cpi->rc.key_frame_rate_correction_factor = factor;
   } else {
-    if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
-        !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
+    if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
       cpi->rc.gf_rate_correction_factor = factor;
     else
       cpi->rc.rate_correction_factor = factor;
@@ -488,7 +367,6 @@
   projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q,
                                                  cpi->common.MBs,
                                                  rate_correction_factor);
-
   // Work out a size correction factor.
   if (projected_size_based_on_q > 0)
     correction_factor = (100 * cpi->rc.projected_frame_size) /
@@ -587,8 +465,9 @@
   }
 }
 
-int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
-                                      int *bottom_index, int *top_index) {
+static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
+                                                  int *bottom_index,
+                                                  int *top_index) {
   const VP9_COMMON *const cm = &cpi->common;
   const RATE_CONTROL *const rc = &cpi->rc;
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
@@ -608,7 +487,7 @@
       int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
                                             (last_boosted_q * 0.75));
       active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
-    } else if (!(cpi->pass == 0 && cm->current_video_frame == 0)) {
+    } else if (cm->current_video_frame > 0) {
       // not first frame of one pass and kf_boost is set
       double q_adj_factor = 1.0;
       double q_val;
@@ -625,9 +504,6 @@
         q_adj_factor -= 0.25;
       }
 
-      // Make a further adjustment based on the kf zero motion measure.
-      q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
-
       // Convert the adjustment factor to a qindex delta
       // on active_best_quality.
       q_val = vp9_convert_qindex_to_q(active_best_quality);
@@ -643,7 +519,6 @@
 #endif
   } else if (!rc->is_src_frame_alt_ref &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-
     // Use the lower of active_worst_quality and recent
     // average Q as basis for GF/ARF best Q limit unless last frame was
     // a key frame.
@@ -694,14 +569,7 @@
     if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
       active_best_quality = cpi->cq_target_quality;
     } else {
-      if (cpi->pass == 0 &&
-          rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
-        // 1-pass: for now, use the average Q for the active_best, if its lower
-        // than active_worst.
-        active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
-      else
-        active_best_quality = inter_minq[active_worst_quality];
-
+      active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
@@ -736,7 +604,7 @@
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
   // Limit Q range for the adaptive loop.
   if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    if (!(cpi->pass == 0 && cm->current_video_frame == 0))
+    if (!(cm->current_video_frame == 0))
       *top_index = (active_worst_quality + active_best_quality * 3) / 4;
   } else if (!rc->is_src_frame_alt_ref &&
              (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
@@ -786,6 +654,208 @@
   return q;
 }
 
+static int rc_pick_q_and_adjust_q_bounds_two_pass(const VP9_COMP *cpi,
+                                                  int *bottom_index,
+                                                  int *top_index) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
+  int active_best_quality;
+  int active_worst_quality = rc->active_worst_quality;
+  int q;
+
+  if (frame_is_intra_only(cm)) {
+    active_best_quality = rc->best_quality;
+#if !CONFIG_MULTIPLE_ARF
+    // Handle the special case for key frames forced when we have reached
+    // the maximum key frame interval. Here force the Q to a range
+    // based on the ambient Q to reduce the risk of popping.
+    if (rc->this_key_frame_forced) {
+      int qindex = rc->last_boosted_qindex;
+      double last_boosted_q = vp9_convert_qindex_to_q(qindex);
+      int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
+                                            (last_boosted_q * 0.75));
+      active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
+    } else {
+      // Not forced keyframe.
+      double q_adj_factor = 1.0;
+      double q_val;
+      // Baseline value derived from rc->active_worst_quality and kf boost.
+      active_best_quality = get_active_quality(active_worst_quality,
+                                               rc->kf_boost,
+                                               kf_low, kf_high,
+                                               kf_low_motion_minq,
+                                               kf_high_motion_minq);
+
+      // Allow somewhat lower kf minq with small image formats.
+      if ((cm->width * cm->height) <= (352 * 288)) {
+        q_adj_factor -= 0.25;
+      }
+
+      // Make a further adjustment based on the kf zero motion measure.
+      q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
+
+      // Convert the adjustment factor to a qindex delta
+      // on active_best_quality.
+      q_val = vp9_convert_qindex_to_q(active_best_quality);
+      active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
+                                                   q_adj_factor);
+    }
+#else
+    double current_q;
+    // Force the KF quantizer to be 30% of the active_worst_quality.
+    current_q = vp9_convert_qindex_to_q(active_worst_quality);
+    active_best_quality = active_worst_quality
+        + vp9_compute_qdelta(cpi, current_q, current_q * 0.3);
+#endif
+  } else if (!rc->is_src_frame_alt_ref &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    // Use the lower of active_worst_quality and recent
+    // average Q as basis for GF/ARF best Q limit unless last frame was
+    // a key frame.
+    if (rc->frames_since_key > 1 &&
+        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
+      q = rc->avg_frame_qindex[INTER_FRAME];
+    } else {
+      q = active_worst_quality;
+    }
+    // For constrained quality don't allow Q less than the cq level
+    if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
+      if (q < cpi->cq_target_quality)
+        q = cpi->cq_target_quality;
+      if (rc->frames_since_key > 1) {
+        active_best_quality = get_active_quality(q, rc->gfu_boost,
+                                                 gf_low, gf_high,
+                                                 afq_low_motion_minq,
+                                                 afq_high_motion_minq);
+      } else {
+        active_best_quality = get_active_quality(q, rc->gfu_boost,
+                                                 gf_low, gf_high,
+                                                 gf_low_motion_minq,
+                                                 gf_high_motion_minq);
+      }
+      // Constrained quality use slightly lower active best.
+      active_best_quality = active_best_quality * 15 / 16;
+
+    } else if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+      if (!cpi->refresh_alt_ref_frame) {
+        active_best_quality = cpi->cq_target_quality;
+      } else {
+        if (rc->frames_since_key > 1) {
+          active_best_quality = get_active_quality(
+              q, rc->gfu_boost, gf_low, gf_high,
+              afq_low_motion_minq, afq_high_motion_minq);
+        } else {
+          active_best_quality = get_active_quality(
+              q, rc->gfu_boost, gf_low, gf_high,
+              gf_low_motion_minq, gf_high_motion_minq);
+        }
+      }
+    } else {
+      active_best_quality = get_active_quality(
+          q, rc->gfu_boost, gf_low, gf_high,
+          gf_low_motion_minq, gf_high_motion_minq);
+    }
+  } else {
+    if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+      active_best_quality = cpi->cq_target_quality;
+    } else {
+      active_best_quality = inter_minq[active_worst_quality];
+
+      // For the constrained quality mode we don't want
+      // q to fall below the cq level.
+      if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
+          (active_best_quality < cpi->cq_target_quality)) {
+        // If we are strongly undershooting the target rate in the last
+        // frames then use the user passed in cq value not the auto
+        // cq value.
+        if (rc->rolling_actual_bits < rc->min_frame_bandwidth)
+          active_best_quality = oxcf->cq_level;
+        else
+          active_best_quality = cpi->cq_target_quality;
+      }
+    }
+  }
+
+  // Clip the active best and worst quality values to limits.
+  if (active_worst_quality > rc->worst_quality)
+    active_worst_quality = rc->worst_quality;
+
+  if (active_best_quality < rc->best_quality)
+    active_best_quality = rc->best_quality;
+
+  if (active_best_quality > rc->worst_quality)
+    active_best_quality = rc->worst_quality;
+
+  if (active_worst_quality < active_best_quality)
+    active_worst_quality = active_best_quality;
+
+  *top_index = active_worst_quality;
+  *bottom_index = active_best_quality;
+
+#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
+  // Limit Q range for the adaptive loop.
+  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+    *top_index = (active_worst_quality + active_best_quality * 3) / 4;
+  } else if (!rc->is_src_frame_alt_ref &&
+             (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    *top_index = (active_worst_quality + active_best_quality) / 2;
+  }
+#endif
+
+  if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+    q = active_best_quality;
+  // Special case code to try and match quality with forced key frames.
+  } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
+    q = rc->last_boosted_qindex;
+  } else {
+    q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
+                          active_best_quality, active_worst_quality);
+    if (q > *top_index) {
+      // Special case when we are targeting the max allowed rate.
+      if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth)
+        *top_index = q;
+      else
+        q = *top_index;
+    }
+  }
+#if CONFIG_MULTIPLE_ARF
+  // Force the quantizer determined by the coding order pattern.
+  if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
+      cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) {
+    double new_q;
+    double current_q = vp9_convert_qindex_to_q(active_worst_quality);
+    int level = cpi->this_frame_weight;
+    assert(level >= 0);
+    new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
+    q = active_worst_quality +
+        vp9_compute_qdelta(cpi, current_q, new_q);
+
+    *bottom_index = q;
+    *top_index    = q;
+    printf("frame:%d q:%d\n", cm->current_video_frame, q);
+  }
+#endif
+  assert(*top_index <= rc->worst_quality &&
+         *top_index >= rc->best_quality);
+  assert(*bottom_index <= rc->worst_quality &&
+         *bottom_index >= rc->best_quality);
+  assert(q <= rc->worst_quality && q >= rc->best_quality);
+  return q;
+}
+
+int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
+                                      int *bottom_index,
+                                      int *top_index) {
+  if (cpi->pass == 0)
+    return rc_pick_q_and_adjust_q_bounds_one_pass(
+        cpi, bottom_index, top_index);
+  else
+    return rc_pick_q_and_adjust_q_bounds_two_pass(
+        cpi, bottom_index, top_index);
+}
+
 void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
                                       int this_frame_target,
                                       int *frame_under_shoot_limit,

diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c
index def99d1..84fb35e 100644
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c

@@ -18,17 +18,6 @@
 
 static int update_bits[255];
 
-static int count_uniform(int v, int n) {
-  int l = get_unsigned_bits(n);
-  int m;
-  if (l == 0) return 0;
-  m = (1 << l) - n;
-  if (v < m)
-    return l - 1;
-  else
-    return l;
-}
-
 static int split_index(int i, int n, int modulus) {
   int max1 = (n - 1 - modulus / 2) / modulus + 1;
   if (i % modulus == modulus / 2)
@@ -81,29 +70,16 @@
   return i;
 }
 
-static int count_term_subexp(int word, int k, int num_syms) {
-  int count = 0;
-  int i = 0;
-  int mk = 0;
-  while (1) {
-    int b = (i ? k + i - 1 : k);
-    int a = (1 << b);
-    if (num_syms <= mk + 3 * a) {
-      count += count_uniform(word - mk, num_syms - mk);
-      break;
-    } else {
-      int t = (word >= mk + a);
-      count++;
-      if (t) {
-        i = i + 1;
-        mk += a;
-      } else {
-        count += b;
-        break;
-      }
-    }
-  }
-  return count;
+static int count_term_subexp(int word) {
+  if (word < 16)
+    return 5;
+  if (word < 32)
+    return 6;
+  if (word < 64)
+    return 8;
+  if (word < 129)
+    return 10;
+  return 11;
 }
 
 static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) {
@@ -111,12 +87,9 @@
   return update_bits[delp] * 256;
 }
 
-static void encode_uniform(vp9_writer *w, int v, int n) {
-  int l = get_unsigned_bits(n);
-  int m;
-  if (l == 0)
-    return;
-  m = (1 << l) - n;
+static void encode_uniform(vp9_writer *w, int v) {
+  const int l = 8;
+  const int m = (1 << l) - 191;
   if (v < m) {
     vp9_write_literal(w, v, l - 1);
   } else {
@@ -125,38 +98,32 @@
   }
 }
 
-static void encode_term_subexp(vp9_writer *w, int word, int k, int num_syms) {
-  int i = 0;
-  int mk = 0;
-  while (1) {
-    int b = (i ? k + i - 1 : k);
-    int a = (1 << b);
-    if (num_syms <= mk + 3 * a) {
-      encode_uniform(w, word - mk, num_syms - mk);
-      break;
-    } else {
-      int t = (word >= mk + a);
-      vp9_write_literal(w, t, 1);
-      if (t) {
-        i = i + 1;
-        mk += a;
-      } else {
-        vp9_write_literal(w, word - mk, b);
-        break;
-      }
-    }
+static INLINE int write_bit_gte(vp9_writer *w, int word, int test) {
+  vp9_write_literal(w, word >= test, 1);  // emit the comparison as one bit
+  return word >= test;  // hand the same flag back so callers can branch on it
+}
+
+static void encode_term_subexp(vp9_writer *w, int word) {
+  if (!write_bit_gte(w, word, 16)) {  // flag 0: word < 16
+    vp9_write_literal(w, word, 4);
+  } else if (!write_bit_gte(w, word, 32)) {  // flags 1,0: word in [16, 32)
+    vp9_write_literal(w, word - 16, 4);
+  } else if (!write_bit_gte(w, word, 64)) {  // flags 1,1,0: word in [32, 64)
+    vp9_write_literal(w, word - 32, 5);
+  } else {  // flags 1,1,1: word >= 64, remainder goes to encode_uniform
+    encode_uniform(w, word - 64);
   }
 }
 
 void vp9_write_prob_diff_update(vp9_writer *w, vp9_prob newp, vp9_prob oldp) {
   const int delp = remap_prob(newp, oldp);
-  encode_term_subexp(w, delp, SUBEXP_PARAM, 255);
+  encode_term_subexp(w, delp);
 }
 
 void vp9_compute_update_table() {
   int i;
   for (i = 0; i < 254; i++)
-    update_bits[i] = count_term_subexp(i, SUBEXP_PARAM, 255);
+    update_bits[i] = count_term_subexp(i);
 }
 
 int vp9_prob_diff_update_savings_search(const unsigned int *ct,

diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index e822e4c..e16467a 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c

@@ -134,17 +134,16 @@
   int sadpb = x->sadperbit16;
   int bestsme = INT_MAX;
 
-  int_mv best_ref_mv1;
-  int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
-  int_mv *ref_mv;
+  MV best_ref_mv1 = {0, 0};
+  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+  MV *ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0].as_mv;
 
   // Save input state
   struct buf_2d src = x->plane[0].src;
   struct buf_2d pre = xd->plane[0].pre[0];
 
-  best_ref_mv1.as_int = 0;
-  best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3;
-  best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;
+  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+  best_ref_mv1_full.row = best_ref_mv1.row >> 3;
 
   // Setup frame pointers
   x->plane[0].src.buf = arf_frame_buf;
@@ -161,11 +160,9 @@
 
   /*cpi->sf.search_method == HEX*/
   // Ignore mv costing by sending NULL pointer instead of cost arrays
-  ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0];
-  bestsme = vp9_hex_search(x, &best_ref_mv1_full.as_mv,
-                           step_param, sadpb, 1,
+  bestsme = vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
                            &cpi->fn_ptr[BLOCK_16X16],
-                           0, &best_ref_mv1.as_mv, &ref_mv->as_mv);
+                           0, &best_ref_mv1, ref_mv);
 
 #if ALT_REF_SUBPEL_ENABLED
   // Try sub-pixel MC?
@@ -174,8 +171,8 @@
     int distortion;
     unsigned int sse;
     // Ignore mv costing by sending NULL pointer instead of cost array
-    bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv,
-                                           &best_ref_mv1.as_mv,
+    bestsme = cpi->find_fractional_mv_step(x, ref_mv,
+                                           &best_ref_mv1,
                                            cpi->common.allow_high_precision_mv,
                                            x->errorperbit,
                                            &cpi->fn_ptr[BLOCK_16X16],

diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance.c
similarity index 100%
rename from vp9/encoder/vp9_variance_c.c
rename to vp9/encoder/vp9_variance.c


diff --git a/vp9/encoder/vp9_writer.c b/vp9/encoder/vp9_writer.c
index 3d13d07..fda1b39 100644
--- a/vp9/encoder/vp9_writer.c
+++ b/vp9/encoder/vp9_writer.c

@@ -12,11 +12,6 @@
 #include "vp9/encoder/vp9_writer.h"
 #include "vp9/common/vp9_entropy.h"
 
-#if defined(SECTIONBITS_OUTPUT)
-unsigned __int64 Sectionbits[500];
-
-#endif
-
 #ifdef ENTROPY_STATS
 unsigned int active_section = 0;
 #endif

diff --git a/vp9/encoder/vp9_writer.h b/vp9/encoder/vp9_writer.h
index 62f555c..defeec3 100644
--- a/vp9/encoder/vp9_writer.h
+++ b/vp9/encoder/vp9_writer.h

@@ -44,17 +44,6 @@
   unsigned int lowvalue = br->lowvalue;
   register unsigned int shift;
 
-#ifdef ENTROPY_STATS
-#if defined(SECTIONBITS_OUTPUT)
-
-  if (bit)
-    Sectionbits[active_section] += vp9_prob_cost[255 - probability];
-  else
-    Sectionbits[active_section] += vp9_prob_cost[probability];
-
-#endif
-#endif
-
   split = 1 + (((range - 1) * probability) >> 8);
 
   range = split;

diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 78aabc7..c691411 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk

@@ -74,6 +74,7 @@
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
 ifeq ($(CONFIG_VP9_POSTPROC),yes)
 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
@@ -124,16 +125,16 @@
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht4x4_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)

diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 8e12cb6..6b18171 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c

@@ -175,23 +175,6 @@
 
   RANGE_CHECK(cfg, ss_number_layers,      1,
               VPX_SS_MAX_LAYERS); /*Spatial layers max */
-
-  RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
-  if (cfg->ts_number_layers > 1) {
-    unsigned int i;
-    for (i = 1; i <cfg->ts_number_layers; i++) {
-      if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i-1]) {
-        ERROR("ts_target_bitrate entries are not increasing");
-      }
-    }
-    RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers-1], 1, 1);
-    for (i = cfg->ts_number_layers-2; i > 0; i--) {
-      if (cfg->ts_rate_decimator[i-1] != 2*cfg->ts_rate_decimator[i]) {
-        ERROR("ts_rate_decimator factors are not powers of 2");
-      }
-    }
-  }
-
   /* VP8 does not support a lower bound on the keyframe interval in
    * automatic keyframe placement mode.
    */
@@ -362,19 +345,6 @@
   oxcf->aq_mode = vp8_cfg.aq_mode;
 
   oxcf->ss_number_layers = cfg.ss_number_layers;
-
-  oxcf->ts_number_layers = cfg.ts_number_layers;
-
-  if (oxcf->ts_number_layers > 1) {
-    memcpy(oxcf->ts_target_bitrate, cfg.ts_target_bitrate,
-           sizeof(cfg.ts_target_bitrate));
-    memcpy(oxcf->ts_rate_decimator, cfg.ts_rate_decimator,
-           sizeof(cfg.ts_rate_decimator));
-  } else if (oxcf->ts_number_layers == 1) {
-    oxcf->ts_target_bitrate[0] = oxcf->target_bandwidth;
-    oxcf->ts_rate_decimator[0] = 1;
-  }
-
   /*
   printf("Current VP9 Settings: \n");
   printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
@@ -1046,32 +1016,6 @@
                                     va_list args) {
   int data = va_arg(args, int);
   vp9_set_svc(ctx->cpi, data);
-  // CBR mode for SVC with both temporal and spatial layers not yet supported.
-  if (data == 1 &&
-      ctx->cfg.rc_end_usage == VPX_CBR &&
-      ctx->cfg.ss_number_layers > 1 &&
-      ctx->cfg.ts_number_layers > 1) {
-    return VPX_CODEC_INVALID_PARAM;
-  }
-  return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t vp9e_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
-                                             int ctr_id,
-                                             va_list args) {
-  vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *);
-  VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
-  cpi->svc.spatial_layer_id = data->spatial_layer_id;
-  cpi->svc.temporal_layer_id = data->temporal_layer_id;
-  // Checks on valid layer_id input.
-  if (cpi->svc.temporal_layer_id < 0 ||
-      cpi->svc.temporal_layer_id >= ctx->cfg.ts_number_layers) {
-    return VPX_CODEC_INVALID_PARAM;
-  }
-  if (cpi->svc.spatial_layer_id < 0 ||
-      cpi->svc.spatial_layer_id >= ctx->cfg.ss_number_layers) {
-    return VPX_CODEC_INVALID_PARAM;
-  }
   return VPX_CODEC_OK;
 }
 
@@ -1087,9 +1031,7 @@
 
   params = *(vpx_svc_parameters_t *)data;
 
-  cpi->svc.spatial_layer_id = params.spatial_layer;
-  cpi->svc.temporal_layer_id = params.temporal_layer;
-
+  cpi->current_layer = params.layer;
   cpi->lst_fb_idx = params.lst_fb_idx;
   cpi->gld_fb_idx = params.gld_fb_idx;
   cpi->alt_fb_idx = params.alt_fb_idx;
@@ -1138,7 +1080,6 @@
   {VP9_GET_REFERENCE,                 get_reference},
   {VP9E_SET_SVC,                      vp9e_set_svc},
   {VP9E_SET_SVC_PARAMETERS,           vp9e_set_svc_parameters},
-  {VP9E_SET_SVC_LAYER_ID,             vp9e_set_svc_layer_id},
   { -1, NULL},
 };
 
@@ -1189,11 +1130,7 @@
       9999,               /* kf_max_dist */
 
       VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
-      1,                  /* ts_number_layers */
-      {0},                /* ts_target_bitrate */
-      {0},                /* ts_rate_decimator */
-      0,                  /* ts_periodicity */
-      {0},                /* ts_layer_id */
+
 #if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION)
       1,                  /* g_delete_first_pass_file */
       "vp8.fpf"           /* first pass filename */

diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 63003b9..131d6c7 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk

@@ -66,7 +66,7 @@
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
 VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
 VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
-VP9_CX_SRCS-yes += encoder/vp9_variance_c.c
+VP9_CX_SRCS-yes += encoder/vp9_variance.c
 VP9_CX_SRCS-yes += encoder/vp9_vaq.c
 VP9_CX_SRCS-yes += encoder/vp9_vaq.h
 ifeq ($(CONFIG_VP9_POSTPROC),yes)

diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 12f7166..810e881 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c

@@ -499,7 +499,6 @@
 
   // modify encoder configuration
   enc_cfg->ss_number_layers = si->layers;
-  enc_cfg->ts_number_layers = 1;  // Temporal layers not used in this encoder.
   enc_cfg->kf_mode = VPX_KF_DISABLED;
   enc_cfg->g_pass = VPX_RC_ONE_PASS;
   // Lag in frames not currently supported
@@ -692,8 +691,7 @@
   SvcInternal *const si = get_svc_internal(svc_ctx);
 
   memset(&svc_params, 0, sizeof(svc_params));
-  svc_params.temporal_layer = 0;
-  svc_params.spatial_layer = si->layer;
+  svc_params.layer = si->layer;
   svc_params.flags = si->enc_frame_flags;
 
   layer = si->layer;

diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index d0ac1af..829490f 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h

@@ -194,8 +194,7 @@
   VP9E_SET_AQ_MODE,
 
   VP9E_SET_SVC,
-  VP9E_SET_SVC_PARAMETERS,
-  VP9E_SET_SVC_LAYER_ID
+  VP9E_SET_SVC_PARAMETERS
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -286,8 +285,7 @@
 typedef struct vpx_svc_parameters {
   unsigned int width;         /**< width of current spatial layer */
   unsigned int height;        /**< height of current spatial layer */
-  int spatial_layer;          /**< current spatial layer number - 0 = base */
-  int temporal_layer;         /**< current temporal layer number - 0 = base */
+  int layer;                  /**< current layer number - 0 = base */
   int flags;                  /**< encode frame flags */
   int max_quantizer;          /**< max quantizer for current layer */
   int min_quantizer;          /**< min quantizer for current layer */
@@ -297,11 +295,6 @@
   int alt_fb_idx;             /**< alt reference frame frame buffer index */
 } vpx_svc_parameters_t;
 
-typedef struct vpx_svc_layer_id {
-  int spatial_layer_id;
-  int temporal_layer_id;
-} vpx_svc_layer_id_t;
-
 /*!\brief VP8 encoder control function parameter type
  *
  * Defines the data types that VP8E control functions take. Note that
@@ -323,7 +316,6 @@
 
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC,                int)
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS,     vpx_svc_parameters_t *)
-VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID,       vpx_svc_layer_id_t *)
 
 VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED,            int)
 VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF,   unsigned int)

diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 8e7d5a4..3473885 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h

@@ -604,48 +604,47 @@
      * Spatial scalability settings (ss)
      */
 
-    /*!\brief Number of spatial coding layers.
+    /*!\brief Number of coding layers (spatial)
      *
-     * This value specifies the number of spatial coding layers to be used.
+     * This value specifies the number of coding layers to be used.
      */
     unsigned int           ss_number_layers;
 
-    /*!\brief Number of temporal coding layers.
+    /*!\brief Number of coding layers (temporal)
      *
-     * This value specifies the number of temporal layers to be used.
+     * This value specifies the number of temporal coding layers to be used.
      */
     unsigned int           ts_number_layers;
 
-    /*!\brief Target bitrate for each temporal layer.
+    /*!\brief Target bitrate for each layer
      *
-     * These values specify the target coding bitrate to be used for each
-     * temporal layer.
+     * These values specify the target coding bitrate for each coding layer.
      */
     unsigned int           ts_target_bitrate[VPX_TS_MAX_LAYERS];
 
-    /*!\brief Frame rate decimation factor for each temporal layer.
+    /*!\brief Frame rate decimation factor for each layer
      *
      * These values specify the frame rate decimation factors to apply
-     * to each temporal layer.
+     * to each layer.
      */
     unsigned int           ts_rate_decimator[VPX_TS_MAX_LAYERS];
 
-    /*!\brief Length of the sequence defining frame temporal layer membership.
-    *
-    * This value specifies the length of the sequence that defines the
-    * membership of frames to temporal layers. For example, if ts_periodicity=8
-    * then frames are assigned to coding layers with a repeated sequence of
-    * length 8.
-    */
+    /*!\brief Length of the sequence defining frame layer membership
+     *
+     * This value specifies the length of the sequence that defines the
+     * membership of frames to layers. For example, if ts_periodicity=8 then
+     * frames are assigned to coding layers with a repeated sequence of
+     * length 8.
+     */
     unsigned int           ts_periodicity;
 
-    /*!\brief Template defining the membership of frames to temporal layers.
-    *
-    * This array defines the membership of frames to temporal coding layers.
-    * For a 2-layer encoding that assigns even numbered frames to one temporal
-    * layer (0) and odd numbered frames to a second temporal layer (1) with
-    * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1).
-    */
+    /*!\brief Template defining the membership of frames to coding layers
+     *
+     * This array defines the membership of frames to coding layers. For a
+     * 2-layer encoding that assigns even numbered frames to one layer (0)
+     * and odd numbered frames to a second layer (1) with ts_periodicity=8,
+     * then ts_layer_id = (0,1,0,1,0,1,0,1).
+     */
     unsigned int           ts_layer_id[VPX_TS_MAX_PERIODICITY];
   } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */
 

diff --git a/vpx_temporal_scalable_patterns.c b/vpx_temporal_scalable_patterns.c
deleted file mode 100644
index 30a4b63..0000000
--- a/vpx_temporal_scalable_patterns.c
+++ /dev/null

@@ -1,537 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-//  This is an example demonstrating how to implement a multi-layer VP9
-//  encoding scheme based on temporal scalability for video applications
-//  that benefit from a scalable bitstream.
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "./ivfenc.h"
-#include "./tools_common.h"
-#include "./vpx_config.h"
-#include "vpx/vp8cx.h"
-#include "vpx/vpx_encoder.h"
-
-static const char *exec_name;
-
-void usage_exit() {
-  exit(EXIT_FAILURE);
-}
-
-static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
-
-// Temporal scaling parameters:
-// NOTE: The 3 prediction frames cannot be used interchangeably due to
-// differences in the way they are handled throughout the code. The
-// frames should be allocated to layers in the order LAST, GF, ARF.
-// Other combinations work, but may produce slightly inferior results.
-static void set_temporal_layer_pattern(int layering_mode,
-                                       vpx_codec_enc_cfg_t *cfg,
-                                       int *layer_flags,
-                                       int *flag_periodicity) {
-  switch (layering_mode) {
-    case 0: {
-      // 1-layer.
-      int ids[1] = {0};
-      cfg->ts_periodicity = 1;
-      *flag_periodicity = 1;
-      cfg->ts_number_layers = 1;
-      cfg->ts_rate_decimator[0] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // Update L only.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      break;
-    }
-    case 1: {
-      // 2-layers, 2-frame period.
-      int ids[2] = {0, 1};
-      cfg->ts_periodicity = 2;
-      *flag_periodicity = 2;
-      cfg->ts_number_layers = 2;
-      cfg->ts_rate_decimator[0] = 2;
-      cfg->ts_rate_decimator[1] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-#if 1
-      // 0=L, 1=GF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_REF_ARF;
-#else
-       // 0=L, 1=GF, Intra-layer prediction disabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST;
-#endif
-      break;
-    }
-    case 2: {
-      // 2-layers, 3-frame period.
-      int ids[3] = {0, 1, 1};
-      cfg->ts_periodicity = 3;
-      *flag_periodicity = 3;
-      cfg->ts_number_layers = 2;
-      cfg->ts_rate_decimator[0] = 3;
-      cfg->ts_rate_decimator[1] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF  | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      break;
-    }
-    case 3: {
-      // 3-layers, 6-frame period.
-      int ids[6] = {0, 2, 2, 1, 2, 2};
-      cfg->ts_periodicity = 6;
-      *flag_periodicity = 6;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 6;
-      cfg->ts_rate_decimator[1] = 3;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[1] =
-      layer_flags[2] =
-      layer_flags[4] =
-      layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
-      break;
-    }
-    case 4: {
-      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 4;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      break;
-    }
-    case 5: {
-      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 4;
-      cfg->ts_number_layers     = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled
-      // in layer 2.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      break;
-    }
-    case 6: {
-      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 4;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      break;
-    }
-    case 7: {
-      // NOTE: Probably of academic interest only.
-      // 5-layers, 16-frame period.
-      int ids[16] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4};
-      cfg->ts_periodicity = 16;
-      *flag_periodicity = 16;
-      cfg->ts_number_layers = 5;
-      cfg->ts_rate_decimator[0] = 16;
-      cfg->ts_rate_decimator[1] = 8;
-      cfg->ts_rate_decimator[2] = 4;
-      cfg->ts_rate_decimator[3] = 2;
-      cfg->ts_rate_decimator[4] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      layer_flags[0]  = VPX_EFLAG_FORCE_KF;
-      layer_flags[1]  =
-      layer_flags[3]  =
-      layer_flags[5]  =
-      layer_flags[7]  =
-      layer_flags[9]  =
-      layer_flags[11] =
-      layer_flags[13] =
-      layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2]  =
-      layer_flags[6]  =
-      layer_flags[10] =
-      layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[4] =
-      layer_flags[12] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[8]  = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF;
-      break;
-    }
-    case 8: {
-      // 2-layers, with sync point at first frame of layer 1.
-      int ids[2] = {0, 1};
-      cfg->ts_periodicity = 2;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 2;
-      cfg->ts_rate_decimator[0] = 2;
-      cfg->ts_rate_decimator[1] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF.
-      // ARF is used as predictor for all frames, and is only updated on
-      // key frame. Sync point every 8 frames.
-
-      // Layer 0: predict from L and ARF, update L and G.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      // Layer 1: sync point: predict from L and ARF, and update G.
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      // Layer 0, predict from L and ARF, update L.
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      // Layer 1: predict from L, G and ARF, and update G.
-      layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ENTROPY;
-      // Layer 0.
-      layer_flags[4] = layer_flags[2];
-      // Layer 1.
-      layer_flags[5] = layer_flags[3];
-      // Layer 0.
-      layer_flags[6] = layer_flags[4];
-      // Layer 1.
-      layer_flags[7] = layer_flags[5];
-     break;
-    }
-    case 9: {
-      // 3-layers: Sync points for layer 1 and 2 every 8 frames.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF   | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[3] =
-      layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[6] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY;
-      break;
-    }
-    case 10: {
-      // 3-layers structure where ARF is used as predictor for all frames,
-      // and is only updated on key frame.
-      // Sync points for layer 1 and 2 every 8 frames.
-
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF.
-      // Layer 0: predict from L and ARF; update L and G.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
-      // Layer 2: sync point: predict from L and ARF; update none.
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ENTROPY;
-      // Layer 1: sync point: predict from L and ARF; update G.
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST;
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
-      // Layer 0: predict from L and ARF; update L.
-      layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[5] = layer_flags[3];
-      // Layer 1: predict from L, G, ARF; update G.
-      layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[7] = layer_flags[3];
-      break;
-    }
-    case 11:
-    default: {
-      // 3-layers structure as in case 10, but no sync/refresh points for
-      // layer 1 and 2.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF.
-      // Layer 0: predict from L and ARF; update L.
-      layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
-      layer_flags[4] = layer_flags[0];
-      // Layer 1: predict from L, G, ARF; update G.
-      layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[6] = layer_flags[2];
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
-      layer_flags[3] = layer_flags[1];
-      layer_flags[5] = layer_flags[1];
-      layer_flags[7] = layer_flags[1];
-      break;
-    }
-  }
-}
-
-int main(int argc, char **argv) {
-  FILE *outfile[VPX_TS_MAX_LAYERS];
-  vpx_codec_ctx_t codec;
-  vpx_codec_enc_cfg_t cfg;
-  int frame_cnt = 0;
-  vpx_image_t raw;
-  vpx_codec_err_t res;
-  unsigned int width;
-  unsigned int height;
-  int frame_avail;
-  int got_data;
-  int flags = 0;
-  int i;
-  int pts = 0;  // PTS starts at 0.
-  int frame_duration = 1;  // 1 timebase tick per frame.
-  int layering_mode = 0;
-  int frames_in_layer[VPX_TS_MAX_LAYERS] = {0};
-  int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
-  int flag_periodicity = 1;
-  int max_intra_size_pct;
-  vpx_svc_layer_id_t layer_id = {0, 0};
-  char *codec_type;
-  // Default is VP8 codec.
-  const vpx_codec_iface_t *(*interface)(void) = vpx_codec_vp8_cx;
-  unsigned int fourcc = 0x30385056;
-  struct VpxInputContext input_ctx = {0};
-
-  exec_name = argv[0];
-  // Check usage and arguments.
-  if (argc < 10) {
-    die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
-        " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1> "
-        " <codec_type(vp8/vp9)> \n", argv[0]);
-  }
-  width = strtol(argv[3], NULL, 0);
-  height = strtol(argv[4], NULL, 0);
-  if (width < 16 || width%2 || height <16 || height%2) {
-    die("Invalid resolution: %d x %d", width, height);
-  }
-
-  layering_mode = strtol(argv[7], NULL, 0);
-  if (layering_mode < 0 || layering_mode > 11) {
-    die("Invalid mode (0..11) %s", argv[7]);
-  }
-
-  if (argc != 8 + mode_to_num_layers[layering_mode] + 1) {
-    die("Invalid number of arguments");
-  }
-
-  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) {
-    die("Failed to allocate image", width, height);
-  }
-
-  codec_type = argv[8 + mode_to_num_layers[layering_mode]];
-  if (strncmp(codec_type, "vp9", 3) == 0) {
-    interface = vpx_codec_vp9_cx;
-    fourcc = 0x30395056;
-  }
-  printf("Using %s\n", vpx_codec_iface_name(interface()));
-
-  // Populate encoder configuration.
-  res = vpx_codec_enc_config_default(interface(), &cfg, 0);
-  if (res) {
-    printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-    return EXIT_FAILURE;
-  }
-
-  // Update the default configuration with our settings.
-  cfg.g_w = width;
-  cfg.g_h = height;
-
-  // Timebase format e.g. 30fps: numerator=1, demoninator = 30.
-  cfg.g_timebase.num = strtol(argv[5], NULL, 0);
-  cfg.g_timebase.den = strtol(argv[6], NULL, 0);
-
-  for (i = 8; i < 8 + mode_to_num_layers[layering_mode]; ++i) {
-    cfg.ts_target_bitrate[i-8] = strtol(argv[i], NULL, 0);
-  }
-
-  // Real time parameters.
-  cfg.rc_dropframe_thresh = 0;
-  cfg.rc_end_usage = VPX_CBR;
-  cfg.rc_resize_allowed = 0;
-  cfg.rc_min_quantizer = 2;
-  cfg.rc_max_quantizer = 56;
-  cfg.rc_undershoot_pct = 100;
-  cfg.rc_overshoot_pct = 15;
-  cfg.rc_buf_initial_sz = 500;
-  cfg.rc_buf_optimal_sz = 600;
-  cfg.rc_buf_sz = 1000;
-
-  // Enable error resilient mode.
-  cfg.g_error_resilient = 1;
-  cfg.g_lag_in_frames   = 0;
-  cfg.kf_mode = VPX_KF_DISABLED;
-
-  // Disable automatic keyframe placement.
-  cfg.kf_min_dist = cfg.kf_max_dist = 3000;
-
-  // Default setting for bitrate: used in special case of 1 layer (case 0).
-  cfg.rc_target_bitrate = cfg.ts_target_bitrate[0];
-
-  set_temporal_layer_pattern(layering_mode,
-                             &cfg,
-                             layer_flags,
-                             &flag_periodicity);
-
-  // Open input file.
-  input_ctx.filename = argv[1];
-  if (!(input_ctx.file = fopen(input_ctx.filename, "rb"))) {
-    die("Failed to open %s for reading", argv[1]);
-  }
-
-  // Open an output file for each stream.
-  for (i = 0; i < cfg.ts_number_layers; ++i) {
-    char file_name[512];
-    snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i);
-    if (!(outfile[i] = fopen(file_name, "wb")))
-      die("Failed to open %s for writing", file_name);
-    ivf_write_file_header(outfile[i], &cfg, fourcc, 0);
-  }
-  // No spatial layers in this encoder.
-  cfg.ss_number_layers = 1;
-
-  // Initialize codec.
-  if (vpx_codec_enc_init(&codec, interface(), &cfg, 0))
-    die_codec(&codec, "Failed to initialize encoder");
-
-  vpx_codec_control(&codec, VP8E_SET_CPUUSED, -6);
-  vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1);
-  if (strncmp(codec_type, "vp9", 3) == 0) {
-    vpx_codec_control(&codec, VP8E_SET_CPUUSED, 3);
-    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0);
-    if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) {
-      die_codec(&codec, "Failed to set SVC");
-    }
-  }
-  vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
-  vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
-  max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
-      * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0);
-  vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct);
-
-  frame_avail = 1;
-  while (frame_avail || got_data) {
-    vpx_codec_iter_t iter = NULL;
-    const vpx_codec_cx_pkt_t *pkt;
-    // Update the temporal layer_id. No spatial layers in this test.
-    layer_id.spatial_layer_id = 0;
-    layer_id.temporal_layer_id =
-        cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
-    vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
-    flags = layer_flags[frame_cnt % flag_periodicity];
-    frame_avail = !read_yuv_frame(&input_ctx, &raw);
-    if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags,
-        VPX_DL_REALTIME)) {
-      die_codec(&codec, "Failed to encode frame");
-    }
-    // Reset KF flag.
-    if (layering_mode != 7) {
-      layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;
-    }
-    got_data = 0;
-    while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
-      got_data = 1;
-      switch (pkt->kind) {
-        case VPX_CODEC_CX_FRAME_PKT:
-          for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
-              i < cfg.ts_number_layers; ++i) {
-            ivf_write_frame_header(outfile[i], pts, pkt->data.frame.sz);
-            (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-                          outfile[i]);
-            ++frames_in_layer[i];
-          }
-          break;
-          default:
-            break;
-      }
-    }
-    ++frame_cnt;
-    pts += frame_duration;
-  }
-  fclose(input_ctx.file);
-  printf("Processed %d frames: \n", frame_cnt-1);
-  if (vpx_codec_destroy(&codec)) {
-    die_codec(&codec, "Failed to destroy codec");
-  }
-  // Try to rewrite the output file headers with the actual frame count.
-  for (i = 0; i < cfg.ts_number_layers; ++i) {
-    if (!fseek(outfile[i], 0, SEEK_SET))
-      ivf_write_file_header(outfile[i], &cfg, fourcc, frame_cnt);
-    fclose(outfile[i]);
-  }
-  return EXIT_SUCCESS;
-}

diff --git a/vpxenc.c b/vpxenc.c
index 6ad3b9b..cf3831c 100644
--- a/vpxenc.c
+++ b/vpxenc.c

@@ -474,23 +474,26 @@
 }
 
 #define mmin(a, b)  ((a) < (b) ? (a) : (b))
-static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
+static void find_mismatch(const vpx_image_t *const img1,
+                          const vpx_image_t *const img2,
                           int yloc[4], int uloc[4], int vloc[4]) {
-  const unsigned int bsize = 64;
-  const unsigned int bsizey = bsize >> img1->y_chroma_shift;
-  const unsigned int bsizex = bsize >> img1->x_chroma_shift;
-  const int c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
-  const int c_h = (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
-  unsigned int match = 1;
-  unsigned int i, j;
+  const uint32_t bsize = 64;
+  const uint32_t bsizey = bsize >> img1->y_chroma_shift;
+  const uint32_t bsizex = bsize >> img1->x_chroma_shift;
+  const uint32_t c_w =
+      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const uint32_t c_h =
+      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+  int match = 1;
+  uint32_t i, j;
   yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1;
   for (i = 0, match = 1; match && i < img1->d_h; i += bsize) {
     for (j = 0; match && j < img1->d_w; j += bsize) {
       int k, l;
-      int si = mmin(i + bsize, img1->d_h) - i;
-      int sj = mmin(j + bsize, img1->d_w) - j;
-      for (k = 0; match && k < si; k++)
-        for (l = 0; match && l < sj; l++) {
+      const int si = mmin(i + bsize, img1->d_h) - i;
+      const int sj = mmin(j + bsize, img1->d_w) - j;
+      for (k = 0; match && k < si; ++k) {
+        for (l = 0; match && l < sj; ++l) {
           if (*(img1->planes[VPX_PLANE_Y] +
                 (i + k) * img1->stride[VPX_PLANE_Y] + j + l) !=
               *(img2->planes[VPX_PLANE_Y] +
@@ -505,6 +508,7 @@
             break;
           }
         }
+      }
     }
   }
 
@@ -512,10 +516,10 @@
   for (i = 0, match = 1; match && i < c_h; i += bsizey) {
     for (j = 0; match && j < c_w; j += bsizex) {
       int k, l;
-      int si = mmin(i + bsizey, c_h - i);
-      int sj = mmin(j + bsizex, c_w - j);
-      for (k = 0; match && k < si; k++)
-        for (l = 0; match && l < sj; l++) {
+      const int si = mmin(i + bsizey, c_h - i);
+      const int sj = mmin(j + bsizex, c_w - j);
+      for (k = 0; match && k < si; ++k) {
+        for (l = 0; match && l < sj; ++l) {
           if (*(img1->planes[VPX_PLANE_U] +
                 (i + k) * img1->stride[VPX_PLANE_U] + j + l) !=
               *(img2->planes[VPX_PLANE_U] +
@@ -525,21 +529,22 @@
             uloc[2] = *(img1->planes[VPX_PLANE_U] +
                         (i + k) * img1->stride[VPX_PLANE_U] + j + l);
             uloc[3] = *(img2->planes[VPX_PLANE_U] +
-                        (i + k) * img2->stride[VPX_PLANE_V] + j + l);
+                        (i + k) * img2->stride[VPX_PLANE_U] + j + l);
             match = 0;
             break;
           }
         }
+      }
     }
   }
   vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1;
   for (i = 0, match = 1; match && i < c_h; i += bsizey) {
     for (j = 0; match && j < c_w; j += bsizex) {
       int k, l;
-      int si = mmin(i + bsizey, c_h - i);
-      int sj = mmin(j + bsizex, c_w - j);
-      for (k = 0; match && k < si; k++)
-        for (l = 0; match && l < sj; l++) {
+      const int si = mmin(i + bsizey, c_h - i);
+      const int sj = mmin(j + bsizex, c_w - j);
+      for (k = 0; match && k < si; ++k) {
+        for (l = 0; match && l < sj; ++l) {
           if (*(img1->planes[VPX_PLANE_V] +
                 (i + k) * img1->stride[VPX_PLANE_V] + j + l) !=
               *(img2->planes[VPX_PLANE_V] +
@@ -554,34 +559,37 @@
             break;
           }
         }
+      }
     }
   }
 }
 
-static int compare_img(vpx_image_t *img1, vpx_image_t *img2)
-{
-  const int c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
-  const int c_h = (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+static int compare_img(const vpx_image_t *const img1,
+                       const vpx_image_t *const img2) {
+  const uint32_t c_w =
+      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const uint32_t c_h =
+      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+  uint32_t i;
   int match = 1;
-  unsigned int i;
 
   match &= (img1->fmt == img2->fmt);
   match &= (img1->d_w == img2->d_w);
   match &= (img1->d_h == img2->d_h);
 
-  for (i = 0; i < img1->d_h; i++)
-    match &= (memcmp(img1->planes[VPX_PLANE_Y]+i*img1->stride[VPX_PLANE_Y],
-                     img2->planes[VPX_PLANE_Y]+i*img2->stride[VPX_PLANE_Y],
+  for (i = 0; i < img1->d_h; ++i)
+    match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
+                     img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
                      img1->d_w) == 0);
 
-  for (i = 0; i < c_h; i++)
-    match &= (memcmp(img1->planes[VPX_PLANE_U]+i*img1->stride[VPX_PLANE_U],
-                     img2->planes[VPX_PLANE_U]+i*img2->stride[VPX_PLANE_U],
+  for (i = 0; i < c_h; ++i)
+    match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
+                     img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
                      c_w) == 0);
 
-  for (i = 0; i < c_h; i++)
-    match &= (memcmp(img1->planes[VPX_PLANE_V]+i*img1->stride[VPX_PLANE_U],
-                     img2->planes[VPX_PLANE_V]+i*img2->stride[VPX_PLANE_U],
+  for (i = 0; i < c_h; ++i)
+    match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
+                     img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
                      c_w) == 0);
 
   return match;