Merge "Configuration: Use 'is_in' instead of echo | grep"
diff --git a/build/make/configure.sh b/build/make/configure.sh
index aea3778..29f89b1 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -768,13 +768,14 @@
   enabled shared && soft_enable pic
 
   # Minimum iOS version for all target platforms (darwin and iphonesimulator).
+  # Shared library framework builds are only possible on iOS 8 and later.
   if enabled shared; then
     IOS_VERSION_OPTIONS="--enable-shared"
+    IOS_VERSION_MIN="8.0"
   else
     IOS_VERSION_OPTIONS=""
+    IOS_VERSION_MIN="6.0"
   fi
-  IOS_VERSION_MIN=$("${source_path}/build/make/ios-version.sh" \
-    ${IOS_VERSION_OPTIONS})
 
   # Handle darwin variants. Newer SDKs allow targeting older
   # platforms, so use the newest one available.
diff --git a/build/make/ios-version.sh b/build/make/ios-version.sh
deleted file mode 100755
index 7252eb4..0000000
--- a/build/make/ios-version.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/sh
-##
-##  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-
-if [ "$1" = "--enable-shared" ]; then
-  # Shared library framework builds are only possible on iOS 8 and later.
-  echo "8.0"
-else
-  echo "6.0"
-fi
diff --git a/build/make/iosbuild.sh b/build/make/iosbuild.sh
index 96dc6cc..c703f22 100755
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -347,10 +347,11 @@
 
 if [ "$ENABLE_SHARED" = "yes" ]; then
   IOS_VERSION_OPTIONS="--enable-shared"
+  IOS_VERSION_MIN="8.0"
 else
   IOS_VERSION_OPTIONS=""
+  IOS_VERSION_MIN="6.0"
 fi
-IOS_VERSION_MIN=$("${SCRIPT_DIR}/ios-version.sh" ${IOS_VERSION_OPTIONS})
 
 if [ "${VERBOSE}" = "yes" ]; then
 cat << EOF
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 3941e16..2f1db9c 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -90,7 +90,7 @@
           << pkt->data.frame.pts;
     }
 
-    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    const int64_t frame_size_in_bits = pkt->data.frame.sz * 8;
 
     // Subtract from the buffer the bits associated with a played back frame.
     bits_in_buffer_model_ -= frame_size_in_bits;
diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc
index 400939a..7a5bd5b 100644
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -21,20 +21,7 @@
 
 using ::libvpx_test::ACMRandom;
 
-typedef void (*Hadamard8x8Func)(const int16_t *a, int a_stride,
-                                int16_t *b);
-
-class HadamardTest : public ::testing::TestWithParam<Hadamard8x8Func> {
- public:
-  virtual void SetUp() {
-    h_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  Hadamard8x8Func h_func_;
-  ACMRandom rnd_;
-};
+typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
 
 void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
   int16_t b[8];
@@ -59,28 +46,74 @@
   out[5] = c[3] - c[7];
 }
 
-void reference_hadamard(const int16_t *a, int a_stride, int16_t *b) {
+void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
   int16_t buf[64];
-  for (int i = 0; i < 8; i++) {
+  for (int i = 0; i < 8; ++i) {
     hadamard_loop(a + i, a_stride, buf + i * 8);
   }
 
-  for (int i = 0; i < 8; i++) {
+  for (int i = 0; i < 8; ++i) {
     hadamard_loop(buf + i, 8, b + i * 8);
   }
 }
 
-TEST_P(HadamardTest, CompareReferenceRandom) {
+void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
+  /* The source is a 16x16 block. The destination is rearranged to 8x32.
+   * Input is 9 bit. */
+  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
+  reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
+  reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
+  reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
+
+  /* Overlay the 8x8 blocks and combine. */
+  for (int i = 0; i < 64; ++i) {
+    /* 8x8 steps the range up to 15 bits. */
+    const int16_t a0 = b[0];
+    const int16_t a1 = b[64];
+    const int16_t a2 = b[128];
+    const int16_t a3 = b[192];
+
+    /* Prevent the result from escaping int16_t. */
+    const int16_t b0 = (a0 + a1) >> 1;
+    const int16_t b1 = (a0 - a1) >> 1;
+    const int16_t b2 = (a2 + a3) >> 1;
+    const int16_t b3 = (a2 - a3) >> 1;
+
+    /* Store a 16 bit value. */
+    b[  0] = b0 + b2;
+    b[ 64] = b1 + b3;
+    b[128] = b0 - b2;
+    b[192] = b1 - b3;
+
+    ++b;
+  }
+}
+
+class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
+ public:
+  virtual void SetUp() {
+    h_func_ = GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  HadamardFunc h_func_;
+  ACMRandom rnd_;
+};
+
+class Hadamard8x8Test : public HadamardTestBase {};
+
+TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
   DECLARE_ALIGNED(16, int16_t, a[64]);
   DECLARE_ALIGNED(16, int16_t, b[64]);
   int16_t b_ref[64];
-  for (int i = 0; i < 64; i++) {
+  for (int i = 0; i < 64; ++i) {
     a[i] = rnd_.Rand9Signed();
   }
   memset(b, 0, sizeof(b));
   memset(b_ref, 0, sizeof(b_ref));
 
-  reference_hadamard(a, 8, b_ref);
+  reference_hadamard8x8(a, 8, b_ref);
   ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
 
   // The order of the output is not important. Sort before checking.
@@ -89,11 +122,11 @@
   EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
 }
 
-TEST_P(HadamardTest, VaryStride) {
+TEST_P(Hadamard8x8Test, VaryStride) {
   DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
   DECLARE_ALIGNED(16, int16_t, b[64]);
   int16_t b_ref[64];
-  for (int i = 0; i < 64 * 8; i++) {
+  for (int i = 0; i < 64 * 8; ++i) {
     a[i] = rnd_.Rand9Signed();
   }
 
@@ -101,7 +134,7 @@
     memset(b, 0, sizeof(b));
     memset(b_ref, 0, sizeof(b_ref));
 
-    reference_hadamard(a, i, b_ref);
+    reference_hadamard8x8(a, i, b_ref);
     ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
 
     // The order of the output is not important. Sort before checking.
@@ -111,21 +144,77 @@
   }
 }
 
-INSTANTIATE_TEST_CASE_P(C, HadamardTest,
+INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
                         ::testing::Values(&vpx_hadamard_8x8_c));
 
 #if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, HadamardTest,
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
                         ::testing::Values(&vpx_hadamard_8x8_sse2));
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(SSSE3, HadamardTest,
+INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
                         ::testing::Values(&vpx_hadamard_8x8_ssse3));
 #endif  // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
 
 #if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, HadamardTest,
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
                         ::testing::Values(&vpx_hadamard_8x8_neon));
 #endif  // HAVE_NEON
+
+class Hadamard16x16Test : public HadamardTestBase {};
+
+TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
+  DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+  int16_t b_ref[16 * 16];
+  for (int i = 0; i < 16 * 16; ++i) {
+    a[i] = rnd_.Rand9Signed();
+  }
+  memset(b, 0, sizeof(b));
+  memset(b_ref, 0, sizeof(b_ref));
+
+  reference_hadamard16x16(a, 16, b_ref);
+  ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
+
+  // The order of the output is not important. Sort before checking.
+  std::sort(b, b + 16 * 16);
+  std::sort(b_ref, b_ref + 16 * 16);
+  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+}
+
+TEST_P(Hadamard16x16Test, VaryStride) {
+  DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+  int16_t b_ref[16 * 16];
+  for (int i = 0; i < 16 * 16 * 8; ++i) {
+    a[i] = rnd_.Rand9Signed();
+  }
+
+  for (int i = 8; i < 64; i += 8) {
+    memset(b, 0, sizeof(b));
+    memset(b_ref, 0, sizeof(b_ref));
+
+    reference_hadamard16x16(a, i, b_ref);
+    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+    // The order of the output is not important. Sort before checking.
+    std::sort(b, b + 16 * 16);
+    std::sort(b_ref, b_ref + 16 * 16);
+    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
+                        ::testing::Values(&vpx_hadamard_16x16_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
+                        ::testing::Values(&vpx_hadamard_16x16_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
+                        ::testing::Values(&vpx_hadamard_16x16_neon));
+#endif  // HAVE_NEON
 }  // namespace
diff --git a/test/level_test.cc b/test/level_test.cc
index 5b9ce41..62d0247 100644
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -22,7 +22,9 @@
      : EncoderTest(GET_PARAM(0)),
        encoding_mode_(GET_PARAM(1)),
        cpu_used_(GET_PARAM(2)),
-       target_level_(0) {}
+       min_gf_internal_(24),
+       target_level_(0),
+       level_(0) {}
   virtual ~LevelTest() {}
 
   virtual void SetUp() {
@@ -47,6 +49,7 @@
     if (video->frame() == 0) {
       encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
       encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_);
+      encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_);
       if (encoding_mode_ != ::libvpx_test::kRealTime) {
         encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
         encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
@@ -54,20 +57,33 @@
         encoder->Control(VP8E_SET_ARNR_TYPE, 3);
       }
     }
+    encoder->Control(VP9E_GET_LEVEL, &level_);
+    ASSERT_LE(level_, 51);
+    ASSERT_GE(level_, 0);
   }
 
   ::libvpx_test::TestMode encoding_mode_;
   int cpu_used_;
+  int min_gf_internal_;
   int target_level_;
+  int level_;
 };
 
+// Test for keeping level stats only
 TEST_P(LevelTest, TestTargetLevel0) {
   ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       30);
+                                       40);
   target_level_ = 0;
+  min_gf_internal_ = 4;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(11, level_);
+
+  cfg_.rc_target_bitrate = 1600;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(20, level_);
 }
 
+// Test for level control being turned off
 TEST_P(LevelTest, TestTargetLevel255) {
   ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                        30);
@@ -98,7 +114,6 @@
 
 VP9_INSTANTIATE_TEST_CASE(LevelTest,
                           ::testing::Values(::libvpx_test::kTwoPassGood,
-                                            ::libvpx_test::kOnePassGood,
-                                            ::libvpx_test::kRealTime),
+                                            ::libvpx_test::kOnePassGood),
                           ::testing::Range(0, 9));
 }  // namespace
diff --git a/test/realtime_test.cc b/test/realtime_test.cc
new file mode 100644
index 0000000..24749e4
--- /dev/null
+++ b/test/realtime_test.cc
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const int kVideoSourceWidth = 320;
+const int kVideoSourceHeight = 240;
+const int kFramesToEncode = 2;
+
+class RealtimeTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  RealtimeTest()
+      : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
+  virtual ~RealtimeTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    cfg_.g_lag_in_frames = 0;
+    SetMode(::libvpx_test::kRealTime);
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    // TODO(tomfinegan): We're changing the pass value here to make sure
+    // we get frames when real time mode is combined with |g_pass| set to
+    // VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
+    // the pass value based on the mode passed into EncoderTest::SetMode(),
+    // which overrides the one specified in SetUp() above.
+    cfg_.g_pass = VPX_RC_FIRST_PASS;
+  }
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
+    frame_packets_++;
+  }
+
+  int frame_packets_;
+};
+
+TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
+  ::libvpx_test::RandomVideoSource video;
+  video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
+  video.set_limit(kFramesToEncode);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_EQ(kFramesToEncode, frame_packets_);
+}
+
+VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
+VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
+
+}  // namespace
diff --git a/test/test.mk b/test/test.mk
index 1eb702f..04acd96 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -24,6 +24,7 @@
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_api_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += realtime_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h
diff --git a/third_party/googletest/README.libvpx b/third_party/googletest/README.libvpx
index 7201a67..1eca78d 100644
--- a/third_party/googletest/README.libvpx
+++ b/third_party/googletest/README.libvpx
@@ -12,4 +12,8 @@
 generation.
 
 Local Modifications:
-Removed unused declarations of kPathSeparatorString to have warning free build.
\ No newline at end of file
+- Removed unused declarations of kPathSeparatorString to have warning
+  free build.
+- Added GTEST_ATTRIBUTE_UNUSED_ to test registering dummies in TEST_P
+  and INSTANTIATE_TEST_CASE_P to remove warnings about unused variables
+  under GCC 5.
\ No newline at end of file
diff --git a/third_party/googletest/src/include/gtest/gtest.h b/third_party/googletest/src/include/gtest/gtest.h
index 4f3804f..581a44e 100644
--- a/third_party/googletest/src/include/gtest/gtest.h
+++ b/third_party/googletest/src/include/gtest/gtest.h
@@ -16960,7 +16960,7 @@
                       GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
       return 0; \
     } \
-    static int gtest_registering_dummy_; \
+    static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
     GTEST_DISALLOW_COPY_AND_ASSIGN_(\
         GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
   }; \
@@ -16972,7 +16972,7 @@
 # define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
   ::testing::internal::ParamGenerator<test_case_name::ParamType> \
       gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
-  int gtest_##prefix##test_case_name##_dummy_ = \
+  int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \
       ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
           GetTestCasePatternHolder<test_case_name>(\
               #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 155847c..472a7b5 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -104,7 +104,7 @@
 extern const unsigned char vp8_mbsplit_offset[4][16];
 
 
-static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
+static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
 {
     if (!(b & 3))
     {
@@ -119,7 +119,8 @@
     return (cur_mb->bmi + b - 1)->mv.as_int;
 }
 
-static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
+static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
+                                      int mi_stride)
 {
     if (!(b >> 2))
     {
diff --git a/vp8/decoder/decodeframe.c b/vp8/decoder/decodeframe.c
index 4bc87eb..566972e 100644
--- a/vp8/decoder/decodeframe.c
+++ b/vp8/decoder/decodeframe.c
@@ -986,7 +986,8 @@
     VP8_COMMON *const pc = &pbi->common;
     MACROBLOCKD *const xd  = &pbi->mb;
     const unsigned char *data = pbi->fragments.ptrs[0];
-    const unsigned char *data_end =  data + pbi->fragments.sizes[0];
+    const unsigned int data_sz = pbi->fragments.sizes[0];
+    const unsigned char *data_end = data + data_sz;
     ptrdiff_t first_partition_length_in_bytes;
 
     int i, j, k, l;
@@ -1022,7 +1023,7 @@
         const unsigned char *clear = data;
         if (pbi->decrypt_cb)
         {
-            int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data);
+            int n = (int)VPXMIN(sizeof(clear_buffer), data_sz);
             pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
             clear = clear_buffer;
         }
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index cff99c0..26ce120 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -529,7 +529,7 @@
         // Bias on zero motion vector sse.
         const int zero_bias = denoiser->denoise_pars.denoise_mv_bias;
         zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
-        sse_diff = zero_mv_sse - best_sse;
+        sse_diff = (int)zero_mv_sse - (int)best_sse;
 
         saved_mbmi = *mbmi;
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 95bb394..c526a3e 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -18,6 +18,7 @@
 #include "onyx_int.h"
 #include "vpx_dsp/variance.h"
 #include "encodeintra.h"
+#include "vp8/common/common.h"
 #include "vp8/common/setupintrarecon.h"
 #include "vp8/common/systemdependent.h"
 #include "mcomp.h"
@@ -2417,7 +2418,7 @@
     int tmp_q;
     int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
 
-    FIRSTPASS_STATS this_frame = {0};
+    FIRSTPASS_STATS this_frame;
     FIRSTPASS_STATS this_frame_copy;
 
     double this_frame_intra_error;
@@ -2425,6 +2426,8 @@
 
     int overhead_bits;
 
+    vp8_zero(this_frame);
+
     if (!cpi->twopass.stats_in)
     {
         return ;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 6617422..d5a0fff 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1523,7 +1523,8 @@
 void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
 {
     VP8_COMMON *cm = &cpi->common;
-    int last_w, last_h, prev_number_of_layers;
+    int last_w, last_h;
+    unsigned int prev_number_of_layers;
 
     if (!cpi)
         return;
@@ -1786,10 +1787,8 @@
     if (last_w != cpi->oxcf.Width || last_h != cpi->oxcf.Height)
         cpi->force_next_frame_intra = 1;
 
-    if (((cm->Width + 15) & 0xfffffff0) !=
-          cm->yv12_fb[cm->lst_fb_idx].y_width ||
-        ((cm->Height + 15) & 0xfffffff0) !=
-          cm->yv12_fb[cm->lst_fb_idx].y_height ||
+    if (((cm->Width + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+        ((cm->Height + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
         cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
     {
         dealloc_raw_frame_buffers(cpi);
@@ -5221,7 +5220,7 @@
         vp8_second_pass(cpi);
 
     encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags);
-    cpi->twopass.bits_left -= 8 * *size;
+    cpi->twopass.bits_left -= 8 * (int)(*size);
 
     if (!cpi->common.refresh_alt_ref_frame)
     {
@@ -5857,7 +5856,7 @@
         return -1;
 
     // Check number of rows and columns match
-    if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
+    if (cpi->common.mb_rows != (int)rows || cpi->common.mb_cols != (int)cols)
         return -1;
 
     // Range check the delta Q values and convert the external Q range values
@@ -5913,7 +5912,7 @@
 
 int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols)
 {
-    if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols)
+    if ((int)rows == cpi->common.mb_rows && (int)cols == cpi->common.mb_cols)
     {
         if (map)
         {
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 6ede9b9..44fbbd4 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -371,7 +371,7 @@
     double key_frame_rate_correction_factor;
     double gf_rate_correction_factor;
 
-    unsigned int frames_since_golden;
+    int frames_since_golden;
     /* Count down till next GF */
     int frames_till_gf_update_due;
 
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 257d2a0..b19ab7a 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -22,6 +22,7 @@
 #include "vpx/vp8cx.h"
 #include "vp8/encoder/firstpass.h"
 #include "vp8/common/onyx.h"
+#include "vp8/common/common.h"
 #include <stdlib.h>
 #include <string.h>
 
@@ -760,7 +761,7 @@
                                     unsigned long          duration,
                                     unsigned long          deadline)
 {
-    unsigned int new_qc;
+    int new_qc;
 
 #if !(CONFIG_REALTIME_ONLY)
     /* Use best quality mode if no deadline is given. */
@@ -785,7 +786,9 @@
     new_qc = MODE_REALTIME;
 #endif
 
-    if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
+    if (deadline == VPX_DL_REALTIME)
+        new_qc = MODE_REALTIME;
+    else if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
         new_qc = MODE_FIRSTPASS;
     else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
         new_qc = (new_qc == MODE_BESTQUALITY)
@@ -1116,7 +1119,8 @@
 {
 
     YV12_BUFFER_CONFIG sd;
-    vp8_ppflags_t flags = {0};
+    vp8_ppflags_t flags;
+    vp8_zero(flags);
 
     if (ctx->preview_ppcfg.post_proc_flag)
     {
@@ -1305,8 +1309,8 @@
         30,                 /* rc_resize_up_thresold */
 
         VPX_VBR,            /* rc_end_usage */
-        {0},                /* rc_twopass_stats_in */
-        {0},                /* rc_firstpass_mb_stats_in */
+        {NULL, 0},          /* rc_twopass_stats_in */
+        {NULL, 0},          /* rc_firstpass_mb_stats_in */
         256,                /* rc_target_bandwidth */
         4,                  /* rc_min_quantizer */
         63,                 /* rc_max_quantizer */
@@ -1334,6 +1338,8 @@
         {0},                /* ts_rate_decimator */
         0,                  /* ts_periodicity */
         {0},                /* ts_layer_id */
+        {0},                /* layer_target_bitrate */
+        0                   /* temporal_layering_mode */
     }},
 };
 
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 9c78de1..fc9288d 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -522,7 +522,8 @@
     {
         YV12_BUFFER_CONFIG sd;
         int64_t time_stamp = 0, time_end_stamp = 0;
-        vp8_ppflags_t flags = {0};
+        vp8_ppflags_t flags;
+        vp8_zero(flags);
 
         if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
         {
@@ -816,11 +817,12 @@
     },
     { /* encoder functions */
         0,
-        NULL,
-        NULL,
-        NULL,
-        NULL,
-        NULL,
-        NULL
+        NULL,  /* vpx_codec_enc_cfg_map_t */
+        NULL,  /* vpx_codec_encode_fn_t */
+        NULL,  /* vpx_codec_get_cx_data_fn_t */
+        NULL,  /* vpx_codec_enc_config_set_fn_t */
+        NULL,  /* vpx_codec_get_global_headers_fn_t */
+        NULL,  /* vpx_codec_get_preview_frame_fn_t */
+        NULL   /* vpx_codec_enc_mr_get_mem_loc_fn_t */
     }
 };
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 2aff132..908fa80 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -67,25 +67,6 @@
 
 #define VP9_FRAME_MARKER 0x2
 
-typedef enum {
-  LEVEL_UNKNOWN = 0,
-  LEVEL_1 = 10,
-  LEVEL_1_1 = 11,
-  LEVEL_2 = 20,
-  LEVEL_2_1 = 21,
-  LEVEL_3 = 30,
-  LEVEL_3_1 = 31,
-  LEVEL_4 = 40,
-  LEVEL_4_1 = 41,
-  LEVEL_5 = 50,
-  LEVEL_5_1 = 51,
-  LEVEL_5_2 = 52,
-  LEVEL_6 = 60,
-  LEVEL_6_1 = 61,
-  LEVEL_6_2 = 62,
-  LEVEL_NOT_CARE = 255,
-} VP9_LEVEL;
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 1df6f08..3fd935e 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -168,8 +168,6 @@
 
   int allow_high_precision_mv;
 
-  int keep_level_stats;
-
   // Flag signaling that the frame context should be reset to default values.
   // 0 or 1 implies don't reset, 2 reset just the context specified in the
   // frame header, 3 reset all contexts.
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 61cca39..73a2db0 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -891,7 +891,7 @@
     vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
 }
 
-static int get_refresh_mask(VP9_COMP *cpi) {
+int vp9_get_refresh_mask(VP9_COMP *cpi) {
   if (vp9_preserve_existing_gf(cpi)) {
     // We have decided to preserve the previously existing golden frame as our
     // new ARF frame. However, in the short term we leave it in the GF slot and,
@@ -1107,11 +1107,11 @@
         write_bitdepth_colorspace_sampling(cm, wb);
       }
 
-      vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+      vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
       write_frame_size(cm, wb);
     } else {
       MV_REFERENCE_FRAME ref_frame;
-      vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+      vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
       for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
         assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
         vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h
index da6b414..f24d20f 100644
--- a/vp9/encoder/vp9_bitstream.h
+++ b/vp9/encoder/vp9_bitstream.h
@@ -18,6 +18,8 @@
 
 #include "vp9/encoder/vp9_encoder.h"
 
+int vp9_get_refresh_mask(VP9_COMP *cpi);
+
 void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
 
 static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index bbdfbb8..069c335 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -146,9 +146,9 @@
   uint8_t sb_is_skin;
 
   // Used to save the status of whether a block has a low variance in
-  // choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for
-  // 32x32.
-  uint8_t variance_low[9];
+  // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for
+  // 32x32, 9~24 for 16x16.
+  uint8_t variance_low[25];
 
   void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
   void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 21a66bb..6af0c0b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -773,9 +773,7 @@
     }
   }
 
-  for (i = 0; i < 9; i++) {
-    x->variance_low[i] = 0;
-  }
+  memset(x->variance_low, 0, sizeof(x->variance_low));
 
   if (xd->mb_to_right_edge < 0)
     pixels_wide += (xd->mb_to_right_edge >> 3);
@@ -798,7 +796,7 @@
     const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
 
     const YV12_BUFFER_CONFIG *yv12_g = NULL;
-    unsigned int y_sad, y_sad_g;
+    unsigned int y_sad, y_sad_g, y_sad_thr;
     const BLOCK_SIZE bsize = BLOCK_32X32
         + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
 
@@ -831,7 +829,10 @@
     mi->interp_filter = BILINEAR;
 
     y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
-    if (y_sad_g < y_sad) {
+    // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
+    // are close if short_circuit_low_temp_var is on.
+    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+    if (y_sad_g < y_sad_thr) {
       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
       mi->ref_frame[0] = GOLDEN_FRAME;
@@ -1083,28 +1084,53 @@
   }
 
   if (cpi->sf.short_circuit_low_temp_var) {
-    // Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was
-    // selected.
-    if (ref_frame_partition == LAST_FRAME) {
+    const int mv_thr = cm->width > 640 ? 8 : 4;
+    // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected
+    // and int_pro mv is small. If the temporal variance is small set the
+    // variance_low flag for the block. The variance threshold can be adjusted,
+    // the higher the more aggressive.
+    if (ref_frame_partition == LAST_FRAME &&
+        (cpi->sf.short_circuit_low_temp_var == 1 ||
+         (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
+          xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
+          xd->mi[0]->mv[0].as_mv.row < mv_thr &&
+          xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
       if (xd->mi[0]->sb_type == BLOCK_64X64 &&
           vt.part_variances.none.variance < (thresholds[0] >> 1)) {
         x->variance_low[0] = 1;
       } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
-        if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))
-          x->variance_low[1] = 1;
-        if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))
-          x->variance_low[2] = 1;
+        for (j = 0; j < 2; j++) {
+          if (vt.part_variances.horz[j].variance < (thresholds[0] >> 2))
+            x->variance_low[j + 1] = 1;
+        }
       } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
-        if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))
-          x->variance_low[3] = 1;
-        if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))
-          x->variance_low[4] = 1;
+        for (j = 0; j < 2; j++) {
+          if (vt.part_variances.vert[j].variance < (thresholds[0] >> 2))
+            x->variance_low[j + 3] = 1;
+        }
       } else {
-        // 32x32
         for (i = 0; i < 4; i++) {
-          if (!force_split[i + 1] &&
-              vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))
-            x->variance_low[i + 5] = 1;
+          if (!force_split[i + 1]) {
+            // 32x32
+            if (vt.split[i].part_variances.none.variance <
+                (thresholds[1] >> 1))
+              x->variance_low[i + 5] = 1;
+          } else if (cpi->sf.short_circuit_low_temp_var == 2) {
+            int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4};
+            const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i];
+            MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
+            // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
+            // inside.
+            if ((*this_mi)->sb_type == BLOCK_16X16 ||
+                (*this_mi)->sb_type == BLOCK_32X16 ||
+                (*this_mi)->sb_type == BLOCK_16X32) {
+              for (j = 0; j < 4; j++) {
+                if (vt.split[i].split[j].part_variances.none.variance <
+                    (thresholds[2] >> 8))
+                  x->variance_low[(i << 2) + j + 9] = 1;
+              }
+            }
+          }
         }
       }
     }
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 936e625..e2c2e23 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -54,9 +54,10 @@
   (((1 << (VP9_PROB_COST_SHIFT - 1)) + (R) * (RM)) & \
    ((1 << VP9_PROB_COST_SHIFT) - 1))
 
+// TODO(aconverse): Re-pack this structure.
 typedef struct vp9_token_state {
   int           rate;
-  int           error;
+  int64_t       error;
   int           next;
   int16_t       token;
   tran_low_t    qc;
@@ -112,7 +113,8 @@
   const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
   const int64_t rddiv = mb->rddiv;
   int64_t rd_cost0, rd_cost1;
-  int rate0, rate1, error0, error1;
+  int rate0, rate1;
+  int64_t error0, error1;
   int16_t t0, t1;
   EXTRABIT e0;
   int best, band, pt, i, final_eob;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 8201794..3f88d9c 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -86,6 +86,25 @@
 FILE *keyfile;
 #endif
 
+static const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
+  {LEVEL_1,   829440,      36864,    200,    400,   2, 1,  4,  8},
+  {LEVEL_1_1, 2764800,     73728,    800,    1000,  2, 1,  4,  8},
+  {LEVEL_2,   4608000,     122880,   1800,   1500,  2, 1,  4,  8},
+  {LEVEL_2_1, 9216000,     245760,   3600,   2800,  2, 2,  4,  8},
+  {LEVEL_3,   20736000,    552960,   7200,   6000,  2, 4,  4,  8},
+  {LEVEL_3_1, 36864000,    983040,   12000,  10000, 2, 4,  4,  8},
+  {LEVEL_4,   83558400,    2228224,  18000,  16000, 4, 4,  4,  8},
+  {LEVEL_4_1, 160432128,   2228224,  30000,  18000, 4, 4,  5,  6},
+  {LEVEL_5,   311951360,   8912896,  60000,  36000, 6, 8,  6,  4},
+  {LEVEL_5_1, 588251136,   8912896,  120000, 46000, 8, 8,  10, 4},
+  // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
+  // they are finalized (currently TBD).
+  {LEVEL_5_2, 1176502272,  8912896,  180000, 0,     8, 8,  10, 4},
+  {LEVEL_6,   1176502272,  35651584, 180000, 0,     8, 16, 10, 4},
+  {LEVEL_6_1, 2353004544u, 35651584, 240000, 0,     8, 16, 10, 4},
+  {LEVEL_6_2, 4706009088u, 35651584, 480000, 0,     8, 16, 10, 4},
+};
+
 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
   switch (mode) {
     case NORMAL:
@@ -159,6 +178,39 @@
   }
 }
 
+static void init_level_info(Vp9LevelInfo *level_info) {
+  Vp9LevelStats *const level_stats = &level_info->level_stats;
+  Vp9LevelSpec *const level_spec = &level_info->level_spec;
+
+  memset(level_stats, 0, sizeof(*level_stats));
+  memset(level_spec, 0, sizeof(*level_spec));
+  level_spec->level = LEVEL_UNKNOWN;
+  level_spec->min_altref_distance = INT_MAX;
+}
+
+VP9_LEVEL vp9_get_level(const Vp9LevelSpec * const level_spec) {
+  int i;
+  const Vp9LevelSpec *this_level;
+
+  vpx_clear_system_state();
+
+  for (i = 0; i < VP9_LEVELS; ++i) {
+    this_level = &vp9_level_defs[i];
+    if ((double)level_spec->max_luma_sample_rate * (1 + SAMPLE_RATE_GRACE_P) >
+        (double)this_level->max_luma_sample_rate ||
+        level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
+        level_spec->average_bitrate > this_level->average_bitrate ||
+        level_spec->max_cpb_size > this_level->max_cpb_size ||
+        level_spec->compression_ratio < this_level->compression_ratio ||
+        level_spec->max_col_tiles > this_level->max_col_tiles ||
+        level_spec->min_altref_distance < this_level->min_altref_distance ||
+        level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
+      continue;
+    break;
+  }
+  return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
+}
+
 int vp9_set_active_map(VP9_COMP* cpi,
                        unsigned char* new_map_16x16,
                        int rows,
@@ -783,7 +835,7 @@
   cm->color_range = oxcf->color_range;
 
   cpi->target_level = oxcf->target_level;
-  cm->keep_level_stats = oxcf->target_level != LEVEL_NOT_CARE;
+  cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
 
   cm->width = oxcf->width;
   cm->height = oxcf->height;
@@ -1476,7 +1528,7 @@
   cm->color_range = oxcf->color_range;
 
   cpi->target_level = oxcf->target_level;
-  cm->keep_level_stats = oxcf->target_level != LEVEL_NOT_CARE;
+  cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
 
   if (cm->profile <= PROFILE_1)
     assert(cm->bit_depth == VPX_BITS_8);
@@ -1660,7 +1712,6 @@
   } while (++i <= MV_MAX);
 }
 
-
 VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
                                 BufferPool *const pool) {
   unsigned int i;
@@ -1749,6 +1800,9 @@
   cpi->multi_arf_last_grp_enabled = 0;
 
   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
+
+  init_level_info(&cpi->level_info);
+
 #if CONFIG_INTERNAL_STATS
   cpi->b_calculate_ssimg = 0;
   cpi->b_calculate_blockiness = 1;
@@ -2199,7 +2253,7 @@
 static void encoder_highbd_variance64(const uint8_t *a8, int  a_stride,
                                       const uint8_t *b8, int  b_stride,
                                       int w, int h, uint64_t *sse,
-                                      uint64_t *sum) {
+                                      int64_t *sum) {
   int i, j;
 
   uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -2223,7 +2277,7 @@
                                       int w, int h,
                                       unsigned int *sse, int *sum) {
   uint64_t sse_long = 0;
-  uint64_t sum_long = 0;
+  int64_t sum_long = 0;
   encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
                             &sse_long, &sum_long);
   *sse = (unsigned int)sse_long;
@@ -2798,7 +2852,7 @@
   } else if (vp9_preserve_existing_gf(cpi)) {
     // We have decided to preserve the previously existing golden frame as our
     // new ARF frame. However, in the short term in function
-    // vp9_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
+    // vp9_get_refresh_mask() we left it in the GF slot and, if
     // we're updating the GF with the current decoded frame, we save it to the
     // ARF slot instead.
     // We now have to update the ARF with the current frame and swap gld_fb_idx
@@ -4420,6 +4474,124 @@
 }
 #endif  // CONFIG_INTERNAL_STATS
 
+static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
+  VP9_COMMON *const cm = &cpi->common;
+  Vp9LevelInfo *const level_info = &cpi->level_info;
+  Vp9LevelSpec *const level_spec = &level_info->level_spec;
+  Vp9LevelStats *const level_stats = &level_info->level_stats;
+  int i, idx;
+  uint64_t luma_samples, dur_end;
+  const uint32_t luma_pic_size = cm->width * cm->height;
+  double cpb_data_size;
+
+  vpx_clear_system_state();
+
+  // update level_stats
+  level_stats->total_compressed_size += *size;
+  if (cm->show_frame) {
+    level_stats->total_uncompressed_size +=
+        luma_pic_size +
+        2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
+    level_stats->time_encoded =
+        (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
+        (double)TICKS_PER_SEC;
+  }
+
+  if (arf_src_index > 0) {
+    if (!level_stats->seen_first_altref) {
+      level_stats->seen_first_altref = 1;
+    } else if (level_stats->frames_since_last_altref <
+             level_spec->min_altref_distance) {
+      level_spec->min_altref_distance = level_stats->frames_since_last_altref;
+    }
+    level_stats->frames_since_last_altref = 0;
+  } else {
+    ++level_stats->frames_since_last_altref;
+  }
+
+  if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
+    idx = (level_stats->frame_window_buffer.start +
+           level_stats->frame_window_buffer.len++) % FRAME_WINDOW_SIZE;
+  } else {
+    idx = level_stats->frame_window_buffer.start;
+    level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
+  }
+  level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
+  level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
+  level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
+
+  if (cm->frame_type == KEY_FRAME) {
+    level_stats->ref_refresh_map = 0;
+  } else {
+    int count = 0;
+    level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
+    // Also need to consider the case where the encoder refers to a buffer
+    // that has been implicitly refreshed after encoding a keyframe.
+    if (!cm->intra_only) {
+      level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
+      level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
+      level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
+    }
+    for (i = 0; i < REF_FRAMES; ++i) {
+      count += (level_stats->ref_refresh_map >> i) & 1;
+    }
+    if (count > level_spec->max_ref_frame_buffers) {
+      level_spec->max_ref_frame_buffers = count;
+    }
+  }
+
+  // update average_bitrate
+  level_spec->average_bitrate =
+      (double)level_stats->total_compressed_size / 125.0 /
+      level_stats->time_encoded;
+
+  // update max_luma_sample_rate
+  luma_samples = 0;
+  for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
+    idx = (level_stats->frame_window_buffer.start +
+           level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE;
+    if (i == 0) {
+      dur_end = level_stats->frame_window_buffer.buf[idx].ts;
+    }
+    if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
+        TICKS_PER_SEC) {
+      break;
+    }
+    luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
+  }
+  if (luma_samples > level_spec->max_luma_sample_rate) {
+    level_spec->max_luma_sample_rate = luma_samples;
+  }
+
+  // update max_cpb_size
+  cpb_data_size = 0;
+  for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
+    if (i >= level_stats->frame_window_buffer.len) break;
+    idx = (level_stats->frame_window_buffer.start +
+           level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE;
+    cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
+  }
+  cpb_data_size = cpb_data_size / 125.0;
+  if (cpb_data_size > level_spec->max_cpb_size) {
+    level_spec->max_cpb_size = cpb_data_size;
+  }
+
+  // update max_luma_picture_size
+  if (luma_pic_size > level_spec->max_luma_picture_size) {
+    level_spec->max_luma_picture_size = luma_pic_size;
+  }
+
+  // update compression_ratio
+  level_spec->compression_ratio =
+      (double)level_stats->total_uncompressed_size * cm->bit_depth /
+      level_stats->total_compressed_size / 8.0;
+
+  // update max_col_tiles
+  if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
+    level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
+  }
+}
+
 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                             size_t *size, uint8_t *dest,
                             int64_t *time_stamp, int64_t *time_end, int flush) {
@@ -4690,6 +4862,9 @@
   if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame)
     generate_psnr_packet(cpi);
 
+  if (cpi->keep_level_stats && oxcf->pass != 1)
+    update_level_info(cpi, size, arf_src_index);
+
 #if CONFIG_INTERNAL_STATS
 
   if (oxcf->pass != 1) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 6be61ac..b65dfa8 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -20,6 +20,7 @@
 #include "vpx_dsp/ssim.h"
 #endif
 #include "vpx_dsp/variance.h"
+#include "vpx_ports/system_state.h"
 #include "vpx_util/vpx_thread.h"
 
 #include "vp9/common/vp9_alloccommon.h"
@@ -51,6 +52,9 @@
 extern "C" {
 #endif
 
+// vp9 uses 10,000,000 ticks/second as time stamp
+#define TICKS_PER_SEC 10000000
+
 typedef struct {
   int nmvjointcost[MV_JOINTS];
   int nmvcosts[2][MV_VALS];
@@ -297,6 +301,69 @@
   double worst;
 } ImageStat;
 
+#define CPB_WINDOW_SIZE 4
+#define FRAME_WINDOW_SIZE 128
+#define SAMPLE_RATE_GRACE_P 0.015
+#define VP9_LEVELS 14
+
+typedef enum {
+  LEVEL_UNKNOWN = 0,
+  LEVEL_1 = 10,
+  LEVEL_1_1 = 11,
+  LEVEL_2 = 20,
+  LEVEL_2_1 = 21,
+  LEVEL_3 = 30,
+  LEVEL_3_1 = 31,
+  LEVEL_4 = 40,
+  LEVEL_4_1 = 41,
+  LEVEL_5 = 50,
+  LEVEL_5_1 = 51,
+  LEVEL_5_2 = 52,
+  LEVEL_6 = 60,
+  LEVEL_6_1 = 61,
+  LEVEL_6_2 = 62,
+  LEVEL_MAX = 255
+} VP9_LEVEL;
+
+typedef struct {
+  VP9_LEVEL level;
+  uint64_t max_luma_sample_rate;
+  uint32_t max_luma_picture_size;
+  double average_bitrate;  // in kilobits per second
+  double max_cpb_size;  // in kilobits
+  double compression_ratio;
+  uint8_t max_col_tiles;
+  uint32_t min_altref_distance;
+  uint8_t max_ref_frame_buffers;
+} Vp9LevelSpec;
+
+typedef struct {
+  int64_t ts;  // timestamp
+  uint32_t luma_samples;
+  uint32_t size;  // in bytes
+} FrameRecord;
+
+typedef struct {
+  FrameRecord buf[FRAME_WINDOW_SIZE];
+  uint8_t start;
+  uint8_t len;
+} FrameWindowBuffer;
+
+typedef struct {
+  uint8_t seen_first_altref;
+  uint32_t frames_since_last_altref;
+  uint64_t total_compressed_size;
+  uint64_t total_uncompressed_size;
+  double time_encoded;  // in seconds
+  FrameWindowBuffer frame_window_buffer;
+  int ref_refresh_map;
+} Vp9LevelStats;
+
+typedef struct {
+  Vp9LevelStats level_stats;
+  Vp9LevelSpec level_spec;
+} Vp9LevelInfo;
+
 typedef struct VP9_COMP {
   QUANTS quants;
   ThreadData td;
@@ -519,6 +586,9 @@
   VPxWorker *workers;
   struct EncWorkerData *tile_thr_data;
   VP9LfSync lf_row_sync;
+
+  int keep_level_stats;
+  Vp9LevelInfo level_info;
 } VP9_COMP;
 
 void vp9_initialize_enc(void);
@@ -674,6 +744,8 @@
   return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL;
 }
 
+VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
+
 void vp9_new_framerate(VP9_COMP *cpi, double framerate);
 
 #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index a70eaea..f6e61b6 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -41,8 +41,6 @@
 #define OUTPUT_FPF          0
 #define ARF_STATS_OUTPUT    0
 
-#define GROUP_ADAPTIVE_MAXQ 1
-
 #define BOOST_BREAKOUT      12.5
 #define BOOST_FACTOR        12.5
 #define FACTOR_PT_LOW       0.70
@@ -1343,6 +1341,7 @@
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   const int is_two_pass_svc = (svc->number_spatial_layers > 1) ||
                               (svc->number_temporal_layers > 1);
+  RATE_CONTROL *const rc = &cpi->rc;
   TWO_PASS *const twopass = is_two_pass_svc ?
       &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
   double frame_rate;
@@ -1399,15 +1398,21 @@
   }
 
   // Reset the vbr bits off target counters
-  cpi->rc.vbr_bits_off_target = 0;
-  cpi->rc.vbr_bits_off_target_fast = 0;
-
-  cpi->rc.rate_error_estimate = 0;
+  rc->vbr_bits_off_target = 0;
+  rc->vbr_bits_off_target_fast = 0;
+  rc->rate_error_estimate = 0;
 
   // Static sequence monitor variables.
   twopass->kf_zeromotion_pct = 100;
   twopass->last_kfgroup_zeromotion_pct = 100;
 
+  // Initialize bits per macro_block estimate correction factor.
+  twopass->bpm_factor = 1.0;
+  // Initiallize actual and target bits counters for ARF groups so that
+  // at the start we have a neutral bpm adjustment.
+  twopass->rolling_arf_group_target_bits = 1;
+  twopass->rolling_arf_group_actual_bits = 1;
+
   if (oxcf->resize_mode != RESIZE_NONE) {
     init_subsampling(cpi);
   }
@@ -1932,9 +1937,7 @@
   double boost_score = 0.0;
   double old_boost_score = 0.0;
   double gf_group_err = 0.0;
-#if GROUP_ADAPTIVE_MAXQ
   double gf_group_raw_error = 0.0;
-#endif
   double gf_group_skip_pct = 0.0;
   double gf_group_inactive_zone_rows = 0.0;
   double gf_first_frame_err = 0.0;
@@ -1984,9 +1987,7 @@
   // the error score / cost of this frame has already been accounted for.
   if (arf_active_or_kf) {
     gf_group_err -= gf_first_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
     gf_group_raw_error -= this_frame->coded_error;
-#endif
     gf_group_skip_pct -= this_frame->intra_skip_pct;
     gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
   }
@@ -2040,9 +2041,7 @@
     // Accumulate error score of frames in this gf group.
     mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
     gf_group_err += mod_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
     gf_group_raw_error += this_frame->coded_error;
-#endif
     gf_group_skip_pct += this_frame->intra_skip_pct;
     gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
 
@@ -2142,9 +2141,7 @@
       if (EOF == input_stats(twopass, this_frame))
         break;
       gf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
-#if GROUP_ADAPTIVE_MAXQ
       gf_group_raw_error += this_frame->coded_error;
-#endif
       gf_group_skip_pct += this_frame->intra_skip_pct;
       gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
     }
@@ -2159,7 +2156,6 @@
   // Calculate the bits to be allocated to the gf/arf group as a whole
   gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
 
-#if GROUP_ADAPTIVE_MAXQ
   // Calculate an estimate of the maxq needed for the group.
   // We are more agressive about correcting for sections
   // where there could be significant overshoot than for easier
@@ -2181,7 +2177,6 @@
     twopass->active_worst_quality =
         (tmp_q + (twopass->active_worst_quality * 3)) >> 2;
   }
-#endif
 
   // Calculate the extra bits to be used for boosted frame(s)
   gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
@@ -2718,13 +2713,6 @@
       ((double)cm->mb_rows * section_length);
     int tmp_q;
 
-    // Initialize bits per macro_block estimate correction factor.
-    twopass->bpm_factor = 1.0;
-    // Initiallize actual and target bits counters for ARF groups so that
-    // at the start we have a neutral bpm adjustment.
-    twopass->rolling_arf_group_target_bits = 1;
-    twopass->rolling_arf_group_actual_bits = 1;
-
     tmp_q = get_twopass_worst_quality(cpi, section_error,
         section_intra_skip + section_inactive_zone, section_target_bandwidth);
 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 554409b..f027f9f 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -40,6 +40,14 @@
   int in_use;
 } PRED_BUFFER;
 
+
+static const int pos_shift_16x16[4][4] = {
+  {9, 10, 13, 14},
+  {11, 12, 15, 16},
+  {17, 18, 21, 22},
+  {19, 20, 23, 24}
+};
+
 static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
                       const MACROBLOCK *x,
                       const MACROBLOCKD *xd,
@@ -582,39 +590,46 @@
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
-                      int *skippable, int64_t *sse, int plane,
-                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
+static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
+                      int *skippable, int64_t *sse, BLOCK_SIZE bsize,
+                      TX_SIZE tx_size) {
   MACROBLOCKD *xd = &x->e_mbd;
   unsigned int var_y, sse_y;
-  (void)plane;
+
   (void)tx_size;
-  model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
+  model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y,
+                    &sse_y);
   *sse = INT_MAX;
   *skippable = 0;
   return;
 }
 #else
-static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
-                      int *skippable, int64_t *sse, int plane,
-                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
+static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
+                      int *skippable, int64_t *sse, BLOCK_SIZE bsize,
+                      TX_SIZE tx_size) {
   MACROBLOCKD *xd = &x->e_mbd;
-  const struct macroblockd_plane *pd = &xd->plane[plane];
-  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *pd = &xd->plane[0];
+  struct macroblock_plane *const p = &x->plane[0];
   const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
   const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
   const int step = 1 << (tx_size << 1);
   const int block_step = (1 << tx_size);
   int block = 0, r, c;
-  int shift = tx_size == TX_32X32 ? 0 : 2;
   const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
-      xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+      xd->mb_to_right_edge >> 5);
   const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
-      xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+      xd->mb_to_bottom_edge >> 5);
   int eob_cost = 0;
+  const int bw = 4 * num_4x4_w;
+  const int bh = 4 * num_4x4_h;
 
   (void)cpi;
-  vp9_subtract_plane(x, bsize, plane);
+
+  // The max tx_size passed in is TX_16X16.
+  assert(tx_size != TX_32X32);
+
+  vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
+                     pd->dst.buf, pd->dst.stride);
   *skippable = 1;
   // Keep track of the row and column of the blocks we use so that we know
   // if we are in the unrestricted motion border.
@@ -626,18 +641,11 @@
         tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
         tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
         uint16_t *const eob = &p->eobs[block];
-        const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
+        const int diff_stride = bw;
         const int16_t *src_diff;
         src_diff = &p->src_diff[(r * diff_stride + c) << 2];
 
         switch (tx_size) {
-          case TX_32X32:
-            vpx_fdct32x32_rd(src_diff, coeff, diff_stride);
-            vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
-                                  p->round_fp, p->quant_fp, p->quant_shift,
-                                  qcoeff, dqcoeff, pd->dequant, eob,
-                                  scan_order->scan, scan_order->iscan);
-            break;
           case TX_16X16:
             vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
             vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
@@ -670,18 +678,17 @@
     }
   }
 
-  if (*skippable && *sse < INT64_MAX) {
-    *rate = 0;
-    *dist = (*sse << 6) >> shift;
-    *sse = *dist;
-    return;
+  this_rdc->rate = 0;
+  if (*sse < INT64_MAX) {
+    *sse = (*sse << 6) >> 2;
+    if (*skippable) {
+      this_rdc->dist = *sse;
+      return;
+    }
   }
 
   block = 0;
-  *rate = 0;
-  *dist = 0;
-  if (*sse < INT64_MAX)
-    *sse = (*sse << 6) >> shift;
+  this_rdc->dist = 0;
   for (r = 0; r < max_blocks_high; r += block_step) {
     for (c = 0; c < num_4x4_w; c += block_step) {
       if (c < max_blocks_wide) {
@@ -691,25 +698,26 @@
         uint16_t *const eob = &p->eobs[block];
 
         if (*eob == 1)
-          *rate += (int)abs(qcoeff[0]);
+          this_rdc->rate += (int)abs(qcoeff[0]);
         else if (*eob > 1)
-          *rate += vpx_satd((const int16_t *)qcoeff, step << 4);
+          this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4);
 
-        *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
+        this_rdc->dist +=
+            vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2;
       }
       block += step;
     }
   }
 
   // If skippable is set, rate gets clobbered later.
-  *rate <<= (2 + VP9_PROB_COST_SHIFT);
-  *rate += (eob_cost << VP9_PROB_COST_SHIFT);
+  this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT);
+  this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT);
 }
 #endif
 
 static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
                                MACROBLOCK *x, MACROBLOCKD *xd,
-                               int *out_rate_sum, int64_t *out_dist_sum,
+                               RD_COST *this_rdc,
                                unsigned int *var_y, unsigned int *sse_y,
                                int start_plane, int stop_plane) {
   // Note our transform coeffs are 8 times an orthogonal transform.
@@ -720,8 +728,8 @@
   int64_t dist;
   int i;
 
-  *out_rate_sum = 0;
-  *out_dist_sum = 0;
+  this_rdc->rate = 0;
+  this_rdc->dist = 0;
 
   for (i = start_plane; i <= stop_plane; ++i) {
     struct macroblock_plane *const p = &x->plane[i];
@@ -752,8 +760,8 @@
                                  dc_quant >> 3, &rate, &dist);
   #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-    *out_rate_sum += rate >> 1;
-    *out_dist_sum += dist << 3;
+    this_rdc->rate += rate >> 1;
+    this_rdc->dist += dist << 3;
 
   #if CONFIG_VP9_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -768,8 +776,8 @@
                                  ac_quant >> 3, &rate, &dist);
   #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-    *out_rate_sum += rate;
-    *out_dist_sum += dist << 4;
+    this_rdc->rate += rate;
+    this_rdc->dist += dist << 4;
   }
 }
 
@@ -906,8 +914,7 @@
   MACROBLOCK *x;
   PREDICTION_MODE mode;
   int skippable;
-  int rate;
-  int64_t dist;
+  RD_COST *rdc;
 };
 
 static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -924,8 +931,7 @@
   const int src_stride = p->src.stride;
   const int dst_stride = pd->dst.stride;
   int i, j;
-  int rate;
-  int64_t dist;
+  RD_COST this_rdc;
 
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
 
@@ -942,19 +948,19 @@
   if (plane == 0) {
     int64_t this_sse = INT64_MAX;
     // TODO(jingning): This needs further refactoring.
-    block_yrd(cpi, x, &rate, &dist, &args->skippable, &this_sse, 0,
-              bsize_tx, VPXMIN(tx_size, TX_16X16));
+    block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx,
+              VPXMIN(tx_size, TX_16X16));
   } else {
     unsigned int var = 0;
     unsigned int sse = 0;
-    model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse,
-                       plane, plane);
+    model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane,
+                       plane);
   }
 
   p->src.buf = src_buf_base;
   pd->dst.buf = dst_buf_base;
-  args->rate += rate;
-  args->dist += dist;
+  args->rdc->rate += this_rdc.rate;
+  args->rdc->dist += this_rdc.dist;
 }
 
 static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
@@ -1007,7 +1013,7 @@
   MODE_INFO *const mi = xd->mi[0];
   RD_COST this_rdc, best_rdc;
   PREDICTION_MODE this_mode;
-  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0, 0 };
+  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
   const TX_SIZE intra_tx_size =
       VPXMIN(max_txsize_lookup[bsize],
              tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
@@ -1031,22 +1037,20 @@
   // Change the limit of this loop to add other intra prediction
   // mode tests.
   for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
+    this_rdc.dist = this_rdc.rate = 0;
     args.mode = this_mode;
     args.skippable = 1;
-    args.rate = 0;
-    args.dist = 0;
+    args.rdc = &this_rdc;
     mi->tx_size = intra_tx_size;
     vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                            estimate_block_intra, &args);
     if (args.skippable) {
       x->skip_txfm[0] = SKIP_TXFM_AC_DC;
-      args.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
+      this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
     } else {
       x->skip_txfm[0] = SKIP_TXFM_NONE;
-      args.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
+      this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
     }
-    this_rdc.rate = args.rate;
-    this_rdc.dist = args.dist;
     this_rdc.rate += bmode_costs[this_mode];
     this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                              this_rdc.rate, this_rdc.dist);
@@ -1270,9 +1274,11 @@
 }
 #endif  // CONFIG_VP9_TEMPORAL_DENOISING
 
-static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,
+static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low,
                                               int mi_row, int mi_col,
                                               BLOCK_SIZE bsize) {
+  const int i = (mi_row & 0x7) >> 1;
+  const int j = (mi_col & 0x7) >> 1;
   int force_skip_low_temp_var = 0;
   // Set force_skip_low_temp_var based on the block size and block offset.
   if (bsize == BLOCK_64X64) {
@@ -1299,6 +1305,19 @@
     } else if ((mi_col & 0x7) && (mi_row & 0x7)) {
       force_skip_low_temp_var = variance_low[8];
     }
+  } else if (bsize == BLOCK_16X16) {
+    force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
+  } else if (bsize == BLOCK_32X16) {
+    // The col shift index for the second 16x16 block.
+    const int j2 = ((mi_col + 2) & 0x7) >> 1;
+    // Only if each 16x16 block inside has low temporal variance.
+    force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+                              variance_low[pos_shift_16x16[i][j2]];
+  } else if (bsize == BLOCK_16X32) {
+    // The row shift index for the second 16x16 block.
+    const int i2 = ((mi_row + 2) & 0x7) >> 1;
+    force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+                              variance_low[pos_shift_16x16[i2][j]];
   }
   return force_skip_low_temp_var;
 }
@@ -1450,7 +1469,7 @@
 
   if (cpi->sf.short_circuit_low_temp_var) {
     force_skip_low_temp_var =
-        set_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
+        get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
   }
 
   if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
@@ -1503,6 +1522,12 @@
       continue;
     }
 
+    if (cpi->sf.short_circuit_low_temp_var == 2 &&
+        force_skip_low_temp_var && ref_frame == LAST_FRAME &&
+        this_mode == NEWMV) {
+      continue;
+    }
+
     if (cpi->use_svc) {
       if (svc_force_zero_mode[ref_frame - 1] &&
           frame_mv[this_mode][ref_frame].as_int != 0)
@@ -1640,8 +1665,9 @@
 
     if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search
         && (ref_frame == LAST_FRAME ||
-            (ref_frame == GOLDEN_FRAME && cpi->use_svc))
-        && (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
+           (ref_frame == GOLDEN_FRAME &&
+           (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
+           (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
       int pf_rate[3];
       int64_t pf_dist[3];
       unsigned int pf_var[3];
@@ -1696,7 +1722,7 @@
       vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
 
       // For large partition blocks, extra testing is done.
-      if (bsize > BLOCK_32X32 &&
+      if (cpi->oxcf.rc_mode == VPX_CBR && bsize > BLOCK_32X32 &&
         !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
         cm->base_qindex) {
         model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
@@ -1710,8 +1736,8 @@
 
     if (!this_early_term) {
       this_sse = (int64_t)sse_y;
-      block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable,
-                &this_sse, 0, bsize, VPXMIN(mi->tx_size, TX_16X16));
+      block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize,
+                VPXMIN(mi->tx_size, TX_16X16));
       x->skip_txfm[0] = is_skippable;
       if (is_skippable) {
         this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
@@ -1737,17 +1763,15 @@
     }
 
     if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
-      int uv_rate = 0;
-      int64_t uv_dist = 0;
+      RD_COST rdc_uv;
       const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]);
       if (x->color_sensitivity[0])
         vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
       if (x->color_sensitivity[1])
         vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
-      model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &uv_rate, &uv_dist,
-                         &var_y, &sse_y, 1, 2);
-      this_rdc.rate += uv_rate;
-      this_rdc.dist += uv_dist;
+      model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2);
+      this_rdc.rate += rdc_uv.rate;
+      this_rdc.dist += rdc_uv.dist;
     }
 
     this_rdc.rate += rate_mv;
@@ -1842,12 +1866,13 @@
     inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
   }
   // Perform intra prediction search, if the best SAD is above a certain
-  // threshold. Skip intra prediction if force_skip_low_temp_var is set.
-  if (!force_skip_low_temp_var && perform_intra_pred &&
+  // threshold.
+  if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) &&
+      perform_intra_pred &&
       (best_rdc.rdcost == INT64_MAX ||
        (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
         bsize <= cpi->sf.max_intra_bsize))) {
-    struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0, 0 };
+    struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
     int i;
     TX_SIZE best_intra_tx_size = TX_SIZES;
     TX_SIZE intra_tx_size =
@@ -1896,10 +1921,10 @@
 
       mi->mode = this_mode;
       mi->ref_frame[0] = INTRA_FRAME;
+      this_rdc.dist = this_rdc.rate = 0;
       args.mode = this_mode;
       args.skippable = 1;
-      args.rate = 0;
-      args.dist = 0;
+      args.rdc = &this_rdc;
       mi->tx_size = intra_tx_size;
       vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                              estimate_block_intra, &args);
@@ -1907,10 +1932,10 @@
       // mirrors the behavior used by inter
       if (args.skippable) {
         x->skip_txfm[0] = SKIP_TXFM_AC_DC;
-        args.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
+        this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
       } else {
         x->skip_txfm[0] = SKIP_TXFM_NONE;
-        args.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
+        this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
       }
       // Inter and intra RD will mismatch in scale for non-screen content.
       if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
@@ -1921,8 +1946,6 @@
           vp9_foreach_transformed_block_in_plane(xd, bsize, 2,
                                                  estimate_block_intra, &args);
       }
-      this_rdc.rate = args.rate;
-      this_rdc.dist = args.dist;
       this_rdc.rate += cpi->mbmode_cost[this_mode];
       this_rdc.rate += ref_frame_cost[INTRA_FRAME];
       this_rdc.rate += intra_cost_penalty;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 9766c05..d68b684 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -94,7 +94,7 @@
       const int coeff_sign = (coeff >> 31);
       const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
       const int64_t tmp = abs_coeff + round_ptr[rc != 0];
-      const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16);
+      const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> 16);
       qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
       dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
       if (abs_qcoeff)
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 0090b4f..e7f04a2 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -429,7 +429,7 @@
     sf->mv.search_method = NSTEP;
     sf->mv.reduce_first_step_size = 1;
     sf->skip_encode_sb = 0;
-    if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&
+    if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
         content != VP9E_CONTENT_SCREEN) {
       // Enable short circuit for low temporal variance.
       sf->short_circuit_low_temp_var = 1;
@@ -450,6 +450,17 @@
     sf->adaptive_rd_thresh = 4;
     sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
     sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+    // Only keep INTRA_DC mode for speed 8.
+    if (!is_keyframe) {
+      int i = 0;
+      for (i = 0; i < BLOCK_SIZES; ++i)
+        sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+    }
+    if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
+        content != VP9E_CONTENT_SCREEN) {
+      // More aggressive short circuit for speed 8.
+      sf->short_circuit_low_temp_var = 2;
+    }
   }
 }
 
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 71ff0ac..e88a7df 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -449,6 +449,10 @@
 
   // Skip a number of expensive mode evaluations for blocks with very low
   // temporal variance.
+  // 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32.
+  // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL
+  // INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and
+  // 32x16.
   int short_circuit_low_temp_var;
 } SPEED_FEATURES;
 
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 1a11a6d..9ad86cb 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -205,7 +205,7 @@
         level != LEVEL_4 && level != LEVEL_4_1 && level != LEVEL_5 &&
         level != LEVEL_5_1 && level != LEVEL_5_2 && level != LEVEL_6 &&
         level != LEVEL_6_1 && level != LEVEL_6_2 &&
-        level != LEVEL_UNKNOWN && level != LEVEL_NOT_CARE)
+        level != LEVEL_UNKNOWN && level != LEVEL_MAX)
     ERROR("target_level is invalid");
   }
 
@@ -807,6 +807,13 @@
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static vpx_codec_err_t ctrl_get_level(vpx_codec_alg_priv_t *ctx, va_list args) {
+  int *const arg = va_arg(args, int *);
+  if (arg == NULL) return VPX_CODEC_INVALID_PARAM;
+  *arg = (int)vp9_get_level(&ctx->cpi->level_info.level_spec);
+  return VPX_CODEC_OK;
+}
+
 static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
                                     vpx_codec_priv_enc_mr_cfg_t *data) {
   vpx_codec_err_t res = VPX_CODEC_OK;
@@ -898,6 +905,11 @@
       break;
   }
 
+  if (deadline == VPX_DL_REALTIME) {
+    ctx->oxcf.pass = 0;
+    new_mode = REALTIME;
+  }
+
   if (ctx->oxcf.mode != new_mode) {
     ctx->oxcf.mode = new_mode;
     vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -964,9 +976,6 @@
   return index_sz;
 }
 
-// vp9 uses 10,000,000 ticks/second as time stamp
-#define TICKS_PER_SEC 10000000LL
-
 static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
                                        int64_t n) {
   return n * TICKS_PER_SEC * timebase->num / timebase->den;
@@ -974,7 +983,7 @@
 
 static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase,
                                        int64_t n) {
-  const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1;
+  const int64_t round = (int64_t)TICKS_PER_SEC * timebase->num / 2 - 1;
   return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
 }
 
@@ -1547,6 +1556,7 @@
   {VP9_GET_REFERENCE,                 ctrl_get_reference},
   {VP9E_GET_SVC_LAYER_ID,             ctrl_get_svc_layer_id},
   {VP9E_GET_ACTIVEMAP,                ctrl_get_active_map},
+  {VP9E_GET_LEVEL,                    ctrl_get_level},
 
   { -1, NULL},
 };
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index b059d47..2752a86 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -562,7 +562,13 @@
    *
    * Supported in codecs: VP9
    */
-  VP9E_SET_TARGET_LEVEL
+  VP9E_SET_TARGET_LEVEL,
+
+  /*!\brief Codec control function to get bitstream level.
+   *
+   * Supported in codecs: VP9
+   */
+  VP9E_GET_LEVEL
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -821,6 +827,9 @@
 VPX_CTRL_USE_TYPE(VP9E_SET_TARGET_LEVEL,  unsigned int)
 #define VPX_CTRL_VP9E_SET_TARGET_LEVEL
 
+VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *)
+#define VPX_CTRL_VP9E_GET_LEVEL
+
 /*!\endcond */
 /*! @} - end defgroup vp8_encoder */
 #ifdef __cplusplus
diff --git a/vpx_dsp/arm/hadamard_neon.c b/vpx_dsp/arm/hadamard_neon.c
index cc9e804..21e3e3d 100644
--- a/vpx_dsp/arm/hadamard_neon.c
+++ b/vpx_dsp/arm/hadamard_neon.c
@@ -160,3 +160,42 @@
   vst1q_s16(coeff + 48, a6);
   vst1q_s16(coeff + 56, a7);
 }
+
+void vpx_hadamard_16x16_neon(const int16_t *src_diff, int src_stride,
+                             int16_t *coeff) {
+  int i;
+
+  /* Rearrange 16x16 to 8x32 and remove stride.
+   * Top left first. */
+  vpx_hadamard_8x8_neon(src_diff + 0 + 0 * src_stride, src_stride, coeff + 0);
+  /* Top right. */
+  vpx_hadamard_8x8_neon(src_diff + 8 + 0 * src_stride, src_stride, coeff + 64);
+  /* Bottom left. */
+  vpx_hadamard_8x8_neon(src_diff + 0 + 8 * src_stride, src_stride, coeff + 128);
+  /* Bottom right. */
+  vpx_hadamard_8x8_neon(src_diff + 8 + 8 * src_stride, src_stride, coeff + 192);
+
+  for (i = 0; i < 64; i += 8) {
+    const int16x8_t a0 = vld1q_s16(coeff + 0);
+    const int16x8_t a1 = vld1q_s16(coeff + 64);
+    const int16x8_t a2 = vld1q_s16(coeff + 128);
+    const int16x8_t a3 = vld1q_s16(coeff + 192);
+
+    const int16x8_t b0 = vhaddq_s16(a0, a1);
+    const int16x8_t b1 = vhsubq_s16(a0, a1);
+    const int16x8_t b2 = vhaddq_s16(a2, a3);
+    const int16x8_t b3 = vhsubq_s16(a2, a3);
+
+    const int16x8_t c0 = vaddq_s16(b0, b2);
+    const int16x8_t c1 = vaddq_s16(b1, b3);
+    const int16x8_t c2 = vsubq_s16(b0, b2);
+    const int16x8_t c3 = vsubq_s16(b1, b3);
+
+    vst1q_s16(coeff + 0, c0);
+    vst1q_s16(coeff + 64, c1);
+    vst1q_s16(coeff + 128, c2);
+    vst1q_s16(coeff + 192, c3);
+
+    coeff += 8;
+  }
+}
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c
index 6426ccc..80fcd66 100644
--- a/vpx_dsp/quantize.c
+++ b/vpx_dsp/quantize.c
@@ -53,7 +53,7 @@
     const int coeff_sign = (coeff >> 31);
     const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     const int64_t tmp = abs_coeff + round_ptr[0];
-    const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
+    const int abs_qcoeff = (int)((tmp * quant) >> 16);
     qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
     dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
     if (abs_qcoeff)
@@ -109,7 +109,7 @@
     const int coeff_sign = (coeff >> 31);
     const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
-    const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
+    const int abs_qcoeff = (int)((tmp * quant) >> 15);
     qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
     dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
     if (abs_qcoeff)
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index e8bddb0..cdec0db 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -275,7 +275,7 @@
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_variance64(const uint8_t *a8, int  a_stride,
                               const uint8_t *b8, int  b_stride,
-                              int w, int h, uint64_t *sse, uint64_t *sum) {
+                              int w, int h, uint64_t *sse, int64_t *sum) {
   int i, j;
 
   uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -298,7 +298,7 @@
                               const uint8_t *b8, int  b_stride,
                               int w, int h, uint32_t *sse, int *sum) {
   uint64_t sse_long = 0;
-  uint64_t sum_long = 0;
+  int64_t sum_long = 0;
   highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
   *sse = (uint32_t)sse_long;
   *sum = (int)sum_long;
@@ -308,7 +308,7 @@
                                const uint8_t *b8, int  b_stride,
                                int w, int h, uint32_t *sse, int *sum) {
   uint64_t sse_long = 0;
-  uint64_t sum_long = 0;
+  int64_t sum_long = 0;
   highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
   *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
   *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
@@ -318,7 +318,7 @@
                                const uint8_t *b8, int  b_stride,
                                int w, int h, uint32_t *sse, int *sum) {
   uint64_t sse_long = 0;
-  uint64_t sum_long = 0;
+  int64_t sum_long = 0;
   highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
   *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
   *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 4144281..a62acb7 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1020,7 +1020,7 @@
   specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64_x86inc";
 
   add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
-  specialize qw/vpx_hadamard_16x16 sse2/;
+  specialize qw/vpx_hadamard_16x16 sse2 neon/;
 
   add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";
   specialize qw/vpx_satd sse2 neon/;