Merge "Updates for 1-pass CBR rate control."
diff --git a/args.h b/args.h
index ad591af..ea909cb 100644
--- a/args.h
+++ b/args.h
@@ -9,8 +9,8 @@
  */
 
 
-#ifndef ARGS_H
-#define ARGS_H
+#ifndef ARGS_H_
+#define ARGS_H_
 #include <stdio.h>
 
 struct arg {
@@ -48,4 +48,4 @@
 int arg_parse_int(const struct arg *arg);
 struct vpx_rational arg_parse_rational(const struct arg *arg);
 int arg_parse_enum_or_int(const struct arg *arg);
-#endif
+#endif  // ARGS_H_
diff --git a/ivfdec.h b/ivfdec.h
index b1468a9..5da9acc 100644
--- a/ivfdec.h
+++ b/ivfdec.h
@@ -27,4 +27,4 @@
 }  /* extern "C" */
 #endif
 
-#endif  /* IVFDEC_H_ */
+#endif  // IVFDEC_H_
diff --git a/ivfenc.h b/ivfenc.h
index a332c7d..b486bc8 100644
--- a/ivfenc.h
+++ b/ivfenc.h
@@ -30,4 +30,4 @@
 }  /* extern "C" */
 #endif
 
-#endif  /* IVFENC_H_ */
+#endif  // IVFENC_H_
diff --git a/md5_utils.h b/md5_utils.h
index 81792c4..9935eae 100644
--- a/md5_utils.h
+++ b/md5_utils.h
@@ -20,8 +20,8 @@
  * Still in the public domain.
  */
 
-#ifndef MD5_H
-#define MD5_H
+#ifndef MD5_UTILS_H_
+#define MD5_UTILS_H_
 
 #define md5byte unsigned char
 #define UWORD32 unsigned int
@@ -38,4 +38,4 @@
 void MD5Final(unsigned char digest[16], struct MD5Context *context);
 void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]);
 
-#endif /* !MD5_H */
+#endif  // MD5_UTILS_H_
diff --git a/test/acm_random.h b/test/acm_random.h
index de94186..496dae3 100644
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -26,6 +26,11 @@
   void Reset(int seed) {
     random_.Reseed(seed);
   }
+  uint16_t Rand16(void) {  // Returns a pseudo-random 16-bit value.
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    return (value >> 15) & 0xffff;  // Generate() yields 31 bits: use 15..30.
+  }
 
   uint8_t Rand8(void) {
     const uint32_t value =
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
index 874d199..48eb853 100644
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -232,8 +232,10 @@
   const int num_buffers = 13;
   set_num_buffers(num_buffers);
 
+#if CONFIG_VP8_DECODER
   // Tell compiler we are not using kVP8TestVectors.
   (void)libvpx_test::kVP8TestVectors;
+#endif
 
   // Open compressed video file.
   if (filename.substr(filename.length() - 3, 3) == "ivf") {
@@ -252,42 +254,51 @@
   delete video;
 }
 
-TEST_F(ExternalFrameBufferTest, EightFrameBuffers) {
-  // Minimum number of reference buffers for VP9 is 8.
-  const int num_buffers = 8;
+TEST_F(ExternalFrameBufferTest, NineFrameBuffers) {
+  // Minimum number of external frame buffers for VP9 is
+  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS.
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
   ASSERT_EQ(VPX_CODEC_OK,
             SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
   ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
 }
 
 TEST_F(ExternalFrameBufferTest, EightJitterBuffers) {
-  // Number of buffers equals number of possible reference buffers(8), plus
-  // one working buffer, plus eight jitter buffers.
-  const int num_buffers = 17;
+  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+  // #VPX_MAXIMUM_WORK_BUFFERS + eight jitter buffers.
+  const int jitter_buffers = 8;
+  const int num_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
   ASSERT_EQ(VPX_CODEC_OK,
             SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
   ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
 }
 
 TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
-  // Minimum number of reference buffers for VP9 is 8.
-  const int num_buffers = 7;
+  // Minimum number of external frame buffers for VP9 is
+  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS. Set one less.
+  const int num_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS - 1;
   ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
             SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
 }
 
 TEST_F(ExternalFrameBufferTest, NullFrameBufferList) {
-  // Number of buffers equals number of possible reference buffers(8), plus
-  // one working buffer, plus four jitter buffers.
-  const int num_buffers = 13;
+  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+  const int jitter_buffers = 4;
+  const int num_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
   ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
             SetNullFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
 }
 
 TEST_F(ExternalFrameBufferTest, NullRealloc) {
-  // Number of buffers equals number of possible reference buffers(8), plus
-  // one working buffer, plus four jitter buffers.
-  const int num_buffers = 13;
+  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+  const int jitter_buffers = 4;
+  const int num_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
   ASSERT_EQ(VPX_CODEC_OK,
             SetExternalFrameBuffers(num_buffers,
                                     zero_realloc_vp9_frame_buffer));
@@ -295,9 +306,11 @@
 }
 
 TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) {
-  // Number of buffers equals number of possible reference buffers(8), plus
-  // one working buffer, plus four jitter buffers.
-  const int num_buffers = 13;
+  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+  const int jitter_buffers = 4;
+  const int num_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
   ASSERT_EQ(VPX_CODEC_OK,
             SetExternalFrameBuffers(num_buffers,
                                     one_less_byte_realloc_vp9_frame_buffer));
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
new file mode 100644
index 0000000..14b78f6
--- /dev/null
+++ b/test/partial_idct_test.cc
@@ -0,0 +1,172 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+extern "C" {
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_scan.h"
+}
+
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+typedef void (*fwd_txfm_t)(const int16_t *in, int16_t *out, int stride);
+typedef void (*inv_txfm_t)(const int16_t *in, uint8_t *out, int stride);
+typedef std::tr1::tuple<inv_txfm_t,
+                        inv_txfm_t,
+                        TX_SIZE, int> partial_itxfm_param_t;
+const int kMaxNumCoeffs = 1024;
+class PartialIDctTest : public ::testing::TestWithParam<partial_itxfm_param_t> {
+ public:
+  virtual ~PartialIDctTest() {}
+  virtual void SetUp() {  // Unpack the test tuple into the fixture members.
+    full_itxfm_ = GET_PARAM(0);
+    partial_itxfm_ = GET_PARAM(1);
+    tx_size_  = GET_PARAM(2);
+    last_nonzero_ = GET_PARAM(3);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  int last_nonzero_;  // Count of leading scan positions given coefficients.
+  TX_SIZE tx_size_;  // Transform size; selects block width and scan order.
+  inv_txfm_t full_itxfm_;  // First tuple element: the full inverse transform.
+  inv_txfm_t partial_itxfm_;  // Second tuple element: the partial transform.
+};
+
+// Feeds coefficient blocks that are non-zero only in the first last_nonzero_
+// positions of the default scan order to both the full and the partial
+// inverse transform, and expects the two outputs to match exactly.
+TEST_P(PartialIDctTest, ResultsMatch) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int size = 0;  // Block width and height; set from tx_size_ below.
+  switch (tx_size_) {
+  case TX_4X4: size = 4; break;
+  case TX_8X8: size = 8; break;
+  case TX_16X16: size = 16; break;
+  case TX_32X32: size = 32; break;
+  default: ASSERT_TRUE(0) << "Wrong Size!"; break;
+  }
+  DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block1, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block2, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
+  const int count_test_block = 1000;
+  const int max_coeff = 32766 / 4;
+  const int block_size = size * size;
+  int max_error = 0;
+  for (int i = 0; i < count_test_block; ++i) {
+    // Clear the destination and coefficient buffers.
+    memset(dst1, 0, sizeof(*dst1) * block_size);
+    memset(dst2, 0, sizeof(*dst2) * block_size);
+    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
+
+    // Fill the first last_nonzero_ scan positions with random coefficients
+    // while keeping the sum of their squares at or below max_coeff^2.
+    int max_energy_leftover = max_coeff * max_coeff;
+    for (int j = 0; j < last_nonzero_; ++j) {
+      // The 1.0 factor selects sqrt(double); passing a bare int can be an
+      // ambiguous overload on some C++ toolchains.
+      int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+                                          (rnd.Rand16() - 32768) / 65536);
+      max_energy_leftover -= coef * coef;
+      if (max_energy_leftover < 0) {
+        max_energy_leftover = 0;
+        coef = 0;
+      }
+      test_coef_block1[vp9_default_scan_orders[tx_size_].scan[j]] = coef;
+    }
+
+    // Give each transform its own copy of the same coefficients.
+    memcpy(test_coef_block2, test_coef_block1,
+           sizeof(*test_coef_block2) * block_size);
+
+    REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+
+    // Track the largest squared per-pixel difference seen so far.
+    for (int j = 0; j < block_size; ++j) {
+      const int diff = dst1[j] - dst2[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+    }
+  }
+
+  EXPECT_EQ(0, max_error)
+      << "Error: partial inverse transform produces different results";
+}
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, PartialIDctTest,
+    ::testing::Values(  // (full itxfm, partial itxfm, tx size, last_nonzero)
+        make_tuple(vp9_idct32x32_1024_add_c,
+                   vp9_idct32x32_34_add_c,
+                   TX_32X32, 34),
+        make_tuple(vp9_idct32x32_1024_add_c,
+                   vp9_idct32x32_1_add_c,
+                   TX_32X32, 1),
+        make_tuple(vp9_idct16x16_256_add_c,
+                   vp9_idct16x16_10_add_c,
+                   TX_16X16, 10),
+        make_tuple(vp9_idct16x16_256_add_c,
+                   vp9_idct16x16_1_add_c,
+                   TX_16X16, 1),
+        make_tuple(vp9_idct8x8_64_add_c,
+                   vp9_idct8x8_10_add_c,
+                   TX_8X8, 10),
+        make_tuple(vp9_idct8x8_64_add_c,
+                   vp9_idct8x8_1_add_c,
+                   TX_8X8, 1),
+        make_tuple(vp9_idct4x4_16_add_c,
+                   vp9_idct4x4_1_add_c,
+                   TX_4X4, 1)));
+#if HAVE_SSE2  // Pairs the C full transforms with the SSE2 partial ones.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, PartialIDctTest,
+    ::testing::Values(
+        make_tuple(vp9_idct32x32_1024_add_c,
+                   vp9_idct32x32_34_add_sse2,
+                   TX_32X32, 34),
+        make_tuple(vp9_idct32x32_1024_add_c,
+                   vp9_idct32x32_1_add_sse2,
+                   TX_32X32, 1),
+        make_tuple(vp9_idct16x16_256_add_c,
+                   vp9_idct16x16_10_add_sse2,
+                   TX_16X16, 10),
+        make_tuple(vp9_idct16x16_256_add_c,
+                   vp9_idct16x16_1_add_sse2,
+                   TX_16X16, 1),
+        make_tuple(vp9_idct8x8_64_add_c,
+                   vp9_idct8x8_10_add_sse2,
+                   TX_8X8, 10),
+        make_tuple(vp9_idct8x8_64_add_c,
+                   vp9_idct8x8_1_add_sse2,
+                   TX_8X8, 1),
+        make_tuple(vp9_idct4x4_16_add_c,
+                   vp9_idct4x4_1_add_sse2,
+                   TX_4X4, 1)));
+#endif  // HAVE_SSE2
+}  // namespace
diff --git a/test/test.mk b/test/test.mk
index 2905a1a..13e1e3a 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -92,6 +92,7 @@
 
 # IDCT test currently depends on FDCT function
 LIBVPX_TEST_SRCS-yes                   += idct8x8_test.cc
+LIBVPX_TEST_SRCS-yes                   += partial_idct_test.cc
 LIBVPX_TEST_SRCS-yes                   += superframe_test.cc
 LIBVPX_TEST_SRCS-yes                   += tile_independence_test.cc
 endif
diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm
index 751bc74..5b8ec20 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm
+++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm
@@ -8,18 +8,18 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT  |vp9_loop_filter_horizontal_edge_16_neon|
+    EXPORT  |vp9_lpf_horizontal_4_dual_neon|
     ARM
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
-;void vp9_loop_filter_horizontal_edge_16_neon(uint8_t *s, int p,
-;                                             const uint8_t *blimit0,
-;                                             const uint8_t *limit0,
-;                                             const uint8_t *thresh0,
-;                                             const uint8_t *blimit1,
-;                                             const uint8_t *limit1,
-;                                             const uint8_t *thresh1)
+;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
+;                                    const uint8_t *blimit0,
+;                                    const uint8_t *limit0,
+;                                    const uint8_t *thresh0,
+;                                    const uint8_t *blimit1,
+;                                    const uint8_t *limit1,
+;                                    const uint8_t *thresh1)
 ; r0    uint8_t *s,
 ; r1    int p,
 ; r2    const uint8_t *blimit0,
@@ -29,7 +29,7 @@
 ; sp+8  const uint8_t *limit1,
 ; sp+12 const uint8_t *thresh1,
 
-|vp9_loop_filter_horizontal_edge_16_neon| PROC
+|vp9_lpf_horizontal_4_dual_neon| PROC
     push        {lr}
 
     ldr         r12, [sp, #4]              ; load thresh0
@@ -76,7 +76,7 @@
     vpop        {d8-d15}                   ; restore neon registers
 
     pop         {pc}
-    ENDP        ; |vp9_loop_filter_horizontal_edge_16_neon|
+    ENDP        ; |vp9_lpf_horizontal_4_dual_neon|
 
 ; void vp9_loop_filter_neon_16();
 ; This is a helper function for the loopfilters. The invidual functions do the
diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
index b97e7aa..0820db2 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
+++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
@@ -10,44 +10,43 @@
 
 #include "./vp9_rtcd.h"
 
-void vp9_mbloop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */,
-                                               const uint8_t *blimit0,
-                                               const uint8_t *limit0,
-                                               const uint8_t *thresh0,
-                                               const uint8_t *blimit1,
-                                               const uint8_t *limit1,
-                                               const uint8_t *thresh1) {
-  vp9_mbloop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1);
-  vp9_mbloop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
+                                    const uint8_t *blimit0,
+                                    const uint8_t *limit0,
+                                    const uint8_t *thresh0,
+                                    const uint8_t *blimit1,
+                                    const uint8_t *limit1,
+                                    const uint8_t *thresh1) {
+  vp9_lpf_horizontal_8(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_horizontal_8(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_loop_filter_vertical_edge_16_neon(uint8_t *s, int p,
-                                           const uint8_t *blimit0,
-                                           const uint8_t *limit0,
-                                           const uint8_t *thresh0,
-                                           const uint8_t *blimit1,
-                                           const uint8_t *limit1,
-                                           const uint8_t *thresh1) {
-  vp9_loop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1);
-  vp9_loop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
+                                  const uint8_t *blimit0,
+                                  const uint8_t *limit0,
+                                  const uint8_t *thresh0,
+                                  const uint8_t *blimit1,
+                                  const uint8_t *limit1,
+                                  const uint8_t *thresh1) {
+  vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_mbloop_filter_vertical_edge_16_neon(uint8_t *s, int p,
-                                             const uint8_t *blimit0,
-                                             const uint8_t *limit0,
-                                             const uint8_t *thresh0,
-                                             const uint8_t *blimit1,
-                                             const uint8_t *limit1,
-                                             const uint8_t *thresh1) {
-  vp9_mbloop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1);
-  vp9_mbloop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1,
-                                       1);
+void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
+                                  const uint8_t *blimit0,
+                                  const uint8_t *limit0,
+                                  const uint8_t *thresh0,
+                                  const uint8_t *blimit1,
+                                  const uint8_t *limit1,
+                                  const uint8_t *thresh1) {
+  vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_mb_lpf_vertical_edge_w_16_neon(uint8_t *s, int p,
-                                        const uint8_t *blimit,
-                                        const uint8_t *limit,
-                                        const uint8_t *thresh) {
-  vp9_mb_lpf_vertical_edge_w_neon(s, p, blimit, limit, thresh);
-  vp9_mb_lpf_vertical_edge_w_neon(s + 8 * p, p, blimit, limit, thresh);
+void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
+                                   const uint8_t *blimit,
+                                   const uint8_t *limit,
+                                   const uint8_t *thresh) {
+  vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+  vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
 }
diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_neon.asm
index 8b4fe5d..4430322 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_neon.asm
+++ b/vp9/common/arm/neon/vp9_loopfilter_neon.asm
@@ -8,10 +8,10 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT  |vp9_loop_filter_horizontal_edge_neon|
-    EXPORT  |vp9_loop_filter_vertical_edge_neon|
-    EXPORT  |vp9_mbloop_filter_horizontal_edge_neon|
-    EXPORT  |vp9_mbloop_filter_vertical_edge_neon|
+    EXPORT  |vp9_lpf_horizontal_4_neon|
+    EXPORT  |vp9_lpf_vertical_4_neon|
+    EXPORT  |vp9_lpf_horizontal_8_neon|
+    EXPORT  |vp9_lpf_vertical_8_neon|
     ARM
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
@@ -21,12 +21,12 @@
 ; TODO(fgalligan): See about removing the count code as this function is only
 ; called with a count of 1.
 ;
-; void vp9_loop_filter_horizontal_edge_neon(uint8_t *s,
-;                                           int p /* pitch */,
-;                                           const uint8_t *blimit,
-;                                           const uint8_t *limit,
-;                                           const uint8_t *thresh,
-;                                           int count)
+; void vp9_lpf_horizontal_4_neon(uint8_t *s,
+;                                int p /* pitch */,
+;                                const uint8_t *blimit,
+;                                const uint8_t *limit,
+;                                const uint8_t *thresh,
+;                                int count)
 ;
 ; r0    uint8_t *s,
 ; r1    int p, /* pitch */
@@ -34,7 +34,7 @@
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh,
 ; sp+4  int count
-|vp9_loop_filter_horizontal_edge_neon| PROC
+|vp9_lpf_horizontal_4_neon| PROC
     push        {lr}
 
     vld1.8      {d0[]}, [r2]               ; duplicate *blimit
@@ -77,19 +77,19 @@
 
 end_vp9_lf_h_edge
     pop         {pc}
-    ENDP        ; |vp9_loop_filter_horizontal_edge_neon|
+    ENDP        ; |vp9_lpf_horizontal_4_neon|
 
 ; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
 ; works on 16 iterations at a time.
 ; TODO(fgalligan): See about removing the count code as this function is only
 ; called with a count of 1.
 ;
-; void vp9_loop_filter_vertical_edge_neon(uint8_t *s,
-;                                         int p /* pitch */,
-;                                         const uint8_t *blimit,
-;                                         const uint8_t *limit,
-;                                         const uint8_t *thresh,
-;                                         int count)
+; void vp9_lpf_vertical_4_neon(uint8_t *s,
+;                              int p /* pitch */,
+;                              const uint8_t *blimit,
+;                              const uint8_t *limit,
+;                              const uint8_t *thresh,
+;                              int count)
 ;
 ; r0    uint8_t *s,
 ; r1    int p, /* pitch */
@@ -97,7 +97,7 @@
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh,
 ; sp+4  int count
-|vp9_loop_filter_vertical_edge_neon| PROC
+|vp9_lpf_vertical_4_neon| PROC
     push        {lr}
 
     vld1.8      {d0[]}, [r2]              ; duplicate *blimit
@@ -158,7 +158,7 @@
 
 end_vp9_lf_v_edge
     pop         {pc}
-    ENDP        ; |vp9_loop_filter_vertical_edge_neon|
+    ENDP        ; |vp9_lpf_vertical_4_neon|
 
 ; void vp9_loop_filter_neon();
 ; This is a helper function for the loopfilters. The invidual functions do the
@@ -276,18 +276,18 @@
     bx          lr
     ENDP        ; |vp9_loop_filter_neon|
 
-; void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int p,
-;                                             const uint8_t *blimit,
-;                                             const uint8_t *limit,
-;                                             const uint8_t *thresh,
-;                                             int count)
+; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p,
+;                                const uint8_t *blimit,
+;                                const uint8_t *limit,
+;                                const uint8_t *thresh,
+;                                int count)
 ; r0    uint8_t *s,
 ; r1    int p, /* pitch */
 ; r2    const uint8_t *blimit,
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh,
 ; sp+4  int count
-|vp9_mbloop_filter_horizontal_edge_neon| PROC
+|vp9_lpf_horizontal_8_neon| PROC
     push        {r4-r5, lr}
 
     vld1.8      {d0[]}, [r2]               ; duplicate *blimit
@@ -333,14 +333,14 @@
 end_vp9_mblf_h_edge
     pop         {r4-r5, pc}
 
-    ENDP        ; |vp9_mbloop_filter_horizontal_edge_neon|
+    ENDP        ; |vp9_lpf_horizontal_8_neon|
 
-; void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s,
-;                                           int pitch,
-;                                           const uint8_t *blimit,
-;                                           const uint8_t *limit,
-;                                           const uint8_t *thresh,
-;                                           int count)
+; void vp9_lpf_vertical_8_neon(uint8_t *s,
+;                              int pitch,
+;                              const uint8_t *blimit,
+;                              const uint8_t *limit,
+;                              const uint8_t *thresh,
+;                              int count)
 ;
 ; r0    uint8_t *s,
 ; r1    int pitch,
@@ -348,7 +348,7 @@
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh,
 ; sp+4  int count
-|vp9_mbloop_filter_vertical_edge_neon| PROC
+|vp9_lpf_vertical_8_neon| PROC
     push        {r4-r5, lr}
 
     vld1.8      {d0[]}, [r2]              ; duplicate *blimit
@@ -420,7 +420,7 @@
 
 end_vp9_mblf_v_edge
     pop         {r4-r5, pc}
-    ENDP        ; |vp9_mbloop_filter_vertical_edge_neon|
+    ENDP        ; |vp9_lpf_vertical_8_neon|
 
 ; void vp9_mbloop_filter_neon();
 ; This is a helper function for the loopfilters. The invidual functions do the
diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
index 2e8001b..8cb913c 100644
--- a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
+++ b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
@@ -8,23 +8,23 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT  |vp9_mb_lpf_horizontal_edge_w_neon|
-    EXPORT  |vp9_mb_lpf_vertical_edge_w_neon|
+    EXPORT  |vp9_lpf_horizontal_16_neon|
+    EXPORT  |vp9_lpf_vertical_16_neon|
     ARM
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
-; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p,
-;                                        const uint8_t *blimit,
-;                                        const uint8_t *limit,
-;                                        const uint8_t *thresh
-;                                        int count)
+; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
+;                                 const uint8_t *blimit,
+;                                 const uint8_t *limit,
+;                                 const uint8_t *thresh,
+;                                 int count)
 ; r0    uint8_t *s,
 ; r1    int p, /* pitch */
 ; r2    const uint8_t *blimit,
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh,
-|vp9_mb_lpf_horizontal_edge_w_neon| PROC
+|vp9_lpf_horizontal_16_neon| PROC
     push        {r4-r8, lr}
     vpush       {d8-d15}
     ldr         r4, [sp, #88]              ; load thresh
@@ -115,18 +115,18 @@
     vpop        {d8-d15}
     pop         {r4-r8, pc}
 
-    ENDP        ; |vp9_mb_lpf_horizontal_edge_w_neon|
+    ENDP        ; |vp9_lpf_horizontal_16_neon|
 
-; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p,
-;                                        const uint8_t *blimit,
-;                                        const uint8_t *limit,
-;                                        const uint8_t *thresh)
+; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
+;                               const uint8_t *blimit,
+;                               const uint8_t *limit,
+;                               const uint8_t *thresh)
 ; r0    uint8_t *s,
 ; r1    int p, /* pitch */
 ; r2    const uint8_t *blimit,
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh,
-|vp9_mb_lpf_vertical_edge_w_neon| PROC
+|vp9_lpf_vertical_16_neon| PROC
     push        {r4-r8, lr}
     vpush       {d8-d15}
     ldr         r4, [sp, #88]              ; load thresh
@@ -279,7 +279,7 @@
     vpop        {d8-d15}
     pop         {r4-r8, pc}
 
-    ENDP        ; |vp9_mb_lpf_vertical_edge_w_neon|
+    ENDP        ; |vp9_lpf_vertical_16_neon|
 
 ; void vp9_wide_mbfilter_neon();
 ; This is a helper function for the loopfilters. The invidual functions do the
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
index 0c0f155..3df7f4c 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
@@ -20,12 +20,12 @@
 #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
 
 #if HAVE_DSPR2
-void vp9_loop_filter_horizontal_edge_dspr2(unsigned char *s,
-                                           int pitch,
-                                           const uint8_t *blimit,
-                                           const uint8_t *limit,
-                                           const uint8_t *thresh,
-                                           int count) {
+void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
+                                int pitch,
+                                const uint8_t *blimit,
+                                const uint8_t *limit,
+                                const uint8_t *thresh,
+                                int count) {
   uint8_t   i;
   uint32_t  mask;
   uint32_t  hev;
@@ -114,12 +114,12 @@
   }
 }
 
-void vp9_loop_filter_vertical_edge_dspr2(unsigned char *s,
-                                         int pitch,
-                                         const uint8_t *blimit,
-                                         const uint8_t *limit,
-                                         const uint8_t *thresh,
-                                         int count) {
+void vp9_lpf_vertical_4_dspr2(unsigned char *s,
+                              int pitch,
+                              const uint8_t *blimit,
+                              const uint8_t *limit,
+                              const uint8_t *thresh,
+                              int count) {
   uint8_t   i;
   uint32_t  mask, hev;
   uint32_t  pm1, p0, p1, p2, p3, p4, p5, p6;
@@ -307,58 +307,56 @@
   }
 }
 
-void vp9_loop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */,
-                                              const uint8_t *blimit0,
-                                              const uint8_t *limit0,
-                                              const uint8_t *thresh0,
-                                              const uint8_t *blimit1,
-                                              const uint8_t *limit1,
-                                              const uint8_t *thresh1) {
-  vp9_loop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
-  vp9_loop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
+                                     const uint8_t *blimit0,
+                                     const uint8_t *limit0,
+                                     const uint8_t *thresh0,
+                                     const uint8_t *blimit1,
+                                     const uint8_t *limit1,
+                                     const uint8_t *thresh1) {
+  vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_mbloop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */,
-                                                const uint8_t *blimit0,
-                                                const uint8_t *limit0,
-                                                const uint8_t *thresh0,
-                                                const uint8_t *blimit1,
-                                                const uint8_t *limit1,
-                                                const uint8_t *thresh1) {
-  vp9_mbloop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
-  vp9_mbloop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1,
-                                          1);
+void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
+                                     const uint8_t *blimit0,
+                                     const uint8_t *limit0,
+                                     const uint8_t *thresh0,
+                                     const uint8_t *blimit1,
+                                     const uint8_t *limit1,
+                                     const uint8_t *thresh1) {
+  vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_loop_filter_vertical_edge_16_dspr2(uint8_t *s, int p,
-                                            const uint8_t *blimit0,
-                                            const uint8_t *limit0,
-                                            const uint8_t *thresh0,
-                                            const uint8_t *blimit1,
-                                            const uint8_t *limit1,
-                                            const uint8_t *thresh1) {
-  vp9_loop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
-  vp9_loop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
-                                      1);
+void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
+                                   const uint8_t *blimit0,
+                                   const uint8_t *limit0,
+                                   const uint8_t *thresh0,
+                                   const uint8_t *blimit1,
+                                   const uint8_t *limit1,
+                                   const uint8_t *thresh1) {
+  vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_mbloop_filter_vertical_edge_16_dspr2(uint8_t *s, int p,
-                                              const uint8_t *blimit0,
-                                              const uint8_t *limit0,
-                                              const uint8_t *thresh0,
-                                              const uint8_t *blimit1,
-                                              const uint8_t *limit1,
-                                              const uint8_t *thresh1) {
-  vp9_mbloop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
-  vp9_mbloop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
-                                        1);
+void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p /* pitch */,
+                                   const uint8_t *blimit0,
+                                   const uint8_t *limit0,
+                                   const uint8_t *thresh0,
+                                   const uint8_t *blimit1,
+                                   const uint8_t *limit1,
+                                   const uint8_t *thresh1) {
+  vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
+                           1);
 }
 
-void vp9_mb_lpf_vertical_edge_w_16_dspr2(uint8_t *s, int p,
-                                         const uint8_t *blimit,
-                                         const uint8_t *limit,
-                                         const uint8_t *thresh) {
-  vp9_mb_lpf_vertical_edge_w_dspr2(s, p, blimit, limit, thresh);
-  vp9_mb_lpf_vertical_edge_w_dspr2(s + 8 * p, p, blimit, limit, thresh);
+void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
+                                    const uint8_t *blimit,
+                                    const uint8_t *limit,
+                                    const uint8_t *thresh) {
+  vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
+  vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
 }
 #endif  // #if HAVE_DSPR2
diff --git a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
index adfd755..7cd0b63 100644
--- a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
@@ -20,12 +20,12 @@
 #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
 
 #if HAVE_DSPR2
-void vp9_mbloop_filter_horizontal_edge_dspr2(unsigned char *s,
-                                             int pitch,
-                                             const uint8_t *blimit,
-                                             const uint8_t *limit,
-                                             const uint8_t *thresh,
-                                             int count) {
+void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
+                                int pitch,
+                                const uint8_t *blimit,
+                                const uint8_t *limit,
+                                const uint8_t *thresh,
+                                int count) {
   uint32_t  mask;
   uint32_t  hev, flat;
   uint8_t   i;
@@ -319,12 +319,12 @@
   }
 }
 
-void vp9_mbloop_filter_vertical_edge_dspr2(unsigned char *s,
-                                           int pitch,
-                                           const uint8_t *blimit,
-                                           const uint8_t *limit,
-                                           const uint8_t *thresh,
-                                           int count) {
+void vp9_lpf_vertical_8_dspr2(unsigned char *s,
+                              int pitch,
+                              const uint8_t *blimit,
+                              const uint8_t *limit,
+                              const uint8_t *thresh,
+                              int count) {
   uint8_t   i;
   uint32_t  mask, hev, flat;
   uint8_t   *s1, *s2, *s3, *s4;
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
index 0759755..6c94674 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
@@ -20,12 +20,12 @@
 #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
 
 #if HAVE_DSPR2
-void vp9_mb_lpf_horizontal_edge_w_dspr2(unsigned char *s,
-                                        int pitch,
-                                        const uint8_t *blimit,
-                                        const uint8_t *limit,
-                                        const uint8_t *thresh,
-                                        int count) {
+void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
+                                 int pitch,
+                                 const uint8_t *blimit,
+                                 const uint8_t *limit,
+                                 const uint8_t *thresh,
+                                 int count) {
   uint32_t  mask;
   uint32_t  hev, flat, flat2;
   uint8_t   i;
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
index 9e9171c..851fc6c 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
@@ -20,11 +20,11 @@
 #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
 
 #if HAVE_DSPR2
-void vp9_mb_lpf_vertical_edge_w_dspr2(uint8_t *s,
-                                      int pitch,
-                                      const uint8_t *blimit,
-                                      const uint8_t *limit,
-                                      const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dspr2(uint8_t *s,
+                               int pitch,
+                               const uint8_t *blimit,
+                               const uint8_t *limit,
+                               const uint8_t *thresh) {
   uint8_t   i;
   uint32_t  mask, hev, flat, flat2;
   uint8_t   *s1, *s2, *s3, *s4;
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 72adf92..2266e0e 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -221,23 +221,10 @@
 static const uint16_t left_border_uv =  0x1111;
 static const uint16_t above_border_uv = 0x000f;
 
-
-static void lf_init_lut(loop_filter_info_n *lfi) {
-  lfi->mode_lf_lut[DC_PRED] = 0;
-  lfi->mode_lf_lut[D45_PRED] = 0;
-  lfi->mode_lf_lut[D135_PRED] = 0;
-  lfi->mode_lf_lut[D117_PRED] = 0;
-  lfi->mode_lf_lut[D153_PRED] = 0;
-  lfi->mode_lf_lut[D207_PRED] = 0;
-  lfi->mode_lf_lut[D63_PRED] = 0;
-  lfi->mode_lf_lut[V_PRED] = 0;
-  lfi->mode_lf_lut[H_PRED] = 0;
-  lfi->mode_lf_lut[TM_PRED] = 0;
-  lfi->mode_lf_lut[ZEROMV]  = 0;
-  lfi->mode_lf_lut[NEARESTMV] = 1;
-  lfi->mode_lf_lut[NEARMV] = 1;
-  lfi->mode_lf_lut[NEWMV] = 1;
-}
+static const int mode_lf_lut[MB_MODE_COUNT] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
+  1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
+};
 
 static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
   int lvl;
@@ -270,9 +257,6 @@
   update_sharpness(lfi, lf->sharpness_level);
   lf->last_sharpness_level = lf->sharpness_level;
 
-  // init LUT for lvl  and hev thr picking
-  lf_init_lut(lfi);
-
   // init hev threshold const vectors
   for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
     vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
@@ -355,56 +339,56 @@
     if (mask & 1) {
       if ((mask_16x16_0 | mask_16x16_1) & 1) {
         if ((mask_16x16_0 & mask_16x16_1) & 1) {
-          vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim,
-                                     lfi0->hev_thr);
+          vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                   lfi0->hev_thr);
         } else if (mask_16x16_0 & 1) {
-          vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim,
-                                     lfi0->hev_thr);
+          vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+                              lfi0->hev_thr);
         } else {
-          vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim,
-                                     lfi1->lim, lfi1->hev_thr);
+          vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+                              lfi1->lim, lfi1->hev_thr);
         }
       }
 
       if ((mask_8x8_0 | mask_8x8_1) & 1) {
         if ((mask_8x8_0 & mask_8x8_1) & 1) {
-          vp9_mbloop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim,
-                                          lfi0->hev_thr, lfi1->mblim,
-                                          lfi1->lim, lfi1->hev_thr);
+          vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                  lfi1->hev_thr);
         } else if (mask_8x8_0 & 1) {
-          vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim,
-                                          lfi0->hev_thr, 1);
+          vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+                             1);
         } else {
-          vp9_mbloop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim,
-                                          lfi1->lim, lfi1->hev_thr, 1);
+          vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+                             lfi1->hev_thr, 1);
         }
       }
 
       if ((mask_4x4_0 | mask_4x4_1) & 1) {
         if ((mask_4x4_0 & mask_4x4_1) & 1) {
-          vp9_loop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim,
-                                        lfi0->hev_thr, lfi1->mblim,
-                                        lfi1->lim, lfi1->hev_thr);
+          vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                  lfi1->hev_thr);
         } else if (mask_4x4_0 & 1) {
-          vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim,
-                                        lfi0->hev_thr, 1);
+          vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+                             1);
         } else {
-          vp9_loop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim,
-                                        lfi1->lim, lfi1->hev_thr, 1);
+          vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+                             lfi1->hev_thr, 1);
         }
       }
 
       if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
         if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
-          vp9_loop_filter_vertical_edge_16(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                        lfi0->hev_thr, lfi1->mblim,
-                                        lfi1->lim, lfi1->hev_thr);
+          vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                  lfi1->hev_thr);
         } else if (mask_4x4_int_0 & 1) {
-          vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                        lfi0->hev_thr, 1);
+          vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+                             lfi0->hev_thr, 1);
         } else {
-          vp9_loop_filter_vertical_edge(s + 8 *pitch + 4, pitch, lfi1->mblim,
-                                        lfi1->lim, lfi1->hev_thr, 1);
+          vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+                             lfi1->hev_thr, 1);
         }
       }
     }
@@ -440,81 +424,73 @@
     if (mask & 1) {
       if (mask_16x16 & 1) {
         if ((mask_16x16 & 3) == 3) {
-          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                       lfi->hev_thr, 2);
+          vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+                                lfi->hev_thr, 2);
           count = 2;
         } else {
-          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                       lfi->hev_thr, 1);
+          vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+                                lfi->hev_thr, 1);
         }
       } else if (mask_8x8 & 1) {
         if ((mask_8x8 & 3) == 3) {
           // Next block's thresholds
           const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
 
-          vp9_mbloop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
-                                               lfi->lim, lfi->hev_thr,
-                                               lfin->mblim, lfin->lim,
-                                               lfin->hev_thr);
+          vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr, lfin->mblim, lfin->lim,
+                                    lfin->hev_thr);
 
           if ((mask_4x4_int & 3) == 3) {
-            vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
-                                               lfi->lim, lfi->hev_thr,
-                                               lfin->mblim, lfin->lim,
-                                               lfin->hev_thr);
+            vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+                                      lfi->lim, lfi->hev_thr, lfin->mblim,
+                                      lfin->lim, lfin->hev_thr);
           } else {
             if (mask_4x4_int & 1)
-              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
-                                              lfi->lim, lfi->hev_thr, 1);
+              vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                   lfi->hev_thr, 1);
             else if (mask_4x4_int & 2)
-              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
-                                              lfin->mblim, lfin->lim,
-                                              lfin->hev_thr, 1);
+              vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+                                   lfin->lim, lfin->hev_thr, 1);
           }
           count = 2;
         } else {
-          vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
-                                            lfi->hev_thr, 1);
+          vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
 
           if (mask_4x4_int & 1)
-            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
-                                            lfi->lim, lfi->hev_thr, 1);
+            vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                 lfi->hev_thr, 1);
         }
       } else if (mask_4x4 & 1) {
         if ((mask_4x4 & 3) == 3) {
           // Next block's thresholds
           const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
 
-          vp9_loop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
-                                             lfi->lim, lfi->hev_thr,
-                                             lfin->mblim, lfin->lim,
-                                             lfin->hev_thr);
+          vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr, lfin->mblim, lfin->lim,
+                                    lfin->hev_thr);
           if ((mask_4x4_int & 3) == 3) {
-            vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
-                                               lfi->lim, lfi->hev_thr,
-                                               lfin->mblim, lfin->lim,
-                                               lfin->hev_thr);
+            vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+                                      lfi->lim, lfi->hev_thr, lfin->mblim,
+                                      lfin->lim, lfin->hev_thr);
           } else {
             if (mask_4x4_int & 1)
-              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
-                                              lfi->lim, lfi->hev_thr, 1);
+              vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                   lfi->hev_thr, 1);
             else if (mask_4x4_int & 2)
-              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
-                                              lfin->mblim, lfin->lim,
-                                              lfin->hev_thr, 1);
+              vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+                                   lfin->lim, lfin->hev_thr, 1);
           }
           count = 2;
         } else {
-        vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
-                                        lfi->hev_thr, 1);
+          vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
 
-        if (mask_4x4_int & 1)
-          vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
-                                          lfi->lim, lfi->hev_thr, 1);
+          if (mask_4x4_int & 1)
+            vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                 lfi->hev_thr, 1);
         }
       } else if (mask_4x4_int & 1) {
-        vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
-                                        lfi->lim, lfi->hev_thr, 1);
+        vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                             lfi->hev_thr, 1);
       }
     }
     s += 8 * count;
@@ -543,8 +519,7 @@
   const int skip = mi->mbmi.skip_coeff;
   const int seg = mi->mbmi.segment_id;
   const int ref = mi->mbmi.ref_frame[0];
-  const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
-  const int filter_level = lfi_n->lvl[seg][ref][mode];
+  const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
   uint64_t *left_y = &lfm->left_y[tx_size_y];
   uint64_t *above_y = &lfm->above_y[tx_size_y];
   uint64_t *int_4x4_y = &lfm->int_4x4_y;
@@ -625,8 +600,7 @@
   const int skip = mi->mbmi.skip_coeff;
   const int seg = mi->mbmi.segment_id;
   const int ref = mi->mbmi.ref_frame[0];
-  const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
-  const int filter_level = lfi_n->lvl[seg][ref][mode];
+  const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
   uint64_t *left_y = &lfm->left_y[tx_size_y];
   uint64_t *above_y = &lfm->above_y[tx_size_y];
   uint64_t *int_4x4_y = &lfm->int_4x4_y;
@@ -919,10 +893,7 @@
                      const MB_MODE_INFO *mbmi) {
   const int seg = mbmi->segment_id;
   const int ref = mbmi->ref_frame[0];
-  const int mode = lfi_n->mode_lf_lut[mbmi->mode];
-  const int filter_level = lfi_n->lvl[seg][ref][mode];
-
-  return filter_level;
+  return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]];
 }
 
 static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -940,19 +911,15 @@
 
     if (mask & 1) {
       if (mask_16x16 & 1) {
-        vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                   lfi->hev_thr);
+        vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       } else if (mask_8x8 & 1) {
-        vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
-                                        lfi->hev_thr, 1);
+        vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
       } else if (mask_4x4 & 1) {
-        vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
-                                      lfi->hev_thr, 1);
+        vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
       }
     }
     if (mask_4x4_int & 1)
-      vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim,
-                                    lfi->hev_thr, 1);
+      vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
     s += 8;
     lfl += 1;
     mask_16x16 >>= 1;
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 62389ea..98fac96 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -54,7 +54,6 @@
 typedef struct {
   loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
   uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
-  uint8_t mode_lf_lut[MB_MODE_COUNT];
 } loop_filter_info_n;
 
 /* assorted loopfilter functions which get used elsewhere */
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index f2e910f..bbbad01 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -101,11 +101,9 @@
   *op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
 }
 
-void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */,
-                                       const uint8_t *blimit,
-                                       const uint8_t *limit,
-                                       const uint8_t *thresh,
-                                       int count) {
+void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
+                            const uint8_t *blimit, const uint8_t *limit,
+                            const uint8_t *thresh, int count) {
   int i;
 
   // loop filter designed to work using chars so that we can make maximum use
@@ -121,22 +119,17 @@
   }
 }
 
-void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p,
-                                          const uint8_t *blimit0,
-                                          const uint8_t *limit0,
-                                          const uint8_t *thresh0,
-                                          const uint8_t *blimit1,
-                                          const uint8_t *limit1,
-                                          const uint8_t *thresh1) {
-  vp9_loop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
-  vp9_loop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+                                 const uint8_t *limit0, const uint8_t *thresh0,
+                                 const uint8_t *blimit1, const uint8_t *limit1,
+                                 const uint8_t *thresh1) {
+  vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
-                                     const uint8_t *blimit,
-                                     const uint8_t *limit,
-                                     const uint8_t *thresh,
-                                     int count) {
+void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
+                          const uint8_t *limit, const uint8_t *thresh,
+                          int count) {
   int i;
 
   // loop filter designed to work using chars so that we can make maximum use
@@ -152,15 +145,12 @@
   }
 }
 
-void vp9_loop_filter_vertical_edge_16_c(uint8_t *s, int pitch,
-                                        const uint8_t *blimit0,
-                                        const uint8_t *limit0,
-                                        const uint8_t *thresh0,
-                                        const uint8_t *blimit1,
-                                        const uint8_t *limit1,
-                                        const uint8_t *thresh1) {
-  vp9_loop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
-  vp9_loop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
-                                  thresh1, 1);
+void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+                               const uint8_t *limit0, const uint8_t *thresh0,
+                               const uint8_t *blimit1, const uint8_t *limit1,
+                               const uint8_t *thresh1) {
+  vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
+  vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
+                       thresh1, 1);
 }
 
@@ -185,11 +175,9 @@
   }
 }
 
-void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p,
-                                         const uint8_t *blimit,
-                                         const uint8_t *limit,
-                                         const uint8_t *thresh,
-                                         int count) {
+void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
+                            const uint8_t *limit, const uint8_t *thresh,
+                            int count) {
   int i;
 
   // loop filter designed to work using chars so that we can make maximum use
@@ -208,22 +196,17 @@
   }
 }
 
-void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p,
-                                            const uint8_t *blimit0,
-                                            const uint8_t *limit0,
-                                            const uint8_t *thresh0,
-                                            const uint8_t *blimit1,
-                                            const uint8_t *limit1,
-                                            const uint8_t *thresh1) {
-  vp9_mbloop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
-  vp9_mbloop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+                                 const uint8_t *limit0, const uint8_t *thresh0,
+                                 const uint8_t *blimit1, const uint8_t *limit1,
+                                 const uint8_t *thresh1) {
+  vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
+  vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
-void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
-                                       const uint8_t *blimit,
-                                       const uint8_t *limit,
-                                       const uint8_t *thresh,
-                                       int count) {
+void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
+                          const uint8_t *limit, const uint8_t *thresh,
+                          int count) {
   int i;
 
   for (i = 0; i < 8 * count; ++i) {
@@ -239,15 +222,12 @@
   }
 }
 
-void vp9_mbloop_filter_vertical_edge_16_c(uint8_t *s, int pitch,
-                                          const uint8_t *blimit0,
-                                          const uint8_t *limit0,
-                                          const uint8_t *thresh0,
-                                          const uint8_t *blimit1,
-                                          const uint8_t *limit1,
-                                          const uint8_t *thresh1) {
-  vp9_mbloop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
-  vp9_mbloop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
-                                    thresh1, 1);
+void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+                               const uint8_t *limit0, const uint8_t *thresh0,
+                               const uint8_t *blimit1, const uint8_t *limit1,
+                               const uint8_t *thresh1) {
+  vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
+  vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
+                       thresh1, 1);
 }
 
@@ -302,11 +282,9 @@
   }
 }
 
-void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int p,
-                                    const uint8_t *blimit,
-                                    const uint8_t *limit,
-                                    const uint8_t *thresh,
-                                    int count) {
+void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                             const uint8_t *limit, const uint8_t *thresh,
+                             int count) {
   int i;
 
   // loop filter designed to work using chars so that we can make maximum use
@@ -355,16 +333,12 @@
   }
 }
 
-void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
-                                  const uint8_t *blimit,
-                                  const uint8_t *limit,
-                                  const uint8_t *thresh) {
+void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                           const uint8_t *limit, const uint8_t *thresh) {
   mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
 }
 
-void vp9_mb_lpf_vertical_edge_w_16_c(uint8_t *s, int p,
-                                     const uint8_t *blimit,
-                                     const uint8_t *limit,
-                                     const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+                                const uint8_t *limit, const uint8_t *thresh) {
   mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
 }
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 449b945..03f4cc2 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -42,29 +42,30 @@
   else
     return SWITCHABLE_FILTERS;
 }
-// Returns a context number for the given MB prediction signal
+
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real macroblocks.
+// The prediction flags in these dummy entries are initialized to 0.
+// 0 - inter/inter, inter/--, --/inter, --/--
+// 1 - intra/inter, inter/intra
+// 2 - intra/--, --/intra
+// 3 - intra/intra
 int vp9_get_intra_inter_context(const MACROBLOCKD *xd) {
   const MB_MODE_INFO *const above_mbmi = get_mbmi(get_above_mi(xd));
   const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
   const int has_above = above_mbmi != NULL;
   const int has_left = left_mbmi != NULL;
-  const int above_intra = has_above ? !is_inter_block(above_mbmi) : 1;
-  const int left_intra = has_left ? !is_inter_block(left_mbmi) : 1;
 
-  // The mode info data structure has a one element border above and to the
-  // left of the entries corresponding to real macroblocks.
-  // The prediction flags in these dummy entries are initialized to 0.
-  // 0 - inter/inter, inter/--, --/inter, --/--
-  // 1 - intra/inter, inter/intra
-  // 2 - intra/--, --/intra
-  // 3 - intra/intra
-  if (has_above && has_left)  // both edges available
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
     return left_intra && above_intra ? 3
                                      : left_intra || above_intra;
-  else if (has_above || has_left)  // one edge available
-    return 2 * (has_above ? above_intra : left_intra);
-  else
+  } else if (has_above || has_left) {  // one edge available
+    return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi);
+  } else {
     return 0;
+  }
 }
 
 int vp9_get_reference_mode_context(const VP9_COMMON *cm,
@@ -117,8 +118,7 @@
   const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
   const int above_in_image = above_mbmi != NULL;
   const int left_in_image = left_mbmi != NULL;
-  const int above_intra = above_in_image ? !is_inter_block(above_mbmi) : 1;
-  const int left_intra = left_in_image ? !is_inter_block(left_mbmi) : 1;
+
   // Note:
   // The mode info data structure has a one element border above and to the
   // left of the entries correpsonding to real macroblocks.
@@ -127,6 +127,9 @@
   const int var_ref_idx = !fix_ref_idx;
 
   if (above_in_image && left_in_image) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
     if (above_intra && left_intra) {  // intra/intra (2)
       pred_context = 2;
     } else if (above_intra || left_intra) {  // intra/inter
@@ -196,13 +199,14 @@
   const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
   const int has_above = above_mbmi != NULL;
   const int has_left = left_mbmi != NULL;
-  const int above_intra = has_above ? !is_inter_block(above_mbmi) : 1;
-  const int left_intra = has_left ? !is_inter_block(left_mbmi) : 1;
   // Note:
   // The mode info data structure has a one element border above and to the
   // left of the entries correpsonding to real macroblocks.
   // The prediction flags in these dummy entries are initialised to 0.
   if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
     if (above_intra && left_intra) {  // intra/intra
       pred_context = 2;
     } else if (above_intra || left_intra) {  // intra/inter or inter/intra
@@ -260,14 +264,15 @@
   const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
   const int has_above = above_mbmi != NULL;
   const int has_left = left_mbmi != NULL;
-  const int above_intra = has_above ? !is_inter_block(above_mbmi) : 1;
-  const int left_intra = has_left ? !is_inter_block(left_mbmi) : 1;
 
   // Note:
   // The mode info data structure has a one element border above and to the
   // left of the entries correpsonding to real macroblocks.
   // The prediction flags in these dummy entries are initialised to 0.
   if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
     if (above_intra && left_intra) {  // intra/intra
       pred_context = 2;
     } else if (above_intra || left_intra) {  // intra/inter or inter/intra
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 727f5c4..3025ed4 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -191,38 +191,38 @@
 #
 # Loopfilter
 #
-prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_mb_lpf_vertical_edge_w sse2 neon dspr2
+prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
+specialize vp9_lpf_vertical_16 sse2 neon dspr2
 
-prototype void vp9_mb_lpf_vertical_edge_w_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_mb_lpf_vertical_edge_w_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
+specialize vp9_lpf_vertical_16_dual sse2 neon dspr2
 
-prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mbloop_filter_vertical_edge sse2 neon dspr2
+prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_vertical_8 sse2 neon dspr2
 
-prototype void vp9_mbloop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_mbloop_filter_vertical_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_vertical_8_dual sse2 neon dspr2
 
-prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_loop_filter_vertical_edge mmx neon dspr2
+prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_vertical_4 mmx neon dspr2
 
-prototype void vp9_loop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_loop_filter_vertical_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_vertical_4_dual sse2 neon dspr2
 
-prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mb_lpf_horizontal_edge_w sse2 avx2 neon dspr2
+prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2
 
-prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mbloop_filter_horizontal_edge sse2 neon dspr2
+prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_8 sse2 neon dspr2
 
-prototype void vp9_mbloop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_mbloop_filter_horizontal_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2
 
-prototype void vp9_loop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_loop_filter_horizontal_edge mmx neon dspr2
+prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_4 mmx neon dspr2
 
-prototype void vp9_loop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_loop_filter_horizontal_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2
 
 #
 # post proc
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
index 3c5cb8f..439c028 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
+++ b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
@@ -933,7 +933,7 @@
     }
 }
 
-void vp9_mb_lpf_horizontal_edge_w_avx2(unsigned char *s, int p,
+void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p,
         const unsigned char *_blimit, const unsigned char *_limit,
         const unsigned char *_thresh, int count) {
     if (count == 1)
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
index 3ca55cf..448ad5a 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
+++ b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
@@ -846,24 +846,20 @@
 }
 
 // TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s,
-                                       int p,
-                                       const unsigned char *_blimit,
-                                       const unsigned char *_limit,
-                                       const unsigned char *_thresh,
-                                       int count) {
+void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
+                                const unsigned char *_blimit,
+                                const unsigned char *_limit,
+                                const unsigned char *_thresh, int count) {
   if (count == 1)
     mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
   else
     mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
 }
 
-void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s,
-                                            int p,
-                                            const unsigned char *_blimit,
-                                            const unsigned char *_limit,
-                                            const unsigned char *_thresh,
-                                            int count) {
+void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
+                               const unsigned char *_blimit,
+                               const unsigned char *_limit,
+                               const unsigned char *_thresh, int count) {
   DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
   DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
   DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
@@ -1083,13 +1079,13 @@
   }
 }
 
-void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p,
-                                               const uint8_t *_blimit0,
-                                               const uint8_t *_limit0,
-                                               const uint8_t *_thresh0,
-                                               const uint8_t *_blimit1,
-                                               const uint8_t *_limit1,
-                                               const uint8_t *_thresh1) {
+void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
+                                    const uint8_t *_blimit0,
+                                    const uint8_t *_limit0,
+                                    const uint8_t *_thresh0,
+                                    const uint8_t *_blimit1,
+                                    const uint8_t *_limit1,
+                                    const uint8_t *_thresh1) {
   DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
   DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
   DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
@@ -1330,14 +1326,13 @@
   }
 }
 
-void vp9_loop_filter_horizontal_edge_16_sse2(unsigned char *s,
-                                            int p,
-                                            const unsigned char *_blimit0,
-                                            const unsigned char *_limit0,
-                                            const unsigned char *_thresh0,
-                                            const unsigned char *_blimit1,
-                                            const unsigned char *_limit1,
-                                            const unsigned char *_thresh1) {
+void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
+                                    const unsigned char *_blimit0,
+                                    const unsigned char *_limit0,
+                                    const unsigned char *_thresh0,
+                                    const unsigned char *_blimit1,
+                                    const unsigned char *_limit1,
+                                    const unsigned char *_thresh1) {
   const __m128i blimit =
       _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0),
                          _mm_load_si128((const __m128i *)_blimit1));
@@ -1598,13 +1593,12 @@
   } while (++idx8x8 < num_8x8_to_transpose);
 }
 
-void vp9_loop_filter_vertical_edge_16_sse2(uint8_t *s, int p,
-                                           const uint8_t *blimit0,
-                                           const uint8_t *limit0,
-                                           const uint8_t *thresh0,
-                                           const uint8_t *blimit1,
-                                           const uint8_t *limit1,
-                                           const uint8_t *thresh1) {
+void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+                                  const uint8_t *limit0,
+                                  const uint8_t *thresh0,
+                                  const uint8_t *blimit1,
+                                  const uint8_t *limit1,
+                                  const uint8_t *thresh1) {
   DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8);
   unsigned char *src[2];
   unsigned char *dst[2];
@@ -1613,8 +1607,8 @@
   transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
 
   // Loop filtering
-  vp9_loop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
-                                          thresh0, blimit1, limit1, thresh1);
+  vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+                                 blimit1, limit1, thresh1);
   src[0] = t_dst;
   src[1] = t_dst + 8;
   dst[0] = s - 4;
@@ -1624,11 +1618,10 @@
   transpose(src, 16, dst, p, 2);
 }
 
-void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, int p,
-                                          const unsigned char *blimit,
-                                          const unsigned char *limit,
-                                          const unsigned char *thresh,
-                                          int count) {
+void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
+                             const unsigned char *blimit,
+                             const unsigned char *limit,
+                             const unsigned char *thresh, int count) {
   DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 8);
   unsigned char *src[1];
   unsigned char *dst[1];
@@ -1641,8 +1634,7 @@
   transpose(src, p, dst, 8, 1);
 
   // Loop filtering
-  vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 4 * 8, 8, blimit, limit,
-                                         thresh, 1);
+  vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
 
   src[0] = t_dst;
   dst[0] = s - 4;
@@ -1651,13 +1643,12 @@
   transpose(src, 8, dst, p, 1);
 }
 
-void vp9_mbloop_filter_vertical_edge_16_sse2(uint8_t *s, int p,
-                                             const uint8_t *blimit0,
-                                             const uint8_t *limit0,
-                                             const uint8_t *thresh0,
-                                             const uint8_t *blimit1,
-                                             const uint8_t *limit1,
-                                             const uint8_t *thresh1) {
+void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+                                  const uint8_t *limit0,
+                                  const uint8_t *thresh0,
+                                  const uint8_t *blimit1,
+                                  const uint8_t *limit1,
+                                  const uint8_t *thresh1) {
   DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8);
   unsigned char *src[2];
   unsigned char *dst[2];
@@ -1666,8 +1657,8 @@
   transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
 
   // Loop filtering
-  vp9_mbloop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
-                                            thresh0, blimit1, limit1, thresh1);
+  vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+                                 blimit1, limit1, thresh1);
   src[0] = t_dst;
   src[1] = t_dst + 8;
 
@@ -1678,10 +1669,10 @@
   transpose(src, 16, dst, p, 2);
 }
 
-void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, int p,
-                                     const unsigned char *blimit,
-                                     const unsigned char *limit,
-                                     const unsigned char *thresh) {
+void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
+                              const unsigned char *blimit,
+                              const unsigned char *limit,
+                              const unsigned char *thresh) {
   DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 16);
   unsigned char *src[2];
   unsigned char *dst[2];
@@ -1706,10 +1697,9 @@
   transpose(src, 8, dst, p, 2);
 }
 
-void vp9_mb_lpf_vertical_edge_w_16_sse2(unsigned char *s, int p,
-                                        const uint8_t *blimit,
-                                        const uint8_t *limit,
-                                        const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
+                                   const uint8_t *blimit, const uint8_t *limit,
+                                   const uint8_t *thresh) {
   DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256);
 
   // Transpose 16x16
diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vp9/common/x86/vp9_loopfilter_mmx.asm
index 4ebb51b..a7f6930 100644
--- a/vp9/common/x86/vp9_loopfilter_mmx.asm
+++ b/vp9/common/x86/vp9_loopfilter_mmx.asm
@@ -12,7 +12,7 @@
 %include "vpx_ports/x86_abi_support.asm"
 
 
-;void vp9_loop_filter_horizontal_edge_mmx
+;void vp9_lpf_horizontal_4_mmx
 ;(
 ;    unsigned char *src_ptr,
 ;    int src_pixel_step,
@@ -21,8 +21,8 @@
 ;    const char *thresh,
 ;    int  count
 ;)
-global sym(vp9_loop_filter_horizontal_edge_mmx) PRIVATE
-sym(vp9_loop_filter_horizontal_edge_mmx):
+global sym(vp9_lpf_horizontal_4_mmx) PRIVATE
+sym(vp9_lpf_horizontal_4_mmx):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -224,7 +224,7 @@
     ret
 
 
-;void vp9_loop_filter_vertical_edge_mmx
+;void vp9_lpf_vertical_4_mmx
 ;(
 ;    unsigned char *src_ptr,
 ;    int  src_pixel_step,
@@ -233,8 +233,8 @@
 ;    const char *thresh,
 ;    int count
 ;)
-global sym(vp9_loop_filter_vertical_edge_mmx) PRIVATE
-sym(vp9_loop_filter_vertical_edge_mmx):
+global sym(vp9_lpf_vertical_4_mmx) PRIVATE
+sym(vp9_lpf_vertical_4_mmx):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index c167004..eb2d8b5 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -76,9 +76,8 @@
   }
 }
 
-// len == 0 is not allowed
 static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
-  return start + len > start && start + len <= end;
+  return len != 0 && len <= (size_t)(end - start);  // len == 0 is invalid
 }
 
 static int decode_unsigned_max(struct vp9_read_bit_buffer *rb, int max) {
@@ -855,10 +854,14 @@
   if (!is_last) {
     if (!read_is_valid(*data, 4, data_end))
       vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
-          "Truncated packet or corrupt tile length");
+                         "Truncated packet or corrupt tile length");
 
     size = read_be32(*data);
     *data += 4;
+
+    if (size > data_end - *data)
+      vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+                         "Truncated packet or corrupt tile size");
   } else {
     size = data_end - *data;
   }
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index daf4465..308bc6a 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -42,8 +42,6 @@
 void vp9_entropy_mode_init();
 void vp9_coef_tree_initialize();
 
-static void set_default_lf_deltas(struct loopfilter *lf);
-
 #define DEFAULT_INTERP_FILTER SWITCHABLE
 
 #define SHARP_FILTER_QTHRESH 0          /* Q threshold for 8-tap sharp filter */
@@ -172,27 +170,15 @@
   }
 }
 
-static void setup_features(VP9_COMMON *cm) {
-  struct loopfilter *const lf = &cm->lf;
+static void reset_segment_features(VP9_COMMON *cm) {
   struct segmentation *const seg = &cm->seg;
 
   // Set up default state for MB feature flags
   seg->enabled = 0;
-
   seg->update_map = 0;
   seg->update_data = 0;
   vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
-
   vp9_clearall_segfeatures(seg);
-
-  lf->mode_ref_delta_enabled = 0;
-  lf->mode_ref_delta_update = 0;
-  vp9_zero(lf->ref_deltas);
-  vp9_zero(lf->mode_deltas);
-  vp9_zero(lf->last_ref_deltas);
-  vp9_zero(lf->last_mode_deltas);
-
-  set_default_lf_deltas(lf);
 }
 
 static void dealloc_compressor_data(VP9_COMP *cpi) {
@@ -489,23 +475,6 @@
   }
 }
 
-static void set_default_lf_deltas(struct loopfilter *lf) {
-  lf->mode_ref_delta_enabled = 1;
-  lf->mode_ref_delta_update = 1;
-
-  vp9_zero(lf->ref_deltas);
-  vp9_zero(lf->mode_deltas);
-
-  // Test of ref frame deltas
-  lf->ref_deltas[INTRA_FRAME] = 2;
-  lf->ref_deltas[LAST_FRAME] = 0;
-  lf->ref_deltas[GOLDEN_FRAME] = -2;
-  lf->ref_deltas[ALTREF_FRAME] = -2;
-
-  lf->mode_deltas[0] = 0;   // Zero
-  lf->mode_deltas[1] = 0;   // New mv
-}
-
 static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) {
   SPEED_FEATURES *sf = &cpi->sf;
   int i;
@@ -1219,7 +1188,7 @@
   cm->refresh_frame_context = 1;
   cm->reset_frame_context = 0;
 
-  setup_features(cm);
+  reset_segment_features(cm);
   set_high_precision_mv(cpi, 0);
 
   {
@@ -2955,7 +2924,7 @@
   if (frame_is_intra_only(cm)) {
     vp9_setup_key_frame(cpi);
     // Reset the loop filter deltas and segmentation map.
-    setup_features(cm);
+    reset_segment_features(cm);
 
     // If segmentation is enabled force a map update for key frames.
     if (seg->enabled) {
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index c123c46..445198b 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -508,8 +508,9 @@
     vpx_codec_alg_priv_t *ctx,
     vpx_codec_frame_buffer_t *fb_list, int fb_count,
     vpx_realloc_frame_buffer_cb_fn_t cb, void *user_priv) {
-  if (fb_count < REF_FRAMES) {
-    /* The application must pass in at least REF_FRAMES frame buffers. */
+  if (fb_count < (VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS)) {
+    /* The application must pass in at least VP9_MAXIMUM_REF_BUFFERS +
+     * VPX_MAXIMUM_WORK_BUFFERS frame buffers. */
     return VPX_CODEC_INVALID_PARAM;
   } else if (!ctx->pbi) {
     /* If the decoder has already been initialized, do not accept external
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 5ab3256..6c644f0 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -238,8 +238,9 @@
  *     using external frame buffers.
  *
  * \note
- * When decoding VP9, the application must pass in at least 8 external
- * frame buffers, as VP9 can have up to 8 reference frames.
+ * When decoding VP9, the application must pass in at least
+ * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
+ * buffers.
  */
 typedef vpx_codec_err_t (*vpx_codec_set_frame_buffers_fn_t)(
     vpx_codec_alg_priv_t *ctx,
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 30a74ea..24be82d 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -362,8 +362,9 @@
    *     using external frame buffers.
    *
    * \note
-   * When decoding VP9, the application must pass in at least 8 external
-   * frame buffers, as VP9 can have up to 8 reference frames.
+   * When decoding VP9, the application must pass in at least
+   * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
+   * buffers.
    */
   vpx_codec_err_t vpx_codec_set_frame_buffers(
       vpx_codec_ctx_t *ctx,
diff --git a/vpx/vpx_external_frame_buffer.h b/vpx/vpx_external_frame_buffer.h
index 0b787b8..98ce5fd 100644
--- a/vpx/vpx_external_frame_buffer.h
+++ b/vpx/vpx_external_frame_buffer.h
@@ -17,6 +17,14 @@
 
 #include "./vpx_integer.h"
 
+/*!\brief The maximum number of work buffers used by libvpx.
+ */
+#define VPX_MAXIMUM_WORK_BUFFERS 1
+
+/*!\brief The maximum number of reference buffers that a VP9 encoder may use.
+ */
+#define VP9_MAXIMUM_REF_BUFFERS 8
+
 /*!\brief External frame buffer
  *
  * This structure is used to hold external frame buffers passed into the
diff --git a/vpx_scale/vpx_scale.h b/vpx_scale/vpx_scale.h
index 9ddf62e..43fcf9d 100644
--- a/vpx_scale/vpx_scale.h
+++ b/vpx_scale/vpx_scale.h
@@ -9,8 +9,8 @@
  */
 
 
-#ifndef VPXSCALE_H
-#define VPXSCALE_H
+#ifndef VPX_SCALE_VPX_SCALE_H_
+#define VPX_SCALE_VPX_SCALE_H_
 
 #include "vpx_scale/yv12config.h"
 
@@ -24,4 +24,4 @@
                             unsigned int vratio,
                             unsigned int interlaced);
 
-#endif
+#endif  // VPX_SCALE_VPX_SCALE_H_
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index f23e116..bf5fc07 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef YV12_CONFIG_H
-#define YV12_CONFIG_H
+#ifndef VPX_SCALE_YV12CONFIG_H_
+#define VPX_SCALE_YV12CONFIG_H_
 
 #ifdef __cplusplus
 extern "C" {
@@ -85,4 +85,4 @@
 }
 #endif
 
-#endif  // YV12_CONFIG_H
+#endif  // VPX_SCALE_YV12CONFIG_H_
diff --git a/y4minput.h b/y4minput.h
index b2a390c..615debe 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -10,8 +10,10 @@
  *  Based on code from the OggTheora software codec source code,
  *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
  */
-#if !defined(_y4minput_H)
-# define _y4minput_H (1)
+
+#ifndef Y4MINPUT_H_
+#define Y4MINPUT_H_
+
 # include <stdio.h>
 # include "vpx/vpx_image.h"
 
@@ -60,4 +62,4 @@
 void y4m_input_close(y4m_input *_y4m);
 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);
 
-#endif
+#endif  // Y4MINPUT_H_