Merge "Clean up full-pixel search calling code"
diff --git a/build/make/configure.sh b/build/make/configure.sh
index a65d395..4c3b05f 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -859,6 +859,13 @@
             msvs_arch_dir=arm-msvs
             disable_feature multithread
             disable_feature unit_tests
+            vs_version=${tgt_cc##vs}
+            if [ $vs_version -ge 12 ]; then
+                # MSVC 2013 can't build plain .exe projects for ARM, only
+                # "AppContainerApplication" ones, which need an AppxManifest.
+                # Therefore disable the examples and build just the library.
+                disable_feature examples
+            fi
             ;;
         rvct)
             CC=armcc
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index 9dc7906..8529eed 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -292,6 +292,18 @@
         tag_content ProjectGuid "{${guid}}"
         tag_content RootNamespace ${name}
         tag_content Keyword ManagedCProj
+        if [ $vs_ver -ge 12 ] && [ "${platforms[0]}" = "ARM" ]; then
+            tag_content AppContainerApplication true
+            # The application type can be one of "Windows Store",
+            # "Windows Phone" or "Windows Phone Silverlight". The
+            # actual value doesn't matter from the libvpx point of view,
+            # since a static library built for one works on the others.
+            # The PlatformToolset field needs to be set in sync with this;
+            # for Windows Store and Windows Phone Silverlight it should be
+            # v120 while it should be v120_wp81 if the type is Windows Phone.
+            tag_content ApplicationType "Windows Store"
+            tag_content ApplicationTypeRevision 8.1
+        fi
     close_tag PropertyGroup
 
     tag Import \
@@ -324,18 +336,10 @@
                 fi
             fi
             if [ "$vs_ver" = "12" ]; then
-                if [ "$plat" = "ARM" ]; then
-                    # Setting the wp80 toolchain automatically sets the
-                    # WINAPI_FAMILY define, which is required for building
-                    # code for arm with the windows headers. Alternatively,
-                    # one could add AppContainerApplication=true in the Globals
-                    # section and add PrecompiledHeader=NotUsing and
-                    # CompileAsWinRT=false in ClCompile and SubSystem=Console
-                    # in Link.
-                    tag_content PlatformToolset v120_wp80
-                else
-                    tag_content PlatformToolset v120
-                fi
+                # Setting a PlatformToolset indicating Windows Phone isn't
+                # enough to build code for ARM with MSVC 2013; one strictly
+                # has to enable AppContainerApplication as well.
+                tag_content PlatformToolset v120
             fi
             tag_content CharacterSet Unicode
             if [ "$config" = "Release" ]; then
@@ -427,15 +431,25 @@
             if ${werror:-false}; then
                 tag_content TreatWarningAsError true
             fi
+            if [ $vs_ver -ge 11 ]; then
+                # We need to override the defaults for these settings
+                # if AppContainerApplication is set.
+                tag_content CompileAsWinRT false
+                tag_content PrecompiledHeader NotUsing
+                tag_content SDLCheck false
+            fi
             close_tag ClCompile
             case "$proj_kind" in
             exe)
                 open_tag Link
                 if [ "$name" != "obj_int_extract" ]; then
-                    tag_content AdditionalDependencies "$curlibs"
+                    tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
                     tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
                 fi
                 tag_content GenerateDebugInformation true
+                # Console is the default normally, but if
+                # AppContainerApplication is set, we need to override it.
+                tag_content SubSystem Console
                 close_tag Link
                 ;;
             dll)
diff --git a/examples/vp9_spatial_scalable_encoder.c b/examples/vp9_spatial_scalable_encoder.c
index 64e62ef..983f52d 100644
--- a/examples/vp9_spatial_scalable_encoder.c
+++ b/examples/vp9_spatial_scalable_encoder.c
@@ -344,7 +344,7 @@
     }
 
     res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
-                         pts, frame_duration, VPX_DL_REALTIME);
+                         pts, frame_duration, VPX_DL_GOOD_QUALITY);
     printf("%s", vpx_svc_get_message(&svc_ctx));
     if (res != VPX_CODEC_OK) {
       die_codec(&codec, "Failed to encode frame");
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 127775c..e2ba8a8 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -48,6 +48,10 @@
   vp9_fht4x4_c(in, out, stride, tx_type);
 }
 
+void fwht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+  vp9_fwht4x4_c(in, out, stride);
+}
+
 class Trans4x4TestBase {
  public:
   virtual ~Trans4x4TestBase() {}
@@ -57,7 +61,7 @@
 
   virtual void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) = 0;
 
-  void RunAccuracyCheck() {
+  void RunAccuracyCheck(int limit) {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     uint32_t max_error = 0;
     int64_t total_error = 0;
@@ -88,11 +92,13 @@
       }
     }
 
-    EXPECT_GE(1u, max_error)
-        << "Error: 4x4 FHT/IHT has an individual round trip error > 1";
+    EXPECT_GE(static_cast<uint32_t>(limit), max_error)
+        << "Error: 4x4 FHT/IHT has an individual round trip error > "
+        << limit;
 
-    EXPECT_GE(count_test_block , total_error)
-        << "Error: 4x4 FHT/IHT has average round trip error > 1 per block";
+    EXPECT_GE(count_test_block * limit, total_error)
+        << "Error: 4x4 FHT/IHT has average round trip error > " << limit
+        << " per block";
   }
 
   void RunCoeffCheck() {
@@ -150,7 +156,7 @@
     }
   }
 
-  void RunInvAccuracyCheck() {
+  void RunInvAccuracyCheck(int limit) {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     const int count_test_block = 1000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
@@ -173,8 +179,8 @@
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
         const uint32_t error = diff * diff;
-        EXPECT_GE(1u, error)
-            << "Error: 16x16 IDCT has error " << error
+        EXPECT_GE(static_cast<uint32_t>(limit), error)
+            << "Error: 4x4 IDCT has error " << error
             << " at index " << j;
       }
     }
@@ -213,7 +219,7 @@
 };
 
 TEST_P(Trans4x4DCT, AccuracyCheck) {
-  RunAccuracyCheck();
+  RunAccuracyCheck(1);
 }
 
 TEST_P(Trans4x4DCT, CoeffCheck) {
@@ -225,7 +231,7 @@
 }
 
 TEST_P(Trans4x4DCT, InvAccuracyCheck) {
-  RunInvAccuracyCheck();
+  RunInvAccuracyCheck(1);
 }
 
 class Trans4x4HT
@@ -257,7 +263,7 @@
 };
 
 TEST_P(Trans4x4HT, AccuracyCheck) {
-  RunAccuracyCheck();
+  RunAccuracyCheck(1);
 }
 
 TEST_P(Trans4x4HT, CoeffCheck) {
@@ -269,9 +275,51 @@
 }
 
 TEST_P(Trans4x4HT, InvAccuracyCheck) {
-  RunInvAccuracyCheck();
+  RunInvAccuracyCheck(1);
 }
 
+class Trans4x4WHT
+    : public Trans4x4TestBase,
+      public ::testing::TestWithParam<dct_4x4_param_t> {
+ public:
+  virtual ~Trans4x4WHT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_  = GET_PARAM(2);
+    pitch_    = 4;
+    fwd_txfm_ref = fwht4x4_ref;
+  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) {
+    fwd_txfm_(in, out, stride);
+  }
+  void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  fdct_t fwd_txfm_;
+  idct_t inv_txfm_;
+};
+
+TEST_P(Trans4x4WHT, AccuracyCheck) {
+  RunAccuracyCheck(0);
+}
+
+TEST_P(Trans4x4WHT, CoeffCheck) {
+  RunCoeffCheck();
+}
+
+TEST_P(Trans4x4WHT, MemCheck) {
+  RunMemCheck();
+}
+
+TEST_P(Trans4x4WHT, InvAccuracyCheck) {
+  RunInvAccuracyCheck(0);
+}
 using std::tr1::make_tuple;
 
 INSTANTIATE_TEST_CASE_P(
@@ -285,6 +333,10 @@
         make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
         make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
         make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0)));
 
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
@@ -301,6 +353,13 @@
         make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3)));
 #endif
 
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(
+    MMX, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0)));
+#endif
+
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans4x4DCT,
diff --git a/test/postproc.sh b/test/postproc.sh
new file mode 100755
index 0000000..050a368
--- /dev/null
+++ b/test/postproc.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx postproc example code. To add new tests to this
+##  file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to postproc_tests (on a new line).
+##
+. "$(dirname "$0")/tools_common.sh"  # quoted so paths with spaces still work
+
+# Environment check: Make sure input is available:
+#   $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+postproc_verify_environment() {
+  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs postproc using $1 as input file. $2 is the codec name, and is used
+# solely to name the output file.
+postproc() {
+  local decoder="${LIBVPX_BIN_PATH}/postproc${VPX_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/postproc_${codec}.raw"
+
+  [ -x "${decoder}" ] || return 1
+
+  eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
+
+  [ -e "${output_file}" ] || return 1
+}
+
+postproc_vp8() {
+  if [ "$(vp8_decode_available)" = "yes" ]; then
+    postproc "${VP8_IVF_FILE}" vp8 || return 1
+  fi
+}
+
+postproc_vp9() {
+  if [ "$(vpx_config_option_enabled CONFIG_VP9_POSTPROC)" = "yes" ]; then
+    if [ "$(vp9_decode_available)" = "yes" ]; then
+      postproc "${VP9_IVF_FILE}" vp9 || return 1
+    fi
+  fi
+}
+
+postproc_tests="postproc_vp8
+                postproc_vp9"
+
+run_tests postproc_verify_environment "${postproc_tests}"
diff --git a/test/register_state_check.h b/test/register_state_check.h
index 7e3d053..5987fe3 100644
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -82,8 +82,8 @@
 
 }  // namespace libvpx_test
 
-#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) \
-      && !CONFIG_SHARED && HAVE_NEON
+#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) && defined(CONFIG_VP9) \
+      && !CONFIG_SHARED && HAVE_NEON && CONFIG_VP9
 
 #include "vpx/vpx_integer.h"
 
diff --git a/third_party/libwebm/README.libvpx b/third_party/libwebm/README.libvpx
index 2c7570d..93814b7 100644
--- a/third_party/libwebm/README.libvpx
+++ b/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
 URL: https://chromium.googlesource.com/webm/libwebm
-Version: a7118d8ec564e9db841da1eb01f547f3229f240a
+Version: 249629d46c6e9391f25a90cff6d19075f47474cb
 License: BSD
 License File: LICENSE.txt
 
diff --git a/third_party/libwebm/mkvmuxer.cpp b/third_party/libwebm/mkvmuxer.cpp
index 8ae0dda..45167ea 100644
--- a/third_party/libwebm/mkvmuxer.cpp
+++ b/third_party/libwebm/mkvmuxer.cpp
@@ -22,7 +22,7 @@
 
 #ifdef _MSC_VER
 // Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable:4996)
+#pragma warning(disable : 4996)
 #endif
 
 namespace mkvmuxer {
@@ -40,7 +40,7 @@
 
   char*& dst = *dst_ptr;
 
-  delete [] dst;
+  delete[] dst;
   dst = NULL;
 
   if (src == NULL)
@@ -61,11 +61,9 @@
 //
 // IMkvWriter Class
 
-IMkvWriter::IMkvWriter() {
-}
+IMkvWriter::IMkvWriter() {}
 
-IMkvWriter::~IMkvWriter() {
-}
+IMkvWriter::~IMkvWriter() {}
 
 bool WriteEbmlHeader(IMkvWriter* writer) {
   // Level 0
@@ -97,8 +95,7 @@
   return true;
 }
 
-bool ChunkedCopy(mkvparser::IMkvReader* source,
-                 mkvmuxer::IMkvWriter* dst,
+bool ChunkedCopy(mkvparser::IMkvReader* source, mkvmuxer::IMkvWriter* dst,
                  mkvmuxer::int64 start, int64 size) {
   // TODO(vigneshv): Check if this is a reasonable value.
   const uint32 kBufSize = 2048;
@@ -130,12 +127,11 @@
       length_(0),
       track_number_(0),
       timestamp_(0),
-      discard_padding_(0) {
-}
+      discard_padding_(0) {}
 
 Frame::~Frame() {
-  delete [] frame_;
-  delete [] additional_;
+  delete[] frame_;
+  delete[] additional_;
 }
 
 bool Frame::Init(const uint8* frame, uint64 length) {
@@ -144,7 +140,7 @@
   if (!data)
     return false;
 
-  delete [] frame_;
+  delete[] frame_;
   frame_ = data;
   length_ = length;
 
@@ -159,7 +155,7 @@
   if (!data)
     return false;
 
-  delete [] additional_;
+  delete[] additional_;
   additional_ = data;
   additional_length_ = length;
   add_id_ = add_id;
@@ -177,11 +173,9 @@
       track_(0),
       cluster_pos_(0),
       block_number_(1),
-      output_block_number_(true) {
-}
+      output_block_number_(true) {}
 
-CuePoint::~CuePoint() {
-}
+CuePoint::~CuePoint() {}
 
 bool CuePoint::Write(IMkvWriter* writer) const {
   if (!writer || track_ < 1 || cluster_pos_ < 1)
@@ -191,10 +185,10 @@
   size += EbmlElementSize(kMkvCueTrack, track_);
   if (output_block_number_ && block_number_ > 1)
     size += EbmlElementSize(kMkvCueBlockNumber, block_number_);
-  const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions,
-                                                      size) + size;
-  const uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) +
-                              track_pos_size;
+  const uint64 track_pos_size =
+      EbmlMasterElementSize(kMkvCueTrackPositions, size) + size;
+  const uint64 payload_size =
+      EbmlElementSize(kMkvCueTime, time_) + track_pos_size;
 
   if (!WriteEbmlMasterElement(writer, kMkvCuePoint, payload_size))
     return false;
@@ -231,10 +225,10 @@
   size += EbmlElementSize(kMkvCueTrack, track_);
   if (output_block_number_ && block_number_ > 1)
     size += EbmlElementSize(kMkvCueBlockNumber, block_number_);
-  const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions,
-                                                      size) + size;
-  const uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) +
-                              track_pos_size;
+  const uint64 track_pos_size =
+      EbmlMasterElementSize(kMkvCueTrackPositions, size) + size;
+  const uint64 payload_size =
+      EbmlElementSize(kMkvCueTime, time_) + track_pos_size;
 
   return payload_size;
 }
@@ -252,8 +246,7 @@
     : cue_entries_capacity_(0),
       cue_entries_size_(0),
       cue_entries_(NULL),
-      output_block_number_(true) {
-}
+      output_block_number_(true) {}
 
 Cues::~Cues() {
   if (cue_entries_) {
@@ -261,7 +254,7 @@
       CuePoint* const cue = cue_entries_[i];
       delete cue;
     }
-    delete [] cue_entries_;
+    delete[] cue_entries_;
   }
 }
 
@@ -278,7 +271,7 @@
       return false;
 
     CuePoint** const cues =
-        new (std::nothrow) CuePoint*[new_capacity];  // NOLINT
+        new (std::nothrow) CuePoint* [new_capacity];  // NOLINT
     if (!cues)
       return false;
 
@@ -286,7 +279,7 @@
       cues[i] = cue_entries_[i];
     }
 
-    delete [] cue_entries_;
+    delete[] cue_entries_;
 
     cue_entries_ = cues;
     cue_entries_capacity_ = new_capacity;
@@ -402,18 +395,15 @@
       encoding_order_(0),
       encoding_scope_(1),
       encoding_type_(1),
-      enc_key_id_length_(0) {
-}
+      enc_key_id_length_(0) {}
 
-ContentEncoding::~ContentEncoding() {
-  delete [] enc_key_id_;
-}
+ContentEncoding::~ContentEncoding() { delete[] enc_key_id_; }
 
 bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) {
   if (!id || length < 1)
     return false;
 
-  delete [] enc_key_id_;
+  delete[] enc_key_id_;
 
   enc_key_id_ =
       new (std::nothrow) uint8[static_cast<size_t>(length)];  // NOLINT
@@ -429,9 +419,8 @@
 uint64 ContentEncoding::Size() const {
   const uint64 encryption_size = EncryptionSize();
   const uint64 encoding_size = EncodingSize(0, encryption_size);
-  const uint64 encodings_size = EbmlMasterElementSize(kMkvContentEncoding,
-                                                      encoding_size) +
-                                encoding_size;
+  const uint64 encodings_size =
+      EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size;
 
   return encodings_size;
 }
@@ -439,9 +428,8 @@
 bool ContentEncoding::Write(IMkvWriter* writer) const {
   const uint64 encryption_size = EncryptionSize();
   const uint64 encoding_size = EncodingSize(0, encryption_size);
-  const uint64 size = EbmlMasterElementSize(kMkvContentEncoding,
-                                            encoding_size) +
-                      encoding_size;
+  const uint64 size =
+      EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size;
 
   const int64 payload_position = writer->Position();
   if (payload_position < 0)
@@ -460,9 +448,7 @@
     return false;
   if (!WriteEbmlElement(writer, kMkvContentEncAlgo, enc_algo_))
     return false;
-  if (!WriteEbmlElement(writer,
-                        kMkvContentEncKeyID,
-                        enc_key_id_,
+  if (!WriteEbmlElement(writer, kMkvContentEncKeyID, enc_key_id_,
                         enc_key_id_length_))
     return false;
 
@@ -486,9 +472,9 @@
   uint64 encoding_size = 0;
 
   if (encryption_size > 0) {
-    encoding_size += EbmlMasterElementSize(kMkvContentEncryption,
-                                           encryption_size) +
-                     encryption_size;
+    encoding_size +=
+        EbmlMasterElementSize(kMkvContentEncryption, encryption_size) +
+        encryption_size;
   }
   encoding_size += EbmlElementSize(kMkvContentEncodingType, encoding_type_);
   encoding_size += EbmlElementSize(kMkvContentEncodingScope, encoding_scope_);
@@ -500,9 +486,8 @@
 uint64 ContentEncoding::EncryptionSize() const {
   const uint64 aes_size = enc_aes_settings_.Size();
 
-  uint64 encryption_size = EbmlElementSize(kMkvContentEncKeyID,
-                                           enc_key_id_,
-                                           enc_key_id_length_);
+  uint64 encryption_size =
+      EbmlElementSize(kMkvContentEncKeyID, enc_key_id_, enc_key_id_length_);
   encryption_size += EbmlElementSize(kMkvContentEncAlgo, enc_algo_);
 
   return encryption_size + aes_size;
@@ -523,23 +508,23 @@
       uid_(MakeUID(seed)),
       codec_delay_(0),
       seek_pre_roll_(0),
+      default_duration_(0),
       codec_private_length_(0),
       content_encoding_entries_(NULL),
-      content_encoding_entries_size_(0) {
-}
+      content_encoding_entries_size_(0) {}
 
 Track::~Track() {
-  delete [] codec_id_;
-  delete [] codec_private_;
-  delete [] language_;
-  delete [] name_;
+  delete[] codec_id_;
+  delete[] codec_private_;
+  delete[] language_;
+  delete[] name_;
 
   if (content_encoding_entries_) {
     for (uint32 i = 0; i < content_encoding_entries_size_; ++i) {
       ContentEncoding* const encoding = content_encoding_entries_[i];
       delete encoding;
     }
-    delete [] content_encoding_entries_;
+    delete[] content_encoding_entries_;
   }
 }
 
@@ -547,14 +532,14 @@
   const uint32 count = content_encoding_entries_size_ + 1;
 
   ContentEncoding** const content_encoding_entries =
-      new (std::nothrow) ContentEncoding*[count];  // NOLINT
+      new (std::nothrow) ContentEncoding* [count];  // NOLINT
   if (!content_encoding_entries)
     return false;
 
   ContentEncoding* const content_encoding =
       new (std::nothrow) ContentEncoding();  // NOLINT
   if (!content_encoding) {
-    delete [] content_encoding_entries;
+    delete[] content_encoding_entries;
     return false;
   }
 
@@ -562,7 +547,7 @@
     content_encoding_entries[i] = content_encoding_entries_[i];
   }
 
-  delete [] content_encoding_entries_;
+  delete[] content_encoding_entries_;
 
   content_encoding_entries_ = content_encoding_entries;
   content_encoding_entries_[content_encoding_entries_size_] = content_encoding;
@@ -587,8 +572,7 @@
   if (codec_id_)
     size += EbmlElementSize(kMkvCodecID, codec_id_);
   if (codec_private_)
-    size += EbmlElementSize(kMkvCodecPrivate,
-                            codec_private_,
+    size += EbmlElementSize(kMkvCodecPrivate, codec_private_,
                             codec_private_length_);
   if (language_)
     size += EbmlElementSize(kMkvLanguage, language_);
@@ -600,6 +584,8 @@
     size += EbmlElementSize(kMkvCodecDelay, codec_delay_);
   if (seek_pre_roll_)
     size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_);
+  if (default_duration_)
+    size += EbmlElementSize(kMkvDefaultDuration, default_duration_);
 
   if (content_encoding_entries_size_ > 0) {
     uint64 content_encodings_size = 0;
@@ -608,9 +594,9 @@
       content_encodings_size += encoding->Size();
     }
 
-    size += EbmlMasterElementSize(kMkvContentEncodings,
-                                  content_encodings_size) +
-            content_encodings_size;
+    size +=
+        EbmlMasterElementSize(kMkvContentEncodings, content_encodings_size) +
+        content_encodings_size;
   }
 
   return size;
@@ -633,14 +619,17 @@
   if (!WriteEbmlMasterElement(writer, kMkvTrackEntry, payload_size))
     return false;
 
+  // |type_| has to be specified before the Track can be written.
+  if (!type_)
+    return false;
+
   uint64 size = EbmlElementSize(kMkvTrackNumber, number_);
   size += EbmlElementSize(kMkvTrackUID, uid_);
   size += EbmlElementSize(kMkvTrackType, type_);
   if (codec_id_)
     size += EbmlElementSize(kMkvCodecID, codec_id_);
   if (codec_private_)
-    size += EbmlElementSize(kMkvCodecPrivate,
-                            codec_private_,
+    size += EbmlElementSize(kMkvCodecPrivate, codec_private_,
                             codec_private_length_);
   if (language_)
     size += EbmlElementSize(kMkvLanguage, language_);
@@ -652,7 +641,8 @@
     size += EbmlElementSize(kMkvCodecDelay, codec_delay_);
   if (seek_pre_roll_)
     size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_);
-
+  if (default_duration_)
+    size += EbmlElementSize(kMkvDefaultDuration, default_duration_);
 
   const int64 payload_position = writer->Position();
   if (payload_position < 0)
@@ -665,8 +655,7 @@
   if (!WriteEbmlElement(writer, kMkvTrackType, type_))
     return false;
   if (max_block_additional_id_) {
-    if (!WriteEbmlElement(writer,
-                          kMkvMaxBlockAdditionID,
+    if (!WriteEbmlElement(writer, kMkvMaxBlockAdditionID,
                           max_block_additional_id_)) {
       return false;
     }
@@ -679,14 +668,16 @@
     if (!WriteEbmlElement(writer, kMkvSeekPreRoll, seek_pre_roll_))
       return false;
   }
+  if (default_duration_) {
+    if (!WriteEbmlElement(writer, kMkvDefaultDuration, default_duration_))
+      return false;
+  }
   if (codec_id_) {
     if (!WriteEbmlElement(writer, kMkvCodecID, codec_id_))
       return false;
   }
   if (codec_private_) {
-    if (!WriteEbmlElement(writer,
-                          kMkvCodecPrivate,
-                          codec_private_,
+    if (!WriteEbmlElement(writer, kMkvCodecPrivate, codec_private_,
                           codec_private_length_))
       return false;
   }
@@ -711,8 +702,7 @@
       content_encodings_size += encoding->Size();
     }
 
-    if (!WriteEbmlMasterElement(writer,
-                                kMkvContentEncodings,
+    if (!WriteEbmlMasterElement(writer, kMkvContentEncodings,
                                 content_encodings_size))
       return false;
 
@@ -733,7 +723,7 @@
   if (!codec_private || length < 1)
     return false;
 
-  delete [] codec_private_;
+  delete[] codec_private_;
 
   codec_private_ =
       new (std::nothrow) uint8[static_cast<size_t>(length)];  // NOLINT
@@ -748,7 +738,7 @@
 
 void Track::set_codec_id(const char* codec_id) {
   if (codec_id) {
-    delete [] codec_id_;
+    delete[] codec_id_;
 
     const size_t length = strlen(codec_id) + 1;
     codec_id_ = new (std::nothrow) char[length];  // NOLINT
@@ -765,7 +755,7 @@
 // TODO(fgalligan): Vet the language parameter.
 void Track::set_language(const char* language) {
   if (language) {
-    delete [] language_;
+    delete[] language_;
 
     const size_t length = strlen(language) + 1;
     language_ = new (std::nothrow) char[length];  // NOLINT
@@ -781,7 +771,7 @@
 
 void Track::set_name(const char* name) {
   if (name) {
-    delete [] name_;
+    delete[] name_;
 
     const size_t length = strlen(name) + 1;
     name_ = new (std::nothrow) char[length];  // NOLINT
@@ -807,15 +797,12 @@
       height_(0),
       stereo_mode_(0),
       alpha_mode_(0),
-      width_(0) {
-}
+      width_(0) {}
 
-VideoTrack::~VideoTrack() {
-}
+VideoTrack::~VideoTrack() {}
 
 bool VideoTrack::SetStereoMode(uint64 stereo_mode) {
-  if (stereo_mode != kMono &&
-      stereo_mode != kSideBySideLeftIsFirst &&
+  if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst &&
       stereo_mode != kTopBottomRightIsFirst &&
       stereo_mode != kTopBottomLeftIsFirst &&
       stereo_mode != kSideBySideRightIsFirst)
@@ -826,8 +813,7 @@
 }
 
 bool VideoTrack::SetAlphaMode(uint64 alpha_mode) {
-  if (alpha_mode != kNoAlpha &&
-      alpha_mode != kAlpha)
+  if (alpha_mode != kNoAlpha && alpha_mode != kAlpha)
     return false;
 
   alpha_mode_ = alpha_mode;
@@ -873,8 +859,7 @@
     if (!WriteEbmlElement(writer, kMkvAlphaMode, alpha_mode_))
       return false;
   if (frame_rate_ > 0.0)
-    if (!WriteEbmlElement(writer,
-                          kMkvFrameRate,
+    if (!WriteEbmlElement(writer, kMkvFrameRate,
                           static_cast<float>(frame_rate_)))
       return false;
 
@@ -908,20 +893,15 @@
 // AudioTrack Class
 
 AudioTrack::AudioTrack(unsigned int* seed)
-    : Track(seed),
-      bit_depth_(0),
-      channels_(1),
-      sample_rate_(0.0) {
-}
+    : Track(seed), bit_depth_(0), channels_(1), sample_rate_(0.0) {}
 
-AudioTrack::~AudioTrack() {
-}
+AudioTrack::~AudioTrack() {}
 
 uint64 AudioTrack::PayloadSize() const {
   const uint64 parent_size = Track::PayloadSize();
 
-  uint64 size = EbmlElementSize(kMkvSamplingFrequency,
-                                static_cast<float>(sample_rate_));
+  uint64 size =
+      EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_));
   size += EbmlElementSize(kMkvChannels, channels_);
   if (bit_depth_ > 0)
     size += EbmlElementSize(kMkvBitDepth, bit_depth_);
@@ -935,8 +915,8 @@
     return false;
 
   // Calculate AudioSettings size.
-  uint64 size = EbmlElementSize(kMkvSamplingFrequency,
-                                static_cast<float>(sample_rate_));
+  uint64 size =
+      EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_));
   size += EbmlElementSize(kMkvChannels, channels_);
   if (bit_depth_ > 0)
     size += EbmlElementSize(kMkvBitDepth, bit_depth_);
@@ -948,8 +928,7 @@
   if (payload_position < 0)
     return false;
 
-  if (!WriteEbmlElement(writer,
-                        kMkvSamplingFrequency,
+  if (!WriteEbmlElement(writer, kMkvSamplingFrequency,
                         static_cast<float>(sample_rate_)))
     return false;
   if (!WriteEbmlElement(writer, kMkvChannels, channels_))
@@ -975,11 +954,7 @@
 const char Tracks::kVp8CodecId[] = "V_VP8";
 const char Tracks::kVp9CodecId[] = "V_VP9";
 
-
-Tracks::Tracks()
-    : track_entries_(NULL),
-      track_entries_size_(0) {
-}
+Tracks::Tracks() : track_entries_(NULL), track_entries_size_(0) {}
 
 Tracks::~Tracks() {
   if (track_entries_) {
@@ -987,7 +962,7 @@
       Track* const track = track_entries_[i];
       delete track;
     }
-    delete [] track_entries_;
+    delete[] track_entries_;
   }
 }
 
@@ -1015,7 +990,7 @@
 
   const uint32 count = track_entries_size_ + 1;
 
-  Track** const track_entries = new (std::nothrow) Track*[count];  // NOLINT
+  Track** const track_entries = new (std::nothrow) Track* [count];  // NOLINT
   if (!track_entries)
     return false;
 
@@ -1023,7 +998,7 @@
     track_entries[i] = track_entries_[i];
   }
 
-  delete [] track_entries_;
+  delete[] track_entries_;
 
   // Find the lowest availible track number > 0.
   if (track_num == 0) {
@@ -1125,21 +1100,16 @@
 //
 // Chapter Class
 
-bool Chapter::set_id(const char* id) {
-  return StrCpy(id, &id_);
-}
+bool Chapter::set_id(const char* id) { return StrCpy(id, &id_); }
 
-void Chapter::set_time(const Segment& segment,
-                       uint64 start_ns,
-                       uint64 end_ns) {
+void Chapter::set_time(const Segment& segment, uint64 start_ns, uint64 end_ns) {
   const SegmentInfo* const info = segment.GetSegmentInfo();
   const uint64 timecode_scale = info->timecode_scale();
   start_timecode_ = start_ns / timecode_scale;
   end_timecode_ = end_ns / timecode_scale;
 }
 
-bool Chapter::add_string(const char* title,
-                         const char* language,
+bool Chapter::add_string(const char* title, const char* language,
                          const char* country) {
   if (!ExpandDisplaysArray())
     return false;
@@ -1171,8 +1141,7 @@
   // active on the array.
 }
 
-Chapter::~Chapter() {
-}
+Chapter::~Chapter() {}
 
 void Chapter::Init(unsigned int* seed) {
   id_ = NULL;
@@ -1200,7 +1169,7 @@
     d.Clear();
   }
 
-  delete [] displays_;
+  delete[] displays_;
   displays_ = NULL;
 
   displays_size_ = 0;
@@ -1220,7 +1189,7 @@
     displays[idx] = displays_[idx];  // shallow copy
   }
 
-  delete [] displays_;
+  delete[] displays_;
 
   displays_ = displays;
   displays_size_ = size;
@@ -1229,11 +1198,10 @@
 }
 
 uint64 Chapter::WriteAtom(IMkvWriter* writer) const {
-  uint64 payload_size =
-      EbmlElementSize(kMkvChapterStringUID, id_) +
-      EbmlElementSize(kMkvChapterUID, uid_) +
-      EbmlElementSize(kMkvChapterTimeStart, start_timecode_) +
-      EbmlElementSize(kMkvChapterTimeEnd, end_timecode_);
+  uint64 payload_size = EbmlElementSize(kMkvChapterStringUID, id_) +
+                        EbmlElementSize(kMkvChapterUID, uid_) +
+                        EbmlElementSize(kMkvChapterTimeStart, start_timecode_) +
+                        EbmlElementSize(kMkvChapterTimeEnd, end_timecode_);
 
   for (int idx = 0; idx < displays_count_; ++idx) {
     const Display& d = displays_[idx];
@@ -1241,8 +1209,7 @@
   }
 
   const uint64 atom_size =
-      EbmlMasterElementSize(kMkvChapterAtom, payload_size) +
-      payload_size;
+      EbmlMasterElementSize(kMkvChapterAtom, payload_size) + payload_size;
 
   if (writer == NULL)
     return atom_size;
@@ -1313,8 +1280,7 @@
     payload_size += EbmlElementSize(kMkvChapCountry, country_);
 
   const uint64 display_size =
-      EbmlMasterElementSize(kMkvChapterDisplay, payload_size) +
-      payload_size;
+      EbmlMasterElementSize(kMkvChapterDisplay, payload_size) + payload_size;
 
   if (writer == NULL)
     return display_size;
@@ -1349,11 +1315,7 @@
 //
 // Chapters Class
 
-Chapters::Chapters()
-    : chapters_size_(0),
-      chapters_count_(0),
-      chapters_(NULL) {
-}
+Chapters::Chapters() : chapters_size_(0), chapters_count_(0), chapters_(NULL) {}
 
 Chapters::~Chapters() {
   while (chapters_count_ > 0) {
@@ -1361,13 +1323,11 @@
     chapter.Clear();
   }
 
-  delete [] chapters_;
+  delete[] chapters_;
   chapters_ = NULL;
 }
 
-int Chapters::Count() const {
-  return chapters_count_;
-}
+int Chapters::Count() const { return chapters_count_; }
 
 Chapter* Chapters::AddChapter(unsigned int* seed) {
   if (!ExpandChaptersArray())
@@ -1417,7 +1377,7 @@
     src.ShallowCopy(dst);
   }
 
-  delete [] chapters_;
+  delete[] chapters_;
 
   chapters_ = chapters;
   chapters_size_ = size;
@@ -1434,8 +1394,7 @@
   }
 
   const uint64 edition_size =
-      EbmlMasterElementSize(kMkvEditionEntry, payload_size) +
-      payload_size;
+      EbmlMasterElementSize(kMkvEditionEntry, payload_size) + payload_size;
 
   if (writer == NULL)  // return size only
     return edition_size;
@@ -1473,11 +1432,9 @@
       position_for_cues_(cues_pos),
       size_position_(-1),
       timecode_(timecode),
-      writer_(NULL) {
-}
+      writer_(NULL) {}
 
-Cluster::~Cluster() {
-}
+Cluster::~Cluster() {}
 
 bool Cluster::Init(IMkvWriter* ptr_writer) {
   if (!ptr_writer) {
@@ -1487,69 +1444,39 @@
   return true;
 }
 
-bool Cluster::AddFrame(const uint8* frame,
-                       uint64 length,
-                       uint64 track_number,
-                       uint64 abs_timecode,
-                       bool is_key) {
-  return DoWriteBlock(frame,
-                      length,
-                      track_number,
-                      abs_timecode,
-                      is_key ? 1 : 0,
+bool Cluster::AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+                       uint64 abs_timecode, bool is_key) {
+  return DoWriteBlock(frame, length, track_number, abs_timecode, is_key ? 1 : 0,
                       &WriteSimpleBlock);
 }
 
-bool Cluster::AddFrameWithAdditional(const uint8* frame,
-                                     uint64 length,
+bool Cluster::AddFrameWithAdditional(const uint8* frame, uint64 length,
                                      const uint8* additional,
-                                     uint64 additional_length,
-                                     uint64 add_id,
-                                     uint64 track_number,
-                                     uint64 abs_timecode,
+                                     uint64 additional_length, uint64 add_id,
+                                     uint64 track_number, uint64 abs_timecode,
                                      bool is_key) {
-  return DoWriteBlockWithAdditional(frame,
-                                    length,
-                                    additional,
-                                    additional_length,
-                                    add_id,
-                                    track_number,
-                                    abs_timecode,
-                                    is_key ? 1 : 0,
-                                    &WriteBlockWithAdditional);
+  return DoWriteBlockWithAdditional(
+      frame, length, additional, additional_length, add_id, track_number,
+      abs_timecode, is_key ? 1 : 0, &WriteBlockWithAdditional);
 }
 
-bool Cluster::AddFrameWithDiscardPadding(const uint8* frame,
-                                         uint64 length,
+bool Cluster::AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
                                          int64 discard_padding,
                                          uint64 track_number,
-                                         uint64 abs_timecode,
-                                         bool is_key) {
-  return DoWriteBlockWithDiscardPadding(frame,
-                                        length,
-                                        discard_padding,
-                                        track_number,
-                                        abs_timecode,
-                                        is_key ? 1 : 0,
-                                        &WriteBlockWithDiscardPadding);
+                                         uint64 abs_timecode, bool is_key) {
+  return DoWriteBlockWithDiscardPadding(
+      frame, length, discard_padding, track_number, abs_timecode,
+      is_key ? 1 : 0, &WriteBlockWithDiscardPadding);
 }
 
-bool Cluster::AddMetadata(const uint8* frame,
-                          uint64 length,
-                          uint64 track_number,
-                          uint64 abs_timecode,
+bool Cluster::AddMetadata(const uint8* frame, uint64 length,
+                          uint64 track_number, uint64 abs_timecode,
                           uint64 duration_timecode) {
-  return DoWriteBlock(frame,
-                      length,
-                      track_number,
-                      abs_timecode,
-                      duration_timecode,
-                      &WriteMetadataBlock);
+  return DoWriteBlock(frame, length, track_number, abs_timecode,
+                      duration_timecode, &WriteMetadataBlock);
 }
 
-void Cluster::AddPayloadSize(uint64 size) {
-  payload_size_ += size;
-}
+void Cluster::AddPayloadSize(uint64 size) { payload_size_ += size; }
 
 bool Cluster::Finalize() {
   if (!writer_ || finalized_ || size_position_ == -1)
@@ -1575,8 +1502,7 @@
 
 uint64 Cluster::Size() const {
   const uint64 element_size =
-      EbmlMasterElementSize(kMkvCluster,
-                            0xFFFFFFFFFFFFFFFFULL) + payload_size_;
+      EbmlMasterElementSize(kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) + payload_size_;
   return element_size;
 }
 
@@ -1616,13 +1542,9 @@
   return rel_timecode;
 }
 
-bool Cluster::DoWriteBlock(
-    const uint8* frame,
-    uint64 length,
-    uint64 track_number,
-    uint64 abs_timecode,
-    uint64 generic_arg,
-    WriteBlock write_block) {
+bool Cluster::DoWriteBlock(const uint8* frame, uint64 length,
+                           uint64 track_number, uint64 abs_timecode,
+                           uint64 generic_arg, WriteBlock write_block) {
   if (frame == NULL || length == 0)
     return false;
 
@@ -1636,12 +1558,8 @@
   if (!PreWriteBlock(write_block))
     return false;
 
-  const uint64 element_size = (*write_block)(writer_,
-                                             frame,
-                                             length,
-                                             track_number,
-                                             rel_timecode,
-                                             generic_arg);
+  const uint64 element_size = (*write_block)(
+      writer_, frame, length, track_number, rel_timecode, generic_arg);
   if (element_size == 0)
     return false;
 
@@ -1650,17 +1568,11 @@
 }
 
 bool Cluster::DoWriteBlockWithAdditional(
-    const uint8* frame,
-    uint64 length,
-    const uint8* additional,
-    uint64 additional_length,
-    uint64 add_id,
-    uint64 track_number,
-    uint64 abs_timecode,
-    uint64 generic_arg,
-    WriteBlockAdditional write_block) {
-  if (frame == NULL || length == 0 ||
-      additional == NULL || additional_length == 0)
+    const uint8* frame, uint64 length, const uint8* additional,
+    uint64 additional_length, uint64 add_id, uint64 track_number,
+    uint64 abs_timecode, uint64 generic_arg, WriteBlockAdditional write_block) {
+  if (frame == NULL || length == 0 || additional == NULL ||
+      additional_length == 0)
     return false;
 
   if (!IsValidTrackNumber(track_number))
@@ -1673,15 +1585,9 @@
   if (!PreWriteBlock(write_block))
     return false;
 
-  const uint64 element_size = (*write_block)(writer_,
-                                             frame,
-                                             length,
-                                             additional,
-                                             additional_length,
-                                             add_id,
-                                             track_number,
-                                             rel_timecode,
-                                             generic_arg);
+  const uint64 element_size =
+      (*write_block)(writer_, frame, length, additional, additional_length,
+                     add_id, track_number, rel_timecode, generic_arg);
   if (element_size == 0)
     return false;
 
@@ -1690,12 +1596,8 @@
 }
 
 bool Cluster::DoWriteBlockWithDiscardPadding(
-    const uint8* frame,
-    uint64 length,
-    int64 discard_padding,
-    uint64 track_number,
-    uint64 abs_timecode,
-    uint64 generic_arg,
+    const uint8* frame, uint64 length, int64 discard_padding,
+    uint64 track_number, uint64 abs_timecode, uint64 generic_arg,
     WriteBlockDiscardPadding write_block) {
   if (frame == NULL || length == 0 || discard_padding <= 0)
     return false;
@@ -1710,13 +1612,9 @@
   if (!PreWriteBlock(write_block))
     return false;
 
-  const uint64 element_size = (*write_block)(writer_,
-                                             frame,
-                                             length,
-                                             discard_padding,
-                                             track_number,
-                                             rel_timecode,
-                                             generic_arg);
+  const uint64 element_size =
+      (*write_block)(writer_, frame, length, discard_padding, track_number,
+                     rel_timecode, generic_arg);
   if (element_size == 0)
     return false;
 
@@ -1758,8 +1656,7 @@
   }
 }
 
-SeekHead::~SeekHead() {
-}
+SeekHead::~SeekHead() {}
 
 bool SeekHead::Finalize(IMkvWriter* writer) const {
   if (writer->Seekable()) {
@@ -1771,13 +1668,12 @@
 
     for (int32 i = 0; i < kSeekEntryCount; ++i) {
       if (seek_entry_id_[i] != 0) {
-        entry_size[i] = EbmlElementSize(
-            kMkvSeekID,
-            static_cast<uint64>(seek_entry_id_[i]));
+        entry_size[i] =
+            EbmlElementSize(kMkvSeekID, static_cast<uint64>(seek_entry_id_[i]));
         entry_size[i] += EbmlElementSize(kMkvSeekPosition, seek_entry_pos_[i]);
 
-        payload_size += EbmlMasterElementSize(kMkvSeek, entry_size[i]) +
-                        entry_size[i];
+        payload_size +=
+            EbmlMasterElementSize(kMkvSeek, entry_size[i]) + entry_size[i];
       }
     }
 
@@ -1797,8 +1693,7 @@
         if (!WriteEbmlMasterElement(writer, kMkvSeek, entry_size[i]))
           return false;
 
-        if (!WriteEbmlElement(writer,
-                              kMkvSeekID,
+        if (!WriteEbmlElement(writer, kMkvSeekID,
                               static_cast<uint64>(seek_entry_id_[i])))
           return false;
 
@@ -1809,8 +1704,8 @@
 
     const uint64 total_entry_size = kSeekEntryCount * MaxEntrySize();
     const uint64 total_size =
-        EbmlMasterElementSize(kMkvSeekHead,
-                              total_entry_size) + total_entry_size;
+        EbmlMasterElementSize(kMkvSeekHead, total_entry_size) +
+        total_entry_size;
     const int64 size_left = total_size - (writer->Position() - start_pos_);
 
     const uint64 bytes_written = WriteVoidElement(writer, size_left);
@@ -1888,12 +1783,12 @@
       muxing_app_(NULL),
       timecode_scale_(1000000ULL),
       writing_app_(NULL),
-      duration_pos_(-1) {
-}
+      date_utc_(LLONG_MIN),
+      duration_pos_(-1) {}
 
 SegmentInfo::~SegmentInfo() {
-  delete [] muxing_app_;
-  delete [] writing_app_;
+  delete[] muxing_app_;
+  delete[] writing_app_;
 }
 
 bool SegmentInfo::Init() {
@@ -1904,26 +1799,16 @@
   GetVersion(&major, &minor, &build, &revision);
   char temp[256];
 #ifdef _MSC_VER
-  sprintf_s(temp,
-            sizeof(temp)/sizeof(temp[0]),
-            "libwebm-%d.%d.%d.%d",
-            major,
-            minor,
-            build,
-            revision);
+  sprintf_s(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major,
+            minor, build, revision);
 #else
-  snprintf(temp,
-           sizeof(temp)/sizeof(temp[0]),
-           "libwebm-%d.%d.%d.%d",
-           major,
-           minor,
-           build,
-           revision);
+  snprintf(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major,
+           minor, build, revision);
 #endif
 
   const size_t app_len = strlen(temp) + 1;
 
-  delete [] muxing_app_;
+  delete[] muxing_app_;
 
   muxing_app_ = new (std::nothrow) char[app_len];  // NOLINT
   if (!muxing_app_)
@@ -1955,8 +1840,7 @@
       if (writer->Position(duration_pos_))
         return false;
 
-      if (!WriteEbmlElement(writer,
-                            kMkvDuration,
+      if (!WriteEbmlElement(writer, kMkvDuration,
                             static_cast<float>(duration_)))
         return false;
 
@@ -1975,6 +1859,8 @@
   uint64 size = EbmlElementSize(kMkvTimecodeScale, timecode_scale_);
   if (duration_ > 0.0)
     size += EbmlElementSize(kMkvDuration, static_cast<float>(duration_));
+  if (date_utc_ != LLONG_MIN)
+    size += EbmlDateElementSize(kMkvDateUTC, date_utc_);
   size += EbmlElementSize(kMkvMuxingApp, muxing_app_);
   size += EbmlElementSize(kMkvWritingApp, writing_app_);
 
@@ -1996,6 +1882,9 @@
       return false;
   }
 
+  if (date_utc_ != LLONG_MIN)
+    WriteEbmlDateElement(writer, kMkvDateUTC, date_utc_);
+
   if (!WriteEbmlElement(writer, kMkvMuxingApp, muxing_app_))
     return false;
   if (!WriteEbmlElement(writer, kMkvWritingApp, writing_app_))
@@ -2022,7 +1911,7 @@
     strcpy(temp_str, app);
 #endif
 
-    delete [] muxing_app_;
+    delete[] muxing_app_;
     muxing_app_ = temp_str;
   }
 }
@@ -2040,7 +1929,7 @@
     strcpy(temp_str, app);
 #endif
 
-    delete [] writing_app_;
+    delete[] writing_app_;
     writing_app_ = temp_str;
   }
 }
@@ -2093,7 +1982,7 @@
       Cluster* const cluster = cluster_list_[i];
       delete cluster;
     }
-    delete [] cluster_list_;
+    delete[] cluster_list_;
   }
 
   if (frames_) {
@@ -2101,11 +1990,11 @@
       Frame* const frame = frames_[i];
       delete frame;
     }
-    delete [] frames_;
+    delete[] frames_;
   }
 
-  delete [] chunk_name_;
-  delete [] chunking_base_name_;
+  delete[] chunk_name_;
+  delete[] chunking_base_name_;
 
   if (chunk_writer_cluster_) {
     chunk_writer_cluster_->Close();
@@ -2121,8 +2010,7 @@
   }
 }
 
-void Segment::MoveCuesBeforeClustersHelper(uint64 diff,
-                                           int32 index,
+void Segment::MoveCuesBeforeClustersHelper(uint64 diff, int32 index,
                                            uint64* cues_size) {
   const uint64 old_cues_size = *cues_size;
   CuePoint* const cue_point = cues_.GetCueByIndex(index);
@@ -2139,9 +2027,9 @@
   //    Let d = a + b + c. Now d is the new size of the Cues element which is
   //                       passed on to the next recursive call.
   const uint64 cue_point_size_diff = cue_point->Size() - old_cue_point_size;
-  const uint64 cue_size_diff = GetCodedUIntSize(*cues_size +
-                                                cue_point_size_diff) -
-                               GetCodedUIntSize(*cues_size);
+  const uint64 cue_size_diff =
+      GetCodedUIntSize(*cues_size + cue_point_size_diff) -
+      GetCodedUIntSize(*cues_size);
   *cues_size += cue_point_size_diff + cue_size_diff;
   diff = *cues_size - old_cues_size;
   if (diff > 0) {
@@ -2187,8 +2075,8 @@
                                             IMkvWriter* writer) {
   if (!writer->Seekable() || chunking_)
     return false;
-  const int64 cluster_offset = cluster_list_[0]->size_position() -
-                               GetUIntSize(kMkvCluster);
+  const int64 cluster_offset =
+      cluster_list_[0]->size_position() - GetUIntSize(kMkvCluster);
 
   // Copy the headers.
   if (!ChunkedCopy(reader, writer, 0, cluster_offset))
@@ -2214,8 +2102,7 @@
   const int64 pos = writer->Position();
   const int64 segment_size = writer->Position() - payload_pos_;
   if (writer->Position(size_position_) ||
-      WriteUIntSize(writer, segment_size, 8) ||
-      writer->Position(pos))
+      WriteUIntSize(writer, segment_size, 8) || writer->Position(pos))
     return false;
   return true;
 }
@@ -2227,7 +2114,7 @@
   if (mode_ == kFile) {
     if (cluster_list_size_ > 0) {
       // Update last cluster's size
-      Cluster* const old_cluster = cluster_list_[cluster_list_size_-1];
+      Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
 
       if (!old_cluster || !old_cluster->Finalize())
         return false;
@@ -2258,7 +2145,7 @@
         return false;
 
       const bool cues_open = chunk_writer_cues_->Open(name);
-      delete [] name;
+      delete[] name;
       if (!cues_open)
         return false;
     }
@@ -2321,9 +2208,7 @@
   return track;
 }
 
-Chapter* Segment::AddChapter() {
-  return chapters_.AddChapter(&seed_);
-}
+Chapter* Segment::AddChapter() { return chapters_.AddChapter(&seed_); }
 
 uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) {
   VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_);  // NOLINT
@@ -2342,10 +2227,10 @@
 }
 
 bool Segment::AddCuePoint(uint64 timestamp, uint64 track) {
-  if (cluster_list_size_  < 1)
+  if (cluster_list_size_ < 1)
     return false;
 
-  const Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+  const Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
   if (!cluster)
     return false;
 
@@ -2364,9 +2249,7 @@
   return true;
 }
 
-uint64 Segment::AddAudioTrack(int32 sample_rate,
-                              int32 channels,
-                              int32 number) {
+uint64 Segment::AddAudioTrack(int32 sample_rate, int32 channels, int32 number) {
   AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_);  // NOLINT
   if (!track)
     return 0;
@@ -2381,11 +2264,8 @@
   return track->number();
 }
 
-bool Segment::AddFrame(const uint8* frame,
-                       uint64 length,
-                       uint64 track_number,
-                       uint64 timestamp,
-                       bool is_key) {
+bool Segment::AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+                       uint64 timestamp, bool is_key) {
   if (!frame)
     return false;
 
@@ -2426,11 +2306,7 @@
   const uint64 timecode_scale = segment_info_.timecode_scale();
   const uint64 abs_timecode = timestamp / timecode_scale;
 
-  if (!cluster->AddFrame(frame,
-                         length,
-                         track_number,
-                         abs_timecode,
-                         is_key))
+  if (!cluster->AddFrame(frame, length, track_number, abs_timecode, is_key))
     return false;
 
   if (new_cuepoint_ && cues_track_ == track_number) {
@@ -2444,13 +2320,10 @@
   return true;
 }
 
-bool Segment::AddFrameWithAdditional(const uint8* frame,
-                                     uint64 length,
+bool Segment::AddFrameWithAdditional(const uint8* frame, uint64 length,
                                      const uint8* additional,
-                                     uint64 additional_length,
-                                     uint64 add_id,
-                                     uint64 track_number,
-                                     uint64 timestamp,
+                                     uint64 additional_length, uint64 add_id,
+                                     uint64 track_number, uint64 timestamp,
                                      bool is_key) {
   if (frame == NULL || additional == NULL)
     return false;
@@ -2492,14 +2365,9 @@
   const uint64 timecode_scale = segment_info_.timecode_scale();
   const uint64 abs_timecode = timestamp / timecode_scale;
 
-  if (!cluster->AddFrameWithAdditional(frame,
-                                       length,
-                                       additional,
-                                       additional_length,
-                                       add_id,
-                                       track_number,
-                                       abs_timecode,
-                                       is_key))
+  if (!cluster->AddFrameWithAdditional(frame, length, additional,
+                                       additional_length, add_id, track_number,
+                                       abs_timecode, is_key))
     return false;
 
   if (new_cuepoint_ && cues_track_ == track_number) {
@@ -2513,11 +2381,9 @@
   return true;
 }
 
-bool Segment::AddFrameWithDiscardPadding(const uint8* frame,
-                                         uint64 length,
+bool Segment::AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
                                          int64 discard_padding,
-                                         uint64 track_number,
-                                         uint64 timestamp,
+                                         uint64 track_number, uint64 timestamp,
                                          bool is_key) {
   if (frame == NULL || discard_padding <= 0)
     return false;
@@ -2560,11 +2426,8 @@
   const uint64 timecode_scale = segment_info_.timecode_scale();
   const uint64 abs_timecode = timestamp / timecode_scale;
 
-  if (!cluster->AddFrameWithDiscardPadding(frame, length,
-                                           discard_padding,
-                                           track_number,
-                                           abs_timecode,
-                                           is_key)) {
+  if (!cluster->AddFrameWithDiscardPadding(
+          frame, length, discard_padding, track_number, abs_timecode, is_key)) {
     return false;
   }
 
@@ -2579,10 +2442,8 @@
   return true;
 }
 
-bool Segment::AddMetadata(const uint8* frame,
-                          uint64 length,
-                          uint64 track_number,
-                          uint64 timestamp_ns,
+bool Segment::AddMetadata(const uint8* frame, uint64 length,
+                          uint64 track_number, uint64 timestamp_ns,
                           uint64 duration_ns) {
   if (!frame)
     return false;
@@ -2600,7 +2461,7 @@
   if (cluster_list_size_ < 1)
     return false;
 
-  Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+  Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
 
   if (!cluster)
     return false;
@@ -2609,10 +2470,7 @@
   const uint64 abs_timecode = timestamp_ns / timecode_scale;
   const uint64 duration_timecode = duration_ns / timecode_scale;
 
-  if (!cluster->AddMetadata(frame,
-                            length,
-                            track_number,
-                            abs_timecode,
+  if (!cluster->AddMetadata(frame, length, track_number, abs_timecode,
                             duration_timecode))
     return false;
 
@@ -2625,40 +2483,25 @@
 bool Segment::AddGenericFrame(const Frame* frame) {
   last_block_duration_ = frame->duration();
   if (!tracks_.TrackIsAudio(frame->track_number()) &&
-      !tracks_.TrackIsVideo(frame->track_number()) &&
-      frame->duration() > 0) {
-    return AddMetadata(frame->frame(),
-                       frame->length(),
-                       frame->track_number(),
-                       frame->timestamp(),
-                       frame->duration());
+      !tracks_.TrackIsVideo(frame->track_number()) && frame->duration() > 0) {
+    return AddMetadata(frame->frame(), frame->length(), frame->track_number(),
+                       frame->timestamp(), frame->duration());
   } else if (frame->additional() && frame->additional_length() > 0) {
-    return AddFrameWithAdditional(frame->frame(),
-                                  frame->length(),
-                                  frame->additional(),
-                                  frame->additional_length(),
-                                  frame->add_id(),
-                                  frame->track_number(),
-                                  frame->timestamp(),
-                                  frame->is_key());
+    return AddFrameWithAdditional(
+        frame->frame(), frame->length(), frame->additional(),
+        frame->additional_length(), frame->add_id(), frame->track_number(),
+        frame->timestamp(), frame->is_key());
   } else if (frame->discard_padding() > 0) {
-    return AddFrameWithDiscardPadding(frame->frame(), frame->length(),
-                                      frame->discard_padding(),
-                                      frame->track_number(),
-                                      frame->timestamp(),
-                                      frame->is_key());
+    return AddFrameWithDiscardPadding(
+        frame->frame(), frame->length(), frame->discard_padding(),
+        frame->track_number(), frame->timestamp(), frame->is_key());
   } else {
-    return AddFrame(frame->frame(),
-                    frame->length(),
-                    frame->track_number(),
-                    frame->timestamp(),
-                    frame->is_key());
+    return AddFrame(frame->frame(), frame->length(), frame->track_number(),
+                    frame->timestamp(), frame->is_key());
   }
 }
 
-void Segment::OutputCues(bool output_cues) {
-  output_cues_ = output_cues;
-}
+void Segment::OutputCues(bool output_cues) { output_cues_ = output_cues; }
 
 bool Segment::SetChunking(bool chunking, const char* filename) {
   if (chunk_count_ > 0)
@@ -2683,7 +2526,7 @@
     strcpy(temp, filename);
 #endif
 
-    delete [] chunking_base_name_;
+    delete[] chunking_base_name_;
     chunking_base_name_ = temp;
 
     if (!UpdateChunkName("chk", &chunk_name_))
@@ -2723,7 +2566,7 @@
     strcat(header, ".hdr");
 #endif
     if (!chunk_writer_header_->Open(header)) {
-      delete [] header;
+      delete[] header;
       return false;
     }
 
@@ -2731,7 +2574,7 @@
     writer_cues_ = chunk_writer_cues_;
     writer_header_ = chunk_writer_header_;
 
-    delete [] header;
+    delete[] header;
   }
 
   chunking_ = chunking;
@@ -2748,9 +2591,7 @@
   return true;
 }
 
-void Segment::ForceNewClusterOnNextFrame() {
-  force_new_cluster_ = true;
-}
+void Segment::ForceNewClusterOnNextFrame() { force_new_cluster_ = true; }
 
 Track* Segment::GetTrackByNumber(uint64 track_number) const {
   return tracks_.GetTrackByNumber(track_number);
@@ -2775,7 +2616,7 @@
   if (SerializeInt(writer_header_, kEbmlUnknownValue, 8))
     return false;
 
-  payload_pos_ =  writer_header_->Position();
+  payload_pos_ = writer_header_->Position();
 
   if (mode_ == kFile && writer_header_->Seekable()) {
     // Set the duration > 0.0 so SegmentInfo will write out the duration. When
@@ -2819,8 +2660,7 @@
 // Here we are testing whether to create a new cluster, given a frame
 // having time frame_timestamp_ns.
 //
-int Segment::TestFrame(uint64 track_number,
-                       uint64 frame_timestamp_ns,
+int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
                        bool is_key) const {
   if (force_new_cluster_)
     return 1;
@@ -2850,7 +2690,7 @@
   // so this indicates a bug somewhere in our algorithm.
 
   if (frame_timecode < last_cluster_timecode)  // should never happen
-    return -1;  // error
+    return -1;
 
   // If the frame has a timestamp significantly larger than the last
   // cluster (in Matroska, cluster-relative timestamps are serialized
@@ -2900,7 +2740,7 @@
     const int32 new_capacity =
         (cluster_list_capacity_ <= 0) ? 1 : cluster_list_capacity_ * 2;
     Cluster** const clusters =
-        new (std::nothrow) Cluster*[new_capacity];  // NOLINT
+        new (std::nothrow) Cluster* [new_capacity];  // NOLINT
     if (!clusters)
       return false;
 
@@ -2908,7 +2748,7 @@
       clusters[i] = cluster_list_[i];
     }
 
-    delete [] cluster_list_;
+    delete[] cluster_list_;
 
     cluster_list_ = clusters;
     cluster_list_capacity_ = new_capacity;
@@ -2968,8 +2808,7 @@
 }
 
 bool Segment::DoNewClusterProcessing(uint64 track_number,
-                                     uint64 frame_timestamp_ns,
-                                     bool is_key) {
+                                     uint64 frame_timestamp_ns, bool is_key) {
   for (;;) {
     // Based on the characteristics of the current frame and current
     // cluster, decide whether to create a new cluster.
@@ -2977,12 +2816,12 @@
     if (result < 0)  // error
       return false;
 
-  // Always set force_new_cluster_ to false after TestFrame.
-  force_new_cluster_ = false;
+    // Always set force_new_cluster_ to false after TestFrame.
+    force_new_cluster_ = false;
 
-  // A non-zero result means create a new cluster.
-  if (result > 0 && !MakeNewCluster(frame_timestamp_ns))
-    return false;
+    // A non-zero result means create a new cluster.
+    if (result > 0 && !MakeNewCluster(frame_timestamp_ns))
+      return false;
 
     // Write queued (audio) frames.
     const int frame_count = WriteFramesAll();
@@ -3051,14 +2890,14 @@
     return false;
 
 #ifdef _MSC_VER
-  strcpy_s(str, length-strlen(ext_chk), chunking_base_name_);
+  strcpy_s(str, length - strlen(ext_chk), chunking_base_name_);
   strcat_s(str, length, ext_chk);
 #else
   strcpy(str, chunking_base_name_);
   strcat(str, ext_chk);
 #endif
 
-  delete [] *name;
+  delete[] * name;
   *name = str;
 
   return true;
@@ -3093,7 +2932,7 @@
     if (new_capacity < 1)
       return false;
 
-    Frame** const frames = new (std::nothrow) Frame*[new_capacity];  // NOLINT
+    Frame** const frames = new (std::nothrow) Frame* [new_capacity];  // NOLINT
     if (!frames)
       return false;
 
@@ -3101,7 +2940,7 @@
       frames[i] = frames_[i];
     }
 
-    delete [] frames_;
+    delete[] frames_;
     frames_ = frames;
     frames_capacity_ = new_capacity;
   }
@@ -3118,7 +2957,7 @@
   if (cluster_list_size_ < 1)
     return -1;
 
-  Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+  Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
 
   if (!cluster)
     return -1;
@@ -3131,19 +2970,14 @@
     const uint64 frame_timecode = frame_timestamp / timecode_scale;
 
     if (frame->discard_padding() > 0) {
-      if (!cluster->AddFrameWithDiscardPadding(frame->frame(),
-                                               frame->length(),
-                                               frame->discard_padding(),
-                                               frame->track_number(),
-                                               frame_timecode,
-                                               frame->is_key())) {
+      if (!cluster->AddFrameWithDiscardPadding(
+              frame->frame(), frame->length(), frame->discard_padding(),
+              frame->track_number(), frame_timecode, frame->is_key())) {
         return -1;
       }
     } else {
-      if (!cluster->AddFrame(frame->frame(),
-                             frame->length(),
-                             frame->track_number(),
-                             frame_timecode,
+      if (!cluster->AddFrame(frame->frame(), frame->length(),
+                             frame->track_number(), frame_timecode,
                              frame->is_key())) {
         return -1;
       }
@@ -3175,7 +3009,7 @@
     if (!frames_)
       return false;
 
-    Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+    Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
     if (!cluster)
       return false;
 
@@ -3190,25 +3024,21 @@
       if (frame_curr->timestamp() > timestamp)
         break;
 
-      const Frame* const frame_prev = frames_[i-1];
+      const Frame* const frame_prev = frames_[i - 1];
       const uint64 frame_timestamp = frame_prev->timestamp();
       const uint64 frame_timecode = frame_timestamp / timecode_scale;
       const int64 discard_padding = frame_prev->discard_padding();
 
       if (discard_padding > 0) {
-        if (!cluster->AddFrameWithDiscardPadding(frame_prev->frame(),
-                                                 frame_prev->length(),
-                                                 discard_padding,
-                                                 frame_prev->track_number(),
-                                                 frame_timecode,
-                                                 frame_prev->is_key())) {
+        if (!cluster->AddFrameWithDiscardPadding(
+                frame_prev->frame(), frame_prev->length(), discard_padding,
+                frame_prev->track_number(), frame_timecode,
+                frame_prev->is_key())) {
           return false;
         }
       } else {
-        if (!cluster->AddFrame(frame_prev->frame(),
-                               frame_prev->length(),
-                               frame_prev->track_number(),
-                               frame_timecode,
+        if (!cluster->AddFrame(frame_prev->frame(), frame_prev->length(),
+                               frame_prev->track_number(), frame_timecode,
                                frame_prev->is_key())) {
           return false;
         }
@@ -3232,7 +3062,7 @@
 
       const int32 new_frames_size = frames_size_ - shift_left;
       for (int32 i = 0; i < new_frames_size; ++i) {
-        frames_[i] = frames_[i+shift_left];
+        frames_[i] = frames_[i + shift_left];
       }
 
       frames_size_ = new_frames_size;
diff --git a/third_party/libwebm/mkvmuxer.hpp b/third_party/libwebm/mkvmuxer.hpp
index 63a315e..1c1c310 100644
--- a/third_party/libwebm/mkvmuxer.hpp
+++ b/third_party/libwebm/mkvmuxer.hpp
@@ -15,7 +15,7 @@
 // http://www.webmproject.org/code/specs/container/.
 
 namespace mkvparser {
-  class IMkvReader;
+class IMkvReader;
 }  // end namespace
 
 namespace mkvmuxer {
@@ -60,8 +60,8 @@
 bool WriteEbmlHeader(IMkvWriter* writer);
 
 // Copies in Chunk from source to destination between the given byte positions
-bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst,
-                 int64 start, int64 size);
+bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64 start,
+                 int64 size);
 
 ///////////////////////////////////////////////////////////////
 // Class to hold data the will be written to a block.
@@ -74,8 +74,7 @@
   bool Init(const uint8* frame, uint64 length);
 
   // Copies |additional| data into |additional_|. Returns true on success.
-  bool AddAdditionalData(const uint8* additional, uint64 length,
-                         uint64 add_id);
+  bool AddAdditionalData(const uint8* additional, uint64 length, uint64 add_id);
 
   uint64 add_id() const { return add_id_; }
   const uint8* additional() const { return additional_; }
@@ -223,9 +222,7 @@
 // ContentEncAESSettings element
 class ContentEncAESSettings {
  public:
-  enum {
-    kCTR = 1
-  };
+  enum { kCTR = 1 };
 
   ContentEncAESSettings();
   ~ContentEncAESSettings() {}
@@ -353,6 +350,10 @@
     seek_pre_roll_ = seek_pre_roll;
   }
   uint64 seek_pre_roll() const { return seek_pre_roll_; }
+  void set_default_duration(uint64 default_duration) {
+    default_duration_ = default_duration;
+  }
+  uint64 default_duration() const { return default_duration_; }
 
   uint64 codec_private_length() const { return codec_private_length_; }
   uint32 content_encoding_entries_size() const {
@@ -360,7 +361,7 @@
   }
 
  private:
-  // Track element names
+  // Track element names.
   char* codec_id_;
   uint8* codec_private_;
   char* language_;
@@ -371,6 +372,7 @@
   uint64 uid_;
   uint64 codec_delay_;
   uint64 seek_pre_roll_;
+  uint64 default_duration_;
 
   // Size of the CodecPrivate data in bytes.
   uint64 codec_private_length_;
@@ -391,16 +393,13 @@
   // Supported modes for stereo 3D.
   enum StereoMode {
     kMono = 0,
-    kSideBySideLeftIsFirst  = 1,
-    kTopBottomRightIsFirst  = 2,
-    kTopBottomLeftIsFirst   = 3,
+    kSideBySideLeftIsFirst = 1,
+    kTopBottomRightIsFirst = 2,
+    kTopBottomLeftIsFirst = 3,
     kSideBySideRightIsFirst = 11
   };
 
-  enum AlphaMode {
-    kNoAlpha = 0,
-    kAlpha  = 1
-  };
+  enum AlphaMode { kNoAlpha = 0, kAlpha = 1 };
 
   // The |seed| parameter is used to synthesize a UID for the track.
   explicit VideoTrack(unsigned int* seed);
@@ -484,10 +483,7 @@
 class Tracks {
  public:
   // Audio and video type defined by the Matroska specs.
-  enum {
-    kVideo = 0x1,
-    kAudio = 0x2
-  };
+  enum { kVideo = 0x1, kAudio = 0x2 };
   // Opus, Vorbis, VP8, and VP9 codec ids defined by the Matroska specs.
   static const char kOpusCodecId[];
   static const char kVorbisCodecId[];
@@ -544,8 +540,7 @@
 
   // Converts the nanosecond start and stop times of this chapter to
   // their corresponding timecode values, and stores them that way.
-  void set_time(const Segment& segment,
-                uint64 start_time_ns,
+  void set_time(const Segment& segment, uint64 start_time_ns,
                 uint64 end_time_ns);
 
   // Sets the uid for this chapter. Primarily used to enable
@@ -568,9 +563,7 @@
   //  http://www.iana.org/domains/root/db/
   //
   // The function returns false if the string could not be allocated.
-  bool add_string(const char* title,
-                  const char* language,
-                  const char* country);
+  bool add_string(const char* title, const char* language, const char* country);
 
  private:
   friend class Chapters;
@@ -724,9 +717,7 @@
   //   timecode:     Absolute (not relative to cluster) timestamp of the
   //                 frame, expressed in timecode units.
   //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrame(const uint8* frame,
-                uint64 length,
-                uint64 track_number,
+  bool AddFrame(const uint8* frame, uint64 length, uint64 track_number,
                 uint64 timecode,  // timecode units (absolute)
                 bool is_key);
 
@@ -743,14 +734,10 @@
   //   abs_timecode: Absolute (not relative to cluster) timestamp of the
   //                 frame, expressed in timecode units.
   //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithAdditional(const uint8* frame,
-                              uint64 length,
-                              const uint8* additional,
-                              uint64 additional_length,
-                              uint64 add_id,
-                              uint64 track_number,
-                              uint64 abs_timecode,
-                              bool is_key);
+  bool AddFrameWithAdditional(const uint8* frame, uint64 length,
+                              const uint8* additional, uint64 additional_length,
+                              uint64 add_id, uint64 track_number,
+                              uint64 abs_timecode, bool is_key);
 
   // Adds a frame to be output in the file. The frame is written out through
   // |writer_| if successful. Returns true on success.
@@ -763,12 +750,9 @@
   //   abs_timecode: Absolute (not relative to cluster) timestamp of the
   //                 frame, expressed in timecode units.
   //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithDiscardPadding(const uint8* frame,
-                                  uint64 length,
-                                  int64 discard_padding,
-                                  uint64 track_number,
-                                  uint64 abs_timecode,
-                                  bool is_key);
+  bool AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
+                                  int64 discard_padding, uint64 track_number,
+                                  uint64 abs_timecode, bool is_key);
 
   // Writes a frame of metadata to the output medium; returns true on
   // success.
@@ -784,11 +768,8 @@
   // The metadata frame is written as a block group, with a duration
   // sub-element but no reference time sub-elements (indicating that
   // it is considered a keyframe, per Matroska semantics).
-  bool AddMetadata(const uint8* frame,
-                   uint64 length,
-                   uint64 track_number,
-                   uint64 timecode,  // timecode units (absolute)
-                   uint64 duration);  // timecode units
+  bool AddMetadata(const uint8* frame, uint64 length, uint64 track_number,
+                   uint64 timecode, uint64 duration);
 
   // Increments the size of the cluster's data in bytes.
   void AddPayloadSize(uint64 size);
@@ -809,34 +790,26 @@
  private:
   //  Signature that matches either of WriteSimpleBlock or WriteMetadataBlock
   //  in the muxer utilities package.
-  typedef uint64 (*WriteBlock)(IMkvWriter* writer,
-                               const uint8* data,
-                               uint64 length,
-                               uint64 track_number,
-                               int64 timecode,
-                               uint64 generic_arg);
+  typedef uint64 (*WriteBlock)(IMkvWriter* writer, const uint8* data,
+                               uint64 length, uint64 track_number,
+                               int64 timecode, uint64 generic_arg);
 
   //  Signature that matches WriteBlockWithAdditional
   //  in the muxer utilities package.
-  typedef uint64 (*WriteBlockAdditional)(IMkvWriter* writer,
-                                         const uint8* data,
-                                         uint64 length,
-                                         const uint8* additional,
+  typedef uint64 (*WriteBlockAdditional)(IMkvWriter* writer, const uint8* data,
+                                         uint64 length, const uint8* additional,
                                          uint64 add_id,
                                          uint64 additional_length,
-                                         uint64 track_number,
-                                         int64 timecode,
+                                         uint64 track_number, int64 timecode,
                                          uint64 is_key);
 
   //  Signature that matches WriteBlockWithDiscardPadding
   //  in the muxer utilities package.
   typedef uint64 (*WriteBlockDiscardPadding)(IMkvWriter* writer,
-                                             const uint8* data,
-                                             uint64 length,
+                                             const uint8* data, uint64 length,
                                              int64 discard_padding,
                                              uint64 track_number,
-                                             int64 timecode,
-                                             uint64 is_key);
+                                             int64 timecode, uint64 is_key);
 
   // Utility method that confirms that blocks can still be added, and that the
   // cluster header has been written. Used by |DoWriteBlock*|. Returns true
@@ -858,27 +831,20 @@
   int64 GetRelativeTimecode(int64 abs_timecode) const;
 
   //  Used to implement AddFrame and AddMetadata.
-  bool DoWriteBlock(const uint8* frame,
-                    uint64 length,
-                    uint64 track_number,
-                    uint64 absolute_timecode,
-                    uint64 generic_arg,
+  bool DoWriteBlock(const uint8* frame, uint64 length, uint64 track_number,
+                    uint64 absolute_timecode, uint64 generic_arg,
                     WriteBlock write_block);
 
   // Used to implement AddFrameWithAdditional
-  bool DoWriteBlockWithAdditional(const uint8* frame,
-                                  uint64 length,
+  bool DoWriteBlockWithAdditional(const uint8* frame, uint64 length,
                                   const uint8* additional,
-                                  uint64 additional_length,
-                                  uint64 add_id,
-                                  uint64 track_number,
-                                  uint64 absolute_timecode,
+                                  uint64 additional_length, uint64 add_id,
+                                  uint64 track_number, uint64 absolute_timecode,
                                   uint64 generic_arg,
                                   WriteBlockAdditional write_block);
 
   // Used to implement AddFrameWithDiscardPadding
-  bool DoWriteBlockWithDiscardPadding(const uint8* frame,
-                                      uint64 length,
+  bool DoWriteBlockWithDiscardPadding(const uint8* frame, uint64 length,
                                       int64 discard_padding,
                                       uint64 track_number,
                                       uint64 absolute_timecode,
@@ -993,6 +959,8 @@
   uint64 timecode_scale() const { return timecode_scale_; }
   void set_writing_app(const char* app);
   const char* writing_app() const { return writing_app_; }
+  void set_date_utc(int64 date_utc) { date_utc_ = date_utc; }
+  int64 date_utc() const { return date_utc_; }
 
  private:
   // Segment Information element names.
@@ -1004,6 +972,8 @@
   uint64 timecode_scale_;
   // Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision.
   char* writing_app_;
+  // LLONG_MIN when DateUTC is not set.
+  int64 date_utc_;
 
   // The file position of the duration element.
   int64 duration_pos_;
@@ -1019,10 +989,7 @@
 //  |Init| must be called before any other method in this class.
 class Segment {
  public:
-  enum Mode {
-    kLive = 0x1,
-    kFile = 0x2
-  };
+  enum Mode { kLive = 0x1, kFile = 0x2 };
 
   enum CuesPosition {
     kAfterClusters = 0x0,  // Position Cues after Clusters - Default
@@ -1070,11 +1037,8 @@
   //                 functions.
   //   timestamp:    Timestamp of the frame in nanoseconds from 0.
   //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrame(const uint8* frame,
-                uint64 length,
-                uint64 track_number,
-                uint64 timestamp_ns,
-                bool is_key);
+  bool AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+                uint64 timestamp_ns, bool is_key);
 
   // Writes a frame of metadata to the output medium; returns true on
   // success.
@@ -1090,11 +1054,8 @@
   // The metadata frame is written as a block group, with a duration
   // sub-element but no reference time sub-elements (indicating that
   // it is considered a keyframe, per Matroska semantics).
-  bool AddMetadata(const uint8* frame,
-                   uint64 length,
-                   uint64 track_number,
-                   uint64 timestamp_ns,
-                   uint64 duration_ns);
+  bool AddMetadata(const uint8* frame, uint64 length, uint64 track_number,
+                   uint64 timestamp_ns, uint64 duration_ns);
 
   // Writes a frame with additional data to the output medium; returns true on
   // success.
@@ -1109,14 +1070,10 @@
   //   timestamp:    Absolute timestamp of the frame, expressed in nanosecond
   //                 units.
   //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithAdditional(const uint8* frame,
-                              uint64 length,
-                              const uint8* additional,
-                              uint64 additional_length,
-                              uint64 add_id,
-                              uint64 track_number,
-                              uint64 timestamp,
-                              bool is_key);
+  bool AddFrameWithAdditional(const uint8* frame, uint64 length,
+                              const uint8* additional, uint64 additional_length,
+                              uint64 add_id, uint64 track_number,
+                              uint64 timestamp, bool is_key);
 
   // Writes a frame with DiscardPadding to the output medium; returns true on
   // success.
@@ -1129,12 +1086,9 @@
   //   timestamp:    Absolute timestamp of the frame, expressed in nanosecond
   //                 units.
   //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithDiscardPadding(const uint8* frame,
-                                  uint64 length,
-                                  int64 discard_padding,
-                                  uint64 track_number,
-                                  uint64 timestamp,
-                                  bool is_key);
+  bool AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
+                                  int64 discard_padding, uint64 track_number,
+                                  uint64 timestamp, bool is_key);
 
   // Writes a Frame to the output medium. Chooses the correct way of writing
   // the frame (Block vs SimpleBlock) based on the parameters passed.
@@ -1268,7 +1222,6 @@
   // was necessary but creation was not successful.
   bool DoNewClusterProcessing(uint64 track_num, uint64 timestamp_ns, bool key);
 
-
   // Adjusts Cue Point values (to place Cues before Clusters) so that they
   // reflect the correct offsets.
   void MoveCuesBeforeClusters();
@@ -1398,6 +1351,6 @@
   LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment);
 };
 
-}  //end namespace mkvmuxer
+}  // end namespace mkvmuxer
 
-#endif //MKVMUXER_HPP
+#endif  // MKVMUXER_HPP
diff --git a/third_party/libwebm/mkvmuxertypes.hpp b/third_party/libwebm/mkvmuxertypes.hpp
index 2c66fd2..d0fc9fe 100644
--- a/third_party/libwebm/mkvmuxertypes.hpp
+++ b/third_party/libwebm/mkvmuxertypes.hpp
@@ -1,30 +1,30 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.

-//

-// Use of this source code is governed by a BSD-style license

-// that can be found in the LICENSE file in the root of the source

-// tree. An additional intellectual property rights grant can be found

-// in the file PATENTS.  All contributing project authors may

-// be found in the AUTHORS file in the root of the source tree.

-

-#ifndef MKVMUXERTYPES_HPP

-#define MKVMUXERTYPES_HPP

-

-// Copied from Chromium basictypes.h

-// A macro to disallow the copy constructor and operator= functions

-// This should be used in the private: declarations for a class

-#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \

-  TypeName(const TypeName&);               \

-  void operator=(const TypeName&)

-

-namespace mkvmuxer {

-

-typedef unsigned char      uint8;

-typedef short              int16;

-typedef int                int32;

-typedef unsigned int       uint32;

-typedef long long          int64;

-typedef unsigned long long uint64;

-

-}  //end namespace mkvmuxer

-

-#endif // MKVMUXERTYPES_HPP

+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef MKVMUXERTYPES_HPP
+#define MKVMUXERTYPES_HPP
+
+// Copied from Chromium basictypes.h
+// A macro to disallow the copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&);                       \
+  void operator=(const TypeName&)
+
+namespace mkvmuxer {
+
+typedef unsigned char uint8;
+typedef short int16;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+}  // end namespace mkvmuxer
+
+#endif  // MKVMUXERTYPES_HPP
diff --git a/third_party/libwebm/mkvmuxerutil.cpp b/third_party/libwebm/mkvmuxerutil.cpp
index 18060e9..3fb9bc9 100644
--- a/third_party/libwebm/mkvmuxerutil.cpp
+++ b/third_party/libwebm/mkvmuxerutil.cpp
@@ -29,6 +29,13 @@
 
 namespace mkvmuxer {
 
+namespace {
+
+// Date elements are always 8 octets in size.
+const int kDateElementSize = 8;
+
+}  // namespace
+
 int32 GetCodedUIntSize(uint64 value) {
   if (value < 0x000000000000007FULL)
     return 1;
@@ -92,7 +99,7 @@
   return ebml_size;
 }
 
-uint64 EbmlElementSize(uint64 type, float /* value */ ) {
+uint64 EbmlElementSize(uint64 type, float /* value */) {
   // Size of EBML ID
   uint64 ebml_size = GetUIntSize(type);
 
@@ -137,6 +144,19 @@
   return ebml_size;
 }
 
+// Returns the encoded size (ID + size field + payload) of a Date element.
+uint64 EbmlDateElementSize(uint64 type, int64 /* value */) {
+  // Size of the EBML ID.
+  uint64 ebml_size = GetUIntSize(type);
+
+  // Payload: kDateElementSize octets, independent of the date value.
+  ebml_size += kDateElementSize;
+
+  // One octet for the size field that precedes the payload.
+  ebml_size++;
+  return ebml_size;
+}
+
 int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
   if (!writer || size < 1 || size > 8)
     return -1;
@@ -302,9 +322,7 @@
   return true;
 }
 
-bool WriteEbmlElement(IMkvWriter* writer,
-                      uint64 type,
-                      const uint8* value,
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
                       uint64 size) {
   if (!writer || !value || size < 1)
     return false;
@@ -321,12 +339,24 @@
   return true;
 }
 
-uint64 WriteSimpleBlock(IMkvWriter* writer,
-                        const uint8* data,
-                        uint64 length,
-                        uint64 track_number,
-                        int64 timecode,
-                        uint64 is_key) {
+// Writes a Date element: the element ID, the payload size, then |value|.
+// Returns true on success, false if |writer| is NULL or any step fails.
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
+  if (!writer)
+    return false;
+
+  // Each helper's non-zero result is treated as failure. || short-circuits,
+  // so a failed step prevents the later ones from running.
+  const bool write_failed =
+      WriteID(writer, type) ||                // element ID
+      WriteUInt(writer, kDateElementSize) ||  // payload size
+      SerializeInt(writer, value, kDateElementSize);  // payload
+
+  return !write_failed;
+}
+
+uint64 WriteSimpleBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+                        uint64 track_number, int64 timecode, uint64 is_key) {
   if (!writer)
     return false;
 
@@ -372,7 +402,7 @@
     return 0;
 
   const uint64 element_size =
-    GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + length;
+      GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + length;
 
   return element_size;
 }
@@ -391,11 +421,8 @@
 //     Duration size
 //     (duration payload)
 //
-uint64 WriteMetadataBlock(IMkvWriter* writer,
-                          const uint8* data,
-                          uint64 length,
-                          uint64 track_number,
-                          int64 timecode,
+uint64 WriteMetadataBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+                          uint64 track_number, int64 timecode,
                           uint64 duration) {
   // We don't backtrack when writing to the stream, so we must
   // pre-compute the BlockGroup size, by summing the sizes of each
@@ -487,47 +514,37 @@
 //        1 (Denotes Alpha)
 //      BlockAdditional
 //        Data
-uint64 WriteBlockWithAdditional(IMkvWriter* writer,
-                                const uint8* data,
-                                uint64 length,
-                                const uint8* additional,
-                                uint64 additional_length,
-                                uint64 add_id,
-                                uint64 track_number,
-                                int64 timecode,
+uint64 WriteBlockWithAdditional(IMkvWriter* writer, const uint8* data,
+                                uint64 length, const uint8* additional,
+                                uint64 additional_length, uint64 add_id,
+                                uint64 track_number, int64 timecode,
                                 uint64 is_key) {
   if (!data || !additional || length < 1 || additional_length < 1)
     return 0;
 
   const uint64 block_payload_size = 4 + length;
-  const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock,
-                                                       block_payload_size) +
-                                 block_payload_size;
-  const uint64 block_additional_elem_size = EbmlElementSize(kMkvBlockAdditional,
-                                                            additional,
-                                                            additional_length);
+  const uint64 block_elem_size =
+      EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
+  const uint64 block_additional_elem_size =
+      EbmlElementSize(kMkvBlockAdditional, additional, additional_length);
   const uint64 block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, add_id);
 
-  const uint64 block_more_payload_size = block_addid_elem_size +
-                                         block_additional_elem_size;
-  const uint64 block_more_elem_size = EbmlMasterElementSize(
-                                          kMkvBlockMore,
-                                          block_more_payload_size) +
-                                      block_more_payload_size;
+  const uint64 block_more_payload_size =
+      block_addid_elem_size + block_additional_elem_size;
+  const uint64 block_more_elem_size =
+      EbmlMasterElementSize(kMkvBlockMore, block_more_payload_size) +
+      block_more_payload_size;
   const uint64 block_additions_payload_size = block_more_elem_size;
-  const uint64 block_additions_elem_size = EbmlMasterElementSize(
-                                               kMkvBlockAdditions,
-                                               block_additions_payload_size) +
-                                           block_additions_payload_size;
-  const uint64 block_group_payload_size = block_elem_size +
-                                          block_additions_elem_size;
-  const uint64 block_group_elem_size = EbmlMasterElementSize(
-                                           kMkvBlockGroup,
-                                           block_group_payload_size) +
-                                       block_group_payload_size;
+  const uint64 block_additions_elem_size =
+      EbmlMasterElementSize(kMkvBlockAdditions, block_additions_payload_size) +
+      block_additions_payload_size;
+  const uint64 block_group_payload_size =
+      block_elem_size + block_additions_elem_size;
+  const uint64 block_group_elem_size =
+      EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
+      block_group_payload_size;
 
-  if (!WriteEbmlMasterElement(writer, kMkvBlockGroup,
-                              block_group_payload_size))
+  if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, block_group_payload_size))
     return 0;
 
   if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
@@ -558,8 +575,8 @@
   if (!WriteEbmlElement(writer, kMkvBlockAddID, add_id))
     return 0;
 
-  if (!WriteEbmlElement(writer, kMkvBlockAdditional,
-                        additional, additional_length))
+  if (!WriteEbmlElement(writer, kMkvBlockAdditional, additional,
+                        additional_length))
     return 0;
 
   return block_group_elem_size;
@@ -571,31 +588,25 @@
 //  Block
 //    Data
 //  DiscardPadding
-uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer,
-                                    const uint8* data,
-                                    uint64 length,
-                                    int64 discard_padding,
-                                    uint64 track_number,
-                                    int64 timecode,
+uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, const uint8* data,
+                                    uint64 length, int64 discard_padding,
+                                    uint64 track_number, int64 timecode,
                                     uint64 is_key) {
   if (!data || length < 1 || discard_padding <= 0)
     return 0;
 
   const uint64 block_payload_size = 4 + length;
-  const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock,
-                                                       block_payload_size) +
-                                 block_payload_size;
-  const uint64 discard_padding_elem_size = EbmlElementSize(kMkvDiscardPadding,
-                                                           discard_padding);
-  const uint64 block_group_payload_size = block_elem_size +
-                                          discard_padding_elem_size;
-  const uint64 block_group_elem_size = EbmlMasterElementSize(
-                                           kMkvBlockGroup,
-                                           block_group_payload_size) +
-                                       block_group_payload_size;
+  const uint64 block_elem_size =
+      EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
+  const uint64 discard_padding_elem_size =
+      EbmlElementSize(kMkvDiscardPadding, discard_padding);
+  const uint64 block_group_payload_size =
+      block_elem_size + discard_padding_elem_size;
+  const uint64 block_group_elem_size =
+      EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
+      block_group_payload_size;
 
-  if (!WriteEbmlMasterElement(writer, kMkvBlockGroup,
-                              block_group_payload_size))
+  if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, block_group_payload_size))
     return 0;
 
   if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
@@ -634,9 +645,9 @@
     return false;
 
   // Subtract one for the void ID and the coded size.
-  uint64 void_entry_size = size - 1 - GetCodedUIntSize(size-1);
-  uint64 void_size = EbmlMasterElementSize(kMkvVoid, void_entry_size) +
-                     void_entry_size;
+  uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
+  uint64 void_size =
+      EbmlMasterElementSize(kMkvVoid, void_entry_size) + void_entry_size;
 
   if (void_size != size)
     return 0;
@@ -684,13 +695,13 @@
   for (int i = 0; i < 7; ++i) {  // avoid problems with 8-byte values
     uid <<= 8;
 
-    // TODO(fgalligan): Move random number generation to platform specific code.
+// TODO(fgalligan): Move random number generation to platform specific code.
 #ifdef _MSC_VER
     (void)seed;
     unsigned int random_value;
     const errno_t e = rand_s(&random_value);
     (void)e;
-    const int32 nn  = random_value;
+    const int32 nn = random_value;
 #elif __ANDROID__
     int32 temp_num = 1;
     int fd = open("/dev/urandom", O_RDONLY);
diff --git a/third_party/libwebm/mkvmuxerutil.hpp b/third_party/libwebm/mkvmuxerutil.hpp
index d196ad3..a092abe 100644
--- a/third_party/libwebm/mkvmuxerutil.hpp
+++ b/third_party/libwebm/mkvmuxerutil.hpp
@@ -30,6 +30,7 @@
 uint64 EbmlElementSize(uint64 type, float value);
 uint64 EbmlElementSize(uint64 type, const char* value);
 uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size);
+uint64 EbmlDateElementSize(uint64 type, int64 value);
 
 // Creates an EBML coded number from |value| and writes it out. The size of
 // the coded number is determined by the value of |value|. |value| must not
@@ -52,10 +53,9 @@
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value);
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value);
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value);
-bool WriteEbmlElement(IMkvWriter* writer,
-                      uint64 type,
-                      const uint8* value,
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
                       uint64 size);
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value);
 
 // Output an Mkv Simple Block.
 // Inputs:
@@ -67,12 +67,8 @@
 //   timecode:     Relative timecode of the Block.  Only values in the
 //                  range [0, 2^15) are permitted.
 //   is_key:       Non-zero value specifies that frame is a key frame.
-uint64 WriteSimpleBlock(IMkvWriter* writer,
-                        const uint8* data,
-                        uint64 length,
-                        uint64 track_number,
-                        int64 timecode,
-                        uint64 is_key);
+uint64 WriteSimpleBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+                        uint64 track_number, int64 timecode, uint64 is_key);
 
 // Output a metadata keyframe, using a Block Group element.
 // Inputs:
@@ -84,11 +80,8 @@
 //   timecode      Timecode of frame, relative to cluster timecode.  Only
 //                  values in the range [0, 2^15) are permitted.
 //   duration_timecode  Duration of frame, using timecode units.
-uint64 WriteMetadataBlock(IMkvWriter* writer,
-                          const uint8* data,
-                          uint64 length,
-                          uint64 track_number,
-                          int64 timecode,
+uint64 WriteMetadataBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+                          uint64 track_number, int64 timecode,
                           uint64 duration_timecode);
 
 // Output an Mkv Block with BlockAdditional data.
@@ -104,14 +97,10 @@
 //   timecode:     Relative timecode of the Block.  Only values in the
 //                  range [0, 2^15) are permitted.
 //   is_key:       Non-zero value specifies that frame is a key frame.
-uint64 WriteBlockWithAdditional(IMkvWriter* writer,
-                                const uint8* data,
-                                uint64 length,
-                                const uint8* additional,
-                                uint64 additional_length,
-                                uint64 add_id,
-                                uint64 track_number,
-                                int64 timecode,
+uint64 WriteBlockWithAdditional(IMkvWriter* writer, const uint8* data,
+                                uint64 length, const uint8* additional,
+                                uint64 additional_length, uint64 add_id,
+                                uint64 track_number, int64 timecode,
                                 uint64 is_key);
 
 // Output an Mkv Block with a DiscardPadding element.
@@ -125,12 +114,9 @@
 //   timecode:        Relative timecode of the Block.  Only values in the
 //                    range [0, 2^15) are permitted.
 //   is_key:          Non-zero value specifies that frame is a key frame.
-uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer,
-                                    const uint8* data,
-                                    uint64 length,
-                                    int64 discard_padding,
-                                    uint64 track_number,
-                                    int64 timecode,
+uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, const uint8* data,
+                                    uint64 length, int64 discard_padding,
+                                    uint64 track_number, int64 timecode,
                                     uint64 is_key);
 
 // Output a void element. |size| must be the entire size in bytes that will be
@@ -146,6 +132,6 @@
 // the random-number generator (see POSIX rand_r() for semantics).
 uint64 MakeUID(unsigned int* seed);
 
-}  //end namespace mkvmuxer
+}  // end namespace mkvmuxer
 
-#endif // MKVMUXERUTIL_HPP
+#endif  // MKVMUXERUTIL_HPP
diff --git a/third_party/libwebm/mkvparser.cpp b/third_party/libwebm/mkvparser.cpp
index b41456a..441f165 100644
--- a/third_party/libwebm/mkvparser.cpp
+++ b/third_party/libwebm/mkvparser.cpp
@@ -14,1383 +14,1204 @@
 
 #ifdef _MSC_VER
 // Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable:4996)
+#pragma warning(disable : 4996)
 #endif
 
-mkvparser::IMkvReader::~IMkvReader()
-{
+mkvparser::IMkvReader::~IMkvReader() {}
+
+void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision) {
+  major = 1;
+  minor = 0;
+  build = 0;
+  revision = 28;
 }
 
-void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision)
-{
-    major = 1;
-    minor = 0;
-    build = 0;
-    revision = 27;
-}
+long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) {
+  assert(pReader);
+  assert(pos >= 0);
 
-long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len)
-{
-    assert(pReader);
-    assert(pos >= 0);
+  int status;
 
-    int status;
+  //#ifdef _DEBUG
+  //    long long total, available;
+  //    status = pReader->Length(&total, &available);
+  //    assert(status >= 0);
+  //    assert((total < 0) || (available <= total));
+  //    assert(pos < available);
+  //    assert((available - pos) >= 1);  //assume here max u-int len is 8
+  //#endif
 
-//#ifdef _DEBUG
-//    long long total, available;
-//    status = pReader->Length(&total, &available);
-//    assert(status >= 0);
-//    assert((total < 0) || (available <= total));
-//    assert(pos < available);
-//    assert((available - pos) >= 1);  //assume here max u-int len is 8
-//#endif
+  len = 1;
 
-    len = 1;
+  unsigned char b;
 
-    unsigned char b;
+  status = pReader->Read(pos, 1, &b);
 
+  if (status < 0)  // error or underflow
+    return status;
+
+  if (status > 0)  // interpreted as "underflow"
+    return E_BUFFER_NOT_FULL;
+
+  if (b == 0)  // we can't handle u-int values larger than 8 bytes
+    return E_FILE_FORMAT_INVALID;
+
+  unsigned char m = 0x80;
+
+  while (!(b & m)) {
+    m >>= 1;
+    ++len;
+  }
+
+  //#ifdef _DEBUG
+  //    assert((available - pos) >= len);
+  //#endif
+
+  long long result = b & (~m);
+  ++pos;
+
+  for (int i = 1; i < len; ++i) {
     status = pReader->Read(pos, 1, &b);
 
-    if (status < 0)  //error or underflow
-        return status;
-
-    if (status > 0)  //interpreted as "underflow"
-        return E_BUFFER_NOT_FULL;
-
-    if (b == 0)  //we can't handle u-int values larger than 8 bytes
-        return E_FILE_FORMAT_INVALID;
-
-    unsigned char m = 0x80;
-
-    while (!(b & m))
-    {
-        m >>= 1;
-        ++len;
+    if (status < 0) {
+      len = 1;
+      return status;
     }
 
-//#ifdef _DEBUG
-//    assert((available - pos) >= len);
-//#endif
+    if (status > 0) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
 
-    long long result = b & (~m);
+    result <<= 8;
+    result |= b;
+
     ++pos;
+  }
 
-    for (int i = 1; i < len; ++i)
-    {
-        status = pReader->Read(pos, 1, &b);
-
-        if (status < 0)
-        {
-            len = 1;
-            return status;
-        }
-
-        if (status > 0)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result <<= 8;
-        result |= b;
-
-        ++pos;
-    }
-
-    return result;
+  return result;
 }
 
-long long mkvparser::GetUIntLength(
-    IMkvReader* pReader,
-    long long pos,
-    long& len)
-{
-    assert(pReader);
-    assert(pos >= 0);
+long long mkvparser::GetUIntLength(IMkvReader* pReader, long long pos,
+                                   long& len) {
+  assert(pReader);
+  assert(pos >= 0);
 
-    long long total, available;
+  long long total, available;
 
-    int status = pReader->Length(&total, &available);
-    assert(status >= 0);
-    assert((total < 0) || (available <= total));
+  int status = pReader->Length(&total, &available);
+  assert(status >= 0);
+  assert((total < 0) || (available <= total));
 
-    len = 1;
+  len = 1;
 
-    if (pos >= available)
-        return pos;  //too few bytes available
+  if (pos >= available)
+    return pos;  // too few bytes available
 
+  unsigned char b;
+
+  status = pReader->Read(pos, 1, &b);
+
+  if (status < 0)
+    return status;
+
+  assert(status == 0);
+
+  if (b == 0)  // we can't handle u-int values larger than 8 bytes
+    return E_FILE_FORMAT_INVALID;
+
+  unsigned char m = 0x80;
+
+  while (!(b & m)) {
+    m >>= 1;
+    ++len;
+  }
+
+  return 0;  // success
+}
+
+long long mkvparser::UnserializeUInt(IMkvReader* pReader, long long pos,
+                                     long long size) {
+  assert(pReader);
+  assert(pos >= 0);
+
+  if ((size <= 0) || (size > 8))
+    return E_FILE_FORMAT_INVALID;
+
+  long long result = 0;
+
+  for (long long i = 0; i < size; ++i) {
     unsigned char b;
 
-    status = pReader->Read(pos, 1, &b);
+    const long status = pReader->Read(pos, 1, &b);
 
     if (status < 0)
-        return status;
+      return status;
 
-    assert(status == 0);
+    result <<= 8;
+    result |= b;
 
-    if (b == 0)  //we can't handle u-int values larger than 8 bytes
-        return E_FILE_FORMAT_INVALID;
+    ++pos;
+  }
 
-    unsigned char m = 0x80;
-
-    while (!(b & m))
-    {
-        m >>= 1;
-        ++len;
-    }
-
-    return 0;  //success
+  return result;
 }
 
+long mkvparser::UnserializeFloat(IMkvReader* pReader, long long pos,
+                                 long long size_, double& result) {
+  assert(pReader);
+  assert(pos >= 0);
 
-long long mkvparser::UnserializeUInt(
-    IMkvReader* pReader,
-    long long pos,
-    long long size)
-{
-    assert(pReader);
-    assert(pos >= 0);
+  if ((size_ != 4) && (size_ != 8))
+    return E_FILE_FORMAT_INVALID;
 
-    if ((size <= 0) || (size > 8))
-        return E_FILE_FORMAT_INVALID;
+  const long size = static_cast<long>(size_);
 
-    long long result = 0;
+  unsigned char buf[8];
 
-    for (long long i = 0; i < size; ++i)
-    {
-        unsigned char b;
+  const int status = pReader->Read(pos, size, buf);
 
-        const long status = pReader->Read(pos, 1, &b);
+  if (status < 0)  // error
+    return status;
 
-        if (status < 0)
-            return status;
+  if (size == 4) {
+    union {
+      float f;
+      unsigned long ff;
+    };
 
-        result <<= 8;
-        result |= b;
+    ff = 0;
 
-        ++pos;
+    for (int i = 0;;) {
+      ff |= buf[i];
+
+      if (++i >= 4)
+        break;
+
+      ff <<= 8;
     }
 
-    return result;
+    result = f;
+  } else {
+    assert(size == 8);
+
+    union {
+      double d;
+      unsigned long long dd;
+    };
+
+    dd = 0;
+
+    for (int i = 0;;) {
+      dd |= buf[i];
+
+      if (++i >= 8)
+        break;
+
+      dd <<= 8;
+    }
+
+    result = d;
+  }
+
+  return 0;
 }
 
+long mkvparser::UnserializeInt(IMkvReader* pReader, long long pos, long size,
+                               long long& result) {
+  assert(pReader);
+  assert(pos >= 0);
+  assert(size > 0);
+  assert(size <= 8);
 
-long mkvparser::UnserializeFloat(
-    IMkvReader* pReader,
-    long long pos,
-    long long size_,
-    double& result)
-{
-    assert(pReader);
-    assert(pos >= 0);
+  {
+    signed char b;
 
-    if ((size_ != 4) && (size_ != 8))
-        return E_FILE_FORMAT_INVALID;
+    const long status = pReader->Read(pos, 1, (unsigned char*)&b);
 
-    const long size = static_cast<long>(size_);
+    if (status < 0)
+      return status;
 
-    unsigned char buf[8];
+    result = b;
 
-    const int status = pReader->Read(pos, size, buf);
+    ++pos;
+  }
 
-    if (status < 0)  //error
-        return status;
+  for (long i = 1; i < size; ++i) {
+    unsigned char b;
 
-    if (size == 4)
-    {
-        union
-        {
-            float f;
-            unsigned long ff;
-        };
+    const long status = pReader->Read(pos, 1, &b);
 
-        ff = 0;
+    if (status < 0)
+      return status;
 
-        for (int i = 0;;)
-        {
-            ff |= buf[i];
+    result <<= 8;
+    result |= b;
 
-            if (++i >= 4)
-                break;
+    ++pos;
+  }
 
-            ff <<= 8;
-        }
-
-        result = f;
-    }
-    else
-    {
-        assert(size == 8);
-
-        union
-        {
-            double d;
-            unsigned long long dd;
-        };
-
-        dd = 0;
-
-        for (int i = 0;;)
-        {
-            dd |= buf[i];
-
-            if (++i >= 8)
-                break;
-
-            dd <<= 8;
-        }
-
-        result = d;
-    }
-
-    return 0;
+  return 0;  // success
 }
 
+long mkvparser::UnserializeString(IMkvReader* pReader, long long pos,
+                                  long long size_, char*& str) {
+  delete[] str;
+  str = NULL;
 
-long mkvparser::UnserializeInt(
-    IMkvReader* pReader,
-    long long pos,
-    long size,
-    long long& result)
-{
-    assert(pReader);
-    assert(pos >= 0);
-    assert(size > 0);
-    assert(size <= 8);
+  if (size_ >= LONG_MAX)  // we need (size+1) chars
+    return E_FILE_FORMAT_INVALID;
 
-    {
-        signed char b;
+  const long size = static_cast<long>(size_);
 
-        const long status = pReader->Read(pos, 1, (unsigned char*)&b);
+  str = new (std::nothrow) char[size + 1];
 
-        if (status < 0)
-            return status;
+  if (str == NULL)
+    return -1;
 
-        result = b;
+  unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
 
-        ++pos;
-    }
+  const long status = pReader->Read(pos, size, buf);
 
-    for (long i = 1; i < size; ++i)
-    {
-        unsigned char b;
-
-        const long status = pReader->Read(pos, 1, &b);
-
-        if (status < 0)
-            return status;
-
-        result <<= 8;
-        result |= b;
-
-        ++pos;
-    }
-
-    return 0;  //success
-}
-
-
-long mkvparser::UnserializeString(
-    IMkvReader* pReader,
-    long long pos,
-    long long size_,
-    char*& str)
-{
+  if (status) {
     delete[] str;
     str = NULL;
 
-    if (size_ >= LONG_MAX)  //we need (size+1) chars
-        return E_FILE_FORMAT_INVALID;
+    return status;
+  }
 
-    const long size = static_cast<long>(size_);
+  str[size] = '\0';
 
-    str = new (std::nothrow) char[size+1];
-
-    if (str == NULL)
-        return -1;
-
-    unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
-
-    const long status = pReader->Read(pos, size, buf);
-
-    if (status)
-    {
-        delete[] str;
-        str = NULL;
-
-        return status;
-    }
-
-    str[size] = '\0';
-
-    return 0;  //success
+  return 0;  // success
 }
 
+long mkvparser::ParseElementHeader(IMkvReader* pReader, long long& pos,
+                                   long long stop, long long& id,
+                                   long long& size) {
+  if ((stop >= 0) && (pos >= stop))
+    return E_FILE_FORMAT_INVALID;
 
-long mkvparser::ParseElementHeader(
-    IMkvReader* pReader,
-    long long& pos,
-    long long stop,
-    long long& id,
-    long long& size)
-{
-    if ((stop >= 0) && (pos >= stop))
-        return E_FILE_FORMAT_INVALID;
+  long len;
 
-    long len;
+  id = ReadUInt(pReader, pos, len);
 
-    id = ReadUInt(pReader, pos, len);
+  if (id < 0)
+    return E_FILE_FORMAT_INVALID;
 
-    if (id < 0)
-        return E_FILE_FORMAT_INVALID;
+  pos += len;  // consume id
 
-    pos += len;  //consume id
+  if ((stop >= 0) && (pos >= stop))
+    return E_FILE_FORMAT_INVALID;
 
-    if ((stop >= 0) && (pos >= stop))
-        return E_FILE_FORMAT_INVALID;
+  size = ReadUInt(pReader, pos, len);
 
-    size = ReadUInt(pReader, pos, len);
+  if (size < 0)
+    return E_FILE_FORMAT_INVALID;
 
-    if (size < 0)
-        return E_FILE_FORMAT_INVALID;
+  pos += len;  // consume length of size
 
-    pos += len;  //consume length of size
+  // pos now designates payload
 
-    //pos now designates payload
+  if ((stop >= 0) && ((pos + size) > stop))
+    return E_FILE_FORMAT_INVALID;
 
-    if ((stop >= 0) && ((pos + size) > stop))
-        return E_FILE_FORMAT_INVALID;
-
-    return 0;  //success
+  return 0;  // success
 }
 
+bool mkvparser::Match(IMkvReader* pReader, long long& pos, unsigned long id_,
+                      long long& val) {
+  assert(pReader);
+  assert(pos >= 0);
 
-bool mkvparser::Match(
-    IMkvReader* pReader,
-    long long& pos,
-    unsigned long id_,
-    long long& val)
-{
-    assert(pReader);
-    assert(pos >= 0);
+  long long total, available;
 
-    long long total, available;
+  const long status = pReader->Length(&total, &available);
+  assert(status >= 0);
+  assert((total < 0) || (available <= total));
+  if (status < 0)
+    return false;
 
-    const long status = pReader->Length(&total, &available);
-    assert(status >= 0);
-    assert((total < 0) || (available <= total));
-    if (status < 0)
-        return false;
+  long len;
 
-    long len;
+  const long long id = ReadUInt(pReader, pos, len);
+  assert(id >= 0);
+  assert(len > 0);
+  assert(len <= 8);
+  assert((pos + len) <= available);
 
-    const long long id = ReadUInt(pReader, pos, len);
-    assert(id >= 0);
-    assert(len > 0);
-    assert(len <= 8);
-    assert((pos + len) <= available);
+  if ((unsigned long)id != id_)
+    return false;
 
-    if ((unsigned long)id != id_)
-        return false;
+  pos += len;  // consume id
 
-    pos += len;  //consume id
+  const long long size = ReadUInt(pReader, pos, len);
+  assert(size >= 0);
+  assert(size <= 8);
+  assert(len > 0);
+  assert(len <= 8);
+  assert((pos + len) <= available);
 
-    const long long size = ReadUInt(pReader, pos, len);
-    assert(size >= 0);
-    assert(size <= 8);
-    assert(len > 0);
-    assert(len <= 8);
-    assert((pos + len) <= available);
+  pos += len;  // consume length of size of payload
 
-    pos += len;  //consume length of size of payload
+  val = UnserializeUInt(pReader, pos, size);
+  assert(val >= 0);
 
-    val = UnserializeUInt(pReader, pos, size);
-    assert(val >= 0);
+  pos += size;  // consume size of payload
 
-    pos += size;  //consume size of payload
-
-    return true;
+  return true;
 }
 
-bool mkvparser::Match(
-    IMkvReader* pReader,
-    long long& pos,
-    unsigned long id_,
-    unsigned char*& buf,
-    size_t& buflen)
-{
-    assert(pReader);
-    assert(pos >= 0);
+bool mkvparser::Match(IMkvReader* pReader, long long& pos, unsigned long id_,
+                      unsigned char*& buf, size_t& buflen) {
+  assert(pReader);
+  assert(pos >= 0);
 
-    long long total, available;
+  long long total, available;
 
-    long status = pReader->Length(&total, &available);
-    assert(status >= 0);
-    assert((total < 0) || (available <= total));
-    if (status < 0)
-        return false;
+  long status = pReader->Length(&total, &available);
+  assert(status >= 0);
+  assert((total < 0) || (available <= total));
+  if (status < 0)
+    return false;
 
-    long len;
-    const long long id = ReadUInt(pReader, pos, len);
-    assert(id >= 0);
-    assert(len > 0);
-    assert(len <= 8);
-    assert((pos + len) <= available);
+  long len;
+  const long long id = ReadUInt(pReader, pos, len);
+  assert(id >= 0);
+  assert(len > 0);
+  assert(len <= 8);
+  assert((pos + len) <= available);
 
-    if ((unsigned long)id != id_)
-        return false;
+  if ((unsigned long)id != id_)
+    return false;
 
-    pos += len;  //consume id
+  pos += len;  // consume id
 
-    const long long size_ = ReadUInt(pReader, pos, len);
-    assert(size_ >= 0);
-    assert(len > 0);
-    assert(len <= 8);
-    assert((pos + len) <= available);
+  const long long size_ = ReadUInt(pReader, pos, len);
+  assert(size_ >= 0);
+  assert(len > 0);
+  assert(len <= 8);
+  assert((pos + len) <= available);
 
-    pos += len;  //consume length of size of payload
-    assert((pos + size_) <= available);
+  pos += len;  // consume length of size of payload
+  assert((pos + size_) <= available);
 
-    const long buflen_ = static_cast<long>(size_);
+  const long buflen_ = static_cast<long>(size_);
 
-    buf = new (std::nothrow) unsigned char[buflen_];
-    assert(buf);  //TODO
+  buf = new (std::nothrow) unsigned char[buflen_];
+  assert(buf);  // TODO
 
-    status = pReader->Read(pos, buflen_, buf);
-    assert(status == 0);  //TODO
+  status = pReader->Read(pos, buflen_, buf);
+  assert(status == 0);  // TODO
 
-    buflen = buflen_;
+  buflen = buflen_;
 
-    pos += size_;  //consume size of payload
-    return true;
+  pos += size_;  // consume size of payload
+  return true;
 }
 
+namespace mkvparser {
 
-namespace mkvparser
-{
+EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); }
 
-EBMLHeader::EBMLHeader() :
-    m_docType(NULL)
-{
-    Init();
-}
+EBMLHeader::~EBMLHeader() { delete[] m_docType; }
 
-EBMLHeader::~EBMLHeader()
-{
+void EBMLHeader::Init() {
+  m_version = 1;
+  m_readVersion = 1;
+  m_maxIdLength = 4;
+  m_maxSizeLength = 8;
+
+  if (m_docType) {
     delete[] m_docType;
+    m_docType = NULL;
+  }
+
+  m_docTypeVersion = 1;
+  m_docTypeReadVersion = 1;
 }
 
-void EBMLHeader::Init()
-{
-    m_version = 1;
-    m_readVersion = 1;
-    m_maxIdLength = 4;
-    m_maxSizeLength = 8;
+long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
+  assert(pReader);
 
-    if (m_docType)
-    {
-        delete[] m_docType;
-        m_docType = NULL;
-    }
+  long long total, available;
 
-    m_docTypeVersion = 1;
-    m_docTypeReadVersion = 1;
-}
+  long status = pReader->Length(&total, &available);
 
-long long EBMLHeader::Parse(
-    IMkvReader* pReader,
-    long long& pos)
-{
-    assert(pReader);
+  if (status < 0)  // error
+    return status;
 
-    long long total, available;
+  pos = 0;
+  long long end = (available >= 1024) ? 1024 : available;
 
-    long status = pReader->Length(&total, &available);
+  for (;;) {
+    unsigned char b = 0;
 
-    if (status < 0)  //error
+    while (pos < end) {
+      status = pReader->Read(pos, 1, &b);
+
+      if (status < 0)  // error
         return status;
 
-    pos = 0;
-    long long end = (available >= 1024) ? 1024 : available;
+      if (b == 0x1A)
+        break;
 
-    for (;;)
-    {
-        unsigned char b = 0;
-
-        while (pos < end)
-        {
-            status = pReader->Read(pos, 1, &b);
-
-            if (status < 0)  //error
-                return status;
-
-            if (b == 0x1A)
-                break;
-
-            ++pos;
-        }
-
-        if (b != 0x1A)
-        {
-            if (pos >= 1024)
-                return E_FILE_FORMAT_INVALID;  //don't bother looking anymore
-
-            if ((total >= 0) && ((total - available) < 5))
-                return E_FILE_FORMAT_INVALID;
-
-            return available + 5;  //5 = 4-byte ID + 1st byte of size
-        }
-
-        if ((total >= 0) && ((total - pos) < 5))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((available - pos) < 5)
-            return pos + 5;  //try again later
-
-        long len;
-
-        const long long result = ReadUInt(pReader, pos, len);
-
-        if (result < 0)  //error
-            return result;
-
-        if (result == 0x0A45DFA3)  //EBML Header ID
-        {
-            pos += len;  //consume ID
-            break;
-        }
-
-        ++pos;  //throw away just the 0x1A byte, and try again
+      ++pos;
     }
 
-    //pos designates start of size field
+    if (b != 0x1A) {
+      if (pos >= 1024)
+        return E_FILE_FORMAT_INVALID;  // don't bother looking anymore
 
-    //get length of size field
+      if ((total >= 0) && ((total - available) < 5))
+        return E_FILE_FORMAT_INVALID;
 
+      return available + 5;  // 5 = 4-byte ID + 1st byte of size
+    }
+
+    if ((total >= 0) && ((total - pos) < 5))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((available - pos) < 5)
+      return pos + 5;  // try again later
+
+    long len;
+
+    const long long result = ReadUInt(pReader, pos, len);
+
+    if (result < 0)  // error
+      return result;
+
+    if (result == 0x0A45DFA3) {  // EBML Header ID
+      pos += len;  // consume ID
+      break;
+    }
+
+    ++pos;  // throw away just the 0x1A byte, and try again
+  }
+
+  // pos designates start of size field
+
+  // get length of size field
+
+  long len;
+  long long result = GetUIntLength(pReader, pos, len);
+
+  if (result < 0)  // error
+    return result;
+
+  if (result > 0)  // need more data
+    return result;
+
+  assert(len > 0);
+  assert(len <= 8);
+
+  if ((total >= 0) && ((total - pos) < len))
+    return E_FILE_FORMAT_INVALID;
+
+  if ((available - pos) < len)
+    return pos + len;  // try again later
+
+  // get the EBML header size
+
+  result = ReadUInt(pReader, pos, len);
+
+  if (result < 0)  // error
+    return result;
+
+  pos += len;  // consume size field
+
+  // pos now designates start of payload
+
+  if ((total >= 0) && ((total - pos) < result))
+    return E_FILE_FORMAT_INVALID;
+
+  if ((available - pos) < result)
+    return pos + result;
+
+  end = pos + result;
+
+  Init();
+
+  while (pos < end) {
+    long long id, size;
+
+    status = ParseElementHeader(pReader, pos, end, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // weird
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == 0x0286) {  // version
+      m_version = UnserializeUInt(pReader, pos, size);
+
+      if (m_version <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x02F7) {  // read version
+      m_readVersion = UnserializeUInt(pReader, pos, size);
+
+      if (m_readVersion <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x02F2) {  // max id length
+      m_maxIdLength = UnserializeUInt(pReader, pos, size);
+
+      if (m_maxIdLength <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x02F3) {  // max size length
+      m_maxSizeLength = UnserializeUInt(pReader, pos, size);
+
+      if (m_maxSizeLength <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x0282) {  // doctype
+      if (m_docType)
+        return E_FILE_FORMAT_INVALID;
+
+      status = UnserializeString(pReader, pos, size, m_docType);
+
+      if (status)  // error
+        return status;
+    } else if (id == 0x0287) {  // doctype version
+      m_docTypeVersion = UnserializeUInt(pReader, pos, size);
+
+      if (m_docTypeVersion <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x0285) {  // doctype read version
+      m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
+
+      if (m_docTypeReadVersion <= 0)
+        return E_FILE_FORMAT_INVALID;
+    }
+
+    pos += size;
+  }
+
+  assert(pos == end);
+  return 0;
+}
+
+Segment::Segment(IMkvReader* pReader, long long elem_start,
+                 // long long elem_size,
+                 long long start, long long size)
+    : m_pReader(pReader),
+      m_element_start(elem_start),
+      // m_element_size(elem_size),
+      m_start(start),
+      m_size(size),
+      m_pos(start),
+      m_pUnknownSize(0),
+      m_pSeekHead(NULL),
+      m_pInfo(NULL),
+      m_pTracks(NULL),
+      m_pCues(NULL),
+      m_pChapters(NULL),
+      m_clusters(NULL),
+      m_clusterCount(0),
+      m_clusterPreloadCount(0),
+      m_clusterSize(0) {}
+
+Segment::~Segment() {
+  const long count = m_clusterCount + m_clusterPreloadCount;
+
+  Cluster** i = m_clusters;
+  Cluster** j = m_clusters + count;
+
+  while (i != j) {
+    Cluster* const p = *i++;
+    assert(p);
+
+    delete p;
+  }
+
+  delete[] m_clusters;
+
+  delete m_pTracks;
+  delete m_pInfo;
+  delete m_pCues;
+  delete m_pChapters;
+  delete m_pSeekHead;
+}
+
+long long Segment::CreateInstance(IMkvReader* pReader, long long pos,
+                                  Segment*& pSegment) {
+  assert(pReader);
+  assert(pos >= 0);
+
+  pSegment = NULL;
+
+  long long total, available;
+
+  const long status = pReader->Length(&total, &available);
+
+  if (status < 0)  // error
+    return status;
+
+  if (available < 0)
+    return -1;
+
+  if ((total >= 0) && (available > total))
+    return -1;
+
+  // I would assume that in practice this loop would execute
+  // exactly once, but we allow for other elements (e.g. Void)
+  // to immediately follow the EBML header.  This is fine for
+  // the source filter case (since the entire file is available),
+  // but in the splitter case over a network we should probably
+  // just give up early.  We could for example decide only to
+  // execute this loop a maximum of, say, 10 times.
+  // TODO:
+  // There is an implied "give up early" by only parsing up
+  // to the available limit.  We do do that, but only if the
+  // total file size is unknown.  We could decide to always
+  // use what's available as our limit (irrespective of whether
+  // we happen to know the total file length).  This would have
+  // as its sense "parse this much of the file before giving up",
+  // which is a slightly different sense from "try to parse up to
+  // 10 EBML elements before giving up".
+
+  for (;;) {
+    if ((total >= 0) && (pos >= total))
+      return E_FILE_FORMAT_INVALID;
+
+    // Read ID
     long len;
     long long result = GetUIntLength(pReader, pos, len);
 
-    if (result < 0)  //error
-        return result;
+    if (result)  // error, or too few available bytes
+      return result;
 
-    if (result > 0)  //need more data
-        return result;
+    if ((total >= 0) && ((pos + len) > total))
+      return E_FILE_FORMAT_INVALID;
 
-    assert(len > 0);
-    assert(len <= 8);
+    if ((pos + len) > available)
+      return pos + len;
 
-    if ((total >= 0) && ((total -  pos) < len))
-        return E_FILE_FORMAT_INVALID;
+    const long long idpos = pos;
+    const long long id = ReadUInt(pReader, pos, len);
 
-    if ((available - pos) < len)
-        return pos + len;  //try again later
+    if (id < 0)  // error
+      return id;
 
-    //get the EBML header size
+    pos += len;  // consume ID
 
-    result = ReadUInt(pReader, pos, len);
+    // Read Size
 
-    if (result < 0)  //error
-        return result;
+    result = GetUIntLength(pReader, pos, len);
 
-    pos += len;  //consume size field
+    if (result)  // error, or too few available bytes
+      return result;
 
-    //pos now designates start of payload
+    if ((total >= 0) && ((pos + len) > total))
+      return E_FILE_FORMAT_INVALID;
 
-    if ((total >= 0) && ((total - pos) < result))
-        return E_FILE_FORMAT_INVALID;
+    if ((pos + len) > available)
+      return pos + len;
 
-    if ((available - pos) < result)
-        return pos + result;
+    long long size = ReadUInt(pReader, pos, len);
 
-    end = pos + result;
+    if (size < 0)  // error
+      return size;
 
-    Init();
+    pos += len;  // consume length of size of element
 
-    while (pos < end)
-    {
-        long long id, size;
+    // Pos now points to start of payload
 
-        status = ParseElementHeader(
-                    pReader,
-                    pos,
-                    end,
-                    id,
-                    size);
+    // Handle "unknown size" for live streaming of webm files.
+    const long long unknown_size = (1LL << (7 * len)) - 1;
 
-        if (status < 0) //error
-            return status;
+    if (id == 0x08538067) {  // Segment ID
+      if (size == unknown_size)
+        size = -1;
 
-        if (size == 0)  //weird
-            return E_FILE_FORMAT_INVALID;
+      else if (total < 0)
+        size = -1;
 
-        if (id == 0x0286)  //version
-        {
-            m_version = UnserializeUInt(pReader, pos, size);
+      else if ((pos + size) > total)
+        size = -1;
 
-            if (m_version <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x02F7)  //read version
-        {
-            m_readVersion = UnserializeUInt(pReader, pos, size);
+      pSegment = new (std::nothrow) Segment(pReader, idpos,
+                                            // elem_size
+                                            pos, size);
 
-            if (m_readVersion <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x02F2)  //max id length
-        {
-            m_maxIdLength = UnserializeUInt(pReader, pos, size);
+      if (pSegment == 0)
+        return -1;  // generic error
 
-            if (m_maxIdLength <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x02F3)  //max size length
-        {
-            m_maxSizeLength = UnserializeUInt(pReader, pos, size);
-
-            if (m_maxSizeLength <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x0282)  //doctype
-        {
-            if (m_docType)
-                return E_FILE_FORMAT_INVALID;
-
-            status = UnserializeString(pReader, pos, size, m_docType);
-
-            if (status)  //error
-                return status;
-        }
-        else if (id == 0x0287)  //doctype version
-        {
-            m_docTypeVersion = UnserializeUInt(pReader, pos, size);
-
-            if (m_docTypeVersion <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x0285)  //doctype read version
-        {
-            m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
-
-            if (m_docTypeReadVersion <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-
-        pos += size;
+      return 0;  // success
     }
 
-    assert(pos == end);
-    return 0;
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((total >= 0) && ((pos + size) > total))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + size) > available)
+      return pos + size;
+
+    pos += size;  // consume payload
+  }
 }
 
+long long Segment::ParseHeaders() {
+  // Outermost (level 0) segment object has been constructed,
+  // and pos designates start of payload.  We need to find the
+  // inner (level 1) elements.
+  long long total, available;
 
-Segment::Segment(
-    IMkvReader* pReader,
-    long long elem_start,
-    //long long elem_size,
-    long long start,
-    long long size) :
-    m_pReader(pReader),
-    m_element_start(elem_start),
-    //m_element_size(elem_size),
-    m_start(start),
-    m_size(size),
-    m_pos(start),
-    m_pUnknownSize(0),
-    m_pSeekHead(NULL),
-    m_pInfo(NULL),
-    m_pTracks(NULL),
-    m_pCues(NULL),
-    m_pChapters(NULL),
-    m_clusters(NULL),
-    m_clusterCount(0),
-    m_clusterPreloadCount(0),
-    m_clusterSize(0)
-{
-}
+  const int status = m_pReader->Length(&total, &available);
 
+  if (status < 0)  // error
+    return status;
 
-Segment::~Segment()
-{
-    const long count = m_clusterCount + m_clusterPreloadCount;
+  assert((total < 0) || (available <= total));
 
-    Cluster** i = m_clusters;
-    Cluster** j = m_clusters + count;
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+  assert((segment_stop < 0) || (total < 0) || (segment_stop <= total));
+  assert((segment_stop < 0) || (m_pos <= segment_stop));
 
-    while (i != j)
-    {
-        Cluster* const p = *i++;
-        assert(p);
+  for (;;) {
+    if ((total >= 0) && (m_pos >= total))
+      break;
 
-        delete p;
-    }
+    if ((segment_stop >= 0) && (m_pos >= segment_stop))
+      break;
 
-    delete[] m_clusters;
+    long long pos = m_pos;
+    const long long element_start = pos;
 
-    delete m_pTracks;
-    delete m_pInfo;
-    delete m_pCues;
-    delete m_pChapters;
-    delete m_pSeekHead;
-}
+    if ((pos + 1) > available)
+      return (pos + 1);
 
+    long len;
+    long long result = GetUIntLength(m_pReader, pos, len);
 
-long long Segment::CreateInstance(
-    IMkvReader* pReader,
-    long long pos,
-    Segment*& pSegment)
-{
-    assert(pReader);
-    assert(pos >= 0);
+    if (result < 0)  // error
+      return result;
 
-    pSegment = NULL;
+    if (result > 0)  // underflow (weird)
+      return (pos + 1);
 
-    long long total, available;
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
 
-    const long status = pReader->Length(&total, &available);
+    if ((pos + len) > available)
+      return pos + len;
 
-    if (status < 0) //error
-        return status;
+    const long long idpos = pos;
+    const long long id = ReadUInt(m_pReader, idpos, len);
 
-    if (available < 0)
+    if (id < 0)  // error
+      return id;
+
+    if (id == 0x0F43B675)  // Cluster ID
+      break;
+
+    pos += len;  // consume ID
+
+    if ((pos + 1) > available)
+      return (pos + 1);
+
+    // Read Size
+    result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return result;
+
+    if (result > 0)  // underflow (weird)
+      return (pos + 1);
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > available)
+      return pos + len;
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+
+    if (size < 0)  // error
+      return size;
+
+    pos += len;  // consume length of size of element
+
+    const long long element_size = size + pos - element_start;
+
+    // Pos now points to start of payload
+
+    if ((segment_stop >= 0) && ((pos + size) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // We read EBML elements either in total or nothing at all.
+
+    if ((pos + size) > available)
+      return pos + size;
+
+    if (id == 0x0549A966) {  // Segment Info ID
+      if (m_pInfo)
+        return E_FILE_FORMAT_INVALID;
+
+      m_pInfo = new (std::nothrow)
+          SegmentInfo(this, pos, size, element_start, element_size);
+
+      if (m_pInfo == NULL)
         return -1;
 
-    if ((total >= 0) && (available > total))
+      const long status = m_pInfo->Parse();
+
+      if (status)
+        return status;
+    } else if (id == 0x0654AE6B) {  // Tracks ID
+      if (m_pTracks)
+        return E_FILE_FORMAT_INVALID;
+
+      m_pTracks = new (std::nothrow)
+          Tracks(this, pos, size, element_start, element_size);
+
+      if (m_pTracks == NULL)
         return -1;
 
-    //I would assume that in practice this loop would execute
-    //exactly once, but we allow for other elements (e.g. Void)
-    //to immediately follow the EBML header.  This is fine for
-    //the source filter case (since the entire file is available),
-    //but in the splitter case over a network we should probably
-    //just give up early.  We could for example decide only to
-    //execute this loop a maximum of, say, 10 times.
-    //TODO:
-    //There is an implied "give up early" by only parsing up
-    //to the available limit.  We do do that, but only if the
-    //total file size is unknown.  We could decide to always
-    //use what's available as our limit (irrespective of whether
-    //we happen to know the total file length).  This would have
-    //as its sense "parse this much of the file before giving up",
-    //which a slightly different sense from "try to parse up to
-    //10 EMBL elements before giving up".
+      const long status = m_pTracks->Parse();
 
-    for (;;)
-    {
-        if ((total >= 0) && (pos >= total))
-            return E_FILE_FORMAT_INVALID;
-
-        //Read ID
-        long len;
-        long long result = GetUIntLength(pReader, pos, len);
-
-        if (result)  //error, or too few available bytes
-            return result;
-
-        if ((total >= 0) && ((pos + len) > total))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > available)
-            return pos + len;
-
-        const long long idpos = pos;
-        const long long id = ReadUInt(pReader, pos, len);
-
-        if (id < 0)  //error
-            return id;
-
-        pos += len;  //consume ID
-
-        //Read Size
-
-        result = GetUIntLength(pReader, pos, len);
-
-        if (result)  //error, or too few available bytes
-            return result;
-
-        if ((total >= 0) && ((pos + len) > total))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > available)
-            return pos + len;
-
-        long long size = ReadUInt(pReader, pos, len);
-
-        if (size < 0)  //error
-            return size;
-
-        pos += len;  //consume length of size of element
-
-        //Pos now points to start of payload
-
-        //Handle "unknown size" for live streaming of webm files.
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-        if (id == 0x08538067)  //Segment ID
-        {
-            if (size == unknown_size)
-                size = -1;
-
-            else if (total < 0)
-                size = -1;
-
-            else if ((pos + size) > total)
-                size = -1;
-
-            pSegment = new (std::nothrow) Segment(
-                                            pReader,
-                                            idpos,
-                                            //elem_size
-                                            pos,
-                                            size);
-
-            if (pSegment == 0)
-                return -1;  //generic error
-
-            return 0;    //success
-        }
-
-        if (size == unknown_size)
-            return E_FILE_FORMAT_INVALID;
-
-        if ((total >= 0) && ((pos + size) > total))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + size) > available)
-            return pos + size;
-
-        pos += size;  //consume payload
-    }
-}
-
-
-long long Segment::ParseHeaders()
-{
-    //Outermost (level 0) segment object has been constructed,
-    //and pos designates start of payload.  We need to find the
-    //inner (level 1) elements.
-    long long total, available;
-
-    const int status = m_pReader->Length(&total, &available);
-
-    if (status < 0) //error
+      if (status)
         return status;
+    } else if (id == 0x0C53BB6B) {  // Cues ID
+      if (m_pCues == NULL) {
+        m_pCues = new (std::nothrow)
+            Cues(this, pos, size, element_start, element_size);
 
-    assert((total < 0) || (available <= total));
+        if (m_pCues == NULL)
+          return -1;
+      }
+    } else if (id == 0x014D9B74) {  // SeekHead ID
+      if (m_pSeekHead == NULL) {
+        m_pSeekHead = new (std::nothrow)
+            SeekHead(this, pos, size, element_start, element_size);
 
-    const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
-    assert((segment_stop < 0) || (total < 0) || (segment_stop <= total));
-    assert((segment_stop < 0) || (m_pos <= segment_stop));
+        if (m_pSeekHead == NULL)
+          return -1;
 
-    for (;;)
-    {
-        if ((total >= 0) && (m_pos >= total))
-            break;
+        const long status = m_pSeekHead->Parse();
 
-        if ((segment_stop >= 0) && (m_pos >= segment_stop))
-            break;
+        if (status)
+          return status;
+      }
+    } else if (id == 0x0043A770) {  // Chapters ID
+      if (m_pChapters == NULL) {
+        m_pChapters = new (std::nothrow)
+            Chapters(this, pos, size, element_start, element_size);
 
-        long long pos = m_pos;
-        const long long element_start = pos;
+        if (m_pChapters == NULL)
+          return -1;
 
-        if ((pos + 1) > available)
-            return (pos + 1);
+        const long status = m_pChapters->Parse();
 
-        long len;
-        long long result = GetUIntLength(m_pReader, pos, len);
-
-        if (result < 0)  //error
-            return result;
-
-        if (result > 0)  //underflow (weird)
-            return (pos + 1);
-
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > available)
-            return pos + len;
-
-        const long long idpos = pos;
-        const long long id = ReadUInt(m_pReader, idpos, len);
-
-        if (id < 0)  //error
-            return id;
-
-        if (id == 0x0F43B675)  //Cluster ID
-            break;
-
-        pos += len;  //consume ID
-
-        if ((pos + 1) > available)
-            return (pos + 1);
-
-        //Read Size
-        result = GetUIntLength(m_pReader, pos, len);
-
-        if (result < 0)  //error
-            return result;
-
-        if (result > 0)  //underflow (weird)
-            return (pos + 1);
-
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > available)
-            return pos + len;
-
-        const long long size = ReadUInt(m_pReader, pos, len);
-
-        if (size < 0)  //error
-            return size;
-
-        pos += len;  //consume length of size of element
-
-        const long long element_size = size + pos - element_start;
-
-        //Pos now points to start of payload
-
-        if ((segment_stop >= 0) && ((pos + size) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        //We read EBML elements either in total or nothing at all.
-
-        if ((pos + size) > available)
-            return pos + size;
-
-        if (id == 0x0549A966)  //Segment Info ID
-        {
-            if (m_pInfo)
-                return E_FILE_FORMAT_INVALID;
-
-            m_pInfo = new (std::nothrow) SegmentInfo(
-                                          this,
-                                          pos,
-                                          size,
-                                          element_start,
-                                          element_size);
-
-            if (m_pInfo == NULL)
-                return -1;
-
-            const long status = m_pInfo->Parse();
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x0654AE6B)  //Tracks ID
-        {
-            if (m_pTracks)
-                return E_FILE_FORMAT_INVALID;
-
-            m_pTracks = new (std::nothrow) Tracks(this,
-                                                  pos,
-                                                  size,
-                                                  element_start,
-                                                  element_size);
-
-            if (m_pTracks == NULL)
-                return -1;
-
-            const long status = m_pTracks->Parse();
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x0C53BB6B)  //Cues ID
-        {
-            if (m_pCues == NULL)
-            {
-                m_pCues = new (std::nothrow) Cues(
-                                                this,
-                                                pos,
-                                                size,
-                                                element_start,
-                                                element_size);
-
-                if (m_pCues == NULL)
-                    return -1;
-            }
-        }
-        else if (id == 0x014D9B74)  //SeekHead ID
-        {
-            if (m_pSeekHead == NULL)
-            {
-                m_pSeekHead = new (std::nothrow) SeekHead(
-                                                    this,
-                                                    pos,
-                                                    size,
-                                                    element_start,
-                                                    element_size);
-
-                if (m_pSeekHead == NULL)
-                    return -1;
-
-                const long status = m_pSeekHead->Parse();
-
-                if (status)
-                    return status;
-            }
-        }
-        else if (id == 0x0043A770)  //Chapters ID
-        {
-            if (m_pChapters == NULL)
-            {
-                m_pChapters = new (std::nothrow) Chapters(
-                                this,
-                                pos,
-                                size,
-                                element_start,
-                                element_size);
-
-                if (m_pChapters == NULL)
-                  return -1;
-
-                const long status = m_pChapters->Parse();
-
-                if (status)
-                  return status;
-            }
-        }
-
-        m_pos = pos + size;  //consume payload
+        if (status)
+          return status;
+      }
     }
 
-    assert((segment_stop < 0) || (m_pos <= segment_stop));
+    m_pos = pos + size;  // consume payload
+  }
 
-    if (m_pInfo == NULL)  //TODO: liberalize this behavior
-        return E_FILE_FORMAT_INVALID;
+  assert((segment_stop < 0) || (m_pos <= segment_stop));
 
-    if (m_pTracks == NULL)
-        return E_FILE_FORMAT_INVALID;
+  if (m_pInfo == NULL)  // TODO: liberalize this behavior
+    return E_FILE_FORMAT_INVALID;
 
-    return 0;  //success
+  if (m_pTracks == NULL)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;  // success
 }
 
+long Segment::LoadCluster(long long& pos, long& len) {
+  for (;;) {
+    const long result = DoLoadCluster(pos, len);
 
-long Segment::LoadCluster(
-    long long& pos,
-    long& len)
-{
-    for (;;)
-    {
-        const long result = DoLoadCluster(pos, len);
+    if (result <= 1)
+      return result;
+  }
+}
 
-        if (result <= 1)
-            return result;
+long Segment::DoLoadCluster(long long& pos, long& len) {
+  if (m_pos < 0)
+    return DoLoadClusterUnknownSize(pos, len);
+
+  long long total, avail;
+
+  long status = m_pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+  long long cluster_off = -1;  // offset relative to start of segment
+  long long cluster_size = -1;  // size of cluster payload
+
+  for (;;) {
+    if ((total >= 0) && (m_pos >= total))
+      return 1;  // no more clusters
+
+    if ((segment_stop >= 0) && (m_pos >= segment_stop))
+      return 1;  // no more clusters
+
+    pos = m_pos;
+
+    // Read ID
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
     }
-}
 
+    long long result = GetUIntLength(m_pReader, pos, len);
 
-long Segment::DoLoadCluster(
-    long long& pos,
-    long& len)
-{
-    if (m_pos < 0)
-        return DoLoadClusterUnknownSize(pos, len);
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-    long long total, avail;
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-    long status = m_pReader->Length(&total, &avail);
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
 
-    if (status < 0)  //error
-        return status;
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-    assert((total < 0) || (avail <= total));
+    const long long idpos = pos;
+    const long long id = ReadUInt(m_pReader, idpos, len);
 
-    const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+    if (id < 0)  // error (or underflow)
+      return static_cast<long>(id);
 
-    long long cluster_off = -1;   //offset relative to start of segment
-    long long cluster_size = -1;  //size of cluster payload
+    pos += len;  // consume ID
 
-    for (;;)
-    {
-        if ((total >= 0) && (m_pos >= total))
-            return 1;  //no more clusters
+    // Read Size
 
-        if ((segment_stop >= 0) && (m_pos >= segment_stop))
-            return 1;  //no more clusters
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
 
-        pos = m_pos;
+    result = GetUIntLength(m_pReader, pos, len);
 
-        //Read ID
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-        long long result = GetUIntLength(m_pReader, pos, len);
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
+    const long long size = ReadUInt(m_pReader, pos, len);
 
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
+    if (size < 0)  // error
+      return static_cast<long>(size);
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
+    pos += len;  // consume length of size of element
 
-        const long long idpos = pos;
-        const long long id = ReadUInt(m_pReader, idpos, len);
+    // pos now points to start of payload
 
-        if (id < 0)  //error (or underflow)
-            return static_cast<long>(id);
+    if (size == 0) {  // weird
+      m_pos = pos;
+      continue;
+    }
 
-        pos += len;  //consume ID
+    const long long unknown_size = (1LL << (7 * len)) - 1;
 
-        //Read Size
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result = GetUIntLength(m_pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long size = ReadUInt(m_pReader, pos, len);
-
-        if (size < 0)  //error
-            return static_cast<long>(size);
-
-        pos += len;  //consume length of size of element
-
-        //pos now points to start of payload
-
-        if (size == 0)  //weird
-        {
-            m_pos = pos;
-            continue;
-        }
-
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-#if 0  //we must handle this to support live webm
+#if 0  // we must handle this to support live webm
         if (size == unknown_size)
             return E_FILE_FORMAT_INVALID;  //TODO: allow this
 #endif
 
-        if ((segment_stop >= 0) &&
-            (size != unknown_size) &&
-            ((pos + size) > segment_stop))
-        {
-            return E_FILE_FORMAT_INVALID;
-        }
+    if ((segment_stop >= 0) && (size != unknown_size) &&
+        ((pos + size) > segment_stop)) {
+      return E_FILE_FORMAT_INVALID;
+    }
 
-#if 0  //commented-out, to support incremental cluster parsing
+#if 0  // commented-out, to support incremental cluster parsing
         len = static_cast<long>(size);
 
         if ((pos + size) > avail)
             return E_BUFFER_NOT_FULL;
 #endif
 
-        if (id == 0x0C53BB6B)  //Cues ID
-        {
-            if (size == unknown_size)
-                return E_FILE_FORMAT_INVALID;  //TODO: liberalize
+    if (id == 0x0C53BB6B) {  // Cues ID
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;  // TODO: liberalize
 
-            if (m_pCues == NULL)
-            {
-                const long long element_size = (pos - idpos) + size;
+      if (m_pCues == NULL) {
+        const long long element_size = (pos - idpos) + size;
 
-                m_pCues = new Cues(this,
-                                   pos,
-                                   size,
-                                   idpos,
-                                   element_size);
-                assert(m_pCues);  //TODO
-            }
+        m_pCues = new Cues(this, pos, size, idpos, element_size);
+        assert(m_pCues);  // TODO
+      }
 
-            m_pos = pos + size;  //consume payload
-            continue;
-        }
-
-        if (id != 0x0F43B675)  //Cluster ID
-        {
-            if (size == unknown_size)
-                return E_FILE_FORMAT_INVALID;  //TODO: liberalize
-
-            m_pos = pos + size;  //consume payload
-            continue;
-        }
-
-        //We have a cluster.
-
-        cluster_off = idpos - m_start;  //relative pos
-
-        if (size != unknown_size)
-            cluster_size = size;
-
-        break;
+      m_pos = pos + size;  // consume payload
+      continue;
     }
 
-    assert(cluster_off >= 0);  //have cluster
+    if (id != 0x0F43B675) {  // Cluster ID
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;  // TODO: liberalize
 
-    long long pos_;
-    long len_;
-
-    status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_);
-
-    if (status < 0) //error, or underflow
-    {
-        pos = pos_;
-        len = len_;
-
-        return status;
+      m_pos = pos + size;  // consume payload
+      continue;
     }
 
-    //status == 0 means "no block entries found"
-    //status > 0 means "found at least one block entry"
+    // We have a cluster.
 
-    //TODO:
-    //The issue here is that the segment increments its own
-    //pos ptr past the most recent cluster parsed, and then
-    //starts from there to parse the next cluster.  If we
-    //don't know the size of the current cluster, then we
-    //must either parse its payload (as we do below), looking
-    //for the cluster (or cues) ID to terminate the parse.
-    //This isn't really what we want: rather, we really need
-    //a way to create the curr cluster object immediately.
-    //The pity is that cluster::parse can determine its own
-    //boundary, and we largely duplicate that same logic here.
-    //
-    //Maybe we need to get rid of our look-ahead preloading
-    //in source::parse???
-    //
-    //As we're parsing the blocks in the curr cluster
-    //(in cluster::parse), we should have some way to signal
-    //to the segment that we have determined the boundary,
-    //so it can adjust its own segment::m_pos member.
-    //
-    //The problem is that we're asserting in asyncreadinit,
-    //because we adjust the pos down to the curr seek pos,
-    //and the resulting adjusted len is > 2GB.  I'm suspicious
-    //that this is even correct, but even if it is, we can't
-    //be loading that much data in the cache anyway.
+    cluster_off = idpos - m_start;  // relative pos
 
-    const long idx = m_clusterCount;
+    if (size != unknown_size)
+      cluster_size = size;
 
-    if (m_clusterPreloadCount > 0)
-    {
-        assert(idx < m_clusterSize);
+    break;
+  }
 
-        Cluster* const pCluster = m_clusters[idx];
-        assert(pCluster);
-        assert(pCluster->m_index < 0);
+  assert(cluster_off >= 0);  // have cluster
 
-        const long long off = pCluster->GetPosition();
-        assert(off >= 0);
+  long long pos_;
+  long len_;
 
-        if (off == cluster_off)  //preloaded already
-        {
-            if (status == 0)  //no entries found
-                return E_FILE_FORMAT_INVALID;
+  status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_);
 
-            if (cluster_size >= 0)
-                pos += cluster_size;
-            else
-            {
-                const long long element_size = pCluster->GetElementSize();
+  if (status < 0) {  // error, or underflow
+    pos = pos_;
+    len = len_;
 
-                if (element_size <= 0)
-                    return E_FILE_FORMAT_INVALID;  //TODO: handle this case
+    return status;
+  }
 
-                pos = pCluster->m_element_start + element_size;
-            }
+  // status == 0 means "no block entries found"
+  // status > 0 means "found at least one block entry"
 
-            pCluster->m_index = idx;  //move from preloaded to loaded
-            ++m_clusterCount;
-            --m_clusterPreloadCount;
+  // TODO:
+  // The issue here is that the segment increments its own
+  // pos ptr past the most recent cluster parsed, and then
+  // starts from there to parse the next cluster.  If we
+  // don't know the size of the current cluster, then we
+  // must parse its payload (as we do below), looking
+  // for the cluster (or cues) ID to terminate the parse.
+  // This isn't really what we want: rather, we really need
+  // a way to create the curr cluster object immediately.
+  // The pity is that cluster::parse can determine its own
+  // boundary, and we largely duplicate that same logic here.
+  //
+  // Maybe we need to get rid of our look-ahead preloading
+  // in source::parse???
+  //
+  // As we're parsing the blocks in the curr cluster
+  // (in cluster::parse), we should have some way to signal
+  // to the segment that we have determined the boundary,
+  // so it can adjust its own segment::m_pos member.
+  //
+  // The problem is that we're asserting in asyncreadinit,
+  // because we adjust the pos down to the curr seek pos,
+  // and the resulting adjusted len is > 2GB.  I'm suspicious
+  // that this is even correct, but even if it is, we can't
+  // be loading that much data in the cache anyway.
 
-            m_pos = pos;  //consume payload
-            assert((segment_stop < 0) || (m_pos <= segment_stop));
+  const long idx = m_clusterCount;
 
-            return 0;  //success
-        }
-    }
-
-    if (status == 0)  //no entries found
-    {
-        if (cluster_size < 0)
-            return E_FILE_FORMAT_INVALID;  //TODO: handle this
-
-        pos += cluster_size;
-
-        if ((total >= 0) && (pos >= total))
-        {
-            m_pos = total;
-            return 1;  //no more clusters
-        }
-
-        if ((segment_stop >= 0) && (pos >= segment_stop))
-        {
-            m_pos = segment_stop;
-            return 1;  //no more clusters
-        }
-
-        m_pos = pos;
-        return 2;  //try again
-    }
-
-    //status > 0 means we have an entry
-
-    Cluster* const pCluster = Cluster::Create(this,
-                                              idx,
-                                              cluster_off);
-                                              //element_size);
-    assert(pCluster);
-
-    AppendCluster(pCluster);
-    assert(m_clusters);
+  if (m_clusterPreloadCount > 0) {
     assert(idx < m_clusterSize);
-    assert(m_clusters[idx] == pCluster);
 
-    if (cluster_size >= 0)
-    {
+    Cluster* const pCluster = m_clusters[idx];
+    assert(pCluster);
+    assert(pCluster->m_index < 0);
+
+    const long long off = pCluster->GetPosition();
+    assert(off >= 0);
+
+    if (off == cluster_off) {  // preloaded already
+      if (status == 0)  // no entries found
+        return E_FILE_FORMAT_INVALID;
+
+      if (cluster_size >= 0)
         pos += cluster_size;
+      else {
+        const long long element_size = pCluster->GetElementSize();
 
-        m_pos = pos;
-        assert((segment_stop < 0) || (m_pos <= segment_stop));
+        if (element_size <= 0)
+          return E_FILE_FORMAT_INVALID;  // TODO: handle this case
 
-        return 0;
+        pos = pCluster->m_element_start + element_size;
+      }
+
+      pCluster->m_index = idx;  // move from preloaded to loaded
+      ++m_clusterCount;
+      --m_clusterPreloadCount;
+
+      m_pos = pos;  // consume payload
+      assert((segment_stop < 0) || (m_pos <= segment_stop));
+
+      return 0;  // success
+    }
+  }
+
+  if (status == 0) {  // no entries found
+    if (cluster_size < 0)
+      return E_FILE_FORMAT_INVALID;  // TODO: handle this
+
+    pos += cluster_size;
+
+    if ((total >= 0) && (pos >= total)) {
+      m_pos = total;
+      return 1;  // no more clusters
     }
 
-    m_pUnknownSize = pCluster;
-    m_pos = -pos;
+    if ((segment_stop >= 0) && (pos >= segment_stop)) {
+      m_pos = segment_stop;
+      return 1;  // no more clusters
+    }
 
-    return 0;  //partial success, since we have a new cluster
+    m_pos = pos;
+    return 2;  // try again
+  }
 
-    //status == 0 means "no block entries found"
+  // status > 0 means we have an entry
 
-    //pos designates start of payload
-    //m_pos has NOT been adjusted yet (in case we need to come back here)
+  Cluster* const pCluster = Cluster::Create(this, idx, cluster_off);
+  // element_size);
+  assert(pCluster);
+
+  AppendCluster(pCluster);
+  assert(m_clusters);
+  assert(idx < m_clusterSize);
+  assert(m_clusters[idx] == pCluster);
+
+  if (cluster_size >= 0) {
+    pos += cluster_size;
+
+    m_pos = pos;
+    assert((segment_stop < 0) || (m_pos <= segment_stop));
+
+    return 0;
+  }
+
+  m_pUnknownSize = pCluster;
+  m_pos = -pos;
+
+  return 0;  // partial success, since we have a new cluster
+
+// status == 0 means "no block entries found"
+
+// pos designates start of payload
+// m_pos has NOT been adjusted yet (in case we need to come back here)
 
 #if 0
 
-    if (cluster_size < 0)  //unknown size
-    {
+    if (cluster_size < 0) {  //unknown size
         const long long payload_pos = pos;  //absolute pos of cluster payload
 
-        for (;;)  //determine cluster size
-        {
+        for (;;) {  //determine cluster size
             if ((total >= 0) && (pos >= total))
                 break;
 
@@ -1523,16 +1344,11 @@
     return 2;     //try to find another cluster
 
 #endif
-
 }
 
-
-long Segment::DoLoadClusterUnknownSize(
-    long long& pos,
-    long& len)
-{
-    assert(m_pos < 0);
-    assert(m_pUnknownSize);
+long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) {
+  assert(m_pos < 0);
+  assert(m_pUnknownSize);
 
 #if 0
     assert(m_pUnknownSize->GetElementSize() < 0);  //TODO: verify this
@@ -1559,8 +1375,7 @@
 
     long long element_size = -1;
 
-    for (;;)  //determine cluster size
-    {
+    for (;;) {  //determine cluster size
         if ((total >= 0) && (pos >= total))
         {
             element_size = total - element_start;
@@ -1609,8 +1424,7 @@
         //that we have exhausted the sub-element's inside the cluster
         //whose ID we parsed earlier.
 
-        if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster ID or Cues ID
-        {
+        if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) {  //Cluster ID or Cues ID
             element_size = pos - element_start;
             assert(element_size > 0);
 
@@ -1687,348 +1501,299 @@
 
     return 2;  //continue parsing
 #else
-    const long status = m_pUnknownSize->Parse(pos, len);
+  const long status = m_pUnknownSize->Parse(pos, len);
 
-    if (status < 0)  //error or underflow
-        return status;
+  if (status < 0)  // error or underflow
+    return status;
 
-    if (status == 0)  //parsed a block
-        return 2;     //continue parsing
+  if (status == 0)  // parsed a block
+    return 2;  // continue parsing
 
-    assert(status > 0);   //nothing left to parse of this cluster
+  assert(status > 0);  // nothing left to parse of this cluster
 
-    const long long start = m_pUnknownSize->m_element_start;
+  const long long start = m_pUnknownSize->m_element_start;
 
-    const long long size = m_pUnknownSize->GetElementSize();
-    assert(size >= 0);
+  const long long size = m_pUnknownSize->GetElementSize();
+  assert(size >= 0);
 
-    pos = start + size;
-    m_pos = pos;
+  pos = start + size;
+  m_pos = pos;
 
-    m_pUnknownSize = 0;
+  m_pUnknownSize = 0;
 
-    return 2;  //continue parsing
+  return 2;  // continue parsing
 #endif
 }
 
+void Segment::AppendCluster(Cluster* pCluster) {  // add a loaded cluster
+  assert(pCluster);
+  assert(pCluster->m_index >= 0);
 
-void Segment::AppendCluster(Cluster* pCluster)
-{
-    assert(pCluster);
-    assert(pCluster->m_index >= 0);
+  const long count = m_clusterCount + m_clusterPreloadCount;  // slots in use
 
-    const long count = m_clusterCount + m_clusterPreloadCount;
+  long& size = m_clusterSize;
+  assert(size >= count);
 
-    long& size = m_clusterSize;
-    assert(size >= count);
+  const long idx = pCluster->m_index;  // destination slot
+  assert(idx == m_clusterCount);
 
-    const long idx = pCluster->m_index;
-    assert(idx == m_clusterCount);
+  if (count >= size) {  // array is full: reallocate
+    const long n = (size <= 0) ? 2048 : 2 * size;  // 2048 first, then double
 
-    if (count >= size)
-    {
-        const long n = (size <= 0) ? 2048 : 2*size;
+    Cluster** const qq = new Cluster* [n];
+    Cluster** q = qq;
 
-        Cluster** const qq = new Cluster*[n];
-        Cluster** q = qq;
+    Cluster** p = m_clusters;
+    Cluster** const pp = p + count;
 
-        Cluster** p = m_clusters;
-        Cluster** const pp = p + count;
+    while (p != pp)  // copy the existing pointers across
+      *q++ = *p++;
 
-        while (p != pp)
-            *q++ = *p++;
+    delete[] m_clusters;
 
-        delete[] m_clusters;
+    m_clusters = qq;
+    size = n;
+  }
 
-        m_clusters = qq;
-        size = n;
-    }
-
-    if (m_clusterPreloadCount > 0)
-    {
-        assert(m_clusters);
-
-        Cluster** const p = m_clusters + m_clusterCount;
-        assert(*p);
-        assert((*p)->m_index < 0);
-
-        Cluster** q = p + m_clusterPreloadCount;
-        assert(q < (m_clusters + size));
-
-        for (;;)
-        {
-            Cluster** const qq = q - 1;
-            assert((*qq)->m_index < 0);
-
-            *q = *qq;
-            q = qq;
-
-            if (q == p)
-                break;
-        }
-    }
-
-    m_clusters[idx] = pCluster;
-    ++m_clusterCount;
-}
-
-
-void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx)
-{
-    assert(pCluster);
-    assert(pCluster->m_index < 0);
-    assert(idx >= m_clusterCount);
-
-    const long count = m_clusterCount + m_clusterPreloadCount;
-
-    long& size = m_clusterSize;
-    assert(size >= count);
-
-    if (count >= size)
-    {
-        const long n = (size <= 0) ? 2048 : 2*size;
-
-        Cluster** const qq = new Cluster*[n];
-        Cluster** q = qq;
-
-        Cluster** p = m_clusters;
-        Cluster** const pp = p + count;
-
-        while (p != pp)
-            *q++ = *p++;
-
-        delete[] m_clusters;
-
-        m_clusters = qq;
-        size = n;
-    }
-
+  if (m_clusterPreloadCount > 0) {  // move preloaded entries up one slot
     assert(m_clusters);
 
-    Cluster** const p = m_clusters + idx;
+    Cluster** const p = m_clusters + m_clusterCount;
+    assert(*p);
+    assert((*p)->m_index < 0);
 
-    Cluster** q = m_clusters + count;
-    assert(q >= p);
+    Cluster** q = p + m_clusterPreloadCount;
     assert(q < (m_clusters + size));
 
-    while (q > p)
-    {
-        Cluster** const qq = q - 1;
-        assert((*qq)->m_index < 0);
+    for (;;) {  // copy from the end backwards, down to slot m_clusterCount
+      Cluster** const qq = q - 1;
+      assert((*qq)->m_index < 0);
 
-        *q = *qq;
-        q = qq;
+      *q = *qq;
+      q = qq;
+
+      if (q == p)
+        break;
     }
+  }
 
-    m_clusters[idx] = pCluster;
-    ++m_clusterPreloadCount;
+  m_clusters[idx] = pCluster;  // idx == m_clusterCount (asserted above)
+  ++m_clusterCount;
 }
 
+void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) {  // insert at idx
+  assert(pCluster);
+  assert(pCluster->m_index < 0);
+  assert(idx >= m_clusterCount);  // never before the first m_clusterCount slots
 
-long Segment::Load()
-{
-    assert(m_clusters == NULL);
-    assert(m_clusterSize == 0);
-    assert(m_clusterCount == 0);
-    //assert(m_size >= 0);
+  const long count = m_clusterCount + m_clusterPreloadCount;  // slots in use
 
-    //Outermost (level 0) segment object has been constructed,
-    //and pos designates start of payload.  We need to find the
-    //inner (level 1) elements.
+  long& size = m_clusterSize;
+  assert(size >= count);
 
-    const long long header_status = ParseHeaders();
+  if (count >= size) {  // array is full: reallocate
+    const long n = (size <= 0) ? 2048 : 2 * size;  // 2048 first, then double
 
-    if (header_status < 0)  //error
-        return static_cast<long>(header_status);
+    Cluster** const qq = new Cluster* [n];
+    Cluster** q = qq;
 
-    if (header_status > 0)  //underflow
-        return E_BUFFER_NOT_FULL;
+    Cluster** p = m_clusters;
+    Cluster** const pp = p + count;
 
-    assert(m_pInfo);
-    assert(m_pTracks);
+    while (p != pp)  // copy the existing pointers across
+      *q++ = *p++;
 
-    for (;;)
-    {
-        const int status = LoadCluster();
+    delete[] m_clusters;
 
-        if (status < 0)  //error
-            return status;
+    m_clusters = qq;
+    size = n;
+  }
 
-        if (status >= 1)  //no more clusters
-            return 0;
-    }
+  assert(m_clusters);
+
+  Cluster** const p = m_clusters + idx;  // slot to fill
+
+  Cluster** q = m_clusters + count;  // one past the last used slot
+  assert(q >= p);
+  assert(q < (m_clusters + size));
+
+  while (q > p) {  // shift entries [idx, count) up by one to open slot idx
+    Cluster** const qq = q - 1;
+    assert((*qq)->m_index < 0);
+
+    *q = *qq;
+    q = qq;
+  }
+
+  m_clusters[idx] = pCluster;
+  ++m_clusterPreloadCount;
 }
 
+long Segment::Load() {  // parse headers, then load clusters until done
+  assert(m_clusters == NULL);
+  assert(m_clusterSize == 0);
+  assert(m_clusterCount == 0);
+  // assert(m_size >= 0);
 
-SeekHead::SeekHead(
-    Segment* pSegment,
-    long long start,
-    long long size_,
-    long long element_start,
-    long long element_size) :
-    m_pSegment(pSegment),
-    m_start(start),
-    m_size(size_),
-    m_element_start(element_start),
-    m_element_size(element_size),
-    m_entries(0),
-    m_entry_count(0),
-    m_void_elements(0),
-    m_void_element_count(0)
-{
+  // Outermost (level 0) segment object has been constructed,
+  // and pos designates start of payload.  We need to find the
+  // inner (level 1) elements.
+
+  const long long header_status = ParseHeaders();
+
+  if (header_status < 0)  // error: pass the code through
+    return static_cast<long>(header_status);
+
+  if (header_status > 0)  // underflow
+    return E_BUFFER_NOT_FULL;
+
+  assert(m_pInfo);
+  assert(m_pTracks);
+
+  for (;;) {  // LoadCluster() is called until it fails or reports completion
+    const int status = LoadCluster();
+
+    if (status < 0)  // error
+      return status;
+
+    if (status >= 1)  // no more clusters
+      return 0;
+  }
 }
 
+SeekHead::SeekHead(Segment* pSegment, long long start, long long size_,
+                   long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start),  // Parse() scans [m_start, m_start + m_size)
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_entries(0),  // both arrays stay null until Parse() allocates them
+      m_entry_count(0),
+      m_void_elements(0),
+      m_void_element_count(0) {}
 
-SeekHead::~SeekHead()
-{
-    delete[] m_entries;
-    delete[] m_void_elements;
+SeekHead::~SeekHead() {  // releases the two arrays allocated in Parse()
+  delete[] m_entries;
+  delete[] m_void_elements;
 }
 
+long SeekHead::Parse() {  // two passes over [m_start, m_start + m_size)
+  IMkvReader* const pReader = m_pSegment->m_pReader;
 
-long SeekHead::Parse()
-{
-    IMkvReader* const pReader = m_pSegment->m_pReader;
+  long long pos = m_start;
+  const long long stop = m_start + m_size;
 
-    long long pos = m_start;
-    const long long stop = m_start + m_size;
+  // first pass: count the SeekEntry and Void children
 
-    //first count the seek head entries
+  int entry_count = 0;
+  int void_element_count = 0;
 
-    int entry_count = 0;
-    int void_element_count = 0;
+  while (pos < stop) {
+    long long id, size;
 
-    while (pos < stop)
-    {
-        long long id, size;
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
 
-        const long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                stop,
-                                id,
-                                size);
+    if (status < 0)  // error
+      return status;
 
-        if (status < 0)  //error
-            return status;
+    if (id == 0x0DBB)  // SeekEntry ID
+      ++entry_count;
+    else if (id == 0x6C)  // Void ID
+      ++void_element_count;
 
-        if (id == 0x0DBB)  //SeekEntry ID
-            ++entry_count;
-        else if (id == 0x6C)  //Void ID
-            ++void_element_count;
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
 
-        pos += size;  //consume payload
-        assert(pos <= stop);
+  assert(pos == stop);
+
+  m_entries = new (std::nothrow) Entry[entry_count];
+
+  if (m_entries == NULL)
+    return -1;  // allocation failed
+
+  m_void_elements = new (std::nothrow) VoidElement[void_element_count];
+
+  if (m_void_elements == NULL)
+    return -1;  // allocation failed
+
+  // second pass: parse the entries and record the void elements
+
+  Entry* pEntry = m_entries;
+  VoidElement* pVoidElement = m_void_elements;
+
+  pos = m_start;
+
+  while (pos < stop) {
+    const long long idpos = pos;  // where this child element begins
+
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == 0x0DBB) {  // SeekEntry ID
+      if (ParseEntry(pReader, pos, size, pEntry)) {  // failed entries are dropped
+        Entry& e = *pEntry++;
+
+        e.element_start = idpos;
+        e.element_size = (pos + size) - idpos;  // from idpos to end of payload
+      }
+    } else if (id == 0x6C) {  // Void ID
+      VoidElement& e = *pVoidElement++;
+
+      e.element_start = idpos;
+      e.element_size = (pos + size) - idpos;  // from idpos to end of payload
     }
 
-    assert(pos == stop);
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
 
-    m_entries = new (std::nothrow) Entry[entry_count];
+  assert(pos == stop);
 
-    if (m_entries == NULL)
-        return -1;
+  ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries);  // entries actually stored
+  assert(count_ >= 0);
+  assert(count_ <= entry_count);
 
-    m_void_elements = new (std::nothrow) VoidElement[void_element_count];
+  m_entry_count = static_cast<int>(count_);
 
-    if (m_void_elements == NULL)
-        return -1;
+  count_ = ptrdiff_t(pVoidElement - m_void_elements);  // void elements stored
+  assert(count_ >= 0);
+  assert(count_ <= void_element_count);
 
-    //now parse the entries and void elements
+  m_void_element_count = static_cast<int>(count_);
 
-    Entry* pEntry = m_entries;
-    VoidElement* pVoidElement = m_void_elements;
+  return 0;  // success
+}
 
-    pos = m_start;
+int SeekHead::GetCount() const { return m_entry_count; }  // stored entries
 
-    while (pos < stop)
-    {
-        const long long idpos = pos;
-
-        long long id, size;
-
-        const long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                stop,
-                                id,
-                                size);
-
-        if (status < 0)  //error
-            return status;
-
-        if (id == 0x0DBB)  //SeekEntry ID
-        {
-            if (ParseEntry(pReader, pos, size, pEntry))
-            {
-                Entry& e = *pEntry++;
-
-                e.element_start = idpos;
-                e.element_size = (pos + size) - idpos;
-            }
-        }
-        else if (id == 0x6C)  //Void ID
-        {
-            VoidElement& e = *pVoidElement++;
-
-            e.element_start = idpos;
-            e.element_size = (pos + size) - idpos;
-        }
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
-    }
-
-    assert(pos == stop);
-
-    ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries);
-    assert(count_ >= 0);
-    assert(count_ <= entry_count);
-
-    m_entry_count = static_cast<int>(count_);
-
-    count_ = ptrdiff_t(pVoidElement - m_void_elements);
-    assert(count_ >= 0);
-    assert(count_ <= void_element_count);
-
-    m_void_element_count = static_cast<int>(count_);
-
+const SeekHead::Entry* SeekHead::GetEntry(int idx) const {
+  if (idx < 0)  // out of range: null pointer
     return 0;
+
+  if (idx >= m_entry_count)  // out of range: null pointer
+    return 0;
+
+  return m_entries + idx;  // pointer into the array; no copy is made
 }
 
+int SeekHead::GetVoidElementCount() const { return m_void_element_count; }  // stored void elements
 
-int SeekHead::GetCount() const
-{
-    return m_entry_count;
+const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const {
+  if (idx < 0)  // out of range: null pointer
+    return 0;
+
+  if (idx >= m_void_element_count)  // out of range: null pointer
+    return 0;
+
+  return m_void_elements + idx;  // pointer into the array; no copy is made
 }
 
-const SeekHead::Entry* SeekHead::GetEntry(int idx) const
-{
-    if (idx < 0)
-        return 0;
-
-    if (idx >= m_entry_count)
-        return 0;
-
-    return m_entries + idx;
-}
-
-int SeekHead::GetVoidElementCount() const
-{
-    return m_void_element_count;
-}
-
-const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const
-{
-    if (idx < 0)
-        return 0;
-
-    if (idx >= m_void_element_count)
-        return 0;
-
-    return m_void_elements + idx;
-}
-
-
 #if 0
 void Segment::ParseCues(long long off)
 {
@@ -2078,133 +1843,122 @@
     //os << "Segment::ParseCues (end)" << endl;
 }
 #else
-long Segment::ParseCues(
-    long long off,
-    long long& pos,
-    long& len)
-{
-    if (m_pCues)
-        return 0;  //success
+long Segment::ParseCues(long long off, long long& pos, long& len) {  // off is relative to m_start
+  if (m_pCues)
+    return 0;  // success: the Cues object already exists
 
-    if (off < 0)
-        return -1;
+  if (off < 0)
+    return -1;  // negative offset
 
-    long long total, avail;
+  long long total, avail;
 
-    const int status = m_pReader->Length(&total, &avail);
+  const int status = m_pReader->Length(&total, &avail);
 
-    if (status < 0)  //error
-        return status;
+  if (status < 0)  // error
+    return status;
 
-    assert((total < 0) || (avail <= total));
+  assert((total < 0) || (avail <= total));
 
-    pos = m_start + off;
+  pos = m_start + off;
 
-    if ((total < 0) || (pos >= total))
-        return 1;  //don't bother parsing cues
+  if ((total < 0) || (pos >= total))
+    return 1;  // don't bother parsing cues
 
-    const long long element_start = pos;
-    const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+  const long long element_start = pos;
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
 
-    if ((pos + 1) > avail)
-    {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
-    }
+  if ((pos + 1) > avail) {  // not even one byte available at pos
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
 
-    long long result = GetUIntLength(m_pReader, pos, len);
+  long long result = GetUIntLength(m_pReader, pos, len);
 
-    if (result < 0)  //error
-        return static_cast<long>(result);
+  if (result < 0)  // error
+    return static_cast<long>(result);
 
-    if (result > 0) //underflow (weird)
-    {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
-    }
+  if (result > 0)  // underflow (weird)
+  {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
 
-    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-        return E_FILE_FORMAT_INVALID;
+  if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+    return E_FILE_FORMAT_INVALID;
 
-    if ((pos + len) > avail)
-        return E_BUFFER_NOT_FULL;
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
 
-    const long long idpos = pos;
+  const long long idpos = pos;
 
-    const long long id = ReadUInt(m_pReader, idpos, len);
+  const long long id = ReadUInt(m_pReader, idpos, len);
 
-    if (id != 0x0C53BB6B)  //Cues ID
-        return E_FILE_FORMAT_INVALID;
+  if (id != 0x0C53BB6B)  // anything other than the Cues ID is rejected
+    return E_FILE_FORMAT_INVALID;
 
-    pos += len;  //consume ID
-    assert((segment_stop < 0) || (pos <= segment_stop));
+  pos += len;  // consume ID
+  assert((segment_stop < 0) || (pos <= segment_stop));
 
-    //Read Size
+  // Read Size
 
-    if ((pos + 1) > avail)
-    {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
-    }
+  if ((pos + 1) > avail) {  // not even one byte available at pos
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
 
-    result = GetUIntLength(m_pReader, pos, len);
+  result = GetUIntLength(m_pReader, pos, len);
 
-    if (result < 0)  //error
-        return static_cast<long>(result);
+  if (result < 0)  // error
+    return static_cast<long>(result);
 
-    if (result > 0) //underflow (weird)
-    {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
-    }
+  if (result > 0)  // underflow (weird)
+  {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
 
-    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-        return E_FILE_FORMAT_INVALID;
+  if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+    return E_FILE_FORMAT_INVALID;
 
-    if ((pos + len) > avail)
-        return E_BUFFER_NOT_FULL;
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
 
-    const long long size = ReadUInt(m_pReader, pos, len);
+  const long long size = ReadUInt(m_pReader, pos, len);
 
-    if (size < 0)  //error
-        return static_cast<long>(size);
+  if (size < 0)  // error
+    return static_cast<long>(size);
 
-    if (size == 0)  //weird, although technically not illegal
-        return 1;   //done
+  if (size == 0)  // weird, although technically not illegal
+    return 1;  // done
 
-    pos += len;  //consume length of size of element
-    assert((segment_stop < 0) || (pos <= segment_stop));
+  pos += len;  // consume length of size of element
+  assert((segment_stop < 0) || (pos <= segment_stop));
 
-    //Pos now points to start of payload
+  // Pos now points to start of payload
 
-    const long long element_stop = pos + size;
+  const long long element_stop = pos + size;
 
-    if ((segment_stop >= 0) && (element_stop > segment_stop))
-        return E_FILE_FORMAT_INVALID;
+  if ((segment_stop >= 0) && (element_stop > segment_stop))
+    return E_FILE_FORMAT_INVALID;
 
-    if ((total >= 0) && (element_stop > total))
-        return 1;  //don't bother parsing anymore
+  if ((total >= 0) && (element_stop > total))
+    return 1;  // don't bother parsing anymore
 
-    len = static_cast<long>(size);
+  len = static_cast<long>(size);  // payload size, reported through len
 
-    if (element_stop > avail)
-        return E_BUFFER_NOT_FULL;
+  if (element_stop > avail)
+    return E_BUFFER_NOT_FULL;
 
-    const long long element_size = element_stop - element_start;
+  const long long element_size = element_stop - element_start;
 
-    m_pCues = new (std::nothrow) Cues(
-                                    this,
-                                    pos,
-                                    size,
-                                    element_start,
-                                    element_size);
-    assert(m_pCues);  //TODO
+  m_pCues =
+      new (std::nothrow) Cues(this, pos, size, element_start, element_size);
+  if (m_pCues == NULL) return -1;  // nothrow new failed: report, don't assert
 
-    return 0;  //success
+  return 0;  // success
 }
 #endif
 
-
 #if 0
 void Segment::ParseSeekEntry(
     long long start,
@@ -2264,304 +2018,269 @@
         ParseCues(seekOff);
 }
 #else
-bool SeekHead::ParseEntry(
-    IMkvReader* pReader,
-    long long start,
-    long long size_,
-    Entry* pEntry)
-{
-    if (size_ <= 0)
-        return false;
+bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
+                          Entry* pEntry) {  // false as soon as any check fails
+  if (size_ <= 0)
+    return false;  // empty or negative payload size
 
-    long long pos = start;
-    const long long stop = start + size_;
+  long long pos = start;
+  const long long stop = start + size_;
 
-    long len;
+  long len;
 
-    //parse the container for the level-1 element ID
+  // parse the container for the level-1 element ID
 
-    const long long seekIdId = ReadUInt(pReader, pos, len);
-    //seekIdId;
+  const long long seekIdId = ReadUInt(pReader, pos, len);
+  // seekIdId;
 
-    if (seekIdId != 0x13AB)  //SeekID ID
-        return false;
+  if (seekIdId != 0x13AB)  // first child must be SeekID
+    return false;
 
-    if ((pos + len) > stop)
-        return false;
+  if ((pos + len) > stop)
+    return false;
 
-    pos += len;  //consume SeekID id
+  pos += len;  // consume SeekID id
 
-    const long long seekIdSize = ReadUInt(pReader, pos, len);
+  const long long seekIdSize = ReadUInt(pReader, pos, len);
 
-    if (seekIdSize <= 0)
-        return false;
+  if (seekIdSize <= 0)
+    return false;
 
-    if ((pos + len) > stop)
-        return false;
+  if ((pos + len) > stop)
+    return false;
 
-    pos += len;  //consume size of field
+  pos += len;  // consume size of field
 
-    if ((pos + seekIdSize) > stop)
-        return false;
+  if ((pos + seekIdSize) > stop)
+    return false;
 
-    //Note that the SeekId payload really is serialized
-    //as a "Matroska integer", not as a plain binary value.
-    //In fact, Matroska requires that ID values in the
-    //stream exactly match the binary representation as listed
-    //in the Matroska specification.
-    //
-    //This parser is more liberal, and permits IDs to have
-    //any width.  (This could make the representation in the stream
-    //different from what's in the spec, but it doesn't matter here,
-    //since we always normalize "Matroska integer" values.)
+  // Note that the SeekId payload really is serialized
+  // as a "Matroska integer", not as a plain binary value.
+  // In fact, Matroska requires that ID values in the
+  // stream exactly match the binary representation as listed
+  // in the Matroska specification.
+  //
+  // This parser is more liberal, and permits IDs to have
+  // any width.  (This could make the representation in the stream
+  // different from what's in the spec, but it doesn't matter here,
+  // since we always normalize "Matroska integer" values.)
 
-    pEntry->id = ReadUInt(pReader, pos, len);  //payload
+  pEntry->id = ReadUInt(pReader, pos, len);  // payload
 
-    if (pEntry->id <= 0)
-        return false;
+  if (pEntry->id <= 0)
+    return false;
 
-    if (len != seekIdSize)
-        return false;
+  if (len != seekIdSize)  // the ID must occupy the whole SeekID payload
+    return false;
 
-    pos += seekIdSize;  //consume SeekID payload
+  pos += seekIdSize;  // consume SeekID payload
 
-    const long long seekPosId = ReadUInt(pReader, pos, len);
+  const long long seekPosId = ReadUInt(pReader, pos, len);
 
-    if (seekPosId != 0x13AC)  //SeekPos ID
-        return false;
+  if (seekPosId != 0x13AC)  // second child must be SeekPos
+    return false;
 
-    if ((pos + len) > stop)
-        return false;
+  if ((pos + len) > stop)
+    return false;
 
-    pos += len;  //consume id
+  pos += len;  // consume id
 
-    const long long seekPosSize = ReadUInt(pReader, pos, len);
+  const long long seekPosSize = ReadUInt(pReader, pos, len);
 
-    if (seekPosSize <= 0)
-        return false;
+  if (seekPosSize <= 0)
+    return false;
 
-    if ((pos + len) > stop)
-        return false;
+  if ((pos + len) > stop)
+    return false;
 
-    pos += len;  //consume size
+  pos += len;  // consume size
 
-    if ((pos + seekPosSize) > stop)
-        return false;
+  if ((pos + seekPosSize) > stop)
+    return false;
 
-    pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize);
+  pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize);
 
-    if (pEntry->pos < 0)
-        return false;
+  if (pEntry->pos < 0)
+    return false;
 
-    pos += seekPosSize;  //consume payload
+  pos += seekPosSize;  // consume payload
 
-    if (pos != stop)
-        return false;
+  if (pos != stop)  // the two children must fill the entry exactly
+    return false;
 
-    return true;
+  return true;  // pEntry->id and pEntry->pos are both set
 }
 #endif
 
+Cues::Cues(Segment* pSegment, long long start_, long long size_,
+           long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start_),  // parsing covers [m_start, m_start + m_size)
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_cue_points(NULL),  // array is built later by Init()
+      m_count(0),
+      m_preload_count(0),
+      m_pos(start_) {}  // parse cursor begins at the start of the range
 
-Cues::Cues(
-    Segment* pSegment,
-    long long start_,
-    long long size_,
-    long long element_start,
-    long long element_size) :
-    m_pSegment(pSegment),
-    m_start(start_),
-    m_size(size_),
-    m_element_start(element_start),
-    m_element_size(element_size),
-    m_cue_points(NULL),
-    m_count(0),
-    m_preload_count(0),
-    m_pos(start_)
-{
+Cues::~Cues() {  // deletes every CuePoint, then the pointer array itself
+  const long n = m_count + m_preload_count;  // loaded + preloaded entries
+
+  CuePoint** p = m_cue_points;
+  CuePoint** const q = p + n;
+
+  while (p != q) {
+    CuePoint* const pCP = *p++;
+    assert(pCP);
+
+    delete pCP;
+  }
+
+  delete[] m_cue_points;
 }
 
+long Cues::GetCount() const {
+  if (m_cue_points == NULL)
+    return -1;  // the cue point array has not been created yet
 
-Cues::~Cues()
-{
-    const long n = m_count + m_preload_count;
+  return m_count;  // TODO: really ignore preload count?
+}
 
-    CuePoint** p = m_cue_points;
-    CuePoint** const q = p + n;
+bool Cues::DoneParsing() const {
+  const long long stop = m_start + m_size;  // end of the parsed range
+  return (m_pos >= stop);  // true once the cursor has reached the end
+}
 
-    while (p != q)
-    {
-        CuePoint* const pCP = *p++;
-        assert(pCP);
+void Cues::Init() const {  // preload one CuePoint per child with ID 0x3B
+  if (m_cue_points)
+    return;  // already initialised
 
-        delete pCP;
-    }
+  assert(m_count == 0);
+  assert(m_preload_count == 0);
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  const long long stop = m_start + m_size;
+  long long pos = m_start;
+
+  long cue_points_size = 0;  // array capacity, updated by PreloadCuePoint()
+
+  while (pos < stop) {
+    const long long idpos = pos;  // where this child element begins
+
+    long len;
+
+    const long long id = ReadUInt(pReader, pos, len);
+    assert(id >= 0);  // TODO: values read from the file are only asserted on
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    assert(size >= 0);
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume Size field
+    assert((pos + size) <= stop);
+
+    if (id == 0x3B)  // CuePoint ID
+      PreloadCuePoint(cue_points_size, idpos);
+
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
+}
+
+void Cues::PreloadCuePoint(long& cue_points_size, long long pos) const {
+  assert(m_count == 0);  // only valid before any cue point has been loaded
+
+  if (m_preload_count >= cue_points_size) {  // array is full: reallocate
+    const long n = (cue_points_size <= 0) ? 2048 : 2 * cue_points_size;
+
+    CuePoint** const qq = new CuePoint* [n];
+    CuePoint** q = qq;  // beginning of target
+
+    CuePoint** p = m_cue_points;  // beginning of source
+    CuePoint** const pp = p + m_preload_count;  // end of source
+
+    while (p != pp)  // copy the existing pointers across
+      *q++ = *p++;
 
     delete[] m_cue_points;
+
+    m_cue_points = qq;
+    cue_points_size = n;  // report the new capacity back to the caller
+  }
+
+  CuePoint* const pCP = new CuePoint(m_preload_count, pos);
+  m_cue_points[m_preload_count++] = pCP;  // store, then bump the count
 }
 
+bool Cues::LoadCuePoint() const {  // true if one cue point was loaded
+  // odbgstream os;
+  // os << "Cues::LoadCuePoint" << endl;
 
-long Cues::GetCount() const
-{
-    if (m_cue_points == NULL)
-        return -1;
+  const long long stop = m_start + m_size;
 
-    return m_count;  //TODO: really ignore preload count?
-}
+  if (m_pos >= stop)
+    return false;  // nothing else to do
 
+  Init();  // returns immediately once m_cue_points exists
 
-bool Cues::DoneParsing() const
-{
-    const long long stop = m_start + m_size;
-    return (m_pos >= stop);
-}
+  IMkvReader* const pReader = m_pSegment->m_pReader;
 
+  while (m_pos < stop) {
+    const long long idpos = m_pos;  // where this child element begins
 
-void Cues::Init() const
-{
-    if (m_cue_points)
-        return;
+    long len;
 
-    assert(m_count == 0);
-    assert(m_preload_count == 0);
+    const long long id = ReadUInt(pReader, m_pos, len);
+    assert(id >= 0);  // TODO: values read from the file are only asserted on
+    assert((m_pos + len) <= stop);
 
-    IMkvReader* const pReader = m_pSegment->m_pReader;
+    m_pos += len;  // consume ID
 
-    const long long stop = m_start + m_size;
-    long long pos = m_start;
+    const long long size = ReadUInt(pReader, m_pos, len);
+    assert(size >= 0);
+    assert((m_pos + len) <= stop);
 
-    long cue_points_size = 0;
+    m_pos += len;  // consume Size field
+    assert((m_pos + size) <= stop);
 
-    while (pos < stop)
-    {
-        const long long idpos = pos;
+    if (id != 0x3B) {  // not a CuePoint (ID 0x3B): skip it
+      m_pos += size;  // consume payload
+      assert(m_pos <= stop);
 
-        long len;
-
-        const long long id = ReadUInt(pReader, pos, len);
-        assert(id >= 0);  //TODO
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume ID
-
-        const long long size = ReadUInt(pReader, pos, len);
-        assert(size >= 0);
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume Size field
-        assert((pos + size) <= stop);
-
-        if (id == 0x3B)  //CuePoint ID
-            PreloadCuePoint(cue_points_size, idpos);
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
-    }
-}
-
-
-void Cues::PreloadCuePoint(
-    long& cue_points_size,
-    long long pos) const
-{
-    assert(m_count == 0);
-
-    if (m_preload_count >= cue_points_size)
-    {
-        const long n = (cue_points_size <= 0) ? 2048 : 2*cue_points_size;
-
-        CuePoint** const qq = new CuePoint*[n];
-        CuePoint** q = qq;  //beginning of target
-
-        CuePoint** p = m_cue_points;                //beginning of source
-        CuePoint** const pp = p + m_preload_count;  //end of source
-
-        while (p != pp)
-            *q++ = *p++;
-
-        delete[] m_cue_points;
-
-        m_cue_points = qq;
-        cue_points_size = n;
+      continue;
     }
 
-    CuePoint* const pCP = new CuePoint(m_preload_count, pos);
-    m_cue_points[m_preload_count++] = pCP;
+    assert(m_preload_count > 0);
+
+    CuePoint* const pCP = m_cue_points[m_count];  // next entry not yet loaded
+    assert(pCP);
+    assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos));
+    if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))  // same test as the assert, for builds without asserts
+      return false;
+
+    pCP->Load(pReader);
+    ++m_count;  // one more loaded entry ...
+    --m_preload_count;  // ... and one fewer preloaded entry
+
+    m_pos += size;  // consume payload
+    assert(m_pos <= stop);
+
+    return true;  // yes, we loaded a cue point
+  }
+
+  // return (m_pos < stop);
+  return false;  // no, we did not load a cue point
 }
 
-
-bool Cues::LoadCuePoint() const
-{
-    //odbgstream os;
-    //os << "Cues::LoadCuePoint" << endl;
-
-    const long long stop = m_start + m_size;
-
-    if (m_pos >= stop)
-        return false;  //nothing else to do
-
-    Init();
-
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    while (m_pos < stop)
-    {
-        const long long idpos = m_pos;
-
-        long len;
-
-        const long long id = ReadUInt(pReader, m_pos, len);
-        assert(id >= 0);  //TODO
-        assert((m_pos + len) <= stop);
-
-        m_pos += len;  //consume ID
-
-        const long long size = ReadUInt(pReader, m_pos, len);
-        assert(size >= 0);
-        assert((m_pos + len) <= stop);
-
-        m_pos += len;  //consume Size field
-        assert((m_pos + size) <= stop);
-
-        if (id != 0x3B)  //CuePoint ID
-        {
-            m_pos += size;  //consume payload
-            assert(m_pos <= stop);
-
-            continue;
-        }
-
-        assert(m_preload_count > 0);
-
-        CuePoint* const pCP = m_cue_points[m_count];
-        assert(pCP);
-        assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos));
-        if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))
-            return false;
-
-        pCP->Load(pReader);
-        ++m_count;
-        --m_preload_count;
-
-        m_pos += size;  //consume payload
-        assert(m_pos <= stop);
-
-        return true;  //yes, we loaded a cue point
-    }
-
-    //return (m_pos < stop);
-    return false;  //no, we did not load a cue point
-}
-
-
-bool Cues::Find(
-    long long time_ns,
-    const Track* pTrack,
-    const CuePoint*& pCP,
-    const CuePoint::TrackPosition*& pTP) const
-{
-    assert(time_ns >= 0);
-    assert(pTrack);
+bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP,
+                const CuePoint::TrackPosition*& pTP) const {
+  assert(time_ns >= 0);
+  assert(pTrack);
 
 #if 0
     LoadCuePoint();  //establish invariant
@@ -2619,71 +2338,68 @@
     assert(pCP);
     assert(pCP->GetTime(m_pSegment) <= time_ns);
 #else
-    if (m_cue_points == NULL)
-        return false;
+  if (m_cue_points == NULL)
+    return false;
 
-    if (m_count == 0)
-        return false;
+  if (m_count == 0)
+    return false;
 
-    CuePoint** const ii = m_cue_points;
-    CuePoint** i = ii;
+  CuePoint** const ii = m_cue_points;
+  CuePoint** i = ii;
 
-    CuePoint** const jj = ii + m_count;
-    CuePoint** j = jj;
+  CuePoint** const jj = ii + m_count;
+  CuePoint** j = jj;
 
-    pCP = *i;
-    assert(pCP);
+  pCP = *i;
+  assert(pCP);
 
-    if (time_ns <= pCP->GetTime(m_pSegment))
-    {
-        pTP = pCP->Find(pTrack);
-        return (pTP != NULL);
-    }
-
-    while (i < j)
-    {
-        //INVARIANT:
-        //[ii, i) <= time_ns
-        //[i, j)  ?
-        //[j, jj) > time_ns
-
-        CuePoint** const k = i + (j - i) / 2;
-        assert(k < jj);
-
-        CuePoint* const pCP = *k;
-        assert(pCP);
-
-        const long long t = pCP->GetTime(m_pSegment);
-
-        if (t <= time_ns)
-            i = k + 1;
-        else
-            j = k;
-
-        assert(i <= j);
-    }
-
-    assert(i == j);
-    assert(i <= jj);
-    assert(i > ii);
-
-    pCP = *--i;
-    assert(pCP);
-    assert(pCP->GetTime(m_pSegment) <= time_ns);
-#endif
-
-    //TODO: here and elsewhere, it's probably not correct to search
-    //for the cue point with this time, and then search for a matching
-    //track.  In principle, the matching track could be on some earlier
-    //cue point, and with our current algorithm, we'd miss it.  To make
-    //this bullet-proof, we'd need to create a secondary structure,
-    //with a list of cue points that apply to a track, and then search
-    //that track-based structure for a matching cue point.
-
+  if (time_ns <= pCP->GetTime(m_pSegment)) {
     pTP = pCP->Find(pTrack);
     return (pTP != NULL);
-}
+  }
 
+  while (i < j) {
+    // INVARIANT:
+    //[ii, i) <= time_ns
+    //[i, j)  ?
+    //[j, jj) > time_ns
+
+    CuePoint** const k = i + (j - i) / 2;
+    assert(k < jj);
+
+    CuePoint* const pCP = *k;
+    assert(pCP);
+
+    const long long t = pCP->GetTime(m_pSegment);
+
+    if (t <= time_ns)
+      i = k + 1;
+    else
+      j = k;
+
+    assert(i <= j);
+  }
+
+  assert(i == j);
+  assert(i <= jj);
+  assert(i > ii);
+
+  pCP = *--i;
+  assert(pCP);
+  assert(pCP->GetTime(m_pSegment) <= time_ns);
+#endif
+
+  // TODO: here and elsewhere, it's probably not correct to search
+  // for the cue point with this time, and then search for a matching
+  // track.  In principle, the matching track could be on some earlier
+  // cue point, and with our current algorithm, we'd miss it.  To make
+  // this bullet-proof, we'd need to create a secondary structure,
+  // with a list of cue points that apply to a track, and then search
+  // that track-based structure for a matching cue point.
+
+  pTP = pCP->Find(pTrack);
+  return (pTP != NULL);
+}
 
 #if 0
 bool Cues::FindNext(
@@ -2744,14 +2460,12 @@
 }
 #endif
 
+const CuePoint* Cues::GetFirst() const {
+  if (m_cue_points == NULL)
+    return NULL;
 
-const CuePoint* Cues::GetFirst() const
-{
-    if (m_cue_points == NULL)
-        return NULL;
-
-    if (m_count == 0)
-        return NULL;
+  if (m_count == 0)
+    return NULL;
 
 #if 0
     LoadCuePoint();  //init cues
@@ -2762,24 +2476,22 @@
         return NULL;
 #endif
 
-    CuePoint* const* const pp = m_cue_points;
-    assert(pp);
+  CuePoint* const* const pp = m_cue_points;
+  assert(pp);
 
-    CuePoint* const pCP = pp[0];
-    assert(pCP);
-    assert(pCP->GetTimeCode() >= 0);
+  CuePoint* const pCP = pp[0];
+  assert(pCP);
+  assert(pCP->GetTimeCode() >= 0);
 
-    return pCP;
+  return pCP;
 }
 
+const CuePoint* Cues::GetLast() const {
+  if (m_cue_points == NULL)
+    return NULL;
 
-const CuePoint* Cues::GetLast() const
-{
-    if (m_cue_points == NULL)
-        return NULL;
-
-    if (m_count <= 0)
-        return NULL;
+  if (m_count <= 0)
+    return NULL;
 
 #if 0
     LoadCuePoint();  //init cues
@@ -2800,28 +2512,26 @@
     pCP->Load(m_pSegment->m_pReader);
     assert(pCP->GetTimeCode() >= 0);
 #else
-    const long index = m_count - 1;
+  const long index = m_count - 1;
 
-    CuePoint* const* const pp = m_cue_points;
-    assert(pp);
+  CuePoint* const* const pp = m_cue_points;
+  assert(pp);
 
-    CuePoint* const pCP = pp[index];
-    assert(pCP);
-    assert(pCP->GetTimeCode() >= 0);
+  CuePoint* const pCP = pp[index];
+  assert(pCP);
+  assert(pCP->GetTimeCode() >= 0);
 #endif
 
-    return pCP;
+  return pCP;
 }
 
+const CuePoint* Cues::GetNext(const CuePoint* pCurr) const {
+  if (pCurr == NULL)
+    return NULL;
 
-const CuePoint* Cues::GetNext(const CuePoint* pCurr) const
-{
-    if (pCurr == NULL)
-        return NULL;
-
-    assert(pCurr->GetTimeCode() >= 0);
-    assert(m_cue_points);
-    assert(m_count >= 1);
+  assert(pCurr->GetTimeCode() >= 0);
+  assert(m_cue_points);
+  assert(m_count >= 1);
 
 #if 0
     const size_t count = m_count + m_preload_count;
@@ -2843,386 +2553,347 @@
 
     pNext->Load(m_pSegment->m_pReader);
 #else
-    long index = pCurr->m_index;
-    assert(index < m_count);
+  long index = pCurr->m_index;
+  assert(index < m_count);
 
-    CuePoint* const* const pp = m_cue_points;
-    assert(pp);
-    assert(pp[index] == pCurr);
+  CuePoint* const* const pp = m_cue_points;
+  assert(pp);
+  assert(pp[index] == pCurr);
 
-    ++index;
+  ++index;
 
-    if (index >= m_count)
-        return NULL;
+  if (index >= m_count)
+    return NULL;
 
-    CuePoint* const pNext = pp[index];
-    assert(pNext);
-    assert(pNext->GetTimeCode() >= 0);
+  CuePoint* const pNext = pp[index];
+  assert(pNext);
+  assert(pNext->GetTimeCode() >= 0);
 #endif
 
-    return pNext;
+  return pNext;
 }
 
+const BlockEntry* Cues::GetBlock(const CuePoint* pCP,
+                                 const CuePoint::TrackPosition* pTP) const {
+  if (pCP == NULL)
+    return NULL;
 
-const BlockEntry* Cues::GetBlock(
-    const CuePoint* pCP,
-    const CuePoint::TrackPosition* pTP) const
-{
-    if (pCP == NULL)
-        return NULL;
+  if (pTP == NULL)
+    return NULL;
 
-    if (pTP == NULL)
-        return NULL;
-
-    return m_pSegment->GetBlock(*pCP, *pTP);
+  return m_pSegment->GetBlock(*pCP, *pTP);
 }
 
+const BlockEntry* Segment::GetBlock(const CuePoint& cp,
+                                    const CuePoint::TrackPosition& tp) {
+  Cluster** const ii = m_clusters;
+  Cluster** i = ii;
 
-const BlockEntry* Segment::GetBlock(
-    const CuePoint& cp,
-    const CuePoint::TrackPosition& tp)
-{
-    Cluster** const ii = m_clusters;
-    Cluster** i = ii;
+  const long count = m_clusterCount + m_clusterPreloadCount;
 
-    const long count = m_clusterCount + m_clusterPreloadCount;
+  Cluster** const jj = ii + count;
+  Cluster** j = jj;
 
-    Cluster** const jj = ii + count;
-    Cluster** j = jj;
+  while (i < j) {
+    // INVARIANT:
+    //[ii, i) < pTP->m_pos
+    //[i, j) ?
+    //[j, jj)  > pTP->m_pos
 
-    while (i < j)
-    {
-        //INVARIANT:
-        //[ii, i) < pTP->m_pos
-        //[i, j) ?
-        //[j, jj)  > pTP->m_pos
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
 
-        Cluster** const k = i + (j - i) / 2;
-        assert(k < jj);
-
-        Cluster* const pCluster = *k;
-        assert(pCluster);
-
-        //const long long pos_ = pCluster->m_pos;
-        //assert(pos_);
-        //const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
-
-        const long long pos = pCluster->GetPosition();
-        assert(pos >= 0);
-
-        if (pos < tp.m_pos)
-            i = k + 1;
-        else if (pos > tp.m_pos)
-            j = k;
-        else
-            return pCluster->GetEntry(cp, tp);
-    }
-
-    assert(i == j);
-    //assert(Cluster::HasBlockEntries(this, tp.m_pos));
-
-    Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1);
+    Cluster* const pCluster = *k;
     assert(pCluster);
 
-    const ptrdiff_t idx = i - m_clusters;
+    // const long long pos_ = pCluster->m_pos;
+    // assert(pos_);
+    // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
 
-    PreloadCluster(pCluster, idx);
-    assert(m_clusters);
-    assert(m_clusterPreloadCount > 0);
-    assert(m_clusters[idx] == pCluster);
+    const long long pos = pCluster->GetPosition();
+    assert(pos >= 0);
 
-    return pCluster->GetEntry(cp, tp);
+    if (pos < tp.m_pos)
+      i = k + 1;
+    else if (pos > tp.m_pos)
+      j = k;
+    else
+      return pCluster->GetEntry(cp, tp);
+  }
+
+  assert(i == j);
+  // assert(Cluster::HasBlockEntries(this, tp.m_pos));
+
+  Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos);  //, -1);
+  assert(pCluster);
+
+  const ptrdiff_t idx = i - m_clusters;
+
+  PreloadCluster(pCluster, idx);
+  assert(m_clusters);
+  assert(m_clusterPreloadCount > 0);
+  assert(m_clusters[idx] == pCluster);
+
+  return pCluster->GetEntry(cp, tp);
 }
 
+const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) {
+  if (requested_pos < 0)
+    return 0;
 
-const Cluster* Segment::FindOrPreloadCluster(long long requested_pos)
-{
-    if (requested_pos < 0)
-        return 0;
+  Cluster** const ii = m_clusters;
+  Cluster** i = ii;
 
-    Cluster** const ii = m_clusters;
-    Cluster** i = ii;
+  const long count = m_clusterCount + m_clusterPreloadCount;
 
-    const long count = m_clusterCount + m_clusterPreloadCount;
+  Cluster** const jj = ii + count;
+  Cluster** j = jj;
 
-    Cluster** const jj = ii + count;
-    Cluster** j = jj;
+  while (i < j) {
+    // INVARIANT:
+    //[ii, i) < pTP->m_pos
+    //[i, j) ?
+    //[j, jj)  > pTP->m_pos
 
-    while (i < j)
-    {
-        //INVARIANT:
-        //[ii, i) < pTP->m_pos
-        //[i, j) ?
-        //[j, jj)  > pTP->m_pos
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
 
-        Cluster** const k = i + (j - i) / 2;
-        assert(k < jj);
-
-        Cluster* const pCluster = *k;
-        assert(pCluster);
-
-        //const long long pos_ = pCluster->m_pos;
-        //assert(pos_);
-        //const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
-
-        const long long pos = pCluster->GetPosition();
-        assert(pos >= 0);
-
-        if (pos < requested_pos)
-            i = k + 1;
-        else if (pos > requested_pos)
-            j = k;
-        else
-            return pCluster;
-    }
-
-    assert(i == j);
-    //assert(Cluster::HasBlockEntries(this, tp.m_pos));
-
-    Cluster* const pCluster = Cluster::Create(
-                                this,
-                                -1,
-                                requested_pos);
-                                //-1);
+    Cluster* const pCluster = *k;
     assert(pCluster);
 
-    const ptrdiff_t idx = i - m_clusters;
+    // const long long pos_ = pCluster->m_pos;
+    // assert(pos_);
+    // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
 
-    PreloadCluster(pCluster, idx);
-    assert(m_clusters);
-    assert(m_clusterPreloadCount > 0);
-    assert(m_clusters[idx] == pCluster);
+    const long long pos = pCluster->GetPosition();
+    assert(pos >= 0);
 
-    return pCluster;
+    if (pos < requested_pos)
+      i = k + 1;
+    else if (pos > requested_pos)
+      j = k;
+    else
+      return pCluster;
+  }
+
+  assert(i == j);
+  // assert(Cluster::HasBlockEntries(this, tp.m_pos));
+
+  Cluster* const pCluster = Cluster::Create(this, -1, requested_pos);
+  //-1);
+  assert(pCluster);
+
+  const ptrdiff_t idx = i - m_clusters;
+
+  PreloadCluster(pCluster, idx);
+  assert(m_clusters);
+  assert(m_clusterPreloadCount > 0);
+  assert(m_clusters[idx] == pCluster);
+
+  return pCluster;
 }
 
-
-CuePoint::CuePoint(long idx, long long pos) :
-    m_element_start(0),
-    m_element_size(0),
-    m_index(idx),
-    m_timecode(-1 * pos),
-    m_track_positions(NULL),
-    m_track_positions_count(0)
-{
-    assert(pos > 0);
+CuePoint::CuePoint(long idx, long long pos)
+    : m_element_start(0),
+      m_element_size(0),
+      m_index(idx),
+      m_timecode(-1 * pos),
+      m_track_positions(NULL),
+      m_track_positions_count(0) {
+  assert(pos > 0);
 }
 
+CuePoint::~CuePoint() { delete[] m_track_positions; }
 
-CuePoint::~CuePoint()
-{
-    delete[] m_track_positions;
-}
+void CuePoint::Load(IMkvReader* pReader) {
+  // odbgstream os;
+  // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl;
 
+  if (m_timecode >= 0)  // already loaded
+    return;
 
-void CuePoint::Load(IMkvReader* pReader)
-{
-    //odbgstream os;
-    //os << "CuePoint::Load(begin): timecode=" << m_timecode << endl;
+  assert(m_track_positions == NULL);
+  assert(m_track_positions_count == 0);
 
-    if (m_timecode >= 0)  //already loaded
-        return;
+  long long pos_ = -m_timecode;
+  const long long element_start = pos_;
 
-    assert(m_track_positions == NULL);
-    assert(m_track_positions_count == 0);
+  long long stop;
 
-    long long pos_ = -m_timecode;
-    const long long element_start = pos_;
+  {
+    long len;
 
-    long long stop;
+    const long long id = ReadUInt(pReader, pos_, len);
+    assert(id == 0x3B);  // CuePoint ID
+    if (id != 0x3B)
+      return;
 
-    {
-        long len;
+    pos_ += len;  // consume ID
 
-        const long long id = ReadUInt(pReader, pos_, len);
-        assert(id == 0x3B);  //CuePoint ID
-        if (id != 0x3B)
-            return;
+    const long long size = ReadUInt(pReader, pos_, len);
+    assert(size >= 0);
 
-        pos_ += len;  //consume ID
+    pos_ += len;  // consume Size field
+    // pos_ now points to start of payload
 
-        const long long size = ReadUInt(pReader, pos_, len);
-        assert(size >= 0);
+    stop = pos_ + size;
+  }
 
-        pos_ += len;  //consume Size field
-        //pos_ now points to start of payload
+  const long long element_size = stop - element_start;
 
-        stop = pos_ + size;
+  long long pos = pos_;
+
+  // First count number of track positions
+
+  while (pos < stop) {
+    long len;
+
+    const long long id = ReadUInt(pReader, pos, len);
+    assert(id >= 0);  // TODO
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    assert(size >= 0);
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume Size field
+    assert((pos + size) <= stop);
+
+    if (id == 0x33)  // CueTime ID
+      m_timecode = UnserializeUInt(pReader, pos, size);
+
+    else if (id == 0x37)  // CueTrackPosition(s) ID
+      ++m_track_positions_count;
+
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
+
+  assert(m_timecode >= 0);
+  assert(m_track_positions_count > 0);
+
+  // os << "CuePoint::Load(cont'd): idpos=" << idpos
+  //   << " timecode=" << m_timecode
+  //   << endl;
+
+  m_track_positions = new TrackPosition[m_track_positions_count];
+
+  // Now parse track positions
+
+  TrackPosition* p = m_track_positions;
+  pos = pos_;
+
+  while (pos < stop) {
+    long len;
+
+    const long long id = ReadUInt(pReader, pos, len);
+    assert(id >= 0);  // TODO
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    assert(size >= 0);
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume Size field
+    assert((pos + size) <= stop);
+
+    if (id == 0x37) {  // CueTrackPosition(s) ID
+      TrackPosition& tp = *p++;
+      tp.Parse(pReader, pos, size);
     }
 
-    const long long element_size = stop - element_start;
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
 
-    long long pos = pos_;
+  assert(size_t(p - m_track_positions) == m_track_positions_count);
 
-    //First count number of track positions
-
-    while (pos < stop)
-    {
-        long len;
-
-        const long long id = ReadUInt(pReader, pos, len);
-        assert(id >= 0);  //TODO
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume ID
-
-        const long long size = ReadUInt(pReader, pos, len);
-        assert(size >= 0);
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume Size field
-        assert((pos + size) <= stop);
-
-        if (id == 0x33)  //CueTime ID
-            m_timecode = UnserializeUInt(pReader, pos, size);
-
-        else if (id == 0x37) //CueTrackPosition(s) ID
-            ++m_track_positions_count;
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
-    }
-
-    assert(m_timecode >= 0);
-    assert(m_track_positions_count > 0);
-
-    //os << "CuePoint::Load(cont'd): idpos=" << idpos
-    //   << " timecode=" << m_timecode
-    //   << endl;
-
-    m_track_positions = new TrackPosition[m_track_positions_count];
-
-    //Now parse track positions
-
-    TrackPosition* p = m_track_positions;
-    pos = pos_;
-
-    while (pos < stop)
-    {
-        long len;
-
-        const long long id = ReadUInt(pReader, pos, len);
-        assert(id >= 0);  //TODO
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume ID
-
-        const long long size = ReadUInt(pReader, pos, len);
-        assert(size >= 0);
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume Size field
-        assert((pos + size) <= stop);
-
-        if (id == 0x37) //CueTrackPosition(s) ID
-        {
-            TrackPosition& tp = *p++;
-            tp.Parse(pReader, pos, size);
-        }
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
-    }
-
-    assert(size_t(p - m_track_positions) == m_track_positions_count);
-
-    m_element_start = element_start;
-    m_element_size = element_size;
+  m_element_start = element_start;
+  m_element_size = element_size;
 }
 
+void CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_,
+                                    long long size_) {
+  const long long stop = start_ + size_;
+  long long pos = start_;
 
+  m_track = -1;
+  m_pos = -1;
+  m_block = 1;  // default
 
-void CuePoint::TrackPosition::Parse(
-    IMkvReader* pReader,
-    long long start_,
-    long long size_)
-{
-    const long long stop = start_ + size_;
-    long long pos = start_;
+  while (pos < stop) {
+    long len;
 
-    m_track = -1;
-    m_pos = -1;
-    m_block = 1;  //default
+    const long long id = ReadUInt(pReader, pos, len);
+    assert(id >= 0);  // TODO
+    assert((pos + len) <= stop);
 
-    while (pos < stop)
-    {
-        long len;
+    pos += len;  // consume ID
 
-        const long long id = ReadUInt(pReader, pos, len);
-        assert(id >= 0);  //TODO
-        assert((pos + len) <= stop);
+    const long long size = ReadUInt(pReader, pos, len);
+    assert(size >= 0);
+    assert((pos + len) <= stop);
 
-        pos += len;  //consume ID
+    pos += len;  // consume Size field
+    assert((pos + size) <= stop);
 
-        const long long size = ReadUInt(pReader, pos, len);
-        assert(size >= 0);
-        assert((pos + len) <= stop);
+    if (id == 0x77)  // CueTrack ID
+      m_track = UnserializeUInt(pReader, pos, size);
 
-        pos += len;  //consume Size field
-        assert((pos + size) <= stop);
+    else if (id == 0x71)  // CueClusterPos ID
+      m_pos = UnserializeUInt(pReader, pos, size);
 
-        if (id == 0x77)  //CueTrack ID
-            m_track = UnserializeUInt(pReader, pos, size);
+    else if (id == 0x1378)  // CueBlockNumber
+      m_block = UnserializeUInt(pReader, pos, size);
 
-        else if (id == 0x71)  //CueClusterPos ID
-            m_pos = UnserializeUInt(pReader, pos, size);
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
 
-        else if (id == 0x1378)  //CueBlockNumber
-            m_block = UnserializeUInt(pReader, pos, size);
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
-    }
-
-    assert(m_pos >= 0);
-    assert(m_track > 0);
-    //assert(m_block > 0);
+  assert(m_pos >= 0);
+  assert(m_track > 0);
+  // assert(m_block > 0);
 }
 
+const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const {
+  assert(pTrack);
 
-const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const
-{
-    assert(pTrack);
+  const long long n = pTrack->GetNumber();
 
-    const long long n = pTrack->GetNumber();
+  const TrackPosition* i = m_track_positions;
+  const TrackPosition* const j = i + m_track_positions_count;
 
-    const TrackPosition* i = m_track_positions;
-    const TrackPosition* const j = i + m_track_positions_count;
+  while (i != j) {
+    const TrackPosition& p = *i++;
 
-    while (i != j)
-    {
-        const TrackPosition& p = *i++;
+    if (p.m_track == n)
+      return &p;
+  }
 
-        if (p.m_track == n)
-            return &p;
-    }
-
-    return NULL;  //no matching track number found
+  return NULL;  // no matching track number found
 }
 
+long long CuePoint::GetTimeCode() const { return m_timecode; }
 
-long long CuePoint::GetTimeCode() const
-{
-    return m_timecode;
+long long CuePoint::GetTime(const Segment* pSegment) const {
+  assert(pSegment);
+  assert(m_timecode >= 0);
+
+  const SegmentInfo* const pInfo = pSegment->GetInfo();
+  assert(pInfo);
+
+  const long long scale = pInfo->GetTimeCodeScale();
+  assert(scale >= 1);
+
+  const long long time = scale * m_timecode;
+
+  return time;
 }
 
-long long CuePoint::GetTime(const Segment* pSegment) const
-{
-    assert(pSegment);
-    assert(m_timecode >= 0);
-
-    const SegmentInfo* const pInfo = pSegment->GetInfo();
-    assert(pInfo);
-
-    const long long scale = pInfo->GetTimeCodeScale();
-    assert(scale >= 1);
-
-    const long long time = scale * m_timecode;
-
-    return time;
-}
-
-
 #if 0
 long long Segment::Unparsed() const
 {
@@ -3237,808 +2908,745 @@
     return result;
 }
 #else
-bool Segment::DoneParsing() const
-{
-    if (m_size < 0)
-    {
-        long long total, avail;
+bool Segment::DoneParsing() const {
+  if (m_size < 0) {
+    long long total, avail;
 
-        const int status = m_pReader->Length(&total, &avail);
+    const int status = m_pReader->Length(&total, &avail);
 
-        if (status < 0)  //error
-            return true;  //must assume done
+    if (status < 0)  // error
+      return true;  // must assume done
 
-        if (total < 0)
-            return false;  //assume live stream
+    if (total < 0)
+      return false;  // assume live stream
 
-        return (m_pos >= total);
-    }
+    return (m_pos >= total);
+  }
 
-    const long long stop = m_start + m_size;
+  const long long stop = m_start + m_size;
 
-    return (m_pos >= stop);
+  return (m_pos >= stop);
 }
 #endif
 
+const Cluster* Segment::GetFirst() const {
+  if ((m_clusters == NULL) || (m_clusterCount <= 0))
+    return &m_eos;
 
-const Cluster* Segment::GetFirst() const
-{
-    if ((m_clusters == NULL) || (m_clusterCount <= 0))
-       return &m_eos;
+  Cluster* const pCluster = m_clusters[0];
+  assert(pCluster);
 
-    Cluster* const pCluster = m_clusters[0];
-    assert(pCluster);
-
-    return pCluster;
+  return pCluster;
 }
 
+const Cluster* Segment::GetLast() const {
+  if ((m_clusters == NULL) || (m_clusterCount <= 0))
+    return &m_eos;
 
-const Cluster* Segment::GetLast() const
-{
-    if ((m_clusters == NULL) || (m_clusterCount <= 0))
-        return &m_eos;
+  const long idx = m_clusterCount - 1;
 
-    const long idx = m_clusterCount - 1;
+  Cluster* const pCluster = m_clusters[idx];
+  assert(pCluster);
 
-    Cluster* const pCluster = m_clusters[idx];
-    assert(pCluster);
-
-    return pCluster;
+  return pCluster;
 }
 
+unsigned long Segment::GetCount() const { return m_clusterCount; }
 
-unsigned long Segment::GetCount() const
-{
-    return m_clusterCount;
-}
+const Cluster* Segment::GetNext(const Cluster* pCurr) {
+  assert(pCurr);
+  assert(pCurr != &m_eos);
+  assert(m_clusters);
 
+  long idx = pCurr->m_index;
 
-const Cluster* Segment::GetNext(const Cluster* pCurr)
-{
-    assert(pCurr);
-    assert(pCurr != &m_eos);
-    assert(m_clusters);
+  if (idx >= 0) {
+    assert(m_clusterCount > 0);
+    assert(idx < m_clusterCount);
+    assert(pCurr == m_clusters[idx]);
 
-    long idx =  pCurr->m_index;
+    ++idx;
 
-    if (idx >= 0)
-    {
-        assert(m_clusterCount > 0);
-        assert(idx < m_clusterCount);
-        assert(pCurr == m_clusters[idx]);
+    if (idx >= m_clusterCount)
+      return &m_eos;  // caller will LoadCluster as desired
 
-        ++idx;
-
-        if (idx >= m_clusterCount)
-            return &m_eos;  //caller will LoadCluster as desired
-
-        Cluster* const pNext = m_clusters[idx];
-        assert(pNext);
-        assert(pNext->m_index >= 0);
-        assert(pNext->m_index == idx);
-
-        return pNext;
-    }
-
-    assert(m_clusterPreloadCount > 0);
-
-    long long pos = pCurr->m_element_start;
-
-    assert(m_size >= 0);  //TODO
-    const long long stop = m_start + m_size;  //end of segment
-
-    {
-        long len;
-
-        long long result = GetUIntLength(m_pReader, pos, len);
-        assert(result == 0);
-        assert((pos + len) <= stop);  //TODO
-        if (result != 0)
-            return NULL;
-
-        const long long id = ReadUInt(m_pReader, pos, len);
-        assert(id == 0x0F43B675);  //Cluster ID
-        if (id != 0x0F43B675)
-            return NULL;
-
-        pos += len;  //consume ID
-
-        //Read Size
-        result = GetUIntLength(m_pReader, pos, len);
-        assert(result == 0);  //TODO
-        assert((pos + len) <= stop);  //TODO
-
-        const long long size = ReadUInt(m_pReader, pos, len);
-        assert(size > 0);  //TODO
-        //assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
-
-        pos += len;  //consume length of size of element
-        assert((pos + size) <= stop);  //TODO
-
-        //Pos now points to start of payload
-
-        pos += size;  //consume payload
-    }
-
-    long long off_next = 0;
-
-    while (pos < stop)
-    {
-        long len;
-
-        long long result = GetUIntLength(m_pReader, pos, len);
-        assert(result == 0);
-        assert((pos + len) <= stop);  //TODO
-        if (result != 0)
-            return NULL;
-
-        const long long idpos = pos;  //pos of next (potential) cluster
-
-        const long long id = ReadUInt(m_pReader, idpos, len);
-        assert(id > 0);  //TODO
-
-        pos += len;  //consume ID
-
-        //Read Size
-        result = GetUIntLength(m_pReader, pos, len);
-        assert(result == 0);  //TODO
-        assert((pos + len) <= stop);  //TODO
-
-        const long long size = ReadUInt(m_pReader, pos, len);
-        assert(size >= 0);  //TODO
-
-        pos += len;  //consume length of size of element
-        assert((pos + size) <= stop);  //TODO
-
-        //Pos now points to start of payload
-
-        if (size == 0)  //weird
-            continue;
-
-        if (id == 0x0F43B675)  //Cluster ID
-        {
-            const long long off_next_ = idpos - m_start;
-
-            long long pos_;
-            long len_;
-
-            const long status = Cluster::HasBlockEntries(
-                                    this,
-                                    off_next_,
-                                    pos_,
-                                    len_);
-
-            assert(status >= 0);
-
-            if (status > 0)
-            {
-                off_next = off_next_;
-                break;
-            }
-        }
-
-        pos += size;  //consume payload
-    }
-
-    if (off_next <= 0)
-        return 0;
-
-    Cluster** const ii = m_clusters + m_clusterCount;
-    Cluster** i = ii;
-
-    Cluster** const jj = ii + m_clusterPreloadCount;
-    Cluster** j = jj;
-
-    while (i < j)
-    {
-        //INVARIANT:
-        //[0, i) < pos_next
-        //[i, j) ?
-        //[j, jj)  > pos_next
-
-        Cluster** const k = i + (j - i) / 2;
-        assert(k < jj);
-
-        Cluster* const pNext = *k;
-        assert(pNext);
-        assert(pNext->m_index < 0);
-
-        //const long long pos_ = pNext->m_pos;
-        //assert(pos_);
-        //pos = pos_ * ((pos_ < 0) ? -1 : 1);
-
-        pos = pNext->GetPosition();
-
-        if (pos < off_next)
-            i = k + 1;
-        else if (pos > off_next)
-            j = k;
-        else
-            return pNext;
-    }
-
-    assert(i == j);
-
-    Cluster* const pNext = Cluster::Create(this,
-                                          -1,
-                                          off_next);
+    Cluster* const pNext = m_clusters[idx];
     assert(pNext);
-
-    const ptrdiff_t idx_next = i - m_clusters;  //insertion position
-
-    PreloadCluster(pNext, idx_next);
-    assert(m_clusters);
-    assert(idx_next < m_clusterSize);
-    assert(m_clusters[idx_next] == pNext);
+    assert(pNext->m_index >= 0);
+    assert(pNext->m_index == idx);
 
     return pNext;
-}
+  }
 
+  assert(m_clusterPreloadCount > 0);
 
-long Segment::ParseNext(
-    const Cluster* pCurr,
-    const Cluster*& pResult,
-    long long& pos,
-    long& len)
-{
-    assert(pCurr);
-    assert(!pCurr->EOS());
-    assert(m_clusters);
+  long long pos = pCurr->m_element_start;
 
-    pResult = 0;
+  assert(m_size >= 0);  // TODO
+  const long long stop = m_start + m_size;  // end of segment
 
-    if (pCurr->m_index >= 0)  //loaded (not merely preloaded)
-    {
-        assert(m_clusters[pCurr->m_index] == pCurr);
+  {
+    long len;
 
-        const long next_idx = pCurr->m_index + 1;
+    long long result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);
+    assert((pos + len) <= stop);  // TODO
+    if (result != 0)
+      return NULL;
 
-        if (next_idx < m_clusterCount)
-        {
-            pResult = m_clusters[next_idx];
-            return 0;  //success
-        }
+    const long long id = ReadUInt(m_pReader, pos, len);
+    assert(id == 0x0F43B675);  // Cluster ID
+    if (id != 0x0F43B675)
+      return NULL;
 
-        //curr cluster is last among loaded
+    pos += len;  // consume ID
 
-        const long result = LoadCluster(pos, len);
+    // Read Size
+    result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);  // TODO
+    assert((pos + len) <= stop);  // TODO
 
-        if (result < 0)  //error or underflow
-            return result;
+    const long long size = ReadUInt(m_pReader, pos, len);
+    assert(size > 0);  // TODO
+    // assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
 
-        if (result > 0)  //no more clusters
-        {
-            //pResult = &m_eos;
-            return 1;
-        }
+    pos += len;  // consume length of size of element
+    assert((pos + size) <= stop);  // TODO
 
-        pResult = GetLast();
-        return 0;  //success
+    // Pos now points to start of payload
+
+    pos += size;  // consume payload
+  }
+
+  long long off_next = 0;
+
+  while (pos < stop) {
+    long len;
+
+    long long result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);
+    assert((pos + len) <= stop);  // TODO
+    if (result != 0)
+      return NULL;
+
+    const long long idpos = pos;  // pos of next (potential) cluster
+
+    const long long id = ReadUInt(m_pReader, idpos, len);
+    assert(id > 0);  // TODO
+
+    pos += len;  // consume ID
+
+    // Read Size
+    result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);  // TODO
+    assert((pos + len) <= stop);  // TODO
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+    assert(size >= 0);  // TODO
+
+    pos += len;  // consume length of size of element
+    assert((pos + size) <= stop);  // TODO
+
+    // Pos now points to start of payload
+
+    if (size == 0)  // weird
+      continue;
+
+    if (id == 0x0F43B675) {  // Cluster ID
+      const long long off_next_ = idpos - m_start;
+
+      long long pos_;
+      long len_;
+
+      const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_);
+
+      assert(status >= 0);
+
+      if (status > 0) {
+        off_next = off_next_;
+        break;
+      }
     }
 
-    assert(m_pos > 0);
+    pos += size;  // consume payload
+  }
 
-    long long total, avail;
+  if (off_next <= 0)
+    return 0;
 
-    long status = m_pReader->Length(&total, &avail);
+  Cluster** const ii = m_clusters + m_clusterCount;
+  Cluster** i = ii;
 
-    if (status < 0)  //error
-        return status;
+  Cluster** const jj = ii + m_clusterPreloadCount;
+  Cluster** j = jj;
 
-    assert((total < 0) || (avail <= total));
+  while (i < j) {
+    // INVARIANT (cluster positions compared against off_next):
+    // [ii, i) < off_next
+    // [i, j)  not yet examined
+    // [j, jj) > off_next
 
-    const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
 
-    //interrogate curr cluster
+    Cluster* const pNext = *k;
+    assert(pNext);
+    assert(pNext->m_index < 0);
 
-    pos = pCurr->m_element_start;
+    // const long long pos_ = pNext->m_pos;
+    // assert(pos_);
+    // pos = pos_ * ((pos_ < 0) ? -1 : 1);
 
-    if (pCurr->m_element_size >= 0)
-        pos += pCurr->m_element_size;
+    pos = pNext->GetPosition();
+
+    if (pos < off_next)
+      i = k + 1;
+    else if (pos > off_next)
+      j = k;
     else
-    {
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+      return pNext;
+  }
 
-        long long result = GetUIntLength(m_pReader, pos, len);
+  assert(i == j);
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+  Cluster* const pNext = Cluster::Create(this, -1, off_next);
+  assert(pNext);
 
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
+  const ptrdiff_t idx_next = i - m_clusters;  // insertion position
 
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
+  PreloadCluster(pNext, idx_next);
+  assert(m_clusters);
+  assert(idx_next < m_clusterSize);
+  assert(m_clusters[idx_next] == pNext);
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long id = ReadUInt(m_pReader, pos, len);
-
-        if (id != 0x0F43B675)  //weird: not Cluster ID
-            return -1;
-
-        pos += len;  //consume ID
-
-        //Read Size
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result = GetUIntLength(m_pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long size = ReadUInt(m_pReader, pos, len);
-
-        if (size < 0) //error
-            return static_cast<long>(size);
-
-        pos += len;  //consume size field
-
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-        if (size == unknown_size)          //TODO: should never happen
-            return E_FILE_FORMAT_INVALID;  //TODO: resolve this
-
-        //assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
-
-        if ((segment_stop >= 0) && ((pos + size) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        //Pos now points to start of payload
-
-        pos += size;  //consume payload (that is, the current cluster)
-        assert((segment_stop < 0) || (pos <= segment_stop));
-
-        //By consuming the payload, we are assuming that the curr
-        //cluster isn't interesting.  That is, we don't bother checking
-        //whether the payload of the curr cluster is less than what
-        //happens to be available (obtained via IMkvReader::Length).
-        //Presumably the caller has already dispensed with the current
-        //cluster, and really does want the next cluster.
-    }
-
-    //pos now points to just beyond the last fully-loaded cluster
-
-    for (;;)
-    {
-        const long status = DoParseNext(pResult, pos, len);
-
-        if (status <= 1)
-            return status;
-    }
+  return pNext;
 }
 
+long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult,
+                        long long& pos, long& len) {
+  assert(pCurr);
+  assert(!pCurr->EOS());
+  assert(m_clusters);
 
-long Segment::DoParseNext(
-    const Cluster*& pResult,
-    long long& pos,
-    long& len)
-{
-    long long total, avail;
+  pResult = 0;
 
-    long status = m_pReader->Length(&total, &avail);
+  if (pCurr->m_index >= 0) {  // loaded (not merely preloaded)
+    assert(m_clusters[pCurr->m_index] == pCurr);
 
-    if (status < 0)  //error
-        return status;
+    const long next_idx = pCurr->m_index + 1;
 
-    assert((total < 0) || (avail <= total));
+    if (next_idx < m_clusterCount) {
+      pResult = m_clusters[next_idx];
+      return 0;  // success
+    }
 
-    const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+    // curr cluster is last among loaded
 
-    //Parse next cluster.  This is strictly a parsing activity.
-    //Creation of a new cluster object happens later, after the
-    //parsing is done.
+    const long result = LoadCluster(pos, len);
 
-    long long off_next = 0;
-    long long cluster_size = -1;
+    if (result < 0)  // error or underflow
+      return result;
 
-    for (;;)
+    if (result > 0)  // no more clusters
     {
-        if ((total >= 0) && (pos >= total))
-            return 1;  //EOF
+      // pResult = &m_eos;
+      return 1;
+    }
 
-        if ((segment_stop >= 0) && (pos >= segment_stop))
-            return 1;  //EOF
+    pResult = GetLast();
+    return 0;  // success
+  }
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+  assert(m_pos > 0);
 
-        long long result = GetUIntLength(m_pReader, pos, len);
+  long long total, avail;
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+  long status = m_pReader->Length(&total, &avail);
 
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
+  if (status < 0)  // error
+    return status;
 
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
+  assert((total < 0) || (avail <= total));
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
 
-        const long long idpos = pos;             //absolute
-        const long long idoff = pos - m_start;   //relative
+  // interrogate curr cluster
 
-        const long long id = ReadUInt(m_pReader, idpos, len);  //absolute
+  pos = pCurr->m_element_start;
 
-        if (id < 0)  //error
-            return static_cast<long>(id);
+  if (pCurr->m_element_size >= 0)
+    pos += pCurr->m_element_size;
+  else {
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
 
-        if (id == 0)  //weird
-            return -1;  //generic error
+    long long result = GetUIntLength(m_pReader, pos, len);
 
-        pos += len;  //consume ID
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-        //Read Size
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
 
-        result = GetUIntLength(m_pReader, pos, len);
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+    const long long id = ReadUInt(m_pReader, pos, len);
 
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
+    if (id != 0x0F43B675)  // weird: not Cluster ID
+      return -1;
 
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
+    pos += len;  // consume ID
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
+    // Read Size
 
-        const long long size = ReadUInt(m_pReader, pos, len);
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
 
-        if (size < 0)  //error
-            return static_cast<long>(size);
+    result = GetUIntLength(m_pReader, pos, len);
 
-        pos += len;  //consume length of size of element
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-        //Pos now points to start of payload
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-        if (size == 0)  //weird
-            continue;
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
 
-        const long long unknown_size = (1LL << (7 * len)) - 1;
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-        if ((segment_stop >= 0) &&
-            (size != unknown_size) &&
-            ((pos + size) > segment_stop))
-        {
-            return E_FILE_FORMAT_INVALID;
-        }
+    const long long size = ReadUInt(m_pReader, pos, len);
 
-        if (id == 0x0C53BB6B)  //Cues ID
-        {
-            if (size == unknown_size)
-                return E_FILE_FORMAT_INVALID;
+    if (size < 0)  // error
+      return static_cast<long>(size);
 
-            const long long element_stop = pos + size;
+    pos += len;  // consume size field
 
-            if ((segment_stop >= 0) && (element_stop > segment_stop))
-                return E_FILE_FORMAT_INVALID;
+    const long long unknown_size = (1LL << (7 * len)) - 1;
 
-            const long long element_start = idpos;
-            const long long element_size = element_stop - element_start;
+    if (size == unknown_size)  // TODO: should never happen
+      return E_FILE_FORMAT_INVALID;  // TODO: resolve this
 
-            if (m_pCues == NULL)
-            {
-                m_pCues = new Cues(this,
-                                    pos,
-                                    size,
-                                    element_start,
-                                    element_size);
-                assert(m_pCues);  //TODO
-            }
+    // assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
 
-            pos += size;  //consume payload
-            assert((segment_stop < 0) || (pos <= segment_stop));
+    if ((segment_stop >= 0) && ((pos + size) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
 
-            continue;
-        }
+    // Pos now points to start of payload
 
-        if (id != 0x0F43B675)  //not a Cluster ID
-        {
-            if (size == unknown_size)
-                return E_FILE_FORMAT_INVALID;
+    pos += size;  // consume payload (that is, the current cluster)
+    assert((segment_stop < 0) || (pos <= segment_stop));
 
-            pos += size;  //consume payload
-            assert((segment_stop < 0) || (pos <= segment_stop));
+    // By consuming the payload, we are assuming that the curr
+    // cluster isn't interesting.  That is, we don't bother checking
+    // whether the payload of the curr cluster is less than what
+    // happens to be available (obtained via IMkvReader::Length).
+    // Presumably the caller has already dispensed with the current
+    // cluster, and really does want the next cluster.
+  }
 
-            continue;
-        }
+  // pos now points to just beyond the last fully-loaded cluster
 
-#if 0 //this is commented-out to support incremental cluster parsing
+  for (;;) {
+    const long status = DoParseNext(pResult, pos, len);
+
+    if (status <= 1)
+      return status;
+  }
+}
+
+long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) {
+  long long total, avail;
+
+  long status = m_pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+  // Parse next cluster.  This is strictly a parsing activity.
+  // Creation of a new cluster object happens later, after the
+  // parsing is done.
+
+  long long off_next = 0;
+  long long cluster_size = -1;
+
+  for (;;) {
+    if ((total >= 0) && (pos >= total))
+      return 1;  // EOF
+
+    if ((segment_stop >= 0) && (pos >= segment_stop))
+      return 1;  // EOF
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long idpos = pos;  // absolute
+    const long long idoff = pos - m_start;  // relative
+
+    const long long id = ReadUInt(m_pReader, idpos, len);  // absolute
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    if (id == 0)  // weird
+      return -1;  // generic error
+
+    pos += len;  // consume ID
+
+    // Read Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    pos += len;  // consume length of size of element
+
+    // Pos now points to start of payload
+
+    if (size == 0)  // weird
+      continue;
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if ((segment_stop >= 0) && (size != unknown_size) &&
+        ((pos + size) > segment_stop)) {
+      return E_FILE_FORMAT_INVALID;
+    }
+
+    if (id == 0x0C53BB6B) {  // Cues ID
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;
+
+      const long long element_stop = pos + size;
+
+      if ((segment_stop >= 0) && (element_stop > segment_stop))
+        return E_FILE_FORMAT_INVALID;
+
+      const long long element_start = idpos;
+      const long long element_size = element_stop - element_start;
+
+      if (m_pCues == NULL) {
+        m_pCues = new Cues(this, pos, size, element_start, element_size);
+        assert(m_pCues);  // TODO
+      }
+
+      pos += size;  // consume payload
+      assert((segment_stop < 0) || (pos <= segment_stop));
+
+      continue;
+    }
+
+    if (id != 0x0F43B675) {  // not a Cluster ID
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;
+
+      pos += size;  // consume payload
+      assert((segment_stop < 0) || (pos <= segment_stop));
+
+      continue;
+    }
+
+#if 0  // this is commented-out to support incremental cluster parsing
         len = static_cast<long>(size);
 
         if (element_stop > avail)
             return E_BUFFER_NOT_FULL;
 #endif
 
-        //We have a cluster.
+    // We have a cluster.
 
-        off_next = idoff;
+    off_next = idoff;
 
-        if (size != unknown_size)
-            cluster_size = size;
+    if (size != unknown_size)
+      cluster_size = size;
 
+    break;
+  }
+
+  assert(off_next > 0);  // have cluster
+
+  // We have parsed the next cluster.
+  // We have not created a cluster object yet.  What we need
+  // to do now is determine whether it has already been preloaded
+  // (in which case, an object for this cluster has already been
+  // created), and if not, create a new cluster object.
+
+  Cluster** const ii = m_clusters + m_clusterCount;
+  Cluster** i = ii;
+
+  Cluster** const jj = ii + m_clusterPreloadCount;
+  Cluster** j = jj;
+
+  while (i < j) {
+    // INVARIANT (cluster positions compared against off_next):
+    // [ii, i) < off_next
+    // [i, j)  not yet examined
+    // [j, jj) > off_next
+
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
+
+    const Cluster* const pNext = *k;
+    assert(pNext);
+    assert(pNext->m_index < 0);
+
+    pos = pNext->GetPosition();
+    assert(pos >= 0);
+
+    if (pos < off_next)
+      i = k + 1;
+    else if (pos > off_next)
+      j = k;
+    else {
+      pResult = pNext;
+      return 0;  // success
+    }
+  }
+
+  assert(i == j);
+
+  long long pos_;
+  long len_;
+
+  status = Cluster::HasBlockEntries(this, off_next, pos_, len_);
+
+  if (status < 0) {  // error or underflow
+    pos = pos_;
+    len = len_;
+
+    return status;
+  }
+
+  if (status > 0) {  // means "found at least one block entry"
+    Cluster* const pNext = Cluster::Create(this,
+                                           -1,  // preloaded
+                                           off_next);
+    // element_size);
+    assert(pNext);
+
+    const ptrdiff_t idx_next = i - m_clusters;  // insertion position
+
+    PreloadCluster(pNext, idx_next);
+    assert(m_clusters);
+    assert(idx_next < m_clusterSize);
+    assert(m_clusters[idx_next] == pNext);
+
+    pResult = pNext;
+    return 0;  // success
+  }
+
+  // status == 0 means "no block entries found"
+
+  if (cluster_size < 0) {  // unknown size
+    const long long payload_pos = pos;  // absolute pos of cluster payload
+
+    for (;;) {  // determine cluster size
+      if ((total >= 0) && (pos >= total))
         break;
-    }
 
-    assert(off_next > 0);  //have cluster
+      if ((segment_stop >= 0) && (pos >= segment_stop))
+        break;  // no more clusters
 
-    //We have parsed the next cluster.
-    //We have not created a cluster object yet.  What we need
-    //to do now is determine whether it has already be preloaded
-    //(in which case, an object for this cluster has already been
-    //created), and if not, create a new cluster object.
+      // Read ID
 
-    Cluster** const ii = m_clusters + m_clusterCount;
-    Cluster** i = ii;
+      if ((pos + 1) > avail) {
+        len = 1;
+        return E_BUFFER_NOT_FULL;
+      }
 
-    Cluster** const jj = ii + m_clusterPreloadCount;
-    Cluster** j = jj;
+      long long result = GetUIntLength(m_pReader, pos, len);
 
-    while (i < j)
-    {
-        //INVARIANT:
-        //[0, i) < pos_next
-        //[i, j) ?
-        //[j, jj)  > pos_next
+      if (result < 0)  // error
+        return static_cast<long>(result);
 
-        Cluster** const k = i + (j - i) / 2;
-        assert(k < jj);
+      if (result > 0)  // weird
+        return E_BUFFER_NOT_FULL;
 
-        const Cluster* const pNext = *k;
-        assert(pNext);
-        assert(pNext->m_index < 0);
+      if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+        return E_FILE_FORMAT_INVALID;
 
-        pos = pNext->GetPosition();
-        assert(pos >= 0);
+      if ((pos + len) > avail)
+        return E_BUFFER_NOT_FULL;
 
-        if (pos < off_next)
-            i = k + 1;
-        else if (pos > off_next)
-            j = k;
-        else
-        {
-            pResult = pNext;
-            return 0;  //success
-        }
-    }
+      const long long idpos = pos;
+      const long long id = ReadUInt(m_pReader, idpos, len);
 
-    assert(i == j);
+      if (id < 0)  // error (or underflow)
+        return static_cast<long>(id);
 
-    long long pos_;
-    long len_;
+      // This is the distinguished set of IDs we use to determine
+      // that we have exhausted the sub-elements inside the cluster
+      // whose ID we parsed earlier.
 
-    status = Cluster::HasBlockEntries(this, off_next, pos_, len_);
+      if (id == 0x0F43B675)  // Cluster ID
+        break;
 
-    if (status < 0)  //error or underflow
-    {
-        pos = pos_;
-        len = len_;
+      if (id == 0x0C53BB6B)  // Cues ID
+        break;
 
-        return status;
-    }
+      pos += len;  // consume ID (of sub-element)
 
-    if (status > 0)  //means "found at least one block entry"
-    {
-        Cluster* const pNext = Cluster::Create(this,
-                                                -1,   //preloaded
-                                                off_next);
-                                                //element_size);
-        assert(pNext);
+      // Read Size
 
-        const ptrdiff_t idx_next = i - m_clusters;  //insertion position
+      if ((pos + 1) > avail) {
+        len = 1;
+        return E_BUFFER_NOT_FULL;
+      }
 
-        PreloadCluster(pNext, idx_next);
-        assert(m_clusters);
-        assert(idx_next < m_clusterSize);
-        assert(m_clusters[idx_next] == pNext);
+      result = GetUIntLength(m_pReader, pos, len);
 
-        pResult = pNext;
-        return 0;  //success
-    }
+      if (result < 0)  // error
+        return static_cast<long>(result);
 
-    //status == 0 means "no block entries found"
+      if (result > 0)  // weird
+        return E_BUFFER_NOT_FULL;
 
-    if (cluster_size < 0)  //unknown size
-    {
-        const long long payload_pos = pos;  //absolute pos of cluster payload
+      if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+        return E_FILE_FORMAT_INVALID;
 
-        for (;;)  //determine cluster size
-        {
-            if ((total >= 0) && (pos >= total))
-                break;
+      if ((pos + len) > avail)
+        return E_BUFFER_NOT_FULL;
 
-            if ((segment_stop >= 0) && (pos >= segment_stop))
-                break;  //no more clusters
+      const long long size = ReadUInt(m_pReader, pos, len);
 
-            //Read ID
+      if (size < 0)  // error
+        return static_cast<long>(size);
 
-            if ((pos + 1) > avail)
-            {
-                len = 1;
-                return E_BUFFER_NOT_FULL;
-            }
+      pos += len;  // consume size field of element
 
-            long long result = GetUIntLength(m_pReader, pos, len);
+      // pos now points to start of sub-element's payload
 
-            if (result < 0)  //error
-                return static_cast<long>(result);
+      if (size == 0)  // weird
+        continue;
 
-            if (result > 0)  //weird
-                return E_BUFFER_NOT_FULL;
+      const long long unknown_size = (1LL << (7 * len)) - 1;
 
-            if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-                return E_FILE_FORMAT_INVALID;
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;  // not allowed for sub-elements
 
-            if ((pos + len) > avail)
-                return E_BUFFER_NOT_FULL;
+      if ((segment_stop >= 0) && ((pos + size) > segment_stop))  // weird
+        return E_FILE_FORMAT_INVALID;
 
-            const long long idpos = pos;
-            const long long id = ReadUInt(m_pReader, idpos, len);
+      pos += size;  // consume payload of sub-element
+      assert((segment_stop < 0) || (pos <= segment_stop));
+    }  // determine cluster size
 
-            if (id < 0)  //error (or underflow)
-                return static_cast<long>(id);
+    cluster_size = pos - payload_pos;
+    assert(cluster_size >= 0);  // TODO: handle cluster_size = 0
 
-            //This is the distinguished set of ID's we use to determine
-            //that we have exhausted the sub-element's inside the cluster
-            //whose ID we parsed earlier.
+    pos = payload_pos;  // reset and re-parse original cluster
+  }
 
-            if (id == 0x0F43B675)  //Cluster ID
-                break;
+  pos += cluster_size;  // consume payload
+  assert((segment_stop < 0) || (pos <= segment_stop));
 
-            if (id == 0x0C53BB6B)  //Cues ID
-                break;
-
-            pos += len;  //consume ID (of sub-element)
-
-            //Read Size
-
-            if ((pos + 1) > avail)
-            {
-                len = 1;
-                return E_BUFFER_NOT_FULL;
-            }
-
-            result = GetUIntLength(m_pReader, pos, len);
-
-            if (result < 0)  //error
-                return static_cast<long>(result);
-
-            if (result > 0)  //weird
-                return E_BUFFER_NOT_FULL;
-
-            if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-                return E_FILE_FORMAT_INVALID;
-
-            if ((pos + len) > avail)
-                return E_BUFFER_NOT_FULL;
-
-            const long long size = ReadUInt(m_pReader, pos, len);
-
-            if (size < 0)  //error
-                return static_cast<long>(size);
-
-            pos += len;  //consume size field of element
-
-            //pos now points to start of sub-element's payload
-
-            if (size == 0)  //weird
-                continue;
-
-            const long long unknown_size = (1LL << (7 * len)) - 1;
-
-            if (size == unknown_size)
-                return E_FILE_FORMAT_INVALID;  //not allowed for sub-elements
-
-            if ((segment_stop >= 0) && ((pos + size) > segment_stop))  //weird
-                return E_FILE_FORMAT_INVALID;
-
-            pos += size;  //consume payload of sub-element
-            assert((segment_stop < 0) || (pos <= segment_stop));
-        }  //determine cluster size
-
-        cluster_size = pos - payload_pos;
-        assert(cluster_size >= 0);  //TODO: handle cluster_size = 0
-
-        pos = payload_pos;  //reset and re-parse original cluster
-    }
-
-    pos += cluster_size;  //consume payload
-    assert((segment_stop < 0) || (pos <= segment_stop));
-
-    return 2;             //try to find a cluster that follows next
+  return 2;  // try to find a cluster that follows next
 }
 
+const Cluster* Segment::FindCluster(long long time_ns) const {
+  if ((m_clusters == NULL) || (m_clusterCount <= 0))
+    return &m_eos;
 
-const Cluster* Segment::FindCluster(long long time_ns) const
-{
-    if ((m_clusters == NULL) || (m_clusterCount <= 0))
-        return &m_eos;
+  {
+    Cluster* const pCluster = m_clusters[0];
+    assert(pCluster);
+    assert(pCluster->m_index == 0);
 
-    {
-        Cluster* const pCluster = m_clusters[0];
-        assert(pCluster);
-        assert(pCluster->m_index == 0);
+    if (time_ns <= pCluster->GetTime())
+      return pCluster;
+  }
 
-        if (time_ns <= pCluster->GetTime())
-            return pCluster;
-    }
+  // Binary search of cluster array
 
-    //Binary search of cluster array
+  long i = 0;
+  long j = m_clusterCount;
 
-    long i = 0;
-    long j = m_clusterCount;
+  while (i < j) {
+    // INVARIANT:
+    //[0, i) <= time_ns
+    //[i, j) ?
+    //[j, m_clusterCount)  > time_ns
 
-    while (i < j)
-    {
-        //INVARIANT:
-        //[0, i) <= time_ns
-        //[i, j) ?
-        //[j, m_clusterCount)  > time_ns
-
-        const long k = i + (j - i) / 2;
-        assert(k < m_clusterCount);
-
-        Cluster* const pCluster = m_clusters[k];
-        assert(pCluster);
-        assert(pCluster->m_index == k);
-
-        const long long t = pCluster->GetTime();
-
-        if (t <= time_ns)
-            i = k + 1;
-        else
-            j = k;
-
-        assert(i <= j);
-    }
-
-    assert(i == j);
-    assert(i > 0);
-    assert(i <= m_clusterCount);
-
-    const long k = i - 1;
+    const long k = i + (j - i) / 2;
+    assert(k < m_clusterCount);
 
     Cluster* const pCluster = m_clusters[k];
     assert(pCluster);
     assert(pCluster->m_index == k);
-    assert(pCluster->GetTime() <= time_ns);
 
-    return pCluster;
+    const long long t = pCluster->GetTime();
+
+    if (t <= time_ns)
+      i = k + 1;
+    else
+      j = k;
+
+    assert(i <= j);
+  }
+
+  assert(i == j);
+  assert(i > 0);
+  assert(i <= m_clusterCount);
+
+  const long k = i - 1;
+
+  Cluster* const pCluster = m_clusters[k];
+  assert(pCluster);
+  assert(pCluster->m_index == k);
+  assert(pCluster->GetTime() <= time_ns);
+
+  return pCluster;
 }
 
-
 #if 0
 const BlockEntry* Segment::Seek(
     long long time_ns,
@@ -4064,8 +3672,7 @@
 
     Cluster** const j = i + m_clusterCount;
 
-    if (pTrack->GetType() == 2)  //audio
-    {
+    if (pTrack->GetType() == 2) {  //audio
         //TODO: we could decide to use cues for this, as we do for video.
         //But we only use it for video because looking around for a keyframe
         //can get expensive.  Audio doesn't require anything special so a
@@ -4184,7 +3791,6 @@
 }
 #endif
 
-
 #if 0
 bool Segment::SearchCues(
     long long time_ns,
@@ -4215,844 +3821,592 @@
 }
 #endif
 
+const Tracks* Segment::GetTracks() const { return m_pTracks; }
 
-const Tracks* Segment::GetTracks() const
-{
-    return m_pTracks;
+const SegmentInfo* Segment::GetInfo() const { return m_pInfo; }
+
+const Cues* Segment::GetCues() const { return m_pCues; }
+
+const Chapters* Segment::GetChapters() const { return m_pChapters; }
+
+const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; }
+
+long long Segment::GetDuration() const {  // forwards to SegmentInfo
+  assert(m_pInfo);  // the only guard: a NULL m_pInfo is not handled otherwise
+  return m_pInfo->GetDuration();  // -1 when SegmentInfo holds no duration
 }
 
+Chapters::Chapters(Segment* pSegment, long long payload_start,
+                   long long payload_size, long long element_start,
+                   long long element_size)
+    : m_pSegment(pSegment),
+      m_start(payload_start),  // Parse() starts reading here
+      m_size(payload_size),  // Parse() stops at m_start + m_size
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_editions(NULL),  // allocated on demand by ExpandEditionsArray()
+      m_editions_size(0),  // allocated capacity of m_editions
+      m_editions_count(0) {}  // number of entries actually in use
 
-const SegmentInfo* Segment::GetInfo() const
-{
-    return m_pInfo;
+Chapters::~Chapters() {
+  while (m_editions_count > 0) {  // walk the used entries, last to first
+    Edition& e = m_editions[--m_editions_count];
+    e.Clear();  // frees the edition's atoms; ~Edition() itself frees nothing
+  }  // NOTE(review): the m_editions array is not delete[]'d here - confirm
 }
 
+long Chapters::Parse() {
+  IMkvReader* const pReader = m_pSegment->m_pReader;
 
-const Cues* Segment::GetCues() const
-{
-    return m_pCues;
-}
+  long long pos = m_start;  // payload start
+  const long long stop = pos + m_size;  // payload stop
 
+  while (pos < stop) {
+    long long id, size;
 
-const Chapters* Segment::GetChapters() const
-{
-  return m_pChapters;
-}
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
 
+    if (status < 0)  // error
+      return status;
 
-const SeekHead* Segment::GetSeekHead() const
-{
-    return m_pSeekHead;
-}
+    if (size == 0)  // weird
+      continue;
 
+    if (id == 0x05B9) {  // EditionEntry ID
+      status = ParseEdition(pos, size);
 
-long long Segment::GetDuration() const
-{
-    assert(m_pInfo);
-    return m_pInfo->GetDuration();
-}
-
-
-Chapters::Chapters(
-    Segment* pSegment,
-    long long payload_start,
-    long long payload_size,
-    long long element_start,
-    long long element_size) :
-    m_pSegment(pSegment),
-    m_start(payload_start),
-    m_size(payload_size),
-    m_element_start(element_start),
-    m_element_size(element_size),
-    m_editions(NULL),
-    m_editions_size(0),
-    m_editions_count(0)
-{
-}
-
-
-Chapters::~Chapters()
-{
-    while (m_editions_count > 0)
-    {
-        Edition& e = m_editions[--m_editions_count];
-        e.Clear();
-    }
-}
-
-
-long Chapters::Parse()
-{
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    long long pos = m_start;  // payload start
-    const long long stop = pos + m_size;  // payload stop
-
-    while (pos < stop)
-    {
-        long long id, size;
-
-        long status = ParseElementHeader(
-                        pReader,
-                        pos,
-                        stop,
-                        id,
-                        size);
-
-        if (status < 0)  // error
-            return status;
-
-        if (size == 0)  // weird
-            continue;
-
-        if (id == 0x05B9)  // EditionEntry ID
-        {
-            status = ParseEdition(pos, size);
-
-            if (status < 0)  // error
-                return status;
-        }
-
-        pos += size;
-        assert(pos <= stop);
+      if (status < 0)  // error
+        return status;
     }
 
-    assert(pos == stop);
-    return 0;
+    pos += size;
+    assert(pos <= stop);
+  }
+
+  assert(pos == stop);
+  return 0;
 }
 
+int Chapters::GetEditionCount() const { return m_editions_count; }
 
-int Chapters::GetEditionCount() const
-{
-    return m_editions_count;
+const Chapters::Edition* Chapters::GetEdition(int idx) const {
+  if (idx < 0)  // out of range
+    return NULL;
+
+  if (idx >= m_editions_count)  // only parsed entries are reachable
+    return NULL;
+
+  return m_editions + idx;  // invalidated if ExpandEditionsArray() reallocates
 }
 
+bool Chapters::ExpandEditionsArray() {
+  if (m_editions_size > m_editions_count)
+    return true;  // nothing else to do
 
-const Chapters::Edition* Chapters::GetEdition(int idx) const
-{
-    if (idx < 0)
-        return NULL;
+  const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size;
 
-    if (idx >= m_editions_count)
-        return NULL;
+  Edition* const editions = new (std::nothrow) Edition[size];
 
-    return m_editions + idx;
+  if (editions == NULL)
+    return false;
+
+  for (int idx = 0; idx < m_editions_count; ++idx) {
+    m_editions[idx].ShallowCopy(editions[idx]);
+  }
+
+  delete[] m_editions;
+  m_editions = editions;
+
+  m_editions_size = size;
+  return true;
 }
 
+long Chapters::ParseEdition(long long pos, long long size) {
+  if (!ExpandEditionsArray())
+    return -1;
 
-bool Chapters::ExpandEditionsArray()
-{
-    if (m_editions_size > m_editions_count)
-        return true;  // nothing else to do
+  Edition& e = m_editions[m_editions_count++];
+  e.Init();
 
-    const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size;
+  return e.Parse(m_pSegment->m_pReader, pos, size);
+}
 
-    Edition* const editions = new (std::nothrow) Edition[size];
+Chapters::Edition::Edition() {}
 
-    if (editions == NULL)
-        return false;
+Chapters::Edition::~Edition() {}
 
-    for (int idx = 0; idx < m_editions_count; ++idx)
-    {
-        m_editions[idx].ShallowCopy(editions[idx]);
+int Chapters::Edition::GetAtomCount() const { return m_atoms_count; }
+
+const Chapters::Atom* Chapters::Edition::GetAtom(int index) const {
+  if (index < 0)
+    return NULL;
+
+  if (index >= m_atoms_count)
+    return NULL;
+
+  return m_atoms + index;
+}
+
+void Chapters::Edition::Init() {  // needed: Edition() leaves members unset
+  m_atoms = NULL;
+  m_atoms_size = 0;  // capacity
+  m_atoms_count = 0;  // entries in use
+}
+
+void Chapters::Edition::ShallowCopy(Edition& rhs) const {  // *this -> rhs
+  rhs.m_atoms = m_atoms;  // pointer is shared, the atoms are not duplicated
+  rhs.m_atoms_size = m_atoms_size;
+  rhs.m_atoms_count = m_atoms_count;
+}
+
+void Chapters::Edition::Clear() {
+  while (m_atoms_count > 0) {  // release each atom's storage, last to first
+    Atom& a = m_atoms[--m_atoms_count];
+    a.Clear();
+  }
+
+  delete[] m_atoms;  // then the array itself
+  m_atoms = NULL;  // reset so a repeated Clear() is harmless
+
+  m_atoms_size = 0;
+}
+
+long Chapters::Edition::Parse(IMkvReader* pReader, long long pos,
+                              long long size) {
+  const long long stop = pos + size;  // end of this edition's payload
+
+  while (pos < stop) {
+    long long id, size;  // NOTE: this size shadows the parameter above
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // weird: child with an empty payload, nothing to read
+      continue;
+
+    if (id == 0x36) {  // Atom ID
+      status = ParseAtom(pReader, pos, size);
+
+      if (status < 0)  // error
+        return status;
     }
 
-    delete[] m_atoms;
-    m_atoms = atoms;
+    pos += size;  // step over the child payload (other ids are ignored)
+    assert(pos <= stop);  // NOTE(review): overrun is only assert-checked
+  }
 
-    m_atoms_size = size;
-    return true;
+  assert(pos == stop);
+  return 0;
 }
 
+long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos,
+                                  long long size) {
+  if (!ExpandAtomsArray())
+    return -1;  // could not allocate room for another Atom
 
-long Chapters::ParseEdition(
-    long long pos,
-    long long size)
-{
-    if (!ExpandEditionsArray())
-        return -1;
+  Atom& a = m_atoms[m_atoms_count++];  // counted before it is parsed
+  a.Init();  // Atom() leaves its members unset
 
-    Edition& e = m_editions[m_editions_count++];
-    e.Init();
-
-    return e.Parse(m_pSegment->m_pReader, pos, size);
+  return a.Parse(pReader, pos, size);
 }
 
+bool Chapters::Edition::ExpandAtomsArray() {
+  if (m_atoms_size > m_atoms_count)
+    return true;  // a free slot already exists; nothing else to do
 
-Chapters::Edition::Edition()
-{
+  const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size;  // 1, then x2
+
+  Atom* const atoms = new (std::nothrow) Atom[size];
+
+  if (atoms == NULL)  // nothrow new failed; the old array is left untouched
+    return false;
+
+  for (int idx = 0; idx < m_atoms_count; ++idx) {
+    m_atoms[idx].ShallowCopy(atoms[idx]);  // hand pointers to the new slot
+  }
+
+  delete[] m_atoms;  // safe after the copy: ~Atom() frees nothing
+  m_atoms = atoms;
+
+  m_atoms_size = size;
+  return true;
 }
 
+Chapters::Atom::Atom() {}
 
-Chapters::Edition::~Edition()
-{
+Chapters::Atom::~Atom() {}
+
+unsigned long long Chapters::Atom::GetUID() const { return m_uid; }
+
+const char* Chapters::Atom::GetStringUID() const { return m_string_uid; }
+
+long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; }
+
+long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; }
+
+long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const {
+  return GetTime(pChapters, m_start_timecode);
 }
 
-
-int Chapters::Edition::GetAtomCount() const
-{
-    return m_atoms_count;
+long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const {
+  return GetTime(pChapters, m_stop_timecode);
 }
 
+int Chapters::Atom::GetDisplayCount() const { return m_displays_count; }
 
-const Chapters::Atom* Chapters::Edition::GetAtom(int index) const
-{
-    if (index < 0)
-        return NULL;
+const Chapters::Display* Chapters::Atom::GetDisplay(int index) const {
+  if (index < 0)
+    return NULL;
 
-    if (index >= m_atoms_count)
-        return NULL;
+  if (index >= m_displays_count)
+    return NULL;
 
-    return m_atoms + index;
+  return m_displays + index;
 }
 
+void Chapters::Atom::Init() {
+  m_string_uid = NULL;
+  m_uid = 0;
+  m_start_timecode = -1;
+  m_stop_timecode = -1;
 
-void Chapters::Edition::Init()
-{
-    m_atoms = NULL;
-    m_atoms_size = 0;
-    m_atoms_count = 0;
+  m_displays = NULL;
+  m_displays_size = 0;
+  m_displays_count = 0;
 }
 
+void Chapters::Atom::ShallowCopy(Atom& rhs) const {
+  rhs.m_string_uid = m_string_uid;
+  rhs.m_uid = m_uid;
+  rhs.m_start_timecode = m_start_timecode;
+  rhs.m_stop_timecode = m_stop_timecode;
 
-void Chapters::Edition::ShallowCopy(Edition& rhs) const
-{
-    rhs.m_atoms = m_atoms;
-    rhs.m_atoms_size = m_atoms_size;
-    rhs.m_atoms_count = m_atoms_count;
+  rhs.m_displays = m_displays;
+  rhs.m_displays_size = m_displays_size;
+  rhs.m_displays_count = m_displays_count;
 }
 
+void Chapters::Atom::Clear() {
+  delete[] m_string_uid;
+  m_string_uid = NULL;
 
-void Chapters::Edition::Clear()
-{
-    while (m_atoms_count > 0)
-    {
-        Atom& a = m_atoms[--m_atoms_count];
-        a.Clear();
+  while (m_displays_count > 0) {
+    Display& d = m_displays[--m_displays_count];
+    d.Clear();
+  }
+
+  delete[] m_displays;
+  m_displays = NULL;
+
+  m_displays_size = 0;
+}
+
+long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) {
+  const long long stop = pos + size;  // end of this atom's payload
+
+  while (pos < stop) {
+    long long id, size;  // NOTE: this size shadows the parameter above
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // weird: child with an empty payload, nothing to read
+      continue;
+
+    if (id == 0x00) {  // Display ID
+      status = ParseDisplay(pReader, pos, size);
+
+      if (status < 0)  // error
+        return status;
+    } else if (id == 0x1654) {  // StringUID ID
+      status = UnserializeString(pReader, pos, size, m_string_uid);
+
+      if (status < 0)  // error
+        return status;
+    } else if (id == 0x33C4) {  // UID ID
+      const long long val = UnserializeUInt(pReader, pos, size);
+
+      if (val < 0)  // error; NOTE(review): GetUID() is unsigned - confirm range
+        return static_cast<long>(val);
+
+      m_uid = val;
+    } else if (id == 0x11) {  // TimeStart ID
+      const long long val = UnserializeUInt(pReader, pos, size);
+
+      if (val < 0)  // error
+        return static_cast<long>(val);
+
+      m_start_timecode = val;  // replaces the -1 stored by Init()
+    } else if (id == 0x12) {  // TimeEnd ID
+      const long long val = UnserializeUInt(pReader, pos, size);
+
+      if (val < 0)  // error
+        return static_cast<long>(val);
+
+      m_stop_timecode = val;  // replaces the -1 stored by Init()
     }
 
-    delete[] m_atoms;
-    m_atoms = NULL;
+    pos += size;  // step over the child payload (other ids are ignored)
+    assert(pos <= stop);  // NOTE(review): overrun is only assert-checked
+  }
 
-    m_atoms_size = 0;
+  assert(pos == stop);
+  return 0;
 }
 
+long long Chapters::Atom::GetTime(const Chapters* pChapters,
+                                  long long timecode) {
+  if (pChapters == NULL)  // every failure below also yields -1
+    return -1;
 
-long Chapters::Edition::Parse(
-    IMkvReader* pReader,
-    long long pos,
-    long long size)
-{
-    const long long stop = pos + size;
+  Segment* const pSegment = pChapters->m_pSegment;
 
-    while (pos < stop)
-    {
-        long long id, size;
+  if (pSegment == NULL)  // weird
+    return -1;
 
-        long status = ParseElementHeader(
-                        pReader,
-                        pos,
-                        stop,
-                        id,
-                        size);
+  const SegmentInfo* const pInfo = pSegment->GetInfo();
 
-        if (status < 0)  // error
-            return status;
+  if (pInfo == NULL)  // no SegmentInfo, so no scale to apply
+    return -1;
 
-        if (size == 0)  // weird
-            continue;
+  const long long timecode_scale = pInfo->GetTimeCodeScale();
 
-        if (id == 0x36)  // Atom ID
-        {
-            status = ParseAtom(pReader, pos, size);
+  if (timecode_scale < 1)  // weird
+    return -1;
 
-            if (status < 0)  // error
-                return status;
-        }
+  if (timecode < 0)  // e.g. the -1 that Init() stores for an unset timecode
+    return -1;
 
-        pos += size;
-        assert(pos <= stop);
+  const long long result = timecode_scale * timecode;  // NOTE(review): unchecked
+
+  return result;
+}
+
+long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos,
+                                  long long size) {
+  if (!ExpandDisplaysArray())
+    return -1;  // could not allocate room for another Display
+
+  Display& d = m_displays[m_displays_count++];  // counted before it is parsed
+  d.Init();  // Display() leaves its members unset
+
+  return d.Parse(pReader, pos, size);
+}
+
+bool Chapters::Atom::ExpandDisplaysArray() {
+  if (m_displays_size > m_displays_count)
+    return true;  // a free slot already exists; nothing else to do
+
+  const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size;  // 1, x2
+
+  Display* const displays = new (std::nothrow) Display[size];
+
+  if (displays == NULL)  // nothrow new failed; the old array is left untouched
+    return false;
+
+  for (int idx = 0; idx < m_displays_count; ++idx) {
+    m_displays[idx].ShallowCopy(displays[idx]);  // hand pointers to new slot
+  }
+
+  delete[] m_displays;  // safe after the copy: ~Display() frees nothing
+  m_displays = displays;
+
+  m_displays_size = size;
+  return true;
+}
+
+Chapters::Display::Display() {}
+
+Chapters::Display::~Display() {}
+
+const char* Chapters::Display::GetString() const { return m_string; }
+
+const char* Chapters::Display::GetLanguage() const { return m_language; }
+
+const char* Chapters::Display::GetCountry() const { return m_country; }
+
+void Chapters::Display::Init() {
+  m_string = NULL;
+  m_language = NULL;
+  m_country = NULL;
+}
+
+void Chapters::Display::ShallowCopy(Display& rhs) const {
+  rhs.m_string = m_string;
+  rhs.m_language = m_language;
+  rhs.m_country = m_country;
+}
+
+void Chapters::Display::Clear() {  // frees the three strings held here
+  delete[] m_string;
+  m_string = NULL;  // pointers are reset, so a repeated Clear() is harmless
+
+  delete[] m_language;
+  m_language = NULL;
+
+  delete[] m_country;
+  m_country = NULL;
+}
+
+long Chapters::Display::Parse(IMkvReader* pReader, long long pos,
+                              long long size) {
+  const long long stop = pos + size;  // end of this display's payload
+
+  while (pos < stop) {
+    long long id, size;  // NOTE: this size shadows the parameter above
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // weird: child with an empty payload, nothing to read
+      continue;
+
+    if (id == 0x05) {  // ChapterString ID
+      status = UnserializeString(pReader, pos, size, m_string);
+
+      if (status)  // any nonzero status aborts the parse
+        return status;
+    } else if (id == 0x037C) {  // ChapterLanguage ID
+      status = UnserializeString(pReader, pos, size, m_language);
+
+      if (status)  // any nonzero status aborts the parse
+        return status;
+    } else if (id == 0x037E) {  // ChapterCountry ID
+      status = UnserializeString(pReader, pos, size, m_country);
+
+      if (status)  // any nonzero status aborts the parse
+        return status;
     }
 
-    assert(pos == stop);
-    return 0;
+    pos += size;  // step over the child payload (other ids are ignored)
+    assert(pos <= stop);  // NOTE(review): overrun is only assert-checked
+  }
+
+  assert(pos == stop);
+  return 0;
 }
 
+SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_,
+                         long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start),
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_pMuxingAppAsUTF8(NULL),
+      m_pWritingAppAsUTF8(NULL),
+      m_pTitleAsUTF8(NULL) {}
 
-long Chapters::Edition::ParseAtom(
-    IMkvReader* pReader,
-    long long pos,
-    long long size)
-{
-    if (!ExpandAtomsArray())
-        return -1;
+SegmentInfo::~SegmentInfo() {
+  delete[] m_pMuxingAppAsUTF8;
+  m_pMuxingAppAsUTF8 = NULL;
 
-    Atom& a = m_atoms[m_atoms_count++];
-    a.Init();
+  delete[] m_pWritingAppAsUTF8;
+  m_pWritingAppAsUTF8 = NULL;
 
-    return a.Parse(pReader, pos, size);
+  delete[] m_pTitleAsUTF8;
+  m_pTitleAsUTF8 = NULL;
 }
 
+long SegmentInfo::Parse() {
+  assert(m_pMuxingAppAsUTF8 == NULL);  // strings are NULL after construction
+  assert(m_pWritingAppAsUTF8 == NULL);
+  assert(m_pTitleAsUTF8 == NULL);
 
-bool Chapters::Edition::ExpandAtomsArray()
-{
-    if (m_atoms_size > m_atoms_count)
-        return true;  // nothing else to do
+  IMkvReader* const pReader = m_pSegment->m_pReader;
 
-    const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size;
+  long long pos = m_start;
+  const long long stop = m_start + m_size;  // end of the payload
 
-    Atom* const atoms = new (std::nothrow) Atom[size];
+  m_timecodeScale = 1000000;  // value kept when no Timecode Scale is found
+  m_duration = -1;  // value kept when no duration is found
 
-    if (atoms == NULL)
-        return false;
+  while (pos < stop) {
+    long long id, size;
 
-    for (int idx = 0; idx < m_atoms_count; ++idx)
-    {
-        m_atoms[idx].ShallowCopy(atoms[idx]);
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == 0x0AD7B1) {  // Timecode Scale
+      m_timecodeScale = UnserializeUInt(pReader, pos, size);
+
+      if (m_timecodeScale <= 0)  // zero or a negative (error) result
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x0489) {  // Segment duration
+      const long status = UnserializeFloat(pReader, pos, size, m_duration);
+
+      if (status < 0)  // this inner status shadows the loop-level one
+        return status;
+
+      if (m_duration < 0)  // negative durations are rejected
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x0D80) {  // MuxingApp
+      const long status =
+          UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8);
+
+      if (status)  // any nonzero status aborts the parse
+        return status;
+    } else if (id == 0x1741) {  // WritingApp
+      const long status =
+          UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8);
+
+      if (status)  // any nonzero status aborts the parse
+        return status;
+    } else if (id == 0x3BA9) {  // Title
+      const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8);
+
+      if (status)  // any nonzero status aborts the parse
+        return status;
     }
 
-    delete[] m_atoms;
-    m_atoms = atoms;
+    pos += size;  // step over the payload (other ids are ignored)
+    assert(pos <= stop);  // NOTE(review): overrun is only assert-checked
+  }
 
-    m_atoms_size = size;
-    return true;
+  assert(pos == stop);
+
+  return 0;
 }
 
+long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; }
 
-Chapters::Atom::Atom()
-{
+long long SegmentInfo::GetDuration() const {
+  if (m_duration < 0)  // Parse() stores -1 until a duration element is read
+    return -1;
+
+  assert(m_timecodeScale >= 1);  // Parse() rejects values <= 0
+
+  const double dd = double(m_duration) * double(m_timecodeScale);
+  const long long d = static_cast<long long>(dd);  // fraction is discarded
+
+  return d;
 }
 
-
-Chapters::Atom::~Atom()
-{
+const char* SegmentInfo::GetMuxingAppAsUTF8() const {
+  return m_pMuxingAppAsUTF8;
 }
 
-
-unsigned long long Chapters::Atom::GetUID() const
-{
-    return m_uid;
+const char* SegmentInfo::GetWritingAppAsUTF8() const {
+  return m_pWritingAppAsUTF8;
 }
 
-
-const char* Chapters::Atom::GetStringUID() const
-{
-    return m_string_uid;
-}
-
-
-long long Chapters::Atom::GetStartTimecode() const
-{
-    return m_start_timecode;
-}
-
-
-long long Chapters::Atom::GetStopTimecode() const
-{
-    return m_stop_timecode;
-}
-
-
-long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const
-{
-    return GetTime(pChapters, m_start_timecode);
-}
-
-
-long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const
-{
-    return GetTime(pChapters, m_stop_timecode);
-}
-
-
-int Chapters::Atom::GetDisplayCount() const
-{
-    return m_displays_count;
-}
-
-
-const Chapters::Display* Chapters::Atom::GetDisplay(int index) const
-{
-    if (index < 0)
-        return NULL;
-
-    if (index >= m_displays_count)
-        return NULL;
-
-    return m_displays + index;
-}
-
-
-void Chapters::Atom::Init()
-{
-    m_string_uid = NULL;
-    m_uid = 0;
-    m_start_timecode = -1;
-    m_stop_timecode = -1;
-
-    m_displays = NULL;
-    m_displays_size = 0;
-    m_displays_count = 0;
-}
-
-
-void Chapters::Atom::ShallowCopy(Atom& rhs) const
-{
-    rhs.m_string_uid = m_string_uid;
-    rhs.m_uid = m_uid;
-    rhs.m_start_timecode = m_start_timecode;
-    rhs.m_stop_timecode = m_stop_timecode;
-
-    rhs.m_displays = m_displays;
-    rhs.m_displays_size = m_displays_size;
-    rhs.m_displays_count = m_displays_count;
-}
-
-
-void Chapters::Atom::Clear()
-{
-    delete[] m_string_uid;
-    m_string_uid = NULL;
-
-    while (m_displays_count > 0)
-    {
-        Display& d = m_displays[--m_displays_count];
-        d.Clear();
-    }
-
-    delete[] m_displays;
-    m_displays = NULL;
-
-    m_displays_size = 0;
-}
-
-
-long Chapters::Atom::Parse(
-    IMkvReader* pReader,
-    long long pos,
-    long long size)
-{
-    const long long stop = pos + size;
-
-    while (pos < stop)
-    {
-        long long id, size;
-
-        long status = ParseElementHeader(
-                        pReader,
-                        pos,
-                        stop,
-                        id,
-                        size);
-
-        if (status < 0)  // error
-            return status;
-
-        if (size == 0)  // weird
-            continue;
-
-        if (id == 0x00)  // Display ID
-        {
-            status = ParseDisplay(pReader, pos, size);
-
-            if (status < 0)  // error
-                return status;
-        }
-        else if (id == 0x1654)  // StringUID ID
-        {
-            status = UnserializeString(pReader, pos, size, m_string_uid);
-
-            if (status < 0)  // error
-                return status;
-        }
-        else if (id == 0x33C4)  // UID ID
-        {
-            const long long val = UnserializeUInt(pReader, pos, size);
-
-            if (val < 0)  // error
-                return static_cast<long>(val);
-
-            m_uid = val;
-        }
-        else if (id == 0x11)  // TimeStart ID
-        {
-            const long long val = UnserializeUInt(pReader, pos, size);
-
-            if (val < 0)  // error
-                return static_cast<long>(val);
-
-            m_start_timecode = val;
-        }
-        else if (id == 0x12)  // TimeEnd ID
-        {
-            const long long val = UnserializeUInt(pReader, pos, size);
-
-            if (val < 0)  // error
-                return static_cast<long>(val);
-
-            m_stop_timecode = val;
-        }
-
-        pos += size;
-        assert(pos <= stop);
-    }
-
-    assert(pos == stop);
-    return 0;
-}
-
-
-long long Chapters::Atom::GetTime(
-    const Chapters* pChapters,
-    long long timecode)
-{
-    if (pChapters == NULL)
-        return -1;
-
-    Segment* const pSegment = pChapters->m_pSegment;
-
-    if (pSegment == NULL)  // weird
-        return -1;
-
-    const SegmentInfo* const pInfo = pSegment->GetInfo();
-
-    if (pInfo == NULL)
-        return -1;
-
-    const long long timecode_scale = pInfo->GetTimeCodeScale();
-
-    if (timecode_scale < 1)  // weird
-        return -1;
-
-    if (timecode < 0)
-        return -1;
-
-    const long long result = timecode_scale * timecode;
-
-    return result;
-}
-
-
-long Chapters::Atom::ParseDisplay(
-    IMkvReader* pReader,
-    long long pos,
-    long long size)
-{
-    if (!ExpandDisplaysArray())
-        return -1;
-
-    Display& d = m_displays[m_displays_count++];
-    d.Init();
-
-    return d.Parse(pReader, pos, size);
-}
-
-
-bool Chapters::Atom::ExpandDisplaysArray()
-{
-    if (m_displays_size > m_displays_count)
-        return true;  // nothing else to do
-
-    const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size;
-
-    Display* const displays = new (std::nothrow) Display[size];
-
-    if (displays == NULL)
-        return false;
-
-    for (int idx = 0; idx < m_displays_count; ++idx)
-    {
-        m_displays[idx].ShallowCopy(displays[idx]);
-    }
-
-    delete[] m_displays;
-    m_displays = displays;
-
-    m_displays_size = size;
-    return true;
-}
-
-
-Chapters::Display::Display()
-{
-}
-
-
-Chapters::Display::~Display()
-{
-}
-
-
-const char* Chapters::Display::GetString() const
-{
-    return m_string;
-}
-
-
-const char* Chapters::Display::GetLanguage() const
-{
-    return m_language;
-}
-
-
-const char* Chapters::Display::GetCountry() const
-{
-    return m_country;
-}
-
-
-void Chapters::Display::Init()
-{
-    m_string = NULL;
-    m_language = NULL;
-    m_country = NULL;
-}
-
-
-void Chapters::Display::ShallowCopy(Display& rhs) const
-{
-    rhs.m_string = m_string;
-    rhs.m_language = m_language;
-    rhs.m_country = m_country;
-}
-
-
-void Chapters::Display::Clear()
-{
-    delete[] m_string;
-    m_string = NULL;
-
-    delete[] m_language;
-    m_language = NULL;
-
-    delete[] m_country;
-    m_country = NULL;
-}
-
-
-long Chapters::Display::Parse(
-    IMkvReader* pReader,
-    long long pos,
-    long long size)
-{
-    const long long stop = pos + size;
-
-    while (pos < stop)
-    {
-        long long id, size;
-
-        long status = ParseElementHeader(
-                        pReader,
-                        pos,
-                        stop,
-                        id,
-                        size);
-
-        if (status < 0)  // error
-            return status;
-
-        if (size == 0)  // weird
-            continue;
-
-        if (id == 0x05)  // ChapterString ID
-        {
-            status = UnserializeString(pReader, pos, size, m_string);
-
-            if (status)
-              return status;
-        }
-        else if (id == 0x037C)  // ChapterLanguage ID
-        {
-            status = UnserializeString(pReader, pos, size, m_language);
-
-            if (status)
-              return status;
-        }
-        else if (id == 0x037E)  // ChapterCountry ID
-        {
-            status = UnserializeString(pReader, pos, size, m_country);
-
-            if (status)
-              return status;
-        }
-
-        pos += size;
-        assert(pos <= stop);
-    }
-
-    assert(pos == stop);
-    return 0;
-}
-
-
-SegmentInfo::SegmentInfo(
-    Segment* pSegment,
-    long long start,
-    long long size_,
-    long long element_start,
-    long long element_size) :
-    m_pSegment(pSegment),
-    m_start(start),
-    m_size(size_),
-    m_element_start(element_start),
-    m_element_size(element_size),
-    m_pMuxingAppAsUTF8(NULL),
-    m_pWritingAppAsUTF8(NULL),
-    m_pTitleAsUTF8(NULL)
-{
-}
-
-SegmentInfo::~SegmentInfo()
-{
-    delete[] m_pMuxingAppAsUTF8;
-    m_pMuxingAppAsUTF8 = NULL;
-
-    delete[] m_pWritingAppAsUTF8;
-    m_pWritingAppAsUTF8 = NULL;
-
-    delete[] m_pTitleAsUTF8;
-    m_pTitleAsUTF8 = NULL;
-}
-
-
-long SegmentInfo::Parse()
-{
-    assert(m_pMuxingAppAsUTF8 == NULL);
-    assert(m_pWritingAppAsUTF8 == NULL);
-    assert(m_pTitleAsUTF8 == NULL);
-
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    long long pos = m_start;
-    const long long stop = m_start + m_size;
-
-    m_timecodeScale = 1000000;
-    m_duration = -1;
-
-    while (pos < stop)
-    {
-        long long id, size;
-
-        const long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                stop,
-                                id,
-                                size);
-
-        if (status < 0)  //error
-            return status;
-
-        if (id == 0x0AD7B1)  //Timecode Scale
-        {
-            m_timecodeScale = UnserializeUInt(pReader, pos, size);
-
-            if (m_timecodeScale <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x0489)  //Segment duration
-        {
-            const long status = UnserializeFloat(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    m_duration);
-
-            if (status < 0)
-                return status;
-
-            if (m_duration < 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x0D80)  //MuxingApp
-        {
-            const long status = UnserializeString(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    m_pMuxingAppAsUTF8);
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x1741)  //WritingApp
-        {
-            const long status = UnserializeString(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    m_pWritingAppAsUTF8);
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x3BA9)  //Title
-        {
-            const long status = UnserializeString(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    m_pTitleAsUTF8);
-
-            if (status)
-                return status;
-        }
-
-        pos += size;
-        assert(pos <= stop);
-    }
-
-    assert(pos == stop);
-
-    return 0;
-}
-
-
-long long SegmentInfo::GetTimeCodeScale() const
-{
-    return m_timecodeScale;
-}
-
-
-long long SegmentInfo::GetDuration() const
-{
-    if (m_duration < 0)
-        return -1;
-
-    assert(m_timecodeScale >= 1);
-
-    const double dd = double(m_duration) * double(m_timecodeScale);
-    const long long d = static_cast<long long>(dd);
-
-    return d;
-}
-
-const char* SegmentInfo::GetMuxingAppAsUTF8() const
-{
-    return m_pMuxingAppAsUTF8;
-}
-
-
-const char* SegmentInfo::GetWritingAppAsUTF8() const
-{
-    return m_pWritingAppAsUTF8;
-}
-
-const char* SegmentInfo::GetTitleAsUTF8() const
-{
-    return m_pTitleAsUTF8;
-}
+const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; }
 
 ///////////////////////////////////////////////////////////////
 // ContentEncoding element
 ContentEncoding::ContentCompression::ContentCompression()
-    : algo(0),
-      settings(NULL),
-      settings_len(0) {
-}
+    : algo(0), settings(NULL), settings_len(0) {}
 
 ContentEncoding::ContentCompression::~ContentCompression() {
-  delete [] settings;
+  delete[] settings;
 }
 
 ContentEncoding::ContentEncryption::ContentEncryption()
@@ -5064,13 +4418,12 @@
       sig_key_id(NULL),
       sig_key_id_len(0),
       sig_algo(0),
-      sig_hash_algo(0) {
-}
+      sig_hash_algo(0) {}
 
 ContentEncoding::ContentEncryption::~ContentEncryption() {
-  delete [] key_id;
-  delete [] signature;
-  delete [] sig_key_id;
+  delete[] key_id;
+  delete[] signature;
+  delete[] sig_key_id;
 }
 
 ContentEncoding::ContentEncoding()
@@ -5080,8 +4433,7 @@
       encryption_entries_end_(NULL),
       encoding_order_(0),
       encoding_scope_(1),
-      encoding_type_(0) {
-}
+      encoding_type_(0) {}
 
 ContentEncoding::~ContentEncoding() {
   ContentCompression** comp_i = compression_entries_;
@@ -5092,7 +4444,7 @@
     delete comp;
   }
 
-  delete [] compression_entries_;
+  delete[] compression_entries_;
 
   ContentEncryption** enc_i = encryption_entries_;
   ContentEncryption** const enc_j = encryption_entries_end_;
@@ -5102,10 +4454,9 @@
     delete enc;
   }
 
-  delete [] encryption_entries_;
+  delete[] encryption_entries_;
 }
 
-
 const ContentEncoding::ContentCompression*
 ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
   const ptrdiff_t count = compression_entries_end_ - compression_entries_;
@@ -5124,8 +4475,8 @@
   return static_cast<unsigned long>(count);
 }
 
-const ContentEncoding::ContentEncryption*
-ContentEncoding::GetEncryptionByIndex(unsigned long idx) const {
+const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex(
+    unsigned long idx) const {
   const ptrdiff_t count = encryption_entries_end_ - encryption_entries_;
   assert(count >= 0);
 
@@ -5143,9 +4494,7 @@
 }
 
 long ContentEncoding::ParseContentEncAESSettingsEntry(
-    long long start,
-    long long size,
-    IMkvReader* pReader,
+    long long start, long long size, IMkvReader* pReader,
     ContentEncAESSettings* aes) {
   assert(pReader);
   assert(aes);
@@ -5155,12 +4504,8 @@
 
   while (pos < stop) {
     long long id, size;
-    const long status = ParseElementHeader(pReader,
-                                           pos,
-                                           stop,
-                                           id,
-                                           size);
-    if (status < 0)  //error
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
       return status;
 
     if (id == 0x7E8) {
@@ -5170,15 +4515,14 @@
         return E_FILE_FORMAT_INVALID;
     }
 
-    pos += size;  //consume payload
+    pos += size;  // consume payload
     assert(pos <= stop);
   }
 
   return 0;
 }
 
-long ContentEncoding::ParseContentEncodingEntry(long long start,
-                                                long long size,
+long ContentEncoding::ParseContentEncodingEntry(long long start, long long size,
                                                 IMkvReader* pReader) {
   assert(pReader);
 
@@ -5191,12 +4535,8 @@
 
   while (pos < stop) {
     long long id, size;
-    const long status = ParseElementHeader(pReader,
-                                           pos,
-                                           stop,
-                                           id,
-                                           size);
-    if (status < 0)  //error
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
       return status;
 
     if (id == 0x1034)  // ContentCompression ID
@@ -5205,7 +4545,7 @@
     if (id == 0x1035)  // ContentEncryption ID
       ++encryption_count;
 
-    pos += size;  //consume payload
+    pos += size;  // consume payload
     assert(pos <= stop);
   }
 
@@ -5214,7 +4554,7 @@
 
   if (compression_count > 0) {
     compression_entries_ =
-        new (std::nothrow) ContentCompression*[compression_count];
+        new (std::nothrow) ContentCompression* [compression_count];
     if (!compression_entries_)
       return -1;
     compression_entries_end_ = compression_entries_;
@@ -5222,9 +4562,9 @@
 
   if (encryption_count > 0) {
     encryption_entries_ =
-        new (std::nothrow) ContentEncryption*[encryption_count];
+        new (std::nothrow) ContentEncryption* [encryption_count];
     if (!encryption_entries_) {
-      delete [] compression_entries_;
+      delete[] compression_entries_;
       return -1;
     }
     encryption_entries_end_ = encryption_entries_;
@@ -5233,12 +4573,8 @@
   pos = start;
   while (pos < stop) {
     long long id, size;
-    long status = ParseElementHeader(pReader,
-                                     pos,
-                                     stop,
-                                     id,
-                                     size);
-    if (status < 0)  //error
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
       return status;
 
     if (id == 0x1031) {
@@ -5255,7 +4591,7 @@
     } else if (id == 0x1034) {
       // ContentCompression ID
       ContentCompression* const compression =
-        new (std::nothrow) ContentCompression();
+          new (std::nothrow) ContentCompression();
       if (!compression)
         return -1;
 
@@ -5280,7 +4616,7 @@
       *encryption_entries_end_++ = encryption;
     }
 
-    pos += size;  //consume payload
+    pos += size;  // consume payload
     assert(pos <= stop);
   }
 
@@ -5288,11 +4624,9 @@
   return 0;
 }
 
-long ContentEncoding::ParseCompressionEntry(
-    long long start,
-    long long size,
-    IMkvReader* pReader,
-    ContentCompression* compression) {
+long ContentEncoding::ParseCompressionEntry(long long start, long long size,
+                                            IMkvReader* pReader,
+                                            ContentCompression* compression) {
   assert(pReader);
   assert(compression);
 
@@ -5303,12 +4637,8 @@
 
   while (pos < stop) {
     long long id, size;
-    const long status = ParseElementHeader(pReader,
-                                           pos,
-                                           stop,
-                                           id,
-                                           size);
-    if (status < 0)  //error
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
       return status;
 
     if (id == 0x254) {
@@ -5329,9 +4659,10 @@
       if (buf == NULL)
         return -1;
 
-      const int read_status = pReader->Read(pos, buflen, buf);
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
       if (read_status) {
-        delete [] buf;
+        delete[] buf;
         return status;
       }
 
@@ -5339,7 +4670,7 @@
       compression->settings_len = buflen;
     }
 
-    pos += size;  //consume payload
+    pos += size;  // consume payload
     assert(pos <= stop);
   }
 
@@ -5350,11 +4681,9 @@
   return 0;
 }
 
-long ContentEncoding::ParseEncryptionEntry(
-    long long start,
-    long long size,
-    IMkvReader* pReader,
-    ContentEncryption* encryption) {
+long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
+                                           IMkvReader* pReader,
+                                           ContentEncryption* encryption) {
   assert(pReader);
   assert(encryption);
 
@@ -5363,12 +4692,8 @@
 
   while (pos < stop) {
     long long id, size;
-    const long status = ParseElementHeader(pReader,
-                                           pos,
-                                           stop,
-                                           id,
-                                           size);
-    if (status < 0)  //error
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
       return status;
 
     if (id == 0x7E1) {
@@ -5378,7 +4703,7 @@
         return E_FILE_FORMAT_INVALID;
     } else if (id == 0x7E2) {
       // ContentEncKeyID
-      delete[] encryption->key_id;
+      delete[] encryption -> key_id;
       encryption->key_id = NULL;
       encryption->key_id_len = 0;
 
@@ -5391,9 +4716,10 @@
       if (buf == NULL)
         return -1;
 
-      const int read_status = pReader->Read(pos, buflen, buf);
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
       if (read_status) {
-        delete [] buf;
+        delete[] buf;
         return status;
       }
 
@@ -5401,7 +4727,7 @@
       encryption->key_id_len = buflen;
     } else if (id == 0x7E3) {
       // ContentSignature
-      delete[] encryption->signature;
+      delete[] encryption -> signature;
       encryption->signature = NULL;
       encryption->signature_len = 0;
 
@@ -5414,9 +4740,10 @@
       if (buf == NULL)
         return -1;
 
-      const int read_status = pReader->Read(pos, buflen, buf);
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
       if (read_status) {
-        delete [] buf;
+        delete[] buf;
         return status;
       }
 
@@ -5424,7 +4751,7 @@
       encryption->signature_len = buflen;
     } else if (id == 0x7E4) {
       // ContentSigKeyID
-      delete[] encryption->sig_key_id;
+      delete[] encryption -> sig_key_id;
       encryption->sig_key_id = NULL;
       encryption->sig_key_id_len = 0;
 
@@ -5437,9 +4764,10 @@
       if (buf == NULL)
         return -1;
 
-      const int read_status = pReader->Read(pos, buflen, buf);
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
       if (read_status) {
-        delete [] buf;
+        delete[] buf;
         return status;
       }
 
@@ -5454,400 +4782,322 @@
     } else if (id == 0x7E7) {
       // ContentEncAESSettings
       const long status = ParseContentEncAESSettingsEntry(
-          pos,
-          size,
-          pReader,
-          &encryption->aes_settings);
+          pos, size, pReader, &encryption->aes_settings);
       if (status)
         return status;
     }
 
-    pos += size;  //consume payload
+    pos += size;  // consume payload
     assert(pos <= stop);
   }
 
   return 0;
 }
 
-Track::Track(
-    Segment* pSegment,
-    long long element_start,
-    long long element_size) :
-    m_pSegment(pSegment),
-    m_element_start(element_start),
-    m_element_size(element_size),
-    content_encoding_entries_(NULL),
-    content_encoding_entries_end_(NULL)
-{
+Track::Track(Segment* pSegment, long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      content_encoding_entries_(NULL),
+      content_encoding_entries_end_(NULL) {}
+
+Track::~Track() {
+  Info& info = const_cast<Info&>(m_info);
+  info.Clear();
+
+  ContentEncoding** i = content_encoding_entries_;
+  ContentEncoding** const j = content_encoding_entries_end_;
+
+  while (i != j) {
+    ContentEncoding* const encoding = *i++;
+    delete encoding;
+  }
+
+  delete[] content_encoding_entries_;
 }
 
-Track::~Track()
-{
-    Info& info = const_cast<Info&>(m_info);
-    info.Clear();
+long Track::Create(Segment* pSegment, const Info& info, long long element_start,
+                   long long element_size, Track*& pResult) {
+  if (pResult)
+    return -1;
 
-    ContentEncoding** i = content_encoding_entries_;
-    ContentEncoding** const j = content_encoding_entries_end_;
+  Track* const pTrack =
+      new (std::nothrow) Track(pSegment, element_start, element_size);
 
-    while (i != j) {
-        ContentEncoding* const encoding = *i++;
-        delete encoding;
-    }
+  if (pTrack == NULL)
+    return -1;  // generic error
 
-    delete [] content_encoding_entries_;
+  const int status = info.Copy(pTrack->m_info);
+
+  if (status) {  // error
+    delete pTrack;
+    return status;
+  }
+
+  pResult = pTrack;
+  return 0;  // success
 }
 
-long Track::Create(
-    Segment* pSegment,
-    const Info& info,
-    long long element_start,
-    long long element_size,
-    Track*& pResult)
-{
-    if (pResult)
-        return -1;
+Track::Info::Info()
+    : uid(0),
+      defaultDuration(0),
+      codecDelay(0),
+      seekPreRoll(0),
+      nameAsUTF8(NULL),
+      language(NULL),
+      codecId(NULL),
+      codecNameAsUTF8(NULL),
+      codecPrivate(NULL),
+      codecPrivateSize(0),
+      lacing(false) {}
 
-    Track* const pTrack = new (std::nothrow) Track(pSegment,
-                                                   element_start,
-                                                   element_size);
+Track::Info::~Info() { Clear(); }
 
-    if (pTrack == NULL)
-        return -1;  //generic error
+void Track::Info::Clear() {
+  delete[] nameAsUTF8;
+  nameAsUTF8 = NULL;
 
-    const int status = info.Copy(pTrack->m_info);
+  delete[] language;
+  language = NULL;
 
-    if (status)  // error
-    {
-        delete pTrack;
-        return status;
-    }
+  delete[] codecId;
+  codecId = NULL;
 
-    pResult = pTrack;
-    return 0;  //success
+  delete[] codecPrivate;
+  codecPrivate = NULL;
+  codecPrivateSize = 0;
+
+  delete[] codecNameAsUTF8;
+  codecNameAsUTF8 = NULL;
 }
 
-Track::Info::Info():
-    uid(0),
-    defaultDuration(0),
-    codecDelay(0),
-    seekPreRoll(0),
-    nameAsUTF8(NULL),
-    language(NULL),
-    codecId(NULL),
-    codecNameAsUTF8(NULL),
-    codecPrivate(NULL),
-    codecPrivateSize(0),
-    lacing(false)
-{
-}
+int Track::Info::CopyStr(char* Info::*str, Info& dst_) const {
+  if (str == static_cast<char * Info::*>(NULL))
+    return -1;
 
-Track::Info::~Info()
-{
-    Clear();
-}
+  char*& dst = dst_.*str;
 
-void Track::Info::Clear()
-{
-    delete[] nameAsUTF8;
-    nameAsUTF8 = NULL;
+  if (dst)  // should be NULL already
+    return -1;
 
-    delete[] language;
-    language = NULL;
+  const char* const src = this->*str;
 
-    delete[] codecId;
-    codecId = NULL;
-
-    delete[] codecPrivate;
-    codecPrivate = NULL;
-    codecPrivateSize = 0;
-
-    delete[] codecNameAsUTF8;
-    codecNameAsUTF8 = NULL;
-}
-
-int Track::Info::CopyStr(char* Info::*str, Info& dst_) const
-{
-    if (str == static_cast<char* Info::*>(NULL))
-        return -1;
-
-    char*& dst = dst_.*str;
-
-    if (dst)  //should be NULL already
-        return -1;
-
-    const char* const src = this->*str;
-
-    if (src == NULL)
-        return 0;
-
-    const size_t len = strlen(src);
-
-    dst = new (std::nothrow) char[len+1];
-
-    if (dst == NULL)
-        return -1;
-
-    strcpy(dst, src);
-
+  if (src == NULL)
     return 0;
+
+  const size_t len = strlen(src);
+
+  dst = new (std::nothrow) char[len + 1];
+
+  if (dst == NULL)
+    return -1;
+
+  strcpy(dst, src);
+
+  return 0;
 }
 
+int Track::Info::Copy(Info& dst) const {
+  if (&dst == this)
+    return 0;
 
-int Track::Info::Copy(Info& dst) const
-{
-    if (&dst == this)
-        return 0;
+  dst.type = type;
+  dst.number = number;
+  dst.defaultDuration = defaultDuration;
+  dst.codecDelay = codecDelay;
+  dst.seekPreRoll = seekPreRoll;
+  dst.uid = uid;
+  dst.lacing = lacing;
+  dst.settings = settings;
 
-    dst.type = type;
-    dst.number = number;
-    dst.defaultDuration = defaultDuration;
-    dst.codecDelay = codecDelay;
-    dst.seekPreRoll = seekPreRoll;
-    dst.uid = uid;
-    dst.lacing = lacing;
-    dst.settings = settings;
+  // We now copy the string member variables from src to dst.
+  // This involves memory allocation so in principle the operation
+  // can fail (indeed, that's why we have Info::Copy), so we must
+  // report this to the caller.  An error return from this function
+  // therefore implies that the copy was only partially successful.
 
-    //We now copy the string member variables from src to dst.
-    //This involves memory allocation so in principle the operation
-    //can fail (indeed, that's why we have Info::Copy), so we must
-    //report this to the caller.  An error return from this function
-    //therefore implies that the copy was only partially successful.
+  if (int status = CopyStr(&Info::nameAsUTF8, dst))
+    return status;
 
-    if (int status = CopyStr(&Info::nameAsUTF8, dst))
-        return status;
+  if (int status = CopyStr(&Info::language, dst))
+    return status;
 
-    if (int status = CopyStr(&Info::language, dst))
-        return status;
+  if (int status = CopyStr(&Info::codecId, dst))
+    return status;
 
-    if (int status = CopyStr(&Info::codecId, dst))
-        return status;
+  if (int status = CopyStr(&Info::codecNameAsUTF8, dst))
+    return status;
 
-    if (int status = CopyStr(&Info::codecNameAsUTF8, dst))
-        return status;
+  if (codecPrivateSize > 0) {
+    if (codecPrivate == NULL)
+      return -1;
 
-    if (codecPrivateSize > 0)
-    {
-        if (codecPrivate == NULL)
-            return -1;
+    if (dst.codecPrivate)
+      return -1;
 
-        if (dst.codecPrivate)
-            return -1;
+    if (dst.codecPrivateSize != 0)
+      return -1;
 
-        if (dst.codecPrivateSize != 0)
-            return -1;
+    dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize];
 
-        dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize];
+    if (dst.codecPrivate == NULL)
+      return -1;
 
-        if (dst.codecPrivate == NULL)
-            return -1;
+    memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize);
+    dst.codecPrivateSize = codecPrivateSize;
+  }
 
-        memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize);
-        dst.codecPrivateSize = codecPrivateSize;
+  return 0;
+}
+
+const BlockEntry* Track::GetEOS() const { return &m_eos; }
+
+long Track::GetType() const { return m_info.type; }
+
+long Track::GetNumber() const { return m_info.number; }
+
+unsigned long long Track::GetUid() const { return m_info.uid; }
+
+const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; }
+
+const char* Track::GetLanguage() const { return m_info.language; }
+
+const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; }
+
+const char* Track::GetCodecId() const { return m_info.codecId; }
+
+const unsigned char* Track::GetCodecPrivate(size_t& size) const {
+  size = m_info.codecPrivateSize;
+  return m_info.codecPrivate;
+}
+
+bool Track::GetLacing() const { return m_info.lacing; }
+
+unsigned long long Track::GetDefaultDuration() const {
+  return m_info.defaultDuration;
+}
+
+unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; }
+
+unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; }
+
+long Track::GetFirst(const BlockEntry*& pBlockEntry) const {
+  const Cluster* pCluster = m_pSegment->GetFirst();
+
+  for (int i = 0;;) {
+    if (pCluster == NULL) {
+      pBlockEntry = GetEOS();
+      return 1;
     }
 
-    return 0;
-}
-
-const BlockEntry* Track::GetEOS() const
-{
-    return &m_eos;
-}
-
-long Track::GetType() const
-{
-    return m_info.type;
-}
-
-long Track::GetNumber() const
-{
-    return m_info.number;
-}
-
-unsigned long long Track::GetUid() const
-{
-    return m_info.uid;
-}
-
-const char* Track::GetNameAsUTF8() const
-{
-    return m_info.nameAsUTF8;
-}
-
-const char* Track::GetLanguage() const
-{
-    return m_info.language;
-}
-
-const char* Track::GetCodecNameAsUTF8() const
-{
-    return m_info.codecNameAsUTF8;
-}
-
-
-const char* Track::GetCodecId() const
-{
-    return m_info.codecId;
-}
-
-const unsigned char* Track::GetCodecPrivate(size_t& size) const
-{
-    size = m_info.codecPrivateSize;
-    return m_info.codecPrivate;
-}
-
-
-bool Track::GetLacing() const
-{
-    return m_info.lacing;
-}
-
-unsigned long long Track::GetDefaultDuration() const
-{
-    return m_info.defaultDuration;
-}
-
-unsigned long long Track::GetCodecDelay() const
-{
-    return m_info.codecDelay;
-}
-
-unsigned long long Track::GetSeekPreRoll() const
-{
-    return m_info.seekPreRoll;
-}
-
-long Track::GetFirst(const BlockEntry*& pBlockEntry) const
-{
-    const Cluster* pCluster = m_pSegment->GetFirst();
-
-    for (int i = 0; ; )
-    {
-        if (pCluster == NULL)
-        {
-            pBlockEntry = GetEOS();
-            return 1;
-        }
-
-        if (pCluster->EOS())
-        {
+    if (pCluster->EOS()) {
 #if 0
-            if (m_pSegment->Unparsed() <= 0)  //all clusters have been loaded
-            {
+            if (m_pSegment->Unparsed() <= 0) {  //all clusters have been loaded
                 pBlockEntry = GetEOS();
                 return 1;
             }
 #else
-            if (m_pSegment->DoneParsing())
-            {
-                pBlockEntry = GetEOS();
-                return 1;
-            }
+      if (m_pSegment->DoneParsing()) {
+        pBlockEntry = GetEOS();
+        return 1;
+      }
 #endif
 
-            pBlockEntry = 0;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        long status = pCluster->GetFirst(pBlockEntry);
-
-        if (status < 0)  //error
-            return status;
-
-        if (pBlockEntry == 0)  //empty cluster
-        {
-            pCluster = m_pSegment->GetNext(pCluster);
-            continue;
-        }
-
-        for (;;)
-        {
-            const Block* const pBlock = pBlockEntry->GetBlock();
-            assert(pBlock);
-
-            const long long tn = pBlock->GetTrackNumber();
-
-            if ((tn == m_info.number) && VetEntry(pBlockEntry))
-                return 0;
-
-            const BlockEntry* pNextEntry;
-
-            status = pCluster->GetNext(pBlockEntry, pNextEntry);
-
-            if (status < 0)  //error
-                return status;
-
-            if (pNextEntry == 0)
-                break;
-
-            pBlockEntry = pNextEntry;
-        }
-
-        ++i;
-
-        if (i >= 100)
-            break;
-
-        pCluster = m_pSegment->GetNext(pCluster);
+      pBlockEntry = 0;
+      return E_BUFFER_NOT_FULL;
     }
 
-    //NOTE: if we get here, it means that we didn't find a block with
-    //a matching track number.  We interpret that as an error (which
-    //might be too conservative).
+    long status = pCluster->GetFirst(pBlockEntry);
 
-    pBlockEntry = GetEOS();  //so we can return a non-NULL value
-    return 1;
-}
+    if (status < 0)  // error
+      return status;
 
+    if (pBlockEntry == 0) {  // empty cluster
+      pCluster = m_pSegment->GetNext(pCluster);
+      continue;
+    }
 
-long Track::GetNext(
-    const BlockEntry* pCurrEntry,
-    const BlockEntry*& pNextEntry) const
-{
-    assert(pCurrEntry);
-    assert(!pCurrEntry->EOS());  //?
+    for (;;) {
+      const Block* const pBlock = pBlockEntry->GetBlock();
+      assert(pBlock);
 
-    const Block* const pCurrBlock = pCurrEntry->GetBlock();
-    assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number);
-    if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number)
-        return -1;
+      const long long tn = pBlock->GetTrackNumber();
 
-    const Cluster* pCluster = pCurrEntry->GetCluster();
-    assert(pCluster);
-    assert(!pCluster->EOS());
+      if ((tn == m_info.number) && VetEntry(pBlockEntry))
+        return 0;
 
-    long status = pCluster->GetNext(pCurrEntry, pNextEntry);
+      const BlockEntry* pNextEntry;
 
-    if (status < 0)  //error
+      status = pCluster->GetNext(pBlockEntry, pNextEntry);
+
+      if (status < 0)  // error
         return status;
 
-    for (int i = 0; ; )
-    {
-        while (pNextEntry)
-        {
-            const Block* const pNextBlock = pNextEntry->GetBlock();
-            assert(pNextBlock);
+      if (pNextEntry == 0)
+        break;
 
-            if (pNextBlock->GetTrackNumber() == m_info.number)
-                return 0;
+      pBlockEntry = pNextEntry;
+    }
 
-            pCurrEntry = pNextEntry;
+    ++i;
 
-            status = pCluster->GetNext(pCurrEntry, pNextEntry);
+    if (i >= 100)
+      break;
 
-            if (status < 0) //error
-                return status;
-        }
+    pCluster = m_pSegment->GetNext(pCluster);
+  }
 
-        pCluster = m_pSegment->GetNext(pCluster);
+  // NOTE: if we get here, it means that we didn't find a block with
+  // a matching track number.  We interpret that as an error (which
+  // might be too conservative).
 
-        if (pCluster == NULL)
-        {
-            pNextEntry = GetEOS();
-            return 1;
-        }
+  pBlockEntry = GetEOS();  // so we can return a non-NULL value
+  return 1;
+}
 
-        if (pCluster->EOS())
-        {
+long Track::GetNext(const BlockEntry* pCurrEntry,
+                    const BlockEntry*& pNextEntry) const {
+  assert(pCurrEntry);
+  assert(!pCurrEntry->EOS());  //?
+
+  const Block* const pCurrBlock = pCurrEntry->GetBlock();
+  assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number);
+  if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number)
+    return -1;
+
+  const Cluster* pCluster = pCurrEntry->GetCluster();
+  assert(pCluster);
+  assert(!pCluster->EOS());
+
+  long status = pCluster->GetNext(pCurrEntry, pNextEntry);
+
+  if (status < 0)  // error
+    return status;
+
+  for (int i = 0;;) {
+    while (pNextEntry) {
+      const Block* const pNextBlock = pNextEntry->GetBlock();
+      assert(pNextBlock);
+
+      if (pNextBlock->GetTrackNumber() == m_info.number)
+        return 0;
+
+      pCurrEntry = pNextEntry;
+
+      status = pCluster->GetNext(pCurrEntry, pNextEntry);
+
+      if (status < 0)  // error
+        return status;
+    }
+
+    pCluster = m_pSegment->GetNext(pCluster);
+
+    if (pCluster == NULL) {
+      pNextEntry = GetEOS();
+      return 1;
+    }
+
+    if (pCluster->EOS()) {
 #if 0
             if (m_pSegment->Unparsed() <= 0)   //all clusters have been loaded
             {
@@ -5855,155 +5105,148 @@
                 return 1;
             }
 #else
-            if (m_pSegment->DoneParsing())
-            {
-                pNextEntry = GetEOS();
-                return 1;
-            }
+      if (m_pSegment->DoneParsing()) {
+        pNextEntry = GetEOS();
+        return 1;
+      }
 #endif
 
-            //TODO: there is a potential O(n^2) problem here: we tell the
-            //caller to (pre)load another cluster, which he does, but then he
-            //calls GetNext again, which repeats the same search.  This is
-            //a pathological case, since the only way it can happen is if
-            //there exists a long sequence of clusters none of which contain a
-            // block from this track.  One way around this problem is for the
-            //caller to be smarter when he loads another cluster: don't call
-            //us back until you have a cluster that contains a block from this
-            //track. (Of course, that's not cheap either, since our caller
-            //would have to scan the each cluster as it's loaded, so that
-            //would just push back the problem.)
+      // TODO: there is a potential O(n^2) problem here: we tell the
+      // caller to (pre)load another cluster, which he does, but then he
+      // calls GetNext again, which repeats the same search.  This is
+      // a pathological case, since the only way it can happen is if
+      // there exists a long sequence of clusters none of which contain a
+      // block from this track.  One way around this problem is for the
+      // caller to be smarter when he loads another cluster: don't call
+      // us back until you have a cluster that contains a block from this
+      // track. (Of course, that's not cheap either, since our caller
+      // would have to scan each cluster as it's loaded, so that
+      // would just push back the problem.)
 
-            pNextEntry = NULL;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        status = pCluster->GetFirst(pNextEntry);
-
-        if (status < 0)  //error
-            return status;
-
-        if (pNextEntry == NULL)  //empty cluster
-            continue;
-
-        ++i;
-
-        if (i >= 100)
-            break;
+      pNextEntry = NULL;
+      return E_BUFFER_NOT_FULL;
     }
 
-    //NOTE: if we get here, it means that we didn't find a block with
-    //a matching track number after lots of searching, so we give
-    //up trying.
+    status = pCluster->GetFirst(pNextEntry);
 
-    pNextEntry = GetEOS();  //so we can return a non-NULL value
-    return 1;
+    if (status < 0)  // error
+      return status;
+
+    if (pNextEntry == NULL)  // empty cluster
+      continue;
+
+    ++i;
+
+    if (i >= 100)
+      break;
+  }
+
+  // NOTE: if we get here, it means that we didn't find a block with
+  // a matching track number after lots of searching, so we give
+  // up trying.
+
+  pNextEntry = GetEOS();  // so we can return a non-NULL value
+  return 1;
 }
 
-bool Track::VetEntry(const BlockEntry* pBlockEntry) const
-{
-    assert(pBlockEntry);
-    const Block* const pBlock = pBlockEntry->GetBlock();
-    assert(pBlock);
-    assert(pBlock->GetTrackNumber() == m_info.number);
-    if (!pBlock || pBlock->GetTrackNumber() != m_info.number)
-        return false;
+bool Track::VetEntry(const BlockEntry* pBlockEntry) const {
+  assert(pBlockEntry);
+  const Block* const pBlock = pBlockEntry->GetBlock();
+  assert(pBlock);
+  assert(pBlock->GetTrackNumber() == m_info.number);
+  if (!pBlock || pBlock->GetTrackNumber() != m_info.number)
+    return false;
 
-    // This function is used during a seek to determine whether the
-    // frame is a valid seek target.  This default function simply
-    // returns true, which means all frames are valid seek targets.
-    // It gets overridden by the VideoTrack class, because only video
-    // keyframes can be used as seek target.
+  // This function is used during a seek to determine whether the
+  // frame is a valid seek target.  This default function simply
+  // returns true, which means all frames are valid seek targets.
+  // It gets overridden by the VideoTrack class, because only video
+  // keyframes can be used as seek target.
 
-    return true;
+  return true;
 }
 
-long Track::Seek(
-    long long time_ns,
-    const BlockEntry*& pResult) const
-{
-    const long status = GetFirst(pResult);
+long Track::Seek(long long time_ns, const BlockEntry*& pResult) const {
+  const long status = GetFirst(pResult);
 
-    if (status < 0)  //buffer underflow, etc
-        return status;
+  if (status < 0)  // buffer underflow, etc
+    return status;
 
-    assert(pResult);
+  assert(pResult);
 
-    if (pResult->EOS())
-        return 0;
+  if (pResult->EOS())
+    return 0;
 
-    const Cluster* pCluster = pResult->GetCluster();
+  const Cluster* pCluster = pResult->GetCluster();
+  assert(pCluster);
+  assert(pCluster->GetIndex() >= 0);
+
+  if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
+    return 0;
+
+  Cluster** const clusters = m_pSegment->m_clusters;
+  assert(clusters);
+
+  const long count = m_pSegment->GetCount();  // loaded only, not preloaded
+  assert(count > 0);
+
+  Cluster** const i = clusters + pCluster->GetIndex();
+  assert(i);
+  assert(*i == pCluster);
+  assert(pCluster->GetTime() <= time_ns);
+
+  Cluster** const j = clusters + count;
+
+  Cluster** lo = i;
+  Cluster** hi = j;
+
+  while (lo < hi) {
+    // INVARIANT:
+    //[i, lo) <= time_ns
+    //[lo, hi) ?
+    //[hi, j)  > time_ns
+
+    Cluster** const mid = lo + (hi - lo) / 2;
+    assert(mid < hi);
+
+    pCluster = *mid;
     assert(pCluster);
     assert(pCluster->GetIndex() >= 0);
+    assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
 
-    if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
-        return 0;
+    const long long t = pCluster->GetTime();
 
-    Cluster** const clusters = m_pSegment->m_clusters;
-    assert(clusters);
+    if (t <= time_ns)
+      lo = mid + 1;
+    else
+      hi = mid;
 
-    const long count = m_pSegment->GetCount();  //loaded only, not preloaded
-    assert(count > 0);
+    assert(lo <= hi);
+  }
 
-    Cluster** const i = clusters + pCluster->GetIndex();
-    assert(i);
-    assert(*i == pCluster);
+  assert(lo == hi);
+  assert(lo > i);
+  assert(lo <= j);
+
+  while (lo > i) {
+    pCluster = *--lo;
+    assert(pCluster);
     assert(pCluster->GetTime() <= time_ns);
 
-    Cluster** const j = clusters + count;
+    pResult = pCluster->GetEntry(this);
 
-    Cluster** lo = i;
-    Cluster** hi = j;
+    if ((pResult != 0) && !pResult->EOS())
+      return 0;
 
-    while (lo < hi)
-    {
-        //INVARIANT:
-        //[i, lo) <= time_ns
-        //[lo, hi) ?
-        //[hi, j)  > time_ns
+    // landed on empty cluster (no entries)
+  }
 
-        Cluster** const mid = lo + (hi - lo) / 2;
-        assert(mid < hi);
-
-        pCluster = *mid;
-        assert(pCluster);
-        assert(pCluster->GetIndex() >= 0);
-        assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
-
-        const long long t = pCluster->GetTime();
-
-        if (t <= time_ns)
-            lo = mid + 1;
-        else
-            hi = mid;
-
-        assert(lo <= hi);
-    }
-
-    assert(lo == hi);
-    assert(lo > i);
-    assert(lo <= j);
-
-    while (lo > i)
-    {
-        pCluster = *--lo;
-        assert(pCluster);
-        assert(pCluster->GetTime() <= time_ns);
-
-        pResult = pCluster->GetEntry(this);
-
-        if ((pResult != 0) && !pResult->EOS())
-            return 0;
-
-        //landed on empty cluster (no entries)
-    }
-
-    pResult = GetEOS();  //weird
-    return 0;
+  pResult = GetEOS();  // weird
+  return 0;
 }
 
-const ContentEncoding*
-Track::GetContentEncodingByIndex(unsigned long idx) const {
+const ContentEncoding* Track::GetContentEncodingByIndex(
+    unsigned long idx) const {
   const ptrdiff_t count =
       content_encoding_entries_end_ - content_encoding_entries_;
   assert(count >= 0);
@@ -6033,27 +5276,22 @@
   int count = 0;
   while (pos < stop) {
     long long id, size;
-    const long status = ParseElementHeader(pReader,
-                                           pos,
-                                           stop,
-                                           id,
-                                           size);
-    if (status < 0)  //error
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
       return status;
 
-
-    //pos now designates start of element
+    // pos now designates start of element
     if (id == 0x2240)  // ContentEncoding ID
       ++count;
 
-    pos += size;  //consume payload
+    pos += size;  // consume payload
     assert(pos <= stop);
   }
 
   if (count <= 0)
     return -1;
 
-  content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count];
+  content_encoding_entries_ = new (std::nothrow) ContentEncoding* [count];
   if (!content_encoding_entries_)
     return -1;
 
@@ -6062,24 +5300,18 @@
   pos = start;
   while (pos < stop) {
     long long id, size;
-    long status = ParseElementHeader(pReader,
-                                     pos,
-                                     stop,
-                                     id,
-                                     size);
-    if (status < 0)  //error
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
       return status;
 
-    //pos now designates start of element
-    if (id == 0x2240) { // ContentEncoding ID
+    // pos now designates start of element
+    if (id == 0x2240) {  // ContentEncoding ID
       ContentEncoding* const content_encoding =
           new (std::nothrow) ContentEncoding();
       if (!content_encoding)
         return -1;
 
-      status = content_encoding->ParseContentEncodingEntry(pos,
-                                                           size,
-                                                           pReader);
+      status = content_encoding->ParseContentEncodingEntry(pos, size, pReader);
       if (status) {
         delete content_encoding;
         return status;
@@ -6088,7 +5320,7 @@
       *content_encoding_entries_end_++ = content_encoding;
     }
 
-    pos += size;  //consume payload
+    pos += size;  // consume payload
     assert(pos <= stop);
   }
 
@@ -6097,219 +5329,175 @@
   return 0;
 }
 
-Track::EOSBlock::EOSBlock() :
-    BlockEntry(NULL, LONG_MIN)
-{
-}
+Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {}
 
-BlockEntry::Kind Track::EOSBlock::GetKind() const
-{
-    return kBlockEOS;
-}
+BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; }
 
+const Block* Track::EOSBlock::GetBlock() const { return NULL; }
 
-const Block* Track::EOSBlock::GetBlock() const
-{
-    return NULL;
-}
+VideoTrack::VideoTrack(Segment* pSegment, long long element_start,
+                       long long element_size)
+    : Track(pSegment, element_start, element_size) {}
 
+long VideoTrack::Parse(Segment* pSegment, const Info& info,
+                       long long element_start, long long element_size,
+                       VideoTrack*& pResult) {
+  if (pResult)
+    return -1;
 
-VideoTrack::VideoTrack(
-    Segment* pSegment,
-    long long element_start,
-    long long element_size) :
-    Track(pSegment, element_start, element_size)
-{
-}
+  if (info.type != Track::kVideo)
+    return -1;
 
+  long long width = 0;
+  long long height = 0;
+  double rate = 0.0;
 
-long VideoTrack::Parse(
-    Segment* pSegment,
-    const Info& info,
-    long long element_start,
-    long long element_size,
-    VideoTrack*& pResult)
-{
-    if (pResult)
-        return -1;
+  IMkvReader* const pReader = pSegment->m_pReader;
 
-    if (info.type != Track::kVideo)
-        return -1;
+  const Settings& s = info.settings;
+  assert(s.start >= 0);
+  assert(s.size >= 0);
 
-    long long width = 0;
-    long long height = 0;
-    double rate = 0.0;
+  long long pos = s.start;
+  assert(pos >= 0);
 
-    IMkvReader* const pReader = pSegment->m_pReader;
+  const long long stop = pos + s.size;
 
-    const Settings& s = info.settings;
-    assert(s.start >= 0);
-    assert(s.size >= 0);
+  while (pos < stop) {
+    long long id, size;
 
-    long long pos = s.start;
-    assert(pos >= 0);
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
 
-    const long long stop = pos + s.size;
+    if (status < 0)  // error
+      return status;
 
-    while (pos < stop)
-    {
-        long long id, size;
+    if (id == 0x30) {  // pixel width
+      width = UnserializeUInt(pReader, pos, size);
 
-        const long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                stop,
-                                id,
-                                size);
+      if (width <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x3A) {  // pixel height
+      height = UnserializeUInt(pReader, pos, size);
 
-        if (status < 0)  //error
-            return status;
+      if (height <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x0383E3) {  // frame rate
+      const long status = UnserializeFloat(pReader, pos, size, rate);
 
-        if (id == 0x30)  //pixel width
-        {
-            width = UnserializeUInt(pReader, pos, size);
-
-            if (width <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x3A)  //pixel height
-        {
-            height = UnserializeUInt(pReader, pos, size);
-
-            if (height <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x0383E3)  //frame rate
-        {
-            const long status = UnserializeFloat(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    rate);
-
-            if (status < 0)
-                return status;
-
-            if (rate <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
-    }
-
-    assert(pos == stop);
-
-    VideoTrack* const pTrack = new (std::nothrow) VideoTrack(pSegment,
-                                                             element_start,
-                                                             element_size);
-
-    if (pTrack == NULL)
-        return -1;  //generic error
-
-    const int status = info.Copy(pTrack->m_info);
-
-    if (status)  // error
-    {
-        delete pTrack;
-        return status;
-    }
-
-    pTrack->m_width = width;
-    pTrack->m_height = height;
-    pTrack->m_rate = rate;
-
-    pResult = pTrack;
-    return 0;  //success
-}
-
-
-bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const
-{
-    return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey();
-}
-
-long VideoTrack::Seek(
-    long long time_ns,
-    const BlockEntry*& pResult) const
-{
-    const long status = GetFirst(pResult);
-
-    if (status < 0)  //buffer underflow, etc
+      if (status < 0)
         return status;
 
-    assert(pResult);
+      if (rate <= 0)
+        return E_FILE_FORMAT_INVALID;
+    }
 
-    if (pResult->EOS())
-        return 0;
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
 
-    const Cluster* pCluster = pResult->GetCluster();
+  assert(pos == stop);
+
+  VideoTrack* const pTrack =
+      new (std::nothrow) VideoTrack(pSegment, element_start, element_size);
+
+  if (pTrack == NULL)
+    return -1;  // generic error
+
+  const int status = info.Copy(pTrack->m_info);
+
+  if (status) {  // error
+    delete pTrack;
+    return status;
+  }
+
+  pTrack->m_width = width;
+  pTrack->m_height = height;
+  pTrack->m_rate = rate;
+
+  pResult = pTrack;
+  return 0;  // success
+}
+
+bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const {
+  return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey();
+}
+
+long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const {
+  const long status = GetFirst(pResult);
+
+  if (status < 0)  // buffer underflow, etc
+    return status;
+
+  assert(pResult);
+
+  if (pResult->EOS())
+    return 0;
+
+  const Cluster* pCluster = pResult->GetCluster();
+  assert(pCluster);
+  assert(pCluster->GetIndex() >= 0);
+
+  if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
+    return 0;
+
+  Cluster** const clusters = m_pSegment->m_clusters;
+  assert(clusters);
+
+  const long count = m_pSegment->GetCount();  // loaded only, not pre-loaded
+  assert(count > 0);
+
+  Cluster** const i = clusters + pCluster->GetIndex();
+  assert(i);
+  assert(*i == pCluster);
+  assert(pCluster->GetTime() <= time_ns);
+
+  Cluster** const j = clusters + count;
+
+  Cluster** lo = i;
+  Cluster** hi = j;
+
+  while (lo < hi) {
+    // INVARIANT:
+    //[i, lo) <= time_ns
+    //[lo, hi) ?
+    //[hi, j)  > time_ns
+
+    Cluster** const mid = lo + (hi - lo) / 2;
+    assert(mid < hi);
+
+    pCluster = *mid;
     assert(pCluster);
     assert(pCluster->GetIndex() >= 0);
+    assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
 
-    if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
-        return 0;
+    const long long t = pCluster->GetTime();
 
-    Cluster** const clusters = m_pSegment->m_clusters;
-    assert(clusters);
+    if (t <= time_ns)
+      lo = mid + 1;
+    else
+      hi = mid;
 
-    const long count = m_pSegment->GetCount();  //loaded only, not pre-loaded
-    assert(count > 0);
+    assert(lo <= hi);
+  }
 
-    Cluster** const i = clusters + pCluster->GetIndex();
-    assert(i);
-    assert(*i == pCluster);
-    assert(pCluster->GetTime() <= time_ns);
+  assert(lo == hi);
+  assert(lo > i);
+  assert(lo <= j);
 
-    Cluster** const j = clusters + count;
+  pCluster = *--lo;
+  assert(pCluster);
+  assert(pCluster->GetTime() <= time_ns);
 
-    Cluster** lo = i;
-    Cluster** hi = j;
+  pResult = pCluster->GetEntry(this, time_ns);
 
-    while (lo < hi)
-    {
-        //INVARIANT:
-        //[i, lo) <= time_ns
-        //[lo, hi) ?
-        //[hi, j)  > time_ns
+  if ((pResult != 0) && !pResult->EOS())  // found a keyframe
+    return 0;
 
-        Cluster** const mid = lo + (hi - lo) / 2;
-        assert(mid < hi);
-
-        pCluster = *mid;
-        assert(pCluster);
-        assert(pCluster->GetIndex() >= 0);
-        assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
-
-        const long long t = pCluster->GetTime();
-
-        if (t <= time_ns)
-            lo = mid + 1;
-        else
-            hi = mid;
-
-        assert(lo <= hi);
-    }
-
-    assert(lo == hi);
-    assert(lo > i);
-    assert(lo <= j);
-
+  while (lo != i) {
     pCluster = *--lo;
     assert(pCluster);
     assert(pCluster->GetTime() <= time_ns);
 
-    pResult = pCluster->GetEntry(this, time_ns);
-
-    if ((pResult != 0) && !pResult->EOS())  //found a keyframe
-        return 0;
-
-    while (lo != i)
-    {
-        pCluster = *--lo;
-        assert(pCluster);
-        assert(pCluster->GetTime() <= time_ns);
-
 #if 0
         //TODO:
         //We need to handle the case when a cluster
@@ -6318,651 +5506,501 @@
         //good enough.
         pResult = pCluster->GetMaxKey(this);
 #else
-        pResult = pCluster->GetEntry(this, time_ns);
+    pResult = pCluster->GetEntry(this, time_ns);
 #endif
 
-        if ((pResult != 0) && !pResult->EOS())
-            return 0;
+    if ((pResult != 0) && !pResult->EOS())
+      return 0;
+  }
+
+  // weird: we're on the first cluster, but no keyframe found
+  // should never happen but we must return something anyway
+
+  pResult = GetEOS();
+  return 0;
+}
+
+long long VideoTrack::GetWidth() const { return m_width; }
+
+long long VideoTrack::GetHeight() const { return m_height; }
+
+double VideoTrack::GetFrameRate() const { return m_rate; }
+
+AudioTrack::AudioTrack(Segment* pSegment, long long element_start,
+                       long long element_size)
+    : Track(pSegment, element_start, element_size) {}
+
+long AudioTrack::Parse(Segment* pSegment, const Info& info,
+                       long long element_start, long long element_size,
+                       AudioTrack*& pResult) {
+  if (pResult)
+    return -1;
+
+  if (info.type != Track::kAudio)
+    return -1;
+
+  IMkvReader* const pReader = pSegment->m_pReader;
+
+  const Settings& s = info.settings;
+  assert(s.start >= 0);
+  assert(s.size >= 0);
+
+  long long pos = s.start;
+  assert(pos >= 0);
+
+  const long long stop = pos + s.size;
+
+  double rate = 8000.0;  // MKV default
+  long long channels = 1;
+  long long bit_depth = 0;
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == 0x35) {  // Sample Rate
+      status = UnserializeFloat(pReader, pos, size, rate);
+
+      if (status < 0)
+        return status;
+
+      if (rate <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x1F) {  // Channel Count
+      channels = UnserializeUInt(pReader, pos, size);
+
+      if (channels <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x2264) {  // Bit Depth
+      bit_depth = UnserializeUInt(pReader, pos, size);
+
+      if (bit_depth <= 0)
+        return E_FILE_FORMAT_INVALID;
     }
 
-    //weird: we're on the first cluster, but no keyframe found
-    //should never happen but we must return something anyway
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
 
-    pResult = GetEOS();
-    return 0;
+  assert(pos == stop);
+
+  AudioTrack* const pTrack =
+      new (std::nothrow) AudioTrack(pSegment, element_start, element_size);
+
+  if (pTrack == NULL)
+    return -1;  // generic error
+
+  const int status = info.Copy(pTrack->m_info);
+
+  if (status) {
+    delete pTrack;
+    return status;
+  }
+
+  pTrack->m_rate = rate;
+  pTrack->m_channels = channels;
+  pTrack->m_bitDepth = bit_depth;
+
+  pResult = pTrack;
+  return 0;  // success
 }
 
+double AudioTrack::GetSamplingRate() const { return m_rate; }
 
-long long VideoTrack::GetWidth() const
-{
-    return m_width;
-}
+long long AudioTrack::GetChannels() const { return m_channels; }
 
+long long AudioTrack::GetBitDepth() const { return m_bitDepth; }
 
-long long VideoTrack::GetHeight() const
-{
-    return m_height;
-}
+Tracks::Tracks(Segment* pSegment, long long start, long long size_,
+               long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start),
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_trackEntries(NULL),
+      m_trackEntriesEnd(NULL) {}
 
+long Tracks::Parse() {
+  assert(m_trackEntries == NULL);
+  assert(m_trackEntriesEnd == NULL);
 
-double VideoTrack::GetFrameRate() const
-{
-    return m_rate;
-}
+  const long long stop = m_start + m_size;
+  IMkvReader* const pReader = m_pSegment->m_pReader;
 
+  int count = 0;
+  long long pos = m_start;
 
-AudioTrack::AudioTrack(
-    Segment* pSegment,
-    long long element_start,
-    long long element_size) :
-    Track(pSegment, element_start, element_size)
-{
-}
+  while (pos < stop) {
+    long long id, size;
 
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
 
-long AudioTrack::Parse(
-    Segment* pSegment,
-    const Info& info,
-    long long element_start,
-    long long element_size,
-    AudioTrack*& pResult)
-{
-    if (pResult)
-        return -1;
+    if (status < 0)  // error
+      return status;
 
-    if (info.type != Track::kAudio)
-        return -1;
+    if (size == 0)  // empty payload: nothing to consume
+      continue;
 
-    IMkvReader* const pReader = pSegment->m_pReader;
+    if (id == 0x2E)  // TrackEntry ID
+      ++count;
 
-    const Settings& s = info.settings;
-    assert(s.start >= 0);
-    assert(s.size >= 0);
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
 
-    long long pos = s.start;
-    assert(pos >= 0);
+  assert(pos == stop);
 
-    const long long stop = pos + s.size;
+  if (count <= 0)
+    return 0;  // success
 
-    double rate = 8000.0;  // MKV default
-    long long channels = 1;
-    long long bit_depth = 0;
+  m_trackEntries = new (std::nothrow) Track* [count];
 
-    while (pos < stop)
-    {
-        long long id, size;
+  if (m_trackEntries == NULL)
+    return -1;
 
-        long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                stop,
-                                id,
-                                size);
+  m_trackEntriesEnd = m_trackEntries;
 
-        if (status < 0)  //error
-            return status;
+  pos = m_start;
 
-        if (id == 0x35)  //Sample Rate
-        {
-            status = UnserializeFloat(pReader, pos, size, rate);
+  while (pos < stop) {
+    const long long element_start = pos;
 
-            if (status < 0)
-                return status;
+    long long id, payload_size;
 
-            if (rate <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x1F)  //Channel Count
-        {
-            channels = UnserializeUInt(pReader, pos, size);
+    const long status =
+        ParseElementHeader(pReader, pos, stop, id, payload_size);
 
-            if (channels <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x2264)  //Bit Depth
-        {
-            bit_depth = UnserializeUInt(pReader, pos, size);
+    if (status < 0)  // error
+      return status;
 
-            if (bit_depth <= 0)
-                return E_FILE_FORMAT_INVALID;
-        }
+    if (payload_size == 0)  // empty payload: nothing to consume
+      continue;
 
-        pos += size;  //consume payload
-        assert(pos <= stop);
+    const long long payload_stop = pos + payload_size;
+    assert(payload_stop <= stop);  // checked in ParseElementHeader
+
+    const long long element_size = payload_stop - element_start;
+
+    if (id == 0x2E) {  // TrackEntry ID
+      Track*& pTrack = *m_trackEntriesEnd;
+      pTrack = NULL;
+
+      const long status = ParseTrackEntry(pos, payload_size, element_start,
+                                          element_size, pTrack);
+
+      if (status)
+        return status;
+
+      if (pTrack)
+        ++m_trackEntriesEnd;
     }
 
-    assert(pos == stop);
+    pos = payload_stop;
+    assert(pos <= stop);
+  }
 
-    AudioTrack* const pTrack = new (std::nothrow) AudioTrack(pSegment,
-                                                             element_start,
-                                                             element_size);
+  assert(pos == stop);
 
-    if (pTrack == NULL)
-        return -1;  //generic error
+  return 0;  // success
+}
 
-    const int status = info.Copy(pTrack->m_info);
+unsigned long Tracks::GetTracksCount() const {
+  const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries;
+  assert(result >= 0);
+
+  return static_cast<unsigned long>(result);
+}
+
+long Tracks::ParseTrackEntry(long long track_start, long long track_size,
+                             long long element_start, long long element_size,
+                             Track*& pResult) const {
+  if (pResult)
+    return -1;
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = track_start;
+  const long long track_stop = track_start + track_size;
+
+  Track::Info info;
+
+  info.type = 0;
+  info.number = 0;
+  info.uid = 0;
+  info.defaultDuration = 0;
+
+  Track::Settings v;
+  v.start = -1;
+  v.size = -1;
+
+  Track::Settings a;
+  a.start = -1;
+  a.size = -1;
+
+  Track::Settings e;  // ContentEncodings settings
+  e.start = -1;
+  e.size = -1;
+
+  long long lacing = 1;  // default is true
+
+  while (pos < track_stop) {
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, track_stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    const long long start = pos;
+
+    if (id == 0x60) {  // VideoSettings ID
+      v.start = start;
+      v.size = size;
+    } else if (id == 0x61) {  // AudioSettings ID
+      a.start = start;
+      a.size = size;
+    } else if (id == 0x2D80) {  // ContentEncodings ID
+      e.start = start;
+      e.size = size;
+    } else if (id == 0x33C5) {  // Track UID
+      if (size > 8)
+        return E_FILE_FORMAT_INVALID;
+
+      info.uid = 0;
+
+      long long pos_ = start;
+      const long long pos_end = start + size;
+
+      while (pos_ != pos_end) {
+        unsigned char b;
+
+        const int status = pReader->Read(pos_, 1, &b);
+
+        if (status)
+          return status;
+
+        info.uid <<= 8;
+        info.uid |= b;
+
+        ++pos_;
+      }
+    } else if (id == 0x57) {  // Track Number
+      const long long num = UnserializeUInt(pReader, pos, size);
+
+      if ((num <= 0) || (num > 127))
+        return E_FILE_FORMAT_INVALID;
+
+      info.number = static_cast<long>(num);
+    } else if (id == 0x03) {  // Track Type
+      const long long type = UnserializeUInt(pReader, pos, size);
+
+      if ((type <= 0) || (type > 254))
+        return E_FILE_FORMAT_INVALID;
+
+      info.type = static_cast<long>(type);
+    } else if (id == 0x136E) {  // Track Name
+      const long status =
+          UnserializeString(pReader, pos, size, info.nameAsUTF8);
+
+      if (status)
+        return status;
+    } else if (id == 0x02B59C) {  // Track Language
+      const long status = UnserializeString(pReader, pos, size, info.language);
+
+      if (status)
+        return status;
+    } else if (id == 0x03E383) {  // Default Duration
+      const long long duration = UnserializeUInt(pReader, pos, size);
+
+      if (duration < 0)
+        return E_FILE_FORMAT_INVALID;
+
+      info.defaultDuration = static_cast<unsigned long long>(duration);
+    } else if (id == 0x06) {  // CodecID
+      const long status = UnserializeString(pReader, pos, size, info.codecId);
+
+      if (status)
+        return status;
+    } else if (id == 0x1C) {  // lacing
+      lacing = UnserializeUInt(pReader, pos, size);
+
+      if ((lacing < 0) || (lacing > 1))
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == 0x23A2) {  // Codec Private
+      delete[] info.codecPrivate;
+      info.codecPrivate = NULL;
+      info.codecPrivateSize = 0;
+
+      const size_t buflen = static_cast<size_t>(size);
+
+      if (buflen) {
+        typedef unsigned char* buf_t;
+
+        const buf_t buf = new (std::nothrow) unsigned char[buflen];
+
+        if (buf == NULL)
+          return -1;
+
+        const int status = pReader->Read(pos, static_cast<long>(buflen), buf);
+
+        if (status) {
+          delete[] buf;
+          return status;
+        }
+
+        info.codecPrivate = buf;
+        info.codecPrivateSize = buflen;
+      }
+    } else if (id == 0x058688) {  // Codec Name
+      const long status =
+          UnserializeString(pReader, pos, size, info.codecNameAsUTF8);
+
+      if (status)
+        return status;
+    } else if (id == 0x16AA) {  // Codec Delay
+      info.codecDelay = UnserializeUInt(pReader, pos, size);
+    } else if (id == 0x16BB) {  // Seek Pre Roll
+      info.seekPreRoll = UnserializeUInt(pReader, pos, size);
+    }
+
+    pos += size;  // consume payload
+    assert(pos <= track_stop);
+  }
+
+  assert(pos == track_stop);
+
+  if (info.number <= 0)  // not specified
+    return E_FILE_FORMAT_INVALID;
+
+  if (GetTrackByNumber(info.number))
+    return E_FILE_FORMAT_INVALID;
+
+  if (info.type <= 0)  // not specified
+    return E_FILE_FORMAT_INVALID;
+
+  info.lacing = (lacing > 0) ? true : false;
+
+  if (info.type == Track::kVideo) {
+    if (v.start < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (a.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    info.settings = v;
+
+    VideoTrack* pTrack = NULL;
+
+    const long status = VideoTrack::Parse(m_pSegment, info, element_start,
+                                          element_size, pTrack);
 
     if (status)
-    {
-        delete pTrack;
-        return status;
-    }
-
-    pTrack->m_rate = rate;
-    pTrack->m_channels = channels;
-    pTrack->m_bitDepth = bit_depth;
+      return status;
 
     pResult = pTrack;
-    return 0;  //success
+    assert(pResult);
+
+    if (e.start >= 0)
+      pResult->ParseContentEncodingsEntry(e.start, e.size);
+  } else if (info.type == Track::kAudio) {
+    if (a.start < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (v.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    info.settings = a;
+
+    AudioTrack* pTrack = NULL;
+
+    const long status = AudioTrack::Parse(m_pSegment, info, element_start,
+                                          element_size, pTrack);
+
+    if (status)
+      return status;
+
+    pResult = pTrack;
+    assert(pResult);
+
+    if (e.start >= 0)
+      pResult->ParseContentEncodingsEntry(e.start, e.size);
+  } else {
+    // neither video nor audio - probably metadata or subtitles
+
+    if (a.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (v.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (e.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    info.settings.start = -1;
+    info.settings.size = 0;
+
+    Track* pTrack = NULL;
+
+    const long status =
+        Track::Create(m_pSegment, info, element_start, element_size, pTrack);
+
+    if (status)
+      return status;
+
+    pResult = pTrack;
+    assert(pResult);
+  }
+
+  return 0;  // success
 }
 
+Tracks::~Tracks() {
+  Track** i = m_trackEntries;
+  Track** const j = m_trackEntriesEnd;
 
-double AudioTrack::GetSamplingRate() const
-{
-    return m_rate;
+  while (i != j) {
+    Track* const pTrack = *i++;
+    delete pTrack;
+  }
+
+  delete[] m_trackEntries;
 }
 
+const Track* Tracks::GetTrackByNumber(long tn) const {
+  if (tn < 0)
+    return NULL;
 
-long long AudioTrack::GetChannels() const
-{
-    return m_channels;
+  Track** i = m_trackEntries;
+  Track** const j = m_trackEntriesEnd;
+
+  while (i != j) {
+    Track* const pTrack = *i++;
+
+    if (pTrack == NULL)
+      continue;
+
+    if (tn == pTrack->GetNumber())
+      return pTrack;
+  }
+
+  return NULL;  // not found
 }
 
-long long AudioTrack::GetBitDepth() const
-{
-    return m_bitDepth;
-}
+const Track* Tracks::GetTrackByIndex(unsigned long idx) const {
+  const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries;
 
-Tracks::Tracks(
-    Segment* pSegment,
-    long long start,
-    long long size_,
-    long long element_start,
-    long long element_size) :
-    m_pSegment(pSegment),
-    m_start(start),
-    m_size(size_),
-    m_element_start(element_start),
-    m_element_size(element_size),
-    m_trackEntries(NULL),
-    m_trackEntriesEnd(NULL)
-{
-}
+  if (idx >= static_cast<unsigned long>(count))
+    return NULL;
 
-
-long Tracks::Parse()
-{
-    assert(m_trackEntries == NULL);
-    assert(m_trackEntriesEnd == NULL);
-
-    const long long stop = m_start + m_size;
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    int count = 0;
-    long long pos = m_start;
-
-    while (pos < stop)
-    {
-        long long id, size;
-
-        const long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                stop,
-                                id,
-                                size);
-
-        if (status < 0)  //error
-            return status;
-
-        if (size == 0)  //weird
-            continue;
-
-        if (id == 0x2E)  //TrackEntry ID
-            ++count;
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
-    }
-
-    assert(pos == stop);
-
-    if (count <= 0)
-        return 0;  //success
-
-    m_trackEntries = new (std::nothrow) Track*[count];
-
-    if (m_trackEntries == NULL)
-        return -1;
-
-    m_trackEntriesEnd = m_trackEntries;
-
-    pos = m_start;
-
-    while (pos < stop)
-    {
-        const long long element_start = pos;
-
-        long long id, payload_size;
-
-        const long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                stop,
-                                id,
-                                payload_size);
-
-        if (status < 0)  //error
-            return status;
-
-        if (payload_size == 0)  //weird
-            continue;
-
-        const long long payload_stop = pos + payload_size;
-        assert(payload_stop <= stop);  //checked in ParseElement
-
-        const long long element_size = payload_stop - element_start;
-
-        if (id == 0x2E)  //TrackEntry ID
-        {
-            Track*& pTrack = *m_trackEntriesEnd;
-            pTrack = NULL;
-
-            const long status = ParseTrackEntry(
-                                    pos,
-                                    payload_size,
-                                    element_start,
-                                    element_size,
-                                    pTrack);
-
-            if (status)
-                return status;
-
-            if (pTrack)
-                ++m_trackEntriesEnd;
-        }
-
-        pos = payload_stop;
-        assert(pos <= stop);
-    }
-
-    assert(pos == stop);
-
-    return 0;  //success
-}
-
-
-unsigned long Tracks::GetTracksCount() const
-{
-    const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries;
-    assert(result >= 0);
-
-    return static_cast<unsigned long>(result);
-}
-
-long Tracks::ParseTrackEntry(
-    long long track_start,
-    long long track_size,
-    long long element_start,
-    long long element_size,
-    Track*& pResult) const
-{
-    if (pResult)
-        return -1;
-
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    long long pos = track_start;
-    const long long track_stop = track_start + track_size;
-
-    Track::Info info;
-
-    info.type = 0;
-    info.number = 0;
-    info.uid = 0;
-    info.defaultDuration = 0;
-
-    Track::Settings v;
-    v.start = -1;
-    v.size = -1;
-
-    Track::Settings a;
-    a.start = -1;
-    a.size = -1;
-
-    Track::Settings e;  //content_encodings_settings;
-    e.start = -1;
-    e.size = -1;
-
-    long long lacing = 1;  //default is true
-
-    while (pos < track_stop)
-    {
-        long long id, size;
-
-        const long status = ParseElementHeader(
-                                pReader,
-                                pos,
-                                track_stop,
-                                id,
-                                size);
-
-        if (status < 0)  //error
-            return status;
-
-        if (size < 0)
-            return E_FILE_FORMAT_INVALID;
-
-        const long long start = pos;
-
-        if (id == 0x60)  // VideoSettings ID
-        {
-            v.start = start;
-            v.size = size;
-        }
-        else if (id == 0x61)  // AudioSettings ID
-        {
-            a.start = start;
-            a.size = size;
-        }
-        else if (id == 0x2D80) // ContentEncodings ID
-        {
-            e.start = start;
-            e.size = size;
-        }
-        else if (id == 0x33C5)  //Track UID
-        {
-            if (size > 8)
-                return E_FILE_FORMAT_INVALID;
-
-            info.uid = 0;
-
-            long long pos_ = start;
-            const long long pos_end = start + size;
-
-            while (pos_ != pos_end)
-            {
-                unsigned char b;
-
-                const int status = pReader->Read(pos_, 1, &b);
-
-                if (status)
-                    return status;
-
-                info.uid <<= 8;
-                info.uid |= b;
-
-                ++pos_;
-            }
-        }
-        else if (id == 0x57)  //Track Number
-        {
-            const long long num = UnserializeUInt(pReader, pos, size);
-
-            if ((num <= 0) || (num > 127))
-                return E_FILE_FORMAT_INVALID;
-
-            info.number = static_cast<long>(num);
-        }
-        else if (id == 0x03)  //Track Type
-        {
-            const long long type = UnserializeUInt(pReader, pos, size);
-
-            if ((type <= 0) || (type > 254))
-                return E_FILE_FORMAT_INVALID;
-
-            info.type = static_cast<long>(type);
-        }
-        else if (id == 0x136E)  //Track Name
-        {
-            const long status = UnserializeString(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    info.nameAsUTF8);
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x02B59C)  //Track Language
-        {
-            const long status = UnserializeString(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    info.language);
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x03E383)  //Default Duration
-        {
-            const long long duration = UnserializeUInt(pReader, pos, size);
-
-            if (duration < 0)
-                return E_FILE_FORMAT_INVALID;
-
-            info.defaultDuration = static_cast<unsigned long long>(duration);
-        }
-        else if (id == 0x06)  //CodecID
-        {
-            const long status = UnserializeString(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    info.codecId);
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x1C)  //lacing
-        {
-            lacing = UnserializeUInt(pReader, pos, size);
-
-            if ((lacing < 0) || (lacing > 1))
-                return E_FILE_FORMAT_INVALID;
-        }
-        else if (id == 0x23A2)  //Codec Private
-        {
-            delete[] info.codecPrivate;
-            info.codecPrivate = NULL;
-            info.codecPrivateSize = 0;
-
-            const size_t buflen = static_cast<size_t>(size);
-
-            if (buflen)
-            {
-                typedef unsigned char* buf_t;
-
-                const buf_t buf = new (std::nothrow) unsigned char[buflen];
-
-                if (buf == NULL)
-                    return -1;
-
-                const int status = pReader->Read(pos, buflen, buf);
-
-                if (status)
-                {
-                    delete[] buf;
-                    return status;
-                }
-
-                info.codecPrivate = buf;
-                info.codecPrivateSize = buflen;
-            }
-        }
-        else if (id == 0x058688)  //Codec Name
-        {
-            const long status = UnserializeString(
-                                    pReader,
-                                    pos,
-                                    size,
-                                    info.codecNameAsUTF8);
-
-            if (status)
-                return status;
-        }
-        else if (id == 0x16AA)  //Codec Delay
-        {
-            info.codecDelay = UnserializeUInt(pReader, pos, size);
-
-        }
-        else if (id == 0x16BB) //Seek Pre Roll
-        {
-            info.seekPreRoll = UnserializeUInt(pReader, pos, size);
-        }
-
-        pos += size;  //consume payload
-        assert(pos <= track_stop);
-    }
-
-    assert(pos == track_stop);
-
-    if (info.number <= 0)  //not specified
-        return E_FILE_FORMAT_INVALID;
-
-    if (GetTrackByNumber(info.number))
-        return E_FILE_FORMAT_INVALID;
-
-    if (info.type <= 0)  //not specified
-        return E_FILE_FORMAT_INVALID;
-
-    info.lacing = (lacing > 0) ? true : false;
-
-    if (info.type == Track::kVideo)
-    {
-        if (v.start < 0)
-            return E_FILE_FORMAT_INVALID;
-
-        if (a.start >= 0)
-            return E_FILE_FORMAT_INVALID;
-
-        info.settings = v;
-
-        VideoTrack* pTrack = NULL;
-
-        const long status = VideoTrack::Parse(m_pSegment,
-                                              info,
-                                              element_start,
-                                              element_size,
-                                              pTrack);
-
-        if (status)
-            return status;
-
-        pResult = pTrack;
-        assert(pResult);
-
-        if (e.start >= 0)
-            pResult->ParseContentEncodingsEntry(e.start, e.size);
-    }
-    else if (info.type == Track::kAudio)
-    {
-        if (a.start < 0)
-            return E_FILE_FORMAT_INVALID;
-
-        if (v.start >= 0)
-            return E_FILE_FORMAT_INVALID;
-
-        info.settings = a;
-
-        AudioTrack* pTrack = NULL;
-
-        const long status = AudioTrack::Parse(m_pSegment,
-                                              info,
-                                              element_start,
-                                              element_size,
-                                              pTrack);
-
-        if (status)
-            return status;
-
-        pResult = pTrack;
-        assert(pResult);
-
-        if (e.start >= 0)
-            pResult->ParseContentEncodingsEntry(e.start, e.size);
-    }
-    else
-    {
-        // neither video nor audio - probably metadata or subtitles
-
-        if (a.start >= 0)
-            return E_FILE_FORMAT_INVALID;
-
-        if (v.start >= 0)
-            return E_FILE_FORMAT_INVALID;
-
-        if (e.start >= 0)
-            return E_FILE_FORMAT_INVALID;
-
-        info.settings.start = -1;
-        info.settings.size = 0;
-
-        Track* pTrack = NULL;
-
-        const long status = Track::Create(m_pSegment,
-                                          info,
-                                          element_start,
-                                          element_size,
-                                          pTrack);
-
-        if (status)
-            return status;
-
-        pResult = pTrack;
-        assert(pResult);
-    }
-
-    return 0;  //success
-}
-
-
-Tracks::~Tracks()
-{
-    Track** i = m_trackEntries;
-    Track** const j = m_trackEntriesEnd;
-
-    while (i != j)
-    {
-        Track* const pTrack = *i++;
-        delete pTrack;
-    }
-
-    delete[] m_trackEntries;
-}
-
-const Track* Tracks::GetTrackByNumber(long tn) const
-{
-    if (tn < 0)
-        return NULL;
-
-    Track** i = m_trackEntries;
-    Track** const j = m_trackEntriesEnd;
-
-    while (i != j)
-    {
-        Track* const pTrack = *i++;
-
-        if (pTrack == NULL)
-            continue;
-
-        if (tn == pTrack->GetNumber())
-            return pTrack;
-    }
-
-    return NULL;  //not found
-}
-
-
-const Track* Tracks::GetTrackByIndex(unsigned long idx) const
-{
-    const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries;
-
-    if (idx >= static_cast<unsigned long>(count))
-         return NULL;
-
-    return m_trackEntries[idx];
+  return m_trackEntries[idx];
 }
 
 #if 0
@@ -6984,104 +6022,100 @@
 }
 #endif
 
+long Cluster::Load(long long& pos, long& len) const {
+  assert(m_pSegment);
+  assert(m_pos >= m_element_start);
 
-long Cluster::Load(long long& pos, long& len) const
-{
-    assert(m_pSegment);
-    assert(m_pos >= m_element_start);
+  if (m_timecode >= 0)  // at least partially loaded
+    return 0;
 
-    if (m_timecode >= 0)  //at least partially loaded
-        return 0;
+  assert(m_pos == m_element_start);
+  assert(m_element_size < 0);
 
-    assert(m_pos == m_element_start);
-    assert(m_element_size < 0);
+  IMkvReader* const pReader = m_pSegment->m_pReader;
 
-    IMkvReader* const pReader = m_pSegment->m_pReader;
+  long long total, avail;
 
-    long long total, avail;
+  const int status = pReader->Length(&total, &avail);
 
-    const int status = pReader->Length(&total, &avail);
+  if (status < 0)  // error
+    return status;
 
-    if (status < 0)  //error
-        return status;
+  assert((total < 0) || (avail <= total));
+  assert((total < 0) || (m_pos <= total));  // TODO: verify this
 
-    assert((total < 0) || (avail <= total));
-    assert((total < 0) || (m_pos <= total));  //TODO: verify this
+  pos = m_pos;
 
-    pos = m_pos;
+  long long cluster_size = -1;
 
-    long long cluster_size = -1;
-
-    {
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        long long result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error or underflow
-            return static_cast<long>(result);
-
-        if (result > 0)  //underflow (weird)
-            return E_BUFFER_NOT_FULL;
-
-        //if ((pos + len) > segment_stop)
-        //    return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long id_ = ReadUInt(pReader, pos, len);
-
-        if (id_ < 0)  //error
-            return static_cast<long>(id_);
-
-        if (id_ != 0x0F43B675)  //Cluster ID
-            return E_FILE_FORMAT_INVALID;
-
-        pos += len;  //consume id
-
-        //read cluster size
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        //if ((pos + len) > segment_stop)
-        //    return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long size = ReadUInt(pReader, pos, len);
-
-        if (size < 0)  //error
-            return static_cast<long>(cluster_size);
-
-        if (size == 0)
-            return E_FILE_FORMAT_INVALID;  //TODO: verify this
-
-        pos += len;  //consume length of size of element
-
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-        if (size != unknown_size)
-            cluster_size = size;
+  {
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
     }
 
-    //pos points to start of payload
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error or underflow
+      return static_cast<long>(result);
+
+    if (result > 0)  // underflow (weird)
+      return E_BUFFER_NOT_FULL;
+
+    // if ((pos + len) > segment_stop)
+    //    return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id_ = ReadUInt(pReader, pos, len);
+
+    if (id_ < 0)  // error
+      return static_cast<long>(id_);
+
+    if (id_ != 0x0F43B675)  // Cluster ID
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume id
+
+    // read cluster size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    // if ((pos + len) > segment_stop)
+    //    return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error: return ReadUInt's status (cluster_size is still -1)
+      return static_cast<long>(size);
+
+    if (size == 0)
+      return E_FILE_FORMAT_INVALID;  // TODO: verify this
+
+    pos += len;  // consume length of size of element
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size != unknown_size)
+      cluster_size = size;
+  }
+
+// pos points to start of payload
 
 #if 0
     len = static_cast<long>(size_);
@@ -7090,403 +6124,376 @@
         return E_BUFFER_NOT_FULL;
 #endif
 
-    long long timecode = -1;
-    long long new_pos = -1;
-    bool bBlock = false;
+  long long timecode = -1;
+  long long new_pos = -1;
+  bool bBlock = false;
 
-    long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size;
+  long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size;
 
-    for (;;)
-    {
-        if ((cluster_stop >= 0) && (pos >= cluster_stop))
-            break;
+  for (;;) {
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      break;
 
-        //Parse ID
+    // Parse ID
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        long long result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long id = ReadUInt(pReader, pos, len);
-
-        if (id < 0) //error
-            return static_cast<long>(id);
-
-        if (id == 0)
-            return E_FILE_FORMAT_INVALID;
-
-        //This is the distinguished set of ID's we use to determine
-        //that we have exhausted the sub-element's inside the cluster
-        //whose ID we parsed earlier.
-
-        if (id == 0x0F43B675)  //Cluster ID
-            break;
-
-        if (id == 0x0C53BB6B)  //Cues ID
-            break;
-
-        pos += len;  //consume ID field
-
-        //Parse Size
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long size = ReadUInt(pReader, pos, len);
-
-        if (size < 0)  //error
-            return static_cast<long>(size);
-
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-        if (size == unknown_size)
-            return E_FILE_FORMAT_INVALID;
-
-        pos += len;  //consume size field
-
-        if ((cluster_stop >= 0) && (pos > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        //pos now points to start of payload
-
-        if (size == 0)  //weird
-            continue;
-
-        if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if (id == 0x67)  //TimeCode ID
-        {
-            len = static_cast<long>(size);
-
-            if ((pos + size) > avail)
-                return E_BUFFER_NOT_FULL;
-
-            timecode = UnserializeUInt(pReader, pos, size);
-
-            if (timecode < 0)  //error (or underflow)
-                return static_cast<long>(timecode);
-
-            new_pos = pos + size;
-
-            if (bBlock)
-                break;
-        }
-        else if (id == 0x20)  //BlockGroup ID
-        {
-            bBlock = true;
-            break;
-        }
-        else if (id == 0x23)  //SimpleBlock ID
-        {
-            bBlock = true;
-            break;
-        }
-
-        pos += size;  //consume payload
-        assert((cluster_stop < 0) || (pos <= cluster_stop));
-    }
-
-    assert((cluster_stop < 0) || (pos <= cluster_stop));
-
-    if (timecode < 0)  //no timecode found
-        return E_FILE_FORMAT_INVALID;
-
-    if (!bBlock)
-        return E_FILE_FORMAT_INVALID;
-
-    m_pos = new_pos;  //designates position just beyond timecode payload
-    m_timecode = timecode;  // m_timecode >= 0 means we're partially loaded
-
-    if (cluster_size >= 0)
-        m_element_size = cluster_stop - m_element_start;
-
-    return 0;
-}
-
-
-long Cluster::Parse(long long& pos, long& len) const
-{
-    long status = Load(pos, len);
-
-    if (status < 0)
-        return status;
-
-    assert(m_pos >= m_element_start);
-    assert(m_timecode >= 0);
-    //assert(m_size > 0);
-    //assert(m_element_size > m_size);
-
-    const long long cluster_stop =
-        (m_element_size < 0) ? -1 : m_element_start + m_element_size;
-
-    if ((cluster_stop >= 0) && (m_pos >= cluster_stop))
-        return 1;  //nothing else to do
-
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    long long total, avail;
-
-    status = pReader->Length(&total, &avail);
-
-    if (status < 0)  //error
-        return status;
-
-    assert((total < 0) || (avail <= total));
-
-    pos = m_pos;
-
-    for (;;)
-    {
-        if ((cluster_stop >= 0) && (pos >= cluster_stop))
-            break;
-
-        if ((total >= 0) && (pos >= total))
-        {
-            if (m_element_size < 0)
-                m_element_size = pos - m_element_start;
-
-            break;
-        }
-
-        //Parse ID
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        long long result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long id = ReadUInt(pReader, pos, len);
-
-        if (id < 0) //error
-            return static_cast<long>(id);
-
-        if (id == 0)  //weird
-            return E_FILE_FORMAT_INVALID;
-
-        //This is the distinguished set of ID's we use to determine
-        //that we have exhausted the sub-element's inside the cluster
-        //whose ID we parsed earlier.
-
-        if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster or Cues ID
-        {
-            if (m_element_size < 0)
-                m_element_size = pos - m_element_start;
-
-            break;
-        }
-
-        pos += len;  //consume ID field
-
-        //Parse Size
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long size = ReadUInt(pReader, pos, len);
-
-        if (size < 0)  //error
-            return static_cast<long>(size);
-
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-        if (size == unknown_size)
-            return E_FILE_FORMAT_INVALID;
-
-        pos += len;  //consume size field
-
-        if ((cluster_stop >= 0) && (pos > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        //pos now points to start of payload
-
-        if (size == 0)  //weird
-            continue;
-
-        //const long long block_start = pos;
-        const long long block_stop = pos + size;
-
-        if (cluster_stop >= 0)
-        {
-            if (block_stop > cluster_stop)
-            {
-                if ((id == 0x20) || (id == 0x23))
-                    return E_FILE_FORMAT_INVALID;
-
-                pos = cluster_stop;
-                break;
-            }
-        }
-        else if ((total >= 0) && (block_stop > total))
-        {
-            m_element_size = total - m_element_start;
-            pos = total;
-            break;
-        }
-        else if (block_stop > avail)
-        {
-            len = static_cast<long>(size);
-            return E_BUFFER_NOT_FULL;
-        }
-
-        Cluster* const this_ = const_cast<Cluster*>(this);
-
-        if (id == 0x20)  //BlockGroup
-            return this_->ParseBlockGroup(size, pos, len);
-
-        if (id == 0x23)  //SimpleBlock
-            return this_->ParseSimpleBlock(size, pos, len);
-
-        pos += size;  //consume payload
-        assert((cluster_stop < 0) || (pos <= cluster_stop));
-    }
-
-    assert(m_element_size > 0);
-
-    m_pos = pos;
-    assert((cluster_stop < 0) || (m_pos <= cluster_stop));
-
-    if (m_entries_count > 0)
-    {
-        const long idx = m_entries_count - 1;
-
-        const BlockEntry* const pLast = m_entries[idx];
-        assert(pLast);
-
-        const Block* const pBlock = pLast->GetBlock();
-        assert(pBlock);
-
-        const long long start = pBlock->m_start;
-
-        if ((total >= 0) && (start > total))
-            return -1;  //defend against trucated stream
-
-        const long long size = pBlock->m_size;
-
-        const long long stop = start + size;
-        assert((cluster_stop < 0) || (stop <= cluster_stop));
-
-        if ((total >= 0) && (stop > total))
-            return -1;  //defend against trucated stream
-    }
-
-    return 1;  //no more entries
-}
-
-
-long Cluster::ParseSimpleBlock(
-    long long block_size,
-    long long& pos,
-    long& len)
-{
-    const long long block_start = pos;
-    const long long block_stop = pos + block_size;
-
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    long long total, avail;
-
-    long status = pReader->Length(&total, &avail);
-
-    if (status < 0)  //error
-        return status;
-
-    assert((total < 0) || (avail <= total));
-
-    //parse track number
-
-    if ((pos + 1) > avail)
-    {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
     }
 
     long long result = GetUIntLength(pReader, pos, len);
 
-    if (result < 0)  //error
-        return static_cast<long>(result);
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-    if (result > 0)  //weird
-        return E_BUFFER_NOT_FULL;
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-    if ((pos + len) > block_stop)
-        return E_FILE_FORMAT_INVALID;
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
 
     if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadUInt(pReader, pos, len);
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    if (id == 0)
+      return E_FILE_FORMAT_INVALID;
+
+    // This is the distinguished set of IDs we use to determine
+    // that we have exhausted the sub-elements inside the cluster
+    // whose ID we parsed earlier.
+
+    if (id == 0x0F43B675)  // Cluster ID
+      break;
+
+    if (id == 0x0C53BB6B)  // Cues ID
+      break;
+
+    pos += len;  // consume ID field
+
+    // Parse Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume size field
+
+    if ((cluster_stop >= 0) && (pos > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // pos now points to start of payload
+
+    if (size == 0)  // weird
+      continue;
+
+    if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == 0x67) {  // TimeCode ID
+      len = static_cast<long>(size);
+
+      if ((pos + size) > avail)
         return E_BUFFER_NOT_FULL;
 
-    const long long track = ReadUInt(pReader, pos, len);
+      timecode = UnserializeUInt(pReader, pos, size);
 
-    if (track < 0) //error
-        return static_cast<long>(track);
+      if (timecode < 0)  // error (or underflow)
+        return static_cast<long>(timecode);
 
-    if (track == 0)
-        return E_FILE_FORMAT_INVALID;
+      new_pos = pos + size;
+
+      if (bBlock)
+        break;
+    } else if (id == 0x20) {  // BlockGroup ID
+      bBlock = true;
+      break;
+    } else if (id == 0x23) {  // SimpleBlock ID
+      bBlock = true;
+      break;
+    }
+
+    pos += size;  // consume payload
+    assert((cluster_stop < 0) || (pos <= cluster_stop));
+  }
+
+  assert((cluster_stop < 0) || (pos <= cluster_stop));
+
+  if (timecode < 0)  // no timecode found
+    return E_FILE_FORMAT_INVALID;
+
+  if (!bBlock)
+    return E_FILE_FORMAT_INVALID;
+
+  m_pos = new_pos;  // designates position just beyond timecode payload
+  m_timecode = timecode;  // m_timecode >= 0 means we're partially loaded
+
+  if (cluster_size >= 0)
+    m_element_size = cluster_stop - m_element_start;
+
+  return 0;
+}
+
+long Cluster::Parse(long long& pos, long& len) const {
+  long status = Load(pos, len);
+
+  if (status < 0)
+    return status;
+
+  assert(m_pos >= m_element_start);
+  assert(m_timecode >= 0);
+  // assert(m_size > 0);
+  // assert(m_element_size > m_size);
+
+  const long long cluster_stop =
+      (m_element_size < 0) ? -1 : m_element_start + m_element_size;
+
+  if ((cluster_stop >= 0) && (m_pos >= cluster_stop))
+    return 1;  // nothing else to do
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long total, avail;
+
+  status = pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  pos = m_pos;
+
+  for (;;) {
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      break;
+
+    if ((total >= 0) && (pos >= total)) {
+      if (m_element_size < 0)
+        m_element_size = pos - m_element_start;
+
+      break;
+    }
+
+    // Parse ID
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadUInt(pReader, pos, len);
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    if (id == 0)  // weird
+      return E_FILE_FORMAT_INVALID;
+
+    // This is the distinguished set of IDs we use to determine
+    // that we have exhausted the sub-elements inside the cluster
+    // whose ID we parsed earlier.
+
+    if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) {  // Cluster or Cues ID
+      if (m_element_size < 0)
+        m_element_size = pos - m_element_start;
+
+      break;
+    }
+
+    pos += len;  // consume ID field
+
+    // Parse Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume size field
+
+    if ((cluster_stop >= 0) && (pos > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // pos now points to start of payload
+
+    if (size == 0)  // weird
+      continue;
+
+    // const long long block_start = pos;
+    const long long block_stop = pos + size;
+
+    if (cluster_stop >= 0) {
+      if (block_stop > cluster_stop) {
+        if ((id == 0x20) || (id == 0x23))
+          return E_FILE_FORMAT_INVALID;
+
+        pos = cluster_stop;
+        break;
+      }
+    } else if ((total >= 0) && (block_stop > total)) {
+      m_element_size = total - m_element_start;
+      pos = total;
+      break;
+    } else if (block_stop > avail) {
+      len = static_cast<long>(size);
+      return E_BUFFER_NOT_FULL;
+    }
+
+    Cluster* const this_ = const_cast<Cluster*>(this);
+
+    if (id == 0x20)  // BlockGroup
+      return this_->ParseBlockGroup(size, pos, len);
+
+    if (id == 0x23)  // SimpleBlock
+      return this_->ParseSimpleBlock(size, pos, len);
+
+    pos += size;  // consume payload
+    assert((cluster_stop < 0) || (pos <= cluster_stop));
+  }
+
+  assert(m_element_size > 0);
+
+  m_pos = pos;
+  assert((cluster_stop < 0) || (m_pos <= cluster_stop));
+
+  if (m_entries_count > 0) {
+    const long idx = m_entries_count - 1;
+
+    const BlockEntry* const pLast = m_entries[idx];
+    assert(pLast);
+
+    const Block* const pBlock = pLast->GetBlock();
+    assert(pBlock);
+
+    const long long start = pBlock->m_start;
+
+    if ((total >= 0) && (start > total))
+      return -1;  // defend against truncated stream
+
+    const long long size = pBlock->m_size;
+
+    const long long stop = start + size;
+    assert((cluster_stop < 0) || (stop <= cluster_stop));
+
+    if ((total >= 0) && (stop > total))
+      return -1;  // defend against truncated stream
+  }
+
+  return 1;  // no more entries
+}
+
+long Cluster::ParseSimpleBlock(long long block_size, long long& pos,
+                               long& len) {
+  const long long block_start = pos;
+  const long long block_stop = pos + block_size;
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long total, avail;
+
+  long status = pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  // parse track number
+
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  long long result = GetUIntLength(pReader, pos, len);
+
+  if (result < 0)  // error
+    return static_cast<long>(result);
+
+  if (result > 0)  // weird
+    return E_BUFFER_NOT_FULL;
+
+  if ((pos + len) > block_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
+
+  const long long track = ReadUInt(pReader, pos, len);
+
+  if (track < 0)  // error
+    return static_cast<long>(track);
+
+  if (track == 0)
+    return E_FILE_FORMAT_INVALID;
 
 #if 0
     //TODO(matthewjheaney)
@@ -7518,228 +6525,213 @@
         return E_FILE_FORMAT_INVALID;
 #endif
 
-    pos += len;  //consume track number
+  pos += len;  // consume track number
 
-    if ((pos + 2) > block_stop)
-        return E_FILE_FORMAT_INVALID;
+  if ((pos + 2) > block_stop)
+    return E_FILE_FORMAT_INVALID;
 
-    if ((pos + 2) > avail)
-    {
-        len = 2;
-        return E_BUFFER_NOT_FULL;
-    }
+  if ((pos + 2) > avail) {
+    len = 2;
+    return E_BUFFER_NOT_FULL;
+  }
 
-    pos += 2;  //consume timecode
+  pos += 2;  // consume timecode
 
-    if ((pos + 1) > block_stop)
-        return E_FILE_FORMAT_INVALID;
+  if ((pos + 1) > block_stop)
+    return E_FILE_FORMAT_INVALID;
 
-    if ((pos + 1) > avail)
-    {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
-    }
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
 
-    unsigned char flags;
+  unsigned char flags;
 
-    status = pReader->Read(pos, 1, &flags);
+  status = pReader->Read(pos, 1, &flags);
 
-    if (status < 0)  //error or underflow
-    {
-        len = 1;
-        return status;
-    }
+  if (status < 0) {  // error or underflow
+    len = 1;
+    return status;
+  }
 
-    ++pos;  //consume flags byte
-    assert(pos <= avail);
+  ++pos;  // consume flags byte
+  assert(pos <= avail);
 
-    if (pos >= block_stop)
-        return E_FILE_FORMAT_INVALID;
+  if (pos >= block_stop)
+    return E_FILE_FORMAT_INVALID;
 
-    const int lacing = int(flags & 0x06) >> 1;
+  const int lacing = int(flags & 0x06) >> 1;
 
-    if ((lacing != 0) && (block_stop > avail))
-    {
-        len = static_cast<long>(block_stop - pos);
-        return E_BUFFER_NOT_FULL;
-    }
+  if ((lacing != 0) && (block_stop > avail)) {
+    len = static_cast<long>(block_stop - pos);
+    return E_BUFFER_NOT_FULL;
+  }
 
-    status = CreateBlock(0x23,  //simple block id
-                         block_start, block_size,
-                         0);  //DiscardPadding
+  status = CreateBlock(0x23,  // simple block id
+                       block_start, block_size,
+                       0);  // DiscardPadding
 
-    if (status != 0)
-        return status;
+  if (status != 0)
+    return status;
 
-    m_pos = block_stop;
+  m_pos = block_stop;
 
-    return 0;  //success
+  return 0;  // success
 }
 
+long Cluster::ParseBlockGroup(long long payload_size, long long& pos,
+                              long& len) {
+  const long long payload_start = pos;
+  const long long payload_stop = pos + payload_size;
 
-long Cluster::ParseBlockGroup(
-    long long payload_size,
-    long long& pos,
-    long& len)
-{
-    const long long payload_start = pos;
-    const long long payload_stop = pos + payload_size;
+  IMkvReader* const pReader = m_pSegment->m_pReader;
 
-    IMkvReader* const pReader = m_pSegment->m_pReader;
+  long long total, avail;
 
-    long long total, avail;
+  long status = pReader->Length(&total, &avail);
 
-    long status = pReader->Length(&total, &avail);
+  if (status < 0)  // error
+    return status;
 
-    if (status < 0)  //error
-        return status;
+  assert((total < 0) || (avail <= total));
 
-    assert((total < 0) || (avail <= total));
+  if ((total >= 0) && (payload_stop > total))
+    return E_FILE_FORMAT_INVALID;
 
-    if ((total >= 0) && (payload_stop > total))
-        return E_FILE_FORMAT_INVALID;
+  if (payload_stop > avail) {
+    len = static_cast<long>(payload_size);
+    return E_BUFFER_NOT_FULL;
+  }
 
-    if (payload_stop > avail)
-    {
-         len = static_cast<long>(payload_size);
-         return E_BUFFER_NOT_FULL;
+  long long discard_padding = 0;
+
+  while (pos < payload_stop) {
+    // parse sub-block element ID
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
     }
 
-    long long discard_padding = 0;
+    long long result = GetUIntLength(pReader, pos, len);
 
-    while (pos < payload_stop)
-    {
-        //parse sub-block element ID
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-        long long result = GetUIntLength(pReader, pos, len);
+    if ((pos + len) > payload_stop)
+      return E_FILE_FORMAT_INVALID;
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
+    const long long id = ReadUInt(pReader, pos, len);
 
-        if ((pos + len) > payload_stop)
-            return E_FILE_FORMAT_INVALID;
+    if (id < 0)  // error
+      return static_cast<long>(id);
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
+    if (id == 0)  // not a valid ID
+      return E_FILE_FORMAT_INVALID;
 
-        const long long id = ReadUInt(pReader, pos, len);
+    pos += len;  // consume ID field
 
-        if (id < 0) //error
-            return static_cast<long>(id);
+    // Parse Size
 
-        if (id == 0)  //not a value ID
-            return E_FILE_FORMAT_INVALID;
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
 
-        pos += len;  //consume ID field
+    result = GetUIntLength(pReader, pos, len);
 
-        //Parse Size
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-        result = GetUIntLength(pReader, pos, len);
+    if ((pos + len) > payload_stop)
+      return E_FILE_FORMAT_INVALID;
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
+    const long long size = ReadUInt(pReader, pos, len);
 
-        if ((pos + len) > payload_stop)
-            return E_FILE_FORMAT_INVALID;
+    if (size < 0)  // error
+      return static_cast<long>(size);
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
+    pos += len;  // consume size field
 
-        const long long size = ReadUInt(pReader, pos, len);
+    // pos now points to start of sub-block group payload
 
-        if (size < 0)  //error
-            return static_cast<long>(size);
+    if (pos > payload_stop)
+      return E_FILE_FORMAT_INVALID;
 
-        pos += len;  //consume size field
+    if (size == 0)  // weird
+      continue;
 
-        //pos now points to start of sub-block group payload
+    const long long unknown_size = (1LL << (7 * len)) - 1;
 
-        if (pos > payload_stop)
-            return E_FILE_FORMAT_INVALID;
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
 
-        if (size == 0)  //weird
-            continue;
+    if (id == 0x35A2) {  // DiscardPadding
+      result = GetUIntLength(pReader, pos, len);
 
-        const long long unknown_size = (1LL << (7 * len)) - 1;
+      if (result < 0)  // error
+        return static_cast<long>(result);
 
-        if (size == unknown_size)
-            return E_FILE_FORMAT_INVALID;
+      status = UnserializeInt(pReader, pos, len, discard_padding);
 
-        if (id == 0x35A2)  //DiscardPadding
-        {
-            result = GetUIntLength(pReader, pos, len);
+      if (status < 0)  // error
+        return status;
+    }
 
-            if (result < 0)  //error
-                return static_cast<long>(result);
+    if (id != 0x21) {  // sub-part of BlockGroup is not a Block
+      pos += size;  // consume sub-part of block group
 
-            status = UnserializeInt(pReader, pos, len, discard_padding);
+      if (pos > payload_stop)
+        return E_FILE_FORMAT_INVALID;
 
-            if (status < 0)  //error
-                return status;
-        }
+      continue;
+    }
 
-        if (id != 0x21)  //sub-part of BlockGroup is not a Block
-        {
-            pos += size;  //consume sub-part of block group
+    const long long block_stop = pos + size;
 
-            if (pos > payload_stop)
-                return E_FILE_FORMAT_INVALID;
+    if (block_stop > payload_stop)
+      return E_FILE_FORMAT_INVALID;
 
-            continue;
-        }
+    // parse track number
 
-        const long long block_stop = pos + size;
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
 
-        if (block_stop > payload_stop)
-            return E_FILE_FORMAT_INVALID;
+    result = GetUIntLength(pReader, pos, len);
 
-        //parse track number
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
 
-        result = GetUIntLength(pReader, pos, len);
+    if ((pos + len) > block_stop)
+      return E_FILE_FORMAT_INVALID;
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
+    const long long track = ReadUInt(pReader, pos, len);
 
-        if ((pos + len) > block_stop)
-            return E_FILE_FORMAT_INVALID;
+    if (track < 0)  // error
+      return static_cast<long>(track);
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long track = ReadUInt(pReader, pos, len);
-
-        if (track < 0) //error
-            return static_cast<long>(track);
-
-        if (track == 0)
-            return E_FILE_FORMAT_INVALID;
+    if (track == 0)
+      return E_FILE_FORMAT_INVALID;
 
 #if 0
         //TODO(matthewjheaney)
@@ -7771,213 +6763,173 @@
             return E_FILE_FORMAT_INVALID;
 #endif
 
-        pos += len;  //consume track number
+    pos += len;  // consume track number
 
-        if ((pos + 2) > block_stop)
-            return E_FILE_FORMAT_INVALID;
+    if ((pos + 2) > block_stop)
+      return E_FILE_FORMAT_INVALID;
 
-        if ((pos + 2) > avail)
-        {
-            len = 2;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        pos += 2;  //consume timecode
-
-        if ((pos + 1) > block_stop)
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        unsigned char flags;
-
-        status = pReader->Read(pos, 1, &flags);
-
-        if (status < 0)  //error or underflow
-        {
-            len = 1;
-            return status;
-        }
-
-        ++pos;  //consume flags byte
-        assert(pos <= avail);
-
-        if (pos >= block_stop)
-            return E_FILE_FORMAT_INVALID;
-
-        const int lacing = int(flags & 0x06) >> 1;
-
-        if ((lacing != 0) && (block_stop > avail))
-        {
-            len = static_cast<long>(block_stop - pos);
-            return E_BUFFER_NOT_FULL;
-        }
-
-        pos = block_stop;  //consume block-part of block group
-        assert(pos <= payload_stop);
+    if ((pos + 2) > avail) {
+      len = 2;
+      return E_BUFFER_NOT_FULL;
     }
 
-    assert(pos == payload_stop);
+    pos += 2;  // consume timecode
 
-    status = CreateBlock(0x20,  //BlockGroup ID
-                         payload_start, payload_size,
-                         discard_padding);
-    if (status != 0)
-        return status;
+    if ((pos + 1) > block_stop)
+      return E_FILE_FORMAT_INVALID;
 
-    m_pos = payload_stop;
-
-    return 0;  //success
-}
-
-
-long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const
-{
-    assert(m_pos >= m_element_start);
-
-    pEntry = NULL;
-
-    if (index < 0)
-        return -1;  //generic error
-
-    if (m_entries_count < 0)
-        return E_BUFFER_NOT_FULL;
-
-    assert(m_entries);
-    assert(m_entries_size > 0);
-    assert(m_entries_count <= m_entries_size);
-
-    if (index < m_entries_count)
-    {
-        pEntry = m_entries[index];
-        assert(pEntry);
-
-        return 1;  //found entry
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
     }
 
-    if (m_element_size < 0)        //we don't know cluster end yet
-        return E_BUFFER_NOT_FULL;  //underflow
+    unsigned char flags;
 
-    const long long element_stop = m_element_start + m_element_size;
+    status = pReader->Read(pos, 1, &flags);
 
-    if (m_pos >= element_stop)
-        return 0;  //nothing left to parse
-
-    return E_BUFFER_NOT_FULL;  //underflow, since more remains to be parsed
-}
-
-
-Cluster* Cluster::Create(
-    Segment* pSegment,
-    long idx,
-    long long off)
-    //long long element_size)
-{
-    assert(pSegment);
-    assert(off >= 0);
-
-    const long long element_start = pSegment->m_start + off;
-
-    Cluster* const pCluster = new Cluster(pSegment,
-                                          idx,
-                                          element_start);
-                                          //element_size);
-    assert(pCluster);
-
-    return pCluster;
-}
-
-
-Cluster::Cluster() :
-    m_pSegment(NULL),
-    m_element_start(0),
-    m_index(0),
-    m_pos(0),
-    m_element_size(0),
-    m_timecode(0),
-    m_entries(NULL),
-    m_entries_size(0),
-    m_entries_count(0)  //means "no entries"
-{
-}
-
-
-Cluster::Cluster(
-    Segment* pSegment,
-    long idx,
-    long long element_start
-    /* long long element_size */ ) :
-    m_pSegment(pSegment),
-    m_element_start(element_start),
-    m_index(idx),
-    m_pos(element_start),
-    m_element_size(-1 /* element_size */ ),
-    m_timecode(-1),
-    m_entries(NULL),
-    m_entries_size(0),
-    m_entries_count(-1)  //means "has not been parsed yet"
-{
-}
-
-
-Cluster::~Cluster()
-{
-    if (m_entries_count <= 0)
-        return;
-
-    BlockEntry** i = m_entries;
-    BlockEntry** const j = m_entries + m_entries_count;
-
-    while (i != j)
-    {
-         BlockEntry* p = *i++;
-         assert(p);
-
-         delete p;
+    if (status < 0) {  // error or underflow
+      len = 1;
+      return status;
     }
 
-    delete[] m_entries;
+    ++pos;  // consume flags byte
+    assert(pos <= avail);
+
+    if (pos >= block_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    const int lacing = int(flags & 0x06) >> 1;
+
+    if ((lacing != 0) && (block_stop > avail)) {
+      len = static_cast<long>(block_stop - pos);
+      return E_BUFFER_NOT_FULL;
+    }
+
+    pos = block_stop;  // consume block-part of block group
+    assert(pos <= payload_stop);
+  }
+
+  assert(pos == payload_stop);
+
+  status = CreateBlock(0x20,  // BlockGroup ID
+                       payload_start, payload_size, discard_padding);
+  if (status != 0)
+    return status;
+
+  m_pos = payload_stop;
+
+  return 0;  // success
 }
 
+long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const {
+  assert(m_pos >= m_element_start);
 
-bool Cluster::EOS() const
+  pEntry = NULL;
+
+  if (index < 0)
+    return -1;  // generic error
+
+  if (m_entries_count < 0)
+    return E_BUFFER_NOT_FULL;
+
+  assert(m_entries);
+  assert(m_entries_size > 0);
+  assert(m_entries_count <= m_entries_size);
+
+  if (index < m_entries_count) {
+    pEntry = m_entries[index];
+    assert(pEntry);
+
+    return 1;  // found entry
+  }
+
+  if (m_element_size < 0)  // we don't know cluster end yet
+    return E_BUFFER_NOT_FULL;  // underflow
+
+  const long long element_stop = m_element_start + m_element_size;
+
+  if (m_pos >= element_stop)
+    return 0;  // nothing left to parse
+
+  return E_BUFFER_NOT_FULL;  // underflow, since more remains to be parsed
+}
+
+Cluster* Cluster::Create(Segment* pSegment, long idx, long long off)
+// long long element_size)
 {
-    return (m_pSegment == NULL);
+  assert(pSegment);
+  assert(off >= 0);
+
+  const long long element_start = pSegment->m_start + off;
+
+  Cluster* const pCluster = new Cluster(pSegment, idx, element_start);
+  // element_size);
+  assert(pCluster);
+
+  return pCluster;
 }
 
+Cluster::Cluster()
+    : m_pSegment(NULL),
+      m_element_start(0),
+      m_index(0),
+      m_pos(0),
+      m_element_size(0),
+      m_timecode(0),
+      m_entries(NULL),
+      m_entries_size(0),
+      m_entries_count(0)  // means "no entries"
+{}
 
-long Cluster::GetIndex() const
-{
-    return m_index;
+Cluster::Cluster(Segment* pSegment, long idx, long long element_start
+                 /* long long element_size */)
+    : m_pSegment(pSegment),
+      m_element_start(element_start),
+      m_index(idx),
+      m_pos(element_start),
+      m_element_size(-1 /* element_size */),
+      m_timecode(-1),
+      m_entries(NULL),
+      m_entries_size(0),
+      m_entries_count(-1)  // means "has not been parsed yet"
+{}
+
+Cluster::~Cluster() {
+  if (m_entries_count <= 0)
+    return;
+
+  BlockEntry** i = m_entries;
+  BlockEntry** const j = m_entries + m_entries_count;
+
+  while (i != j) {
+    BlockEntry* p = *i++;
+    assert(p);
+
+    delete p;
+  }
+
+  delete[] m_entries;
 }
 
+bool Cluster::EOS() const { return (m_pSegment == NULL); }
 
-long long Cluster::GetPosition() const
-{
-    const long long pos = m_element_start - m_pSegment->m_start;
-    assert(pos >= 0);
+long Cluster::GetIndex() const { return m_index; }
 
-    return pos;
+long long Cluster::GetPosition() const {
+  const long long pos = m_element_start - m_pSegment->m_start;
+  assert(pos >= 0);
+
+  return pos;
 }
 
-
-long long Cluster::GetElementSize() const
-{
-    return m_element_size;
-}
-
+long long Cluster::GetElementSize() const { return m_element_size; }
 
 #if 0
 bool Cluster::HasBlockEntries(
     const Segment* pSegment,
-    long long off)  //relative to start of segment payload
-{
+    long long off) {
     assert(pSegment);
-    assert(off >= 0);  //relative to segment
+    assert(off >= 0);  //relative to start of segment payload
 
     IMkvReader* const pReader = pSegment->m_pReader;
 
@@ -8034,631 +6986,558 @@
 }
 #endif
 
-
 long Cluster::HasBlockEntries(
     const Segment* pSegment,
-    long long off,  //relative to start of segment payload
-    long long& pos,
-    long& len)
-{
-    assert(pSegment);
-    assert(off >= 0);  //relative to segment
+    long long off,  // relative to start of segment payload
+    long long& pos, long& len) {
+  assert(pSegment);
+  assert(off >= 0);  // relative to segment
 
-    IMkvReader* const pReader = pSegment->m_pReader;
+  IMkvReader* const pReader = pSegment->m_pReader;
 
-    long long total, avail;
+  long long total, avail;
 
-    long status = pReader->Length(&total, &avail);
+  long status = pReader->Length(&total, &avail);
 
-    if (status < 0)  //error
-        return status;
+  if (status < 0)  // error
+    return status;
 
-    assert((total < 0) || (avail <= total));
+  assert((total < 0) || (avail <= total));
 
-    pos = pSegment->m_start + off;  //absolute
+  pos = pSegment->m_start + off;  // absolute
 
-    if ((total >= 0) && (pos >= total))
-        return 0;  //we don't even have a complete cluster
+  if ((total >= 0) && (pos >= total))
+    return 0;  // we don't even have a complete cluster
 
-    const long long segment_stop =
-        (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size;
+  const long long segment_stop =
+      (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size;
 
-    long long cluster_stop = -1;  //interpreted later to mean "unknown size"
+  long long cluster_stop = -1;  // interpreted later to mean "unknown size"
 
-    {
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        long long result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //need more data
-            return E_BUFFER_NOT_FULL;
-
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((total >= 0) && ((pos + len) > total))
-            return 0;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long id = ReadUInt(pReader, pos, len);
-
-        if (id < 0)  //error
-            return static_cast<long>(id);
-
-        if (id != 0x0F43B675)  //weird: not cluster ID
-            return -1;         //generic error
-
-        pos += len;  //consume Cluster ID field
-
-        //read size field
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //weird
-            return E_BUFFER_NOT_FULL;
-
-        if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((total >= 0) && ((pos + len) > total))
-            return 0;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long size = ReadUInt(pReader, pos, len);
-
-        if (size < 0)  //error
-            return static_cast<long>(size);
-
-        if (size == 0)
-            return 0;  //cluster does not have entries
-
-        pos += len;  //consume size field
-
-        //pos now points to start of payload
-
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-        if (size != unknown_size)
-        {
-            cluster_stop = pos + size;
-            assert(cluster_stop >= 0);
-
-            if ((segment_stop >= 0) && (cluster_stop > segment_stop))
-                return E_FILE_FORMAT_INVALID;
-
-            if ((total >= 0) && (cluster_stop > total))
-                //return E_FILE_FORMAT_INVALID;  //too conservative
-                return 0;  //cluster does not have any entries
-        }
+  {
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
     }
 
-    for (;;)
-    {
-        if ((cluster_stop >= 0) && (pos >= cluster_stop))
-            return 0;  //no entries detected
+    long long result = GetUIntLength(pReader, pos, len);
 
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
+    if (result < 0)  // error
+      return static_cast<long>(result);
 
-        long long result = GetUIntLength(pReader, pos, len);
+    if (result > 0)  // need more data
+      return E_BUFFER_NOT_FULL;
 
-        if (result < 0)  //error
-            return static_cast<long>(result);
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
 
-        if (result > 0)  //need more data
-            return E_BUFFER_NOT_FULL;
+    if ((total >= 0) && ((pos + len) > total))
+      return 0;
 
-        if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
 
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
+    const long long id = ReadUInt(pReader, pos, len);
 
-        const long long id = ReadUInt(pReader, pos, len);
+    if (id < 0)  // error
+      return static_cast<long>(id);
 
-        if (id < 0)  //error
-            return static_cast<long>(id);
+    if (id != 0x0F43B675)  // weird: not cluster ID
+      return -1;  // generic error
 
-        //This is the distinguished set of ID's we use to determine
-        //that we have exhausted the sub-element's inside the cluster
-        //whose ID we parsed earlier.
+    pos += len;  // consume Cluster ID field
 
-        if (id == 0x0F43B675)  //Cluster ID
-            return 0;  //no entries found
+    // read size field
 
-        if (id == 0x0C53BB6B)  //Cues ID
-            return 0;  //no entries found
-
-        pos += len;  //consume id field
-
-        if ((cluster_stop >= 0) && (pos >= cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        //read size field
-
-        if ((pos + 1) > avail)
-        {
-            len = 1;
-            return E_BUFFER_NOT_FULL;
-        }
-
-        result = GetUIntLength(pReader, pos, len);
-
-        if (result < 0)  //error
-            return static_cast<long>(result);
-
-        if (result > 0)  //underflow
-            return E_BUFFER_NOT_FULL;
-
-        if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if ((pos + len) > avail)
-            return E_BUFFER_NOT_FULL;
-
-        const long long size = ReadUInt(pReader, pos, len);
-
-        if (size < 0)  //error
-            return static_cast<long>(size);
-
-        pos += len;  //consume size field
-
-        //pos now points to start of payload
-
-        if ((cluster_stop >= 0) && (pos > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if (size == 0)  //weird
-            continue;
-
-        const long long unknown_size = (1LL << (7 * len)) - 1;
-
-        if (size == unknown_size)
-            return E_FILE_FORMAT_INVALID;  //not supported inside cluster
-
-        if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
-            return E_FILE_FORMAT_INVALID;
-
-        if (id == 0x20)  //BlockGroup ID
-            return 1;    //have at least one entry
-
-        if (id == 0x23)  //SimpleBlock ID
-            return 1;    //have at least one entry
-
-        pos += size;  //consume payload
-        assert((cluster_stop < 0) || (pos <= cluster_stop));
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
     }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((total >= 0) && ((pos + len) > total))
+      return 0;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    if (size == 0)
+      return 0;  // cluster does not have entries
+
+    pos += len;  // consume size field
+
+    // pos now points to start of payload
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size != unknown_size) {
+      cluster_stop = pos + size;
+      assert(cluster_stop >= 0);
+
+      if ((segment_stop >= 0) && (cluster_stop > segment_stop))
+        return E_FILE_FORMAT_INVALID;
+
+      if ((total >= 0) && (cluster_stop > total))
+        // return E_FILE_FORMAT_INVALID;  //too conservative
+        return 0;  // cluster does not have any entries
+    }
+  }
+
+  for (;;) {
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      return 0;  // no entries detected
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // need more data
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadUInt(pReader, pos, len);
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    // This is the distinguished set of IDs we use to determine
+    // that we have exhausted the sub-elements inside the cluster
+    // whose ID we parsed earlier.
+
+    if (id == 0x0F43B675)  // Cluster ID
+      return 0;  // no entries found
+
+    if (id == 0x0C53BB6B)  // Cues ID
+      return 0;  // no entries found
+
+    pos += len;  // consume id field
+
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // read size field
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // underflow
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    pos += len;  // consume size field
+
+    // pos now points to start of payload
+
+    if ((cluster_stop >= 0) && (pos > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if (size == 0)  // weird
+      continue;
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;  // not supported inside cluster
+
+    if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == 0x20)  // BlockGroup ID
+      return 1;  // have at least one entry
+
+    if (id == 0x23)  // SimpleBlock ID
+      return 1;  // have at least one entry
+
+    pos += size;  // consume payload
+    assert((cluster_stop < 0) || (pos <= cluster_stop));
+  }
 }
 
+long long Cluster::GetTimeCode() const {
+  long long pos;
+  long len;
 
-long long Cluster::GetTimeCode() const
-{
+  const long status = Load(pos, len);
+
+  if (status < 0)  // error
+    return status;
+
+  return m_timecode;
+}
+
+long long Cluster::GetTime() const {
+  const long long tc = GetTimeCode();
+
+  if (tc < 0)
+    return tc;
+
+  const SegmentInfo* const pInfo = m_pSegment->GetInfo();
+  assert(pInfo);
+
+  const long long scale = pInfo->GetTimeCodeScale();
+  assert(scale >= 1);
+
+  const long long t = m_timecode * scale;
+
+  return t;
+}
+
+long long Cluster::GetFirstTime() const {
+  const BlockEntry* pEntry;
+
+  const long status = GetFirst(pEntry);
+
+  if (status < 0)  // error
+    return status;
+
+  if (pEntry == NULL)  // empty cluster
+    return GetTime();
+
+  const Block* const pBlock = pEntry->GetBlock();
+  assert(pBlock);
+
+  return pBlock->GetTime(this);
+}
+
+long long Cluster::GetLastTime() const {
+  const BlockEntry* pEntry;
+
+  const long status = GetLast(pEntry);
+
+  if (status < 0)  // error
+    return status;
+
+  if (pEntry == NULL)  // empty cluster
+    return GetTime();
+
+  const Block* const pBlock = pEntry->GetBlock();
+  assert(pBlock);
+
+  return pBlock->GetTime(this);
+}
+
+long Cluster::CreateBlock(long long id,
+                          long long pos,  // absolute pos of payload
+                          long long size, long long discard_padding) {
+  assert((id == 0x20) || (id == 0x23));  // BlockGroup or SimpleBlock
+
+  if (m_entries_count < 0) {  // haven't parsed anything yet
+    assert(m_entries == NULL);
+    assert(m_entries_size == 0);
+
+    m_entries_size = 1024;
+    m_entries = new BlockEntry* [m_entries_size];
+
+    m_entries_count = 0;
+  } else {
+    assert(m_entries);
+    assert(m_entries_size > 0);
+    assert(m_entries_count <= m_entries_size);
+
+    if (m_entries_count >= m_entries_size) {
+      const long entries_size = 2 * m_entries_size;
+
+      BlockEntry** const entries = new BlockEntry* [entries_size];
+      assert(entries);
+
+      BlockEntry** src = m_entries;
+      BlockEntry** const src_end = src + m_entries_count;
+
+      BlockEntry** dst = entries;
+
+      while (src != src_end)
+        *dst++ = *src++;
+
+      delete[] m_entries;
+
+      m_entries = entries;
+      m_entries_size = entries_size;
+    }
+  }
+
+  if (id == 0x20)  // BlockGroup ID
+    return CreateBlockGroup(pos, size, discard_padding);
+  else  // SimpleBlock ID
+    return CreateSimpleBlock(pos, size);
+}
+
+long Cluster::CreateBlockGroup(long long start_offset, long long size,
+                               long long discard_padding) {
+  assert(m_entries);
+  assert(m_entries_size > 0);
+  assert(m_entries_count >= 0);
+  assert(m_entries_count < m_entries_size);
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = start_offset;
+  const long long stop = start_offset + size;
+
+  // For WebM files, there is a bias towards previous reference times
+  // (in order to support alt-ref frames, which refer back to the previous
+  // keyframe).  Normally a 0 value is not possible, but here we tentatively
+  // allow 0 as the value of a reference frame, with the interpretation
+  // that this is a "previous" reference time.
+
+  long long prev = 1;  // nonce
+  long long next = 0;  // nonce
+  long long duration = -1;  // really, this is unsigned
+
+  long long bpos = -1;
+  long long bsize = -1;
+
+  while (pos < stop) {
+    long len;
+    const long long id = ReadUInt(pReader, pos, len);
+    assert(id >= 0);  // TODO
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    assert(size >= 0);  // TODO
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume size
+
+    if (id == 0x21) {  // Block ID
+      if (bpos < 0) {  // record only the first Block seen
+        bpos = pos;
+        bsize = size;
+      }
+    } else if (id == 0x1B) {  // Duration ID
+      assert(size <= 8);
+
+      duration = UnserializeUInt(pReader, pos, size);
+      assert(duration >= 0);  // TODO
+    } else if (id == 0x7B) {  // ReferenceBlock
+      assert(size <= 8);
+      const long size_ = static_cast<long>(size);
+
+      long long time;
+
+      long status = UnserializeInt(pReader, pos, size_, time);
+      assert(status == 0);
+      if (status != 0)
+        return -1;
+
+      if (time <= 0)  // see note above
+        prev = time;
+      else  // weird
+        next = time;
+    }
+
+    pos += size;  // consume payload
+    assert(pos <= stop);
+  }
+
+  assert(pos == stop);
+  assert(bpos >= 0);
+  assert(bsize >= 0);
+
+  const long idx = m_entries_count;
+
+  BlockEntry** const ppEntry = m_entries + idx;
+  BlockEntry*& pEntry = *ppEntry;
+
+  pEntry = new (std::nothrow)
+      BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding);
+
+  if (pEntry == NULL)
+    return -1;  // generic error
+
+  BlockGroup* const p = static_cast<BlockGroup*>(pEntry);
+
+  const long status = p->Parse();
+
+  if (status == 0) {  // success
+    ++m_entries_count;
+    return 0;
+  }
+
+  delete pEntry;
+  pEntry = 0;
+
+  return status;
+}
+
+long Cluster::CreateSimpleBlock(long long st, long long sz) {
+  assert(m_entries);
+  assert(m_entries_size > 0);
+  assert(m_entries_count >= 0);
+  assert(m_entries_count < m_entries_size);
+
+  const long idx = m_entries_count;
+
+  BlockEntry** const ppEntry = m_entries + idx;
+  BlockEntry*& pEntry = *ppEntry;
+
+  pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz);
+
+  if (pEntry == NULL)
+    return -1;  // generic error
+
+  SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry);
+
+  const long status = p->Parse();
+
+  if (status == 0) {
+    ++m_entries_count;
+    return 0;
+  }
+
+  delete pEntry;
+  pEntry = 0;
+
+  return status;
+}
+
+long Cluster::GetFirst(const BlockEntry*& pFirst) const {
+  if (m_entries_count <= 0) {
     long long pos;
     long len;
 
-    const long status = Load(pos, len);
+    const long status = Parse(pos, len);
 
-    if (status < 0) //error
-        return status;
-
-    return m_timecode;
-}
-
-
-long long Cluster::GetTime() const
-{
-    const long long tc = GetTimeCode();
-
-    if (tc < 0)
-        return tc;
-
-    const SegmentInfo* const pInfo = m_pSegment->GetInfo();
-    assert(pInfo);
-
-    const long long scale = pInfo->GetTimeCodeScale();
-    assert(scale >= 1);
-
-    const long long t = m_timecode * scale;
-
-    return t;
-}
-
-
-long long Cluster::GetFirstTime() const
-{
-    const BlockEntry* pEntry;
-
-    const long status = GetFirst(pEntry);
-
-    if (status < 0)  //error
-        return status;
-
-    if (pEntry == NULL)  //empty cluster
-        return GetTime();
-
-    const Block* const pBlock = pEntry->GetBlock();
-    assert(pBlock);
-
-    return pBlock->GetTime(this);
-}
-
-
-long long Cluster::GetLastTime() const
-{
-    const BlockEntry* pEntry;
-
-    const long status = GetLast(pEntry);
-
-    if (status < 0)  //error
-        return status;
-
-    if (pEntry == NULL)  //empty cluster
-        return GetTime();
-
-    const Block* const pBlock = pEntry->GetBlock();
-    assert(pBlock);
-
-    return pBlock->GetTime(this);
-}
-
-
-long Cluster::CreateBlock(
-    long long id,
-    long long pos,   //absolute pos of payload
-    long long size,
-    long long discard_padding)
-{
-    assert((id == 0x20) || (id == 0x23));  //BlockGroup or SimpleBlock
-
-    if (m_entries_count < 0)  //haven't parsed anything yet
-    {
-        assert(m_entries == NULL);
-        assert(m_entries_size == 0);
-
-        m_entries_size = 1024;
-        m_entries = new BlockEntry*[m_entries_size];
-
-        m_entries_count = 0;
-    }
-    else
-    {
-        assert(m_entries);
-        assert(m_entries_size > 0);
-        assert(m_entries_count <= m_entries_size);
-
-        if (m_entries_count >= m_entries_size)
-        {
-            const long entries_size = 2 * m_entries_size;
-
-            BlockEntry** const entries = new BlockEntry*[entries_size];
-            assert(entries);
-
-            BlockEntry** src = m_entries;
-            BlockEntry** const src_end = src + m_entries_count;
-
-            BlockEntry** dst = entries;
-
-            while (src != src_end)
-                *dst++ = *src++;
-
-            delete[] m_entries;
-
-            m_entries = entries;
-            m_entries_size = entries_size;
-        }
+    if (status < 0) {  // error
+      pFirst = NULL;
+      return status;
     }
 
-    if (id == 0x20)  //BlockGroup ID
-        return CreateBlockGroup(pos, size, discard_padding);
-    else  //SimpleBlock ID
-        return CreateSimpleBlock(pos, size);
+    if (m_entries_count <= 0) {  // empty cluster
+      pFirst = NULL;
+      return 0;
+    }
+  }
+
+  assert(m_entries);
+
+  pFirst = m_entries[0];
+  assert(pFirst);
+
+  return 0;  // success
 }
 
+long Cluster::GetLast(const BlockEntry*& pLast) const {
+  for (;;) {
+    long long pos;
+    long len;
 
-long Cluster::CreateBlockGroup(
-    long long start_offset,
-    long long size,
-    long long discard_padding)
-{
-    assert(m_entries);
-    assert(m_entries_size > 0);
-    assert(m_entries_count >= 0);
-    assert(m_entries_count < m_entries_size);
+    const long status = Parse(pos, len);
 
-    IMkvReader* const pReader = m_pSegment->m_pReader;
-
-    long long pos = start_offset;
-    const long long stop = start_offset + size;
-
-    //For WebM files, there is a bias towards previous reference times
-    //(in order to support alt-ref frames, which refer back to the previous
-    //keyframe).  Normally a 0 value is not possible, but here we tenatively
-    //allow 0 as the value of a reference frame, with the interpretation
-    //that this is a "previous" reference time.
-
-    long long prev = 1;  //nonce
-    long long next = 0;  //nonce
-    long long duration = -1;  //really, this is unsigned
-
-    long long bpos = -1;
-    long long bsize = -1;
-
-    while (pos < stop)
-    {
-        long len;
-        const long long id = ReadUInt(pReader, pos, len);
-        assert(id >= 0);  //TODO
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume ID
-
-        const long long size = ReadUInt(pReader, pos, len);
-        assert(size >= 0);  //TODO
-        assert((pos + len) <= stop);
-
-        pos += len;  //consume size
-
-        if (id == 0x21) //Block ID
-        {
-            if (bpos < 0) //Block ID
-            {
-                bpos = pos;
-                bsize = size;
-            }
-        }
-        else if (id == 0x1B)  //Duration ID
-        {
-            assert(size <= 8);
-
-            duration = UnserializeUInt(pReader, pos, size);
-            assert(duration >= 0);  //TODO
-        }
-        else if (id == 0x7B)  //ReferenceBlock
-        {
-            assert(size <= 8);
-            const long size_ = static_cast<long>(size);
-
-            long long time;
-
-            long status = UnserializeInt(pReader, pos, size_, time);
-            assert(status == 0);
-            if (status != 0)
-                return -1;
-
-            if (time <= 0)  //see note above
-                prev = time;
-            else  //weird
-                next = time;
-        }
-
-        pos += size;  //consume payload
-        assert(pos <= stop);
+    if (status < 0) {  // error
+      pLast = NULL;
+      return status;
     }
 
-    assert(pos == stop);
-    assert(bpos >= 0);
-    assert(bsize >= 0);
+    if (status > 0)  // no new block
+      break;
+  }
 
-    const long idx = m_entries_count;
-
-    BlockEntry** const ppEntry = m_entries + idx;
-    BlockEntry*& pEntry = *ppEntry;
-
-    pEntry = new (std::nothrow) BlockGroup(
-                                  this,
-                                  idx,
-                                  bpos,
-                                  bsize,
-                                  prev,
-                                  next,
-                                  duration,
-                                  discard_padding);
-
-    if (pEntry == NULL)
-        return -1;  //generic error
-
-    BlockGroup* const p = static_cast<BlockGroup*>(pEntry);
-
-    const long status = p->Parse();
-
-    if (status == 0)  //success
-    {
-        ++m_entries_count;
-        return 0;
-    }
-
-    delete pEntry;
-    pEntry = 0;
-
-    return status;
-}
-
-
-
-long Cluster::CreateSimpleBlock(
-    long long st,
-    long long sz)
-{
-    assert(m_entries);
-    assert(m_entries_size > 0);
-    assert(m_entries_count >= 0);
-    assert(m_entries_count < m_entries_size);
-
-    const long idx = m_entries_count;
-
-    BlockEntry** const ppEntry = m_entries + idx;
-    BlockEntry*& pEntry = *ppEntry;
-
-    pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz);
-
-    if (pEntry == NULL)
-        return -1;  //generic error
-
-    SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry);
-
-    const long status = p->Parse();
-
-    if (status == 0)
-    {
-        ++m_entries_count;
-        return 0;
-    }
-
-    delete pEntry;
-    pEntry = 0;
-
-    return status;
-}
-
-
-long Cluster::GetFirst(const BlockEntry*& pFirst) const
-{
-    if (m_entries_count <= 0)
-    {
-        long long pos;
-        long len;
-
-        const long status = Parse(pos, len);
-
-        if (status < 0)  //error
-        {
-            pFirst = NULL;
-            return status;
-        }
-
-        if (m_entries_count <= 0)  //empty cluster
-        {
-            pFirst = NULL;
-            return 0;
-        }
-    }
-
-    assert(m_entries);
-
-    pFirst = m_entries[0];
-    assert(pFirst);
-
-    return 0;  //success
-}
-
-long Cluster::GetLast(const BlockEntry*& pLast) const
-{
-    for (;;)
-    {
-        long long pos;
-        long len;
-
-        const long status = Parse(pos, len);
-
-        if (status < 0)  //error
-        {
-            pLast = NULL;
-            return status;
-        }
-
-        if (status > 0)  //no new block
-            break;
-    }
-
-    if (m_entries_count <= 0)
-    {
-        pLast = NULL;
-        return 0;
-    }
-
-    assert(m_entries);
-
-    const long idx = m_entries_count - 1;
-
-    pLast = m_entries[idx];
-    assert(pLast);
-
+  if (m_entries_count <= 0) {
+    pLast = NULL;
     return 0;
+  }
+
+  assert(m_entries);
+
+  const long idx = m_entries_count - 1;
+
+  pLast = m_entries[idx];
+  assert(pLast);
+
+  return 0;
 }
 
+long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const {
+  assert(pCurr);
+  assert(m_entries);
+  assert(m_entries_count > 0);
 
-long Cluster::GetNext(
-    const BlockEntry* pCurr,
-    const BlockEntry*& pNext) const
-{
-    assert(pCurr);
+  size_t idx = pCurr->GetIndex();
+  assert(idx < size_t(m_entries_count));
+  assert(m_entries[idx] == pCurr);
+
+  ++idx;
+
+  if (idx >= size_t(m_entries_count)) {
+    long long pos;
+    long len;
+
+    const long status = Parse(pos, len);
+
+    if (status < 0) {  // error
+      pNext = NULL;
+      return status;
+    }
+
+    if (status > 0) {
+      pNext = NULL;
+      return 0;
+    }
+
     assert(m_entries);
     assert(m_entries_count > 0);
-
-    size_t idx = pCurr->GetIndex();
     assert(idx < size_t(m_entries_count));
-    assert(m_entries[idx] == pCurr);
+  }
 
-    ++idx;
+  pNext = m_entries[idx];
+  assert(pNext);
 
-    if (idx >= size_t(m_entries_count))
-    {
-        long long pos;
-        long len;
-
-        const long status = Parse(pos, len);
-
-        if (status < 0)  //error
-        {
-            pNext = NULL;
-            return status;
-        }
-
-        if (status > 0)
-        {
-            pNext = NULL;
-            return 0;
-        }
-
-        assert(m_entries);
-        assert(m_entries_count > 0);
-        assert(idx < size_t(m_entries_count));
-    }
-
-    pNext = m_entries[idx];
-    assert(pNext);
-
-    return 0;
+  return 0;
 }
 
+long Cluster::GetEntryCount() const { return m_entries_count; }
 
-long Cluster::GetEntryCount() const
-{
-    return m_entries_count;
-}
+const BlockEntry* Cluster::GetEntry(const Track* pTrack,
+                                    long long time_ns) const {
+  assert(pTrack);
 
-
-const BlockEntry* Cluster::GetEntry(
-    const Track* pTrack,
-    long long time_ns) const
-{
-    assert(pTrack);
-
-    if (m_pSegment == NULL)  //this is the special EOS cluster
-        return pTrack->GetEOS();
+  if (m_pSegment == NULL)  // this is the special EOS cluster
+    return pTrack->GetEOS();
 
 #if 0
 
@@ -8711,76 +7590,66 @@
 
 #else
 
-    const BlockEntry* pResult = pTrack->GetEOS();
+  const BlockEntry* pResult = pTrack->GetEOS();
 
-    long index = 0;
+  long index = 0;
 
-    for (;;)
-    {
-        if (index >= m_entries_count)
-        {
-            long long pos;
-            long len;
+  for (;;) {
+    if (index >= m_entries_count) {
+      long long pos;
+      long len;
 
-            const long status = Parse(pos, len);
-            assert(status >= 0);
+      const long status = Parse(pos, len);
+      assert(status >= 0);
 
-            if (status > 0)  //completely parsed, and no more entries
-                return pResult;
+      if (status > 0)  // completely parsed, and no more entries
+        return pResult;
 
-            if (status < 0)  //should never happen
-                return 0;
+      if (status < 0)  // should never happen
+        return 0;
 
-            assert(m_entries);
-            assert(index < m_entries_count);
-        }
-
-        const BlockEntry* const pEntry = m_entries[index];
-        assert(pEntry);
-        assert(!pEntry->EOS());
-
-        const Block* const pBlock = pEntry->GetBlock();
-        assert(pBlock);
-
-        if (pBlock->GetTrackNumber() != pTrack->GetNumber())
-        {
-            ++index;
-            continue;
-        }
-
-        if (pTrack->VetEntry(pEntry))
-        {
-            if (time_ns < 0)  //just want first candidate block
-                return pEntry;
-
-            const long long ns = pBlock->GetTime(this);
-
-            if (ns > time_ns)
-                return pResult;
-
-            pResult = pEntry;  //have a candidate
-        }
-        else if (time_ns >= 0)
-        {
-            const long long ns = pBlock->GetTime(this);
-
-            if (ns > time_ns)
-                return pResult;
-        }
-
-        ++index;
+      assert(m_entries);
+      assert(index < m_entries_count);
     }
 
+    const BlockEntry* const pEntry = m_entries[index];
+    assert(pEntry);
+    assert(!pEntry->EOS());
+
+    const Block* const pBlock = pEntry->GetBlock();
+    assert(pBlock);
+
+    if (pBlock->GetTrackNumber() != pTrack->GetNumber()) {
+      ++index;
+      continue;
+    }
+
+    if (pTrack->VetEntry(pEntry)) {
+      if (time_ns < 0)  // just want first candidate block
+        return pEntry;
+
+      const long long ns = pBlock->GetTime(this);
+
+      if (ns > time_ns)
+        return pResult;
+
+      pResult = pEntry;  // have a candidate
+    } else if (time_ns >= 0) {
+      const long long ns = pBlock->GetTime(this);
+
+      if (ns > time_ns)
+        return pResult;
+    }
+
+    ++index;
+  }
+
 #endif
 }
 
-
-const BlockEntry*
-Cluster::GetEntry(
-    const CuePoint& cp,
-    const CuePoint::TrackPosition& tp) const
-{
-    assert(m_pSegment);
+const BlockEntry* Cluster::GetEntry(const CuePoint& cp,
+                                    const CuePoint::TrackPosition& tp) const {
+  assert(m_pSegment);
 
 #if 0
 
@@ -8871,114 +7740,105 @@
 
 #else
 
-    const long long tc = cp.GetTimeCode();
+  const long long tc = cp.GetTimeCode();
 
-    if (tp.m_block > 0)
-    {
-        const long block = static_cast<long>(tp.m_block);
-        const long index = block - 1;
+  if (tp.m_block > 0) {
+    const long block = static_cast<long>(tp.m_block);
+    const long index = block - 1;
 
-        while (index >= m_entries_count)
-        {
-            long long pos;
-            long len;
+    while (index >= m_entries_count) {
+      long long pos;
+      long len;
 
-            const long status = Parse(pos, len);
+      const long status = Parse(pos, len);
 
-            if (status < 0)  //TODO: can this happen?
-                return NULL;
+      if (status < 0)  // TODO: can this happen?
+        return NULL;
 
-            if (status > 0)  //nothing remains to be parsed
-                return NULL;
-        }
-
-        const BlockEntry* const pEntry = m_entries[index];
-        assert(pEntry);
-        assert(!pEntry->EOS());
-
-        const Block* const pBlock = pEntry->GetBlock();
-        assert(pBlock);
-
-        if ((pBlock->GetTrackNumber() == tp.m_track) &&
-            (pBlock->GetTimeCode(this) == tc))
-        {
-            return pEntry;
-        }
+      if (status > 0)  // nothing remains to be parsed
+        return NULL;
     }
 
-    long index = 0;
+    const BlockEntry* const pEntry = m_entries[index];
+    assert(pEntry);
+    assert(!pEntry->EOS());
 
-    for (;;)
-    {
-        if (index >= m_entries_count)
-        {
-            long long pos;
-            long len;
+    const Block* const pBlock = pEntry->GetBlock();
+    assert(pBlock);
 
-            const long status = Parse(pos, len);
-
-            if (status < 0)  //TODO: can this happen?
-                return NULL;
-
-            if (status > 0)  //nothing remains to be parsed
-                return NULL;
-
-            assert(m_entries);
-            assert(index < m_entries_count);
-        }
-
-        const BlockEntry* const pEntry = m_entries[index];
-        assert(pEntry);
-        assert(!pEntry->EOS());
-
-        const Block* const pBlock = pEntry->GetBlock();
-        assert(pBlock);
-
-        if (pBlock->GetTrackNumber() != tp.m_track)
-        {
-            ++index;
-            continue;
-        }
-
-        const long long tc_ = pBlock->GetTimeCode(this);
-
-        if (tc_ < tc)
-        {
-            ++index;
-            continue;
-        }
-
-        if (tc_ > tc)
-            return NULL;
-
-        const Tracks* const pTracks = m_pSegment->GetTracks();
-        assert(pTracks);
-
-        const long tn = static_cast<long>(tp.m_track);
-        const Track* const pTrack = pTracks->GetTrackByNumber(tn);
-
-        if (pTrack == NULL)
-            return NULL;
-
-        const long long type = pTrack->GetType();
-
-        if (type == 2)  //audio
-            return pEntry;
-
-        if (type != 1)  //not video
-            return NULL;
-
-        if (!pBlock->IsKey())
-            return NULL;
-
-        return pEntry;
+    if ((pBlock->GetTrackNumber() == tp.m_track) &&
+        (pBlock->GetTimeCode(this) == tc)) {
+      return pEntry;
     }
+  }
+
+  long index = 0;
+
+  for (;;) {
+    if (index >= m_entries_count) {
+      long long pos;
+      long len;
+
+      const long status = Parse(pos, len);
+
+      if (status < 0)  // TODO: can this happen?
+        return NULL;
+
+      if (status > 0)  // nothing remains to be parsed
+        return NULL;
+
+      assert(m_entries);
+      assert(index < m_entries_count);
+    }
+
+    const BlockEntry* const pEntry = m_entries[index];
+    assert(pEntry);
+    assert(!pEntry->EOS());
+
+    const Block* const pBlock = pEntry->GetBlock();
+    assert(pBlock);
+
+    if (pBlock->GetTrackNumber() != tp.m_track) {
+      ++index;
+      continue;
+    }
+
+    const long long tc_ = pBlock->GetTimeCode(this);
+
+    if (tc_ < tc) {
+      ++index;
+      continue;
+    }
+
+    if (tc_ > tc)
+      return NULL;
+
+    const Tracks* const pTracks = m_pSegment->GetTracks();
+    assert(pTracks);
+
+    const long tn = static_cast<long>(tp.m_track);
+    const Track* const pTrack = pTracks->GetTrackByNumber(tn);
+
+    if (pTrack == NULL)
+      return NULL;
+
+    const long long type = pTrack->GetType();
+
+    if (type == 2)  // audio
+      return pEntry;
+
+    if (type != 1)  // not video
+      return NULL;
+
+    if (!pBlock->IsKey())
+      return NULL;
+
+    return pEntry;
+  }
 
 #endif
-
 }
 
-
 #if 0
 const BlockEntry* Cluster::GetMaxKey(const VideoTrack* pTrack) const
 {
@@ -9015,97 +7875,46 @@
 }
 #endif
 
+BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {}
 
-BlockEntry::BlockEntry(Cluster* p, long idx) :
-    m_pCluster(p),
-    m_index(idx)
-{
+BlockEntry::~BlockEntry() {}
+
+bool BlockEntry::EOS() const { return (GetKind() == kBlockEOS); }
+
+const Cluster* BlockEntry::GetCluster() const { return m_pCluster; }
+
+long BlockEntry::GetIndex() const { return m_index; }
+
+SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start,
+                         long long size)
+    : BlockEntry(pCluster, idx), m_block(start, size, 0) {}
+
+long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); }
+
+BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; }
+
+const Block* SimpleBlock::GetBlock() const { return &m_block; }
+
+BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start,
+                       long long block_size, long long prev, long long next,
+                       long long duration, long long discard_padding)
+    : BlockEntry(pCluster, idx),
+      m_block(block_start, block_size, discard_padding),
+      m_prev(prev),
+      m_next(next),
+      m_duration(duration) {}
+
+long BlockGroup::Parse() {
+  const long status = m_block.Parse(m_pCluster);
+
+  if (status)
+    return status;
+
+  m_block.SetKey((m_prev > 0) && (m_next <= 0));
+
+  return 0;
 }
 
-
-BlockEntry::~BlockEntry()
-{
-}
-
-
-bool BlockEntry::EOS() const
-{
-    return (GetKind() == kBlockEOS);
-}
-
-
-const Cluster* BlockEntry::GetCluster() const
-{
-    return m_pCluster;
-}
-
-
-long BlockEntry::GetIndex() const
-{
-    return m_index;
-}
-
-
-SimpleBlock::SimpleBlock(
-    Cluster* pCluster,
-    long idx,
-    long long start,
-    long long size) :
-    BlockEntry(pCluster, idx),
-    m_block(start, size, 0)
-{
-}
-
-
-long SimpleBlock::Parse()
-{
-    return m_block.Parse(m_pCluster);
-}
-
-
-BlockEntry::Kind SimpleBlock::GetKind() const
-{
-    return kBlockSimple;
-}
-
-
-const Block* SimpleBlock::GetBlock() const
-{
-    return &m_block;
-}
-
-
-BlockGroup::BlockGroup(
-    Cluster* pCluster,
-    long idx,
-    long long block_start,
-    long long block_size,
-    long long prev,
-    long long next,
-    long long duration,
-    long long discard_padding) :
-    BlockEntry(pCluster, idx),
-    m_block(block_start, block_size, discard_padding),
-    m_prev(prev),
-    m_next(next),
-    m_duration(duration)
-{
-}
-
-
-long BlockGroup::Parse()
-{
-    const long status = m_block.Parse(m_pCluster);
-
-    if (status)
-        return status;
-
-    m_block.SetKey((m_prev > 0) && (m_next <= 0));
-
-    return 0;
-}
-
-
 #if 0
 void BlockGroup::ParseBlock(long long start, long long size)
 {
@@ -9122,496 +7931,428 @@
 }
 #endif
 
+BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; }
 
-BlockEntry::Kind BlockGroup::GetKind() const
-{
-    return kBlockGroup;
-}
+const Block* BlockGroup::GetBlock() const { return &m_block; }
 
+long long BlockGroup::GetPrevTimeCode() const { return m_prev; }
 
-const Block* BlockGroup::GetBlock() const
-{
-    return &m_block;
-}
+long long BlockGroup::GetNextTimeCode() const { return m_next; }
 
+long long BlockGroup::GetDurationTimeCode() const { return m_duration; }
 
-long long BlockGroup::GetPrevTimeCode() const
-{
-    return m_prev;
-}
+Block::Block(long long start, long long size_, long long discard_padding)
+    : m_start(start),
+      m_size(size_),
+      m_track(0),
+      m_timecode(-1),
+      m_flags(0),
+      m_frames(NULL),
+      m_frame_count(-1),
+      m_discard_padding(discard_padding) {}
 
+Block::~Block() { delete[] m_frames; }
 
-long long BlockGroup::GetNextTimeCode() const
-{
-    return m_next;
-}
+long Block::Parse(const Cluster* pCluster) {
+  if (pCluster == NULL)
+    return -1;
 
-long long BlockGroup::GetDurationTimeCode() const
-{
-    return m_duration;
-}
+  if (pCluster->m_pSegment == NULL)
+    return -1;
 
-Block::Block(long long start, long long size_, long long discard_padding) :
-    m_start(start),
-    m_size(size_),
-    m_track(0),
-    m_timecode(-1),
-    m_flags(0),
-    m_frames(NULL),
-    m_frame_count(-1),
-    m_discard_padding(discard_padding)
-{
-}
+  assert(m_start >= 0);
+  assert(m_size >= 0);
+  assert(m_track <= 0);
+  assert(m_frames == NULL);
+  assert(m_frame_count <= 0);
 
+  long long pos = m_start;
+  const long long stop = m_start + m_size;
 
-Block::~Block()
-{
-    delete[] m_frames;
-}
+  long len;
 
+  IMkvReader* const pReader = pCluster->m_pSegment->m_pReader;
 
-long Block::Parse(const Cluster* pCluster)
-{
-    if (pCluster == NULL)
-        return -1;
+  m_track = ReadUInt(pReader, pos, len);
 
-    if (pCluster->m_pSegment == NULL)
-        return -1;
+  if (m_track <= 0)
+    return E_FILE_FORMAT_INVALID;
 
-    assert(m_start >= 0);
-    assert(m_size >= 0);
-    assert(m_track <= 0);
-    assert(m_frames == NULL);
-    assert(m_frame_count <= 0);
+  if ((pos + len) > stop)
+    return E_FILE_FORMAT_INVALID;
 
-    long long pos = m_start;
-    const long long stop = m_start + m_size;
+  pos += len;  // consume track number
 
-    long len;
+  if ((stop - pos) < 2)
+    return E_FILE_FORMAT_INVALID;
 
-    IMkvReader* const pReader = pCluster->m_pSegment->m_pReader;
+  long status;
+  long long value;
 
-    m_track = ReadUInt(pReader, pos, len);
+  status = UnserializeInt(pReader, pos, 2, value);
 
-    if (m_track <= 0)
-        return E_FILE_FORMAT_INVALID;
+  if (status)
+    return E_FILE_FORMAT_INVALID;
 
-    if ((pos + len) > stop)
-        return E_FILE_FORMAT_INVALID;
+  if (value < SHRT_MIN)
+    return E_FILE_FORMAT_INVALID;
 
-    pos += len;  //consume track number
+  if (value > SHRT_MAX)
+    return E_FILE_FORMAT_INVALID;
 
-    if ((stop - pos) < 2)
-        return E_FILE_FORMAT_INVALID;
+  m_timecode = static_cast<short>(value);
 
-    long status;
-    long long value;
+  pos += 2;
 
-    status = UnserializeInt(pReader, pos, 2, value);
+  if ((stop - pos) <= 0)
+    return E_FILE_FORMAT_INVALID;
 
-    if (status)
-        return E_FILE_FORMAT_INVALID;
+  status = pReader->Read(pos, 1, &m_flags);
 
-    if (value < SHRT_MIN)
-        return E_FILE_FORMAT_INVALID;
+  if (status)
+    return E_FILE_FORMAT_INVALID;
 
-    if (value > SHRT_MAX)
-        return E_FILE_FORMAT_INVALID;
+  const int lacing = int(m_flags & 0x06) >> 1;
 
-    m_timecode = static_cast<short>(value);
+  ++pos;  // consume flags byte
 
-    pos += 2;
+  if (lacing == 0) {  // no lacing
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
 
-    if ((stop - pos) <= 0)
-        return E_FILE_FORMAT_INVALID;
-
-    status = pReader->Read(pos, 1, &m_flags);
-
-    if (status)
-        return E_FILE_FORMAT_INVALID;
-
-    const int lacing = int(m_flags & 0x06) >> 1;
-
-    ++pos;  //consume flags byte
-
-    if (lacing == 0)  //no lacing
-    {
-        if (pos > stop)
-            return E_FILE_FORMAT_INVALID;
-
-        m_frame_count = 1;
-        m_frames = new Frame[m_frame_count];
-
-        Frame& f = m_frames[0];
-        f.pos = pos;
-
-        const long long frame_size = stop - pos;
-
-        if (frame_size > LONG_MAX)
-            return E_FILE_FORMAT_INVALID;
-
-        f.len = static_cast<long>(frame_size);
-
-        return 0;  //success
-    }
-
-    if (pos >= stop)
-        return E_FILE_FORMAT_INVALID;
-
-    unsigned char biased_count;
-
-    status = pReader->Read(pos, 1, &biased_count);
-
-    if (status)
-        return E_FILE_FORMAT_INVALID;
-
-    ++pos;  //consume frame count
-    assert(pos <= stop);
-
-    m_frame_count = int(biased_count) + 1;
-
+    m_frame_count = 1;
     m_frames = new Frame[m_frame_count];
-    assert(m_frames);
 
-    if (lacing == 1)  //Xiph
-    {
-        Frame* pf = m_frames;
-        Frame* const pf_end = pf + m_frame_count;
+    Frame& f = m_frames[0];
+    f.pos = pos;
 
-        long size = 0;
-        int frame_count = m_frame_count;
+    const long long frame_size = stop - pos;
 
-        while (frame_count > 1)
-        {
-            long frame_size = 0;
+    if (frame_size > LONG_MAX)
+      return E_FILE_FORMAT_INVALID;
 
-            for (;;)
-            {
-                unsigned char val;
+    f.len = static_cast<long>(frame_size);
 
-                if (pos >= stop)
-                    return E_FILE_FORMAT_INVALID;
+    return 0;  // success
+  }
 
-                status = pReader->Read(pos, 1, &val);
+  if (pos >= stop)
+    return E_FILE_FORMAT_INVALID;
 
-                if (status)
-                    return E_FILE_FORMAT_INVALID;
+  unsigned char biased_count;
 
-                ++pos;  //consume xiph size byte
+  status = pReader->Read(pos, 1, &biased_count);
 
-                frame_size += val;
+  if (status)
+    return E_FILE_FORMAT_INVALID;
 
-                if (val < 255)
-                    break;
-            }
+  ++pos;  // consume frame count
+  assert(pos <= stop);
 
-            Frame& f = *pf++;
-            assert(pf < pf_end);
+  m_frame_count = int(biased_count) + 1;
 
-            f.pos = 0;  //patch later
+  m_frames = new Frame[m_frame_count];
+  assert(m_frames);
 
-            f.len = frame_size;
-            size += frame_size;  //contribution of this frame
+  if (lacing == 1) {  // Xiph
+    Frame* pf = m_frames;
+    Frame* const pf_end = pf + m_frame_count;
 
-            --frame_count;
-        }
+    long size = 0;
+    int frame_count = m_frame_count;
 
-        assert(pf < pf_end);
-        assert(pos <= stop);
+    while (frame_count > 1) {
+      long frame_size = 0;
 
-        {
-            Frame& f = *pf++;
-
-            if (pf != pf_end)
-                return E_FILE_FORMAT_INVALID;
-
-            f.pos = 0;  //patch later
-
-            const long long total_size = stop - pos;
-
-            if (total_size < size)
-                return E_FILE_FORMAT_INVALID;
-
-            const long long frame_size = total_size - size;
-
-            if (frame_size > LONG_MAX)
-                return E_FILE_FORMAT_INVALID;
-
-            f.len = static_cast<long>(frame_size);
-        }
-
-        pf = m_frames;
-        while (pf != pf_end)
-        {
-            Frame& f = *pf++;
-            assert((pos + f.len) <= stop);
-
-            f.pos = pos;
-            pos += f.len;
-        }
-
-        assert(pos == stop);
-    }
-    else if (lacing == 2)  //fixed-size lacing
-    {
-        const long long total_size = stop - pos;
-
-        if ((total_size % m_frame_count) != 0)
-            return E_FILE_FORMAT_INVALID;
-
-        const long long frame_size = total_size / m_frame_count;
-
-        if (frame_size > LONG_MAX)
-            return E_FILE_FORMAT_INVALID;
-
-        Frame* pf = m_frames;
-        Frame* const pf_end = pf + m_frame_count;
-
-        while (pf != pf_end)
-        {
-            assert((pos + frame_size) <= stop);
-
-            Frame& f = *pf++;
-
-            f.pos = pos;
-            f.len = static_cast<long>(frame_size);
-
-            pos += frame_size;
-        }
-
-        assert(pos == stop);
-    }
-    else
-    {
-        assert(lacing == 3);  //EBML lacing
+      for (;;) {
+        unsigned char val;
 
         if (pos >= stop)
-            return E_FILE_FORMAT_INVALID;
+          return E_FILE_FORMAT_INVALID;
 
-        long size = 0;
-        int frame_count = m_frame_count;
+        status = pReader->Read(pos, 1, &val);
 
-        long long frame_size = ReadUInt(pReader, pos, len);
+        if (status)
+          return E_FILE_FORMAT_INVALID;
 
-        if (frame_size < 0)
-            return E_FILE_FORMAT_INVALID;
+        ++pos;  // consume xiph size byte
 
-        if (frame_size > LONG_MAX)
-            return E_FILE_FORMAT_INVALID;
+        frame_size += val;
 
-        if ((pos + len) > stop)
-            return E_FILE_FORMAT_INVALID;
+        if (val < 255)
+          break;
+      }
 
-        pos += len; //consume length of size of first frame
+      Frame& f = *pf++;
+      assert(pf < pf_end);
 
-        if ((pos + frame_size) > stop)
-            return E_FILE_FORMAT_INVALID;
+      f.pos = 0;  // patch later
 
-        Frame* pf = m_frames;
-        Frame* const pf_end = pf + m_frame_count;
+      f.len = frame_size;
+      size += frame_size;  // contribution of this frame
 
-        {
-            Frame& curr = *pf;
-
-            curr.pos = 0;  //patch later
-
-            curr.len = static_cast<long>(frame_size);
-            size += curr.len;  //contribution of this frame
-        }
-
-        --frame_count;
-
-        while (frame_count > 1)
-        {
-            if (pos >= stop)
-                return E_FILE_FORMAT_INVALID;
-
-            assert(pf < pf_end);
-
-            const Frame& prev = *pf++;
-            assert(prev.len == frame_size);
-            if (prev.len != frame_size)
-                return E_FILE_FORMAT_INVALID;
-
-            assert(pf < pf_end);
-
-            Frame& curr = *pf;
-
-            curr.pos = 0;  //patch later
-
-            const long long delta_size_ = ReadUInt(pReader, pos, len);
-
-            if (delta_size_ < 0)
-                return E_FILE_FORMAT_INVALID;
-
-            if ((pos + len) > stop)
-                return E_FILE_FORMAT_INVALID;
-
-            pos += len;  //consume length of (delta) size
-            assert(pos <= stop);
-
-            const int exp = 7*len - 1;
-            const long long bias = (1LL << exp) - 1LL;
-            const long long delta_size = delta_size_ - bias;
-
-            frame_size += delta_size;
-
-            if (frame_size < 0)
-                return E_FILE_FORMAT_INVALID;
-
-            if (frame_size > LONG_MAX)
-                return E_FILE_FORMAT_INVALID;
-
-            curr.len = static_cast<long>(frame_size);
-            size += curr.len;  //contribution of this frame
-
-            --frame_count;
-        }
-
-        {
-            assert(pos <= stop);
-            assert(pf < pf_end);
-
-            const Frame& prev = *pf++;
-            assert(prev.len == frame_size);
-            if (prev.len != frame_size)
-                return E_FILE_FORMAT_INVALID;
-
-            assert(pf < pf_end);
-
-            Frame& curr = *pf++;
-            assert(pf == pf_end);
-
-            curr.pos = 0;  //patch later
-
-            const long long total_size = stop - pos;
-
-            if (total_size < size)
-                return E_FILE_FORMAT_INVALID;
-
-            frame_size = total_size - size;
-
-            if (frame_size > LONG_MAX)
-                return E_FILE_FORMAT_INVALID;
-
-            curr.len = static_cast<long>(frame_size);
-        }
-
-        pf = m_frames;
-        while (pf != pf_end)
-        {
-            Frame& f = *pf++;
-            assert((pos + f.len) <= stop);
-
-            f.pos = pos;
-            pos += f.len;
-        }
-
-        assert(pos == stop);
+      --frame_count;
     }
 
-    return 0;  //success
+    assert(pf < pf_end);
+    assert(pos <= stop);
+
+    {
+      Frame& f = *pf++;
+
+      if (pf != pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      f.pos = 0;  // patch later
+
+      const long long total_size = stop - pos;
+
+      if (total_size < size)
+        return E_FILE_FORMAT_INVALID;
+
+      const long long frame_size = total_size - size;
+
+      if (frame_size > LONG_MAX)
+        return E_FILE_FORMAT_INVALID;
+
+      f.len = static_cast<long>(frame_size);
+    }
+
+    pf = m_frames;
+    while (pf != pf_end) {
+      Frame& f = *pf++;
+      assert((pos + f.len) <= stop);
+
+      f.pos = pos;
+      pos += f.len;
+    }
+
+    assert(pos == stop);
+  } else if (lacing == 2) {  // fixed-size lacing
+    const long long total_size = stop - pos;
+
+    if ((total_size % m_frame_count) != 0)
+      return E_FILE_FORMAT_INVALID;
+
+    const long long frame_size = total_size / m_frame_count;
+
+    if (frame_size > LONG_MAX)
+      return E_FILE_FORMAT_INVALID;
+
+    Frame* pf = m_frames;
+    Frame* const pf_end = pf + m_frame_count;
+
+    while (pf != pf_end) {
+      assert((pos + frame_size) <= stop);
+
+      Frame& f = *pf++;
+
+      f.pos = pos;
+      f.len = static_cast<long>(frame_size);
+
+      pos += frame_size;
+    }
+
+    assert(pos == stop);
+  } else {
+    assert(lacing == 3);  // EBML lacing
+
+    if (pos >= stop)
+      return E_FILE_FORMAT_INVALID;
+
+    long size = 0;
+    int frame_count = m_frame_count;
+
+    long long frame_size = ReadUInt(pReader, pos, len);
+
+    if (frame_size < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (frame_size > LONG_MAX)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > stop)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume length of size of first frame
+
+    if ((pos + frame_size) > stop)
+      return E_FILE_FORMAT_INVALID;
+
+    Frame* pf = m_frames;
+    Frame* const pf_end = pf + m_frame_count;
+
+    {
+      Frame& curr = *pf;
+
+      curr.pos = 0;  // patch later
+
+      curr.len = static_cast<long>(frame_size);
+      size += curr.len;  // contribution of this frame
+    }
+
+    --frame_count;
+
+    while (frame_count > 1) {
+      if (pos >= stop)
+        return E_FILE_FORMAT_INVALID;
+
+      assert(pf < pf_end);
+
+      const Frame& prev = *pf++;
+      assert(prev.len == frame_size);
+      if (prev.len != frame_size)
+        return E_FILE_FORMAT_INVALID;
+
+      assert(pf < pf_end);
+
+      Frame& curr = *pf;
+
+      curr.pos = 0;  // patch later
+
+      const long long delta_size_ = ReadUInt(pReader, pos, len);
+
+      if (delta_size_ < 0)
+        return E_FILE_FORMAT_INVALID;
+
+      if ((pos + len) > stop)
+        return E_FILE_FORMAT_INVALID;
+
+      pos += len;  // consume length of (delta) size
+      assert(pos <= stop);
+
+      const int exp = 7 * len - 1;
+      const long long bias = (1LL << exp) - 1LL;
+      const long long delta_size = delta_size_ - bias;
+
+      frame_size += delta_size;
+
+      if (frame_size < 0)
+        return E_FILE_FORMAT_INVALID;
+
+      if (frame_size > LONG_MAX)
+        return E_FILE_FORMAT_INVALID;
+
+      curr.len = static_cast<long>(frame_size);
+      size += curr.len;  // contribution of this frame
+
+      --frame_count;
+    }
+
+    {
+      assert(pos <= stop);
+      assert(pf < pf_end);
+
+      const Frame& prev = *pf++;
+      assert(prev.len == frame_size);
+      if (prev.len != frame_size)
+        return E_FILE_FORMAT_INVALID;
+
+      assert(pf < pf_end);
+
+      Frame& curr = *pf++;
+      assert(pf == pf_end);
+
+      curr.pos = 0;  // patch later
+
+      const long long total_size = stop - pos;
+
+      if (total_size < size)
+        return E_FILE_FORMAT_INVALID;
+
+      frame_size = total_size - size;
+
+      if (frame_size > LONG_MAX)
+        return E_FILE_FORMAT_INVALID;
+
+      curr.len = static_cast<long>(frame_size);
+    }
+
+    pf = m_frames;
+    while (pf != pf_end) {
+      Frame& f = *pf++;
+      assert((pos + f.len) <= stop);
+
+      f.pos = pos;
+      pos += f.len;
+    }
+
+    assert(pos == stop);
+  }
+
+  return 0;  // success
 }
 
+long long Block::GetTimeCode(const Cluster* pCluster) const {
+  if (pCluster == 0)
+    return m_timecode;
 
-long long Block::GetTimeCode(const Cluster* pCluster) const
-{
-    if (pCluster == 0)
-        return m_timecode;
+  const long long tc0 = pCluster->GetTimeCode();
+  assert(tc0 >= 0);
 
-    const long long tc0 = pCluster->GetTimeCode();
-    assert(tc0 >= 0);
+  const long long tc = tc0 + m_timecode;
 
-    const long long tc = tc0 + m_timecode;
-
-    return tc;  //unscaled timecode units
+  return tc;  // unscaled timecode units
 }
 
+long long Block::GetTime(const Cluster* pCluster) const {
+  assert(pCluster);
 
-long long Block::GetTime(const Cluster* pCluster) const
-{
-    assert(pCluster);
+  const long long tc = GetTimeCode(pCluster);
 
-    const long long tc = GetTimeCode(pCluster);
+  const Segment* const pSegment = pCluster->m_pSegment;
+  const SegmentInfo* const pInfo = pSegment->GetInfo();
+  assert(pInfo);
 
-    const Segment* const pSegment = pCluster->m_pSegment;
-    const SegmentInfo* const pInfo = pSegment->GetInfo();
-    assert(pInfo);
+  const long long scale = pInfo->GetTimeCodeScale();
+  assert(scale >= 1);
 
-    const long long scale = pInfo->GetTimeCodeScale();
-    assert(scale >= 1);
+  const long long ns = tc * scale;
 
-    const long long ns = tc * scale;
-
-    return ns;
+  return ns;
 }
 
+long long Block::GetTrackNumber() const { return m_track; }
 
-long long Block::GetTrackNumber() const
-{
-    return m_track;
+bool Block::IsKey() const {
+  return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0);
 }
 
-
-bool Block::IsKey() const
-{
-    return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0);
+void Block::SetKey(bool bKey) {
+  if (bKey)
+    m_flags |= static_cast<unsigned char>(1 << 7);
+  else
+    m_flags &= 0x7F;
 }
 
+bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); }
 
-void Block::SetKey(bool bKey)
-{
-    if (bKey)
-        m_flags |= static_cast<unsigned char>(1 << 7);
-    else
-        m_flags &= 0x7F;
+Block::Lacing Block::GetLacing() const {
+  const int value = int(m_flags & 0x06) >> 1;
+  return static_cast<Lacing>(value);
 }
 
+int Block::GetFrameCount() const { return m_frame_count; }
 
-bool Block::IsInvisible() const
-{
-    return bool(int(m_flags & 0x08) != 0);
+const Block::Frame& Block::GetFrame(int idx) const {
+  assert(idx >= 0);
+  assert(idx < m_frame_count);
+
+  const Frame& f = m_frames[idx];
+  assert(f.pos > 0);
+  assert(f.len > 0);
+
+  return f;
 }
 
+long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const {
+  assert(pReader);
+  assert(buf);
 
-Block::Lacing Block::GetLacing() const
-{
-    const int value = int(m_flags & 0x06) >> 1;
-    return static_cast<Lacing>(value);
+  const long status = pReader->Read(pos, len, buf);
+  return status;
 }
 
+long long Block::GetDiscardPadding() const { return m_discard_padding; }
 
-int Block::GetFrameCount() const
-{
-    return m_frame_count;
-}
-
-
-const Block::Frame& Block::GetFrame(int idx) const
-{
-    assert(idx >= 0);
-    assert(idx < m_frame_count);
-
-    const Frame& f = m_frames[idx];
-    assert(f.pos > 0);
-    assert(f.len > 0);
-
-    return f;
-}
-
-
-long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const
-{
-    assert(pReader);
-    assert(buf);
-
-    const long status = pReader->Read(pos, len, buf);
-    return status;
-}
-
-long long Block::GetDiscardPadding() const
-{
-    return m_discard_padding;
-}
-
-}  //end namespace mkvparser
+}  // end namespace mkvparser
diff --git a/third_party/libwebm/mkvparser.hpp b/third_party/libwebm/mkvparser.hpp
index 7184d26..3e17d07 100644
--- a/third_party/libwebm/mkvparser.hpp
+++ b/third_party/libwebm/mkvparser.hpp
@@ -13,19 +13,18 @@
 #include <cstdio>
 #include <cstddef>
 
-namespace mkvparser
-{
+namespace mkvparser {
 
 const int E_FILE_FORMAT_INVALID = -2;
 const int E_BUFFER_NOT_FULL = -3;
 
-class IMkvReader
-{
-public:
-    virtual int Read(long long pos, long len, unsigned char* buf) = 0;
-    virtual int Length(long long* total, long long* available) = 0;
-protected:
-    virtual ~IMkvReader();
+class IMkvReader {
+ public:
+  virtual int Read(long long pos, long len, unsigned char* buf) = 0;
+  virtual int Length(long long* total, long long* available) = 0;
+
+ protected:
+  virtual ~IMkvReader();
 };
 
 long long GetUIntLength(IMkvReader*, long long, long&);
@@ -35,170 +34,148 @@
 long UnserializeFloat(IMkvReader*, long long pos, long long size, double&);
 long UnserializeInt(IMkvReader*, long long pos, long len, long long& result);
 
-long UnserializeString(
-        IMkvReader*,
-        long long pos,
-        long long size,
-        char*& str);
+long UnserializeString(IMkvReader*, long long pos, long long size, char*& str);
 
-long ParseElementHeader(
-    IMkvReader* pReader,
-    long long& pos,  //consume id and size fields
-    long long stop,  //if you know size of element's parent
-    long long& id,
-    long long& size);
+long ParseElementHeader(IMkvReader* pReader,
+                        long long& pos,  // consume id and size fields
+                        long long stop,  // if you know size of element's parent
+                        long long& id, long long& size);
 
 bool Match(IMkvReader*, long long&, unsigned long, long long&);
 bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&);
 
 void GetVersion(int& major, int& minor, int& build, int& revision);
 
-struct EBMLHeader
-{
-    EBMLHeader();
-    ~EBMLHeader();
-    long long m_version;
-    long long m_readVersion;
-    long long m_maxIdLength;
-    long long m_maxSizeLength;
-    char* m_docType;
-    long long m_docTypeVersion;
-    long long m_docTypeReadVersion;
+struct EBMLHeader {
+  EBMLHeader();
+  ~EBMLHeader();
+  long long m_version;
+  long long m_readVersion;
+  long long m_maxIdLength;
+  long long m_maxSizeLength;
+  char* m_docType;
+  long long m_docTypeVersion;
+  long long m_docTypeReadVersion;
 
-    long long Parse(IMkvReader*, long long&);
-    void Init();
+  long long Parse(IMkvReader*, long long&);
+  void Init();
 };
 
-
 class Segment;
 class Track;
 class Cluster;
 
-class Block
-{
-    Block(const Block&);
-    Block& operator=(const Block&);
+class Block {
+  Block(const Block&);
+  Block& operator=(const Block&);
 
-public:
-    const long long m_start;
-    const long long m_size;
+ public:
+  const long long m_start;
+  const long long m_size;
 
-    Block(long long start, long long size, long long discard_padding);
-    ~Block();
+  Block(long long start, long long size, long long discard_padding);
+  ~Block();
 
-    long Parse(const Cluster*);
+  long Parse(const Cluster*);
 
-    long long GetTrackNumber() const;
-    long long GetTimeCode(const Cluster*) const;  //absolute, but not scaled
-    long long GetTime(const Cluster*) const;      //absolute, and scaled (ns)
-    bool IsKey() const;
-    void SetKey(bool);
-    bool IsInvisible() const;
+  long long GetTrackNumber() const;
+  long long GetTimeCode(const Cluster*) const;  // absolute, but not scaled
+  long long GetTime(const Cluster*) const;  // absolute, and scaled (ns)
+  bool IsKey() const;
+  void SetKey(bool);
+  bool IsInvisible() const;
 
-    enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml };
-    Lacing GetLacing() const;
+  enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml };
+  Lacing GetLacing() const;
 
-    int GetFrameCount() const;  //to index frames: [0, count)
+  int GetFrameCount() const;  // to index frames: [0, count)
 
-    struct Frame
-    {
-        long long pos;  //absolute offset
-        long len;
+  struct Frame {
+    long long pos;  // absolute offset
+    long len;
 
-        long Read(IMkvReader*, unsigned char*) const;
-    };
+    long Read(IMkvReader*, unsigned char*) const;
+  };
 
-    const Frame& GetFrame(int frame_index) const;
+  const Frame& GetFrame(int frame_index) const;
 
-    long long GetDiscardPadding() const;
+  long long GetDiscardPadding() const;
 
-private:
-    long long m_track;   //Track::Number()
-    short m_timecode;  //relative to cluster
-    unsigned char m_flags;
+ private:
+  long long m_track;  // Track::Number()
+  short m_timecode;  // relative to cluster
+  unsigned char m_flags;
 
-    Frame* m_frames;
-    int m_frame_count;
+  Frame* m_frames;
+  int m_frame_count;
 
-protected:
-    const long long m_discard_padding;
+ protected:
+  const long long m_discard_padding;
 };
 
+class BlockEntry {
+  BlockEntry(const BlockEntry&);
+  BlockEntry& operator=(const BlockEntry&);
 
-class BlockEntry
-{
-    BlockEntry(const BlockEntry&);
-    BlockEntry& operator=(const BlockEntry&);
+ protected:
+  BlockEntry(Cluster*, long index);
 
-protected:
-    BlockEntry(Cluster*, long index);
+ public:
+  virtual ~BlockEntry();
 
-public:
-    virtual ~BlockEntry();
+  bool EOS() const;
+  const Cluster* GetCluster() const;
+  long GetIndex() const;
+  virtual const Block* GetBlock() const = 0;
 
-    bool EOS() const;
-    const Cluster* GetCluster() const;
-    long GetIndex() const;
-    virtual const Block* GetBlock() const = 0;
+  enum Kind { kBlockEOS, kBlockSimple, kBlockGroup };
+  virtual Kind GetKind() const = 0;
 
-    enum Kind { kBlockEOS, kBlockSimple, kBlockGroup };
-    virtual Kind GetKind() const = 0;
-
-protected:
-    Cluster* const m_pCluster;
-    const long m_index;
-
+ protected:
+  Cluster* const m_pCluster;
+  const long m_index;
 };
 
+class SimpleBlock : public BlockEntry {
+  SimpleBlock(const SimpleBlock&);
+  SimpleBlock& operator=(const SimpleBlock&);
 
-class SimpleBlock : public BlockEntry
-{
-    SimpleBlock(const SimpleBlock&);
-    SimpleBlock& operator=(const SimpleBlock&);
+ public:
+  SimpleBlock(Cluster*, long index, long long start, long long size);
+  long Parse();
 
-public:
-    SimpleBlock(Cluster*, long index, long long start, long long size);
-    long Parse();
+  Kind GetKind() const;
+  const Block* GetBlock() const;
 
-    Kind GetKind() const;
-    const Block* GetBlock() const;
-
-protected:
-    Block m_block;
-
+ protected:
+  Block m_block;
 };
 
+class BlockGroup : public BlockEntry {
+  BlockGroup(const BlockGroup&);
+  BlockGroup& operator=(const BlockGroup&);
 
-class BlockGroup : public BlockEntry
-{
-    BlockGroup(const BlockGroup&);
-    BlockGroup& operator=(const BlockGroup&);
+ public:
+  BlockGroup(Cluster*, long index,
+             long long block_start,  // absolute pos of block's payload
+             long long block_size,  // size of block's payload
+             long long prev, long long next, long long duration,
+             long long discard_padding);
 
-public:
-    BlockGroup(
-        Cluster*,
-        long index,
-        long long block_start, //absolute pos of block's payload
-        long long block_size,  //size of block's payload
-        long long prev,
-        long long next,
-        long long duration,
-        long long discard_padding);
+  long Parse();
 
-    long Parse();
+  Kind GetKind() const;
+  const Block* GetBlock() const;
 
-    Kind GetKind() const;
-    const Block* GetBlock() const;
+  long long GetPrevTimeCode() const;  // relative to block's time
+  long long GetNextTimeCode() const;  // as above
+  long long GetDurationTimeCode() const;
 
-    long long GetPrevTimeCode() const;  //relative to block's time
-    long long GetNextTimeCode() const;  //as above
-    long long GetDurationTimeCode() const;
-
-private:
-    Block m_block;
-    const long long m_prev;
-    const long long m_next;
-    const long long m_duration;
+ private:
+  Block m_block;
+  const long long m_prev;
+  const long long m_next;
+  const long long m_duration;
 };
 
 ///////////////////////////////////////////////////////////////
@@ -206,635 +183,552 @@
 // Elements used to describe if the track data has been encrypted or
 // compressed with zlib or header stripping.
 class ContentEncoding {
-public:
-    enum {
-      kCTR = 1
-    };
+ public:
+  enum { kCTR = 1 };
 
-    ContentEncoding();
-    ~ContentEncoding();
+  ContentEncoding();
+  ~ContentEncoding();
 
-    // ContentCompression element names
-    struct ContentCompression {
-        ContentCompression();
-        ~ContentCompression();
+  // ContentCompression element names
+  struct ContentCompression {
+    ContentCompression();
+    ~ContentCompression();
 
-        unsigned long long algo;
-        unsigned char* settings;
-        long long settings_len;
-    };
+    unsigned long long algo;
+    unsigned char* settings;
+    long long settings_len;
+  };
 
-    // ContentEncAESSettings element names
-    struct ContentEncAESSettings {
-      ContentEncAESSettings() : cipher_mode(kCTR) {}
-      ~ContentEncAESSettings() {}
+  // ContentEncAESSettings element names
+  struct ContentEncAESSettings {
+    ContentEncAESSettings() : cipher_mode(kCTR) {}
+    ~ContentEncAESSettings() {}
 
-      unsigned long long cipher_mode;
-    };
+    unsigned long long cipher_mode;
+  };
 
-    // ContentEncryption element names
-    struct ContentEncryption {
-        ContentEncryption();
-        ~ContentEncryption();
+  // ContentEncryption element names
+  struct ContentEncryption {
+    ContentEncryption();
+    ~ContentEncryption();
 
-        unsigned long long algo;
-        unsigned char* key_id;
-        long long key_id_len;
-        unsigned char* signature;
-        long long signature_len;
-        unsigned char* sig_key_id;
-        long long sig_key_id_len;
-        unsigned long long sig_algo;
-        unsigned long long sig_hash_algo;
+    unsigned long long algo;
+    unsigned char* key_id;
+    long long key_id_len;
+    unsigned char* signature;
+    long long signature_len;
+    unsigned char* sig_key_id;
+    long long sig_key_id_len;
+    unsigned long long sig_algo;
+    unsigned long long sig_hash_algo;
 
-        ContentEncAESSettings aes_settings;
-    };
+    ContentEncAESSettings aes_settings;
+  };
 
-    // Returns ContentCompression represented by |idx|. Returns NULL if |idx|
-    // is out of bounds.
-    const ContentCompression* GetCompressionByIndex(unsigned long idx) const;
+  // Returns ContentCompression represented by |idx|. Returns NULL if |idx|
+  // is out of bounds.
+  const ContentCompression* GetCompressionByIndex(unsigned long idx) const;
 
-    // Returns number of ContentCompression elements in this ContentEncoding
-    // element.
-    unsigned long GetCompressionCount() const;
+  // Returns number of ContentCompression elements in this ContentEncoding
+  // element.
+  unsigned long GetCompressionCount() const;
 
-    // Parses the ContentCompression element from |pReader|. |start| is the
-    // starting offset of the ContentCompression payload. |size| is the size in
-    // bytes of the ContentCompression payload. |compression| is where the parsed
-    // values will be stored.
-    long ParseCompressionEntry(long long start,
-                               long long size,
-                               IMkvReader* pReader,
-                               ContentCompression* compression);
+  // Parses the ContentCompression element from |pReader|. |start| is the
+  // starting offset of the ContentCompression payload. |size| is the size in
+  // bytes of the ContentCompression payload. |compression| is where the parsed
+  // values will be stored.
+  long ParseCompressionEntry(long long start, long long size,
+                             IMkvReader* pReader,
+                             ContentCompression* compression);
 
-    // Returns ContentEncryption represented by |idx|. Returns NULL if |idx|
-    // is out of bounds.
-    const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const;
+  // Returns ContentEncryption represented by |idx|. Returns NULL if |idx|
+  // is out of bounds.
+  const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const;
 
-    // Returns number of ContentEncryption elements in this ContentEncoding
-    // element.
-    unsigned long GetEncryptionCount() const;
+  // Returns number of ContentEncryption elements in this ContentEncoding
+  // element.
+  unsigned long GetEncryptionCount() const;
 
-    // Parses the ContentEncAESSettings element from |pReader|. |start| is the
-    // starting offset of the ContentEncAESSettings payload. |size| is the
-    // size in bytes of the ContentEncAESSettings payload. |encryption| is
-    // where the parsed values will be stored.
-    long ParseContentEncAESSettingsEntry(long long start,
-                                         long long size,
-                                         IMkvReader* pReader,
-                                         ContentEncAESSettings* aes);
+  // Parses the ContentEncAESSettings element from |pReader|. |start| is the
+  // starting offset of the ContentEncAESSettings payload. |size| is the
+  // size in bytes of the ContentEncAESSettings payload. |aes| is
+  // where the parsed values will be stored.
+  long ParseContentEncAESSettingsEntry(long long start, long long size,
+                                       IMkvReader* pReader,
+                                       ContentEncAESSettings* aes);
 
-    // Parses the ContentEncoding element from |pReader|. |start| is the
-    // starting offset of the ContentEncoding payload. |size| is the size in
-    // bytes of the ContentEncoding payload. Returns true on success.
-    long ParseContentEncodingEntry(long long start,
-                                   long long size,
-                                   IMkvReader* pReader);
+  // Parses the ContentEncoding element from |pReader|. |start| is the
+  // starting offset of the ContentEncoding payload. |size| is the size in
+  // bytes of the ContentEncoding payload. Returns true on success.
+  long ParseContentEncodingEntry(long long start, long long size,
+                                 IMkvReader* pReader);
 
-    // Parses the ContentEncryption element from |pReader|. |start| is the
-    // starting offset of the ContentEncryption payload. |size| is the size in
-    // bytes of the ContentEncryption payload. |encryption| is where the parsed
-    // values will be stored.
-    long ParseEncryptionEntry(long long start,
-                              long long size,
-                              IMkvReader* pReader,
-                              ContentEncryption* encryption);
+  // Parses the ContentEncryption element from |pReader|. |start| is the
+  // starting offset of the ContentEncryption payload. |size| is the size in
+  // bytes of the ContentEncryption payload. |encryption| is where the parsed
+  // values will be stored.
+  long ParseEncryptionEntry(long long start, long long size,
+                            IMkvReader* pReader, ContentEncryption* encryption);
 
-    unsigned long long encoding_order() const { return encoding_order_; }
-    unsigned long long encoding_scope() const { return encoding_scope_; }
-    unsigned long long encoding_type() const { return encoding_type_; }
+  unsigned long long encoding_order() const { return encoding_order_; }
+  unsigned long long encoding_scope() const { return encoding_scope_; }
+  unsigned long long encoding_type() const { return encoding_type_; }
 
-private:
-    // Member variables for list of ContentCompression elements.
-    ContentCompression** compression_entries_;
-    ContentCompression** compression_entries_end_;
+ private:
+  // Member variables for list of ContentCompression elements.
+  ContentCompression** compression_entries_;
+  ContentCompression** compression_entries_end_;
 
-    // Member variables for list of ContentEncryption elements.
-    ContentEncryption** encryption_entries_;
-    ContentEncryption** encryption_entries_end_;
+  // Member variables for list of ContentEncryption elements.
+  ContentEncryption** encryption_entries_;
+  ContentEncryption** encryption_entries_end_;
 
-    // ContentEncoding element names
-    unsigned long long encoding_order_;
-    unsigned long long encoding_scope_;
-    unsigned long long encoding_type_;
+  // ContentEncoding element names
+  unsigned long long encoding_order_;
+  unsigned long long encoding_scope_;
+  unsigned long long encoding_type_;
 
-    // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
-    ContentEncoding(const ContentEncoding&);
-    ContentEncoding& operator=(const ContentEncoding&);
+  // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
+  ContentEncoding(const ContentEncoding&);
+  ContentEncoding& operator=(const ContentEncoding&);
 };
 
-class Track
-{
-    Track(const Track&);
-    Track& operator=(const Track&);
+class Track {
+  Track(const Track&);
+  Track& operator=(const Track&);
 
-public:
-    class Info;
-    static long Create(
-        Segment*,
-        const Info&,
-        long long element_start,
-        long long element_size,
-        Track*&);
+ public:
+  class Info;
+  static long Create(Segment*, const Info&, long long element_start,
+                     long long element_size, Track*&);
 
-    enum Type {
-        kVideo = 1,
-        kAudio = 2,
-        kSubtitle = 0x11,
-        kMetadata = 0x21
-     };
+  enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 };
 
-    Segment* const m_pSegment;
-    const long long m_element_start;
-    const long long m_element_size;
-    virtual ~Track();
+  Segment* const m_pSegment;
+  const long long m_element_start;
+  const long long m_element_size;
+  virtual ~Track();
 
-    long GetType() const;
-    long GetNumber() const;
-    unsigned long long GetUid() const;
-    const char* GetNameAsUTF8() const;
+  long GetType() const;
+  long GetNumber() const;
+  unsigned long long GetUid() const;
+  const char* GetNameAsUTF8() const;
+  const char* GetLanguage() const;
+  const char* GetCodecNameAsUTF8() const;
+  const char* GetCodecId() const;
+  const unsigned char* GetCodecPrivate(size_t&) const;
+  bool GetLacing() const;
+  unsigned long long GetDefaultDuration() const;
+  unsigned long long GetCodecDelay() const;
+  unsigned long long GetSeekPreRoll() const;
+
+  const BlockEntry* GetEOS() const;
+
+  struct Settings {
+    long long start;
+    long long size;
+  };
+
+  class Info {
+   public:
+    Info();
+    ~Info();
+    int Copy(Info&) const;
+    void Clear();
+    long type;
+    long number;
+    unsigned long long uid;
+    unsigned long long defaultDuration;
+    unsigned long long codecDelay;
+    unsigned long long seekPreRoll;
+    char* nameAsUTF8;
+    char* language;
+    char* codecId;
+    char* codecNameAsUTF8;
+    unsigned char* codecPrivate;
+    size_t codecPrivateSize;
+    bool lacing;
+    Settings settings;
+
+   private:
+    Info(const Info&);
+    Info& operator=(const Info&);
+    int CopyStr(char* Info::*str, Info&) const;
+  };
+
+  long GetFirst(const BlockEntry*&) const;
+  long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const;
+  virtual bool VetEntry(const BlockEntry*) const;
+  virtual long Seek(long long time_ns, const BlockEntry*&) const;
+
+  const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const;
+  unsigned long GetContentEncodingCount() const;
+
+  long ParseContentEncodingsEntry(long long start, long long size);
+
+ protected:
+  Track(Segment*, long long element_start, long long element_size);
+
+  Info m_info;
+
+  class EOSBlock : public BlockEntry {
+   public:
+    EOSBlock();
+
+    Kind GetKind() const;
+    const Block* GetBlock() const;
+  };
+
+  EOSBlock m_eos;
+
+ private:
+  ContentEncoding** content_encoding_entries_;
+  ContentEncoding** content_encoding_entries_end_;
+};
+
+class VideoTrack : public Track {
+  VideoTrack(const VideoTrack&);
+  VideoTrack& operator=(const VideoTrack&);
+
+  VideoTrack(Segment*, long long element_start, long long element_size);
+
+ public:
+  static long Parse(Segment*, const Info&, long long element_start,
+                    long long element_size, VideoTrack*&);
+
+  long long GetWidth() const;
+  long long GetHeight() const;
+  double GetFrameRate() const;
+
+  bool VetEntry(const BlockEntry*) const;
+  long Seek(long long time_ns, const BlockEntry*&) const;
+
+ private:
+  long long m_width;
+  long long m_height;
+  double m_rate;
+};
+
+class AudioTrack : public Track {
+  AudioTrack(const AudioTrack&);
+  AudioTrack& operator=(const AudioTrack&);
+
+  AudioTrack(Segment*, long long element_start, long long element_size);
+
+ public:
+  static long Parse(Segment*, const Info&, long long element_start,
+                    long long element_size, AudioTrack*&);
+
+  double GetSamplingRate() const;
+  long long GetChannels() const;
+  long long GetBitDepth() const;
+
+ private:
+  double m_rate;
+  long long m_channels;
+  long long m_bitDepth;
+};
+
+class Tracks {
+  Tracks(const Tracks&);
+  Tracks& operator=(const Tracks&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  Tracks(Segment*, long long start, long long size, long long element_start,
+         long long element_size);
+
+  ~Tracks();
+
+  long Parse();
+
+  unsigned long GetTracksCount() const;
+
+  const Track* GetTrackByNumber(long tn) const;
+  const Track* GetTrackByIndex(unsigned long idx) const;
+
+ private:
+  Track** m_trackEntries;
+  Track** m_trackEntriesEnd;
+
+  long ParseTrackEntry(long long payload_start, long long payload_size,
+                       long long element_start, long long element_size,
+                       Track*&) const;
+};
+
+class Chapters {
+  Chapters(const Chapters&);
+  Chapters& operator=(const Chapters&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  Chapters(Segment*, long long payload_start, long long payload_size,
+           long long element_start, long long element_size);
+
+  ~Chapters();
+
+  long Parse();
+
+  class Atom;
+  class Edition;
+
+  class Display {
+    friend class Atom;
+    Display();
+    Display(const Display&);
+    ~Display();
+    Display& operator=(const Display&);
+
+   public:
+    const char* GetString() const;
     const char* GetLanguage() const;
-    const char* GetCodecNameAsUTF8() const;
-    const char* GetCodecId() const;
-    const unsigned char* GetCodecPrivate(size_t&) const;
-    bool GetLacing() const;
-    unsigned long long GetDefaultDuration() const;
-    unsigned long long GetCodecDelay() const;
-    unsigned long long GetSeekPreRoll() const;
+    const char* GetCountry() const;
 
-    const BlockEntry* GetEOS() const;
+   private:
+    void Init();
+    void ShallowCopy(Display&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
 
-    struct Settings
-    {
-        long long start;
-        long long size;
-    };
+    char* m_string;
+    char* m_language;
+    char* m_country;
+  };
 
-    class Info
-    {
-    public:
-        Info();
-        ~Info();
-        int Copy(Info&) const;
-        void Clear();
-        long type;
-        long number;
-        unsigned long long uid;
-        unsigned long long defaultDuration;
-        unsigned long long codecDelay;
-        unsigned long long seekPreRoll;
-        char* nameAsUTF8;
-        char* language;
-        char* codecId;
-        char* codecNameAsUTF8;
-        unsigned char* codecPrivate;
-        size_t codecPrivateSize;
-        bool lacing;
-        Settings settings;
+  class Atom {
+    friend class Edition;
+    Atom();
+    Atom(const Atom&);
+    ~Atom();
+    Atom& operator=(const Atom&);
 
-    private:
-        Info(const Info&);
-        Info& operator=(const Info&);
-        int CopyStr(char* Info::*str, Info&) const;
-    };
+   public:
+    unsigned long long GetUID() const;
+    const char* GetStringUID() const;
 
-    long GetFirst(const BlockEntry*&) const;
-    long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const;
-    virtual bool VetEntry(const BlockEntry*) const;
-    virtual long Seek(long long time_ns, const BlockEntry*&) const;
+    long long GetStartTimecode() const;
+    long long GetStopTimecode() const;
 
-    const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const;
-    unsigned long GetContentEncodingCount() const;
+    long long GetStartTime(const Chapters*) const;
+    long long GetStopTime(const Chapters*) const;
 
-    long ParseContentEncodingsEntry(long long start, long long size);
+    int GetDisplayCount() const;
+    const Display* GetDisplay(int index) const;
 
-protected:
-    Track(
-        Segment*,
-        long long element_start,
-        long long element_size);
+   private:
+    void Init();
+    void ShallowCopy(Atom&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+    static long long GetTime(const Chapters*, long long timecode);
 
-    Info m_info;
+    long ParseDisplay(IMkvReader*, long long pos, long long size);
+    bool ExpandDisplaysArray();
 
-    class EOSBlock : public BlockEntry
-    {
-    public:
-        EOSBlock();
+    char* m_string_uid;
+    unsigned long long m_uid;
+    long long m_start_timecode;
+    long long m_stop_timecode;
 
-        Kind GetKind() const;
-        const Block* GetBlock() const;
-    };
+    Display* m_displays;
+    int m_displays_size;
+    int m_displays_count;
+  };
 
-    EOSBlock m_eos;
+  class Edition {
+    friend class Chapters;
+    Edition();
+    Edition(const Edition&);
+    ~Edition();
+    Edition& operator=(const Edition&);
 
-private:
-    ContentEncoding** content_encoding_entries_;
-    ContentEncoding** content_encoding_entries_end_;
+   public:
+    int GetAtomCount() const;
+    const Atom* GetAtom(int index) const;
+
+   private:
+    void Init();
+    void ShallowCopy(Edition&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+
+    long ParseAtom(IMkvReader*, long long pos, long long size);
+    bool ExpandAtomsArray();
+
+    Atom* m_atoms;
+    int m_atoms_size;
+    int m_atoms_count;
+  };
+
+  int GetEditionCount() const;
+  const Edition* GetEdition(int index) const;
+
+ private:
+  long ParseEdition(long long pos, long long size);
+  bool ExpandEditionsArray();
+
+  Edition* m_editions;
+  int m_editions_size;
+  int m_editions_count;
 };
 
+class SegmentInfo {
+  SegmentInfo(const SegmentInfo&);
+  SegmentInfo& operator=(const SegmentInfo&);
 
-class VideoTrack : public Track
-{
-    VideoTrack(const VideoTrack&);
-    VideoTrack& operator=(const VideoTrack&);
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
 
-    VideoTrack(
-        Segment*,
-        long long element_start,
-        long long element_size);
+  SegmentInfo(Segment*, long long start, long long size,
+              long long element_start, long long element_size);
 
-public:
-    static long Parse(
-        Segment*,
-        const Info&,
-        long long element_start,
-        long long element_size,
-        VideoTrack*&);
+  ~SegmentInfo();
 
-    long long GetWidth() const;
-    long long GetHeight() const;
-    double GetFrameRate() const;
+  long Parse();
 
-    bool VetEntry(const BlockEntry*) const;
-    long Seek(long long time_ns, const BlockEntry*&) const;
+  long long GetTimeCodeScale() const;
+  long long GetDuration() const;  // scaled
+  const char* GetMuxingAppAsUTF8() const;
+  const char* GetWritingAppAsUTF8() const;
+  const char* GetTitleAsUTF8() const;
 
-private:
-    long long m_width;
-    long long m_height;
-    double m_rate;
-
+ private:
+  long long m_timecodeScale;
+  double m_duration;
+  char* m_pMuxingAppAsUTF8;
+  char* m_pWritingAppAsUTF8;
+  char* m_pTitleAsUTF8;
 };
 
+class SeekHead {
+  SeekHead(const SeekHead&);
+  SeekHead& operator=(const SeekHead&);
 
-class AudioTrack : public Track
-{
-    AudioTrack(const AudioTrack&);
-    AudioTrack& operator=(const AudioTrack&);
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
 
-    AudioTrack(
-        Segment*,
-        long long element_start,
-        long long element_size);
-public:
-    static long Parse(
-        Segment*,
-        const Info&,
-        long long element_start,
-        long long element_size,
-        AudioTrack*&);
+  SeekHead(Segment*, long long start, long long size, long long element_start,
+           long long element_size);
 
-    double GetSamplingRate() const;
-    long long GetChannels() const;
-    long long GetBitDepth() const;
+  ~SeekHead();
 
-private:
-    double m_rate;
-    long long m_channels;
-    long long m_bitDepth;
-};
+  long Parse();
 
+  struct Entry {
+    // the SeekHead entry payload
+    long long id;
+    long long pos;
 
-class Tracks
-{
-    Tracks(const Tracks&);
-    Tracks& operator=(const Tracks&);
+    // absolute pos of SeekEntry ID
+    long long element_start;
 
-public:
-    Segment* const m_pSegment;
-    const long long m_start;
-    const long long m_size;
-    const long long m_element_start;
-    const long long m_element_size;
+    // SeekEntry ID size + size size + payload
+    long long element_size;
+  };
 
-    Tracks(
-        Segment*,
-        long long start,
-        long long size,
-        long long element_start,
-        long long element_size);
+  int GetCount() const;
+  const Entry* GetEntry(int idx) const;
 
-    ~Tracks();
+  struct VoidElement {
+    // absolute pos of Void ID
+    long long element_start;
 
-    long Parse();
+    // ID size + size size + payload size
+    long long element_size;
+  };
 
-    unsigned long GetTracksCount() const;
+  int GetVoidElementCount() const;
+  const VoidElement* GetVoidElement(int idx) const;
 
-    const Track* GetTrackByNumber(long tn) const;
-    const Track* GetTrackByIndex(unsigned long idx) const;
+ private:
+  Entry* m_entries;
+  int m_entry_count;
 
-private:
-    Track** m_trackEntries;
-    Track** m_trackEntriesEnd;
+  VoidElement* m_void_elements;
+  int m_void_element_count;
 
-    long ParseTrackEntry(
-        long long payload_start,
-        long long payload_size,
-        long long element_start,
-        long long element_size,
-        Track*&) const;
-
-};
-
-
-class Chapters
-{
-    Chapters(const Chapters&);
-    Chapters& operator=(const Chapters&);
-
-public:
-    Segment* const m_pSegment;
-    const long long m_start;
-    const long long m_size;
-    const long long m_element_start;
-    const long long m_element_size;
-
-    Chapters(
-        Segment*,
-        long long payload_start,
-        long long payload_size,
-        long long element_start,
-        long long element_size);
-
-    ~Chapters();
-
-    long Parse();
-
-    class Atom;
-    class Edition;
-
-    class Display
-    {
-        friend class Atom;
-        Display();
-        Display(const Display&);
-        ~Display();
-        Display& operator=(const Display&);
-    public:
-        const char* GetString() const;
-        const char* GetLanguage() const;
-        const char* GetCountry() const;
-    private:
-        void Init();
-        void ShallowCopy(Display&) const;
-        void Clear();
-        long Parse(IMkvReader*, long long pos, long long size);
-
-        char* m_string;
-        char* m_language;
-        char* m_country;
-    };
-
-    class Atom
-    {
-        friend class Edition;
-        Atom();
-        Atom(const Atom&);
-        ~Atom();
-        Atom& operator=(const Atom&);
-    public:
-        unsigned long long GetUID() const;
-        const char* GetStringUID() const;
-
-        long long GetStartTimecode() const;
-        long long GetStopTimecode() const;
-
-        long long GetStartTime(const Chapters*) const;
-        long long GetStopTime(const Chapters*) const;
-
-        int GetDisplayCount() const;
-        const Display* GetDisplay(int index) const;
-    private:
-        void Init();
-        void ShallowCopy(Atom&) const;
-        void Clear();
-        long Parse(IMkvReader*, long long pos, long long size);
-        static long long GetTime(const Chapters*, long long timecode);
-
-        long ParseDisplay(IMkvReader*, long long pos, long long size);
-        bool ExpandDisplaysArray();
-
-        char* m_string_uid;
-        unsigned long long m_uid;
-        long long m_start_timecode;
-        long long m_stop_timecode;
-
-        Display* m_displays;
-        int m_displays_size;
-        int m_displays_count;
-    };
-
-    class Edition
-    {
-        friend class Chapters;
-        Edition();
-        Edition(const Edition&);
-        ~Edition();
-        Edition& operator=(const Edition&);
-    public:
-        int GetAtomCount() const;
-        const Atom* GetAtom(int index) const;
-    private:
-        void Init();
-        void ShallowCopy(Edition&) const;
-        void Clear();
-        long Parse(IMkvReader*, long long pos, long long size);
-
-        long ParseAtom(IMkvReader*, long long pos, long long size);
-        bool ExpandAtomsArray();
-
-        Atom* m_atoms;
-        int m_atoms_size;
-        int m_atoms_count;
-    };
-
-    int GetEditionCount() const;
-    const Edition* GetEdition(int index) const;
-
-private:
-    long ParseEdition(long long pos, long long size);
-    bool ExpandEditionsArray();
-
-    Edition* m_editions;
-    int m_editions_size;
-    int m_editions_count;
-
-};
-
-
-class SegmentInfo
-{
-    SegmentInfo(const SegmentInfo&);
-    SegmentInfo& operator=(const SegmentInfo&);
-
-public:
-    Segment* const m_pSegment;
-    const long long m_start;
-    const long long m_size;
-    const long long m_element_start;
-    const long long m_element_size;
-
-    SegmentInfo(
-        Segment*,
-        long long start,
-        long long size,
-        long long element_start,
-        long long element_size);
-
-    ~SegmentInfo();
-
-    long Parse();
-
-    long long GetTimeCodeScale() const;
-    long long GetDuration() const;  //scaled
-    const char* GetMuxingAppAsUTF8() const;
-    const char* GetWritingAppAsUTF8() const;
-    const char* GetTitleAsUTF8() const;
-
-private:
-    long long m_timecodeScale;
-    double m_duration;
-    char* m_pMuxingAppAsUTF8;
-    char* m_pWritingAppAsUTF8;
-    char* m_pTitleAsUTF8;
-};
-
-
-class SeekHead
-{
-    SeekHead(const SeekHead&);
-    SeekHead& operator=(const SeekHead&);
-
-public:
-    Segment* const m_pSegment;
-    const long long m_start;
-    const long long m_size;
-    const long long m_element_start;
-    const long long m_element_size;
-
-    SeekHead(
-        Segment*,
-        long long start,
-        long long size,
-        long long element_start,
-        long long element_size);
-
-    ~SeekHead();
-
-    long Parse();
-
-    struct Entry
-    {
-        //the SeekHead entry payload
-        long long id;
-        long long pos;
-
-        //absolute pos of SeekEntry ID
-        long long element_start;
-
-        //SeekEntry ID size + size size + payload
-        long long element_size;
-    };
-
-    int GetCount() const;
-    const Entry* GetEntry(int idx) const;
-
-    struct VoidElement
-    {
-        //absolute pos of Void ID
-        long long element_start;
-
-        //ID size + size size + payload size
-        long long element_size;
-    };
-
-    int GetVoidElementCount() const;
-    const VoidElement* GetVoidElement(int idx) const;
-
-private:
-    Entry* m_entries;
-    int m_entry_count;
-
-    VoidElement* m_void_elements;
-    int m_void_element_count;
-
-    static bool ParseEntry(
-        IMkvReader*,
-        long long pos,  //payload
-        long long size,
-        Entry*);
-
+  static bool ParseEntry(IMkvReader*,
+                         long long pos,  // payload
+                         long long size, Entry*);
 };
 
 class Cues;
-class CuePoint
-{
-    friend class Cues;
+class CuePoint {
+  friend class Cues;
 
-    CuePoint(long, long long);
-    ~CuePoint();
+  CuePoint(long, long long);
+  ~CuePoint();
 
-    CuePoint(const CuePoint&);
-    CuePoint& operator=(const CuePoint&);
+  CuePoint(const CuePoint&);
+  CuePoint& operator=(const CuePoint&);
 
-public:
-    long long m_element_start;
-    long long m_element_size;
+ public:
+  long long m_element_start;
+  long long m_element_size;
 
-    void Load(IMkvReader*);
+  void Load(IMkvReader*);
 
-    long long GetTimeCode() const;      //absolute but unscaled
-    long long GetTime(const Segment*) const;  //absolute and scaled (ns units)
+  long long GetTimeCode() const;  // absolute but unscaled
+  long long GetTime(const Segment*) const;  // absolute and scaled (ns units)
 
-    struct TrackPosition
-    {
-        long long m_track;
-        long long m_pos;  //of cluster
-        long long m_block;
-        //codec_state  //defaults to 0
-        //reference = clusters containing req'd referenced blocks
-        //  reftime = timecode of the referenced block
+  struct TrackPosition {
+    long long m_track;
+    long long m_pos;  // of cluster
+    long long m_block;
+    // codec_state  //defaults to 0
+    // reference = clusters containing req'd referenced blocks
+    //  reftime = timecode of the referenced block
 
-        void Parse(IMkvReader*, long long, long long);
-    };
+    void Parse(IMkvReader*, long long, long long);
+  };
 
-    const TrackPosition* Find(const Track*) const;
+  const TrackPosition* Find(const Track*) const;
 
-private:
-    const long m_index;
-    long long m_timecode;
-    TrackPosition* m_track_positions;
-    size_t m_track_positions_count;
-
+ private:
+  const long m_index;
+  long long m_timecode;
+  TrackPosition* m_track_positions;
+  size_t m_track_positions_count;
 };
 
+class Cues {
+  friend class Segment;
 
-class Cues
-{
-    friend class Segment;
+  Cues(Segment*, long long start, long long size, long long element_start,
+       long long element_size);
+  ~Cues();
 
-    Cues(
-        Segment*,
-        long long start,
-        long long size,
-        long long element_start,
-        long long element_size);
-    ~Cues();
+  Cues(const Cues&);
+  Cues& operator=(const Cues&);
 
-    Cues(const Cues&);
-    Cues& operator=(const Cues&);
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
 
-public:
-    Segment* const m_pSegment;
-    const long long m_start;
-    const long long m_size;
-    const long long m_element_start;
-    const long long m_element_size;
-
-    bool Find(  //lower bound of time_ns
-        long long time_ns,
-        const Track*,
-        const CuePoint*&,
-        const CuePoint::TrackPosition*&) const;
+  bool Find(  // lower bound of time_ns
+      long long time_ns, const Track*, const CuePoint*&,
+      const CuePoint::TrackPosition*&) const;
 
 #if 0
     bool FindNext(  //upper_bound of time_ns
@@ -844,165 +738,144 @@
         const CuePoint::TrackPosition*&) const;
 #endif
 
-    const CuePoint* GetFirst() const;
-    const CuePoint* GetLast() const;
-    const CuePoint* GetNext(const CuePoint*) const;
+  const CuePoint* GetFirst() const;
+  const CuePoint* GetLast() const;
+  const CuePoint* GetNext(const CuePoint*) const;
 
-    const BlockEntry* GetBlock(
-                        const CuePoint*,
-                        const CuePoint::TrackPosition*) const;
+  const BlockEntry* GetBlock(const CuePoint*,
+                             const CuePoint::TrackPosition*) const;
 
-    bool LoadCuePoint() const;
-    long GetCount() const;  //loaded only
-    //long GetTotal() const;  //loaded + preloaded
-    bool DoneParsing() const;
+  bool LoadCuePoint() const;
+  long GetCount() const;  // loaded only
+  // long GetTotal() const;  //loaded + preloaded
+  bool DoneParsing() const;
 
-private:
-    void Init() const;
-    void PreloadCuePoint(long&, long long) const;
+ private:
+  void Init() const;
+  void PreloadCuePoint(long&, long long) const;
 
-    mutable CuePoint** m_cue_points;
-    mutable long m_count;
-    mutable long m_preload_count;
-    mutable long long m_pos;
-
+  mutable CuePoint** m_cue_points;
+  mutable long m_count;
+  mutable long m_preload_count;
+  mutable long long m_pos;
 };
 
+class Cluster {
+  friend class Segment;
 
-class Cluster
-{
-    friend class Segment;
+  Cluster(const Cluster&);
+  Cluster& operator=(const Cluster&);
 
-    Cluster(const Cluster&);
-    Cluster& operator=(const Cluster&);
+ public:
+  Segment* const m_pSegment;
 
-public:
-    Segment* const m_pSegment;
+ public:
+  static Cluster* Create(Segment*,
+                         long index,  // index in segment
+                         long long off);  // offset relative to segment
+  // long long element_size);
 
-public:
-    static Cluster* Create(
-        Segment*,
-        long index,       //index in segment
-        long long off);   //offset relative to segment
-        //long long element_size);
+  Cluster();  // EndOfStream
+  ~Cluster();
 
-    Cluster();  //EndOfStream
-    ~Cluster();
+  bool EOS() const;
 
-    bool EOS() const;
+  long long GetTimeCode() const;  // absolute, but not scaled
+  long long GetTime() const;  // absolute, and scaled (nanosecond units)
+  long long GetFirstTime() const;  // time (ns) of first (earliest) block
+  long long GetLastTime() const;  // time (ns) of last (latest) block
 
-    long long GetTimeCode() const;   //absolute, but not scaled
-    long long GetTime() const;       //absolute, and scaled (nanosecond units)
-    long long GetFirstTime() const;  //time (ns) of first (earliest) block
-    long long GetLastTime() const;   //time (ns) of last (latest) block
+  long GetFirst(const BlockEntry*&) const;
+  long GetLast(const BlockEntry*&) const;
+  long GetNext(const BlockEntry* curr, const BlockEntry*& next) const;
 
-    long GetFirst(const BlockEntry*&) const;
-    long GetLast(const BlockEntry*&) const;
-    long GetNext(const BlockEntry* curr, const BlockEntry*& next) const;
+  const BlockEntry* GetEntry(const Track*, long long ns = -1) const;
+  const BlockEntry* GetEntry(const CuePoint&,
+                             const CuePoint::TrackPosition&) const;
+  // const BlockEntry* GetMaxKey(const VideoTrack*) const;
 
-    const BlockEntry* GetEntry(const Track*, long long ns = -1) const;
-    const BlockEntry* GetEntry(
-        const CuePoint&,
-        const CuePoint::TrackPosition&) const;
-    //const BlockEntry* GetMaxKey(const VideoTrack*) const;
+  //    static bool HasBlockEntries(const Segment*, long long);
 
-//    static bool HasBlockEntries(const Segment*, long long);
+  static long HasBlockEntries(const Segment*, long long idoff, long long& pos,
+                              long& size);
 
-    static long HasBlockEntries(
-            const Segment*,
-            long long idoff,
-            long long& pos,
-            long& size);
+  long GetEntryCount() const;
 
-    long GetEntryCount() const;
+  long Load(long long& pos, long& size) const;
 
-    long Load(long long& pos, long& size) const;
+  long Parse(long long& pos, long& size) const;
+  long GetEntry(long index, const mkvparser::BlockEntry*&) const;
 
-    long Parse(long long& pos, long& size) const;
-    long GetEntry(long index, const mkvparser::BlockEntry*&) const;
+ protected:
+  Cluster(Segment*, long index, long long element_start);
+  // long long element_size);
 
-protected:
-    Cluster(
-        Segment*,
-        long index,
-        long long element_start);
-        //long long element_size);
+ public:
+  const long long m_element_start;
+  long long GetPosition() const;  // offset relative to segment
 
-public:
-    const long long m_element_start;
-    long long GetPosition() const;  //offset relative to segment
+  long GetIndex() const;
+  long long GetElementSize() const;
+  // long long GetPayloadSize() const;
 
-    long GetIndex() const;
-    long long GetElementSize() const;
-    //long long GetPayloadSize() const;
+  // long long Unparsed() const;
 
-    //long long Unparsed() const;
+ private:
+  long m_index;
+  mutable long long m_pos;
+  // mutable long long m_size;
+  mutable long long m_element_size;
+  mutable long long m_timecode;
+  mutable BlockEntry** m_entries;
+  mutable long m_entries_size;
+  mutable long m_entries_count;
 
-private:
-    long m_index;
-    mutable long long m_pos;
-    //mutable long long m_size;
-    mutable long long m_element_size;
-    mutable long long m_timecode;
-    mutable BlockEntry** m_entries;
-    mutable long m_entries_size;
-    mutable long m_entries_count;
+  long ParseSimpleBlock(long long, long long&, long&);
+  long ParseBlockGroup(long long, long long&, long&);
 
-    long ParseSimpleBlock(long long, long long&, long&);
-    long ParseBlockGroup(long long, long long&, long&);
-
-    long CreateBlock(long long id, long long pos, long long size,
-                     long long discard_padding);
-    long CreateBlockGroup(long long start_offset, long long size,
-                          long long discard_padding);
-    long CreateSimpleBlock(long long, long long);
-
+  long CreateBlock(long long id, long long pos, long long size,
+                   long long discard_padding);
+  long CreateBlockGroup(long long start_offset, long long size,
+                        long long discard_padding);
+  long CreateSimpleBlock(long long, long long);
 };
 
+class Segment {
+  friend class Cues;
+  friend class Track;
+  friend class VideoTrack;
 
-class Segment
-{
-    friend class Cues;
-    friend class Track;
-    friend class VideoTrack;
+  Segment(const Segment&);
+  Segment& operator=(const Segment&);
 
-    Segment(const Segment&);
-    Segment& operator=(const Segment&);
+ private:
+  Segment(IMkvReader*, long long elem_start,
+          // long long elem_size,
+          long long pos, long long size);
 
-private:
-    Segment(
-        IMkvReader*,
-        long long elem_start,
-        //long long elem_size,
-        long long pos,
-        long long size);
+ public:
+  IMkvReader* const m_pReader;
+  const long long m_element_start;
+  // const long long m_element_size;
+  const long long m_start;  // posn of segment payload
+  const long long m_size;  // size of segment payload
+  Cluster m_eos;  // TODO: make private?
 
-public:
-    IMkvReader* const m_pReader;
-    const long long m_element_start;
-    //const long long m_element_size;
-    const long long m_start;  //posn of segment payload
-    const long long m_size;   //size of segment payload
-    Cluster m_eos;  //TODO: make private?
+  static long long CreateInstance(IMkvReader*, long long, Segment*&);
+  ~Segment();
 
-    static long long CreateInstance(IMkvReader*, long long, Segment*&);
-    ~Segment();
+  long Load();  // loads headers and all clusters
 
-    long Load();  //loads headers and all clusters
+  // for incremental loading
+  // long long Unparsed() const;
+  bool DoneParsing() const;
+  long long ParseHeaders();  // stops when first cluster is found
+  // long FindNextCluster(long long& pos, long& size) const;
+  long LoadCluster(long long& pos, long& size);  // load one cluster
+  long LoadCluster();
 
-    //for incremental loading
-    //long long Unparsed() const;
-    bool DoneParsing() const;
-    long long ParseHeaders();  //stops when first cluster is found
-    //long FindNextCluster(long long& pos, long& size) const;
-    long LoadCluster(long long& pos, long& size);  //load one cluster
-    long LoadCluster();
-
-    long ParseNext(
-            const Cluster* pCurr,
-            const Cluster*& pNext,
-            long long& pos,
-            long& size);
+  long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos,
+                 long& size);
 
 #if 0
     //This pair parses one cluster, but only changes the state of the
@@ -1011,69 +884,62 @@
     bool AddCluster(long long cluster_pos, long long new_pos);
 #endif
 
-    const SeekHead* GetSeekHead() const;
-    const Tracks* GetTracks() const;
-    const SegmentInfo* GetInfo() const;
-    const Cues* GetCues() const;
-    const Chapters* GetChapters() const;
+  const SeekHead* GetSeekHead() const;
+  const Tracks* GetTracks() const;
+  const SegmentInfo* GetInfo() const;
+  const Cues* GetCues() const;
+  const Chapters* GetChapters() const;
 
-    long long GetDuration() const;
+  long long GetDuration() const;
 
-    unsigned long GetCount() const;
-    const Cluster* GetFirst() const;
-    const Cluster* GetLast() const;
-    const Cluster* GetNext(const Cluster*);
+  unsigned long GetCount() const;
+  const Cluster* GetFirst() const;
+  const Cluster* GetLast() const;
+  const Cluster* GetNext(const Cluster*);
 
-    const Cluster* FindCluster(long long time_nanoseconds) const;
-    //const BlockEntry* Seek(long long time_nanoseconds, const Track*) const;
+  const Cluster* FindCluster(long long time_nanoseconds) const;
+  // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const;
 
-    const Cluster* FindOrPreloadCluster(long long pos);
+  const Cluster* FindOrPreloadCluster(long long pos);
 
-    long ParseCues(
-        long long cues_off,  //offset relative to start of segment
-        long long& parse_pos,
-        long& parse_len);
+  long ParseCues(long long cues_off,  // offset relative to start of segment
+                 long long& parse_pos, long& parse_len);
 
-private:
+ private:
+  long long m_pos;  // absolute file posn; what has been consumed so far
+  Cluster* m_pUnknownSize;
 
-    long long m_pos;  //absolute file posn; what has been consumed so far
-    Cluster* m_pUnknownSize;
+  SeekHead* m_pSeekHead;
+  SegmentInfo* m_pInfo;
+  Tracks* m_pTracks;
+  Cues* m_pCues;
+  Chapters* m_pChapters;
+  Cluster** m_clusters;
+  long m_clusterCount;  // number of entries for which m_index >= 0
+  long m_clusterPreloadCount;  // number of entries for which m_index < 0
+  long m_clusterSize;  // array size
 
-    SeekHead* m_pSeekHead;
-    SegmentInfo* m_pInfo;
-    Tracks* m_pTracks;
-    Cues* m_pCues;
-    Chapters* m_pChapters;
-    Cluster** m_clusters;
-    long m_clusterCount;         //number of entries for which m_index >= 0
-    long m_clusterPreloadCount;  //number of entries for which m_index < 0
-    long m_clusterSize;          //array size
+  long DoLoadCluster(long long&, long&);
+  long DoLoadClusterUnknownSize(long long&, long&);
+  long DoParseNext(const Cluster*&, long long&, long&);
 
-    long DoLoadCluster(long long&, long&);
-    long DoLoadClusterUnknownSize(long long&, long&);
-    long DoParseNext(const Cluster*&, long long&, long&);
+  void AppendCluster(Cluster*);
+  void PreloadCluster(Cluster*, ptrdiff_t);
 
-    void AppendCluster(Cluster*);
-    void PreloadCluster(Cluster*, ptrdiff_t);
+  // void ParseSeekHead(long long pos, long long size);
+  // void ParseSeekEntry(long long pos, long long size);
+  // void ParseCues(long long);
 
-    //void ParseSeekHead(long long pos, long long size);
-    //void ParseSeekEntry(long long pos, long long size);
-    //void ParseCues(long long);
-
-    const BlockEntry* GetBlock(
-        const CuePoint&,
-        const CuePoint::TrackPosition&);
-
+  const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&);
 };
 
-}  //end namespace mkvparser
+}  // end namespace mkvparser
 
-inline long mkvparser::Segment::LoadCluster()
-{
-    long long pos;
-    long size;
+inline long mkvparser::Segment::LoadCluster() {
+  long long pos;
+  long size;
 
-    return LoadCluster(pos, size);
+  return LoadCluster(pos, size);
 }
 
-#endif  //MKVPARSER_HPP
+#endif  // MKVPARSER_HPP
diff --git a/third_party/libwebm/mkvreader.cpp b/third_party/libwebm/mkvreader.cpp
index b4b2459..eaf9e0a 100644
--- a/third_party/libwebm/mkvreader.cpp
+++ b/third_party/libwebm/mkvreader.cpp
@@ -10,17 +10,11 @@
 
 #include <cassert>
 
-namespace mkvparser
-{
+namespace mkvparser {
 
-MkvReader::MkvReader() :
-    m_file(NULL),
-    reader_owns_file_(true) {
-}
+MkvReader::MkvReader() : m_file(NULL), reader_owns_file_(true) {}
 
-MkvReader::MkvReader(FILE* fp) :
-    m_file(fp),
-    reader_owns_file_(false) {
+MkvReader::MkvReader(FILE* fp) : m_file(fp), reader_owns_file_(false) {
   GetFileSize();
 }
 
@@ -30,114 +24,109 @@
   m_file = NULL;
 }
 
-int MkvReader::Open(const char* fileName)
-{
-    if (fileName == NULL)
-        return -1;
+int MkvReader::Open(const char* fileName) {
+  if (fileName == NULL)
+    return -1;
 
-    if (m_file)
-        return -1;
+  if (m_file)
+    return -1;
 
 #ifdef _MSC_VER
-    const errno_t e = fopen_s(&m_file, fileName, "rb");
+  const errno_t e = fopen_s(&m_file, fileName, "rb");
 
-    if (e)
-        return -1;  //error
+  if (e)
+    return -1;  // error
 #else
-    m_file = fopen(fileName, "rb");
+  m_file = fopen(fileName, "rb");
 
-    if (m_file == NULL)
-        return -1;
+  if (m_file == NULL)
+    return -1;
 #endif
-    return !GetFileSize();
+  return !GetFileSize();
 }
 
 bool MkvReader::GetFileSize() {
-    if (m_file == NULL)
-        return false;
+  if (m_file == NULL)
+    return false;
 #ifdef _MSC_VER
-    int status = _fseeki64(m_file, 0L, SEEK_END);
+  int status = _fseeki64(m_file, 0L, SEEK_END);
 
-    if (status)
-        return false;  //error
+  if (status)
+    return false;  // error
 
-    m_length = _ftelli64(m_file);
+  m_length = _ftelli64(m_file);
 #else
-    fseek(m_file, 0L, SEEK_END);
-    m_length = ftell(m_file);
+  fseek(m_file, 0L, SEEK_END);
+  m_length = ftell(m_file);
 #endif
-    assert(m_length >= 0);
+  assert(m_length >= 0);
 
-    if (m_length < 0)
-        return false;
+  if (m_length < 0)
+    return false;
 
 #ifdef _MSC_VER
-    status = _fseeki64(m_file, 0L, SEEK_SET);
+  status = _fseeki64(m_file, 0L, SEEK_SET);
 
-    if (status)
-        return false;  //error
+  if (status)
+    return false;  // error
 #else
-    fseek(m_file, 0L, SEEK_SET);
+  fseek(m_file, 0L, SEEK_SET);
 #endif
 
-    return true;
+  return true;
 }
 
-void MkvReader::Close()
-{
-    if (m_file != NULL)
-    {
-        fclose(m_file);
-        m_file = NULL;
-    }
+void MkvReader::Close() {
+  if (m_file != NULL) {
+    fclose(m_file);
+    m_file = NULL;
+  }
 }
 
-int MkvReader::Length(long long* total, long long* available)
-{
-    if (m_file == NULL)
-        return -1;
+int MkvReader::Length(long long* total, long long* available) {
+  if (m_file == NULL)
+    return -1;
 
-    if (total)
-        *total = m_length;
+  if (total)
+    *total = m_length;
 
-    if (available)
-        *available = m_length;
+  if (available)
+    *available = m_length;
 
+  return 0;
+}
+
+int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
+  if (m_file == NULL)
+    return -1;
+
+  if (offset < 0)
+    return -1;
+
+  if (len < 0)
+    return -1;
+
+  if (len == 0)
     return 0;
-}
 
-int MkvReader::Read(long long offset, long len, unsigned char* buffer)
-{
-    if (m_file == NULL)
-        return -1;
-
-    if (offset < 0)
-        return -1;
-
-    if (len < 0)
-        return -1;
-
-    if (len == 0)
-        return 0;
-
-    if (offset >= m_length)
-        return -1;
+  if (offset >= m_length)
+    return -1;
 
 #ifdef _MSC_VER
-    const int status = _fseeki64(m_file, offset, SEEK_SET);
+  const int status = _fseeki64(m_file, offset, SEEK_SET);
 
-    if (status)
-        return -1;  //error
+  if (status)
+    return -1;  // error
 #else
-    fseek(m_file, offset, SEEK_SET);
+  fseek(m_file, offset, SEEK_SET);
 #endif
 
-    const size_t size = fread(buffer, 1, len, m_file);
+  const size_t size = fread(buffer, 1, len, m_file);
 
-    if (size < size_t(len))
-        return -1;  //error
+  if (size < size_t(len))
+    return -1;  // error
 
-    return 0;  //success
+  return 0;  // success
 }
 
-}  //end namespace mkvparser
+}  // end namespace mkvparser
diff --git a/third_party/libwebm/mkvreader.hpp b/third_party/libwebm/mkvreader.hpp
index 8ebdd99..82ebad5 100644
--- a/third_party/libwebm/mkvreader.hpp
+++ b/third_party/libwebm/mkvreader.hpp
@@ -12,35 +12,34 @@
 #include "mkvparser.hpp"
 #include <cstdio>
 
-namespace mkvparser
-{
+namespace mkvparser {
 
-class MkvReader : public IMkvReader
-{
-    MkvReader(const MkvReader&);
-    MkvReader& operator=(const MkvReader&);
-public:
-    MkvReader();
-    MkvReader(FILE* fp);
-    virtual ~MkvReader();
+class MkvReader : public IMkvReader {
+ public:
+  MkvReader();
+  explicit MkvReader(FILE* fp);
+  virtual ~MkvReader();
 
-    int Open(const char*);
-    void Close();
+  int Open(const char*);
+  void Close();
 
-    virtual int Read(long long position, long length, unsigned char* buffer);
-    virtual int Length(long long* total, long long* available);
-private:
+  virtual int Read(long long position, long length, unsigned char* buffer);
+  virtual int Length(long long* total, long long* available);
 
-    // Determines the size of the file. This is called either by the constructor
-    // or by the Open function depending on file ownership. Returns true on
-    // success.
-    bool GetFileSize();
+ private:
+  MkvReader(const MkvReader&);
+  MkvReader& operator=(const MkvReader&);
 
-    long long m_length;
-    FILE* m_file;
-    bool reader_owns_file_;
+  // Determines the size of the file. This is called either by the constructor
+  // or by the Open function depending on file ownership. Returns true on
+  // success.
+  bool GetFileSize();
+
+  long long m_length;
+  FILE* m_file;
+  bool reader_owns_file_;
 };
 
-}  //end namespace mkvparser
+}  // end namespace mkvparser
 
-#endif //MKVREADER_HPP
+#endif  // MKVREADER_HPP
diff --git a/third_party/libwebm/mkvwriter.cpp b/third_party/libwebm/mkvwriter.cpp
index 8de89a4..75d4350 100644
--- a/third_party/libwebm/mkvwriter.cpp
+++ b/third_party/libwebm/mkvwriter.cpp
@@ -16,15 +16,11 @@
 
 namespace mkvmuxer {
 
-MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {
-}
+MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {}
 
-MkvWriter::MkvWriter(FILE* fp): file_(fp), writer_owns_file_(false) {
-}
+MkvWriter::MkvWriter(FILE* fp) : file_(fp), writer_owns_file_(false) {}
 
-MkvWriter::~MkvWriter() {
-  Close();
-}
+MkvWriter::~MkvWriter() { Close(); }
 
 int32 MkvWriter::Write(const void* buffer, uint32 length) {
   if (!file_)
@@ -70,9 +66,9 @@
     return 0;
 
 #ifdef _MSC_VER
-    return _ftelli64(file_);
+  return _ftelli64(file_);
 #else
-    return ftell(file_);
+  return ftell(file_);
 #endif
 }
 
@@ -81,17 +77,14 @@
     return -1;
 
 #ifdef _MSC_VER
-    return _fseeki64(file_, position, SEEK_SET);
+  return _fseeki64(file_, position, SEEK_SET);
 #else
-    return fseek(file_, position, SEEK_SET);
+  return fseek(file_, position, SEEK_SET);
 #endif
 }
 
-bool MkvWriter::Seekable() const {
-  return true;
-}
+bool MkvWriter::Seekable() const { return true; }
 
-void MkvWriter::ElementStartNotify(uint64, int64) {
-}
+void MkvWriter::ElementStartNotify(uint64, int64) {}
 
 }  // namespace mkvmuxer
diff --git a/third_party/libwebm/mkvwriter.hpp b/third_party/libwebm/mkvwriter.hpp
index 524e0f7..684560c 100644
--- a/third_party/libwebm/mkvwriter.hpp
+++ b/third_party/libwebm/mkvwriter.hpp
@@ -20,7 +20,7 @@
 class MkvWriter : public IMkvWriter {
  public:
   MkvWriter();
-  MkvWriter(FILE* fp);
+  explicit MkvWriter(FILE* fp);
   virtual ~MkvWriter();
 
   // IMkvWriter interface
@@ -46,6 +46,6 @@
   LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter);
 };
 
-}  //end namespace mkvmuxer
+}  // end namespace mkvmuxer
 
-#endif // MKVWRITER_HPP
+#endif  // MKVWRITER_HPP
diff --git a/third_party/libwebm/webmids.hpp b/third_party/libwebm/webmids.hpp
index 65fab96..eeb52d8 100644
--- a/third_party/libwebm/webmids.hpp
+++ b/third_party/libwebm/webmids.hpp
@@ -12,130 +12,130 @@
 namespace mkvmuxer {
 
 enum MkvId {
-  kMkvEBML                    = 0x1A45DFA3,
-  kMkvEBMLVersion             = 0x4286,
-  kMkvEBMLReadVersion         = 0x42F7,
-  kMkvEBMLMaxIDLength         = 0x42F2,
-  kMkvEBMLMaxSizeLength       = 0x42F3,
-  kMkvDocType                 = 0x4282,
-  kMkvDocTypeVersion          = 0x4287,
-  kMkvDocTypeReadVersion      = 0x4285,
-  kMkvVoid                    = 0xEC,
-  kMkvSignatureSlot           = 0x1B538667,
-  kMkvSignatureAlgo           = 0x7E8A,
-  kMkvSignatureHash           = 0x7E9A,
-  kMkvSignaturePublicKey      = 0x7EA5,
-  kMkvSignature               = 0x7EB5,
-  kMkvSignatureElements       = 0x7E5B,
-  kMkvSignatureElementList    = 0x7E7B,
-  kMkvSignedElement           = 0x6532,
-  //segment
-  kMkvSegment                 = 0x18538067,
-  //Meta Seek Information
-  kMkvSeekHead                = 0x114D9B74,
-  kMkvSeek                    = 0x4DBB,
-  kMkvSeekID                  = 0x53AB,
-  kMkvSeekPosition            = 0x53AC,
-  //Segment Information
-  kMkvInfo                    = 0x1549A966,
-  kMkvTimecodeScale           = 0x2AD7B1,
-  kMkvDuration                = 0x4489,
-  kMkvDateUTC                 = 0x4461,
-  kMkvMuxingApp               = 0x4D80,
-  kMkvWritingApp              = 0x5741,
-  //Cluster
-  kMkvCluster                 = 0x1F43B675,
-  kMkvTimecode                = 0xE7,
-  kMkvPrevSize                = 0xAB,
-  kMkvBlockGroup              = 0xA0,
-  kMkvBlock                   = 0xA1,
-  kMkvBlockDuration           = 0x9B,
-  kMkvReferenceBlock          = 0xFB,
-  kMkvLaceNumber              = 0xCC,
-  kMkvSimpleBlock             = 0xA3,
-  kMkvBlockAdditions          = 0x75A1,
-  kMkvBlockMore               = 0xA6,
-  kMkvBlockAddID              = 0xEE,
-  kMkvBlockAdditional         = 0xA5,
-  kMkvDiscardPadding          = 0x75A2,
-  //Track
-  kMkvTracks                  = 0x1654AE6B,
-  kMkvTrackEntry              = 0xAE,
-  kMkvTrackNumber             = 0xD7,
-  kMkvTrackUID                = 0x73C5,
-  kMkvTrackType               = 0x83,
-  kMkvFlagEnabled             = 0xB9,
-  kMkvFlagDefault             = 0x88,
-  kMkvFlagForced              = 0x55AA,
-  kMkvFlagLacing              = 0x9C,
-  kMkvDefaultDuration         = 0x23E383,
-  kMkvMaxBlockAdditionID      = 0x55EE,
-  kMkvName                    = 0x536E,
-  kMkvLanguage                = 0x22B59C,
-  kMkvCodecID                 = 0x86,
-  kMkvCodecPrivate            = 0x63A2,
-  kMkvCodecName               = 0x258688,
-  kMkvCodecDelay              = 0x56AA,
-  kMkvSeekPreRoll             = 0x56BB,
-  //video
-  kMkvVideo                   = 0xE0,
-  kMkvFlagInterlaced          = 0x9A,
-  kMkvStereoMode              = 0x53B8,
-  kMkvAlphaMode               = 0x53C0,
-  kMkvPixelWidth              = 0xB0,
-  kMkvPixelHeight             = 0xBA,
-  kMkvPixelCropBottom         = 0x54AA,
-  kMkvPixelCropTop            = 0x54BB,
-  kMkvPixelCropLeft           = 0x54CC,
-  kMkvPixelCropRight          = 0x54DD,
-  kMkvDisplayWidth            = 0x54B0,
-  kMkvDisplayHeight           = 0x54BA,
-  kMkvDisplayUnit             = 0x54B2,
-  kMkvAspectRatioType         = 0x54B3,
-  kMkvFrameRate               = 0x2383E3,
-  //end video
-  //audio
-  kMkvAudio                   = 0xE1,
-  kMkvSamplingFrequency       = 0xB5,
+  kMkvEBML = 0x1A45DFA3,
+  kMkvEBMLVersion = 0x4286,
+  kMkvEBMLReadVersion = 0x42F7,
+  kMkvEBMLMaxIDLength = 0x42F2,
+  kMkvEBMLMaxSizeLength = 0x42F3,
+  kMkvDocType = 0x4282,
+  kMkvDocTypeVersion = 0x4287,
+  kMkvDocTypeReadVersion = 0x4285,
+  kMkvVoid = 0xEC,
+  kMkvSignatureSlot = 0x1B538667,
+  kMkvSignatureAlgo = 0x7E8A,
+  kMkvSignatureHash = 0x7E9A,
+  kMkvSignaturePublicKey = 0x7EA5,
+  kMkvSignature = 0x7EB5,
+  kMkvSignatureElements = 0x7E5B,
+  kMkvSignatureElementList = 0x7E7B,
+  kMkvSignedElement = 0x6532,
+  // segment
+  kMkvSegment = 0x18538067,
+  // Meta Seek Information
+  kMkvSeekHead = 0x114D9B74,
+  kMkvSeek = 0x4DBB,
+  kMkvSeekID = 0x53AB,
+  kMkvSeekPosition = 0x53AC,
+  // Segment Information
+  kMkvInfo = 0x1549A966,
+  kMkvTimecodeScale = 0x2AD7B1,
+  kMkvDuration = 0x4489,
+  kMkvDateUTC = 0x4461,
+  kMkvMuxingApp = 0x4D80,
+  kMkvWritingApp = 0x5741,
+  // Cluster
+  kMkvCluster = 0x1F43B675,
+  kMkvTimecode = 0xE7,
+  kMkvPrevSize = 0xAB,
+  kMkvBlockGroup = 0xA0,
+  kMkvBlock = 0xA1,
+  kMkvBlockDuration = 0x9B,
+  kMkvReferenceBlock = 0xFB,
+  kMkvLaceNumber = 0xCC,
+  kMkvSimpleBlock = 0xA3,
+  kMkvBlockAdditions = 0x75A1,
+  kMkvBlockMore = 0xA6,
+  kMkvBlockAddID = 0xEE,
+  kMkvBlockAdditional = 0xA5,
+  kMkvDiscardPadding = 0x75A2,
+  // Track
+  kMkvTracks = 0x1654AE6B,
+  kMkvTrackEntry = 0xAE,
+  kMkvTrackNumber = 0xD7,
+  kMkvTrackUID = 0x73C5,
+  kMkvTrackType = 0x83,
+  kMkvFlagEnabled = 0xB9,
+  kMkvFlagDefault = 0x88,
+  kMkvFlagForced = 0x55AA,
+  kMkvFlagLacing = 0x9C,
+  kMkvDefaultDuration = 0x23E383,
+  kMkvMaxBlockAdditionID = 0x55EE,
+  kMkvName = 0x536E,
+  kMkvLanguage = 0x22B59C,
+  kMkvCodecID = 0x86,
+  kMkvCodecPrivate = 0x63A2,
+  kMkvCodecName = 0x258688,
+  kMkvCodecDelay = 0x56AA,
+  kMkvSeekPreRoll = 0x56BB,
+  // video
+  kMkvVideo = 0xE0,
+  kMkvFlagInterlaced = 0x9A,
+  kMkvStereoMode = 0x53B8,
+  kMkvAlphaMode = 0x53C0,
+  kMkvPixelWidth = 0xB0,
+  kMkvPixelHeight = 0xBA,
+  kMkvPixelCropBottom = 0x54AA,
+  kMkvPixelCropTop = 0x54BB,
+  kMkvPixelCropLeft = 0x54CC,
+  kMkvPixelCropRight = 0x54DD,
+  kMkvDisplayWidth = 0x54B0,
+  kMkvDisplayHeight = 0x54BA,
+  kMkvDisplayUnit = 0x54B2,
+  kMkvAspectRatioType = 0x54B3,
+  kMkvFrameRate = 0x2383E3,
+  // end video
+  // audio
+  kMkvAudio = 0xE1,
+  kMkvSamplingFrequency = 0xB5,
   kMkvOutputSamplingFrequency = 0x78B5,
-  kMkvChannels                = 0x9F,
-  kMkvBitDepth                = 0x6264,
-  //end audio
-  //ContentEncodings
-  kMkvContentEncodings        = 0x6D80,
-  kMkvContentEncoding         = 0x6240,
-  kMkvContentEncodingOrder    = 0x5031,
-  kMkvContentEncodingScope    = 0x5032,
-  kMkvContentEncodingType     = 0x5033,
-  kMkvContentEncryption       = 0x5035,
-  kMkvContentEncAlgo          = 0x47E1,
-  kMkvContentEncKeyID         = 0x47E2,
-  kMkvContentEncAESSettings   = 0x47E7,
-  kMkvAESSettingsCipherMode   = 0x47E8,
+  kMkvChannels = 0x9F,
+  kMkvBitDepth = 0x6264,
+  // end audio
+  // ContentEncodings
+  kMkvContentEncodings = 0x6D80,
+  kMkvContentEncoding = 0x6240,
+  kMkvContentEncodingOrder = 0x5031,
+  kMkvContentEncodingScope = 0x5032,
+  kMkvContentEncodingType = 0x5033,
+  kMkvContentEncryption = 0x5035,
+  kMkvContentEncAlgo = 0x47E1,
+  kMkvContentEncKeyID = 0x47E2,
+  kMkvContentEncAESSettings = 0x47E7,
+  kMkvAESSettingsCipherMode = 0x47E8,
   kMkvAESSettingsCipherInitData = 0x47E9,
-  //end ContentEncodings
-  //Cueing Data
-  kMkvCues                    = 0x1C53BB6B,
-  kMkvCuePoint                = 0xBB,
-  kMkvCueTime                 = 0xB3,
-  kMkvCueTrackPositions       = 0xB7,
-  kMkvCueTrack                = 0xF7,
-  kMkvCueClusterPosition      = 0xF1,
-  kMkvCueBlockNumber          = 0x5378,
-  //Chapters
-  kMkvChapters                = 0x1043A770,
-  kMkvEditionEntry            = 0x45B9,
-  kMkvChapterAtom             = 0xB6,
-  kMkvChapterUID              = 0x73C4,
-  kMkvChapterStringUID        = 0x5654,
-  kMkvChapterTimeStart        = 0x91,
-  kMkvChapterTimeEnd          = 0x92,
-  kMkvChapterDisplay          = 0x80,
-  kMkvChapString              = 0x85,
-  kMkvChapLanguage            = 0x437C,
-  kMkvChapCountry             = 0x437E
+  // end ContentEncodings
+  // Cueing Data
+  kMkvCues = 0x1C53BB6B,
+  kMkvCuePoint = 0xBB,
+  kMkvCueTime = 0xB3,
+  kMkvCueTrackPositions = 0xB7,
+  kMkvCueTrack = 0xF7,
+  kMkvCueClusterPosition = 0xF1,
+  kMkvCueBlockNumber = 0x5378,
+  // Chapters
+  kMkvChapters = 0x1043A770,
+  kMkvEditionEntry = 0x45B9,
+  kMkvChapterAtom = 0xB6,
+  kMkvChapterUID = 0x73C4,
+  kMkvChapterStringUID = 0x5654,
+  kMkvChapterTimeStart = 0x91,
+  kMkvChapterTimeEnd = 0x92,
+  kMkvChapterDisplay = 0x80,
+  kMkvChapString = 0x85,
+  kMkvChapLanguage = 0x437C,
+  kMkvChapCountry = 0x437E
 };
 
 }  // end namespace mkvmuxer
 
-#endif // WEBMIDS_HPP
+#endif  // WEBMIDS_HPP
diff --git a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
deleted file mode 100644
index 3a39210..0000000
--- a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
+++ /dev/null
@@ -1,81 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |idct_dequant_0_2x_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;void idct_dequant_0_2x_neon(short *q, short dq,
-;                            unsigned char *dst, int stride);
-; r0   *q
-; r1   dq
-; r2   *dst
-; r3   stride
-|idct_dequant_0_2x_neon| PROC
-    push            {r4, r5}
-    vpush           {d8-d15}
-
-    add             r12, r2, #4
-    vld1.32         {d2[0]}, [r2], r3
-    vld1.32         {d8[0]}, [r12], r3
-    vld1.32         {d2[1]}, [r2], r3
-    vld1.32         {d8[1]}, [r12], r3
-    vld1.32         {d4[0]}, [r2], r3
-    vld1.32         {d10[0]}, [r12], r3
-    vld1.32         {d4[1]}, [r2], r3
-    vld1.32         {d10[1]}, [r12], r3
-
-    ldrh            r12, [r0]               ; lo q
-    ldrh            r4, [r0, #32]           ; hi q
-    mov             r5, #0
-    strh            r5, [r0]
-    strh            r5, [r0, #32]
-
-    sxth            r12, r12                ; lo
-    mul             r0, r12, r1
-    add             r0, r0, #4
-    asr             r0, r0, #3
-    vdup.16         q0, r0
-    sxth            r4, r4                  ; hi
-    mul             r0, r4, r1
-    add             r0, r0, #4
-    asr             r0, r0, #3
-    vdup.16         q3, r0
-
-    vaddw.u8        q1, q0, d2              ; lo
-    vaddw.u8        q2, q0, d4
-    vaddw.u8        q4, q3, d8              ; hi
-    vaddw.u8        q5, q3, d10
-
-    sub             r2, r2, r3, lsl #2      ; dst - 4*stride
-    add             r0, r2, #4
-
-    vqmovun.s16     d2, q1                  ; lo
-    vqmovun.s16     d4, q2
-    vqmovun.s16     d8, q4                  ; hi
-    vqmovun.s16     d10, q5
-
-    vst1.32         {d2[0]}, [r2], r3       ; lo
-    vst1.32         {d8[0]}, [r0], r3       ; hi
-    vst1.32         {d2[1]}, [r2], r3
-    vst1.32         {d8[1]}, [r0], r3
-    vst1.32         {d4[0]}, [r2], r3
-    vst1.32         {d10[0]}, [r0], r3
-    vst1.32         {d4[1]}, [r2]
-    vst1.32         {d10[1]}, [r0]
-
-    vpop            {d8-d15}
-    pop             {r4, r5}
-    bx              lr
-
-    ENDP            ; |idct_dequant_0_2x_neon|
-    END
diff --git a/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
new file mode 100644
index 0000000..967c322
--- /dev/null
+++ b/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
@@ -0,0 +1,62 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+// Adds the scaled first coefficient of two coefficient blocks (q[0], q[16])
+// to two side-by-side 4x4 byte blocks at dst and dst + 4, saturating each sum
+// to the unsigned 8-bit range. q[0] and q[16] are cleared in the process.
+void idct_dequant_0_2x_neon(int16_t *q, int16_t dq, unsigned char *dst,
+                            int stride) {
+    unsigned char *dst0;
+    int i, a0, a1;
+    int16x8x2_t q2Add;
+    // Initialized: vld1_lane_s32() reads the old vector when it sets one lane.
+    int32x2_t d2s32 = vdup_n_s32(0), d4s32 = vdup_n_s32(0);
+    uint8x8_t d2u8, d4u8;
+    uint16x8_t q1u16, q2u16;
+
+    a0 = ((q[0] * dq) + 4) >> 3;   // value added to the block at dst
+    a1 = ((q[16] * dq) + 4) >> 3;  // value added to the block at dst + 4
+    q[0] = q[16] = 0;
+    q2Add.val[0] = vdupq_n_s16((int16_t)a0);
+    q2Add.val[1] = vdupq_n_s16((int16_t)a1);
+
+    for (i = 0; i < 2; i++, dst += 4) {
+        dst0 = dst;  // load four 4-byte rows, stride bytes apart
+        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
+        dst0 += stride;
+        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
+        dst0 += stride;
+        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
+        dst0 += stride;
+        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
+
+        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+                         vreinterpret_u8_s32(d2s32));
+        q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+                         vreinterpret_u8_s32(d4s32));
+
+        d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));  // clamp to 0..255
+        d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
+
+        d2s32 = vreinterpret_s32_u8(d2u8);
+        d4s32 = vreinterpret_s32_u8(d4u8);
+
+        dst0 = dst;  // write the four rows back in place
+        vst1_lane_s32((int32_t *)dst0, d2s32, 0);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d2s32, 1);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d4s32, 0);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d4s32, 1);
+    }
+}
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
deleted file mode 100644
index e8ea2a6..0000000
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ /dev/null
@@ -1,87 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-    EXPORT  |vp8_short_inv_walsh4x4_neon|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-
-;short vp8_short_inv_walsh4x4_neon(short *input, short *mb_dqcoeff)
-|vp8_short_inv_walsh4x4_neon| PROC
-
-    ; read in all four lines of values: d0->d3
-    vld1.i16 {q0-q1}, [r0@128]
-
-    ; first for loop
-    vadd.s16 d4, d0, d3 ;a = [0] + [12]
-    vadd.s16 d6, d1, d2 ;b = [4] + [8]
-    vsub.s16 d5, d0, d3 ;d = [0] - [12]
-    vsub.s16 d7, d1, d2 ;c = [4] - [8]
-
-    vadd.s16 q0, q2, q3 ; a+b d+c
-    vsub.s16 q1, q2, q3 ; a-b d-c
-
-    vtrn.32 d0, d2 ;d0:  0  1  8  9
-                   ;d2:  2  3 10 11
-    vtrn.32 d1, d3 ;d1:  4  5 12 13
-                   ;d3:  6  7 14 15
-
-    vtrn.16 d0, d1 ;d0:  0  4  8 12
-                   ;d1:  1  5  9 13
-    vtrn.16 d2, d3 ;d2:  2  6 10 14
-                   ;d3:  3  7 11 15
-
-    ; second for loop
-
-    vadd.s16 d4, d0, d3 ;a = [0] + [3]
-    vadd.s16 d6, d1, d2 ;b = [1] + [2]
-    vsub.s16 d5, d0, d3 ;d = [0] - [3]
-    vsub.s16 d7, d1, d2 ;c = [1] - [2]
-
-    vmov.i16 q8, #3
-
-    vadd.s16 q0, q2, q3 ; a+b d+c
-    vsub.s16 q1, q2, q3 ; a-b d-c
-
-    vadd.i16 q0, q0, q8 ;e/f += 3
-    vadd.i16 q1, q1, q8 ;g/h += 3
-
-    vshr.s16 q0, q0, #3 ;e/f >> 3
-    vshr.s16 q1, q1, #3 ;g/h >> 3
-
-    mov      r2, #64
-    add      r3, r1, #32
-
-    vst1.i16 d0[0], [r1],r2
-    vst1.i16 d1[0], [r3],r2
-    vst1.i16 d2[0], [r1],r2
-    vst1.i16 d3[0], [r3],r2
-
-    vst1.i16 d0[1], [r1],r2
-    vst1.i16 d1[1], [r3],r2
-    vst1.i16 d2[1], [r1],r2
-    vst1.i16 d3[1], [r3],r2
-
-    vst1.i16 d0[2], [r1],r2
-    vst1.i16 d1[2], [r3],r2
-    vst1.i16 d2[2], [r1],r2
-    vst1.i16 d3[2], [r3],r2
-
-    vst1.i16 d0[3], [r1],r2
-    vst1.i16 d1[3], [r3],r2
-    vst1.i16 d2[3], [r1]
-    vst1.i16 d3[3], [r3]
-
-    bx lr
-    ENDP    ; |vp8_short_inv_walsh4x4_neon|
-
-    END
diff --git a/vp8/common/arm/neon/iwalsh_neon.c b/vp8/common/arm/neon/iwalsh_neon.c
new file mode 100644
index 0000000..6ea9dd7
--- /dev/null
+++ b/vp8/common/arm/neon/iwalsh_neon.c
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_short_inv_walsh4x4_neon(
+        int16_t *input,  // 16 values are read: input[0..15]
+        int16_t *mb_dqcoeff) {  // 16 values are written, 16 elements apart
+    int16x8_t q0s16, q1s16, q2s16, q3s16;
+    int16x4_t d4s16, d5s16, d6s16, d7s16;
+    int16x4x2_t v2tmp0, v2tmp1;
+    int32x2x2_t v2tmp2, v2tmp3;
+    int16x8_t qAdd3;
+
+    q0s16 = vld1q_s16(input);
+    q1s16 = vld1q_s16(input + 8);
+
+    // 1st pass: pair input[0..3] with [12..15] and input[4..7] with [8..11].
+    d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));  // a=[0]+[12]
+    d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));  // b=[4]+[8]
+    d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));  // d=[0]-[12]
+    d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));  // c=[4]-[8]
+
+    q2s16 = vcombine_s16(d4s16, d5s16);
+    q3s16 = vcombine_s16(d6s16, d7s16);
+
+    q0s16 = vaddq_s16(q2s16, q3s16);  // a+b | d+c
+    q1s16 = vsubq_s16(q2s16, q3s16);  // a-b | d-c
+    // Transpose the 4x4 result (32-bit, then 16-bit lane interleaves).
+    v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
+                      vreinterpret_s32_s16(vget_low_s16(q1s16)));
+    v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
+                      vreinterpret_s32_s16(vget_high_s16(q1s16)));
+    v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
+                      vreinterpret_s16_s32(v2tmp3.val[0]));
+    v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
+                      vreinterpret_s16_s32(v2tmp3.val[1]));
+
+    // 2nd pass: the same add/subtract pattern on the transposed vectors.
+    d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
+    d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
+    d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
+    d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
+    q2s16 = vcombine_s16(d4s16, d5s16);
+    q3s16 = vcombine_s16(d6s16, d7s16);
+
+    qAdd3 = vdupq_n_s16(3);  // added to every result before the >> 3 below
+
+    q0s16 = vaddq_s16(q2s16, q3s16);
+    q1s16 = vsubq_s16(q2s16, q3s16);
+
+    q0s16 = vaddq_s16(q0s16, qAdd3);
+    q1s16 = vaddq_s16(q1s16, qAdd3);
+
+    q0s16 = vshrq_n_s16(q0s16, 3);
+    q1s16 = vshrq_n_s16(q1s16, 3);
+
+    // store: one value to each of mb_dqcoeff[0], [16], [32], ..., [240]
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
+    mb_dqcoeff += 16;  // no further store follows this final advance
+    return;
+}
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
deleted file mode 100644
index 6eb0651..0000000
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ /dev/null
@@ -1,117 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_bhs_neon|
-    EXPORT  |vp8_loop_filter_mbhs_neon|
-    ARM
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0    unsigned char *s, PRESERVE
-; r1    int p, PRESERVE
-; q1    limit, PRESERVE
-
-|vp8_loop_filter_simple_horizontal_edge_neon| PROC
-    vpush       {d8-d15}
-    sub         r3, r0, r1, lsl #1          ; move src pointer down by 2 lines
-
-    vld1.u8     {q7}, [r0@128], r1          ; q0
-    vld1.u8     {q5}, [r3@128], r1          ; p0
-    vld1.u8     {q8}, [r0@128]              ; q1
-    vld1.u8     {q6}, [r3@128]              ; p1
-
-    vabd.u8     q15, q6, q7                 ; abs(p0 - q0)
-    vabd.u8     q14, q5, q8                 ; abs(p1 - q1)
-
-    vqadd.u8    q15, q15, q15               ; abs(p0 - q0) * 2
-    vshr.u8     q14, q14, #1                ; abs(p1 - q1) / 2
-    vmov.u8     q0, #0x80                   ; 0x80
-    vmov.s16    q13, #3
-    vqadd.u8    q15, q15, q14               ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-
-    vcge.u8     q15, q1, q15                ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q3, d15, d13
-
-    vqsub.s8    q4, q5, q8                  ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vmul.s16    q2, q2, q13                 ;  3 * ( qs0 - ps0)
-    vmul.s16    q3, q3, q13
-
-    vmov.u8     q10, #0x03                  ; 0x03
-    vmov.u8     q9, #0x04                   ; 0x04
-
-    vaddw.s8    q2, q2, d8                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q3, q3, d9
-
-    vqmovn.s16  d8, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d9, q3
-
-    vand        q14, q4, q15                ; vp8_filter &= mask
-
-    vqadd.s8    q2, q14, q10                ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q3, q14, q9                 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q4, q3, #3                  ; Filter1 >>= 3
-
-    sub         r0, r0, r1
-
-    ;calculate output
-    vqadd.s8    q11, q6, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q4                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-
-    vst1.u8     {q6}, [r3@128]              ; store op0
-    vst1.u8     {q7}, [r0@128]              ; store oq0
-
-    vpop        {d8-d15}
-    bx          lr
-    ENDP        ; |vp8_loop_filter_simple_horizontal_edge_neon|
-
-; r0    unsigned char *y
-; r1    int ystride
-; r2    const unsigned char *blimit
-
-|vp8_loop_filter_bhs_neon| PROC
-    push        {r4, lr}
-    ldrb        r3, [r2]                    ; load blim from mem
-    vdup.s8     q1, r3                      ; duplicate blim
-
-    add         r0, r0, r1, lsl #2          ; src = y_ptr + 4 * y_stride
-    bl          vp8_loop_filter_simple_horizontal_edge_neon
-    ; vp8_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1
-    add         r0, r0, r1, lsl #2          ; src = y_ptr + 8* y_stride
-    bl          vp8_loop_filter_simple_horizontal_edge_neon
-    add         r0, r0, r1, lsl #2          ; src = y_ptr + 12 * y_stride
-    pop         {r4, lr}
-    b           vp8_loop_filter_simple_horizontal_edge_neon
-    ENDP        ;|vp8_loop_filter_bhs_neon|
-
-; r0    unsigned char *y
-; r1    int ystride
-; r2    const unsigned char *blimit
-
-|vp8_loop_filter_mbhs_neon| PROC
-    ldrb        r3, [r2]                   ; load blim from mem
-    vdup.s8     q1, r3                     ; duplicate mblim
-    b           vp8_loop_filter_simple_horizontal_edge_neon
-    ENDP        ;|vp8_loop_filter_bhs_neon|
-
-    END
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
new file mode 100644
index 0000000..b25686f
--- /dev/null
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+
+static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
+        unsigned char *s,  // row just below the edge (q0); 16 bytes per row
+        int p,  // offset in bytes from one row to the next
+        const unsigned char *blimit) {  // only blimit[0] is read
+    uint8_t *sp;
+    uint8x16_t qblimit, q0u8;
+    uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
+    int16x8_t q2s16, q3s16, q13s16;
+    int8x8_t d8s8, d9s8;
+    int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
+    // Simple loop filter: reads rows s-2p..s+p, rewrites rows s-p and s.
+    qblimit = vdupq_n_u8(*blimit);  // broadcast blimit[0] to all 16 lanes
+
+    sp = s - (p << 1);  // start two rows above s
+    q5u8 = vld1q_u8(sp);  // p1: row s - 2 * p
+    sp += p;
+    q6u8 = vld1q_u8(sp);  // p0: row s - p
+    sp += p;
+    q7u8 = vld1q_u8(sp);  // q0: row s
+    sp += p;
+    q8u8 = vld1q_u8(sp);  // q1: row s + p
+
+    q15u8 = vabdq_u8(q6u8, q7u8);  // abs(p0 - q0)
+    q14u8 = vabdq_u8(q5u8, q8u8);  // abs(p1 - q1)
+
+    q15u8 = vqaddq_u8(q15u8, q15u8);  // abs(p0 - q0) * 2, saturating
+    q14u8 = vshrq_n_u8(q14u8, 1);  // abs(p1 - q1) / 2
+    q0u8 = vdupq_n_u8(0x80);  // sign-bit mask for unsigned <-> signed
+    q13s16 = vdupq_n_s16(3);  // multiplier for (qs0 - ps0)
+    q15u8 = vqaddq_u8(q15u8, q14u8);  // abs(p0 - q0) * 2 + abs(p1 - q1) / 2
+
+    q5u8 = veorq_u8(q5u8, q0u8);  // ps1 = p1 ^ 0x80
+    q6u8 = veorq_u8(q6u8, q0u8);  // ps0 = p0 ^ 0x80
+    q7u8 = veorq_u8(q7u8, q0u8);  // qs0 = q0 ^ 0x80
+    q8u8 = veorq_u8(q8u8, q0u8);  // qs1 = q1 ^ 0x80
+
+    q15u8 = vcgeq_u8(qblimit, q15u8);  // mask: all-ones where blimit >= sum
+
+    q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
+                     vget_low_s8(vreinterpretq_s8_u8(q6u8)));  // qs0 - ps0 (lo)
+    q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
+                     vget_high_s8(vreinterpretq_s8_u8(q6u8)));  // qs0 - ps0 (hi)
+
+    q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
+                     vreinterpretq_s8_u8(q8u8));  // clamp(ps1 - qs1)
+
+    q2s16 = vmulq_s16(q2s16, q13s16);  // 3 * (qs0 - ps0), low lanes
+    q3s16 = vmulq_s16(q3s16, q13s16);  // 3 * (qs0 - ps0), high lanes
+
+    q10u8 = vdupq_n_u8(3);  // added before the >> 3 that yields Filter2
+    q9u8 = vdupq_n_u8(4);  // added before the >> 3 that yields Filter1
+
+    q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));  // + clamp(ps1 - qs1)
+    q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
+
+    d8s8 = vqmovn_s16(q2s16);  // saturate the 16-bit sums back to 8 bits
+    d9s8 = vqmovn_s16(q3s16);
+    q4s8 = vcombine_s8(d8s8, d9s8);
+
+    q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));  // zero masked lanes
+
+    q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));  // clamp(filter + 3)
+    q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));  // clamp(filter + 4)
+    q2s8 = vshrq_n_s8(q2s8, 3);  // Filter2 = clamp(filter + 3) >> 3
+    q3s8 = vshrq_n_s8(q3s8, 3);  // Filter1 = clamp(filter + 4) >> 3
+
+    q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);  // clamp(ps0 + Filter2)
+    q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);  // clamp(qs0 - Filter1)
+
+    q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);  // op0, back to unsigned
+    q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);  // oq0, back to unsigned
+
+    vst1q_u8(s, q7u8);  // store oq0 at row s
+    s -= p;
+    vst1q_u8(s, q6u8);  // store op0 at row s - p
+    return;
+}
+
+void vp8_loop_filter_bhs_neon(  // simple filter on three edges below y_ptr
+        unsigned char *y_ptr,
+        int y_stride,  // offset in bytes from one row to the next
+        const unsigned char *blimit) {  // passed through to the edge filter
+    y_ptr += y_stride * 4;  // edge at y_ptr + 4 * y_stride
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    y_ptr += y_stride * 4;  // edge at y_ptr + 8 * y_stride
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    y_ptr += y_stride * 4;  // edge at y_ptr + 12 * y_stride
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    return;
+}
+
+void vp8_loop_filter_mbhs_neon(  // simple filter on the edge at y_ptr itself
+        unsigned char *y_ptr,
+        int y_stride,  // offset in bytes from one row to the next
+        const unsigned char *blimit) {  // passed through to the edge filter
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    return;
+}
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm
deleted file mode 100644
index d200c30..0000000
--- a/vp8/common/arm/neon/mbloopfilter_neon.asm
+++ /dev/null
@@ -1,481 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_y_neon|
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_uv_neon|
-    EXPORT  |vp8_mbloop_filter_vertical_edge_y_neon|
-    EXPORT  |vp8_mbloop_filter_vertical_edge_uv_neon|
-    ARM
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
-;                                               const unsigned char *blimit,
-;                                               const unsigned char *limit,
-;                                               const unsigned char *thresh)
-; r0    unsigned char *src,
-; r1    int pitch,
-; r2    unsigned char blimit
-; r3    unsigned char limit
-; sp    unsigned char thresh,
-|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
-    push        {lr}
-    vpush       {d8-d15}
-
-    add         r1, r1, r1                  ; double stride
-    ldr         r12, [sp, #68]              ; load thresh
-    sub         r0, r0, r1, lsl #1          ; move src pointer down by 4 lines
-    vdup.u8     q2, r12                     ; thresh
-    add         r12, r0, r1,  lsr #1        ; move src pointer up by 1 line
-
-    vld1.u8     {q3}, [r0@128], r1              ; p3
-    vld1.u8     {q4}, [r12@128], r1             ; p2
-    vld1.u8     {q5}, [r0@128], r1              ; p1
-    vld1.u8     {q6}, [r12@128], r1             ; p0
-    vld1.u8     {q7}, [r0@128], r1              ; q0
-    vld1.u8     {q8}, [r12@128], r1             ; q1
-    vld1.u8     {q9}, [r0@128], r1              ; q2
-    vld1.u8     {q10}, [r12@128], r1            ; q3
-
-    bl          vp8_mbloop_filter_neon
-
-    sub         r12, r12, r1, lsl #2
-    add         r0, r12, r1, lsr #1
-
-    vst1.u8     {q4}, [r12@128],r1         ; store op2
-    vst1.u8     {q5}, [r0@128],r1          ; store op1
-    vst1.u8     {q6}, [r12@128], r1        ; store op0
-    vst1.u8     {q7}, [r0@128],r1          ; store oq0
-    vst1.u8     {q8}, [r12@128]            ; store oq1
-    vst1.u8     {q9}, [r0@128]             ; store oq2
-
-    vpop        {d8-d15}
-    pop         {pc}
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
-
-; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
-;                                                const unsigned char *blimit,
-;                                                const unsigned char *limit,
-;                                                const unsigned char *thresh,
-;                                                unsigned char *v)
-; r0    unsigned char *u,
-; r1    int pitch,
-; r2    unsigned char blimit
-; r3    unsigned char limit
-; sp    unsigned char thresh,
-; sp+4  unsigned char *v
-
-|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
-    push        {lr}
-    vpush       {d8-d15}
-
-    ldr         r12, [sp, #68]                ; load thresh
-    sub         r0, r0, r1, lsl #2            ; move u pointer down by 4 lines
-    vdup.u8     q2, r12                       ; thresh
-    ldr         r12, [sp, #72]                ; load v ptr
-    sub         r12, r12, r1, lsl #2          ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r0@64], r1              ; p3
-    vld1.u8     {d7}, [r12@64], r1              ; p3
-    vld1.u8     {d8}, [r0@64], r1              ; p2
-    vld1.u8     {d9}, [r12@64], r1              ; p2
-    vld1.u8     {d10}, [r0@64], r1             ; p1
-    vld1.u8     {d11}, [r12@64], r1             ; p1
-    vld1.u8     {d12}, [r0@64], r1             ; p0
-    vld1.u8     {d13}, [r12@64], r1             ; p0
-    vld1.u8     {d14}, [r0@64], r1             ; q0
-    vld1.u8     {d15}, [r12@64], r1             ; q0
-    vld1.u8     {d16}, [r0@64], r1             ; q1
-    vld1.u8     {d17}, [r12@64], r1             ; q1
-    vld1.u8     {d18}, [r0@64], r1             ; q2
-    vld1.u8     {d19}, [r12@64], r1             ; q2
-    vld1.u8     {d20}, [r0@64], r1             ; q3
-    vld1.u8     {d21}, [r12@64], r1             ; q3
-
-    bl          vp8_mbloop_filter_neon
-
-    sub         r0, r0, r1, lsl #3
-    sub         r12, r12, r1, lsl #3
-
-    add         r0, r0, r1
-    add         r12, r12, r1
-
-    vst1.u8     {d8}, [r0@64], r1              ; store u op2
-    vst1.u8     {d9}, [r12@64], r1              ; store v op2
-    vst1.u8     {d10}, [r0@64], r1             ; store u op1
-    vst1.u8     {d11}, [r12@64], r1             ; store v op1
-    vst1.u8     {d12}, [r0@64], r1             ; store u op0
-    vst1.u8     {d13}, [r12@64], r1             ; store v op0
-    vst1.u8     {d14}, [r0@64], r1             ; store u oq0
-    vst1.u8     {d15}, [r12@64], r1             ; store v oq0
-    vst1.u8     {d16}, [r0@64], r1             ; store u oq1
-    vst1.u8     {d17}, [r12@64], r1             ; store v oq1
-    vst1.u8     {d18}, [r0@64], r1             ; store u oq2
-    vst1.u8     {d19}, [r12@64], r1             ; store v oq2
-
-    vpop        {d8-d15}
-    pop         {pc}
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
-
-; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
-;                                             const unsigned char *blimit,
-;                                             const unsigned char *limit,
-;                                             const unsigned char *thresh)
-; r0    unsigned char *src,
-; r1    int pitch,
-; r2    unsigned char blimit
-; r3    unsigned char limit
-; sp    unsigned char thresh,
-|vp8_mbloop_filter_vertical_edge_y_neon| PROC
-    push        {lr}
-    vpush       {d8-d15}
-
-    ldr         r12, [sp, #68]              ; load thresh
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-    vdup.s8     q2, r12                     ; thresh
-    add         r12, r0, r1, lsl #3         ; move src pointer down by 8 lines
-
-    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
-    vld1.u8     {d7}, [r12], r1             ; load second 8-line src data
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r12], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r12], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r12], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r12], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r12], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r12], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r12], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    sub         r0, r0, r1, lsl #3
-
-    bl          vp8_mbloop_filter_neon
-
-    sub         r12, r12, r1, lsl #3
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0], r1
-    vst1.8      {d7}, [r12], r1
-    vst1.8      {d8}, [r0], r1
-    vst1.8      {d9}, [r12], r1
-    vst1.8      {d10}, [r0], r1
-    vst1.8      {d11}, [r12], r1
-    vst1.8      {d12}, [r0], r1
-    vst1.8      {d13}, [r12], r1
-    vst1.8      {d14}, [r0], r1
-    vst1.8      {d15}, [r12], r1
-    vst1.8      {d16}, [r0], r1
-    vst1.8      {d17}, [r12], r1
-    vst1.8      {d18}, [r0], r1
-    vst1.8      {d19}, [r12], r1
-    vst1.8      {d20}, [r0]
-    vst1.8      {d21}, [r12]
-
-    vpop        {d8-d15}
-    pop         {pc}
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
-
-; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
-;                                              const unsigned char *blimit,
-;                                              const unsigned char *limit,
-;                                              const unsigned char *thresh,
-;                                              unsigned char *v)
-; r0    unsigned char *u,
-; r1    int pitch,
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; sp    const signed char *thresh,
-; sp+4  unsigned char *v
-|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
-    push        {lr}
-    vpush       {d8-d15}
-
-    ldr         r12, [sp, #68]              ; load thresh
-    sub         r0, r0, #4                  ; move u pointer down by 4 columns
-    vdup.u8     q2, r12                     ; thresh
-    ldr         r12, [sp, #72]              ; load v ptr
-    sub         r12, r12, #4                ; move v pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ;load u data
-    vld1.u8     {d7}, [r12], r1             ;load v data
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r12], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r12], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r12], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r12], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r12], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r12], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r12], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    sub         r0, r0, r1, lsl #3
-
-    bl          vp8_mbloop_filter_neon
-
-    sub         r12, r12, r1, lsl #3
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0], r1
-    vst1.8      {d7}, [r12], r1
-    vst1.8      {d8}, [r0], r1
-    vst1.8      {d9}, [r12], r1
-    vst1.8      {d10}, [r0], r1
-    vst1.8      {d11}, [r12], r1
-    vst1.8      {d12}, [r0], r1
-    vst1.8      {d13}, [r12], r1
-    vst1.8      {d14}, [r0], r1
-    vst1.8      {d15}, [r12], r1
-    vst1.8      {d16}, [r0], r1
-    vst1.8      {d17}, [r12], r1
-    vst1.8      {d18}, [r0], r1
-    vst1.8      {d19}, [r12], r1
-    vst1.8      {d20}, [r0]
-    vst1.8      {d21}, [r12]
-
-    vpop        {d8-d15}
-    pop         {pc}
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
-
-; void vp8_mbloop_filter_neon()
-; This is a helper function for the macroblock loopfilters. The individual
-; functions do the necessary load, transpose (if necessary), preserve (if
-; necessary) and store.
-
-; r0,r1 PRESERVE
-; r2    mblimit
-; r3    limit
-
-; q2    thresh
-; q3    p3 PRESERVE
-; q4    p2
-; q5    p1
-; q6    p0
-; q7    q0
-; q8    q1
-; q9    q2
-; q10   q3 PRESERVE
-
-|vp8_mbloop_filter_neon| PROC
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q1, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q1, q1, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh) * -1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh) * -1
-    vmax.u8     q15, q15, q1
-
-    vdup.u8     q1, r3                      ; limit
-    vdup.u8     q2, r2                      ; mblimit
-
-    vmov.u8     q0, #0x80                   ; 0x80
-
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; a = abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; b = abs(p0 - q0) * 2
-    vmov.u16    q11, #3                     ; #3
-
-    ; vp8_filter
-    ; convert to signed
-    veor        q7, q7, q0                  ; qs0
-    vshr.u8     q1, q1, #1                  ; a = a / 2
-    veor        q6, q6, q0                  ; ps0
-    veor        q5, q5, q0                  ; ps1
-
-    vqadd.u8    q12, q12, q1                ; a = b + a
-
-    veor        q8, q8, q0                  ; qs1
-    veor        q4, q4, q0                  ; ps2
-    veor        q9, q9, q0                  ; qs2
-
-    vorr        q14, q13, q14               ; vp8_hevmask
-
-    vcge.u8     q12, q2, q12                ; (a > flimit * 2 + limit) * -1
-
-    vsubl.s8    q2, d14, d12                ; qs0 - ps0
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
-
-    vmul.i16    q2, q2, q11                 ; 3 * ( qs0 - ps0)
-
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vmul.i16    q13, q13, q11
-
-    vmov.u8     q12, #3                     ; #3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vmov.u8     q11, #4                     ; #4
-
-    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d2, q2
-    vqmovn.s16  d3, q13
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vmov.u16    q15, #63                    ; #63
-
-    vand        q13, q1, q14                ; Filter2 &= hev
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = clamp(Filter2+3)
-
-    vmov        q0, q15
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q11, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = clamp(qs0 - Filter1)
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = clamp(ps0 + Filter2)
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-
-    vmov.u8     d5, #9                      ; #9
-    vmov.u8     d4, #18                     ; #18
-
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q0, d2, d5                  ; 63 + Filter2 * 9
-    vmlal.s8    q11, d3, d5
-    vmov.u8     d5, #27                     ; #27
-    vmlal.s8    q12, d2, d4                 ; 63 + Filter2 * 18
-    vmlal.s8    q13, d3, d4
-    vmlal.s8    q14, d2, d5                 ; 63 + Filter2 * 27
-    vmlal.s8    q15, d3, d5
-
-    vqshrn.s16  d0, q0, #7                  ; u = clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d1, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vmov.u8     q1, #0x80                   ; 0x80
-
-    vqsub.s8    q11, q9, q0                 ; s = clamp(qs2 - u)
-    vqadd.s8    q0, q4, q0                  ; s = clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = clamp(ps0 + u)
-
-    veor        q9, q11, q1                 ; *oq2 = s^0x80
-    veor        q4, q0, q1                  ; *op2 = s^0x80
-    veor        q8, q13, q1                 ; *oq1 = s^0x80
-    veor        q5, q12, q1                 ; *op2 = s^0x80
-    veor        q7, q15, q1                 ; *oq0 = s^0x80
-    veor        q6, q14, q1                 ; *op0 = s^0x80
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_neon|
-
-;-----------------
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.c b/vp8/common/arm/neon/mbloopfilter_neon.c
new file mode 100644
index 0000000..5351f4b
--- /dev/null
+++ b/vp8/common/arm/neon/mbloopfilter_neon.c
@@ -0,0 +1,625 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+
+static INLINE void vp8_mbloop_filter_neon(
+        uint8x16_t qblimit,  // mblimit
+        uint8x16_t qlimit,   // limit
+        uint8x16_t qthresh,  // thresh
+        uint8x16_t q3,       // p3
+        uint8x16_t q4,       // p2
+        uint8x16_t q5,       // p1
+        uint8x16_t q6,       // p0
+        uint8x16_t q7,       // q0
+        uint8x16_t q8,       // q1
+        uint8x16_t q9,       // q2
+        uint8x16_t q10,      // q3
+        uint8x16_t *q4r,     // out: filtered p2
+        uint8x16_t *q5r,     // out: filtered p1
+        uint8x16_t *q6r,     // out: filtered p0
+        uint8x16_t *q7r,     // out: filtered q0
+        uint8x16_t *q8r,     // out: filtered q1
+        uint8x16_t *q9r) {   // out: filtered q2
+    uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8;
+    int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16;
+    int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8;
+    uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16;
+    int8x16_t q0s8, q12s8, q14s8, q15s8;
+    int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29;
+    // Filter core on 16 lanes; p3 and q3 feed the mask only, no output.
+    q11u8 = vabdq_u8(q3, q4);  // abs(p3 - p2)
+    q12u8 = vabdq_u8(q4, q5);  // abs(p2 - p1)
+    q13u8 = vabdq_u8(q5, q6);  // abs(p1 - p0)
+    q14u8 = vabdq_u8(q8, q7);  // abs(q1 - q0)
+    q1u8  = vabdq_u8(q9, q8);  // abs(q2 - q1)
+    q0u8  = vabdq_u8(q10, q9);  // abs(q3 - q2)
+
+    q11u8 = vmaxq_u8(q11u8, q12u8);  // pairwise maxima of the six differences
+    q12u8 = vmaxq_u8(q13u8, q14u8);
+    q1u8  = vmaxq_u8(q1u8, q0u8);
+    q15u8 = vmaxq_u8(q11u8, q12u8);
+
+    q12u8 = vabdq_u8(q6, q7);  // abs(p0 - q0)
+
+    // vp8_hevmask
+    q13u8 = vcgtq_u8(q13u8, qthresh);  // abs(p1 - p0) > thresh
+    q14u8 = vcgtq_u8(q14u8, qthresh);  // abs(q1 - q0) > thresh
+    q15u8 = vmaxq_u8(q15u8, q1u8);  // max over all six differences
+
+    q15u8 = vcgeq_u8(qlimit, q15u8);  // all-ones where limit >= that max
+
+    q1u8 = vabdq_u8(q5, q8);  // a = abs(p1 - q1)
+    q12u8 = vqaddq_u8(q12u8, q12u8);  // b = abs(p0 - q0) * 2, saturating
+
+    // vp8_filter() function
+    // convert to signed
+    q0u8 = vdupq_n_u8(0x80);
+    q9 = veorq_u8(q9, q0u8);  // qs2
+    q8 = veorq_u8(q8, q0u8);  // qs1
+    q7 = veorq_u8(q7, q0u8);  // qs0
+    q6 = veorq_u8(q6, q0u8);  // ps0
+    q5 = veorq_u8(q5, q0u8);  // ps1
+    q4 = veorq_u8(q4, q0u8);  // ps2
+
+    q1u8 = vshrq_n_u8(q1u8, 1);  // a = a / 2
+    q12u8 = vqaddq_u8(q12u8, q1u8);  // b + a, saturating
+
+    q14u8 = vorrq_u8(q13u8, q14u8);  // hev: either threshold test passed
+    q12u8 = vcgeq_u8(qblimit, q12u8);  // all-ones where mblimit >= b + a
+
+    q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
+                     vget_low_s8(vreinterpretq_s8_u8(q6)));  // qs0 - ps0 (lo)
+    q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
+                      vget_high_s8(vreinterpretq_s8_u8(q6)));  // qs0 - ps0 (hi)
+
+    q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
+                     vreinterpretq_s8_u8(q8));  // clamp(ps1 - qs1)
+
+    q11s16 = vdupq_n_s16(3);
+    q2s16  = vmulq_s16(q2s16, q11s16);  // 3 * (qs0 - ps0), low lanes
+    q13s16 = vmulq_s16(q13s16, q11s16);  // 3 * (qs0 - ps0), high lanes
+
+    q15u8 = vandq_u8(q15u8, q12u8);  // filter mask = limit test & mblimit test
+
+    q2s16  = vaddw_s8(q2s16, vget_low_s8(q1s8));  // + clamp(ps1 - qs1)
+    q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8));
+
+    q12u8 = vdupq_n_u8(3);  // added to form Filter2 below
+    q11u8 = vdupq_n_u8(4);  // added to form Filter1 below
+    // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    d2 = vqmovn_s16(q2s16);
+    d3 = vqmovn_s16(q13s16);
+    q1s8 = vcombine_s8(d2, d3);
+    q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8));  // vp8_filter &= mask
+    q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8));  // Filter2 &= hev
+
+    q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8));  // clamp(Filter2 + 4)
+    q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8));  // clamp(Filter2 + 3)
+    q2s8 = vshrq_n_s8(q2s8, 3);  // Filter1 >>= 3
+    q13s8 = vshrq_n_s8(q13s8, 3);  // Filter2 >>= 3
+
+    q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8);  // clamp(qs0 - Filter1)
+    q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8);  // clamp(ps0 + Filter2)
+
+    q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));  // vp8_filter &= ~hev
+
+    q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63);
+    d5 = vdup_n_s8(9);  // tap weights 9, 18 and 27 (27 is loaded below)
+    d4 = vdup_n_s8(18);
+
+    q0s16  = vmlal_s8(vreinterpretq_s16_u16(q0u16),  vget_low_s8(q1s8),  d5);
+    q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5);
+    d5 = vdup_n_s8(27);  // d5 is reused: 9 above, 27 from here on
+    q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8),  d4);
+    q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4);
+    q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8),  d5);
+    q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5);
+
+    d0  = vqshrn_n_s16(q0s16 , 7);  // u = clamp((63 + filter * 9) >> 7)
+    d1  = vqshrn_n_s16(q11s16, 7);
+    d24 = vqshrn_n_s16(q12s16, 7);  // u = clamp((63 + filter * 18) >> 7)
+    d25 = vqshrn_n_s16(q13s16, 7);
+    d28 = vqshrn_n_s16(q14s16, 7);  // u = clamp((63 + filter * 27) >> 7)
+    d29 = vqshrn_n_s16(q15s16, 7);
+
+    q0s8  = vcombine_s8(d0, d1);
+    q12s8 = vcombine_s8(d24, d25);
+    q14s8 = vcombine_s8(d28, d29);
+
+    q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8);  // clamp(qs2 - u), x9
+    q0s8  = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8);  // clamp(ps2 + u), x9
+    q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8);  // clamp(qs1 - u), x18
+    q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8);  // clamp(ps1 + u), x18
+    q15s8 = vqsubq_s8((q7s8), q14s8);  // clamp(qs0 - u), x27
+    q14s8 = vqaddq_s8((q6s8), q14s8);  // clamp(ps0 + u), x27
+
+    q1u8 = vdupq_n_u8(0x80);  // flip the sign bit to return to unsigned
+    *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8);  // oq2
+    *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8);  // oq1
+    *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8);  // oq0
+    *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8);  // op0
+    *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8);  // op1
+    *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8);  // op2
+    return;
+}
+
+void vp8_mbloop_filter_horizontal_edge_y_neon(
+        unsigned char *src,  // row just below the edge (q0); 16 bytes per row
+        int pitch,  // offset in bytes from one row to the next
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh) {
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    // Loads 8 rows around the edge, filters them, stores the middle 6 back.
+    qblimit = vdupq_n_u8(blimit);  // broadcast each scalar to all 16 lanes
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    src -= (pitch << 2);  // back up four rows to p3
+
+    q3 = vld1q_u8(src);  // p3
+    src += pitch;
+    q4 = vld1q_u8(src);  // p2
+    src += pitch;
+    q5 = vld1q_u8(src);  // p1
+    src += pitch;
+    q6 = vld1q_u8(src);  // p0
+    src += pitch;
+    q7 = vld1q_u8(src);  // q0
+    src += pitch;
+    q8 = vld1q_u8(src);  // q1
+    src += pitch;
+    q9 = vld1q_u8(src);  // q2
+    src += pitch;
+    q10 = vld1q_u8(src);  // q3; src is left pointing at this row
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);  // q4..q9 updated
+
+    src -= (pitch * 6);  // from the q3 row back to the p2 row
+    vst1q_u8(src, q4);  // op2
+    src += pitch;
+    vst1q_u8(src, q5);  // op1
+    src += pitch;
+    vst1q_u8(src, q6);  // op0
+    src += pitch;
+    vst1q_u8(src, q7);  // oq0
+    src += pitch;
+    vst1q_u8(src, q8);  // oq1
+    src += pitch;
+    vst1q_u8(src, q9);  // oq2
+    return;
+}
+
+void vp8_mbloop_filter_horizontal_edge_uv_neon(
+        unsigned char *u,  // row just below the edge in u; 8 bytes per row
+        int pitch,  // row-to-row byte offset, applied to both u and v
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh,
+        unsigned char *v) {  // row just below the edge in v; 8 bytes per row
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+    // Filters u and v together: u rows in the low half, v rows in the high.
+    qblimit = vdupq_n_u8(blimit);  // broadcast each scalar to all 16 lanes
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    u -= (pitch << 2);  // back up four rows to p3
+    v -= (pitch << 2);
+
+    d6 = vld1_u8(u);  // u p3
+    u += pitch;
+    d7 = vld1_u8(v);  // v p3
+    v += pitch;
+    d8 = vld1_u8(u);  // u p2
+    u += pitch;
+    d9 = vld1_u8(v);  // v p2
+    v += pitch;
+    d10 = vld1_u8(u);  // u p1
+    u += pitch;
+    d11 = vld1_u8(v);  // v p1
+    v += pitch;
+    d12 = vld1_u8(u);  // u p0
+    u += pitch;
+    d13 = vld1_u8(v);  // v p0
+    v += pitch;
+    d14 = vld1_u8(u);  // u q0
+    u += pitch;
+    d15 = vld1_u8(v);  // v q0
+    v += pitch;
+    d16 = vld1_u8(u);  // u q1
+    u += pitch;
+    d17 = vld1_u8(v);  // v q1
+    v += pitch;
+    d18 = vld1_u8(u);  // u q2
+    u += pitch;
+    d19 = vld1_u8(v);  // v q2
+    v += pitch;
+    d20 = vld1_u8(u);  // u q3; u and v are left pointing at the q3 rows
+    d21 = vld1_u8(v);  // v q3
+
+    q3 = vcombine_u8(d6, d7);  // p3: u in the low 8 lanes, v in the high 8
+    q4 = vcombine_u8(d8, d9);  // p2
+    q5 = vcombine_u8(d10, d11);  // p1
+    q6 = vcombine_u8(d12, d13);  // p0
+    q7 = vcombine_u8(d14, d15);  // q0
+    q8 = vcombine_u8(d16, d17);  // q1
+    q9 = vcombine_u8(d18, d19);  // q2
+    q10 = vcombine_u8(d20, d21);  // q3
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);  // q4..q9 updated
+
+    u -= (pitch * 6);  // from the q3 row back to the p2 row
+    v -= (pitch * 6);
+    vst1_u8(u, vget_low_u8(q4));  // u op2
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q4));  // v op2
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q5));  // u op1
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q5));  // v op1
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q6));  // u op0
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q6));  // v op0
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q7));  // u oq0
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q7));  // v oq0
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q8));  // u oq1
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q8));  // v oq1
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q9));  // u oq2
+    vst1_u8(v, vget_high_u8(q9));  // v oq2
+    return;
+}
+
+void vp8_mbloop_filter_vertical_edge_y_neon(
+        unsigned char *src,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh) {
+    unsigned char *s1, *s2;
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+    uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+    uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+    uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    s1 = src - 4;
+    s2 = s1 + 8 * pitch;
+    d6  = vld1_u8(s1);
+    s1 += pitch;
+    d7  = vld1_u8(s2);
+    s2 += pitch;
+    d8  = vld1_u8(s1);
+    s1 += pitch;
+    d9  = vld1_u8(s2);
+    s2 += pitch;
+    d10 = vld1_u8(s1);
+    s1 += pitch;
+    d11 = vld1_u8(s2);
+    s2 += pitch;
+    d12 = vld1_u8(s1);
+    s1 += pitch;
+    d13 = vld1_u8(s2);
+    s2 += pitch;
+    d14 = vld1_u8(s1);
+    s1 += pitch;
+    d15 = vld1_u8(s2);
+    s2 += pitch;
+    d16 = vld1_u8(s1);
+    s1 += pitch;
+    d17 = vld1_u8(s2);
+    s2 += pitch;
+    d18 = vld1_u8(s1);
+    s1 += pitch;
+    d19 = vld1_u8(s2);
+    s2 += pitch;
+    d20 = vld1_u8(s1);
+    d21 = vld1_u8(s2);
+
+    q3 = vcombine_u8(d6, d7);
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    s1 -= 7 * pitch;
+    s2 -= 7 * pitch;
+
+    vst1_u8(s1, vget_low_u8(q3));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q3));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q4));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q4));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q5));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q5));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q6));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q6));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q7));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q7));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q8));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q8));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q9));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q9));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q10));
+    vst1_u8(s2, vget_high_u8(q10));
+    return;
+}
+
+void vp8_mbloop_filter_vertical_edge_uv_neon(  // 8 U rows + 8 V rows per call
+        unsigned char *u,  // rows are read and written at u - 4 .. u + 3
+        int pitch,  // byte step between rows, applied to both u and v
+        unsigned char blimit,  // blimit/limit/thresh: splatted to 16 lanes
+        unsigned char limit,
+        unsigned char thresh,
+        unsigned char *v) {  // handled like u; occupies the high 8 lanes
+    unsigned char *us, *ud;
+    unsigned char *vs, *vd;
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+    uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+    uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+    uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    us = u - 4;  // each 8-byte row load covers offsets -4..+3
+    vs = v - 4;
+    d6 = vld1_u8(us);  // even d regs (d6..d20): U rows 0..7
+    us += pitch;
+    d7 = vld1_u8(vs);  // odd d regs (d7..d21): V rows 0..7
+    vs += pitch;
+    d8 = vld1_u8(us);
+    us += pitch;
+    d9 = vld1_u8(vs);
+    vs += pitch;
+    d10 = vld1_u8(us);
+    us += pitch;
+    d11 = vld1_u8(vs);
+    vs += pitch;
+    d12 = vld1_u8(us);
+    us += pitch;
+    d13 = vld1_u8(vs);
+    vs += pitch;
+    d14 = vld1_u8(us);
+    us += pitch;
+    d15 = vld1_u8(vs);
+    vs += pitch;
+    d16 = vld1_u8(us);
+    us += pitch;
+    d17 = vld1_u8(vs);
+    vs += pitch;
+    d18 = vld1_u8(us);
+    us += pitch;
+    d19 = vld1_u8(vs);
+    vs += pitch;
+    d20 = vld1_u8(us);
+    d21 = vld1_u8(vs);
+
+    q3 = vcombine_u8(d6, d7);  // q(3+k) = row k: U in low half, V in high
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);  // next: transpose via vtrn on 32/16/8 bits
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];  // q(3+c) = byte column c of the loaded rows
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);  // outputs: q4..q9
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];  // same transpose again: q(3+k) is row k once more
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    ud = u - 4;  // whole 8-byte rows are stored, not only the filtered bytes
+    vst1_u8(ud, vget_low_u8(q3));  // low half = U row 0
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q4));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q5));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q6));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q7));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q8));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q9));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q10));
+
+    vd = v - 4;
+    vst1_u8(vd, vget_high_u8(q3));  // high half = V row 0
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q4));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q5));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q6));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q7));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q8));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q9));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q10));
+    return;
+}
diff --git a/vp8/common/arm/neon/sad16_neon.asm b/vp8/common/arm/neon/sad16_neon.asm
deleted file mode 100644
index 7197e56..0000000
--- a/vp8/common/arm/neon/sad16_neon.asm
+++ /dev/null
@@ -1,212 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_sad16x16_neon|
-    EXPORT  |vp8_sad16x8_neon|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0    unsigned char *src_ptr
-; r1    int  src_stride
-; r2    unsigned char *ref_ptr
-; r3    int  ref_stride
-|vp8_sad16x16_neon| PROC
-;;
-    vpush           {d8-d15}
-    vld1.8          {q0}, [r0], r1
-    vld1.8          {q4}, [r2], r3
-
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q5}, [r2], r3
-
-    vabdl.u8        q12, d0, d8
-    vabdl.u8        q13, d1, d9
-
-    vld1.8          {q2}, [r0], r1
-    vld1.8          {q6}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-    vabal.u8        q13, d3, d11
-
-    vld1.8          {q3}, [r0], r1
-    vld1.8          {q7}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q13, d5, d13
-
-;;
-    vld1.8          {q0}, [r0], r1
-    vld1.8          {q4}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-    vabal.u8        q13, d7, d15
-
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q5}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-    vabal.u8        q13, d1, d9
-
-    vld1.8          {q2}, [r0], r1
-    vld1.8          {q6}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-    vabal.u8        q13, d3, d11
-
-    vld1.8          {q3}, [r0], r1
-    vld1.8          {q7}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q13, d5, d13
-
-;;
-    vld1.8          {q0}, [r0], r1
-    vld1.8          {q4}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-    vabal.u8        q13, d7, d15
-
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q5}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-    vabal.u8        q13, d1, d9
-
-    vld1.8          {q2}, [r0], r1
-    vld1.8          {q6}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-    vabal.u8        q13, d3, d11
-
-    vld1.8          {q3}, [r0], r1
-    vld1.8          {q7}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q13, d5, d13
-
-;;
-    vld1.8          {q0}, [r0], r1
-    vld1.8          {q4}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-    vabal.u8        q13, d7, d15
-
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q5}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-    vabal.u8        q13, d1, d9
-
-    vld1.8          {q2}, [r0], r1
-    vld1.8          {q6}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-    vabal.u8        q13, d3, d11
-
-    vld1.8          {q3}, [r0]
-    vld1.8          {q7}, [r2]
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q13, d5, d13
-
-    vabal.u8        q12, d6, d14
-    vabal.u8        q13, d7, d15
-
-    vadd.u16        q0, q12, q13
-
-    vpaddl.u16      q1, q0
-    vpaddl.u32      q0, q1
-
-    vadd.u32        d0, d0, d1
-
-    vmov.32         r0, d0[0]
-
-    vpop            {d8-d15}
-    bx              lr
-
-    ENDP
-
-;==============================
-;unsigned int vp8_sad16x8_c(
-;    unsigned char *src_ptr,
-;    int  src_stride,
-;    unsigned char *ref_ptr,
-;    int  ref_stride)
-|vp8_sad16x8_neon| PROC
-    vpush           {d8-d15}
-
-    vld1.8          {q0}, [r0], r1
-    vld1.8          {q4}, [r2], r3
-
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q5}, [r2], r3
-
-    vabdl.u8        q12, d0, d8
-    vabdl.u8        q13, d1, d9
-
-    vld1.8          {q2}, [r0], r1
-    vld1.8          {q6}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-    vabal.u8        q13, d3, d11
-
-    vld1.8          {q3}, [r0], r1
-    vld1.8          {q7}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q13, d5, d13
-
-    vld1.8          {q0}, [r0], r1
-    vld1.8          {q4}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-    vabal.u8        q13, d7, d15
-
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q5}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-    vabal.u8        q13, d1, d9
-
-    vld1.8          {q2}, [r0], r1
-    vld1.8          {q6}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-    vabal.u8        q13, d3, d11
-
-    vld1.8          {q3}, [r0], r1
-    vld1.8          {q7}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q13, d5, d13
-
-    vabal.u8        q12, d6, d14
-    vabal.u8        q13, d7, d15
-
-    vadd.u16        q0, q12, q13
-
-    vpaddl.u16      q1, q0
-    vpaddl.u32      q0, q1
-
-    vadd.u32        d0, d0, d1
-
-    vmov.32         r0, d0[0]
-
-    vpop            {d8-d15}
-    bx              lr
-
-    ENDP
-
-    END
diff --git a/vp8/common/arm/neon/sad8_neon.asm b/vp8/common/arm/neon/sad8_neon.asm
deleted file mode 100644
index 6b849d9..0000000
--- a/vp8/common/arm/neon/sad8_neon.asm
+++ /dev/null
@@ -1,215 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_sad8x8_neon|
-    EXPORT  |vp8_sad8x16_neon|
-    EXPORT  |vp8_sad4x4_neon|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-; unsigned int vp8_sad8x8_c(
-;    unsigned char *src_ptr,
-;    int  src_stride,
-;    unsigned char *ref_ptr,
-;    int  ref_stride)
-
-|vp8_sad8x8_neon| PROC
-    vpush           {d8-d15}
-    vld1.8          {d0}, [r0], r1
-    vld1.8          {d8}, [r2], r3
-
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d10}, [r2], r3
-
-    vabdl.u8        q12, d0, d8
-
-    vld1.8          {d4}, [r0], r1
-    vld1.8          {d12}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-
-    vld1.8          {d6}, [r0], r1
-    vld1.8          {d14}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-
-    vld1.8          {d0}, [r0], r1
-    vld1.8          {d8}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d10}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-
-    vld1.8          {d4}, [r0], r1
-    vld1.8          {d12}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-
-    vld1.8          {d6}, [r0], r1
-    vld1.8          {d14}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q12, d6, d14
-
-    vpaddl.u16      q1, q12
-    vpaddl.u32      q0, q1
-    vadd.u32        d0, d0, d1
-
-    vmov.32         r0, d0[0]
-
-    vpop            {d8-d15}
-    bx              lr
-
-    ENDP
-
-;============================
-;unsigned int vp8_sad8x16_c(
-;    unsigned char *src_ptr,
-;    int  src_stride,
-;    unsigned char *ref_ptr,
-;    int  ref_stride)
-
-|vp8_sad8x16_neon| PROC
-    vpush           {d8-d15}
-    vld1.8          {d0}, [r0], r1
-    vld1.8          {d8}, [r2], r3
-
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d10}, [r2], r3
-
-    vabdl.u8        q12, d0, d8
-
-    vld1.8          {d4}, [r0], r1
-    vld1.8          {d12}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-
-    vld1.8          {d6}, [r0], r1
-    vld1.8          {d14}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-
-    vld1.8          {d0}, [r0], r1
-    vld1.8          {d8}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d10}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-
-    vld1.8          {d4}, [r0], r1
-    vld1.8          {d12}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-
-    vld1.8          {d6}, [r0], r1
-    vld1.8          {d14}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-
-    vld1.8          {d0}, [r0], r1
-    vld1.8          {d8}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d10}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-
-    vld1.8          {d4}, [r0], r1
-    vld1.8          {d12}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-
-    vld1.8          {d6}, [r0], r1
-    vld1.8          {d14}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-
-    vld1.8          {d0}, [r0], r1
-    vld1.8          {d8}, [r2], r3
-
-    vabal.u8        q12, d6, d14
-
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d10}, [r2], r3
-
-    vabal.u8        q12, d0, d8
-
-    vld1.8          {d4}, [r0], r1
-    vld1.8          {d12}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-
-    vld1.8          {d6}, [r0], r1
-    vld1.8          {d14}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q12, d6, d14
-
-    vpaddl.u16      q1, q12
-    vpaddl.u32      q0, q1
-    vadd.u32        d0, d0, d1
-
-    vmov.32         r0, d0[0]
-
-    vpop            {d8-d15}
-    bx              lr
-
-    ENDP
-
-;===========================
-;unsigned int vp8_sad4x4_c(
-;    unsigned char *src_ptr,
-;    int  src_stride,
-;    unsigned char *ref_ptr,
-;    int  ref_stride)
-
-|vp8_sad4x4_neon| PROC
-    vpush           {d8-d15}
-    vld1.8          {d0}, [r0], r1
-    vld1.8          {d8}, [r2], r3
-
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d10}, [r2], r3
-
-    vabdl.u8        q12, d0, d8
-
-    vld1.8          {d4}, [r0], r1
-    vld1.8          {d12}, [r2], r3
-
-    vabal.u8        q12, d2, d10
-
-    vld1.8          {d6}, [r0], r1
-    vld1.8          {d14}, [r2], r3
-
-    vabal.u8        q12, d4, d12
-    vabal.u8        q12, d6, d14
-
-    vpaddl.u16      d1, d24
-    vpaddl.u32      d0, d1
-    vmov.32         r0, d0[0]
-
-    vpop            {d8-d15}
-    bx              lr
-
-    ENDP
-
-    END
diff --git a/vp8/common/arm/neon/sad_neon.c b/vp8/common/arm/neon/sad_neon.c
new file mode 100644
index 0000000..6595ac0
--- /dev/null
+++ b/vp8/common/arm/neon/sad_neon.c
@@ -0,0 +1,184 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+unsigned int vp8_sad8x8_neon(  // returns sum of |src - ref| over 8x8 bytes
+        unsigned char *src_ptr,
+        int src_stride,  // byte step between src rows
+        unsigned char *ref_ptr,
+        int ref_stride) {  // byte step between ref rows
+    uint8x8_t d0, d8;
+    uint16x8_t q12;
+    uint32x4_t q1;
+    uint64x2_t q3;
+    uint32x2_t d5;
+    int i;
+
+    d0 = vld1_u8(src_ptr);
+    src_ptr += src_stride;
+    d8 = vld1_u8(ref_ptr);
+    ref_ptr += ref_stride;
+    q12 = vabdl_u8(d0, d8);  // row 0 initialises the 8 per-column u16 sums
+
+    for (i = 0; i < 7; i++) {  // rows 1..7
+        d0 = vld1_u8(src_ptr);
+        src_ptr += src_stride;
+        d8 = vld1_u8(ref_ptr);
+        ref_ptr += ref_stride;
+        q12 = vabal_u8(q12, d0, d8);  // q12 += |d0 - d8|, widened to u16
+    }
+
+    q1 = vpaddlq_u16(q12);  // 8 x u16 -> 4 x u32 pairwise sums
+    q3 = vpaddlq_u32(q1);  // 4 x u32 -> 2 x u64
+    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+                  vreinterpret_u32_u64(vget_high_u64(q3)));
+
+    return vget_lane_u32(d5, 0);  // lane 0 of the two halves added together
+}
+
+unsigned int vp8_sad8x16_neon(  // returns sum of |src - ref|, 8 wide, 16 rows
+        unsigned char *src_ptr,
+        int src_stride,  // byte step between src rows
+        unsigned char *ref_ptr,
+        int ref_stride) {  // byte step between ref rows
+    uint8x8_t d0, d8;
+    uint16x8_t q12;
+    uint32x4_t q1;
+    uint64x2_t q3;
+    uint32x2_t d5;
+    int i;
+
+    d0 = vld1_u8(src_ptr);
+    src_ptr += src_stride;
+    d8 = vld1_u8(ref_ptr);
+    ref_ptr += ref_stride;
+    q12 = vabdl_u8(d0, d8);  // row 0 initialises the 8 per-column u16 sums
+
+    for (i = 0; i < 15; i++) {  // rows 1..15
+        d0 = vld1_u8(src_ptr);
+        src_ptr += src_stride;
+        d8 = vld1_u8(ref_ptr);
+        ref_ptr += ref_stride;
+        q12 = vabal_u8(q12, d0, d8);  // q12 += |d0 - d8|, widened to u16
+    }
+
+    q1 = vpaddlq_u16(q12);  // 8 x u16 -> 4 x u32 pairwise sums
+    q3 = vpaddlq_u32(q1);  // 4 x u32 -> 2 x u64
+    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+                  vreinterpret_u32_u64(vget_high_u64(q3)));
+
+    return vget_lane_u32(d5, 0);  // lane 0 of the two halves added together
+}
+
+unsigned int vp8_sad4x4_neon(  // returns sum of |src - ref| over 4x4 bytes
+        unsigned char *src_ptr,
+        int src_stride,  // byte step between src rows
+        unsigned char *ref_ptr,
+        int ref_stride) {  // byte step between ref rows
+    uint8x8_t d0, d8;
+    uint16x8_t q12;
+    uint32x2_t d1;
+    uint64x1_t d3;
+    int i;
+
+    d0 = vld1_u8(src_ptr);  // NOTE(review): reads 8 bytes/row, only 4 summed
+    src_ptr += src_stride;
+    d8 = vld1_u8(ref_ptr);  // same 8-byte read on the reference side
+    ref_ptr += ref_stride;
+    q12 = vabdl_u8(d0, d8);  // row 0 initialises the per-column u16 sums
+
+    for (i = 0; i < 3; i++) {  // rows 1..3
+        d0 = vld1_u8(src_ptr);
+        src_ptr += src_stride;
+        d8 = vld1_u8(ref_ptr);
+        ref_ptr += ref_stride;
+        q12 = vabal_u8(q12, d0, d8);  // q12 += |d0 - d8|, widened to u16
+    }
+
+    d1 = vpaddl_u16(vget_low_u16(q12));  // columns 0..3 only; 4..7 dropped
+    d3 = vpaddl_u32(d1);  // 2 x u32 -> 1 x u64
+
+    return vget_lane_u32(vreinterpret_u32_u64(d3), 0);  // 32-bit lane 0
+}
+
+unsigned int vp8_sad16x16_neon(  // returns sum of |src - ref| over 16x16 bytes
+        unsigned char *src_ptr,
+        int src_stride,  // byte step between src rows
+        unsigned char *ref_ptr,
+        int ref_stride) {  // byte step between ref rows
+    uint8x16_t q0, q4;
+    uint16x8_t q12, q13;
+    uint32x4_t q1;
+    uint64x2_t q3;
+    uint32x2_t d5;
+    int i;
+
+    q0 = vld1q_u8(src_ptr);
+    src_ptr += src_stride;
+    q4 = vld1q_u8(ref_ptr);
+    ref_ptr += ref_stride;
+    q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));  // columns 0..7
+    q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));  // columns 8..15
+
+    for (i = 0; i < 15; i++) {  // rows 1..15
+        q0 = vld1q_u8(src_ptr);
+        src_ptr += src_stride;
+        q4 = vld1q_u8(ref_ptr);
+        ref_ptr += ref_stride;
+        q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
+        q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
+    }
+
+    q12 = vaddq_u16(q12, q13);  // <= 2 * 16 * 255 per lane, fits in u16
+    q1 = vpaddlq_u16(q12);  // 8 x u16 -> 4 x u32 pairwise sums
+    q3 = vpaddlq_u32(q1);  // 4 x u32 -> 2 x u64
+    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+                  vreinterpret_u32_u64(vget_high_u64(q3)));
+
+    return vget_lane_u32(d5, 0);  // lane 0 of the two halves added together
+}
+
+unsigned int vp8_sad16x8_neon(  // returns sum of |src - ref|, 16 wide, 8 rows
+        unsigned char *src_ptr,
+        int src_stride,  // byte step between src rows
+        unsigned char *ref_ptr,
+        int ref_stride) {  // byte step between ref rows
+    uint8x16_t q0, q4;
+    uint16x8_t q12, q13;
+    uint32x4_t q1;
+    uint64x2_t q3;
+    uint32x2_t d5;
+    int i;
+
+    q0 = vld1q_u8(src_ptr);
+    src_ptr += src_stride;
+    q4 = vld1q_u8(ref_ptr);
+    ref_ptr += ref_stride;
+    q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));  // columns 0..7
+    q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));  // columns 8..15
+
+    for (i = 0; i < 7; i++) {  // rows 1..7
+        q0 = vld1q_u8(src_ptr);
+        src_ptr += src_stride;
+        q4 = vld1q_u8(ref_ptr);
+        ref_ptr += ref_stride;
+        q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
+        q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
+    }
+
+    q12 = vaddq_u16(q12, q13);  // <= 2 * 8 * 255 per lane, fits in u16
+    q1 = vpaddlq_u16(q12);  // 8 x u16 -> 4 x u32 pairwise sums
+    q3 = vpaddlq_u32(q1);  // 4 x u32 -> 2 x u64
+    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+                  vreinterpret_u32_u64(vget_high_u64(q3)));
+
+    return vget_lane_u32(d5, 0);  // lane 0 of the two halves added together
+}
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
deleted file mode 100644
index 87ca887..0000000
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ /dev/null
@@ -1,142 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_short_idct4x4llm_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;*************************************************************
-;void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
-;                            unsigned char *dst, int stride)
-;r0 short * input
-;r1 short * pred
-;r2 int pitch
-;r3 unsigned char dst
-;sp int stride
-;*************************************************************
-
-; static const int cospi8sqrt2minus1=20091;
-; static const int sinpi8sqrt2      =35468;
-; static const int rounding = 0;
-
-; Optimization note: The resulted data from dequantization are signed
-; 13-bit data that is in the range of [-4096, 4095]. This allows to
-; use "vqdmulh"(neon) instruction since it won't go out of range
-; (13+16+1=30bits<32bits). This instruction gives the high half
-; result of the multiplication that is needed in IDCT.
-
-|vp8_short_idct4x4llm_neon| PROC
-    vpush           {d8-d15}
-
-    adr             r12, idct_coeff
-    vld1.16         {q1, q2}, [r0]
-    vld1.16         {d0}, [r12]
-
-    vswp            d3, d4                  ;q2(vp[4] vp[12])
-    ldr             r0, [sp, #64]           ; stride
-
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
-    vqadd.s16       q4, q4, q2
-
-    ;d6 - c1:temp1
-    ;d7 - d1:temp2
-    ;d8 - d1:temp1
-    ;d9 - c1:temp2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-    vswp            d3, d4
-
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
-    vqadd.s16       q4, q4, q2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vrshr.s16       d2, d2, #3
-    vrshr.s16       d3, d3, #3
-    vrshr.s16       d4, d4, #3
-    vrshr.s16       d5, d5, #3
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-    ; load prediction data
-    vld1.32         d6[0], [r1], r2
-    vld1.32         d6[1], [r1], r2
-    vld1.32         d7[0], [r1], r2
-    vld1.32         d7[1], [r1], r2
-
-    ; add prediction and residual
-    vaddw.u8        q1, q1, d6
-    vaddw.u8        q2, q2, d7
-
-    vqmovun.s16     d1, q1
-    vqmovun.s16     d2, q2
-
-    ; store to destination
-    vst1.32         d1[0], [r3], r0
-    vst1.32         d1[1], [r3], r0
-    vst1.32         d2[0], [r3], r0
-    vst1.32         d2[1], [r3], r0
-
-    vpop            {d8-d15}
-    bx              lr
-
-    ENDP
-
-;-----------------
-
-idct_coeff
-    DCD     0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
-
-    END
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.c b/vp8/common/arm/neon/shortidct4x4llm_neon.c
new file mode 100644
index 0000000..373afa6
--- /dev/null
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.c
@@ -0,0 +1,123 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;  // used as x + (x * c >> 16)
+static const int16_t sinpi8sqrt2       = 35468;  // > INT16_MAX: wraps to < 0
+
+void vp8_short_idct4x4llm_neon(  // 4x4 inverse transform + pred, stored to dst
+        int16_t *input,  // 16 coefficients, loaded as four vectors of 4
+        unsigned char *pred_ptr,  // 4 bytes are read from each of 4 rows
+        int pred_stride,
+        unsigned char *dst_ptr,  // 4 bytes are written to each of 4 rows
+        int dst_stride) {
+    int i;
+    uint32x2_t d6u32 = vdup_n_u32(0);  // defined before the lane loads below
+    uint8x8_t d1u8;
+    int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+    uint16x8_t q1u16;
+    int16x8_t q1s16, q2s16, q3s16, q4s16;
+    int32x2x2_t v2tmp0, v2tmp1;
+    int16x4x2_t v2tmp2, v2tmp3;
+
+    d2 = vld1_s16(input);
+    d3 = vld1_s16(input + 4);
+    d4 = vld1_s16(input + 8);
+    d5 = vld1_s16(input + 12);
+
+    // 1st pass
+    q1s16 = vcombine_s16(d2, d4);  // Swap d3 d4 here
+    q2s16 = vcombine_s16(d3, d5);
+
+    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);  // (2 * x * c) >> 16
+    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
+
+    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
+    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1
+
+    q3s16 = vshrq_n_s16(q3s16, 1);  // undo the doubling of vqdmulh
+    q4s16 = vshrq_n_s16(q4s16, 1);
+
+    q3s16 = vqaddq_s16(q3s16, q2s16);  // +x offsets the wrapped sinpi8sqrt2
+    q4s16 = vqaddq_s16(q4s16, q2s16);  // +x is the "+1" of cospi8sqrt2minus1
+
+    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
+    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);  // next: 4x4 transpose (vtrn on 32, then 16)
+
+    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
+                      vreinterpret_s16_s32(v2tmp1.val[0]));
+    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
+                      vreinterpret_s16_s32(v2tmp1.val[1]));
+
+    // 2nd pass: same butterfly on the transposed data, then round by >> 3
+    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);  // vectors 0 and 2
+    q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);  // vectors 1 and 3
+
+    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
+    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
+
+    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
+    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1
+
+    q3s16 = vshrq_n_s16(q3s16, 1);
+    q4s16 = vshrq_n_s16(q4s16, 1);
+
+    q3s16 = vqaddq_s16(q3s16, q2s16);
+    q4s16 = vqaddq_s16(q4s16, q2s16);
+
+    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
+    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);
+
+    d2 = vrshr_n_s16(d2, 3);  // rounding shift: (x + 4) >> 3
+    d3 = vrshr_n_s16(d3, 3);
+    d4 = vrshr_n_s16(d4, 3);
+    d5 = vrshr_n_s16(d5, 3);
+
+    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
+                      vreinterpret_s16_s32(v2tmp1.val[0]));
+    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
+                      vreinterpret_s16_s32(v2tmp1.val[1]));
+
+    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);  // output rows 0, 1
+    q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);  // output rows 2, 3
+
+    // add the prediction, saturate to u8 and store; two rows per iteration
+    for (i = 0; i < 2; i++, q1s16 = q2s16) {
+        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
+        pred_ptr += pred_stride;
+        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
+        pred_ptr += pred_stride;
+
+        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
+                         vreinterpret_u8_u32(d6u32));
+        d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));  // clamp to 0..255
+
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
+        dst_ptr += dst_stride;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
+        dst_ptr += dst_stride;
+    }
+    return;
+}
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
deleted file mode 100644
index dd27719..0000000
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ /dev/null
@@ -1,495 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_sixtap_predict16x16_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter16_coeff
-    DCD     0,  0,  128,    0,   0,  0,   0,  0
-    DCD     0, -6,  123,   12,  -1,  0,   0,  0
-    DCD     2, -11, 108,   36,  -8,  1,   0,  0
-    DCD     0, -9,   93,   50,  -6,  0,   0,  0
-    DCD     3, -16,  77,   77, -16,  3,   0,  0
-    DCD     0, -6,   50,   93,  -9,  0,   0,  0
-    DCD     1, -8,   36,  108, -11,  2,   0,  0
-    DCD     0, -1,   12,  123,  -6,   0,  0,  0
-
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; r4    unsigned char *dst_ptr,
-; stack(r5) int  dst_pitch
-
-;Note: To take advantage of 8-bit mulplication instruction in NEON. First apply abs() to
-; filter coeffs to make them u8. Then, use vmlsl for negtive coeffs. After multiplication,
-; the result can be negtive. So, I treat the result as s16. But, since it is also possible
-; that the result can be a large positive number (> 2^15-1), which could be confused as a
-; negtive number. To avoid that error, apply filter coeffs in the order of 0, 1, 4 ,5 ,2,
-; which ensures that the result stays in s16 range. Finally, saturated add the result by
-; applying 3rd filter coeff. Same applys to other filter functions.
-
-|vp8_sixtap_predict16x16_neon| PROC
-    push            {r4-r5, lr}
-    vpush           {d8-d15}
-
-    adr             r12, filter16_coeff
-    ldr             r4, [sp, #76]           ;load parameters from stack
-    ldr             r5, [sp, #80]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             secondpass_filter16x16_only
-
-    add             r2, r12, r2, lsl #5     ;calculate filter location
-
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-
-    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter
-
-    beq             firstpass_filter16x16_only
-
-    sub             sp, sp, #336            ;reserve space on stack for temporary storage
-    mov             lr, sp
-
-    vabs.s32        q12, q14
-    vabs.s32        q13, q15
-
-    mov             r2, #7                  ;loop counter
-    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
-    sub             r0, r0, r1, lsl #1
-
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vdup.8          d1, d24[4]
-    vdup.8          d2, d25[0]
-    vdup.8          d3, d25[4]
-    vdup.8          d4, d26[0]
-    vdup.8          d5, d26[4]
-
-;First Pass: output_height lines x output_width columns (21x16)
-filt_blk2d_fp16x16_loop_neon
-    vld1.u8         {d6, d7, d8}, [r0], r1      ;load src data
-    vld1.u8         {d9, d10, d11}, [r0], r1
-    vld1.u8         {d12, d13, d14}, [r0], r1
-
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vmull.u8        q8, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q9, d7, d0
-    vmull.u8        q10, d9, d0
-    vmull.u8        q11, d10, d0
-    vmull.u8        q12, d12, d0
-    vmull.u8        q13, d13, d0
-
-    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d29, d9, d10, #1
-    vext.8          d30, d12, d13, #1
-
-    vmlsl.u8        q8, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q10, d29, d1
-    vmlsl.u8        q12, d30, d1
-
-    vext.8          d28, d7, d8, #1
-    vext.8          d29, d10, d11, #1
-    vext.8          d30, d13, d14, #1
-
-    vmlsl.u8        q9, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q11, d29, d1
-    vmlsl.u8        q13, d30, d1
-
-    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d29, d9, d10, #4
-    vext.8          d30, d12, d13, #4
-
-    vmlsl.u8        q8, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q10, d29, d4
-    vmlsl.u8        q12, d30, d4
-
-    vext.8          d28, d7, d8, #4
-    vext.8          d29, d10, d11, #4
-    vext.8          d30, d13, d14, #4
-
-    vmlsl.u8        q9, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q11, d29, d4
-    vmlsl.u8        q13, d30, d4
-
-    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d29, d9, d10, #5
-    vext.8          d30, d12, d13, #5
-
-    vmlal.u8        q8, d28, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q10, d29, d5
-    vmlal.u8        q12, d30, d5
-
-    vext.8          d28, d7, d8, #5
-    vext.8          d29, d10, d11, #5
-    vext.8          d30, d13, d14, #5
-
-    vmlal.u8        q9, d28, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q11, d29, d5
-    vmlal.u8        q13, d30, d5
-
-    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d29, d9, d10, #2
-    vext.8          d30, d12, d13, #2
-
-    vmlal.u8        q8, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q10, d29, d2
-    vmlal.u8        q12, d30, d2
-
-    vext.8          d28, d7, d8, #2
-    vext.8          d29, d10, d11, #2
-    vext.8          d30, d13, d14, #2
-
-    vmlal.u8        q9, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q11, d29, d2
-    vmlal.u8        q13, d30, d2
-
-    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d29, d9, d10, #3
-    vext.8          d30, d12, d13, #3
-
-    vext.8          d15, d7, d8, #3
-    vext.8          d31, d10, d11, #3
-    vext.8          d6, d13, d14, #3
-
-    vmull.u8        q4, d28, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q5, d29, d3
-    vmull.u8        q6, d30, d3
-
-    vqadd.s16       q8, q4                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q10, q5
-    vqadd.s16       q12, q6
-
-    vmull.u8        q6, d15, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q7, d31, d3
-    vmull.u8        q3, d6, d3
-
-    subs            r2, r2, #1
-
-    vqadd.s16       q9, q6
-    vqadd.s16       q11, q7
-    vqadd.s16       q13, q3
-
-    vqrshrun.s16    d6, q8, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q9, #7
-    vqrshrun.s16    d8, q10, #7
-    vqrshrun.s16    d9, q11, #7
-    vqrshrun.s16    d10, q12, #7
-    vqrshrun.s16    d11, q13, #7
-
-    vst1.u8         {d6, d7, d8}, [lr]!     ;store result
-    vst1.u8         {d9, d10, d11}, [lr]!
-
-    bne             filt_blk2d_fp16x16_loop_neon
-
-;Second pass: 16x16
-;secondpass_filter - do first 8-columns and then second 8-columns
-    add             r3, r12, r3, lsl #5
-    sub             lr, lr, #336
-
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-    mov             r3, #2                  ;loop counter
-
-    vabs.s32        q7, q5
-    vabs.s32        q8, q6
-
-    mov             r2, #16
-
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vdup.8          d1, d14[4]
-    vdup.8          d2, d15[0]
-    vdup.8          d3, d15[4]
-    vdup.8          d4, d16[0]
-    vdup.8          d5, d16[4]
-
-filt_blk2d_sp16x16_outloop_neon
-    vld1.u8         {d18}, [lr], r2         ;load src data
-    vld1.u8         {d19}, [lr], r2
-    vld1.u8         {d20}, [lr], r2
-    vld1.u8         {d21}, [lr], r2
-    mov             r12, #4                 ;loop counter
-    vld1.u8         {d22}, [lr], r2
-
-secondpass_inner_loop_neon
-    vld1.u8         {d23}, [lr], r2         ;load src data
-    vld1.u8         {d24}, [lr], r2
-    vld1.u8         {d25}, [lr], r2
-    vld1.u8         {d26}, [lr], r2
-
-    vmull.u8        q3, d18, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d19, d0
-    vmull.u8        q5, d20, d0
-    vmull.u8        q6, d21, d0
-
-    vmlsl.u8        q3, d19, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q4, d20, d1
-    vmlsl.u8        q5, d21, d1
-    vmlsl.u8        q6, d22, d1
-
-    vmlsl.u8        q3, d22, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d23, d4
-    vmlsl.u8        q5, d24, d4
-    vmlsl.u8        q6, d25, d4
-
-    vmlal.u8        q3, d20, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d21, d2
-    vmlal.u8        q5, d22, d2
-    vmlal.u8        q6, d23, d2
-
-    vmlal.u8        q3, d23, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q4, d24, d5
-    vmlal.u8        q5, d25, d5
-    vmlal.u8        q6, d26, d5
-
-    vmull.u8        q7, d21, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q8, d22, d3
-    vmull.u8        q9, d23, d3
-    vmull.u8        q10, d24, d3
-
-    subs            r12, r12, #1
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q8, #7
-    vqrshrun.s16    d8, q9, #7
-    vqrshrun.s16    d9, q10, #7
-
-    vst1.u8         {d6}, [r4], r5          ;store result
-    vmov            q9, q11
-    vst1.u8         {d7}, [r4], r5
-    vmov            q10, q12
-    vst1.u8         {d8}, [r4], r5
-    vmov            d22, d26
-    vst1.u8         {d9}, [r4], r5
-
-    bne             secondpass_inner_loop_neon
-
-    subs            r3, r3, #1
-    sub             lr, lr, #336
-    add             lr, lr, #8
-
-    sub             r4, r4, r5, lsl #4
-    add             r4, r4, #8
-
-    bne filt_blk2d_sp16x16_outloop_neon
-
-    add             sp, sp, #336
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-;--------------------
-firstpass_filter16x16_only
-    vabs.s32        q12, q14
-    vabs.s32        q13, q15
-
-    mov             r2, #8                  ;loop counter
-    sub             r0, r0, #2              ;move srcptr back to (column-2)
-
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vdup.8          d1, d24[4]
-    vdup.8          d2, d25[0]
-    vdup.8          d3, d25[4]
-    vdup.8          d4, d26[0]
-    vdup.8          d5, d26[4]
-
-;First Pass: output_height lines x output_width columns (16x16)
-filt_blk2d_fpo16x16_loop_neon
-    vld1.u8         {d6, d7, d8}, [r0], r1      ;load src data
-    vld1.u8         {d9, d10, d11}, [r0], r1
-
-    pld             [r0]
-    pld             [r0, r1]
-
-    vmull.u8        q6, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q7, d7, d0
-    vmull.u8        q8, d9, d0
-    vmull.u8        q9, d10, d0
-
-    vext.8          d20, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d21, d9, d10, #1
-    vext.8          d22, d7, d8, #1
-    vext.8          d23, d10, d11, #1
-    vext.8          d24, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d25, d9, d10, #4
-    vext.8          d26, d7, d8, #4
-    vext.8          d27, d10, d11, #4
-    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d29, d9, d10, #5
-
-    vmlsl.u8        q6, d20, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d21, d1
-    vmlsl.u8        q7, d22, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q9, d23, d1
-    vmlsl.u8        q6, d24, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d25, d4
-    vmlsl.u8        q7, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q9, d27, d4
-    vmlal.u8        q6, d28, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q8, d29, d5
-
-    vext.8          d20, d7, d8, #5
-    vext.8          d21, d10, d11, #5
-    vext.8          d22, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d23, d9, d10, #2
-    vext.8          d24, d7, d8, #2
-    vext.8          d25, d10, d11, #2
-
-    vext.8          d26, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d27, d9, d10, #3
-    vext.8          d28, d7, d8, #3
-    vext.8          d29, d10, d11, #3
-
-    vmlal.u8        q7, d20, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q9, d21, d5
-    vmlal.u8        q6, d22, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d23, d2
-    vmlal.u8        q7, d24, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q9, d25, d2
-
-    vmull.u8        q10, d26, d3            ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q11, d27, d3
-    vmull.u8        q12, d28, d3            ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q15, d29, d3
-
-    vqadd.s16       q6, q10                 ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q11
-    vqadd.s16       q7, q12
-    vqadd.s16       q9, q15
-
-    subs            r2, r2, #1
-
-    vqrshrun.s16    d6, q6, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q7, #7
-    vqrshrun.s16    d8, q8, #7
-    vqrshrun.s16    d9, q9, #7
-
-    vst1.u8         {q3}, [r4], r5              ;store result
-    vst1.u8         {q4}, [r4], r5
-
-    bne             filt_blk2d_fpo16x16_loop_neon
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-;--------------------
-secondpass_filter16x16_only
-;Second pass: 16x16
-    add             r3, r12, r3, lsl #5
-    sub             r0, r0, r1, lsl #1
-
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-    mov             r3, #2                  ;loop counter
-
-    vabs.s32        q7, q5
-    vabs.s32        q8, q6
-
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vdup.8          d1, d14[4]
-    vdup.8          d2, d15[0]
-    vdup.8          d3, d15[4]
-    vdup.8          d4, d16[0]
-    vdup.8          d5, d16[4]
-
-filt_blk2d_spo16x16_outloop_neon
-    vld1.u8         {d18}, [r0], r1         ;load src data
-    vld1.u8         {d19}, [r0], r1
-    vld1.u8         {d20}, [r0], r1
-    vld1.u8         {d21}, [r0], r1
-    mov             r12, #4                 ;loop counter
-    vld1.u8         {d22}, [r0], r1
-
-secondpass_only_inner_loop_neon
-    vld1.u8         {d23}, [r0], r1         ;load src data
-    vld1.u8         {d24}, [r0], r1
-    vld1.u8         {d25}, [r0], r1
-    vld1.u8         {d26}, [r0], r1
-
-    vmull.u8        q3, d18, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d19, d0
-    vmull.u8        q5, d20, d0
-    vmull.u8        q6, d21, d0
-
-    vmlsl.u8        q3, d19, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q4, d20, d1
-    vmlsl.u8        q5, d21, d1
-    vmlsl.u8        q6, d22, d1
-
-    vmlsl.u8        q3, d22, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d23, d4
-    vmlsl.u8        q5, d24, d4
-    vmlsl.u8        q6, d25, d4
-
-    vmlal.u8        q3, d20, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d21, d2
-    vmlal.u8        q5, d22, d2
-    vmlal.u8        q6, d23, d2
-
-    vmlal.u8        q3, d23, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q4, d24, d5
-    vmlal.u8        q5, d25, d5
-    vmlal.u8        q6, d26, d5
-
-    vmull.u8        q7, d21, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q8, d22, d3
-    vmull.u8        q9, d23, d3
-    vmull.u8        q10, d24, d3
-
-    subs            r12, r12, #1
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q8, #7
-    vqrshrun.s16    d8, q9, #7
-    vqrshrun.s16    d9, q10, #7
-
-    vst1.u8         {d6}, [r4], r5          ;store result
-    vmov            q9, q11
-    vst1.u8         {d7}, [r4], r5
-    vmov            q10, q12
-    vst1.u8         {d8}, [r4], r5
-    vmov            d22, d26
-    vst1.u8         {d9}, [r4], r5
-
-    bne             secondpass_only_inner_loop_neon
-
-    subs            r3, r3, #1
-    sub             r0, r0, r1, lsl #4
-    sub             r0, r0, r1, lsl #2
-    sub             r0, r0, r1
-    add             r0, r0, #8
-
-    sub             r4, r4, r5, lsl #4
-    add             r4, r4, #8
-
-    bne filt_blk2d_spo16x16_outloop_neon
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-    ENDP
-
-;-----------------
-    END
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
deleted file mode 100644
index e32e713..0000000
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ /dev/null
@@ -1,426 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_sixtap_predict4x4_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter4_coeff
-    DCD     0,  0,  128,    0,   0,  0,   0,  0
-    DCD     0, -6,  123,   12,  -1,  0,   0,  0
-    DCD     2, -11, 108,   36,  -8,  1,   0,  0
-    DCD     0, -9,   93,   50,  -6,  0,   0,  0
-    DCD     3, -16,  77,   77, -16,  3,   0,  0
-    DCD     0, -6,   50,   93,  -9,  0,   0,  0
-    DCD     1, -8,   36,  108, -11,  2,   0,  0
-    DCD     0, -1,   12,  123,  -6,   0,  0,  0
-
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; stack(r4) unsigned char *dst_ptr,
-; stack(lr) int  dst_pitch
-
-|vp8_sixtap_predict4x4_neon| PROC
-    push            {r4, lr}
-    vpush           {d8-d15}
-
-    adr             r12, filter4_coeff
-    ldr             r4, [sp, #72]            ;load parameters from stack
-    ldr             lr, [sp, #76]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             secondpass_filter4x4_only
-
-    add             r2, r12, r2, lsl #5     ;calculate filter location
-
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter
-
-    beq             firstpass_filter4x4_only
-
-    vabs.s32        q12, q14                ;get abs(filer_parameters)
-    vabs.s32        q13, q15
-
-    sub             r0, r0, #2              ;go back 2 columns of src data
-    sub             r0, r0, r1, lsl #1      ;go back 2 lines of src data
-
-;First pass: output_height lines x output_width columns (9x4)
-    vld1.u8         {q3}, [r0], r1          ;load first 4-line src data
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vld1.u8         {q4}, [r0], r1
-    vdup.8          d1, d24[4]
-    vld1.u8         {q5}, [r0], r1
-    vdup.8          d2, d25[0]
-    vld1.u8         {q6}, [r0], r1
-    vdup.8          d3, d25[4]
-    vdup.8          d4, d26[0]
-    vdup.8          d5, d26[4]
-
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vext.8          d18, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d19, d8, d9, #5
-    vext.8          d20, d10, d11, #5
-    vext.8          d21, d12, d13, #5
-
-    vswp            d7, d8                  ;discard 2nd half data after src_ptr[3] is done
-    vswp            d11, d12
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[3])
-    vzip.32         d20, d21
-    vmull.u8        q7, d18, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmull.u8        q8, d20, d5
-
-    vmov            q4, q3                  ;keep original src data in q4 q6
-    vmov            q6, q5
-
-    vzip.32         d6, d7                  ;construct src_ptr[-2], and put 2-line data together
-    vzip.32         d10, d11
-    vshr.u64        q9, q4, #8              ;construct src_ptr[-1]
-    vshr.u64        q10, q6, #8
-    vmlal.u8        q7, d6, d0              ;+(src_ptr[-2] * vp8_filter[0])
-    vmlal.u8        q8, d10, d0
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[-1])
-    vzip.32         d20, d21
-    vshr.u64        q3, q4, #32             ;construct src_ptr[2]
-    vshr.u64        q5, q6, #32
-    vmlsl.u8        q7, d18, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d20, d1
-
-    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[2])
-    vzip.32         d10, d11
-    vshr.u64        q9, q4, #16             ;construct src_ptr[0]
-    vshr.u64        q10, q6, #16
-    vmlsl.u8        q7, d6, d4              ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d10, d4
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[0])
-    vzip.32         d20, d21
-    vshr.u64        q3, q4, #24             ;construct src_ptr[1]
-    vshr.u64        q5, q6, #24
-    vmlal.u8        q7, d18, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d20, d2
-
-    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[1])
-    vzip.32         d10, d11
-    vmull.u8        q9, d6, d3              ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q10, d10, d3
-
-    vld1.u8         {q3}, [r0], r1          ;load rest 5-line src data
-    vld1.u8         {q4}, [r0], r1
-
-    vqadd.s16       q7, q9                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q10
-
-    vld1.u8         {q5}, [r0], r1
-    vld1.u8         {q6}, [r0], r1
-
-    vqrshrun.s16    d27, q7, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d28, q8, #7
-
-    ;First Pass on rest 5-line data
-    vld1.u8         {q11}, [r0], r1
-
-    vext.8          d18, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d19, d8, d9, #5
-    vext.8          d20, d10, d11, #5
-    vext.8          d21, d12, d13, #5
-
-    vswp            d7, d8                  ;discard 2nd half data after src_ptr[3] is done
-    vswp            d11, d12
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[3])
-    vzip.32         d20, d21
-    vext.8          d31, d22, d23, #5       ;construct src_ptr[3]
-    vmull.u8        q7, d18, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmull.u8        q8, d20, d5
-    vmull.u8        q12, d31, d5            ;(src_ptr[3] * vp8_filter[5])
-
-    vmov            q4, q3                  ;keep original src data in q4 q6
-    vmov            q6, q5
-
-    vzip.32         d6, d7                  ;construct src_ptr[-2], and put 2-line data together
-    vzip.32         d10, d11
-    vshr.u64        q9, q4, #8              ;construct src_ptr[-1]
-    vshr.u64        q10, q6, #8
-
-    vmlal.u8        q7, d6, d0              ;+(src_ptr[-2] * vp8_filter[0])
-    vmlal.u8        q8, d10, d0
-    vmlal.u8        q12, d22, d0            ;(src_ptr[-2] * vp8_filter[0])
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[-1])
-    vzip.32         d20, d21
-    vshr.u64        q3, q4, #32             ;construct src_ptr[2]
-    vshr.u64        q5, q6, #32
-    vext.8          d31, d22, d23, #1       ;construct src_ptr[-1]
-
-    vmlsl.u8        q7, d18, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d20, d1
-    vmlsl.u8        q12, d31, d1            ;-(src_ptr[-1] * vp8_filter[1])
-
-    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[2])
-    vzip.32         d10, d11
-    vshr.u64        q9, q4, #16             ;construct src_ptr[0]
-    vshr.u64        q10, q6, #16
-    vext.8          d31, d22, d23, #4       ;construct src_ptr[2]
-
-    vmlsl.u8        q7, d6, d4              ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d10, d4
-    vmlsl.u8        q12, d31, d4            ;-(src_ptr[2] * vp8_filter[4])
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[0])
-    vzip.32         d20, d21
-    vshr.u64        q3, q4, #24             ;construct src_ptr[1]
-    vshr.u64        q5, q6, #24
-    vext.8          d31, d22, d23, #2       ;construct src_ptr[0]
-
-    vmlal.u8        q7, d18, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d20, d2
-    vmlal.u8        q12, d31, d2            ;(src_ptr[0] * vp8_filter[2])
-
-    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[1])
-    vzip.32         d10, d11
-    vext.8          d31, d22, d23, #3       ;construct src_ptr[1]
-    vmull.u8        q9, d6, d3              ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q10, d10, d3
-    vmull.u8        q11, d31, d3            ;(src_ptr[1] * vp8_filter[3])
-
-    add             r3, r12, r3, lsl #5
-
-    vqadd.s16       q7, q9                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q10
-    vqadd.s16       q12, q11
-
-    vext.8          d23, d27, d28, #4
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-
-    vqrshrun.s16    d29, q7, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d30, q8, #7
-    vqrshrun.s16    d31, q12, #7
-
-;Second pass: 4x4
-    vabs.s32        q7, q5
-    vabs.s32        q8, q6
-
-    vext.8          d24, d28, d29, #4
-    vext.8          d25, d29, d30, #4
-    vext.8          d26, d30, d31, #4
-
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vdup.8          d1, d14[4]
-    vdup.8          d2, d15[0]
-    vdup.8          d3, d15[4]
-    vdup.8          d4, d16[0]
-    vdup.8          d5, d16[4]
-
-    vmull.u8        q3, d27, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d28, d0
-
-    vmull.u8        q5, d25, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmull.u8        q6, d26, d5
-
-    vmlsl.u8        q3, d29, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d30, d4
-
-    vmlsl.u8        q5, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q6, d24, d1
-
-    vmlal.u8        q3, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d29, d2
-
-    vmlal.u8        q5, d24, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmlal.u8        q6, d25, d3
-
-    add             r0, r4, lr
-    add             r1, r0, lr
-    add             r2, r1, lr
-
-    vqadd.s16       q5, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q6, q4
-
-    vqrshrun.s16    d3, q5, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d4, q6, #7
-
-    vst1.32         {d3[0]}, [r4]           ;store result
-    vst1.32         {d3[1]}, [r0]
-    vst1.32         {d4[0]}, [r1]
-    vst1.32         {d4[1]}, [r2]
-
-    vpop            {d8-d15}
-    pop             {r4, pc}
-
-
-;---------------------
-firstpass_filter4x4_only
-    vabs.s32        q12, q14                ;get abs(filer_parameters)
-    vabs.s32        q13, q15
-
-    sub             r0, r0, #2              ;go back 2 columns of src data
-
-;First pass: output_height lines x output_width columns (4x4)
-    vld1.u8         {q3}, [r0], r1          ;load first 4-line src data
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vld1.u8         {q4}, [r0], r1
-    vdup.8          d1, d24[4]
-    vld1.u8         {q5}, [r0], r1
-    vdup.8          d2, d25[0]
-    vld1.u8         {q6}, [r0], r1
-
-    vdup.8          d3, d25[4]
-    vdup.8          d4, d26[0]
-    vdup.8          d5, d26[4]
-
-    vext.8          d18, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d19, d8, d9, #5
-    vext.8          d20, d10, d11, #5
-    vext.8          d21, d12, d13, #5
-
-    vswp            d7, d8                  ;discard 2nd half data after src_ptr[3] is done
-    vswp            d11, d12
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[3])
-    vzip.32         d20, d21
-    vmull.u8        q7, d18, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmull.u8        q8, d20, d5
-
-    vmov            q4, q3                  ;keep original src data in q4 q6
-    vmov            q6, q5
-
-    vzip.32         d6, d7                  ;construct src_ptr[-2], and put 2-line data together
-    vzip.32         d10, d11
-    vshr.u64        q9, q4, #8              ;construct src_ptr[-1]
-    vshr.u64        q10, q6, #8
-    vmlal.u8        q7, d6, d0              ;+(src_ptr[-2] * vp8_filter[0])
-    vmlal.u8        q8, d10, d0
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[-1])
-    vzip.32         d20, d21
-    vshr.u64        q3, q4, #32             ;construct src_ptr[2]
-    vshr.u64        q5, q6, #32
-    vmlsl.u8        q7, d18, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d20, d1
-
-    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[2])
-    vzip.32         d10, d11
-    vshr.u64        q9, q4, #16             ;construct src_ptr[0]
-    vshr.u64        q10, q6, #16
-    vmlsl.u8        q7, d6, d4              ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d10, d4
-
-    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[0])
-    vzip.32         d20, d21
-    vshr.u64        q3, q4, #24             ;construct src_ptr[1]
-    vshr.u64        q5, q6, #24
-    vmlal.u8        q7, d18, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d20, d2
-
-    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[1])
-    vzip.32         d10, d11
-    vmull.u8        q9, d6, d3              ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q10, d10, d3
-
-    add             r0, r4, lr
-    add             r1, r0, lr
-    add             r2, r1, lr
-
-    vqadd.s16       q7, q9                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q10
-
-    vqrshrun.s16    d27, q7, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d28, q8, #7
-
-    vst1.32         {d27[0]}, [r4]          ;store result
-    vst1.32         {d27[1]}, [r0]
-    vst1.32         {d28[0]}, [r1]
-    vst1.32         {d28[1]}, [r2]
-
-    vpop            {d8-d15}
-    pop             {r4, pc}
-
-
-;---------------------
-secondpass_filter4x4_only
-    sub             r0, r0, r1, lsl #1
-    add             r3, r12, r3, lsl #5
-
-    vld1.32         {d27[0]}, [r0], r1      ;load src data
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-    vld1.32         {d27[1]}, [r0], r1
-    vabs.s32        q7, q5
-    vld1.32         {d28[0]}, [r0], r1
-    vabs.s32        q8, q6
-    vld1.32         {d28[1]}, [r0], r1
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vld1.32         {d29[0]}, [r0], r1
-    vdup.8          d1, d14[4]
-    vld1.32         {d29[1]}, [r0], r1
-    vdup.8          d2, d15[0]
-    vld1.32         {d30[0]}, [r0], r1
-    vdup.8          d3, d15[4]
-    vld1.32         {d30[1]}, [r0], r1
-    vdup.8          d4, d16[0]
-    vld1.32         {d31[0]}, [r0], r1
-    vdup.8          d5, d16[4]
-
-    vext.8          d23, d27, d28, #4
-    vext.8          d24, d28, d29, #4
-    vext.8          d25, d29, d30, #4
-    vext.8          d26, d30, d31, #4
-
-    vmull.u8        q3, d27, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d28, d0
-
-    vmull.u8        q5, d25, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmull.u8        q6, d26, d5
-
-    vmlsl.u8        q3, d29, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d30, d4
-
-    vmlsl.u8        q5, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q6, d24, d1
-
-    vmlal.u8        q3, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d29, d2
-
-    vmlal.u8        q5, d24, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmlal.u8        q6, d25, d3
-
-    add             r0, r4, lr
-    add             r1, r0, lr
-    add             r2, r1, lr
-
-    vqadd.s16       q5, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q6, q4
-
-    vqrshrun.s16    d3, q5, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d4, q6, #7
-
-    vst1.32         {d3[0]}, [r4]           ;store result
-    vst1.32         {d3[1]}, [r0]
-    vst1.32         {d4[0]}, [r1]
-    vst1.32         {d4[1]}, [r2]
-
-    vpop            {d8-d15}
-    pop             {r4, pc}
-
-    ENDP
-
-;-----------------
-
-    END
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
deleted file mode 100644
index d19bf89..0000000
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ /dev/null
@@ -1,478 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_sixtap_predict8x4_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter8_coeff
-    DCD     0,  0,  128,    0,   0,  0,   0,  0
-    DCD     0, -6,  123,   12,  -1,  0,   0,  0
-    DCD     2, -11, 108,   36,  -8,  1,   0,  0
-    DCD     0, -9,   93,   50,  -6,  0,   0,  0
-    DCD     3, -16,  77,   77, -16,  3,   0,  0
-    DCD     0, -6,   50,   93,  -9,  0,   0,  0
-    DCD     1, -8,   36,  108, -11,  2,   0,  0
-    DCD     0, -1,   12,  123,  -6,   0,  0,  0
-
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; r4    unsigned char *dst_ptr,
-; stack(r5) int  dst_pitch
-
-|vp8_sixtap_predict8x4_neon| PROC
-    push            {r4-r5, lr}
-    vpush           {d8-d15}
-
-    adr             r12, filter8_coeff
-    ldr             r4, [sp, #76]           ;load parameters from stack
-    ldr             r5, [sp, #80]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             secondpass_filter8x4_only
-
-    add             r2, r12, r2, lsl #5     ;calculate filter location
-
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-
-    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter
-
-    beq             firstpass_filter8x4_only
-
-    sub             sp, sp, #32             ;reserve space on stack for temporary storage
-    vabs.s32        q12, q14
-    vabs.s32        q13, q15
-
-    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
-    mov             lr, sp
-    sub             r0, r0, r1, lsl #1
-
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vdup.8          d1, d24[4]
-    vdup.8          d2, d25[0]
-
-;First pass: output_height lines x output_width columns (9x8)
-    vld1.u8         {q3}, [r0], r1          ;load src data
-    vdup.8          d3, d25[4]
-    vld1.u8         {q4}, [r0], r1
-    vdup.8          d4, d26[0]
-    vld1.u8         {q5}, [r0], r1
-    vdup.8          d5, d26[4]
-    vld1.u8         {q6}, [r0], r1
-
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q8, d8, d0
-    vmull.u8        q9, d10, d0
-    vmull.u8        q10, d12, d0
-
-    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d29, d8, d9, #1
-    vext.8          d30, d10, d11, #1
-    vext.8          d31, d12, d13, #1
-
-    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d29, d1
-    vmlsl.u8        q9, d30, d1
-    vmlsl.u8        q10, d31, d1
-
-    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d29, d8, d9, #4
-    vext.8          d30, d10, d11, #4
-    vext.8          d31, d12, d13, #4
-
-    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d29, d4
-    vmlsl.u8        q9, d30, d4
-    vmlsl.u8        q10, d31, d4
-
-    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d29, d8, d9, #2
-    vext.8          d30, d10, d11, #2
-    vext.8          d31, d12, d13, #2
-
-    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d29, d2
-    vmlal.u8        q9, d30, d2
-    vmlal.u8        q10, d31, d2
-
-    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d29, d8, d9, #5
-    vext.8          d30, d10, d11, #5
-    vext.8          d31, d12, d13, #5
-
-    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q8, d29, d5
-    vmlal.u8        q9, d30, d5
-    vmlal.u8        q10, d31, d5
-
-    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d29, d8, d9, #3
-    vext.8          d30, d10, d11, #3
-    vext.8          d31, d12, d13, #3
-
-    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q4, d29, d3
-    vmull.u8        q5, d30, d3
-    vmull.u8        q6, d31, d3
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vld1.u8         {q3}, [r0], r1          ;load src data
-
-    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d23, q8, #7
-    vqrshrun.s16    d24, q9, #7
-    vqrshrun.s16    d25, q10, #7
-
-    vld1.u8         {q4}, [r0], r1
-    vst1.u8         {d22}, [lr]!            ;store result
-    vld1.u8         {q5}, [r0], r1
-    vst1.u8         {d23}, [lr]!
-    vld1.u8         {q6}, [r0], r1
-    vst1.u8         {d24}, [lr]!
-    vld1.u8         {q7}, [r0], r1
-    vst1.u8         {d25}, [lr]!
-
-    ;first_pass filtering on the rest 5-line data
-    vmull.u8        q8, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q9, d8, d0
-    vmull.u8        q10, d10, d0
-    vmull.u8        q11, d12, d0
-    vmull.u8        q12, d14, d0
-
-    vext.8          d27, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d28, d8, d9, #1
-    vext.8          d29, d10, d11, #1
-    vext.8          d30, d12, d13, #1
-    vext.8          d31, d14, d15, #1
-
-    vmlsl.u8        q8, d27, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q9, d28, d1
-    vmlsl.u8        q10, d29, d1
-    vmlsl.u8        q11, d30, d1
-    vmlsl.u8        q12, d31, d1
-
-    vext.8          d27, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d28, d8, d9, #4
-    vext.8          d29, d10, d11, #4
-    vext.8          d30, d12, d13, #4
-    vext.8          d31, d14, d15, #4
-
-    vmlsl.u8        q8, d27, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q9, d28, d4
-    vmlsl.u8        q10, d29, d4
-    vmlsl.u8        q11, d30, d4
-    vmlsl.u8        q12, d31, d4
-
-    vext.8          d27, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d28, d8, d9, #2
-    vext.8          d29, d10, d11, #2
-    vext.8          d30, d12, d13, #2
-    vext.8          d31, d14, d15, #2
-
-    vmlal.u8        q8, d27, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q9, d28, d2
-    vmlal.u8        q10, d29, d2
-    vmlal.u8        q11, d30, d2
-    vmlal.u8        q12, d31, d2
-
-    vext.8          d27, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d28, d8, d9, #5
-    vext.8          d29, d10, d11, #5
-    vext.8          d30, d12, d13, #5
-    vext.8          d31, d14, d15, #5
-
-    vmlal.u8        q8, d27, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q9, d28, d5
-    vmlal.u8        q10, d29, d5
-    vmlal.u8        q11, d30, d5
-    vmlal.u8        q12, d31, d5
-
-    vext.8          d27, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d28, d8, d9, #3
-    vext.8          d29, d10, d11, #3
-    vext.8          d30, d12, d13, #3
-    vext.8          d31, d14, d15, #3
-
-    vmull.u8        q3, d27, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q4, d28, d3
-    vmull.u8        q5, d29, d3
-    vmull.u8        q6, d30, d3
-    vmull.u8        q7, d31, d3
-
-    vqadd.s16       q8, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q9, q4
-    vqadd.s16       q10, q5
-    vqadd.s16       q11, q6
-    vqadd.s16       q12, q7
-
-    vqrshrun.s16    d26, q8, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d27, q9, #7
-    vqrshrun.s16    d28, q10, #7
-    vqrshrun.s16    d29, q11, #7                ;load intermediate data from stack
-    vqrshrun.s16    d30, q12, #7
-
-;Second pass: 8x4
-;secondpass_filter
-    add             r3, r12, r3, lsl #5
-    sub             lr, lr, #32
-
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-    vld1.u8         {q11}, [lr]!
-
-    vabs.s32        q7, q5
-    vabs.s32        q8, q6
-
-    vld1.u8         {q12}, [lr]!
-
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vdup.8          d1, d14[4]
-    vdup.8          d2, d15[0]
-    vdup.8          d3, d15[4]
-    vdup.8          d4, d16[0]
-    vdup.8          d5, d16[4]
-
-    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d23, d0
-    vmull.u8        q5, d24, d0
-    vmull.u8        q6, d25, d0
-
-    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q4, d24, d1
-    vmlsl.u8        q5, d25, d1
-    vmlsl.u8        q6, d26, d1
-
-    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d27, d4
-    vmlsl.u8        q5, d28, d4
-    vmlsl.u8        q6, d29, d4
-
-    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d25, d2
-    vmlal.u8        q5, d26, d2
-    vmlal.u8        q6, d27, d2
-
-    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q4, d28, d5
-    vmlal.u8        q5, d29, d5
-    vmlal.u8        q6, d30, d5
-
-    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q8, d26, d3
-    vmull.u8        q9, d27, d3
-    vmull.u8        q10, d28, d3
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q8, #7
-    vqrshrun.s16    d8, q9, #7
-    vqrshrun.s16    d9, q10, #7
-
-    vst1.u8         {d6}, [r4], r5          ;store result
-    vst1.u8         {d7}, [r4], r5
-    vst1.u8         {d8}, [r4], r5
-    vst1.u8         {d9}, [r4], r5
-
-    add             sp, sp, #32
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-;--------------------
-firstpass_filter8x4_only
-    vabs.s32        q12, q14
-    vabs.s32        q13, q15
-
-    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
-    vld1.u8         {q3}, [r0], r1          ;load src data
-
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vld1.u8         {q4}, [r0], r1
-    vdup.8          d1, d24[4]
-    vld1.u8         {q5}, [r0], r1
-    vdup.8          d2, d25[0]
-    vld1.u8         {q6}, [r0], r1
-    vdup.8          d3, d25[4]
-    vdup.8          d4, d26[0]
-    vdup.8          d5, d26[4]
-
-;First pass: output_height lines x output_width columns (4x8)
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q8, d8, d0
-    vmull.u8        q9, d10, d0
-    vmull.u8        q10, d12, d0
-
-    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d29, d8, d9, #1
-    vext.8          d30, d10, d11, #1
-    vext.8          d31, d12, d13, #1
-
-    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d29, d1
-    vmlsl.u8        q9, d30, d1
-    vmlsl.u8        q10, d31, d1
-
-    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d29, d8, d9, #4
-    vext.8          d30, d10, d11, #4
-    vext.8          d31, d12, d13, #4
-
-    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d29, d4
-    vmlsl.u8        q9, d30, d4
-    vmlsl.u8        q10, d31, d4
-
-    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d29, d8, d9, #2
-    vext.8          d30, d10, d11, #2
-    vext.8          d31, d12, d13, #2
-
-    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d29, d2
-    vmlal.u8        q9, d30, d2
-    vmlal.u8        q10, d31, d2
-
-    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d29, d8, d9, #5
-    vext.8          d30, d10, d11, #5
-    vext.8          d31, d12, d13, #5
-
-    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q8, d29, d5
-    vmlal.u8        q9, d30, d5
-    vmlal.u8        q10, d31, d5
-
-    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d29, d8, d9, #3
-    vext.8          d30, d10, d11, #3
-    vext.8          d31, d12, d13, #3
-
-    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q4, d29, d3
-    vmull.u8        q5, d30, d3
-    vmull.u8        q6, d31, d3
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d23, q8, #7
-    vqrshrun.s16    d24, q9, #7
-    vqrshrun.s16    d25, q10, #7
-
-    vst1.u8         {d22}, [r4], r5         ;store result
-    vst1.u8         {d23}, [r4], r5
-    vst1.u8         {d24}, [r4], r5
-    vst1.u8         {d25}, [r4], r5
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-;---------------------
-secondpass_filter8x4_only
-;Second pass: 8x4
-    add             r3, r12, r3, lsl #5
-    sub             r0, r0, r1, lsl #1
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-    vabs.s32        q7, q5
-    vabs.s32        q8, q6
-
-    vld1.u8         {d22}, [r0], r1
-    vld1.u8         {d23}, [r0], r1
-    vld1.u8         {d24}, [r0], r1
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vld1.u8         {d25}, [r0], r1
-    vdup.8          d1, d14[4]
-    vld1.u8         {d26}, [r0], r1
-    vdup.8          d2, d15[0]
-    vld1.u8         {d27}, [r0], r1
-    vdup.8          d3, d15[4]
-    vld1.u8         {d28}, [r0], r1
-    vdup.8          d4, d16[0]
-    vld1.u8         {d29}, [r0], r1
-    vdup.8          d5, d16[4]
-    vld1.u8         {d30}, [r0], r1
-
-    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d23, d0
-    vmull.u8        q5, d24, d0
-    vmull.u8        q6, d25, d0
-
-    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q4, d24, d1
-    vmlsl.u8        q5, d25, d1
-    vmlsl.u8        q6, d26, d1
-
-    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d27, d4
-    vmlsl.u8        q5, d28, d4
-    vmlsl.u8        q6, d29, d4
-
-    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d25, d2
-    vmlal.u8        q5, d26, d2
-    vmlal.u8        q6, d27, d2
-
-    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q4, d28, d5
-    vmlal.u8        q5, d29, d5
-    vmlal.u8        q6, d30, d5
-
-    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q8, d26, d3
-    vmull.u8        q9, d27, d3
-    vmull.u8        q10, d28, d3
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q8, #7
-    vqrshrun.s16    d8, q9, #7
-    vqrshrun.s16    d9, q10, #7
-
-    vst1.u8         {d6}, [r4], r5          ;store result
-    vst1.u8         {d7}, [r4], r5
-    vst1.u8         {d8}, [r4], r5
-    vst1.u8         {d9}, [r4], r5
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-    ENDP
-
-;-----------------
-
-    END
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
deleted file mode 100644
index 4b04925..0000000
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ /dev/null
@@ -1,528 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_sixtap_predict8x8_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter8_coeff
-    DCD     0,  0,  128,    0,   0,  0,   0,  0
-    DCD     0, -6,  123,   12,  -1,  0,   0,  0
-    DCD     2, -11, 108,   36,  -8,  1,   0,  0
-    DCD     0, -9,   93,   50,  -6,  0,   0,  0
-    DCD     3, -16,  77,   77, -16,  3,   0,  0
-    DCD     0, -6,   50,   93,  -9,  0,   0,  0
-    DCD     1, -8,   36,  108, -11,  2,   0,  0
-    DCD     0, -1,   12,  123,  -6,   0,  0,  0
-
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; stack(r4) unsigned char *dst_ptr,
-; stack(r5) int  dst_pitch
-
-|vp8_sixtap_predict8x8_neon| PROC
-    push            {r4-r5, lr}
-    vpush           {d8-d15}
-    adr             r12, filter8_coeff
-
-    ldr             r4, [sp, #76]           ;load parameters from stack
-    ldr             r5, [sp, #80]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             secondpass_filter8x8_only
-
-    add             r2, r12, r2, lsl #5     ;calculate filter location
-
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-
-    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter
-
-    beq             firstpass_filter8x8_only
-
-    sub             sp, sp, #64             ;reserve space on stack for temporary storage
-    mov             lr, sp
-
-    vabs.s32        q12, q14
-    vabs.s32        q13, q15
-
-    mov             r2, #2                  ;loop counter
-    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
-    sub             r0, r0, r1, lsl #1
-
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vdup.8          d1, d24[4]
-    vdup.8          d2, d25[0]
-
-;First pass: output_height lines x output_width columns (13x8)
-    vld1.u8         {q3}, [r0], r1          ;load src data
-    vdup.8          d3, d25[4]
-    vld1.u8         {q4}, [r0], r1
-    vdup.8          d4, d26[0]
-    vld1.u8         {q5}, [r0], r1
-    vdup.8          d5, d26[4]
-    vld1.u8         {q6}, [r0], r1
-
-filt_blk2d_fp8x8_loop_neon
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q8, d8, d0
-    vmull.u8        q9, d10, d0
-    vmull.u8        q10, d12, d0
-
-    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d29, d8, d9, #1
-    vext.8          d30, d10, d11, #1
-    vext.8          d31, d12, d13, #1
-
-    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d29, d1
-    vmlsl.u8        q9, d30, d1
-    vmlsl.u8        q10, d31, d1
-
-    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d29, d8, d9, #4
-    vext.8          d30, d10, d11, #4
-    vext.8          d31, d12, d13, #4
-
-    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d29, d4
-    vmlsl.u8        q9, d30, d4
-    vmlsl.u8        q10, d31, d4
-
-    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d29, d8, d9, #2
-    vext.8          d30, d10, d11, #2
-    vext.8          d31, d12, d13, #2
-
-    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d29, d2
-    vmlal.u8        q9, d30, d2
-    vmlal.u8        q10, d31, d2
-
-    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d29, d8, d9, #5
-    vext.8          d30, d10, d11, #5
-    vext.8          d31, d12, d13, #5
-
-    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q8, d29, d5
-    vmlal.u8        q9, d30, d5
-    vmlal.u8        q10, d31, d5
-
-    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d29, d8, d9, #3
-    vext.8          d30, d10, d11, #3
-    vext.8          d31, d12, d13, #3
-
-    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q4, d29, d3
-    vmull.u8        q5, d30, d3
-    vmull.u8        q6, d31, d3
-
-    subs            r2, r2, #1
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vld1.u8         {q3}, [r0], r1          ;load src data
-
-    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d23, q8, #7
-    vqrshrun.s16    d24, q9, #7
-    vqrshrun.s16    d25, q10, #7
-
-    vst1.u8         {d22}, [lr]!            ;store result
-    vld1.u8         {q4}, [r0], r1
-    vst1.u8         {d23}, [lr]!
-    vld1.u8         {q5}, [r0], r1
-    vst1.u8         {d24}, [lr]!
-    vld1.u8         {q6}, [r0], r1
-    vst1.u8         {d25}, [lr]!
-
-    bne             filt_blk2d_fp8x8_loop_neon
-
-    ;first_pass filtering on the rest 5-line data
-    ;vld1.u8            {q3}, [r0], r1          ;load src data
-    ;vld1.u8            {q4}, [r0], r1
-    ;vld1.u8            {q5}, [r0], r1
-    ;vld1.u8            {q6}, [r0], r1
-    vld1.u8         {q7}, [r0], r1
-
-    vmull.u8        q8, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q9, d8, d0
-    vmull.u8        q10, d10, d0
-    vmull.u8        q11, d12, d0
-    vmull.u8        q12, d14, d0
-
-    vext.8          d27, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d28, d8, d9, #1
-    vext.8          d29, d10, d11, #1
-    vext.8          d30, d12, d13, #1
-    vext.8          d31, d14, d15, #1
-
-    vmlsl.u8        q8, d27, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q9, d28, d1
-    vmlsl.u8        q10, d29, d1
-    vmlsl.u8        q11, d30, d1
-    vmlsl.u8        q12, d31, d1
-
-    vext.8          d27, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d28, d8, d9, #4
-    vext.8          d29, d10, d11, #4
-    vext.8          d30, d12, d13, #4
-    vext.8          d31, d14, d15, #4
-
-    vmlsl.u8        q8, d27, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q9, d28, d4
-    vmlsl.u8        q10, d29, d4
-    vmlsl.u8        q11, d30, d4
-    vmlsl.u8        q12, d31, d4
-
-    vext.8          d27, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d28, d8, d9, #2
-    vext.8          d29, d10, d11, #2
-    vext.8          d30, d12, d13, #2
-    vext.8          d31, d14, d15, #2
-
-    vmlal.u8        q8, d27, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q9, d28, d2
-    vmlal.u8        q10, d29, d2
-    vmlal.u8        q11, d30, d2
-    vmlal.u8        q12, d31, d2
-
-    vext.8          d27, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d28, d8, d9, #5
-    vext.8          d29, d10, d11, #5
-    vext.8          d30, d12, d13, #5
-    vext.8          d31, d14, d15, #5
-
-    vmlal.u8        q8, d27, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q9, d28, d5
-    vmlal.u8        q10, d29, d5
-    vmlal.u8        q11, d30, d5
-    vmlal.u8        q12, d31, d5
-
-    vext.8          d27, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d28, d8, d9, #3
-    vext.8          d29, d10, d11, #3
-    vext.8          d30, d12, d13, #3
-    vext.8          d31, d14, d15, #3
-
-    vmull.u8        q3, d27, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q4, d28, d3
-    vmull.u8        q5, d29, d3
-    vmull.u8        q6, d30, d3
-    vmull.u8        q7, d31, d3
-
-    vqadd.s16       q8, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q9, q4
-    vqadd.s16       q10, q5
-    vqadd.s16       q11, q6
-    vqadd.s16       q12, q7
-
-    add             r3, r12, r3, lsl #5
-
-    vqrshrun.s16    d26, q8, #7             ;shift/round/saturate to u8
-    sub             lr, lr, #64
-    vqrshrun.s16    d27, q9, #7
-    vld1.u8         {q9}, [lr]!             ;load intermediate data from stack
-    vqrshrun.s16    d28, q10, #7
-    vld1.u8         {q10}, [lr]!
-
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-
-    vqrshrun.s16    d29, q11, #7
-    vld1.u8         {q11}, [lr]!
-
-    vabs.s32        q7, q5
-    vabs.s32        q8, q6
-
-    vqrshrun.s16    d30, q12, #7
-    vld1.u8         {q12}, [lr]!
-
-;Second pass: 8x8
-    mov             r3, #2                  ;loop counter
-
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vdup.8          d1, d14[4]
-    vdup.8          d2, d15[0]
-    vdup.8          d3, d15[4]
-    vdup.8          d4, d16[0]
-    vdup.8          d5, d16[4]
-
-filt_blk2d_sp8x8_loop_neon
-    vmull.u8        q3, d18, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d19, d0
-    vmull.u8        q5, d20, d0
-    vmull.u8        q6, d21, d0
-
-    vmlsl.u8        q3, d19, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q4, d20, d1
-    vmlsl.u8        q5, d21, d1
-    vmlsl.u8        q6, d22, d1
-
-    vmlsl.u8        q3, d22, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d23, d4
-    vmlsl.u8        q5, d24, d4
-    vmlsl.u8        q6, d25, d4
-
-    vmlal.u8        q3, d20, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d21, d2
-    vmlal.u8        q5, d22, d2
-    vmlal.u8        q6, d23, d2
-
-    vmlal.u8        q3, d23, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q4, d24, d5
-    vmlal.u8        q5, d25, d5
-    vmlal.u8        q6, d26, d5
-
-    vmull.u8        q7, d21, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q8, d22, d3
-    vmull.u8        q9, d23, d3
-    vmull.u8        q10, d24, d3
-
-    subs            r3, r3, #1
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q8, #7
-    vqrshrun.s16    d8, q9, #7
-    vqrshrun.s16    d9, q10, #7
-
-    vmov            q9, q11
-    vst1.u8         {d6}, [r4], r5          ;store result
-    vmov            q10, q12
-    vst1.u8         {d7}, [r4], r5
-    vmov            q11, q13
-    vst1.u8         {d8}, [r4], r5
-    vmov            q12, q14
-    vst1.u8         {d9}, [r4], r5
-    vmov            d26, d30
-
-    bne filt_blk2d_sp8x8_loop_neon
-
-    add             sp, sp, #64
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-;---------------------
-firstpass_filter8x8_only
-    ;add                r2, r12, r2, lsl #5     ;calculate filter location
-    ;vld1.s32       {q14, q15}, [r2]        ;load first_pass filter
-    vabs.s32        q12, q14
-    vabs.s32        q13, q15
-
-    mov             r2, #2                  ;loop counter
-    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
-
-    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
-    vdup.8          d1, d24[4]
-    vdup.8          d2, d25[0]
-    vdup.8          d3, d25[4]
-    vdup.8          d4, d26[0]
-    vdup.8          d5, d26[4]
-
-;First pass: output_height lines x output_width columns (8x8)
-filt_blk2d_fpo8x8_loop_neon
-    vld1.u8         {q3}, [r0], r1          ;load src data
-    vld1.u8         {q4}, [r0], r1
-    vld1.u8         {q5}, [r0], r1
-    vld1.u8         {q6}, [r0], r1
-
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q8, d8, d0
-    vmull.u8        q9, d10, d0
-    vmull.u8        q10, d12, d0
-
-    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
-    vext.8          d29, d8, d9, #1
-    vext.8          d30, d10, d11, #1
-    vext.8          d31, d12, d13, #1
-
-    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q8, d29, d1
-    vmlsl.u8        q9, d30, d1
-    vmlsl.u8        q10, d31, d1
-
-    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
-    vext.8          d29, d8, d9, #4
-    vext.8          d30, d10, d11, #4
-    vext.8          d31, d12, d13, #4
-
-    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q8, d29, d4
-    vmlsl.u8        q9, d30, d4
-    vmlsl.u8        q10, d31, d4
-
-    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
-    vext.8          d29, d8, d9, #2
-    vext.8          d30, d10, d11, #2
-    vext.8          d31, d12, d13, #2
-
-    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q8, d29, d2
-    vmlal.u8        q9, d30, d2
-    vmlal.u8        q10, d31, d2
-
-    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
-    vext.8          d29, d8, d9, #5
-    vext.8          d30, d10, d11, #5
-    vext.8          d31, d12, d13, #5
-
-    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q8, d29, d5
-    vmlal.u8        q9, d30, d5
-    vmlal.u8        q10, d31, d5
-
-    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
-    vext.8          d29, d8, d9, #3
-    vext.8          d30, d10, d11, #3
-    vext.8          d31, d12, d13, #3
-
-    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q4, d29, d3
-    vmull.u8        q5, d30, d3
-    vmull.u8        q6, d31, d3
- ;
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    subs            r2, r2, #1
-
-    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
-    vqrshrun.s16    d23, q8, #7
-    vqrshrun.s16    d24, q9, #7
-    vqrshrun.s16    d25, q10, #7
-
-    vst1.u8         {d22}, [r4], r5         ;store result
-    vst1.u8         {d23}, [r4], r5
-    vst1.u8         {d24}, [r4], r5
-    vst1.u8         {d25}, [r4], r5
-
-    bne             filt_blk2d_fpo8x8_loop_neon
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-;---------------------
-secondpass_filter8x8_only
-    sub             r0, r0, r1, lsl #1
-    add             r3, r12, r3, lsl #5
-
-    vld1.u8         {d18}, [r0], r1         ;load src data
-    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
-    vld1.u8         {d19}, [r0], r1
-    vabs.s32        q7, q5
-    vld1.u8         {d20}, [r0], r1
-    vabs.s32        q8, q6
-    vld1.u8         {d21}, [r0], r1
-    mov             r3, #2                  ;loop counter
-    vld1.u8         {d22}, [r0], r1
-    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
-    vld1.u8         {d23}, [r0], r1
-    vdup.8          d1, d14[4]
-    vld1.u8         {d24}, [r0], r1
-    vdup.8          d2, d15[0]
-    vld1.u8         {d25}, [r0], r1
-    vdup.8          d3, d15[4]
-    vld1.u8         {d26}, [r0], r1
-    vdup.8          d4, d16[0]
-    vld1.u8         {d27}, [r0], r1
-    vdup.8          d5, d16[4]
-    vld1.u8         {d28}, [r0], r1
-    vld1.u8         {d29}, [r0], r1
-    vld1.u8         {d30}, [r0], r1
-
-;Second pass: 8x8
-filt_blk2d_spo8x8_loop_neon
-    vmull.u8        q3, d18, d0             ;(src_ptr[-2] * vp8_filter[0])
-    vmull.u8        q4, d19, d0
-    vmull.u8        q5, d20, d0
-    vmull.u8        q6, d21, d0
-
-    vmlsl.u8        q3, d19, d1             ;-(src_ptr[-1] * vp8_filter[1])
-    vmlsl.u8        q4, d20, d1
-    vmlsl.u8        q5, d21, d1
-    vmlsl.u8        q6, d22, d1
-
-    vmlsl.u8        q3, d22, d4             ;-(src_ptr[2] * vp8_filter[4])
-    vmlsl.u8        q4, d23, d4
-    vmlsl.u8        q5, d24, d4
-    vmlsl.u8        q6, d25, d4
-
-    vmlal.u8        q3, d20, d2             ;(src_ptr[0] * vp8_filter[2])
-    vmlal.u8        q4, d21, d2
-    vmlal.u8        q5, d22, d2
-    vmlal.u8        q6, d23, d2
-
-    vmlal.u8        q3, d23, d5             ;(src_ptr[3] * vp8_filter[5])
-    vmlal.u8        q4, d24, d5
-    vmlal.u8        q5, d25, d5
-    vmlal.u8        q6, d26, d5
-
-    vmull.u8        q7, d21, d3             ;(src_ptr[1] * vp8_filter[3])
-    vmull.u8        q8, d22, d3
-    vmull.u8        q9, d23, d3
-    vmull.u8        q10, d24, d3
-
-    subs            r3, r3, #1
-
-    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
-    vqadd.s16       q8, q4
-    vqadd.s16       q9, q5
-    vqadd.s16       q10, q6
-
-    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
-    vqrshrun.s16    d7, q8, #7
-    vqrshrun.s16    d8, q9, #7
-    vqrshrun.s16    d9, q10, #7
-
-    vmov            q9, q11
-    vst1.u8         {d6}, [r4], r5          ;store result
-    vmov            q10, q12
-    vst1.u8         {d7}, [r4], r5
-    vmov            q11, q13
-    vst1.u8         {d8}, [r4], r5
-    vmov            q12, q14
-    vst1.u8         {d9}, [r4], r5
-    vmov            d26, d30
-
-    bne filt_blk2d_spo8x8_loop_neon
-
-    vpop            {d8-d15}
-    pop             {r4-r5,pc}
-
-    ENDP
-
-;-----------------
-
-    END
diff --git a/vp8/common/arm/neon/sixtappredict_neon.c b/vp8/common/arm/neon/sixtappredict_neon.c
new file mode 100644
index 0000000..7a4d9e0
--- /dev/null
+++ b/vp8/common/arm/neon/sixtappredict_neon.c
@@ -0,0 +1,1752 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#ifdef _MSC_VER  /* under MSVC, make __builtin_prefetch() expand to nothing */
+#define __builtin_prefetch(x)
+#endif
+
+static const int8_t vp8_sub_pel_filters[8][8] = {  /* NB: 128 > INT8_MAX */
+    {0,  0,  128,   0,   0, 0, 0, 0},  /* note that 1/8 pel positions are */
+    {0, -6,  123,  12,  -1, 0, 0, 0},  /*    just as per alpha -0.5 bicubic */
+    {2, -11, 108,  36,  -8, 1, 0, 0},  /* New 1/4 pel 6 tap filter */
+    {0, -9,   93,  50,  -6, 0, 0, 0},
+    {3, -16,  77,  77, -16, 3, 0, 0},  /* New 1/2 pel 6 tap filter */
+    {0, -6,   50,  93,  -9, 0, 0, 0},
+    {1, -8,   36, 108, -11, 2, 0, 0},  /* New 1/4 pel 6 tap filter */
+    {0, -1,   12, 123,  -6, 0, 0, 0},
+};
+
+void vp8_sixtap_predict4x4_neon(
+        unsigned char *src_ptr,   // block origin; the 6 taps cover -2..+3
+        int src_pixels_per_line,  // source stride in bytes
+        int xoffset,              // horizontal filter index; 0 skips pass 1
+        int yoffset,              // vertical filter index; 0 skips pass 2
+        unsigned char *dst_ptr,   // receives 4 rows of 4 bytes
+        int dst_pitch) {          // destination stride in bytes
+    unsigned char *src;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d18u8, d19u8, d20u8, d21u8;
+    uint8x8_t d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
+    int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+    uint32x2_t d27u32, d28u32, d29u32, d30u32, d31u32;
+    uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+    uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+    int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+    int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+    uint8x16_t q3u8, q4u8, q5u8, q6u8, q11u8;
+    uint64x2_t q3u64, q4u64, q5u64, q6u64, q9u64, q10u64;
+    uint32x2x2_t d0u32x2, d1u32x2;
+    // Six-tap prediction of a 4x4 block: horizontal pass, then vertical pass.
+    if (xoffset == 0) {  // secondpass_filter4x4_only
+        // load second_pass filter as |tap|; vmlal/vmlsl below supply the signs
+        dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+        d0s8 = vdup_lane_s8(dtmps8, 0);
+        d1s8 = vdup_lane_s8(dtmps8, 1);
+        d2s8 = vdup_lane_s8(dtmps8, 2);
+        d3s8 = vdup_lane_s8(dtmps8, 3);
+        d4s8 = vdup_lane_s8(dtmps8, 4);
+        d5s8 = vdup_lane_s8(dtmps8, 5);
+        d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+        d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+        d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+        d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+        d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+        d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+        // load src data; each lane-0 load starts from a zeroed vector
+        src = src_ptr - src_pixels_per_line * 2;
+        d27u32 = vld1_lane_u32((const uint32_t *)src, vdup_n_u32(0), 0);
+        src += src_pixels_per_line;
+        d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 1);
+        src += src_pixels_per_line;
+        d28u32 = vld1_lane_u32((const uint32_t *)src, vdup_n_u32(0), 0);
+        src += src_pixels_per_line;
+        d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 1);
+        src += src_pixels_per_line;
+        d29u32 = vld1_lane_u32((const uint32_t *)src, vdup_n_u32(0), 0);
+        src += src_pixels_per_line;
+        d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 1);
+        src += src_pixels_per_line;
+        d30u32 = vld1_lane_u32((const uint32_t *)src, vdup_n_u32(0), 0);
+        src += src_pixels_per_line;
+        d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 1);
+        src += src_pixels_per_line;
+        d31u32 = vld1_lane_u32((const uint32_t *)src, vdup_n_u32(0), 0);
+        // d27..d30 hold two loaded rows each; d31 holds row 8 in lane 0
+        d27u8 = vreinterpret_u8_u32(d27u32);
+        d28u8 = vreinterpret_u8_u32(d28u32);
+        d29u8 = vreinterpret_u8_u32(d29u32);
+        d30u8 = vreinterpret_u8_u32(d30u32);
+        d31u8 = vreinterpret_u8_u32(d31u32);
+        // d23..d26: row pairs (1,2) (3,4) (5,6) (7,8)
+        d23u8 = vext_u8(d27u8, d28u8, 4);
+        d24u8 = vext_u8(d28u8, d29u8, 4);
+        d25u8 = vext_u8(d29u8, d30u8, 4);
+        d26u8 = vext_u8(d30u8, d31u8, 4);
+
+        q3u16 = vmull_u8(d27u8, d0u8);
+        q4u16 = vmull_u8(d28u8, d0u8);
+        q5u16 = vmull_u8(d25u8, d5u8);
+        q6u16 = vmull_u8(d26u8, d5u8);
+
+        q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
+        q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
+        q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
+        q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
+
+        q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
+        q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
+        q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
+        q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
+
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+
+        q5s16 = vqaddq_s16(q5s16, q3s16);
+        q6s16 = vqaddq_s16(q6s16, q4s16);
+
+        d3u8 = vqrshrun_n_s16(q5s16, 7);
+        d4u8 = vqrshrun_n_s16(q6s16, 7);
+
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
+        return;
+    }
+
+    // load first_pass filter as |tap|; vmlal/vmlsl below supply the signs
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    // First pass: output_height lines x output_width columns (9x4)
+
+    if (yoffset == 0)  // firstpass_filter4x4_only
+        src = src_ptr - 2;
+    else
+        src = src_ptr - 2 - (src_pixels_per_line * 2);
+    // each vld1q_u8 fetches 16 bytes of a row; only bytes 0..8 are used
+    q3u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+
+    d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+    // vswp here: q3 = low halves of rows 0|1, q5 = low halves of rows 2|3
+    q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
+    q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8),  // d18 d19
+                       vreinterpret_u32_u8(d19u8));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8),  // d20 d21
+                       vreinterpret_u32_u8(d21u8));
+    q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
+    q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
+
+    // keep original src data in q4 q6
+    q4u64 = vreinterpretq_u64_u8(q3u8);
+    q6u64 = vreinterpretq_u64_u8(q5u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)),  // d6 d7
+                       vreinterpret_u32_u8(vget_high_u8(q3u8)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)),  // d10 d11
+                       vreinterpret_u32_u8(vget_high_u8(q5u8)));
+    q9u64 = vshrq_n_u64(q4u64, 8);
+    q10u64 = vshrq_n_u64(q6u64, 8);
+    q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
+    q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)),   // d18 d19
+                       vreinterpret_u32_u64(vget_high_u64(q9u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)),  // d20 d21
+                       vreinterpret_u32_u64(vget_high_u64(q10u64)));
+    q3u64 = vshrq_n_u64(q4u64, 32);
+    q5u64 = vshrq_n_u64(q6u64, 32);
+    q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
+    q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)),  // d6 d7
+                       vreinterpret_u32_u64(vget_high_u64(q3u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),  // d10 d11
+                       vreinterpret_u32_u64(vget_high_u64(q5u64)));
+    q9u64 = vshrq_n_u64(q4u64, 16);
+    q10u64 = vshrq_n_u64(q6u64, 16);
+    q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
+    q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)),   // d18 d19
+                       vreinterpret_u32_u64(vget_high_u64(q9u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)),  // d20 d21
+                       vreinterpret_u32_u64(vget_high_u64(q10u64)));
+    q3u64 = vshrq_n_u64(q4u64, 24);
+    q5u64 = vshrq_n_u64(q6u64, 24);
+    q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
+    q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)),  // d6 d7
+                       vreinterpret_u32_u64(vget_high_u64(q3u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),  // d10 d11
+                       vreinterpret_u32_u64(vget_high_u64(q5u64)));
+    q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
+    q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
+
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+    q7s16 = vqaddq_s16(q7s16, q9s16);
+    q8s16 = vqaddq_s16(q8s16, q10s16);
+
+    d27u8 = vqrshrun_n_s16(q7s16, 7);
+    d28u8 = vqrshrun_n_s16(q8s16, 7);
+
+    if (yoffset == 0) {  // firstpass_filter4x4_only
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 1);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
+        return;
+    }
+
+    // First Pass on rest 5-line data (loaded rows 4..8)
+    q3u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q11u8 = vld1q_u8(src);
+
+    d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+    // vswp here: q3 = low halves of rows 4|5, q5 = low halves of rows 6|7
+    q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
+    q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8),  // d18 d19
+                       vreinterpret_u32_u8(d19u8));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8),  // d20 d21
+                       vreinterpret_u32_u8(d21u8));
+    d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 5);
+    q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
+    q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
+    q12u16 = vmull_u8(d31u8, d5u8);
+
+    q4u64 = vreinterpretq_u64_u8(q3u8);
+    q6u64 = vreinterpretq_u64_u8(q5u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)),  // d6 d7
+                       vreinterpret_u32_u8(vget_high_u8(q3u8)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)),  // d10 d11
+                       vreinterpret_u32_u8(vget_high_u8(q5u8)));
+    q9u64 = vshrq_n_u64(q4u64, 8);
+    q10u64 = vshrq_n_u64(q6u64, 8);
+    q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
+    q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
+    q12u16 = vmlal_u8(q12u16, vget_low_u8(q11u8), d0u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)),   // d18 d19
+                       vreinterpret_u32_u64(vget_high_u64(q9u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)),  // d20 d21
+                       vreinterpret_u32_u64(vget_high_u64(q10u64)));
+    q3u64 = vshrq_n_u64(q4u64, 32);
+    q5u64 = vshrq_n_u64(q6u64, 32);
+    d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 1);
+    q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
+    q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)),  // d6 d7
+                       vreinterpret_u32_u64(vget_high_u64(q3u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),  // d10 d11
+                       vreinterpret_u32_u64(vget_high_u64(q5u64)));
+    q9u64 = vshrq_n_u64(q4u64, 16);
+    q10u64 = vshrq_n_u64(q6u64, 16);
+    d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 4);
+    q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
+    q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)),   // d18 d19
+                       vreinterpret_u32_u64(vget_high_u64(q9u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)),  // d20 d21
+                       vreinterpret_u32_u64(vget_high_u64(q10u64)));
+    q3u64 = vshrq_n_u64(q4u64, 24);
+    q5u64 = vshrq_n_u64(q6u64, 24);
+    d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 2);
+    q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
+    q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+    d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)),  // d6 d7
+                       vreinterpret_u32_u64(vget_high_u64(q3u64)));
+    d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),  // d10 d11
+                       vreinterpret_u32_u64(vget_high_u64(q5u64)));
+    d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 3);
+    q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
+    q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
+    q11u16 = vmull_u8(d31u8, d3u8);
+
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+    q11s16 = vreinterpretq_s16_u16(q11u16);
+    q12s16 = vreinterpretq_s16_u16(q12u16);
+    q7s16 = vqaddq_s16(q7s16, q9s16);
+    q8s16 = vqaddq_s16(q8s16, q10s16);
+    q12s16 = vqaddq_s16(q12s16, q11s16);
+
+    d29u8 = vqrshrun_n_s16(q7s16, 7);
+    d30u8 = vqrshrun_n_s16(q8s16, 7);
+    d31u8 = vqrshrun_n_s16(q12s16, 7);
+
+    // Second pass: 4x4
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    d23u8 = vext_u8(d27u8, d28u8, 4);
+    d24u8 = vext_u8(d28u8, d29u8, 4);
+    d25u8 = vext_u8(d29u8, d30u8, 4);
+    d26u8 = vext_u8(d30u8, d31u8, 4);
+
+    q3u16 = vmull_u8(d27u8, d0u8);
+    q4u16 = vmull_u8(d28u8, d0u8);
+    q5u16 = vmull_u8(d25u8, d5u8);
+    q6u16 = vmull_u8(d26u8, d5u8);
+
+    q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
+    q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
+    q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
+    q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
+
+    q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
+    q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
+    q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
+    q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+
+    q5s16 = vqaddq_s16(q5s16, q3s16);
+    q6s16 = vqaddq_s16(q6s16, q4s16);
+
+    d3u8 = vqrshrun_n_s16(q5s16, 7);
+    d4u8 = vqrshrun_n_s16(q6s16, 7);
+
+    vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
+    dst_ptr += dst_pitch;
+    vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
+    dst_ptr += dst_pitch;
+    vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
+    dst_ptr += dst_pitch;
+    vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
+    return;
+}
+
+void vp8_sixtap_predict8x4_neon(
+        unsigned char *src_ptr,   // block origin; the 6 taps cover -2..+3
+        int src_pixels_per_line,  // source stride in bytes
+        int xoffset,              // horizontal filter index; 0 skips pass 1
+        int yoffset,              // vertical filter index; 0 skips pass 2
+        unsigned char *dst_ptr,   // receives 4 rows of 8 bytes
+        int dst_pitch) {          // destination stride in bytes
+    unsigned char *src;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8;
+    uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8;
+    int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+    uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+    uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+    int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+    int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+    uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8;
+    // Six-tap prediction of an 8x4 block: horizontal pass, then vertical pass.
+    if (xoffset == 0) {  // secondpass_filter8x4_only
+        // load second_pass filter as |tap|; vmlal/vmlsl below supply the signs
+        dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+        d0s8 = vdup_lane_s8(dtmps8, 0);
+        d1s8 = vdup_lane_s8(dtmps8, 1);
+        d2s8 = vdup_lane_s8(dtmps8, 2);
+        d3s8 = vdup_lane_s8(dtmps8, 3);
+        d4s8 = vdup_lane_s8(dtmps8, 4);
+        d5s8 = vdup_lane_s8(dtmps8, 5);
+        d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+        d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+        d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+        d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+        d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+        d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+        // load src data: 9 rows x 8 bytes, starting two rows above the block
+        src = src_ptr - src_pixels_per_line * 2;
+        d22u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d23u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d24u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d25u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d26u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d27u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d28u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d29u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d30u8 = vld1_u8(src);
+
+        q3u16 = vmull_u8(d22u8, d0u8);
+        q4u16 = vmull_u8(d23u8, d0u8);
+        q5u16 = vmull_u8(d24u8, d0u8);
+        q6u16 = vmull_u8(d25u8, d0u8);
+
+        q3u16 = vmlsl_u8(q3u16, d23u8, d1u8);
+        q4u16 = vmlsl_u8(q4u16, d24u8, d1u8);
+        q5u16 = vmlsl_u8(q5u16, d25u8, d1u8);
+        q6u16 = vmlsl_u8(q6u16, d26u8, d1u8);
+
+        q3u16 = vmlsl_u8(q3u16, d26u8, d4u8);
+        q4u16 = vmlsl_u8(q4u16, d27u8, d4u8);
+        q5u16 = vmlsl_u8(q5u16, d28u8, d4u8);
+        q6u16 = vmlsl_u8(q6u16, d29u8, d4u8);
+
+        q3u16 = vmlal_u8(q3u16, d24u8, d2u8);
+        q4u16 = vmlal_u8(q4u16, d25u8, d2u8);
+        q5u16 = vmlal_u8(q5u16, d26u8, d2u8);
+        q6u16 = vmlal_u8(q6u16, d27u8, d2u8);
+
+        q3u16 = vmlal_u8(q3u16, d27u8, d5u8);
+        q4u16 = vmlal_u8(q4u16, d28u8, d5u8);
+        q5u16 = vmlal_u8(q5u16, d29u8, d5u8);
+        q6u16 = vmlal_u8(q6u16, d30u8, d5u8);
+
+        q7u16 = vmull_u8(d25u8, d3u8);
+        q8u16 = vmull_u8(d26u8, d3u8);
+        q9u16 = vmull_u8(d27u8, d3u8);
+        q10u16 = vmull_u8(d28u8, d3u8);
+
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+
+        q7s16 = vqaddq_s16(q7s16, q3s16);
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q9s16 = vqaddq_s16(q9s16, q5s16);
+        q10s16 = vqaddq_s16(q10s16, q6s16);
+
+        d6u8 = vqrshrun_n_s16(q7s16, 7);
+        d7u8 = vqrshrun_n_s16(q8s16, 7);
+        d8u8 = vqrshrun_n_s16(q9s16, 7);
+        d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+        vst1_u8(dst_ptr, d6u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d7u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d8u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d9u8);
+        return;
+    }
+
+    // load first_pass filter as |tap|; vmlal/vmlsl below supply the signs
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    // First pass: output_height lines x output_width columns (9x8)
+    if (yoffset == 0)  // firstpass_filter8x4_only
+        src = src_ptr - 2;
+    else
+        src = src_ptr - 2 - (src_pixels_per_line * 2);
+    q3u8 = vld1q_u8(src);  // 16 bytes per row; bytes 0..12 are used below
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+    // tap 0: operands are bytes 0..7 of each row
+    q7u16  = vmull_u8(vget_low_u8(q3u8), d0u8);
+    q8u16  = vmull_u8(vget_low_u8(q4u8), d0u8);
+    q9u16  = vmull_u8(vget_low_u8(q5u8), d0u8);
+    q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+    // tap 1 (subtracted): bytes 1..8
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+
+    q7u16  = vmlsl_u8(q7u16, d28u8, d1u8);
+    q8u16  = vmlsl_u8(q8u16, d29u8, d1u8);
+    q9u16  = vmlsl_u8(q9u16, d30u8, d1u8);
+    q10u16 = vmlsl_u8(q10u16, d31u8, d1u8);
+    // tap 4 (subtracted): bytes 4..11
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+
+    q7u16  = vmlsl_u8(q7u16, d28u8, d4u8);
+    q8u16  = vmlsl_u8(q8u16, d29u8, d4u8);
+    q9u16  = vmlsl_u8(q9u16, d30u8, d4u8);
+    q10u16 = vmlsl_u8(q10u16, d31u8, d4u8);
+    // tap 2: bytes 2..9
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+
+    q7u16  = vmlal_u8(q7u16, d28u8, d2u8);
+    q8u16  = vmlal_u8(q8u16, d29u8, d2u8);
+    q9u16  = vmlal_u8(q9u16, d30u8, d2u8);
+    q10u16 = vmlal_u8(q10u16, d31u8, d2u8);
+    // tap 5: bytes 5..12
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+    q7u16 = vmlal_u8(q7u16, d28u8, d5u8);
+    q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+    q9u16 = vmlal_u8(q9u16, d30u8, d5u8);
+    q10u16 = vmlal_u8(q10u16, d31u8, d5u8);
+    // tap 3: bytes 3..10, kept apart and merged by the saturating add below
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+
+    q3u16 = vmull_u8(d28u8, d3u8);
+    q4u16 = vmull_u8(d29u8, d3u8);
+    q5u16 = vmull_u8(d30u8, d3u8);
+    q6u16 = vmull_u8(d31u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+
+    q7s16 = vqaddq_s16(q7s16, q3s16);
+    q8s16 = vqaddq_s16(q8s16, q4s16);
+    q9s16 = vqaddq_s16(q9s16, q5s16);
+    q10s16 = vqaddq_s16(q10s16, q6s16);
+
+    d22u8 = vqrshrun_n_s16(q7s16, 7);
+    d23u8 = vqrshrun_n_s16(q8s16, 7);
+    d24u8 = vqrshrun_n_s16(q9s16, 7);
+    d25u8 = vqrshrun_n_s16(q10s16, 7);
+
+    if (yoffset == 0) {  // firstpass_filter8x4_only
+        vst1_u8(dst_ptr, d22u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d23u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d24u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d25u8);
+        return;
+    }
+
+    // First Pass on rest 5-line data (same tap order as above)
+    src += src_pixels_per_line;  // src was left on the 4th loaded row
+    q3u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q7u8 = vld1q_u8(src);
+
+    q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+    q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+    q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+    q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+    q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1);
+
+    q8u16  = vmlsl_u8(q8u16, d27u8, d1u8);
+    q9u16  = vmlsl_u8(q9u16, d28u8, d1u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d1u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4);
+
+    q8u16  = vmlsl_u8(q8u16, d27u8, d4u8);
+    q9u16  = vmlsl_u8(q9u16, d28u8, d4u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d4u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2);
+
+    q8u16  = vmlal_u8(q8u16, d27u8, d2u8);
+    q9u16  = vmlal_u8(q9u16, d28u8, d2u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d2u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5);
+
+    q8u16  = vmlal_u8(q8u16, d27u8, d5u8);
+    q9u16  = vmlal_u8(q9u16, d28u8, d5u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d5u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d5u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3);
+
+    q3u16 = vmull_u8(d27u8, d3u8);
+    q4u16 = vmull_u8(d28u8, d3u8);
+    q5u16 = vmull_u8(d29u8, d3u8);
+    q6u16 = vmull_u8(d30u8, d3u8);
+    q7u16 = vmull_u8(d31u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+    q11s16 = vreinterpretq_s16_u16(q11u16);
+    q12s16 = vreinterpretq_s16_u16(q12u16);
+
+    q8s16 = vqaddq_s16(q8s16, q3s16);
+    q9s16 = vqaddq_s16(q9s16, q4s16);
+    q10s16 = vqaddq_s16(q10s16, q5s16);
+    q11s16 = vqaddq_s16(q11s16, q6s16);
+    q12s16 = vqaddq_s16(q12s16, q7s16);
+
+    d26u8 = vqrshrun_n_s16(q8s16, 7);
+    d27u8 = vqrshrun_n_s16(q9s16, 7);
+    d28u8 = vqrshrun_n_s16(q10s16, 7);
+    d29u8 = vqrshrun_n_s16(q11s16, 7);
+    d30u8 = vqrshrun_n_s16(q12s16, 7);
+
+    // Second pass: 8x4 (d22..d30 now hold the 9 horizontally filtered rows)
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    q3u16 = vmull_u8(d22u8, d0u8);
+    q4u16 = vmull_u8(d23u8, d0u8);
+    q5u16 = vmull_u8(d24u8, d0u8);
+    q6u16 = vmull_u8(d25u8, d0u8);
+
+    q3u16 = vmlsl_u8(q3u16, d23u8, d1u8);
+    q4u16 = vmlsl_u8(q4u16, d24u8, d1u8);
+    q5u16 = vmlsl_u8(q5u16, d25u8, d1u8);
+    q6u16 = vmlsl_u8(q6u16, d26u8, d1u8);
+
+    q3u16 = vmlsl_u8(q3u16, d26u8, d4u8);
+    q4u16 = vmlsl_u8(q4u16, d27u8, d4u8);
+    q5u16 = vmlsl_u8(q5u16, d28u8, d4u8);
+    q6u16 = vmlsl_u8(q6u16, d29u8, d4u8);
+
+    q3u16 = vmlal_u8(q3u16, d24u8, d2u8);
+    q4u16 = vmlal_u8(q4u16, d25u8, d2u8);
+    q5u16 = vmlal_u8(q5u16, d26u8, d2u8);
+    q6u16 = vmlal_u8(q6u16, d27u8, d2u8);
+
+    q3u16 = vmlal_u8(q3u16, d27u8, d5u8);
+    q4u16 = vmlal_u8(q4u16, d28u8, d5u8);
+    q5u16 = vmlal_u8(q5u16, d29u8, d5u8);
+    q6u16 = vmlal_u8(q6u16, d30u8, d5u8);
+
+    q7u16 = vmull_u8(d25u8, d3u8);
+    q8u16 = vmull_u8(d26u8, d3u8);
+    q9u16 = vmull_u8(d27u8, d3u8);
+    q10u16 = vmull_u8(d28u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+
+    q7s16 = vqaddq_s16(q7s16, q3s16);
+    q8s16 = vqaddq_s16(q8s16, q4s16);
+    q9s16 = vqaddq_s16(q9s16, q5s16);
+    q10s16 = vqaddq_s16(q10s16, q6s16);
+
+    d6u8 = vqrshrun_n_s16(q7s16, 7);
+    d7u8 = vqrshrun_n_s16(q8s16, 7);
+    d8u8 = vqrshrun_n_s16(q9s16, 7);
+    d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+    vst1_u8(dst_ptr, d6u8);
+    dst_ptr += dst_pitch;
+    vst1_u8(dst_ptr, d7u8);
+    dst_ptr += dst_pitch;
+    vst1_u8(dst_ptr, d8u8);
+    dst_ptr += dst_pitch;
+    vst1_u8(dst_ptr, d9u8);
+    return;
+}
+
+void vp8_sixtap_predict8x8_neon(  // six-tap sub-pixel prediction, 8x8 output
+        unsigned char *src_ptr,  // source; reads begin 2 rows above and/or 2 columns left
+        int src_pixels_per_line,  // byte stride between source rows
+        int xoffset,  // vp8_sub_pel_filters row used horizontally; 0 selects the vertical-only path
+        int yoffset,  // vp8_sub_pel_filters row used vertically; 0 (xoffset != 0) is horizontal-only
+        unsigned char *dst_ptr,  // receives 8 rows of 8 bytes
+        int dst_pitch) {  // byte stride between destination rows
+    unsigned char *src, *tmpp;
+    unsigned char tmp[64];  // first-pass output rows 0-7, 8 bytes each
+    int i;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8;
+    uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
+    int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+    uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+    uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+    int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+    int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+    uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8;
+
+    if (xoffset == 0) {  // secondpass_filter8x8_only
+        // load second_pass filter
+        dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+        d0s8 = vdup_lane_s8(dtmps8, 0);
+        d1s8 = vdup_lane_s8(dtmps8, 1);
+        d2s8 = vdup_lane_s8(dtmps8, 2);
+        d3s8 = vdup_lane_s8(dtmps8, 3);
+        d4s8 = vdup_lane_s8(dtmps8, 4);
+        d5s8 = vdup_lane_s8(dtmps8, 5);
+        d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));  // |tap|: taps 1,4 are subtracted below, others added
+        d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+        d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+        d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+        d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+        d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+        // load 13 rows of src data (8 bytes each), starting 2 rows above src_ptr
+        src = src_ptr - src_pixels_per_line * 2;
+        d18u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d19u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d20u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d21u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d22u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d23u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d24u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d25u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d26u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d27u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d28u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d29u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d30u8 = vld1_u8(src);
+
+        for (i = 2; i > 0; i--) {  // two iterations, 4 output rows each
+            q3u16 = vmull_u8(d18u8, d0u8);
+            q4u16 = vmull_u8(d19u8, d0u8);
+            q5u16 = vmull_u8(d20u8, d0u8);
+            q6u16 = vmull_u8(d21u8, d0u8);
+
+            q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+            q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+            q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+            q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+            q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+            q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+            q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+            q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+            q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+            q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+            q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+            q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+            q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+            q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+            q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+            q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+            q7u16 = vmull_u8(d21u8, d3u8);
+            q8u16 = vmull_u8(d22u8, d3u8);
+            q9u16 = vmull_u8(d23u8, d3u8);
+            q10u16 = vmull_u8(d24u8, d3u8);
+
+            q3s16 = vreinterpretq_s16_u16(q3u16);
+            q4s16 = vreinterpretq_s16_u16(q4u16);
+            q5s16 = vreinterpretq_s16_u16(q5u16);
+            q6s16 = vreinterpretq_s16_u16(q6u16);
+            q7s16 = vreinterpretq_s16_u16(q7u16);
+            q8s16 = vreinterpretq_s16_u16(q8u16);
+            q9s16 = vreinterpretq_s16_u16(q9u16);
+            q10s16 = vreinterpretq_s16_u16(q10u16);
+
+            q7s16 = vqaddq_s16(q7s16, q3s16);  // tap-3 product joins the other taps with saturation
+            q8s16 = vqaddq_s16(q8s16, q4s16);
+            q9s16 = vqaddq_s16(q9s16, q5s16);
+            q10s16 = vqaddq_s16(q10s16, q6s16);
+
+            d6u8 = vqrshrun_n_s16(q7s16, 7);  // rounding shift right by 7, saturate to u8
+            d7u8 = vqrshrun_n_s16(q8s16, 7);
+            d8u8 = vqrshrun_n_s16(q9s16, 7);
+            d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+            d18u8 = d22u8;  // slide the row window down by 4 for the next iteration
+            d19u8 = d23u8;
+            d20u8 = d24u8;
+            d21u8 = d25u8;
+            d22u8 = d26u8;
+            d23u8 = d27u8;
+            d24u8 = d28u8;
+            d25u8 = d29u8;
+            d26u8 = d30u8;
+
+            vst1_u8(dst_ptr, d6u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d7u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d8u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d9u8);
+            dst_ptr += dst_pitch;
+        }
+        return;
+    }
+
+    // load first_pass filter
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));  // |tap|: taps 1,4 are subtracted below, others added
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    // First pass: 13 lines x 8 columns (only 8 lines when yoffset == 0)
+    if (yoffset == 0)  // first pass only: no rows above src_ptr are needed
+        src = src_ptr - 2;
+    else
+        src = src_ptr - 2 - (src_pixels_per_line * 2);
+
+    tmpp = tmp;
+    for (i = 2; i > 0; i--) {  // two iterations, 4 rows each
+        q3u8 = vld1q_u8(src);  // 16-byte load per row
+        src += src_pixels_per_line;
+        q4u8 = vld1q_u8(src);
+        src += src_pixels_per_line;
+        q5u8 = vld1q_u8(src);
+        src += src_pixels_per_line;
+        q6u8 = vld1q_u8(src);
+        src += src_pixels_per_line;
+
+        __builtin_prefetch(src);  // prefetch hints for the next three source rows
+        __builtin_prefetch(src + src_pixels_per_line);
+        __builtin_prefetch(src + src_pixels_per_line * 2);
+
+        q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+        q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+
+        q7u16 = vmlsl_u8(q7u16, d28u8, d1u8);
+        q8u16 = vmlsl_u8(q8u16, d29u8, d1u8);
+        q9u16 = vmlsl_u8(q9u16, d30u8, d1u8);
+        q10u16 = vmlsl_u8(q10u16, d31u8, d1u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+
+        q7u16 = vmlsl_u8(q7u16, d28u8, d4u8);
+        q8u16 = vmlsl_u8(q8u16, d29u8, d4u8);
+        q9u16 = vmlsl_u8(q9u16, d30u8, d4u8);
+        q10u16 = vmlsl_u8(q10u16, d31u8, d4u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+
+        q7u16 = vmlal_u8(q7u16, d28u8, d2u8);
+        q8u16 = vmlal_u8(q8u16, d29u8, d2u8);
+        q9u16 = vmlal_u8(q9u16, d30u8, d2u8);
+        q10u16 = vmlal_u8(q10u16, d31u8, d2u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+        q7u16 = vmlal_u8(q7u16, d28u8, d5u8);
+        q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+        q9u16 = vmlal_u8(q9u16, d30u8, d5u8);
+        q10u16 = vmlal_u8(q10u16, d31u8, d5u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+
+        q3u16 = vmull_u8(d28u8, d3u8);
+        q4u16 = vmull_u8(d29u8, d3u8);
+        q5u16 = vmull_u8(d30u8, d3u8);
+        q6u16 = vmull_u8(d31u8, d3u8);
+
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+
+        q7s16 = vqaddq_s16(q7s16, q3s16);  // tap-3 product joins the other taps with saturation
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q9s16 = vqaddq_s16(q9s16, q5s16);
+        q10s16 = vqaddq_s16(q10s16, q6s16);
+
+        d22u8 = vqrshrun_n_s16(q7s16, 7);  // rounding shift right by 7, saturate to u8
+        d23u8 = vqrshrun_n_s16(q8s16, 7);
+        d24u8 = vqrshrun_n_s16(q9s16, 7);
+        d25u8 = vqrshrun_n_s16(q10s16, 7);
+
+        if (yoffset == 0) {  // first pass only: store these 4 rows directly to dst
+            vst1_u8(dst_ptr, d22u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d23u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d24u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d25u8);
+            dst_ptr += dst_pitch;
+        } else {  // keep these 4 rows in tmp for the second pass
+            vst1_u8(tmpp, d22u8);
+            tmpp += 8;
+            vst1_u8(tmpp, d23u8);
+            tmpp += 8;
+            vst1_u8(tmpp, d24u8);
+            tmpp += 8;
+            vst1_u8(tmpp, d25u8);
+            tmpp += 8;
+        }
+    }
+    if (yoffset == 0)  // all 8 output rows were already written in the loop above
+        return;
+
+    // First pass on the remaining 5 lines; results stay in d26u8..d30u8
+    q3u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q7u8 = vld1q_u8(src);
+
+    q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+    q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+    q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+    q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+    q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1);
+
+    q8u16 = vmlsl_u8(q8u16, d27u8, d1u8);
+    q9u16 = vmlsl_u8(q9u16, d28u8, d1u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d1u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4);
+
+    q8u16 = vmlsl_u8(q8u16, d27u8, d4u8);
+    q9u16 = vmlsl_u8(q9u16, d28u8, d4u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d4u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2);
+
+    q8u16 = vmlal_u8(q8u16, d27u8, d2u8);
+    q9u16 = vmlal_u8(q9u16, d28u8, d2u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d2u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5);
+
+    q8u16 = vmlal_u8(q8u16, d27u8, d5u8);
+    q9u16 = vmlal_u8(q9u16, d28u8, d5u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d5u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d5u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3);
+
+    q3u16 = vmull_u8(d27u8, d3u8);
+    q4u16 = vmull_u8(d28u8, d3u8);
+    q5u16 = vmull_u8(d29u8, d3u8);
+    q6u16 = vmull_u8(d30u8, d3u8);
+    q7u16 = vmull_u8(d31u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+    q11s16 = vreinterpretq_s16_u16(q11u16);
+    q12s16 = vreinterpretq_s16_u16(q12u16);
+
+    q8s16 = vqaddq_s16(q8s16, q3s16);
+    q9s16 = vqaddq_s16(q9s16, q4s16);
+    q10s16 = vqaddq_s16(q10s16, q5s16);
+    q11s16 = vqaddq_s16(q11s16, q6s16);
+    q12s16 = vqaddq_s16(q12s16, q7s16);
+
+    d26u8 = vqrshrun_n_s16(q8s16, 7);
+    d27u8 = vqrshrun_n_s16(q9s16, 7);
+    d28u8 = vqrshrun_n_s16(q10s16, 7);
+    d29u8 = vqrshrun_n_s16(q11s16, 7);
+    d30u8 = vqrshrun_n_s16(q12s16, 7);
+
+    // Second pass: 8x8
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));  // |tap|: taps 1,4 are subtracted below, others added
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    tmpp = tmp;  // reload first-pass rows 0-7, two rows per 16-byte load
+    q9u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    q10u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    q11u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    q12u8 = vld1q_u8(tmpp);
+
+    d18u8 = vget_low_u8(q9u8);
+    d19u8 = vget_high_u8(q9u8);
+    d20u8 = vget_low_u8(q10u8);
+    d21u8 = vget_high_u8(q10u8);
+    d22u8 = vget_low_u8(q11u8);
+    d23u8 = vget_high_u8(q11u8);
+    d24u8 = vget_low_u8(q12u8);
+    d25u8 = vget_high_u8(q12u8);
+
+    for (i = 2; i > 0; i--) {  // two iterations, 4 output rows each
+        q3u16 = vmull_u8(d18u8, d0u8);
+        q4u16 = vmull_u8(d19u8, d0u8);
+        q5u16 = vmull_u8(d20u8, d0u8);
+        q6u16 = vmull_u8(d21u8, d0u8);
+
+        q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+        q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+        q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+        q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+        q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+        q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+        q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+        q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+        q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+        q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+        q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+        q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+        q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+        q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+        q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+        q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+        q7u16 = vmull_u8(d21u8, d3u8);
+        q8u16 = vmull_u8(d22u8, d3u8);
+        q9u16 = vmull_u8(d23u8, d3u8);
+        q10u16 = vmull_u8(d24u8, d3u8);
+
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+
+        q7s16 = vqaddq_s16(q7s16, q3s16);  // tap-3 product joins the other taps with saturation
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q9s16 = vqaddq_s16(q9s16, q5s16);
+        q10s16 = vqaddq_s16(q10s16, q6s16);
+
+        d6u8 = vqrshrun_n_s16(q7s16, 7);  // rounding shift right by 7, saturate to u8
+        d7u8 = vqrshrun_n_s16(q8s16, 7);
+        d8u8 = vqrshrun_n_s16(q9s16, 7);
+        d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+        d18u8 = d22u8;  // slide the row window down by 4 for the next iteration
+        d19u8 = d23u8;
+        d20u8 = d24u8;
+        d21u8 = d25u8;
+        d22u8 = d26u8;
+        d23u8 = d27u8;
+        d24u8 = d28u8;
+        d25u8 = d29u8;
+        d26u8 = d30u8;
+
+        vst1_u8(dst_ptr, d6u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d7u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d8u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d9u8);
+        dst_ptr += dst_pitch;
+    }
+    return;
+}
+
+void vp8_sixtap_predict16x16_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    unsigned char *src, *src_tmp, *dst, *tmpp;
+    unsigned char tmp[336];
+    int i, j;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8;
+    uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8;
+    uint8x8_t d28u8, d29u8, d30u8, d31u8;
+    int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+    uint8x16_t q3u8, q4u8;
+    uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16;
+    uint16x8_t q11u16, q12u16, q13u16, q15u16;
+    int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16;
+    int16x8_t q11s16, q12s16, q13s16, q15s16;
+
+    if (xoffset == 0) {  // secondpass_filter8x8_only
+        // load second_pass filter
+        dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+        d0s8 = vdup_lane_s8(dtmps8, 0);
+        d1s8 = vdup_lane_s8(dtmps8, 1);
+        d2s8 = vdup_lane_s8(dtmps8, 2);
+        d3s8 = vdup_lane_s8(dtmps8, 3);
+        d4s8 = vdup_lane_s8(dtmps8, 4);
+        d5s8 = vdup_lane_s8(dtmps8, 5);
+        d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+        d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+        d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+        d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+        d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+        d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+        // load src data
+        src_tmp = src_ptr - src_pixels_per_line * 2;
+        for (i = 0; i < 2; i++) {
+            src = src_tmp + i * 8;
+            dst = dst_ptr + i * 8;
+            d18u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d19u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d20u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d21u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d22u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            for (j = 0; j < 4; j++) {
+                d23u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+                d24u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+                d25u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+                d26u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+
+                q3u16 = vmull_u8(d18u8, d0u8);
+                q4u16 = vmull_u8(d19u8, d0u8);
+                q5u16 = vmull_u8(d20u8, d0u8);
+                q6u16 = vmull_u8(d21u8, d0u8);
+
+                q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+                q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+                q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+                q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+                q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+                q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+                q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+                q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+                q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+                q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+                q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+                q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+                q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+                q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+                q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+                q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+                q7u16 = vmull_u8(d21u8, d3u8);
+                q8u16 = vmull_u8(d22u8, d3u8);
+                q9u16 = vmull_u8(d23u8, d3u8);
+                q10u16 = vmull_u8(d24u8, d3u8);
+
+                q3s16 = vreinterpretq_s16_u16(q3u16);
+                q4s16 = vreinterpretq_s16_u16(q4u16);
+                q5s16 = vreinterpretq_s16_u16(q5u16);
+                q6s16 = vreinterpretq_s16_u16(q6u16);
+                q7s16 = vreinterpretq_s16_u16(q7u16);
+                q8s16 = vreinterpretq_s16_u16(q8u16);
+                q9s16 = vreinterpretq_s16_u16(q9u16);
+                q10s16 = vreinterpretq_s16_u16(q10u16);
+
+                q7s16 = vqaddq_s16(q7s16, q3s16);
+                q8s16 = vqaddq_s16(q8s16, q4s16);
+                q9s16 = vqaddq_s16(q9s16, q5s16);
+                q10s16 = vqaddq_s16(q10s16, q6s16);
+
+                d6u8 = vqrshrun_n_s16(q7s16, 7);
+                d7u8 = vqrshrun_n_s16(q8s16, 7);
+                d8u8 = vqrshrun_n_s16(q9s16, 7);
+                d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+                d18u8 = d22u8;
+                d19u8 = d23u8;
+                d20u8 = d24u8;
+                d21u8 = d25u8;
+                d22u8 = d26u8;
+
+                vst1_u8(dst, d6u8);
+                dst += dst_pitch;
+                vst1_u8(dst, d7u8);
+                dst += dst_pitch;
+                vst1_u8(dst, d8u8);
+                dst += dst_pitch;
+                vst1_u8(dst, d9u8);
+                dst += dst_pitch;
+            }
+        }
+        return;
+    }
+
+    // load first_pass filter
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    // First pass: output_height lines x output_width columns (9x4)
+    if (yoffset == 0) {  // firstpass_filter4x4_only
+        src = src_ptr - 2;
+        dst = dst_ptr;
+        for (i = 0; i < 8; i++) {
+            d6u8 = vld1_u8(src);
+            d7u8 = vld1_u8(src + 8);
+            d8u8 = vld1_u8(src + 16);
+            src += src_pixels_per_line;
+            d9u8 = vld1_u8(src);
+            d10u8 = vld1_u8(src + 8);
+            d11u8 = vld1_u8(src + 16);
+            src += src_pixels_per_line;
+
+            __builtin_prefetch(src);
+            __builtin_prefetch(src + src_pixels_per_line);
+
+            q6u16 = vmull_u8(d6u8, d0u8);
+            q7u16 = vmull_u8(d7u8, d0u8);
+            q8u16 = vmull_u8(d9u8, d0u8);
+            q9u16 = vmull_u8(d10u8, d0u8);
+
+            d20u8 = vext_u8(d6u8, d7u8, 1);
+            d21u8 = vext_u8(d9u8, d10u8, 1);
+            d22u8 = vext_u8(d7u8, d8u8, 1);
+            d23u8 = vext_u8(d10u8, d11u8, 1);
+            d24u8 = vext_u8(d6u8, d7u8, 4);
+            d25u8 = vext_u8(d9u8, d10u8, 4);
+            d26u8 = vext_u8(d7u8, d8u8, 4);
+            d27u8 = vext_u8(d10u8, d11u8, 4);
+            d28u8 = vext_u8(d6u8, d7u8, 5);
+            d29u8 = vext_u8(d9u8, d10u8, 5);
+
+            q6u16 = vmlsl_u8(q6u16, d20u8, d1u8);
+            q8u16 = vmlsl_u8(q8u16, d21u8, d1u8);
+            q7u16 = vmlsl_u8(q7u16, d22u8, d1u8);
+            q9u16 = vmlsl_u8(q9u16, d23u8, d1u8);
+            q6u16 = vmlsl_u8(q6u16, d24u8, d4u8);
+            q8u16 = vmlsl_u8(q8u16, d25u8, d4u8);
+            q7u16 = vmlsl_u8(q7u16, d26u8, d4u8);
+            q9u16 = vmlsl_u8(q9u16, d27u8, d4u8);
+            q6u16 = vmlal_u8(q6u16, d28u8, d5u8);
+            q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+
+            d20u8 = vext_u8(d7u8, d8u8, 5);
+            d21u8 = vext_u8(d10u8, d11u8, 5);
+            d22u8 = vext_u8(d6u8, d7u8, 2);
+            d23u8 = vext_u8(d9u8, d10u8, 2);
+            d24u8 = vext_u8(d7u8, d8u8, 2);
+            d25u8 = vext_u8(d10u8, d11u8, 2);
+            d26u8 = vext_u8(d6u8, d7u8, 3);
+            d27u8 = vext_u8(d9u8, d10u8, 3);
+            d28u8 = vext_u8(d7u8, d8u8, 3);
+            d29u8 = vext_u8(d10u8, d11u8, 3);
+
+            q7u16 = vmlal_u8(q7u16, d20u8, d5u8);
+            q9u16 = vmlal_u8(q9u16, d21u8, d5u8);
+            q6u16 = vmlal_u8(q6u16, d22u8, d2u8);
+            q8u16 = vmlal_u8(q8u16, d23u8, d2u8);
+            q7u16 = vmlal_u8(q7u16, d24u8, d2u8);
+            q9u16 = vmlal_u8(q9u16, d25u8, d2u8);
+
+            q10u16 = vmull_u8(d26u8, d3u8);
+            q11u16 = vmull_u8(d27u8, d3u8);
+            q12u16 = vmull_u8(d28u8, d3u8);
+            q15u16 = vmull_u8(d29u8, d3u8);
+
+            q6s16 = vreinterpretq_s16_u16(q6u16);
+            q7s16 = vreinterpretq_s16_u16(q7u16);
+            q8s16 = vreinterpretq_s16_u16(q8u16);
+            q9s16 = vreinterpretq_s16_u16(q9u16);
+            q10s16 = vreinterpretq_s16_u16(q10u16);
+            q11s16 = vreinterpretq_s16_u16(q11u16);
+            q12s16 = vreinterpretq_s16_u16(q12u16);
+            q15s16 = vreinterpretq_s16_u16(q15u16);
+
+            q6s16 = vqaddq_s16(q6s16, q10s16);
+            q8s16 = vqaddq_s16(q8s16, q11s16);
+            q7s16 = vqaddq_s16(q7s16, q12s16);
+            q9s16 = vqaddq_s16(q9s16, q15s16);
+
+            d6u8 = vqrshrun_n_s16(q6s16, 7);
+            d7u8 = vqrshrun_n_s16(q7s16, 7);
+            d8u8 = vqrshrun_n_s16(q8s16, 7);
+            d9u8 = vqrshrun_n_s16(q9s16, 7);
+
+            q3u8 = vcombine_u8(d6u8, d7u8);
+            q4u8 = vcombine_u8(d8u8, d9u8);
+            vst1q_u8(dst, q3u8);
+            dst += dst_pitch;
+            vst1q_u8(dst, q4u8);
+            dst += dst_pitch;
+        }
+        return;
+    }
+
+    src = src_ptr - 2 - src_pixels_per_line * 2;
+    tmpp = tmp;
+    for (i = 0; i < 7; i++) {
+        d6u8 = vld1_u8(src);
+        d7u8 = vld1_u8(src + 8);
+        d8u8 = vld1_u8(src + 16);
+        src += src_pixels_per_line;
+        d9u8 = vld1_u8(src);
+        d10u8 = vld1_u8(src + 8);
+        d11u8 = vld1_u8(src + 16);
+        src += src_pixels_per_line;
+        d12u8 = vld1_u8(src);
+        d13u8 = vld1_u8(src + 8);
+        d14u8 = vld1_u8(src + 16);
+        src += src_pixels_per_line;
+
+        __builtin_prefetch(src);
+        __builtin_prefetch(src + src_pixels_per_line);
+        __builtin_prefetch(src + src_pixels_per_line * 2);
+
+        q8u16 = vmull_u8(d6u8, d0u8);
+        q9u16 = vmull_u8(d7u8, d0u8);
+        q10u16 = vmull_u8(d9u8, d0u8);
+        q11u16 = vmull_u8(d10u8, d0u8);
+        q12u16 = vmull_u8(d12u8, d0u8);
+        q13u16 = vmull_u8(d13u8, d0u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 1);
+        d29u8 = vext_u8(d9u8, d10u8, 1);
+        d30u8 = vext_u8(d12u8, d13u8, 1);
+        q8u16 = vmlsl_u8(q8u16, d28u8, d1u8);
+        q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+        q12u16 = vmlsl_u8(q12u16, d30u8, d1u8);
+        d28u8 = vext_u8(d7u8, d8u8, 1);
+        d29u8 = vext_u8(d10u8, d11u8, 1);
+        d30u8 = vext_u8(d13u8, d14u8, 1);
+        q9u16  = vmlsl_u8(q9u16, d28u8, d1u8);
+        q11u16 = vmlsl_u8(q11u16, d29u8, d1u8);
+        q13u16 = vmlsl_u8(q13u16, d30u8, d1u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 4);
+        d29u8 = vext_u8(d9u8, d10u8, 4);
+        d30u8 = vext_u8(d12u8, d13u8, 4);
+        q8u16 = vmlsl_u8(q8u16, d28u8, d4u8);
+        q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+        q12u16 = vmlsl_u8(q12u16, d30u8, d4u8);
+        d28u8 = vext_u8(d7u8, d8u8, 4);
+        d29u8 = vext_u8(d10u8, d11u8, 4);
+        d30u8 = vext_u8(d13u8, d14u8, 4);
+        q9u16 = vmlsl_u8(q9u16, d28u8, d4u8);
+        q11u16 = vmlsl_u8(q11u16, d29u8, d4u8);
+        q13u16 = vmlsl_u8(q13u16, d30u8, d4u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 5);
+        d29u8 = vext_u8(d9u8, d10u8, 5);
+        d30u8 = vext_u8(d12u8, d13u8, 5);
+        q8u16 = vmlal_u8(q8u16, d28u8, d5u8);
+        q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+        q12u16 = vmlal_u8(q12u16, d30u8, d5u8);
+        d28u8 = vext_u8(d7u8, d8u8, 5);
+        d29u8 = vext_u8(d10u8, d11u8, 5);
+        d30u8 = vext_u8(d13u8, d14u8, 5);
+        q9u16 = vmlal_u8(q9u16, d28u8, d5u8);
+        q11u16 = vmlal_u8(q11u16, d29u8, d5u8);
+        q13u16 = vmlal_u8(q13u16, d30u8, d5u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 2);
+        d29u8 = vext_u8(d9u8, d10u8, 2);
+        d30u8 = vext_u8(d12u8, d13u8, 2);
+        q8u16 = vmlal_u8(q8u16, d28u8, d2u8);
+        q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+        q12u16 = vmlal_u8(q12u16, d30u8, d2u8);
+        d28u8 = vext_u8(d7u8, d8u8, 2);
+        d29u8 = vext_u8(d10u8, d11u8, 2);
+        d30u8 = vext_u8(d13u8, d14u8, 2);
+        q9u16 = vmlal_u8(q9u16, d28u8, d2u8);
+        q11u16 = vmlal_u8(q11u16, d29u8, d2u8);
+        q13u16 = vmlal_u8(q13u16, d30u8, d2u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 3);
+        d29u8 = vext_u8(d9u8, d10u8, 3);
+        d30u8 = vext_u8(d12u8, d13u8, 3);
+        d15u8 = vext_u8(d7u8, d8u8, 3);
+        d31u8 = vext_u8(d10u8, d11u8, 3);
+        d6u8  = vext_u8(d13u8, d14u8, 3);
+        q4u16 = vmull_u8(d28u8, d3u8);
+        q5u16 = vmull_u8(d29u8, d3u8);
+        q6u16 = vmull_u8(d30u8, d3u8);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+        q12s16 = vreinterpretq_s16_u16(q12u16);
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q10s16 = vqaddq_s16(q10s16, q5s16);
+        q12s16 = vqaddq_s16(q12s16, q6s16);
+
+        q6u16 = vmull_u8(d15u8, d3u8);
+        q7u16 = vmull_u8(d31u8, d3u8);
+        q3u16 = vmull_u8(d6u8, d3u8);
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q11s16 = vreinterpretq_s16_u16(q11u16);
+        q13s16 = vreinterpretq_s16_u16(q13u16);
+        q9s16 = vqaddq_s16(q9s16, q6s16);
+        q11s16 = vqaddq_s16(q11s16, q7s16);
+        q13s16 = vqaddq_s16(q13s16, q3s16);
+
+        d6u8 = vqrshrun_n_s16(q8s16, 7);
+        d7u8 = vqrshrun_n_s16(q9s16, 7);
+        d8u8 = vqrshrun_n_s16(q10s16, 7);
+        d9u8 = vqrshrun_n_s16(q11s16, 7);
+        d10u8 = vqrshrun_n_s16(q12s16, 7);
+        d11u8 = vqrshrun_n_s16(q13s16, 7);
+
+        vst1_u8(tmpp, d6u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d7u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d8u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d9u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d10u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d11u8);
+        tmpp += 8;
+    }
+
+    // Second pass: 16x16
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    for (i = 0; i < 2; i++) {
+        dst = dst_ptr + 8 * i;
+        tmpp = tmp + 8 * i;
+        d18u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d19u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d20u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d21u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d22u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        for (j = 0; j < 4; j++) {
+            d23u8 = vld1_u8(tmpp);
+            tmpp += 16;
+            d24u8 = vld1_u8(tmpp);
+            tmpp += 16;
+            d25u8 = vld1_u8(tmpp);
+            tmpp += 16;
+            d26u8 = vld1_u8(tmpp);
+            tmpp += 16;
+
+            q3u16 = vmull_u8(d18u8, d0u8);
+            q4u16 = vmull_u8(d19u8, d0u8);
+            q5u16 = vmull_u8(d20u8, d0u8);
+            q6u16 = vmull_u8(d21u8, d0u8);
+
+            q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+            q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+            q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+            q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+            q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+            q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+            q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+            q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+            q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+            q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+            q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+            q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+            q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+            q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+            q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+            q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+            q7u16 = vmull_u8(d21u8, d3u8);
+            q8u16 = vmull_u8(d22u8, d3u8);
+            q9u16 = vmull_u8(d23u8, d3u8);
+            q10u16 = vmull_u8(d24u8, d3u8);
+
+            q3s16 = vreinterpretq_s16_u16(q3u16);
+            q4s16 = vreinterpretq_s16_u16(q4u16);
+            q5s16 = vreinterpretq_s16_u16(q5u16);
+            q6s16 = vreinterpretq_s16_u16(q6u16);
+            q7s16 = vreinterpretq_s16_u16(q7u16);
+            q8s16 = vreinterpretq_s16_u16(q8u16);
+            q9s16 = vreinterpretq_s16_u16(q9u16);
+            q10s16 = vreinterpretq_s16_u16(q10u16);
+
+            q7s16 = vqaddq_s16(q7s16, q3s16);
+            q8s16 = vqaddq_s16(q8s16, q4s16);
+            q9s16 = vqaddq_s16(q9s16, q5s16);
+            q10s16 = vqaddq_s16(q10s16, q6s16);
+
+            d6u8 = vqrshrun_n_s16(q7s16, 7);
+            d7u8 = vqrshrun_n_s16(q8s16, 7);
+            d8u8 = vqrshrun_n_s16(q9s16, 7);
+            d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+            d18u8 = d22u8;
+            d19u8 = d23u8;
+            d20u8 = d24u8;
+            d21u8 = d25u8;
+            d22u8 = d26u8;
+
+            vst1_u8(dst, d6u8);
+            dst += dst_pitch;
+            vst1_u8(dst, d7u8);
+            dst += dst_pitch;
+            vst1_u8(dst, d8u8);
+            dst += dst_pitch;
+            vst1_u8(dst, d9u8);
+            dst += dst_pitch;
+        }
+    }
+    return;
+}
diff --git a/vp8/common/arm/neon/variance_neon.asm b/vp8/common/arm/neon/variance_neon.asm
deleted file mode 100644
index 8ecad72..0000000
--- a/vp8/common/arm/neon/variance_neon.asm
+++ /dev/null
@@ -1,291 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_variance16x16_neon|
-    EXPORT  |vp8_variance16x8_neon|
-    EXPORT  |vp8_variance8x16_neon|
-    EXPORT  |vp8_variance8x8_neon|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0    unsigned char *src_ptr
-; r1    int source_stride
-; r2    unsigned char *ref_ptr
-; r3    int  recon_stride
-; stack unsigned int *sse
-|vp8_variance16x16_neon| PROC
-    vpush           {q5}
-    vmov.i8         q8, #0                      ;q8 - sum
-    vmov.i8         q9, #0                      ;q9, q10 - sse
-    vmov.i8         q10, #0
-
-    mov             r12, #8
-
-variance16x16_neon_loop
-    vld1.8          {q0}, [r0], r1              ;Load up source and reference
-    vld1.8          {q2}, [r2], r3
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q3}, [r2], r3
-
-    vsubl.u8        q11, d0, d4                 ;calculate diff
-    vsubl.u8        q12, d1, d5
-    vsubl.u8        q13, d2, d6
-    vsubl.u8        q14, d3, d7
-
-    ;VPADAL adds adjacent pairs of elements of a vector, and accumulates
-    ;the results into the elements of the destination vector. The explanation
-    ;in ARM guide is wrong.
-    vpadal.s16      q8, q11                     ;calculate sum
-    vmlal.s16       q9, d22, d22                ;calculate sse
-    vmlal.s16       q10, d23, d23
-
-    subs            r12, r12, #1
-
-    vpadal.s16      q8, q12
-    vmlal.s16       q9, d24, d24
-    vmlal.s16       q10, d25, d25
-    vpadal.s16      q8, q13
-    vmlal.s16       q9, d26, d26
-    vmlal.s16       q10, d27, d27
-    vpadal.s16      q8, q14
-    vmlal.s16       q9, d28, d28
-    vmlal.s16       q10, d29, d29
-
-    bne             variance16x16_neon_loop
-
-    vadd.u32        q10, q9, q10                ;accumulate sse
-    vpaddl.s32      q0, q8                      ;accumulate sum
-
-    ldr             r12, [sp, #16]              ;load *sse from stack
-
-    vpaddl.u32      q1, q10
-    vadd.s64        d0, d0, d1
-    vadd.u64        d1, d2, d3
-
-    ;vmov.32        r0, d0[0]                   ;this instruction costs a lot
-    ;vmov.32        r1, d1[0]
-    ;mul            r0, r0, r0
-    ;str            r1, [r12]
-    ;sub            r0, r1, r0, lsr #8
-
-    ; while sum is signed, sum * sum is always positive and must be treated as
-    ; unsigned to avoid propagating the sign bit.
-    vmull.s32       q5, d0, d0
-    vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.u32        d10, d10, #8
-    vsub.u32        d0, d1, d10
-
-    vmov.32         r0, d0[0]                   ;return
-
-    vpop            {q5}
-    bx              lr
-
-    ENDP
-
-;================================
-;unsigned int vp8_variance16x8_c(
-;    unsigned char *src_ptr,
-;    int  source_stride,
-;    unsigned char *ref_ptr,
-;    int  recon_stride,
-;   unsigned int *sse)
-|vp8_variance16x8_neon| PROC
-    vpush           {q5}
-
-    vmov.i8         q8, #0                      ;q8 - sum
-    vmov.i8         q9, #0                      ;q9, q10 - sse
-    vmov.i8         q10, #0
-
-    mov             r12, #4
-
-variance16x8_neon_loop
-    vld1.8          {q0}, [r0], r1              ;Load up source and reference
-    vld1.8          {q2}, [r2], r3
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q3}, [r2], r3
-
-    vsubl.u8        q11, d0, d4                 ;calculate diff
-    vsubl.u8        q12, d1, d5
-    vsubl.u8        q13, d2, d6
-    vsubl.u8        q14, d3, d7
-
-    vpadal.s16      q8, q11                     ;calculate sum
-    vmlal.s16       q9, d22, d22                ;calculate sse
-    vmlal.s16       q10, d23, d23
-
-    subs            r12, r12, #1
-
-    vpadal.s16      q8, q12
-    vmlal.s16       q9, d24, d24
-    vmlal.s16       q10, d25, d25
-    vpadal.s16      q8, q13
-    vmlal.s16       q9, d26, d26
-    vmlal.s16       q10, d27, d27
-    vpadal.s16      q8, q14
-    vmlal.s16       q9, d28, d28
-    vmlal.s16       q10, d29, d29
-
-    bne             variance16x8_neon_loop
-
-    vadd.u32        q10, q9, q10                ;accumulate sse
-    vpaddl.s32      q0, q8                      ;accumulate sum
-
-    ldr             r12, [sp, #16]              ;load *sse from stack
-
-    vpaddl.u32      q1, q10
-    vadd.s64        d0, d0, d1
-    vadd.u64        d1, d2, d3
-
-    vmull.s32       q5, d0, d0
-    vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.u32        d10, d10, #7
-    vsub.u32        d0, d1, d10
-
-    vmov.32         r0, d0[0]                   ;return
-
-    vpop            {q5}
-    bx              lr
-
-    ENDP
-
-;=================================
-;unsigned int vp8_variance8x16_c(
-;    unsigned char *src_ptr,
-;    int  source_stride,
-;    unsigned char *ref_ptr,
-;    int  recon_stride,
-;   unsigned int *sse)
-
-|vp8_variance8x16_neon| PROC
-    vpush           {q5}
-
-    vmov.i8         q8, #0                      ;q8 - sum
-    vmov.i8         q9, #0                      ;q9, q10 - sse
-    vmov.i8         q10, #0
-
-    mov             r12, #8
-
-variance8x16_neon_loop
-    vld1.8          {d0}, [r0], r1              ;Load up source and reference
-    vld1.8          {d4}, [r2], r3
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d6}, [r2], r3
-
-    vsubl.u8        q11, d0, d4                 ;calculate diff
-    vsubl.u8        q12, d2, d6
-
-    vpadal.s16      q8, q11                     ;calculate sum
-    vmlal.s16       q9, d22, d22                ;calculate sse
-    vmlal.s16       q10, d23, d23
-
-    subs            r12, r12, #1
-
-    vpadal.s16      q8, q12
-    vmlal.s16       q9, d24, d24
-    vmlal.s16       q10, d25, d25
-
-    bne             variance8x16_neon_loop
-
-    vadd.u32        q10, q9, q10                ;accumulate sse
-    vpaddl.s32      q0, q8                      ;accumulate sum
-
-    ldr             r12, [sp, #16]              ;load *sse from stack
-
-    vpaddl.u32      q1, q10
-    vadd.s64        d0, d0, d1
-    vadd.u64        d1, d2, d3
-
-    vmull.s32       q5, d0, d0
-    vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.u32        d10, d10, #7
-    vsub.u32        d0, d1, d10
-
-    vmov.32         r0, d0[0]                   ;return
-
-    vpop            {q5}
-    bx              lr
-
-    ENDP
-
-;==================================
-; r0    unsigned char *src_ptr
-; r1    int source_stride
-; r2    unsigned char *ref_ptr
-; r3    int  recon_stride
-; stack unsigned int *sse
-|vp8_variance8x8_neon| PROC
-    vpush           {q5}
-
-    vmov.i8         q8, #0                      ;q8 - sum
-    vmov.i8         q9, #0                      ;q9, q10 - sse
-    vmov.i8         q10, #0
-
-    mov             r12, #2
-
-variance8x8_neon_loop
-    vld1.8          {d0}, [r0], r1              ;Load up source and reference
-    vld1.8          {d4}, [r2], r3
-    vld1.8          {d1}, [r0], r1
-    vld1.8          {d5}, [r2], r3
-    vld1.8          {d2}, [r0], r1
-    vld1.8          {d6}, [r2], r3
-    vld1.8          {d3}, [r0], r1
-    vld1.8          {d7}, [r2], r3
-
-    vsubl.u8        q11, d0, d4                 ;calculate diff
-    vsubl.u8        q12, d1, d5
-    vsubl.u8        q13, d2, d6
-    vsubl.u8        q14, d3, d7
-
-    vpadal.s16      q8, q11                     ;calculate sum
-    vmlal.s16       q9, d22, d22                ;calculate sse
-    vmlal.s16       q10, d23, d23
-
-    subs            r12, r12, #1
-
-    vpadal.s16      q8, q12
-    vmlal.s16       q9, d24, d24
-    vmlal.s16       q10, d25, d25
-    vpadal.s16      q8, q13
-    vmlal.s16       q9, d26, d26
-    vmlal.s16       q10, d27, d27
-    vpadal.s16      q8, q14
-    vmlal.s16       q9, d28, d28
-    vmlal.s16       q10, d29, d29
-
-    bne             variance8x8_neon_loop
-
-    vadd.u32        q10, q9, q10                ;accumulate sse
-    vpaddl.s32      q0, q8                      ;accumulate sum
-
-    ldr             r12, [sp, #16]              ;load *sse from stack
-
-    vpaddl.u32      q1, q10
-    vadd.s64        d0, d0, d1
-    vadd.u64        d1, d2, d3
-
-    vmull.s32       q5, d0, d0
-    vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.u32        d10, d10, #6
-    vsub.u32        d0, d1, d10
-
-    vmov.32         r0, d0[0]                   ;return
-
-    vpop            {q5}
-    bx              lr
-
-    ENDP
-
-    END
diff --git a/vp8/common/arm/neon/variance_neon.c b/vp8/common/arm/neon/variance_neon.c
new file mode 100644
index 0000000..afd2dc3
--- /dev/null
+++ b/vp8/common/arm/neon/variance_neon.c
@@ -0,0 +1,323 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#ifdef _MSC_VER
+#define __builtin_prefetch(x)
+#endif
+
+unsigned int vp8_variance16x16_neon(
+        const unsigned char *src_ptr,  // top-left of the 16x16 source block
+        int source_stride,  // byte step applied to src_ptr after each row
+        const unsigned char *ref_ptr,  // top-left of the reference block
+        int recon_stride,  // byte step applied to ref_ptr after each row
+        unsigned int *sse) {  // out: sum of squared differences
+    int i;
+    int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+    uint32x2_t d0u32, d10u32;
+    int64x1_t d0s64, d1s64;
+    uint8x16_t q0u8, q1u8, q2u8, q3u8;
+    uint16x8_t q11u16, q12u16, q13u16, q14u16;
+    int32x4_t q8s32, q9s32, q10s32;
+    int64x2_t q0s64, q1s64, q5s64;
+
+    q8s32 = vdupq_n_s32(0);  // q8: running sum of (src - ref)
+    q9s32 = vdupq_n_s32(0);  // q9, q10: running sums of (src - ref)^2
+    q10s32 = vdupq_n_s32(0);
+
+    for (i = 0; i < 8; i++) {  // each pass handles two 16-pixel rows
+        q0u8 = vld1q_u8(src_ptr);
+        src_ptr += source_stride;
+        q1u8 = vld1q_u8(src_ptr);
+        src_ptr += source_stride;
+        __builtin_prefetch(src_ptr);
+
+        q2u8 = vld1q_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        q3u8 = vld1q_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        __builtin_prefetch(ref_ptr);
+        // Widening src - ref; the u16 results are reinterpreted as s16 below.
+        q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
+        q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
+        q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
+        q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
+        // For each 8 diffs: q8 += diffs (pairwise), q9/q10 += diffs^2.
+        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+        d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+        q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+        q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+        d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+        d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+        q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+        q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+        d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+        d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
+        q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
+        q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
+
+        d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+        d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
+        q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+        q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+    }
+
+    q10s32 = vaddq_s32(q10s32, q9s32);  // combine both sse accumulators
+    q0s64 = vpaddlq_s32(q8s32);  // fold sum lanes into 64-bit pairs
+    q1s64 = vpaddlq_s32(q10s32);  // fold sse lanes into 64-bit pairs
+    // d0s64 = total sum of differences, d1s64 = total sse.
+    d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+    d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+    // Square the sum (lane 0 of the product); sse is stored unmodified.
+    q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+                      vreinterpret_s32_s64(d0s64));
+    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+    // Return sse - ((sum * sum) >> 8), i.e. / 256 pixels, on 32-bit lane 0.
+    d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
+    d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+    return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vp8_variance16x8_neon(
+        const unsigned char *src_ptr,  // top-left of the 16x8 source block
+        int source_stride,  // byte step applied to src_ptr after each row
+        const unsigned char *ref_ptr,  // top-left of the reference block
+        int recon_stride,  // byte step applied to ref_ptr after each row
+        unsigned int *sse) {  // out: sum of squared differences
+    int i;
+    int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+    uint32x2_t d0u32, d10u32;
+    int64x1_t d0s64, d1s64;
+    uint8x16_t q0u8, q1u8, q2u8, q3u8;
+    uint16x8_t q11u16, q12u16, q13u16, q14u16;
+    int32x4_t q8s32, q9s32, q10s32;
+    int64x2_t q0s64, q1s64, q5s64;
+
+    q8s32 = vdupq_n_s32(0);  // q8: running sum of (src - ref)
+    q9s32 = vdupq_n_s32(0);  // q9, q10: running sums of (src - ref)^2
+    q10s32 = vdupq_n_s32(0);
+
+    for (i = 0; i < 4; i++) {  // variance16x8_neon_loop
+        q0u8 = vld1q_u8(src_ptr);  // two 16-pixel rows per pass
+        src_ptr += source_stride;
+        q1u8 = vld1q_u8(src_ptr);
+        src_ptr += source_stride;
+        __builtin_prefetch(src_ptr);
+
+        q2u8 = vld1q_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        q3u8 = vld1q_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        __builtin_prefetch(ref_ptr);
+        // Widening src - ref; the u16 results are reinterpreted as s16 below.
+        q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
+        q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
+        q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
+        q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
+        // For each 8 diffs: q8 += diffs (pairwise), q9/q10 += diffs^2.
+        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+        d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+        q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+        q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+        d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+        d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+        q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+        q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+        d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+        d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
+        q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
+        q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
+
+        d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+        d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
+        q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+        q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+    }
+
+    q10s32 = vaddq_s32(q10s32, q9s32);  // combine both sse accumulators
+    q0s64 = vpaddlq_s32(q8s32);  // fold sum lanes into 64-bit pairs
+    q1s64 = vpaddlq_s32(q10s32);  // fold sse lanes into 64-bit pairs
+    // d0s64 = total sum of differences, d1s64 = total sse.
+    d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+    d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+    // Square the sum (lane 0 of the product); sse is stored unmodified.
+    q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+                      vreinterpret_s32_s64(d0s64));
+    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+    // Return sse - ((sum * sum) >> 7), i.e. / 128 pixels, on 32-bit lane 0.
+    d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
+    d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+    return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vp8_variance8x16_neon(
+        const unsigned char *src_ptr,  // top-left of the 8x16 source block
+        int source_stride,  // byte step applied to src_ptr after each row
+        const unsigned char *ref_ptr,  // top-left of the reference block
+        int recon_stride,  // byte step applied to ref_ptr after each row
+        unsigned int *sse) {  // out: sum of squared differences
+    int i;
+    uint8x8_t d0u8, d2u8, d4u8, d6u8;
+    int16x4_t d22s16, d23s16, d24s16, d25s16;
+    uint32x2_t d0u32, d10u32;
+    int64x1_t d0s64, d1s64;
+    uint16x8_t q11u16, q12u16;
+    int32x4_t q8s32, q9s32, q10s32;
+    int64x2_t q0s64, q1s64, q5s64;
+
+    q8s32 = vdupq_n_s32(0);  // q8: running sum of (src - ref)
+    q9s32 = vdupq_n_s32(0);  // q9, q10: running sums of (src - ref)^2
+    q10s32 = vdupq_n_s32(0);
+
+    for (i = 0; i < 8; i++) {  // variance8x16_neon_loop
+        d0u8 = vld1_u8(src_ptr);  // two 8-pixel rows per pass
+        src_ptr += source_stride;
+        d2u8 = vld1_u8(src_ptr);
+        src_ptr += source_stride;
+        __builtin_prefetch(src_ptr);
+
+        d4u8 = vld1_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        d6u8 = vld1_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        __builtin_prefetch(ref_ptr);
+        // Widening src - ref; the u16 results are reinterpreted as s16 below.
+        q11u16 = vsubl_u8(d0u8, d4u8);
+        q12u16 = vsubl_u8(d2u8, d6u8);
+        // For each 8 diffs: q8 += diffs (pairwise), q9/q10 += diffs^2.
+        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+        d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+        q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+        q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+        d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+        d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+        q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+        q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+    }
+
+    q10s32 = vaddq_s32(q10s32, q9s32);  // combine both sse accumulators
+    q0s64 = vpaddlq_s32(q8s32);  // fold sum lanes into 64-bit pairs
+    q1s64 = vpaddlq_s32(q10s32);  // fold sse lanes into 64-bit pairs
+    // d0s64 = total sum of differences, d1s64 = total sse.
+    d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+    d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+    // Square the sum (lane 0 of the product); sse is stored unmodified.
+    q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+                      vreinterpret_s32_s64(d0s64));
+    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+    // Return sse - ((sum * sum) >> 7), i.e. / 128 pixels, on 32-bit lane 0.
+    d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
+    d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+    return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vp8_variance8x8_neon(
+        const unsigned char *src_ptr,  // top-left of the 8x8 source block
+        int source_stride,  // byte step applied to src_ptr after each row
+        const unsigned char *ref_ptr,  // top-left of the reference block
+        int recon_stride,  // byte step applied to ref_ptr after each row
+        unsigned int *sse) {  // out: sum of squared differences
+    int i;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
+    int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+    uint32x2_t d0u32, d10u32;
+    int64x1_t d0s64, d1s64;
+    uint16x8_t q11u16, q12u16, q13u16, q14u16;
+    int32x4_t q8s32, q9s32, q10s32;
+    int64x2_t q0s64, q1s64, q5s64;
+
+    q8s32 = vdupq_n_s32(0);  // q8: running sum of (src - ref)
+    q9s32 = vdupq_n_s32(0);  // q9, q10: running sums of (src - ref)^2
+    q10s32 = vdupq_n_s32(0);
+
+    for (i = 0; i < 2; i++) {  // variance8x8_neon_loop
+        d0u8 = vld1_u8(src_ptr);  // four 8-pixel rows per pass
+        src_ptr += source_stride;
+        d1u8 = vld1_u8(src_ptr);
+        src_ptr += source_stride;
+        d2u8 = vld1_u8(src_ptr);
+        src_ptr += source_stride;
+        d3u8 = vld1_u8(src_ptr);
+        src_ptr += source_stride;
+
+        d4u8 = vld1_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        d5u8 = vld1_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        d6u8 = vld1_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        d7u8 = vld1_u8(ref_ptr);
+        ref_ptr += recon_stride;
+        // Widening src - ref; the u16 results are reinterpreted as s16 below.
+        q11u16 = vsubl_u8(d0u8, d4u8);
+        q12u16 = vsubl_u8(d1u8, d5u8);
+        q13u16 = vsubl_u8(d2u8, d6u8);
+        q14u16 = vsubl_u8(d3u8, d7u8);
+        // For each 8 diffs: q8 += diffs (pairwise), q9/q10 += diffs^2.
+        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+        d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+        q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+        q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+        d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+        d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+        q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+        q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+        d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+        d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
+        q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
+        q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
+
+        d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+        d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+        q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
+        q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+        q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+    }
+
+    q10s32 = vaddq_s32(q10s32, q9s32);  // combine both sse accumulators
+    q0s64 = vpaddlq_s32(q8s32);  // fold sum lanes into 64-bit pairs
+    q1s64 = vpaddlq_s32(q10s32);  // fold sse lanes into 64-bit pairs
+    // d0s64 = total sum of differences, d1s64 = total sse.
+    d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+    d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+    // Square the sum (lane 0 of the product); sse is stored unmodified.
+    q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+                      vreinterpret_s32_s64(d0s64));
+    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+    // Return sse - ((sum * sum) >> 6), i.e. / 64 pixels, on 32-bit lane 0.
+    d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6);
+    d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+    return vget_lane_u32(d0u32, 0);
+}
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 7d0fbf6..8e546d5 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -310,6 +310,7 @@
     }
 }
 
+#if CONFIG_POSTPROC
 static void vp8_de_mblock(YV12_BUFFER_CONFIG         *post,
                           int                         q)
 {
@@ -382,6 +383,7 @@
         vp8_yv12_copy_frame(source, post);
     }
 }
+#endif
 
 #if !(CONFIG_TEMPORAL_DENOISING)
 void vp8_de_noise(VP8_COMMON                 *cm,
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 130d965..789b0de 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -532,7 +532,7 @@
 # Denoiser filter
 #
 if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
-    add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset";
+    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude";
     specialize qw/vp8_denoiser_filter sse2 neon/;
 }
 
diff --git a/vp8/encoder/arm/neon/denoising_neon.c b/vp8/encoder/arm/neon/denoising_neon.c
index 23dc0a9..1bebe8f 100644
--- a/vp8/encoder/arm/neon/denoising_neon.c
+++ b/vp8/encoder/arm/neon/denoising_neon.c
@@ -45,10 +45,12 @@
  *      [16, 255]       3               6                    7
  */
 
-int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
-                             YV12_BUFFER_CONFIG *running_avg,
-                             MACROBLOCK *signal, unsigned int motion_magnitude,
-                             int y_offset, int uv_offset) {
+int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y,
+                             int mc_running_avg_y_stride,
+                             unsigned char *running_avg_y,
+                             int running_avg_y_stride,
+                             unsigned char *sig, int sig_stride,
+                             unsigned int motion_magnitude) {
     /* If motion_magnitude is small, making the denoiser more aggressive by
      * increasing the adjustment for each level, level1 adjustment is
      * increased, the deltas stay the same.
@@ -60,14 +62,6 @@
     const uint8x16_t v_level1_threshold = vdupq_n_u8(4);
     const uint8x16_t v_level2_threshold = vdupq_n_u8(8);
     const uint8x16_t v_level3_threshold = vdupq_n_u8(16);
-
-    /* Local variables for array pointers and strides. */
-    unsigned char *sig = signal->thismb;
-    int            sig_stride = 16;
-    unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
-    int            mc_running_avg_y_stride = mc_running_avg->y_stride;
-    unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
-    int            running_avg_y_stride = running_avg->y_stride;
     int64x2_t v_sum_diff_total = vdupq_n_s64(0);
 
     /* Go over lines. */
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 7819265..bfce280 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -51,17 +51,13 @@
  * [16, 255]              6                                    7
  */
 
-int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
-                          YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal,
-                          unsigned int motion_magnitude, int y_offset,
-                          int uv_offset)
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride,
+                          unsigned char *running_avg_y, int avg_y_stride,
+                          unsigned char *sig, int sig_stride,
+                          unsigned int motion_magnitude)
 {
-    unsigned char *sig = signal->thismb;
-    int sig_stride = 16;
-    unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
-    int mc_avg_y_stride = mc_running_avg->y_stride;
-    unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
-    int avg_y_stride = running_avg->y_stride;
+    unsigned char *running_avg_y_start = running_avg_y;
+    unsigned char *sig_start = sig;
     int r, c, i;
     int sum_diff = 0;
     int adj_val[3] = {3, 4, 6};
@@ -130,8 +126,7 @@
     if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
         return COPY_BLOCK;
 
-    vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride,
-                      signal->thismb, sig_stride);
+    vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
     return FILTER_BLOCK;
 }
 
@@ -285,12 +280,17 @@
 
     if (decision == FILTER_BLOCK)
     {
+        unsigned char *mc_running_avg_y =
+            denoiser->yv12_mc_running_avg.y_buffer + recon_yoffset;
+        int mc_avg_y_stride = denoiser->yv12_mc_running_avg.y_stride;
+        unsigned char *running_avg_y =
+            denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset;
+        int avg_y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride;
+
         /* Filter. */
-        decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
-                                       &denoiser->yv12_running_avg[INTRA_FRAME],
-                                       x,
-                                       motion_magnitude2,
-                                       recon_yoffset, recon_uvoffset);
+        decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride,
+                                         running_avg_y, avg_y_stride,
+                                         x->thismb, 16, motion_magnitude2);
     }
     if (decision == COPY_BLOCK)
     {
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 560134e..e95e44f 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -5227,7 +5227,7 @@
                 int y_samples = orig->y_height * orig->y_width ;
                 int uv_samples = orig->uv_height * orig->uv_width ;
                 int t_samples = y_samples + 2 * uv_samples;
-                double sq_error, sq_error2;
+                double sq_error;
 
                 ye = calc_plane_error(orig->y_buffer, orig->y_stride,
                   recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height);
@@ -5250,6 +5250,7 @@
 #if CONFIG_POSTPROC
                 {
                     YV12_BUFFER_CONFIG      *pp = &cm->post_proc_buffer;
+                    double sq_error2;
                     double frame_psnr2, frame_ssim2 = 0;
                     double weight = 0;
 
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 513b2bf..4dc0d95 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -98,6 +98,7 @@
     unsigned int i, j, k;
     int modifier;
     int byte = 0;
+    const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
 
     for (i = 0,k = 0; i < block_size; i++)
     {
@@ -114,7 +115,7 @@
              */
             modifier  *= modifier;
             modifier  *= 3;
-            modifier  += 1 << (strength - 1);
+            modifier  += rounding;
             modifier >>= strength;
 
             if (modifier > 16)
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
index cceb826..d1f76b2 100644
--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -22,17 +22,14 @@
     signed char e[16];
 };
 
-int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
-                             YV12_BUFFER_CONFIG *running_avg,
-                             MACROBLOCK *signal, unsigned int motion_magnitude,
-                             int y_offset, int uv_offset)
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
+                             int mc_avg_y_stride,
+                             unsigned char *running_avg_y, int avg_y_stride,
+                             unsigned char *sig, int sig_stride,
+                             unsigned int motion_magnitude)
 {
-    unsigned char *sig = signal->thismb;
-    int sig_stride = 16;
-    unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
-    int mc_avg_y_stride = mc_running_avg->y_stride;
-    unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
-    int avg_y_stride = running_avg->y_stride;
+    unsigned char *running_avg_y_start = running_avg_y;
+    unsigned char *sig_start = sig;
     int r;
     __m128i acc_diff = _mm_setzero_si128();
     const __m128i k_0 = _mm_setzero_si128();
@@ -114,7 +111,6 @@
         }
     }
 
-    vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride,
-                      signal->thismb, sig_stride);
+    vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
     return FILTER_BLOCK;
 }
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
deleted file mode 100644
index 7b1dc11..0000000
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-%include "vp8_asm_enc_offsets.asm"
-
-
-; void vp8_fast_quantize_b_ssse3 | arg
-;  (BLOCK  *b,                   |  0
-;   BLOCKD *d)                   |  1
-;
-
-global sym(vp8_fast_quantize_b_ssse3) PRIVATE
-sym(vp8_fast_quantize_b_ssse3):
-    push        rbp
-    mov         rbp, rsp
-    GET_GOT     rbx
-
-%if ABI_IS_32BIT
-    push        rdi
-    push        rsi
-%else
-  %if LIBVPX_YASM_WIN64
-    push        rdi
-    push        rsi
-  %endif
-%endif
-    ; end prolog
-
-%if ABI_IS_32BIT
-    mov         rdi, arg(0)                 ; BLOCK *b
-    mov         rsi, arg(1)                 ; BLOCKD *d
-%else
-  %if LIBVPX_YASM_WIN64
-    mov         rdi, rcx                    ; BLOCK *b
-    mov         rsi, rdx                    ; BLOCKD *d
-  %else
-    ;mov         rdi, rdi                    ; BLOCK *b
-    ;mov         rsi, rsi                    ; BLOCKD *d
-  %endif
-%endif
-
-    mov         rax, [rdi + vp8_block_coeff]
-    mov         rcx, [rdi + vp8_block_round]
-    mov         rdx, [rdi + vp8_block_quant_fast]
-
-    ; coeff
-    movdqa      xmm0, [rax]
-    movdqa      xmm4, [rax + 16]
-
-    ; round
-    movdqa      xmm2, [rcx]
-    movdqa      xmm3, [rcx + 16]
-
-    movdqa      xmm1, xmm0
-    movdqa      xmm5, xmm4
-
-    ; sz = z >> 15
-    psraw       xmm0, 15
-    psraw       xmm4, 15
-
-    pabsw       xmm1, xmm1
-    pabsw       xmm5, xmm5
-
-    paddw       xmm1, xmm2
-    paddw       xmm5, xmm3
-
-    ; quant_fast
-    pmulhw      xmm1, [rdx]
-    pmulhw      xmm5, [rdx + 16]
-
-    mov         rax, [rsi + vp8_blockd_qcoeff]
-    mov         rdi, [rsi + vp8_blockd_dequant]
-    mov         rcx, [rsi + vp8_blockd_dqcoeff]
-
-    movdqa      xmm2, xmm1                  ;store y for getting eob
-    movdqa      xmm3, xmm5
-
-    pxor        xmm1, xmm0
-    pxor        xmm5, xmm4
-    psubw       xmm1, xmm0
-    psubw       xmm5, xmm4
-
-    movdqa      [rax], xmm1
-    movdqa      [rax + 16], xmm5
-
-    movdqa      xmm0, [rdi]
-    movdqa      xmm4, [rdi + 16]
-
-    pmullw      xmm0, xmm1
-    pmullw      xmm4, xmm5
-    pxor        xmm1, xmm1
-
-    pcmpgtw     xmm2, xmm1                  ;calculate eob
-    pcmpgtw     xmm3, xmm1
-    packsswb    xmm2, xmm3
-    pshufb      xmm2, [GLOBAL(zz_shuf)]
-
-    pmovmskb    edx, xmm2
-
-    movdqa      [rcx], xmm0                 ;store dqcoeff
-    movdqa      [rcx + 16], xmm4            ;store dqcoeff
-    mov         rcx, [rsi + vp8_blockd_eob]
-
-    bsr         eax, edx                    ;count 0
-    add         eax, 1
-
-    cmp         edx, 0                      ;if all 0, eob=0
-    cmove       eax, edx
-
-    mov         BYTE PTR [rcx], al          ;store eob
-
-    ; begin epilog
-%if ABI_IS_32BIT
-    pop         rsi
-    pop         rdi
-%else
-  %if LIBVPX_YASM_WIN64
-    pop         rsi
-    pop         rdi
-  %endif
-%endif
-
-    RESTORE_GOT
-    pop         rbp
-    ret
-
-SECTION_RODATA
-align 16
-zz_shuf:
-    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp8/encoder/x86/quantize_ssse3.c b/vp8/encoder/x86/quantize_ssse3.c
new file mode 100644
index 0000000..9b4471d
--- /dev/null
+++ b/vp8/encoder/x86/quantize_ssse3.c
@@ -0,0 +1,110 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <tmmintrin.h> /* SSSE3 */
+
+#include "vp8/encoder/block.h"
+
+/* bitscan reverse (bsr) */
+#if defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+/* Returns 1 + index of the highest set bit in mask, or 0 when mask is 0. */
+static int bsr(int mask) {
+  unsigned long eob = 0; /* _BitScanReverse() stores to an unsigned long. */
+  /* A zero return means no bit was set; the index is then unspecified. */
+  if (!_BitScanReverse(&eob, (unsigned long)mask))
+    return 0;
+  return (int)eob + 1;
+}
+#else
+/* Returns 1 + index of the highest set bit in mask, or 0 when mask is 0. */
+static int bsr(int mask) {
+  int eob;
+  if (mask == 0) return 0; /* bsr result is undefined for a zero source. */
+  /* Use the __asm__ spelling: plain asm is rejected under -std=c99/c11. */
+  __asm__ __volatile__("bsr %1, %0" : "=r" (eob) : "r" (mask) : "flags");
+  return eob + 1;
+}
+#endif
+
+void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) {
+  int eob, mask;
+  /* Quantize the 16 coeffs of b into d->qcoeff/d->dqcoeff; set *d->eob. */
+  __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+  __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
+  __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+  __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+  __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
+  __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
+  __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+  __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+
+  __m128i sz0, sz1, x, x0, x1, y0, y1, zeros, abs0, abs1;
+
+  DECLARE_ALIGNED(16, const uint8_t, pshufb_zig_zag_mask[16]) =
+    { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };
+  __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask);
+
+  /* sz = z >> 15: all ones in lanes where z is negative, zero otherwise */
+  sz0 = _mm_srai_epi16(z0, 15);
+  sz1 = _mm_srai_epi16(z1, 15);
+
+  /* x = abs(z) */
+  x0 = _mm_abs_epi16(z0);
+  x1 = _mm_abs_epi16(z1);
+
+  /* x += round */
+  x0 = _mm_add_epi16(x0, round0);
+  x1 = _mm_add_epi16(x1, round1);
+
+  /* y = (x * quant) >> 16 */
+  y0 = _mm_mulhi_epi16(x0, quant_fast0);
+  y1 = _mm_mulhi_epi16(x1, quant_fast1);
+
+  /* Keep y from before the sign is restored: the eob scan below tests
+   * y > 0, which a negative coefficient would fail once it is signed.
+   * (The assembly this file replaces saved y for the eob as well.) */
+  abs0 = y0;
+  abs1 = y1;
+
+  /* Restore the sign: (y ^ sz) - sz negates y where sz is all ones. */
+  y0 = _mm_xor_si128(y0, sz0);
+  y1 = _mm_xor_si128(y1, sz1);
+  x0 = _mm_sub_epi16(y0, sz0);
+  x1 = _mm_sub_epi16(y1, sz1);
+
+  /* qcoeff = x */
+  _mm_store_si128((__m128i *)(d->qcoeff), x0);
+  _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
+
+  /* x * dequant */
+  x0 = _mm_mullo_epi16(x0, dequant0);
+  x1 = _mm_mullo_epi16(x1, dequant1);
+
+  /* dqcoeff = x * dequant */
+  _mm_store_si128((__m128i *)(d->dqcoeff), x0);
+  _mm_store_si128((__m128i *)(d->dqcoeff + 8), x1);
+
+  zeros = _mm_setzero_si128();
+  /* Lanes become 0xFFFF where the saved y is greater than zero, else 0. */
+  x0 = _mm_cmpgt_epi16(abs0, zeros);
+  x1 = _mm_cmpgt_epi16(abs1, zeros);
+  /* Narrow to one byte per coefficient (signed saturation keeps 0/0xFF). */
+  x = _mm_packs_epi16(x0, x1);
+  /* Permute: result byte i is byte pshufb_zig_zag_mask[i] of x. */
+  x = _mm_shuffle_epi8(x, zig_zag);
+  /* Bit i of mask is the top bit of byte i of x. */
+  mask = _mm_movemask_epi8(x);
+  /* eob = 1 + index of the highest set bit of mask, or 0 if none is set. */
+  eob = bsr(mask);
+
+  *d->eob = 0xFF & eob;
+}
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index cdb2716..2812111 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -159,22 +159,10 @@
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
 
 # common (neon)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/iwalsh_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfilter_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/mbloopfilter_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/shortidct4x4llm_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sad8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sad16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict4x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict8x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict8x8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_blk_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/variance_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
@@ -186,6 +174,13 @@
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dequant_idct_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dequantizeb_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_dequant_full_2x_neon.c
-
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/iwalsh_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfiltersimplehorizontaledge_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/mbloopfilter_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sad_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/shortidct4x4llm_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_dequant_0_2x_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/variance_neon.c
 
 $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl))
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index d7c6dd1..607382b 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -88,6 +88,7 @@
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
+VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c
 
 ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
@@ -96,7 +97,6 @@
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
-VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
 VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index d4c3065..de69772 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -310,7 +310,7 @@
 specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
 
 add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/;
+specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/, "$ssse3_x86_64";
 
 add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
@@ -379,10 +379,6 @@
 add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
 
-add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get_sse_sum_16x16 sse2/;
-$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
-
 add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
 
@@ -392,10 +388,6 @@
 add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
 
-add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get_sse_sum_8x8 sse2/;
-$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
-
 add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance8x4/, "$sse2_x86inc";
 
@@ -701,7 +693,7 @@
 specialize qw/vp9_fht16x16 sse2 avx2/;
 
 add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fwht4x4/;
+specialize qw/vp9_fwht4x4 mmx/;
 
 add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fdct4x4 sse2 avx2/;
diff --git a/vp9/common/x86/vp9_idct_ssse3.asm b/vp9/common/x86/vp9_idct_ssse3.asm
new file mode 100644
index 0000000..f2a120f
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_ssse3.asm
@@ -0,0 +1,162 @@
+;
+;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+; This file provides SSSE3 version of the inverse transformation. Part
+; of the functions are originally derived from the ffmpeg project.
+; Note that the current version applies to x86 64-bit only.
+
+SECTION_RODATA
+
+pw_11585x2: times 8 dw 23170
+pd_8192:    times 4 dd 8192
+pw_16:      times 8 dw 16
+
+%macro TRANSFORM_COEFFS 2 ; a, b: pw_a_b = (a, b) x4 and pw_mb_a = (-b, a) x4
+pw_%1_%2:   dw  %1,  %2,  %1,  %2,  %1,  %2,  %1,  %2
+pw_m%2_%1:  dw -%2,  %1, -%2,  %1, -%2,  %1, -%2,  %1
+%endmacro
+
+TRANSFORM_COEFFS    6270, 15137
+TRANSFORM_COEFFS    3196, 16069
+TRANSFORM_COEFFS   13623,  9102
+
+SECTION .text
+
+%if ARCH_X86_64
+%macro SUM_SUB 3 ; a, b, tmp: a becomes a + b, b becomes a - b
+  psubw  m%3, m%1, m%2     ; tmp = a - b
+  paddw  m%1, m%2          ; a = a + b
+  SWAP    %2, %3           ; swap names so b designates the difference
+%endmacro
+
+; butterfly operation: dst = (src . coefs + round) >> 14 per 32-bit lane
+%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2
+  pmaddwd            m%1, m%3, %5   ; dst1 = dot products of src word pairs
+  pmaddwd            m%2, m%3, %6   ; dst2 likewise; dst2 may alias src
+  paddd              m%1,  %4       ; add the rounding term
+  paddd              m%2,  %4
+  psrad              m%1,  14       ; arithmetic shift right by 14 bits
+  psrad              m%2,  14
+%endmacro
+
+%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
+  punpckhwd          m%6, m%2, m%1  ; high words of dst2/dst1, interleaved
+  MUL_ADD_2X         %7,  %6,  %6,  %5, [pw_m%4_%3], [pw_%3_%4]
+  punpcklwd          m%2, m%1       ; low words of dst2/dst1, interleaved
+  MUL_ADD_2X         %1,  %2,  %2,  %5, [pw_m%4_%3], [pw_%3_%4]
+  packssdw           m%1, m%7       ; dst1 = in1 * coef1 - in2 * coef2, scaled
+  packssdw           m%2, m%6       ; dst2 = in1 * coef2 + in2 * coef1, scaled
+%endmacro
+
+; matrix transpose
+%macro INTERLEAVE_2X 4 ; unit suffix (wd, dq or qdq), a, b, tmp
+  punpckh%1          m%4, m%2, m%3  ; tmp = high-half interleave of a and b
+  punpckl%1          m%2, m%3       ; a = low-half interleave of a and b
+  SWAP               %3,  %4        ; b now designates the high-half result
+%endmacro
+
+%macro TRANSPOSE8X8 9 ; eight row registers, then one temporary
+  INTERLEAVE_2X  wd, %1, %2, %9    ; pass 1: interleave 16-bit words
+  INTERLEAVE_2X  wd, %3, %4, %9
+  INTERLEAVE_2X  wd, %5, %6, %9
+  INTERLEAVE_2X  wd, %7, %8, %9
+
+  INTERLEAVE_2X  dq, %1, %3, %9    ; pass 2: interleave 32-bit dwords
+  INTERLEAVE_2X  dq, %2, %4, %9
+  INTERLEAVE_2X  dq, %5, %7, %9
+  INTERLEAVE_2X  dq, %6, %8, %9
+
+  INTERLEAVE_2X  qdq, %1, %5, %9   ; pass 3: interleave 64-bit qwords
+  INTERLEAVE_2X  qdq, %3, %7, %9
+  INTERLEAVE_2X  qdq, %2, %6, %9
+  INTERLEAVE_2X  qdq, %4, %8, %9
+
+  SWAP  %2, %5                     ; passes leave columns as 0,4,2,6,1,5,3,7;
+  SWAP  %4, %7                     ; these two renames restore 0..7 order
+%endmacro
+
+%macro IDCT8_1D 0 ; m0-m7 in/out; m8 = round, m12 = scale; m9, m10 = temps
+  SUM_SUB          0,    4,    9
+  BUTTERFLY_4X     2,    6,    6270, 15137,  m8,  9,  10
+  pmulhrsw        m0,  m12         ; rounded multiply by m12 / 2^15
+  pmulhrsw        m4,  m12
+  BUTTERFLY_4X     1,    7,    3196, 16069,  m8,  9,  10
+  BUTTERFLY_4X     5,    3,   13623,  9102,  m8,  9,  10
+
+  SUM_SUB          1,    5,    9
+  SUM_SUB          7,    3,    9
+  SUM_SUB          0,    6,    9
+  SUM_SUB          4,    2,    9
+  SUM_SUB          3,    5,    9
+  pmulhrsw        m3,  m12         ; same rounded scaling as above
+  pmulhrsw        m5,  m12
+
+  SUM_SUB          0,    7,    9
+  SUM_SUB          4,    3,    9
+  SUM_SUB          2,    5,    9
+  SUM_SUB          6,    1,    9
+
+  SWAP             3,    6         ; register renames only, no data moves
+  SWAP             1,    4
+%endmacro
+
+; Adds two rows of residuals to the output and stores 8 pixels per row
+%macro ADD_STORE_8P_2X 5;  src1, src2, tmp1, tmp2, zero
+  paddw           m%1, m11           ; m11 = bias (caller loads pw_16)
+  paddw           m%2, m11
+  psraw           m%1, 5             ; (x + bias) >> 5
+  psraw           m%2, 5
+
+  movh            m%3, [outputq]     ; load the current output of both rows
+  movh            m%4, [outputq + strideq]
+  punpcklbw       m%3, m%5           ; widen bytes to words with zero
+  punpcklbw       m%4, m%5
+  paddw           m%3, m%1           ; add the residual
+  paddw           m%4, m%2
+  packuswb        m%3, m%5           ; saturate back to unsigned bytes
+  packuswb        m%4, m%5
+  movh               [outputq], m%3  ; write both rows back
+  movh     [outputq + strideq], m%4
+%endmacro
+
+INIT_XMM ssse3
+cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
+  mova     m8, [pd_8192]          ; rounding term for the >> 14 butterflies
+  mova    m11, [pw_16]            ; rounding bias for the final >> 5
+  mova    m12, [pw_11585x2]       ; pmulhrsw multiplier used by IDCT8_1D
+
+  lea      r3, [2 * strideq]      ; r3 = distance of two output rows
+
+  mova     m0, [inputq +   0]     ; load eight 16-byte rows of input
+  mova     m1, [inputq +  16]
+  mova     m2, [inputq +  32]
+  mova     m3, [inputq +  48]
+  mova     m4, [inputq +  64]
+  mova     m5, [inputq +  80]
+  mova     m6, [inputq +  96]
+  mova     m7, [inputq + 112]
+
+  TRANSPOSE8X8  0, 1, 2, 3, 4, 5, 6, 7, 9
+  IDCT8_1D                        ; first 1-D pass
+  TRANSPOSE8X8  0, 1, 2, 3, 4, 5, 6, 7, 9
+  IDCT8_1D                        ; second 1-D pass
+
+  pxor    m12, m12                ; m12 is reused as the zero register
+  ADD_STORE_8P_2X  0, 1, 9, 10, 12    ; output rows 0-1
+  lea              outputq, [outputq + r3]
+  ADD_STORE_8P_2X  2, 3, 9, 10, 12    ; output rows 2-3
+  lea              outputq, [outputq + r3]
+  ADD_STORE_8P_2X  4, 5, 9, 10, 12    ; output rows 4-5
+  lea              outputq, [outputq + r3]
+  ADD_STORE_8P_2X  6, 7, 9, 10, 12    ; output rows 6-7
+
+  RET
+%endif
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index c5a85c9..35d2ecf 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1006,9 +1006,10 @@
     found = cm->width == cfg->y_crop_width &&
             cm->height == cfg->y_crop_height;
 
-    // TODO(ivan): This prevents a bug while more than 3 buffers are used. Do it
-    // in a better way.
-    if (cpi->use_svc) {
+    // Set "found" to 0 for temporal svc and for spatial svc key frame
+    if (cpi->use_svc &&
+        (cpi->svc.number_spatial_layers == 1 ||
+         cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame)) {
       found = 0;
     }
     vp9_wb_write_bit(wb, found);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index f35a85f..2ccf4f8 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -129,12 +129,6 @@
   int *nmvsadcost_hp[2];
   int **mvsadcost;
 
-  int mbmode_cost[INTRA_MODES];
-  unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
-  int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
-  int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
-  int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
-
   // These define limits to motion vector components to prevent them
   // from extending outside the UMV borders
   int mv_col_min;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 19aa592..ef1d4e6 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -70,6 +70,18 @@
   128, 128, 128, 128, 128, 128, 128, 128
 };
 
+static void get_sse_sum_8x8(const uint8_t *src, int src_stride,
+                            const uint8_t *ref, int ref_stride,
+                            unsigned int *sse, int *sum) {
+  variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum);
+}
+
+static void get_sse_sum_16x16(const uint8_t *src, int src_stride,
+                              const uint8_t *ref, int ref_stride,
+                              unsigned int *sse, int *sum) {
+  variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum);
+}
+
 static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
                                               const struct buf_2d *ref,
                                               BLOCK_SIZE bs) {
@@ -475,8 +487,8 @@
         unsigned int sse = 0;
         int sum = 0;
         if (x_idx < pixels_wide && y_idx < pixels_high)
-          vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
-                              d + y_idx * dp + x_idx, dp, &sse, &sum);
+          get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
+                          d + y_idx * dp + x_idx, dp, &sse, &sum);
         fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
       }
     }
@@ -1211,10 +1223,9 @@
         int b_offset = b_mi_row * MI_SIZE * src_stride +
                        b_mi_col * MI_SIZE;
 
-        vp9_get_sse_sum_16x16(src + b_offset,
-                              src_stride,
-                              pre_src + b_offset,
-                              pre_stride, &d16[j].sse, &d16[j].sum);
+        get_sse_sum_16x16(src + b_offset, src_stride,
+                          pre_src + b_offset, pre_stride,
+                          &d16[j].sse, &d16[j].sum);
 
         d16[j].var = d16[j].sse -
             (((uint32_t)d16[j].sum * d16[j].sum) >> 8);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 364ea3a..8732095 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -499,6 +499,12 @@
 
   search_site_config ss_cfg;
 
+  int mbmode_cost[INTRA_MODES];
+  unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
+  int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
+  int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+  int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+
 #if CONFIG_MULTIPLE_ARF
   // ARF tracking variables.
   int multi_arf_enabled;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index bbec4da..15afa1a 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -878,89 +878,67 @@
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *in_what;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-
-  unsigned int bestsad = INT_MAX;
-  int ref_row, ref_col;
-
-  unsigned int thissad;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+  const int range = 64;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  int tr, tc;
-  int best_tr = 0;
-  int best_tc = 0;
-  int range = 64;
-
-  int start_col, end_col;
-  int start_row, end_row;
-  int i;
+  unsigned int best_sad = INT_MAX;
+  int r, c, i;
+  int start_col, end_col, start_row, end_row;
 
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
-  ref_row = ref_mv->row;
-  ref_col = ref_mv->col;
+  *best_mv = *ref_mv;
   *num00 = 11;
-  best_mv->row = ref_row;
-  best_mv->col = ref_col;
+  best_sad = fn_ptr->sdf(what->buf, what->stride,
+                         get_buf_from_mv(in_what, ref_mv), in_what->stride,
+                         0x7fffffff) +
+                 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+  start_row = MAX(-range, x->mv_row_min - ref_mv->row);
+  start_col = MAX(-range, x->mv_col_min - ref_mv->col);
+  end_row = MIN(range, x->mv_row_max - ref_mv->row);
+  end_col = MIN(range, x->mv_col_max - ref_mv->col);
 
-  // Work out the start point for the search
-  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
+  for (r = start_row; r <= end_row; ++r) {
+    for (c = start_col; c <= end_col; c += 4) {
+      if (c + 3 <= end_col) {
+        unsigned int sads[4];
+        const uint8_t *addrs[4];
+        for (i = 0; i < 4; ++i) {
+          const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
+          addrs[i] = get_buf_from_mv(in_what, &mv);
+        }
 
-  // Check the starting position
-  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
-  start_row = MAX(-range, x->mv_row_min - ref_row);
-  start_col = MAX(-range, x->mv_col_min - ref_col);
-  end_row = MIN(range, x->mv_row_max - ref_row);
-  end_col = MIN(range, x->mv_col_max - ref_col);
-
-  for (tr = start_row; tr <= end_row; ++tr) {
-    for (tc = start_col; tc <= end_col; tc += 4) {
-      if ((tc + 3) <= end_col) {
-        unsigned int sad_array[4];
-        unsigned char const *addr_ref[4];
-        for (i = 0; i < 4; ++i)
-          addr_ref[i] = in_what + tr * in_what_stride + tc + i;
-
-        fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);
+        fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
 
         for (i = 0; i < 4; ++i) {
-          if (sad_array[i] < bestsad) {
-            const MV this_mv = {ref_row + tr, ref_col + tc + i};
-            thissad = sad_array[i] +
-                      mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-            if (thissad < bestsad) {
-              bestsad = thissad;
-              best_tr = tr;
-              best_tc = tc + i;
+          if (sads[i] < best_sad) {
+            const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
+            const unsigned int sad = sads[i] +
+                mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
             }
           }
         }
       } else {
-        for (i = 0; i < end_col - tc; ++i) {
-          const uint8_t *check_here = in_what + tr * in_what_stride + tc + i;
-          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
-                                bestsad);
-
-          if (thissad < bestsad) {
-            const MV this_mv = {ref_row + tr, ref_col + tc + i};
-            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-
-            if (thissad < bestsad) {
-              bestsad = thissad;
-              best_tr = tr;
-              best_tc = tc + i;
+        for (i = 0; i < end_col - c; ++i) {
+          const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
+          unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+              get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+          if (sad < best_sad) {
+            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
             }
           }
         }
       }
     }
   }
-  best_mv->row += best_tr;
-  best_mv->col += best_tc;
-  return bestsad;
+
+  return best_sad;
 }
 
 int vp9_diamond_search_sad_c(const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 66d0ac4..e750a53 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -290,8 +290,8 @@
         if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
           continue;
 
-        rate_mode = x->inter_mode_cost[mbmi->mode_context[ref_frame]]
-                                      [INTER_OFFSET(this_mode)];
+        rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+                                        [INTER_OFFSET(this_mode)];
         if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd)
           continue;
 
@@ -325,24 +325,24 @@
         model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP],
                           &pf_dist[EIGHTTAP]);
         tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv,
-                             vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP],
+                             vp9_get_switchable_rate(cpi) + pf_rate[EIGHTTAP],
                              pf_dist[EIGHTTAP]);
 
         mbmi->interp_filter = EIGHTTAP_SHARP;
         vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
         model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP],
                           &pf_dist[EIGHTTAP_SHARP]);
-        tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv,
-                          vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP],
-                          pf_dist[EIGHTTAP_SHARP]);
+        tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) +
+                                 pf_rate[EIGHTTAP_SHARP],
+                             pf_dist[EIGHTTAP_SHARP]);
 
         mbmi->interp_filter = EIGHTTAP_SMOOTH;
         vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
         model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH],
                           &pf_dist[EIGHTTAP_SMOOTH]);
-        tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv,
-                          vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH],
-                          pf_dist[EIGHTTAP_SMOOTH]);
+        tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) +
+                                 pf_rate[EIGHTTAP_SMOOTH],
+                             pf_dist[EIGHTTAP_SMOOTH]);
 
         if (tmp_rdcost2 < tmp_rdcost1) {
           if (tmp_rdcost2 < tmp_rdcost3)
@@ -365,7 +365,7 @@
       }
 
       rate += rate_mv;
-      rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]]
+      rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
                                 [INTER_OFFSET(this_mode)];
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
 
@@ -396,7 +396,7 @@
                               &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
 
       model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
-      rate += x->mbmode_cost[this_mode];
+      rate += cpi->mbmode_cost[this_mode];
       rate += intra_cost_penalty;
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
 
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 24e75ae..985743d 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1308,11 +1308,26 @@
           cpi->oxcf.key_freq == 0))) {
     cm->frame_type = KEY_FRAME;
     rc->source_alt_ref_active = 0;
+
+    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+      cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1;
+    }
+
     if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
       target = calc_iframe_target_size_one_pass_cbr(cpi);
     }
   } else {
     cm->frame_type = INTER_FRAME;
+
+    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+      LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+      if (cpi->svc.spatial_layer_id == 0) {
+        lc->is_key_frame = 0;
+      } else {
+        lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
+      }
+    }
+
     if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
       target = calc_pframe_target_size_one_pass_cbr(cpi);
     }
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 16cf4b9..66464d0 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -152,24 +152,23 @@
 }
 
 static void fill_mode_costs(VP9_COMP *cpi) {
-  MACROBLOCK *const x = &cpi->mb;
   const FRAME_CONTEXT *const fc = &cpi->common.fc;
   int i, j;
 
   for (i = 0; i < INTRA_MODES; i++)
     for (j = 0; j < INTRA_MODES; j++)
-      vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
+      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                       vp9_intra_mode_tree);
 
   // TODO(rbultje) separate tables for superblock costing?
-  vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
-  vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
+  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
+  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                   vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
-  vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
+  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                   fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
 
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-    vp9_cost_tokens((int *)x->switchable_interp_costs[i],
+    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                     fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
 }
 
@@ -313,7 +312,7 @@
                                &cm->fc.nmvc, cm->allow_high_precision_mv);
 
       for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
-        vp9_cost_tokens((int *)x->inter_mode_cost[i],
+        vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                         cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
     }
   }
@@ -1187,7 +1186,7 @@
   int tot_rate_y = 0;
   int64_t total_rd = 0;
   ENTROPY_CONTEXT t_above[4], t_left[4];
-  const int *bmode_costs = mb->mbmode_cost;
+  const int *bmode_costs = cpi->mbmode_cost;
 
   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
   vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
@@ -1203,7 +1202,7 @@
         const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
         const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
 
-        bmode_costs  = mb->y_mode_costs[A][L];
+        bmode_costs  = cpi->y_mode_costs[A][L];
       }
 
       this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
@@ -1250,7 +1249,7 @@
   int64_t this_distortion, this_rd;
   TX_SIZE best_tx = TX_4X4;
   int i;
-  int *bmode_costs = x->mbmode_cost;
+  int *bmode_costs = cpi->mbmode_cost;
 
   if (cpi->sf.tx_size_search_method == USE_FULL_RD)
     for (i = 0; i < TX_MODES; i++)
@@ -1269,7 +1268,7 @@
       const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
       const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
 
-      bmode_costs = x->y_mode_costs[A][L];
+      bmode_costs = cpi->y_mode_costs[A][L];
     }
     mic->mbmi.mode = mode;
 
@@ -1378,7 +1377,7 @@
     if (this_rate_tokenonly == INT_MAX)
       continue;
     this_rate = this_rate_tokenonly +
-                x->intra_uv_mode_cost[cpi->common.frame_type][mode];
+                cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 
     if (this_rd < best_rd) {
@@ -1426,7 +1425,7 @@
   x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                    skippable, &unused, bsize, INT64_MAX);
-  *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+  *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 }
 
@@ -1460,7 +1459,7 @@
   // Don't account for mode here if segment skip is enabled.
   if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
     assert(is_inter_mode(mode));
-    return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
+    return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
   } else {
     return 0;
   }
@@ -2298,12 +2297,12 @@
   return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
 }
 
-int vp9_get_switchable_rate(const MACROBLOCK *x) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
+int vp9_get_switchable_rate(const VP9_COMP *cpi) {
+  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int ctx = vp9_get_pred_context_switchable_interp(xd);
   return SWITCHABLE_INTERP_RATE_FACTOR *
-             x->switchable_interp_costs[ctx][mbmi->interp_filter];
+             cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
 }
 
 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2710,7 +2709,7 @@
         int j;
         int64_t rs_rd;
         mbmi->interp_filter = i;
-        rs = vp9_get_switchable_rate(x);
+        rs = vp9_get_switchable_rate(cpi);
         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
 
         if (i > 0 && intpel_mv) {
@@ -2780,7 +2779,7 @@
   // Set the appropriate filter
   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
       cm->interp_filter : *best_filter;
-  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(x) : 0;
+  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
 
   if (pred_exists) {
     if (best_needs_copy) {
@@ -2810,7 +2809,7 @@
   }
 
   if (cm->interp_filter == SWITCHABLE)
-    *rate2 += vp9_get_switchable_rate(x);
+    *rate2 += vp9_get_switchable_rate(cpi);
 
   if (!is_comp_pred) {
     if (!x->in_active_map) {
@@ -3357,7 +3356,7 @@
       skippable = skippable && skip_uv[uv_tx];
       mbmi->uv_mode = mode_uv[uv_tx];
 
-      rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+      rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
       if (this_mode != DC_PRED && this_mode != TM_PRED)
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
@@ -3956,7 +3955,7 @@
 
             if (tmp_rd == INT64_MAX)
               continue;
-            rs = vp9_get_switchable_rate(x);
+            rs = vp9_get_switchable_rate(cpi);
             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
             rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
             rd_opt->filter_cache[SWITCHABLE_FILTERS] =
@@ -4034,7 +4033,7 @@
       distortion2 += distortion;
 
       if (cm->interp_filter == SWITCHABLE)
-        rate2 += vp9_get_switchable_rate(x);
+        rate2 += vp9_get_switchable_rate(cpi);
 
       if (!mode_excluded)
         mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index be3efb9..b6b51e5 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -40,7 +40,7 @@
                                   unsigned int qstep, int *rate,
                                   int64_t *dist);
 
-int vp9_get_switchable_rate(const MACROBLOCK *x);
+int vp9_get_switchable_rate(const VP9_COMP *cpi);
 
 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                             const TileInfo *const tile,
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index a5234cd..6eff200 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -99,6 +99,7 @@
   unsigned int i, j, k;
   int modifier;
   int byte = 0;
+  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
 
   for (i = 0, k = 0; i < block_size; i++) {
     for (j = 0; j < block_size; j++, k++) {
@@ -111,7 +112,7 @@
       // modifier =  (int)roundf(coeff > 16 ? 0 : 16-coeff);
       modifier  *= modifier;
       modifier  *= 3;
-      modifier  += 1 << (strength - 1);
+      modifier  += rounding;
       modifier >>= strength;
 
       if (modifier > 16)
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index ae3c86a..91d8ea4 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -113,12 +113,9 @@
 unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                        const uint8_t *b, int b_stride, \
                                        unsigned int *sse) { \
-  unsigned int var; \
-  int avg; \
-\
-  variance(a, a_stride, b, b_stride, W, H, &var, &avg); \
-  *sse = var; \
-  return var - (((int64_t)avg * avg) / (W * H)); \
+  int sum; \
+  variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+  return *sse - (((int64_t)sum * sum) / (W * H)); \
 }
 
 #define SUBPIX_VAR(W, H) \
@@ -159,69 +156,36 @@
   return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
 }
 
-
-void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
-                             const uint8_t *ref_ptr, int ref_stride,
-                             unsigned int *sse, int *sum) {
-  variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
-}
-
-void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
-                       const uint8_t *ref_ptr, int ref_stride,
-                       unsigned int *sse, int *sum) {
-  variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
-}
-
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
-                            int  source_stride,
-                            const uint8_t *ref_ptr,
-                            int  recon_stride,
+unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
+                            const uint8_t *ref, int ref_stride,
                             unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
-  *sse = var;
-  return var;
+  int sum;
+  variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);
+  return *sse;
 }
 
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
-                           int  source_stride,
-                           const uint8_t *ref_ptr,
-                           int  recon_stride,
+unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,
+                           const uint8_t *ref, int ref_stride,
                            unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
-  *sse = var;
-  return var;
+  int sum;
+  variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum);
+  return *sse;
 }
 
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
-                           int  source_stride,
-                           const uint8_t *ref_ptr,
-                           int  recon_stride,
+unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,
+                           const uint8_t *ref, int ref_stride,
                            unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
-  *sse = var;
-  return var;
+  int sum;
+  variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum);
+  return *sse;
 }
 
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
-                          int  source_stride,
-                          const uint8_t *ref_ptr,
-                          int  recon_stride,
+unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,
+                          const uint8_t *ref, int ref_stride,
                           unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
-  *sse = var;
-  return var;
+  int sum;
+  variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);
+  return *sse;
 }
 
 VAR(4, 4)
diff --git a/vp9/encoder/x86/vp9_dct_mmx.c b/vp9/encoder/x86/vp9_dct_mmx.c
new file mode 100644
index 0000000..4524ba4
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_mmx.c
@@ -0,0 +1,67 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <mmintrin.h>
+#include <stdint.h>
+
+#include "./vpx_config.h"
+
+// Transposes, in place, the 4x4 matrix of 16-bit lanes held in *a .. *d.
+static void INLINE transpose_4x4_mmx(__m64* a, __m64* b, __m64* c, __m64* d) {
+  const __m64 ab_lo = _mm_unpacklo_pi16(*a, *b);
+  const __m64 ab_hi = _mm_unpackhi_pi16(*a, *b);
+  const __m64 cd_lo = _mm_unpacklo_pi16(*c, *d);
+  const __m64 cd_hi = _mm_unpackhi_pi16(*c, *d);
+  *a = _mm_unpacklo_pi32(ab_lo, cd_lo);
+  *b = _mm_unpackhi_pi32(ab_lo, cd_lo);
+  *c = _mm_unpacklo_pi32(ab_hi, cd_hi);
+  *d = _mm_unpackhi_pi32(ab_hi, cd_hi);
+}
+
+// One 1-D pass of the 4-point forward WHT butterfly, evaluated on all four
+// 16-bit lanes of each register at once.
+//
+// a1..d1 are passed by value, so out0..out3 may point at the caller's own
+// copies of the inputs without disturbing the computation.
+static void INLINE fwht_4x4_cols(__m64* out0, __m64* out1,
+                                 __m64* out2, __m64* out3,
+                                 __m64 a1, __m64 b1,
+                                 __m64 c1, __m64 d1) {
+  const __m64 sum_ab = _mm_add_pi16(a1, b1);
+  const __m64 diff_dc = _mm_sub_pi16(d1, c1);
+  const __m64 spread = _mm_sub_pi16(sum_ab, diff_dc);
+  // Arithmetic shift: halves each lane while keeping its sign.
+  const __m64 half = _mm_srai_pi16(spread, 1);
+  const __m64 new_b = _mm_sub_pi16(half, b1);
+  const __m64 new_c = _mm_sub_pi16(half, c1);
+
+  // Outputs 1 and 3 carry the c- and b-derived terms, in that order.
+  *out0 = _mm_sub_pi16(sum_ab, new_c);
+  *out1 = new_c;
+  *out2 = _mm_add_pi16(diff_dc, new_b);
+  *out3 = new_b;
+}
+
+// 4x4 forward WHT using MMX. Loads four rows of four int16 values, spaced
+// `stride` elements apart, and stores 16 results contiguously in output.
+void vp9_fwht4x4_mmx(const int16_t* input, int16_t* output, int stride) {
+  __m64 r[4];
+  int i;
+
+  for (i = 0; i < 4; ++i)
+    r[i] = *(const __m64*)(input + i * stride);
+
+  for (i = 0; i < 2; ++i) {  // column pass, then transpose; done twice
+    fwht_4x4_cols(&r[0], &r[1], &r[2], &r[3], r[0], r[1], r[2], r[3]);
+    transpose_4x4_mmx(&r[0], &r[1], &r[2], &r[3]);
+  }
+  for (i = 0; i < 4; ++i)  // each result is shifted left by 2 on store
+    ((__m64*)output)[i] = _mm_slli_pi16(r[i], 2);
+}
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.asm b/vp9/encoder/x86/vp9_dct_ssse3.asm
index 1400071..8723a71 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.asm
+++ b/vp9/encoder/x86/vp9_dct_ssse3.asm
@@ -10,7 +10,7 @@
 %include "third_party/x86inc/x86inc.asm"
 
 ; This file provides SSSE3 version of the forward transformation. Part
-; of the macro definitions are originally derived from ffmpeg project.
+; of the macro definitions are originally derived from the ffmpeg project.
 ; The current version applies to x86 64-bit only.
 
 SECTION_RODATA
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index b1ba0b1..eaff60a 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -120,6 +120,10 @@
 
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
 
+ifeq ($(ARCH_X86_64), yes)
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3.asm
+endif
+
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon.c
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 5e88793..154fdae 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -92,6 +92,7 @@
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 76aacd2..38c2d26 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -1000,8 +1000,10 @@
               (int)si->frame_size, (int)pts);
     }
   }
-  ++si->frame_within_gop;
-  ++si->encode_frame_count;
+  if (rawimg != NULL) {
+    ++si->frame_within_gop;
+    ++si->encode_frame_count;
+  }
 
   return VPX_CODEC_OK;
 }
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index 542ff67..fa0e030 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -12,6 +12,13 @@
 #include <string.h>
 #include "arm.h"
 
+#ifdef WINAPI_FAMILY
+#include <winapifamily.h>
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define getenv(x) NULL
+#endif
+#endif
+
 static int arm_cpu_env_flags(int *flags) {
   char *env;
   env = getenv("VPX_SIMD_CAPS");