Merge changes I7d6394e4,Ia8ce1464,If20e8637,Ia9adc46b,I651db25b into nextgenv2

* changes:
  Define SIMD_INLINE using AOM_FORCE_INLINE
  AOM_FORCE_INLINE: fix always_inline attribute
  Free memory allocated by daala_ec encoder.
  Move clpf_sse4_1.c to clpf_sse4.c in agreement with convention
  sync avg_test.cc with aom/master
diff --git a/aom/aom_integer.h b/aom/aom_integer.h
index c70e696..2e8f23f 100644
--- a/aom/aom_integer.h
+++ b/aom/aom_integer.h
@@ -19,7 +19,7 @@
 #define AOM_FORCE_INLINE __forceinline
 #define AOM_INLINE __inline
 #else
-#define AOM_FORCE_INLINE __inline__ __attribute__(always_inline)
+#define AOM_FORCE_INLINE __inline__ __attribute__((always_inline))
 // TODO(jbb): Allow a way to force inline off for older compilers.
 #define AOM_INLINE inline
 #endif
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index 238853f..c74bfe3 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -388,5 +388,15 @@
 DSP_SRCS-yes += aom_dsp_rtcd_defs.pl
 
 DSP_SRCS-yes += aom_simd.c
+DSP_SRCS-yes += aom_simd.h
+DSP_SRCS-yes += aom_simd_inline.h
+DSP_SRCS-yes += simd/v64_intrinsics.h
+DSP_SRCS-yes += simd/v64_intrinsics_c.h
+DSP_SRCS-yes += simd/v128_intrinsics.h
+DSP_SRCS-yes += simd/v128_intrinsics_c.h
+DSP_SRCS-$(HAVE_SSE2) += simd/v64_intrinsics_x86.h
+DSP_SRCS-$(HAVE_SSE2) += simd/v128_intrinsics_x86.h
+DSP_SRCS-$(HAVE_NEON) += simd/v64_intrinsics_arm.h
+DSP_SRCS-$(HAVE_NEON) += simd/v128_intrinsics_arm.h
 
 $(eval $(call rtcd_h_template,aom_dsp_rtcd,aom_dsp/aom_dsp_rtcd_defs.pl))
diff --git a/aom_dsp/aom_simd.h b/aom_dsp/aom_simd.h
index 7ffca4a..3879d95 100644
--- a/aom_dsp/aom_simd.h
+++ b/aom_dsp/aom_simd.h
@@ -9,20 +9,8 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
-#ifndef _AOM_SIMD_H
-#define _AOM_SIMD_H
-
-#ifndef SIMD_INLINE
-#ifdef __GNUC__
-#define SIMD_INLINE static inline __attribute__((always_inline))
-#elif __STDC_VERSION__ >= 199901L
-#define SIMD_INLINE static inline
-#elif defined(_MSC_VER)
-#define SIMD_INLINE static __inline
-#else
-#define SIMD_INLINE static
-#endif
-#endif
+#ifndef AOM_DSP_AOM_AOM_SIMD_H_
+#define AOM_DSP_AOM_AOM_SIMD_H_
 
 #include <stdint.h>
 
@@ -31,6 +19,7 @@
 #endif
 
 #include "./aom_config.h"
+#include "./aom_simd_inline.h"
 
 #if HAVE_NEON
 #include "simd/v128_intrinsics_arm.h"
@@ -40,4 +29,4 @@
 #include "simd/v128_intrinsics.h"
 #endif
 
-#endif /* _AOM_SIMD_H */
+#endif  // AOM_DSP_AOM_AOM_SIMD_H_
diff --git a/aom_dsp/aom_simd_inline.h b/aom_dsp/aom_simd_inline.h
new file mode 100644
index 0000000..02a8b3a
--- /dev/null
+++ b/aom_dsp/aom_simd_inline.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_AOM_SIMD_INLINE_H_
+#define AOM_DSP_AOM_SIMD_INLINE_H_
+
+#include "aom/aom_integer.h"
+
+#ifndef SIMD_INLINE
+#define SIMD_INLINE static AOM_FORCE_INLINE
+#endif
+
+#endif  // AOM_DSP_AOM_SIMD_INLINE_H_
diff --git a/aom_dsp/daalaboolwriter.c b/aom_dsp/daalaboolwriter.c
index 15a3af7..0ba8f6a 100644
--- a/aom_dsp/daalaboolwriter.c
+++ b/aom_dsp/daalaboolwriter.c
@@ -28,4 +28,5 @@
      Must always be added, so that rawbits knows the exact length of the
       bitstream. */
   br->buffer[br->pos++] = 0;
+  od_ec_enc_clear(&br->ec);
 }
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 585e44e..38fb6fd 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -92,7 +92,7 @@
 AV1_COMMON_SRCS-yes += common/clpf_simd.h
 AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
 AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4_1.c
+AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
 AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
 endif
 ifeq ($(CONFIG_DERING),yes)
diff --git a/av1/common/clpf_sse4_1.c b/av1/common/clpf_sse4.c
similarity index 100%
rename from av1/common/clpf_sse4_1.c
rename to av1/common/clpf_sse4.c
diff --git a/test/avg_test.cc b/test/avg_test.cc
index cf6f89c..b7a707d 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -54,14 +54,14 @@
   }
 
   // Sum Pixels
-  unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
     unsigned int average = 0;
     for (int h = 0; h < 8; ++h)
       for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
     return ((average + 32) >> 6);
   }
 
-  unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
     unsigned int average = 0;
     for (int h = 0; h < 4; ++h)
       for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
@@ -97,11 +97,12 @@
 
  protected:
   void CheckAverages() {
+    const int block_size = GET_PARAM(3);
     unsigned int expected = 0;
-    if (GET_PARAM(3) == 8) {
+    if (block_size == 8) {
       expected =
           ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_);
-    } else if (GET_PARAM(3) == 4) {
+    } else if (block_size == 4) {
       expected =
           ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_);
     }