Fix a number of typos and improve formatting

Change-Id: I79c1b3171823376c4f6dcc72fe829e00b7bd46b4
diff --git a/aom/aom_codec.mk b/aom/aom_codec.mk
index f55eb31..33bd3fe 100644
--- a/aom/aom_codec.mk
+++ b/aom/aom_codec.mk
@@ -10,10 +10,9 @@
 ##
 
 
-
 API_EXPORTS += exports
 
-API_SRCS-$(CONFIG_V10_ENCODER) += aom.h
+API_SRCS-$(CONFIG_AV1_ENCODER) += aom.h
 API_SRCS-$(CONFIG_AV1_ENCODER) += aomcx.h
 API_DOC_SRCS-$(CONFIG_AV1_ENCODER) += aom.h
 API_DOC_SRCS-$(CONFIG_AV1_ENCODER) += aomcx.h
diff --git a/aom/aom_decoder.h b/aom/aom_decoder.h
index 9d94a53..e6f0504 100644
--- a/aom/aom_decoder.h
+++ b/aom/aom_decoder.h
@@ -55,10 +55,10 @@
 #define AOM_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */
 #define AOM_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */
 #define AOM_CODEC_CAP_POSTPROC 0x40000  /**< Can postprocess decoded frame */
+/*!\brief Can conceal errors due to packet loss */
 #define AOM_CODEC_CAP_ERROR_CONCEALMENT 0x80000
-/**< Can conceal errors due to packet loss */
+/*!\brief Can receive encoded frames one fragment at a time */
 #define AOM_CODEC_CAP_INPUT_FRAGMENTS 0x100000
-/**< Can receive encoded frames one fragment at a time */
 
 /*! \brief Initialization-time Feature Enabling
  *
@@ -67,18 +67,19 @@
  *
  *  The available flags are specified by AOM_CODEC_USE_* defines.
  */
+/*!\brief Can support frame-based multi-threading */
 #define AOM_CODEC_CAP_FRAME_THREADING 0x200000
-/**< Can support frame-based multi-threading */
+/*!\brief Can support external frame buffers */
 #define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
-/**< Can support external frame buffers */
 
 #define AOM_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
+/*!\brief Conceal errors in decoded frames */
 #define AOM_CODEC_USE_ERROR_CONCEALMENT 0x20000
-/**< Conceal errors in decoded frames */
+/*!\brief The input frame should be passed to the decoder one fragment at a
+ * time */
 #define AOM_CODEC_USE_INPUT_FRAGMENTS 0x40000
-/**< The input frame should be passed to the decoder one fragment at a time */
+/*!\brief Enable frame-based multi-threading */
 #define AOM_CODEC_USE_FRAME_THREADING 0x80000
-/**< Enable frame-based multi-threading */
 
 /*!\brief Stream properties
  *
diff --git a/aom/aom_image.h b/aom/aom_image.h
index 1a3d111..b2f75e6 100644
--- a/aom/aom_image.h
+++ b/aom/aom_image.h
@@ -119,11 +119,10 @@
 
   int bps; /**< bits per sample (for packed formats) */
 
-  /* The following member may be set by the application to associate data
-   * with this image.
+  /*!\brief The following member may be set by the application to associate
+   * data with this image.
    */
-  void *user_priv; /**< may be set by the application to associate data
-                    *   with this image. */
+  void *user_priv;
 
   /* The following members should be treated as private. */
   unsigned char *img_data; /**< private */
diff --git a/aom_dsp/fastssim.c b/aom_dsp/fastssim.c
index 8d5d237..057101e 100644
--- a/aom_dsp/fastssim.c
+++ b/aom_dsp/fastssim.c
@@ -19,6 +19,7 @@
 #include "./aom_dsp_rtcd.h"
 #include "aom_dsp/ssim.h"
 #include "aom_ports/system_state.h"
+
 typedef struct fs_level fs_level;
 typedef struct fs_ctx fs_ctx;
 
diff --git a/av1/encoder/x86/quantize_ssse3_x86_64.asm b/av1/encoder/x86/quantize_ssse3_x86_64.asm
index 3a40a44..ad4ae27 100644
--- a/av1/encoder/x86/quantize_ssse3_x86_64.asm
+++ b/av1/encoder/x86/quantize_ssse3_x86_64.asm
@@ -174,7 +174,7 @@
   pshuflw                         m7, m8, 0x1
   pmaxsw                          m8, m7
   pextrw                          r6, m8, 0
-  mov                           [r2], r6w
+  mov                           [r2], r6w ; pextrw gave a 16-bit value: store a word, not all of r6
   RET
 
   ; skip-block, i.e. just write all zeroes
diff --git a/build/make/ads2armasm_ms.pl b/build/make/ads2armasm_ms.pl
index 576fdc5..8568a2d 100755
--- a/build/make/ads2armasm_ms.pl
+++ b/build/make/ads2armasm_ms.pl
@@ -9,10 +9,6 @@
 ## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 ##
 
-#
-
-##
-
 use FindBin;
 use lib $FindBin::Bin;
 use thumb;
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index abc3d57..adf45a3 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -9,10 +9,6 @@
 ## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 ##
 
-#
-
-##
-
 
 # ads2gas.pl
 # Author: Eric Fung (efung (at) acm.org)
diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index 591d379..31ec91d 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -9,10 +9,6 @@
 ## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 ##
 
-#
-
-##
-
 
 # ads2gas_apple.pl
 # Author: Eric Fung (efung (at) acm.org)
diff --git a/examples/aom_cx_set_ref.c b/examples/aom_cx_set_ref.c
index d2398ae..74fdec0 100644
--- a/examples/aom_cx_set_ref.c
+++ b/examples/aom_cx_set_ref.c
@@ -25,7 +25,7 @@
 // -----
 // This example encodes a raw video. And the last argument passed in specifies
 // the frame number to update the reference frame on. For example, run
-// examples/aom_cx_set_ref vp10 352 288 in.yuv out.ivf 4 30
+// examples/aom_cx_set_ref av1 352 288 in.yuv out.ivf 4 30
 // The parameter is parsed as follows:
 //
 //
diff --git a/third_party/libyuv/source/compare_win.cc b/third_party/libyuv/source/compare_win.cc
index 19806f2..8d7c8b0 100644
--- a/third_party/libyuv/source/compare_win.cc
+++ b/third_party/libyuv/source/compare_win.cc
@@ -64,9 +64,9 @@
 __declspec(naked)
 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
   __asm {
-    mov        eax, [esp + 4]    // src_a
-    mov        edx, [esp + 8]    // src_b
-    mov        ecx, [esp + 12]   // count
+    mov        eax, [esp + 4]  // src_a
+    mov        edx, [esp + 8]  // src_b
+    mov        ecx, [esp + 12]  // count
     vpxor      ymm0, ymm0, ymm0  // sum
     vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
     sub        edx, eax
diff --git a/third_party/libyuv/source/row_win.cc b/third_party/libyuv/source/row_win.cc
index 71be268..0c0037d 100644
--- a/third_party/libyuv/source/row_win.cc
+++ b/third_party/libyuv/source/row_win.cc
@@ -4851,23 +4851,23 @@
                           uint8* dst_argb, int width) {
   __asm {
     push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        eax, [esp + 4 + 4]  // src_argb0
+    mov        esi, [esp + 4 + 8]  // src_argb1
     mov        edx, [esp + 4 + 12]  // dst_argb
     mov        ecx, [esp + 4 + 16]  // width
     vpxor      ymm5, ymm5, ymm5     // constant 0
 
  convertloop:
-    vmovdqu    ymm1, [eax]        // read 8 pixels from src_argb0
+    vmovdqu    ymm1, [eax]  // read 8 pixels from src_argb0
     lea        eax, [eax + 32]
-    vmovdqu    ymm3, [esi]        // read 8 pixels from src_argb1
+    vmovdqu    ymm3, [esi]  // read 8 pixels from src_argb1
     lea        esi, [esi + 32]
-    vpunpcklbw ymm0, ymm1, ymm1   // low 4
-    vpunpckhbw ymm1, ymm1, ymm1   // high 4
-    vpunpcklbw ymm2, ymm3, ymm5   // low 4
-    vpunpckhbw ymm3, ymm3, ymm5   // high 4
-    vpmulhuw   ymm0, ymm0, ymm2   // src_argb0 * src_argb1 low 4
-    vpmulhuw   ymm1, ymm1, ymm3   // src_argb0 * src_argb1 high 4
+    vpunpcklbw ymm0, ymm1, ymm1  // low 4
+    vpunpckhbw ymm1, ymm1, ymm1  // high 4
+    vpunpcklbw ymm2, ymm3, ymm5  // low 4
+    vpunpckhbw ymm3, ymm3, ymm5  // high 4
+    vpmulhuw   ymm0, ymm0, ymm2  // src_argb0 * src_argb1 low 4
+    vpmulhuw   ymm1, ymm1, ymm3  // src_argb0 * src_argb1 high 4
     vpackuswb  ymm0, ymm0, ymm1
     vmovdqu    [edx], ymm0
     lea        edx, [edx + 32]
@@ -5512,8 +5512,8 @@
   __asm {
     push       esi
     push       edi
-    mov        edi, [esp + 8 + 4]   // dst_ptr
-    mov        esi, [esp + 8 + 8]   // src_ptr
+    mov        edi, [esp + 8 + 4]  // dst_ptr
+    mov        esi, [esp + 8 + 8]  // src_ptr
     mov        edx, [esp + 8 + 12]  // src_stride
     mov        ecx, [esp + 8 + 16]  // dst_width
     mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
@@ -5523,11 +5523,11 @@
     je         xloop100  // 0 / 128.  Blend 100 / 0.
     sub        edi, esi
     cmp        eax, 32
-    je         xloop75   // 32 / 128 is 0.25.  Blend 75 / 25.
+    je         xloop75  // 32 / 128 is 0.25.  Blend 75 / 25.
     cmp        eax, 64
-    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
+    je         xloop50  // 64 / 128 is 0.50.  Blend 50 / 50.
     cmp        eax, 96
-    je         xloop25   // 96 / 128 is 0.75.  Blend 25 / 75.
+    je         xloop25  // 96 / 128 is 0.75.  Blend 25 / 75.
 
     vmovd      xmm0, eax  // high fraction 0..127
     neg        eax
@@ -5547,14 +5547,14 @@
     vpmaddubsw ymm1, ymm1, ymm5
     vpsrlw     ymm0, ymm0, 7
     vpsrlw     ymm1, ymm1, 7
-    vpackuswb  ymm0, ymm0, ymm1  // unmutates
+    vpackuswb  ymm0, ymm0, ymm1            // unmutates
     vmovdqu    [esi + edi], ymm0
     lea        esi, [esi + 32]
     sub        ecx, 32
     jg         xloop
     jmp        xloop99
 
-   // Blend 25 / 75.
+    // Blend 25 / 75.
  xloop25:
    vmovdqu    ymm0, [esi]
    vmovdqu    ymm1, [esi + edx]
@@ -5566,7 +5566,7 @@
    jg         xloop25
    jmp        xloop99
 
-   // Blend 50 / 50.
+    // Blend 50 / 50.
  xloop50:
    vmovdqu    ymm0, [esi]
    vpavgb     ymm0, ymm0, [esi + edx]
@@ -5576,7 +5576,7 @@
    jg         xloop50
    jmp        xloop99
 
-   // Blend 75 / 25.
+    // Blend 75 / 25.
  xloop75:
    vmovdqu    ymm1, [esi]
    vmovdqu    ymm0, [esi + edx]
@@ -5588,7 +5588,7 @@
    jg         xloop75
    jmp        xloop99
 
-   // Blend 100 / 0 - Copy row unchanged.
+    // Blend 100 / 0 - Copy row unchanged.
  xloop100:
    rep movsb
 
diff --git a/third_party/libyuv/source/scale_win.cc b/third_party/libyuv/source/scale_win.cc
index c3896eb..4e931d8 100644
--- a/third_party/libyuv/source/scale_win.cc
+++ b/third_party/libyuv/source/scale_win.cc
@@ -232,12 +232,12 @@
 void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                               uint8* dst_ptr, int dst_width) {
   __asm {
-    mov         eax, [esp + 4]        // src_ptr
-                                      // src_stride
-    mov         edx, [esp + 12]       // dst_ptr
-    mov         ecx, [esp + 16]       // dst_width
+    mov         eax, [esp + 4]  // src_ptr
+    // src_stride
+    mov         edx, [esp + 12]  // dst_ptr
+    mov         ecx, [esp + 16]  // dst_width
 
-    vpcmpeqb    ymm4, ymm4, ymm4      // '1' constant, 8b
+    vpcmpeqb    ymm4, ymm4, ymm4  // '1' constant, 8b
     vpsrlw      ymm4, ymm4, 15
     vpackuswb   ymm4, ymm4, ymm4
     vpxor       ymm5, ymm5, ymm5      // constant 0
@@ -247,12 +247,12 @@
     vmovdqu     ymm1, [eax + 32]
     lea         eax,  [eax + 64]
 
-    vpmaddubsw  ymm0, ymm0, ymm4      // average horizontally
+    vpmaddubsw  ymm0, ymm0, ymm4  // average horizontally
     vpmaddubsw  ymm1, ymm1, ymm4
-    vpavgw      ymm0, ymm0, ymm5      // (x + 1) / 2
+    vpavgw      ymm0, ymm0, ymm5  // (x + 1) / 2
     vpavgw      ymm1, ymm1, ymm5
     vpackuswb   ymm0, ymm0, ymm1
-    vpermq      ymm0, ymm0, 0xd8      // unmutate vpackuswb
+    vpermq      ymm0, ymm0, 0xd8       // unmutate vpackuswb
 
     vmovdqu     [edx], ymm0
     lea         edx, [edx + 32]
@@ -270,29 +270,29 @@
                            uint8* dst_ptr, int dst_width) {
   __asm {
     push        esi
-    mov         eax, [esp + 4 + 4]    // src_ptr
-    mov         esi, [esp + 4 + 8]    // src_stride
-    mov         edx, [esp + 4 + 12]   // dst_ptr
-    mov         ecx, [esp + 4 + 16]   // dst_width
+    mov         eax, [esp + 4 + 4]  // src_ptr
+    mov         esi, [esp + 4 + 8]  // src_stride
+    mov         edx, [esp + 4 + 12]  // dst_ptr
+    mov         ecx, [esp + 4 + 16]  // dst_width
 
-    vpcmpeqb    ymm4, ymm4, ymm4      // '1' constant, 8b
+    vpcmpeqb    ymm4, ymm4, ymm4  // '1' constant, 8b
     vpsrlw      ymm4, ymm4, 15
     vpackuswb   ymm4, ymm4, ymm4
     vpxor       ymm5, ymm5, ymm5      // constant 0
 
   wloop:
-    vmovdqu     ymm0, [eax]           // average rows
+    vmovdqu     ymm0, [eax]  // average rows
     vmovdqu     ymm1, [eax + 32]
     vpavgb      ymm0, ymm0, [eax + esi]
     vpavgb      ymm1, ymm1, [eax + esi + 32]
     lea         eax,  [eax + 64]
 
-    vpmaddubsw  ymm0, ymm0, ymm4      // average horizontally
+    vpmaddubsw  ymm0, ymm0, ymm4  // average horizontally
     vpmaddubsw  ymm1, ymm1, ymm4
-    vpavgw      ymm0, ymm0, ymm5      // (x + 1) / 2
+    vpavgw      ymm0, ymm0, ymm5  // (x + 1) / 2
     vpavgw      ymm1, ymm1, ymm5
     vpackuswb   ymm0, ymm0, ymm1
-    vpermq      ymm0, ymm0, 0xd8      // unmutate vpackuswb
+    vpermq      ymm0, ymm0, 0xd8  // unmutate vpackuswb
 
     vmovdqu     [edx], ymm0
     lea         edx, [edx + 32]
@@ -831,21 +831,21 @@
 __declspec(naked)
 void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
   __asm {
-    mov         eax, [esp + 4]   // src_ptr
-    mov         edx, [esp + 8]   // dst_ptr
+    mov         eax, [esp + 4]  // src_ptr
+    mov         edx, [esp + 8]  // dst_ptr
     mov         ecx, [esp + 12]  // src_width
     vpxor       ymm5, ymm5, ymm5
 
-  // sum rows
+    // sum rows
   xloop:
-    vmovdqu     ymm3, [eax]       // read 32 bytes
+    vmovdqu     ymm3, [eax]  // read 32 bytes
     lea         eax, [eax + 32]
     vpermq      ymm3, ymm3, 0xd8  // unmutate for vpunpck
     vpunpcklbw  ymm2, ymm3, ymm5
     vpunpckhbw  ymm3, ymm3, ymm5
-    vpaddusw    ymm0, ymm2, [edx] // sum 16 words
+    vpaddusw    ymm0, ymm2, [edx]  // sum 16 words
     vpaddusw    ymm1, ymm3, [edx + 32]
-    vmovdqu     [edx], ymm0       // write 32 words to destination
+    vmovdqu     [edx], ymm0  // write 32 words to destination
     vmovdqu     [edx + 32], ymm1
     lea         edx, [edx + 64]
     sub         ecx, 32