Merge "configure.sh: Remove armv6-darwin target."
diff --git a/vp10/encoder/lookahead.c b/vp10/encoder/lookahead.c
index dce0139..3185cb6 100644
--- a/vp10/encoder/lookahead.c
+++ b/vp10/encoder/lookahead.c
@@ -20,8 +20,8 @@
 
 /* Return the buffer at the given absolute index and increment the index */
 static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
-                                   unsigned int *idx) {
-  unsigned int index = *idx;
+                                   int *idx) {
+  int index = *idx;
   struct lookahead_entry *buf = ctx->buf + index;
 
   assert(index < ctx->max_sz);
@@ -35,7 +35,7 @@
 void vp10_lookahead_destroy(struct lookahead_ctx *ctx) {
   if (ctx) {
     if (ctx->buf) {
-      unsigned int i;
+      int i;
 
       for (i = 0; i < ctx->max_sz; i++)
         vpx_free_frame_buffer(&ctx->buf[i].img);
@@ -221,9 +221,9 @@
 
   if (index >= 0) {
     // Forward peek
-    if (index < (int)ctx->sz) {
+    if (index < ctx->sz) {
       index += ctx->read_idx;
-      if (index >= (int)ctx->max_sz)
+      if (index >= ctx->max_sz)
         index -= ctx->max_sz;
       buf = ctx->buf + index;
     }
diff --git a/vp10/encoder/lookahead.h b/vp10/encoder/lookahead.h
index 22429ae..f650f80 100644
--- a/vp10/encoder/lookahead.h
+++ b/vp10/encoder/lookahead.h
@@ -31,10 +31,10 @@
 #define MAX_PRE_FRAMES 1
 
 struct lookahead_ctx {
-  unsigned int max_sz;         /* Absolute size of the queue */
-  unsigned int sz;             /* Number of buffers currently in the queue */
-  unsigned int read_idx;       /* Read index */
-  unsigned int write_idx;      /* Write index */
+  int max_sz;                  /* Absolute size of the queue */
+  int sz;                      /* Number of buffers currently in the queue */
+  int read_idx;                /* Read index */
+  int write_idx;               /* Write index */
   struct lookahead_entry *buf; /* Buffer list */
 };
 
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index def9b8c..441280c 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -20,8 +20,8 @@
 
 /* Return the buffer at the given absolute index and increment the index */
 static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
-                                   unsigned int *idx) {
-  unsigned int index = *idx;
+                                   int *idx) {
+  int index = *idx;
   struct lookahead_entry *buf = ctx->buf + index;
 
   assert(index < ctx->max_sz);
@@ -35,7 +35,7 @@
 void vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
   if (ctx) {
     if (ctx->buf) {
-      unsigned int i;
+      int i;
 
       for (i = 0; i < ctx->max_sz; i++)
         vpx_free_frame_buffer(&ctx->buf[i].img);
@@ -221,9 +221,9 @@
 
   if (index >= 0) {
     // Forward peek
-    if (index < (int)ctx->sz) {
+    if (index < ctx->sz) {
       index += ctx->read_idx;
-      if (index >= (int)ctx->max_sz)
+      if (index >= ctx->max_sz)
         index -= ctx->max_sz;
       buf = ctx->buf + index;
     }
diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h
index 1382038..db0fd1c 100644
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -36,10 +36,10 @@
 #define MAX_PRE_FRAMES 1
 
 struct lookahead_ctx {
-  unsigned int max_sz;         /* Absolute size of the queue */
-  unsigned int sz;             /* Number of buffers currently in the queue */
-  unsigned int read_idx;       /* Read index */
-  unsigned int write_idx;      /* Write index */
+  int max_sz;                  /* Absolute size of the queue */
+  int sz;                      /* Number of buffers currently in the queue */
+  int read_idx;                /* Read index */
+  int write_idx;               /* Write index */
   struct lookahead_entry *buf; /* Buffer list */
 };
 
diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index f4a149d..883507a 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -147,8 +147,9 @@
   const __m128i l32 = _mm_set1_epi8(2);
   // Difference between level 2 and level 1 is 1.
   const __m128i l21 = _mm_set1_epi8(1);
+  const int b_height = (4 << b_height_log2_lookup[bs]) >> 1;
 
-  for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
+  for (r = 0; r < b_height; ++r) {
     memcpy(sig_buffer[r], sig, width);
     memcpy(sig_buffer[r] + width, sig + sig_stride, width);
     memcpy(mc_running_buffer[r], mc_running_avg_y, width);
@@ -188,8 +189,8 @@
       // Only apply the adjustment for max delta up to 3.
       if (delta < 4) {
         const __m128i k_delta = _mm_set1_epi8(delta);
-        running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
-        for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
+        running_avg_y -= avg_y_stride * (b_height << 1);
+        for (r = 0; r < b_height; ++r) {
           acc_diff = vp9_denoiser_adj_16x1_sse2(
               sig_buffer[r], mc_running_buffer[r], running_buffer[r],
               k_0, k_delta, acc_diff);
@@ -235,38 +236,37 @@
   const __m128i l32 = _mm_set1_epi8(2);
   // Difference between level 2 and level 1 is 1.
   const __m128i l21 = _mm_set1_epi8(1);
+  const int b_width = (4 << b_width_log2_lookup[bs]);
+  const int b_height = (4 << b_height_log2_lookup[bs]);
+  const int b_width_shift4 = b_width >> 4;
 
-  for (c = 0; c < 4; ++c) {
-    for (r = 0; r < 4; ++r) {
+  for (r = 0; r < 4; ++r) {
+    for (c = 0; c < b_width_shift4; ++c) {
       acc_diff[c][r] = _mm_setzero_si128();
     }
   }
 
-  for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
-    for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
-      acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2(
+  for (r = 0; r < b_height; ++r) {
+    for (c = 0; c < b_width_shift4; ++c) {
+      acc_diff[c][r>>4] = vp9_denoiser_16x1_sse2(
           sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
-          &k_8, &k_16, &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
+          &k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r>>4]);
       // Update pointers for next iteration.
       sig += 16;
       mc_running_avg_y += 16;
       running_avg_y += 16;
     }
 
-    if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
-      for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
-        sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
+    if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
+      for (c = 0; c < b_width_shift4; ++c) {
+        sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
       }
     }
 
     // Update pointers for next iteration.
-    sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
-    mc_running_avg_y = mc_running_avg_y -
-                       16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
-                       mc_avg_y_stride;
-    running_avg_y = running_avg_y -
-                    16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
-                    avg_y_stride;
+    sig = sig - b_width + sig_stride;
+    mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride;
+    running_avg_y = running_avg_y - b_width + avg_y_stride;
   }
 
   {
@@ -278,33 +278,29 @@
       // Only apply the adjustment for max delta up to 3.
       if (delta < 4) {
         const __m128i k_delta = _mm_set1_epi8(delta);
-        sig -= sig_stride * (4 << b_height_log2_lookup[bs]);
-        mc_running_avg_y -= mc_avg_y_stride * (4 << b_height_log2_lookup[bs]);
-        running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
+        sig -= sig_stride * b_height;
+        mc_running_avg_y -= mc_avg_y_stride * b_height;
+        running_avg_y -= avg_y_stride * b_height;
         sum_diff = 0;
-        for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
-          for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
-            acc_diff[c>>4][r>>4] = vp9_denoiser_adj_16x1_sse2(
+        for (r = 0; r < b_height; ++r) {
+          for (c = 0; c < b_width_shift4; ++c) {
+            acc_diff[c][r>>4] = vp9_denoiser_adj_16x1_sse2(
                 sig, mc_running_avg_y, running_avg_y, k_0,
-                k_delta, acc_diff[c>>4][r>>4]);
+                k_delta, acc_diff[c][r>>4]);
             // Update pointers for next iteration.
             sig += 16;
             mc_running_avg_y += 16;
             running_avg_y += 16;
           }
 
-          if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
-            for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
-              sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
+          if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
+            for (c = 0; c < b_width_shift4; ++c) {
+              sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
             }
           }
-          sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
-          mc_running_avg_y = mc_running_avg_y -
-                             16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
-                             mc_avg_y_stride;
-          running_avg_y = running_avg_y -
-                          16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
-                          avg_y_stride;
+          sig = sig - b_width + sig_stride;
+          mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride;
+          running_avg_y = running_avg_y - b_width + avg_y_stride;
         }
         if (abs(sum_diff) > sum_diff_thresh) {
           return COPY_BLOCK;