Port dering experiment from aom

Manually cherry-picked:
1579133 Use OD_DIVU for small divisions in temporal_filter.
0312229 Replace divides by small values with multiplies.
9c48eec Removing divisions from od_dir_find8()
0950ed8 Merge "Port active map / cyclic refresh fixes to vp10."
efefdad Port active map / cyclic refresh fixes to vp10.
1eaf748 Port switch to 9-bit rate cost to aom.
0b1606e Only build deringing code when --enable-dering.
e2511e1 Deringing cleanup: don't hardcode the number of levels
8fe5c5d Rename dering_in to od_dering_in to sync with Daala
4eb1380 Makes second filters for 45-degree directions horizontal
7f4c3f5 Removes the superblock variance contribution to the threshold
3dc56f9 Simplifying arithmetic by using multiply+shift
cf2aaba Return 0 explicitly for OD_ILOG(0).
49ca22a Use the Daala implementation of OD_ILOG().
8518724 Fix compiler warning in od_dering.c.
485d6a6 Prevent multiple inclusion of odintrin.h.
51b7a99 Adds the Daala deringing filter as experimental

Note that a few of the changes were already in the libvpx codebase.

Change-Id: I1c32ee7694e5ad22c98b06ff97737cd792cd88ae
diff --git a/configure b/configure
index f3fcd34..633e3ed 100755
--- a/configure
+++ b/configure
@@ -253,6 +253,7 @@
     fp_mb_stats
     emulate_hardware
     clpf
+    dering
     var_tx
     rect_tx
     ref_mv
diff --git a/test/acm_random.h b/test/acm_random.h
index c2f6b0e..eae2cf9 100644
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -24,6 +24,11 @@
   explicit ACMRandom(int seed) : random_(seed) {}
 
   void Reset(int seed) { random_.Reseed(seed); }
+
+  uint32_t Rand31(void) {
+    return random_.Generate(testing::internal::Random::kMaxRange);
+  }
+
   uint16_t Rand16(void) {
     const uint32_t value =
         random_.Generate(testing::internal::Random::kMaxRange);
diff --git a/test/divu_small_test.cc b/test/divu_small_test.cc
new file mode 100644
index 0000000..a7cfbf7
--- /dev/null
+++ b/test/divu_small_test.cc
@@ -0,0 +1,52 @@
+/*Daala video codec
+Copyright (c) 2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#include <stdlib.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "vp10/common/odintrin.h"
+
+using libvpx_test::ACMRandom;
+
+TEST(Daala, TestDIVUuptoMAX) {
+  for (int d = 1; d <= OD_DIVU_DMAX; d++) {
+    for (uint32_t x = 1; x <= 1000000; x++) {
+      GTEST_ASSERT_EQ(x/d, OD_DIVU_SMALL(x, d)) << "x=" << x << " d=" << d <<
+       " x/d=" << (x/d) << " != " << OD_DIVU_SMALL(x, d);
+    }
+  }
+}
+
+TEST(Daala, TestDIVUrandI31) {  // random dividends drawn from Rand31()
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int d = 1; d <= OD_DIVU_DMAX; d++) {  // DMAX inclusive, as in UptoMAX
+    for (int i = 0; i < 1000000; i++) {
+      uint32_t x = rnd.Rand31();
+      GTEST_ASSERT_EQ(x/d, OD_DIVU_SMALL(x, d)) << "x=" << x << " d=" << d <<
+       " x/d=" << (x/d) << " != " << OD_DIVU_SMALL(x, d);
+    }
+  }
+}
diff --git a/test/test.mk b/test/test.mk
index bc8424c..feb3f49 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -102,6 +102,7 @@
 LIBVPX_TEST_SRCS-yes                   += superframe_test.cc
 LIBVPX_TEST_SRCS-yes                   += tile_independence_test.cc
 LIBVPX_TEST_SRCS-yes                   += boolcoder_test.cc
+LIBVPX_TEST_SRCS-yes                   += divu_small_test.cc
 #LIBVPX_TEST_SRCS-yes                   += encoder_parms_get_to_decoder.cc
 endif
 
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 9c04812..6abc290 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -239,6 +239,8 @@
   int dq_off_index;
   int send_dq_bit;
 #endif  // CONFIG_NEW_QUANT
+  /* deringing gain *per-superblock* */
+  int8_t dering_gain;
 } MB_MODE_INFO;
 
 typedef struct MODE_INFO {
diff --git a/vp10/common/dering.c b/vp10/common/dering.c
new file mode 100644
index 0000000..f657c83
--- /dev/null
+++ b/vp10/common/dering.c
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+#include <math.h>
+
+#include "./vpx_scale_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vp10/common/dering.h"
+#include "vp10/common/onyxc_int.h"
+#include "vp10/common/reconinter.h"
+#include "vp10/common/od_dering.h"
+
+/* Maps per-superblock gain index gi to a level scaled from global_level. */
+int compute_level_from_index(int global_level, int gi) {
+  static const int dering_gains[DERING_REFINEMENT_LEVELS] = {0, 11, 16, 22};
+  int level;
+  if (global_level == 0) return 0;  /* level 0 stays 0 for every index */
+  level = (global_level*dering_gains[gi] + 8) >> 4;  /* rounded gain/16 */
+  return clamp(level, gi, MAX_DERING_LEVEL-1);  /* never below gi itself */
+}
+
+int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col) {
+  int r, c;
+  int maxc, maxr;
+  int skip = 1;  /* cleared once any visited block has mbmi.skip == 0 */
+  maxc = cm->mi_cols - mi_col;  /* columns remaining up to cm->mi_cols */
+  maxr = cm->mi_rows - mi_row;  /* rows remaining up to cm->mi_rows */
+  if (maxr > MI_BLOCK_SIZE) maxr = MI_BLOCK_SIZE;  /* scan at most one SB */
+  if (maxc > MI_BLOCK_SIZE) maxc = MI_BLOCK_SIZE;
+  for (r = 0; r < maxr; r++) {
+    for (c = 0; c < maxc; c++) {
+      skip = skip &&
+          cm->mi_grid_visible[(mi_row + r)*cm->mi_stride + mi_col + c]->
+          mbmi.skip;
+    }
+  }
+  return skip;  /* nonzero iff every scanned block is skipped */
+}
+
+void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                       MACROBLOCKD *xd, int global_level) {
+  int r, c;
+  int sbr, sbc;
+  int nhsb, nvsb;  /* superblock counts: horizontal, vertical */
+  od_dering_in *src[3];  /* per-plane copy of the unfiltered frame */
+  unsigned char *bskip;  /* mbmi.skip flag of every mi unit */
+  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = {{0}};
+  int stride;
+  int bsize[3];
+  int dec[3];
+  int pli;
+  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
+  nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;  /* round up */
+  nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;
+  bskip = vpx_malloc(sizeof(*bskip)*cm->mi_rows*cm->mi_cols);
+  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
+  for (pli = 0; pli < 3; pli++) {
+    dec[pli] = xd->plane[pli].subsampling_x;
+    bsize[pli] = 8 >> dec[pli];  /* samples per mi unit in this plane */
+  }
+  stride = bsize[0]*cm->mi_cols;  /* one row stride for all src[] planes */
+  for (pli = 0; pli < 3; pli++) {  /* copy each plane; 64 samples per mi */
+    src[pli] = vpx_malloc(sizeof(*src[pli])*cm->mi_rows*cm->mi_cols*64);
+    for (r = 0; r < bsize[pli]*cm->mi_rows; ++r) {
+      for (c = 0; c < bsize[pli]*cm->mi_cols; ++c) {
+#if CONFIG_VPX_HIGHBITDEPTH
+        if (cm->use_highbitdepth) {
+          src[pli][r * stride + c] =
+              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
+              [r * xd->plane[pli].dst.stride + c];
+        } else {
+#endif
+          src[pli][r * stride + c] =
+              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
+#if CONFIG_VPX_HIGHBITDEPTH
+        }
+#endif
+      }
+    }
+  }
+  for (r = 0; r < cm->mi_rows; ++r) {
+    for (c = 0; c < cm->mi_cols; ++c) {
+      const MB_MODE_INFO *mbmi =
+          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
+      bskip[r * cm->mi_cols + c] = mbmi->skip;
+    }
+  }
+  for (sbr = 0; sbr < nvsb; sbr++) {
+    for (sbc = 0; sbc < nhsb; sbc++) {
+      int level;
+      int nhb, nvb;  /* mi units inside this superblock, clipped at edges */
+      nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE*sbc);
+      nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE*sbr);
+      for (pli = 0; pli < 3; pli++) {
+        int16_t dst[MI_BLOCK_SIZE*MI_BLOCK_SIZE*8*8];
+        int threshold;
+#if DERING_REFINEMENT
+        level = compute_level_from_index(
+            global_level,
+            cm->mi_grid_visible[MI_BLOCK_SIZE*sbr*cm->mi_stride +
+            MI_BLOCK_SIZE*sbc]->mbmi.dering_gain);
+#else
+        level = global_level;
+#endif
+        /* FIXME: This is a temporary hack that uses more conservative
+           deringing for chroma. */
+        if (pli) level = (level*5 + 4) >> 3;
+        if (sb_all_skip(cm, sbr*MI_BLOCK_SIZE, sbc*MI_BLOCK_SIZE)) level = 0;
+        threshold = level << coeff_shift;
+        od_dering(
+            &OD_DERING_VTBL_C,
+            dst,
+            MI_BLOCK_SIZE*bsize[pli],
+            &src[pli][sbr*stride*bsize[pli]*MI_BLOCK_SIZE +
+            sbc*bsize[pli]*MI_BLOCK_SIZE],
+            stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
+            &bskip[MI_BLOCK_SIZE*sbr*cm->mi_cols + MI_BLOCK_SIZE*sbc],
+            cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift);
+        for (r = 0; r < bsize[pli]*nvb; ++r) {  /* write dst to the frame */
+          for (c = 0; c < bsize[pli]*nhb; ++c) {
+#if CONFIG_VPX_HIGHBITDEPTH
+            if (cm->use_highbitdepth) {
+              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
+                  [xd->plane[pli].dst.stride*(bsize[pli]*MI_BLOCK_SIZE*sbr + r)
+                  + sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
+                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
+            } else {
+#endif
+              xd->plane[pli].dst.buf[xd->plane[pli].dst.stride*
+                  (bsize[pli]*MI_BLOCK_SIZE*sbr + r) +
+                  sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
+                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
+#if CONFIG_VPX_HIGHBITDEPTH
+            }
+#endif
+          }
+        }
+      }
+    }
+  }
+  for (pli = 0; pli < 3; pli++) {
+    vpx_free(src[pli]);
+  }
+  vpx_free(bskip);
+}
diff --git a/vp10/common/dering.h b/vp10/common/dering.h
new file mode 100644
index 0000000..946366a
--- /dev/null
+++ b/vp10/common/dering.h
@@ -0,0 +1,33 @@
+#ifndef VP10_COMMON_DERING_H_
+#define VP10_COMMON_DERING_H_
+
+#include "vp10/common/od_dering.h"
+#include "vp10/common/onyxc_int.h"
+#include "vpx/vpx_integer.h"
+#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DERING_LEVEL_BITS 6
+#define MAX_DERING_LEVEL (1 << DERING_LEVEL_BITS)
+
+#define DERING_REFINEMENT 1
+#define DERING_REFINEMENT_BITS 2
+#define DERING_REFINEMENT_LEVELS 4
+
+int compute_level_from_index(int global_level, int gi);
+int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col);
+void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                       MACROBLOCKD *xd, int global_level);
+
+int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
+                      VP10_COMMON *cm,
+                      MACROBLOCKD *xd);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // VP10_COMMON_DERING_H_
diff --git a/vp10/common/od_dering.c b/vp10/common/od_dering.c
new file mode 100644
index 0000000..af89b80
--- /dev/null
+++ b/vp10/common/od_dering.c
@@ -0,0 +1,352 @@
+/*Daala video codec
+Copyright (c) 2014-2016 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <math.h>
+#include "dering.h"
+
+const od_dering_opt_vtbl OD_DERING_VTBL_C = {
+  {od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c},
+  {od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c}
+};
+
+/* Generated from gen_filter_tables.c. */
+const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
+  {-1*OD_FILT_BSTRIDE + 1, -2*OD_FILT_BSTRIDE + 2, -3*OD_FILT_BSTRIDE + 3  },
+  { 0*OD_FILT_BSTRIDE + 1, -1*OD_FILT_BSTRIDE + 2, -1*OD_FILT_BSTRIDE + 3  },
+  { 0*OD_FILT_BSTRIDE + 1,  0*OD_FILT_BSTRIDE + 2,  0*OD_FILT_BSTRIDE + 3  },
+  { 0*OD_FILT_BSTRIDE + 1,  1*OD_FILT_BSTRIDE + 2,  1*OD_FILT_BSTRIDE + 3  },
+  { 1*OD_FILT_BSTRIDE + 1,  2*OD_FILT_BSTRIDE + 2,  3*OD_FILT_BSTRIDE + 3  },
+  { 1*OD_FILT_BSTRIDE + 0,  2*OD_FILT_BSTRIDE + 1,  3*OD_FILT_BSTRIDE + 1  },
+  { 1*OD_FILT_BSTRIDE + 0,  2*OD_FILT_BSTRIDE + 0,  3*OD_FILT_BSTRIDE + 0  },
+  { 1*OD_FILT_BSTRIDE + 0,  2*OD_FILT_BSTRIDE - 1,  3*OD_FILT_BSTRIDE - 1  },
+};
+
+const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = {
+  0, 0.5, 0.707, 1, 1.41, 2
+};
+
+/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
+   The search minimizes the weighted variance along all the lines in a
+   particular direction, i.e. the squared error between the input and a
+   "predicted" block where each pixel is replaced by the average along a line
+   in a particular direction. Since each direction has the same sum(x^2) term,
+   that term is never computed. See Section 2, step 2, of:
+   http://jmvalin.ca/notes/intra_paint.pdf */
+static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var,
+    int coeff_shift) {
+  int i;
+  int32_t cost[8] = {0};
+  int partial[8][15] = {{0}};
+  int32_t best_cost = 0;
+  int best_dir = 0;
+  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
+     The output is then 840 times larger, but we don't care for finding
+     the max. */
+  static const int div_table[] = {0, 840, 420, 280, 210, 168, 140, 120, 105};
+  for (i = 0; i < 8; i++) {
+    int j;
+    for (j = 0; j < 8; j++) {
+      int x;
+      /* We subtract 128 here to reduce the maximum range of the squared
+         partial sums. */
+      x = (img[i*stride + j] >> coeff_shift) - 128;
+      partial[0][i + j] += x;
+      partial[1][i + j/2] += x;
+      partial[2][i] += x;
+      partial[3][3 + i - j/2] += x;
+      partial[4][7 + i - j] += x;
+      partial[5][3 - i/2 + j] += x;
+      partial[6][j] += x;
+      partial[7][i/2 + j] += x;
+    }
+  }
+  for (i = 0; i < 8; i++) {
+    cost[2] += partial[2][i]*partial[2][i];
+    cost[6] += partial[6][i]*partial[6][i];
+  }
+  cost[2] *= div_table[8];
+  cost[6] *= div_table[8];
+  for (i = 0; i < 7; i++) {
+    cost[0] += (partial[0][i]*partial[0][i]
+     + partial[0][14 - i]*partial[0][14 - i])*div_table[i + 1];
+    cost[4] += (partial[4][i]*partial[4][i]
+     + partial[4][14 - i]*partial[4][14 - i])*div_table[i + 1];
+  }
+  cost[0] += partial[0][7]*partial[0][7]*div_table[8];
+  cost[4] += partial[4][7]*partial[4][7]*div_table[8];
+  for (i = 1; i < 8; i += 2) {
+    int j;
+    for (j = 0; j < 4 + 1; j++) {
+      cost[i] += partial[i][3 + j]*partial[i][3 + j];
+    }
+    cost[i] *= div_table[8];
+    for (j = 0; j < 4 - 1; j++) {
+      cost[i] += (partial[i][j]*partial[i][j]
+       + partial[i][10 - j]*partial[i][10 - j])*div_table[2*j + 2];
+    }
+  }
+  for (i = 0; i < 8; i++) {
+    if (cost[i] > best_cost) {
+      best_cost = cost[i];
+      best_dir = i;
+    }
+  }
+  /* Difference between the optimal variance and the variance along the
+     orthogonal direction. Again, the sum(x^2) terms cancel out. */
+  *var = best_cost - cost[(best_dir + 4) & 7];
+  /* We'd normally divide by 840, but dividing by 1024 is close enough
+     for what we're going to do with this. */
+  *var >>= 10;
+  return best_dir;
+}
+
+#define OD_DERING_VERY_LARGE (30000)
+#define OD_DERING_INBUF_SIZE ((OD_BSIZE_MAX + 2*OD_FILT_BORDER)*\
+ (OD_BSIZE_MAX + 2*OD_FILT_BORDER))
+
+/* Smooth in the direction detected. */
+void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
+ int ln, int threshold, int dir) {
+  int i;
+  int j;
+  int k;
+  static const int taps[3] = {3, 2, 2};  /* weight per tap distance k */
+  for (i = 0; i < 1 << ln; i++) {  /* block is (1 << ln) x (1 << ln) */
+    for (j = 0; j < 1 << ln; j++) {
+      int16_t sum;
+      int16_t xx;
+      int16_t yy;
+      xx = in[i*OD_FILT_BSTRIDE + j];  /* centre sample */
+      sum= 0;
+      for (k = 0; k < 3; k++) {
+        int16_t p0;
+        int16_t p1;
+        p0 = in[i*OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]]
+         - xx;
+        p1 = in[i*OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]]
+         - xx;
+        if (abs(p0) < threshold) sum += taps[k]*p0;  /* drop large diffs */
+        if (abs(p1) < threshold) sum += taps[k]*p1;
+      }
+      yy = xx + ((sum + 8) >> 4);  /* add the rounded sum/16 */
+      y[i*ystride + j] = yy;
+    }
+  }
+}
+
+void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold, int dir) {
+  od_filter_dering_direction_c(y, ystride, in, 2, threshold, dir);
+}
+
+void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold, int dir) {
+  od_filter_dering_direction_c(y, ystride, in, 3, threshold, dir);
+}
+
+/* Smooth in the direction orthogonal to what was detected. */
+void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
+ const od_dering_in *x, int xstride, int ln, int threshold, int dir) {
+  int i;
+  int j;
+  int offset;
+  if (dir > 0 && dir < 4) offset = OD_FILT_BSTRIDE;
+  else offset = 1;
+  for (i = 0; i < 1 << ln; i++) {
+    for (j = 0; j < 1 << ln; j++) {
+      int16_t athresh;
+      int16_t yy;
+      int16_t sum;
+      int16_t p;
+      /* Deringing orthogonal to the direction uses a tighter threshold
+         because we want to be conservative. We've presumably already
+         achieved some deringing, so the amount of change is expected
+         to be low. Also, since we might be filtering across an edge, we
+         want to make sure not to blur it. That being said, we might want
+         to be a little bit more aggressive on pure horizontal/vertical
+         since the ringing there tends to be directional, so it doesn't
+         get removed by the directional filtering. */
+      athresh = OD_MINI(threshold, threshold/3
+       + abs(in[i*OD_FILT_BSTRIDE + j] - x[i*xstride + j]));
+      yy = in[i*OD_FILT_BSTRIDE + j];
+      sum = 0;
+      p = in[i*OD_FILT_BSTRIDE + j + offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      p = in[i*OD_FILT_BSTRIDE + j - offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      p = in[i*OD_FILT_BSTRIDE + j + 2*offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      p = in[i*OD_FILT_BSTRIDE + j - 2*offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      y[i*ystride + j] = yy + ((3*sum + 8) >> 4);
+    }
+  }
+}
+
+void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x, int xstride, int threshold,
+ int dir) {
+  od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 2, threshold, dir);
+}
+
+void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x, int xstride, int threshold,
+ int dir) {
+  od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 3, threshold, dir);
+}
+
+/* This table approximates x^0.16 with the index being log2(x). It is clamped
+   to [.5, 3]. The table is computed as:
+   round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
+static const int16_t OD_THRESH_TABLE_Q8[18] = {
+  128, 134, 150, 168, 188, 210, 234, 262,
+  292, 327, 365, 408, 455, 509, 569, 635,
+  710, 768,
+};
+
+/* Compute deringing filter threshold for each 8x8 block based on the
+   directional variance difference. A high variance difference means that we
+   have a highly directional pattern (e.g. a high contrast edge), so we can
+   apply more deringing. A low variance means that we either have a low
+   contrast edge, or a non-directional texture, so we want to be careful not
+   to blur. */
+static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+ int threshold, int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+ int nhb, int nvb) {
+  int bx;
+  int by;
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      int v1;
+      /* We use the variance of 8x8 blocks to adjust the threshold. */
+      v1 = OD_MINI(32767, var[by][bx] >> 6);
+      thresh[by][bx] = (threshold*OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
+    }
+  }
+}
+
+void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
+ const od_dering_in *x, int xstride, int nhb, int nvb, int sbx, int sby,
+ int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+ int pli, unsigned char *bskip, int skip_stride, int threshold, int overlap,
+ int coeff_shift) {
+  int i;
+  int j;
+  int bx;
+  int by;
+  int16_t inbuf[OD_DERING_INBUF_SIZE];
+  int16_t *in;
+  int bsize;
+  int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
+  int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
+  bsize = 3 - xdec;
+  in = inbuf + OD_FILT_BORDER*OD_FILT_BSTRIDE + OD_FILT_BORDER;
+  /* We avoid filtering the pixels for which some of the pixels to average
+     are outside the frame. We could change the filter instead, but it would
+     add special cases for any future vectorization. */
+  for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
+  for (i = -OD_FILT_BORDER*(sby != 0); i < (nvb << bsize)
+   + OD_FILT_BORDER*(sby != nvsb - 1); i++) {
+    for (j = -OD_FILT_BORDER*(sbx != 0); j < (nhb << bsize)
+     + OD_FILT_BORDER*(sbx != nhsb - 1); j++) {
+      in[i*OD_FILT_BSTRIDE + j] = x[i*xstride + j];
+    }
+  }
+  if (pli == 0) {
+    for (by = 0; by < nvb; by++) {
+      for (bx = 0; bx < nhb; bx++) {
+        dir[by][bx] = od_dir_find8(&x[8*by*xstride + 8*bx], xstride,
+         &var[by][bx], coeff_shift);
+      }
+    }
+    od_compute_thresh(thresh, threshold, var, nhb, nvb);
+  }
+  else {
+    for (by = 0; by < nvb; by++) {
+      for (bx = 0; bx < nhb; bx++) {
+        thresh[by][bx] = threshold;
+      }
+    }
+  }
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      int skip;
+# if defined(DAALA_ODINTRIN)
+      int xstart;
+      int ystart;
+      int xend;
+      int yend;
+      xstart = ystart = 0;
+      xend = yend = (2 >> xdec);
+      if (overlap) {
+        xstart -= (sbx != 0);
+        ystart -= (sby != 0);
+        xend += (sbx != nhsb - 1);
+        yend += (sby != nvsb - 1);
+      }
+      skip = 1;
+      /* We look at whether the current block and its 4x4 surrounding (due to
+         lapping) are skipped to avoid filtering the same content multiple
+         times. */
+      for (i = ystart; i < yend; i++) {
+        for (j = xstart; j < xend; j++) {
+          skip = skip && bskip[((by << 1 >> xdec) + i)*skip_stride
+           + (bx << 1 >> xdec) + j];
+        }
+      }
+#else
+      (void)overlap;
+      skip = bskip[by*skip_stride + bx];
+#endif
+      if (skip) thresh[by][bx] = 0;
+    }
+  }
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      (vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])(
+       &y[(by*ystride << bsize) + (bx << bsize)], ystride,
+       &in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
+       thresh[by][bx], dir[by][bx]);
+    }
+  }
+  for (i = 0; i < nvb << bsize; i++) {
+    for (j = 0; j < nhb << bsize; j++) {
+      in[i*OD_FILT_BSTRIDE + j] = y[i*ystride + j];
+    }
+  }
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      (vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
+       &y[(by*ystride << bsize) + (bx << bsize)], ystride,
+       &in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
+       &x[(by*xstride << bsize) + (bx << bsize)], xstride,
+       thresh[by][bx], dir[by][bx]);
+    }
+  }
+}
diff --git a/vp10/common/od_dering.h b/vp10/common/od_dering.h
new file mode 100644
index 0000000..24127dd
--- /dev/null
+++ b/vp10/common/od_dering.h
@@ -0,0 +1,86 @@
+/*Daala video codec
+Copyright (c) 2003-2010 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_dering_H)
+# define _dering_H (1)
+
+# include "odintrin.h"
+
+# if defined(DAALA_ODINTRIN)
+#  include "filter.h"
+typedef int16_t od_dering_in;
+# endif
+
+#define OD_DERINGSIZES (2)
+
+#define OD_DERING_NO_CHECK_OVERLAP (0)
+#define OD_DERING_CHECK_OVERLAP (1)
+
+#define OD_DERING_LEVELS (6)
+extern const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS];
+
+#define OD_DERING_NBLOCKS (OD_BSIZE_MAX/8)
+
+#define OD_FILT_BORDER (3)
+#define OD_FILT_BSTRIDE (OD_BSIZE_MAX + 2*OD_FILT_BORDER)
+
+extern const int OD_DIRECTION_OFFSETS_TABLE[8][3];
+
+typedef void (*od_filter_dering_direction_func)(int16_t *y, int ystride,
+ const int16_t *in, int threshold, int dir);
+typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x, int xstride, int threshold,
+ int dir);
+
+struct od_dering_opt_vtbl {
+  od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES];
+  od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES];
+};
+typedef struct od_dering_opt_vtbl od_dering_opt_vtbl;
+
+/* Derings one superblock of nhb by nvb blocks (horizontal count first). */
+void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
+ const od_dering_in *x, int xstride, int nhb, int nvb, int sbx, int sby,
+ int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+ int pli, unsigned char *bskip, int skip_stride, int threshold, int overlap,
+ int coeff_shift);
+void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
+ int ln, int threshold, int dir);
+void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
+ const od_dering_in *x, int xstride, int ln, int threshold, int dir);
+
+extern const od_dering_opt_vtbl OD_DERING_VTBL_C;
+
+void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold, int dir);
+void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold, int dir);
+void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x, int xstride, int threshold,
+ int dir);
+void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x, int xstride, int threshold,
+ int dir);
+
+#endif
diff --git a/vp10/common/odintrin.c b/vp10/common/odintrin.c
new file mode 100644
index 0000000..ca9a5fc
--- /dev/null
+++ b/vp10/common/odintrin.c
@@ -0,0 +1,382 @@
+/*Daala video codec
+Copyright (c) 2006-2010 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#include "vp10/common/odintrin.h"
+
+/*Constants for use with OD_DIVU_SMALL(); row d-1 is {multiplier, addend} for d.
+  See \cite{Rob05} for details on computing these constants.
+  @INPROCEEDINGS{Rob05,
+    author="Arch D. Robison",
+    title="{N}-bit Unsigned Division via {N}-bit Multiply-Add",
+    booktitle="Proc. of the 17th IEEE Symposium on Computer Arithmetic
+     (ARITH'05)",
+    pages="131--139",
+    address="Cape Cod, MA",
+    month=Jun,
+    year=2005
+  }*/
+uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2] = {
+  {0xFFFFFFFF, 0xFFFFFFFF}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xAAAAAAAB,          0},
+  {0xFFFFFFFF, 0xFFFFFFFF}, {0xCCCCCCCD,          0}, {0xAAAAAAAB,          0},
+  {0x92492492, 0x92492492}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xE38E38E4,          0},
+  {0xCCCCCCCD,          0}, {0xBA2E8BA3,          0}, {0xAAAAAAAB,          0},
+  {0x9D89D89E,          0}, {0x92492492, 0x92492492}, {0x88888889,          0},
+  {0xFFFFFFFF, 0xFFFFFFFF}, {0xF0F0F0F1,          0}, {0xE38E38E4,          0},
+  {0xD79435E5, 0xD79435E5}, {0xCCCCCCCD,          0}, {0xC30C30C3, 0xC30C30C3},
+  {0xBA2E8BA3,          0}, {0xB21642C9,          0}, {0xAAAAAAAB,          0},
+  {0xA3D70A3E,          0}, {0x9D89D89E,          0}, {0x97B425ED, 0x97B425ED},
+  {0x92492492, 0x92492492}, {0x8D3DCB09,          0}, {0x88888889,          0},
+  {0x84210842, 0x84210842}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xF83E0F84,          0},
+  {0xF0F0F0F1,          0}, {0xEA0EA0EA, 0xEA0EA0EA}, {0xE38E38E4,          0},
+  {0xDD67C8A6, 0xDD67C8A6}, {0xD79435E5, 0xD79435E5}, {0xD20D20D2, 0xD20D20D2},
+  {0xCCCCCCCD,          0}, {0xC7CE0C7D,          0}, {0xC30C30C3, 0xC30C30C3},
+  {0xBE82FA0C,          0}, {0xBA2E8BA3,          0}, {0xB60B60B6, 0xB60B60B6},
+  {0xB21642C9,          0}, {0xAE4C415D,          0}, {0xAAAAAAAB,          0},
+  {0xA72F053A,          0}, {0xA3D70A3E,          0}, {0xA0A0A0A1,          0},
+  {0x9D89D89E,          0}, {0x9A90E7D9, 0x9A90E7D9}, {0x97B425ED, 0x97B425ED},
+  {0x94F2094F, 0x94F2094F}, {0x92492492, 0x92492492}, {0x8FB823EE, 0x8FB823EE},
+  {0x8D3DCB09,          0}, {0x8AD8F2FC,          0}, {0x88888889,          0},
+  {0x864B8A7E,          0}, {0x84210842, 0x84210842}, {0x82082082, 0x82082082},
+  {0xFFFFFFFF, 0xFFFFFFFF}, {0xFC0FC0FD,          0}, {0xF83E0F84,          0},
+  {0xF4898D60,          0}, {0xF0F0F0F1,          0}, {0xED7303B6,          0},
+  {0xEA0EA0EA, 0xEA0EA0EA}, {0xE6C2B449,          0}, {0xE38E38E4,          0},
+  {0xE070381C, 0xE070381C}, {0xDD67C8A6, 0xDD67C8A6}, {0xDA740DA8,          0},
+  {0xD79435E5, 0xD79435E5}, {0xD4C77B04,          0}, {0xD20D20D2, 0xD20D20D2},
+  {0xCF6474A9,          0}, {0xCCCCCCCD,          0}, {0xCA4587E7,          0},
+  {0xC7CE0C7D,          0}, {0xC565C87C,          0}, {0xC30C30C3, 0xC30C30C3},
+  {0xC0C0C0C1,          0}, {0xBE82FA0C,          0}, {0xBC52640C,          0},
+  {0xBA2E8BA3,          0}, {0xB81702E1,          0}, {0xB60B60B6, 0xB60B60B6},
+  {0xB40B40B4, 0xB40B40B4}, {0xB21642C9,          0}, {0xB02C0B03,          0},
+  {0xAE4C415D,          0}, {0xAC769184, 0xAC769184}, {0xAAAAAAAB,          0},
+  {0xA8E83F57, 0xA8E83F57}, {0xA72F053A,          0}, {0xA57EB503,          0},
+  {0xA3D70A3E,          0}, {0xA237C32B, 0xA237C32B}, {0xA0A0A0A1,          0},
+  {0x9F1165E7, 0x9F1165E7}, {0x9D89D89E,          0}, {0x9C09C09C, 0x9C09C09C},
+  {0x9A90E7D9, 0x9A90E7D9}, {0x991F1A51, 0x991F1A51}, {0x97B425ED, 0x97B425ED},
+  {0x964FDA6C, 0x964FDA6C}, {0x94F2094F, 0x94F2094F}, {0x939A85C4, 0x939A85C4},
+  {0x92492492, 0x92492492}, {0x90FDBC09, 0x90FDBC09}, {0x8FB823EE, 0x8FB823EE},
+  {0x8E78356D, 0x8E78356D}, {0x8D3DCB09,          0}, {0x8C08C08C, 0x8C08C08C},
+  {0x8AD8F2FC,          0}, {0x89AE408A,          0}, {0x88888889,          0},
+  {0x8767AB5F, 0x8767AB5F}, {0x864B8A7E,          0}, {0x85340853, 0x85340853},
+  {0x84210842, 0x84210842}, {0x83126E98,          0}, {0x82082082, 0x82082082},
+  {0x81020408, 0x81020408}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xFE03F810,          0},
+  {0xFC0FC0FD,          0}, {0xFA232CF3,          0}, {0xF83E0F84,          0},
+  {0xF6603D99,          0}, {0xF4898D60,          0}, {0xF2B9D649,          0},
+  {0xF0F0F0F1,          0}, {0xEF2EB720,          0}, {0xED7303B6,          0},
+  {0xEBBDB2A6,          0}, {0xEA0EA0EA, 0xEA0EA0EA}, {0xE865AC7C,          0},
+  {0xE6C2B449,          0}, {0xE525982B,          0}, {0xE38E38E4,          0},
+  {0xE1FC780F,          0}, {0xE070381C, 0xE070381C}, {0xDEE95C4D,          0},
+  {0xDD67C8A6, 0xDD67C8A6}, {0xDBEB61EF,          0}, {0xDA740DA8,          0},
+  {0xD901B204,          0}, {0xD79435E5, 0xD79435E5}, {0xD62B80D7,          0},
+  {0xD4C77B04,          0}, {0xD3680D37,          0}, {0xD20D20D2, 0xD20D20D2},
+  {0xD0B69FCC,          0}, {0xCF6474A9,          0}, {0xCE168A77, 0xCE168A77},
+  {0xCCCCCCCD,          0}, {0xCB8727C1,          0}, {0xCA4587E7,          0},
+  {0xC907DA4F,          0}, {0xC7CE0C7D,          0}, {0xC6980C6A,          0},
+  {0xC565C87C,          0}, {0xC4372F86,          0}, {0xC30C30C3, 0xC30C30C3},
+  {0xC1E4BBD6,          0}, {0xC0C0C0C1,          0}, {0xBFA02FE8, 0xBFA02FE8},
+  {0xBE82FA0C,          0}, {0xBD691047, 0xBD691047}, {0xBC52640C,          0},
+  {0xBB3EE722,          0}, {0xBA2E8BA3,          0}, {0xB92143FA, 0xB92143FA},
+  {0xB81702E1,          0}, {0xB70FBB5A, 0xB70FBB5A}, {0xB60B60B6, 0xB60B60B6},
+  {0xB509E68B,          0}, {0xB40B40B4, 0xB40B40B4}, {0xB30F6353,          0},
+  {0xB21642C9,          0}, {0xB11FD3B8, 0xB11FD3B8}, {0xB02C0B03,          0},
+  {0xAF3ADDC7,          0}, {0xAE4C415D,          0}, {0xAD602B58, 0xAD602B58},
+  {0xAC769184, 0xAC769184}, {0xAB8F69E3,          0}, {0xAAAAAAAB,          0},
+  {0xA9C84A48,          0}, {0xA8E83F57, 0xA8E83F57}, {0xA80A80A8, 0xA80A80A8},
+  {0xA72F053A,          0}, {0xA655C439, 0xA655C439}, {0xA57EB503,          0},
+  {0xA4A9CF1E,          0}, {0xA3D70A3E,          0}, {0xA3065E40,          0},
+  {0xA237C32B, 0xA237C32B}, {0xA16B312F,          0}, {0xA0A0A0A1,          0},
+  {0x9FD809FE,          0}, {0x9F1165E7, 0x9F1165E7}, {0x9E4CAD24,          0},
+  {0x9D89D89E,          0}, {0x9CC8E161,          0}, {0x9C09C09C, 0x9C09C09C},
+  {0x9B4C6F9F,          0}, {0x9A90E7D9, 0x9A90E7D9}, {0x99D722DB,          0},
+  {0x991F1A51, 0x991F1A51}, {0x9868C80A,          0}, {0x97B425ED, 0x97B425ED},
+  {0x97012E02, 0x97012E02}, {0x964FDA6C, 0x964FDA6C}, {0x95A02568, 0x95A02568},
+  {0x94F2094F, 0x94F2094F}, {0x94458094, 0x94458094}, {0x939A85C4, 0x939A85C4},
+  {0x92F11384, 0x92F11384}, {0x92492492, 0x92492492}, {0x91A2B3C5,          0},
+  {0x90FDBC09, 0x90FDBC09}, {0x905A3863, 0x905A3863}, {0x8FB823EE, 0x8FB823EE},
+  {0x8F1779DA,          0}, {0x8E78356D, 0x8E78356D}, {0x8DDA5202, 0x8DDA5202},
+  {0x8D3DCB09,          0}, {0x8CA29C04, 0x8CA29C04}, {0x8C08C08C, 0x8C08C08C},
+  {0x8B70344A, 0x8B70344A}, {0x8AD8F2FC,          0}, {0x8A42F870, 0x8A42F870},
+  {0x89AE408A,          0}, {0x891AC73B,          0}, {0x88888889,          0},
+  {0x87F78088,          0}, {0x8767AB5F, 0x8767AB5F}, {0x86D90545,          0},
+  {0x864B8A7E,          0}, {0x85BF3761, 0x85BF3761}, {0x85340853, 0x85340853},
+  {0x84A9F9C8, 0x84A9F9C8}, {0x84210842, 0x84210842}, {0x83993052, 0x83993052},
+  {0x83126E98,          0}, {0x828CBFBF,          0}, {0x82082082, 0x82082082},
+  {0x81848DA9,          0}, {0x81020408, 0x81020408}, {0x80808081,          0},
+  {0xFFFFFFFF, 0xFFFFFFFF}, {0xFF00FF01,          0}, {0xFE03F810,          0},
+  {0xFD08E551,          0}, {0xFC0FC0FD,          0}, {0xFB188566,          0},
+  {0xFA232CF3,          0}, {0xF92FB222,          0}, {0xF83E0F84,          0},
+  {0xF74E3FC3,          0}, {0xF6603D99,          0}, {0xF57403D6,          0},
+  {0xF4898D60,          0}, {0xF3A0D52D,          0}, {0xF2B9D649,          0},
+  {0xF1D48BCF,          0}, {0xF0F0F0F1,          0}, {0xF00F00F0, 0xF00F00F0},
+  {0xEF2EB720,          0}, {0xEE500EE5, 0xEE500EE5}, {0xED7303B6,          0},
+  {0xEC979119,          0}, {0xEBBDB2A6,          0}, {0xEAE56404,          0},
+  {0xEA0EA0EA, 0xEA0EA0EA}, {0xE9396520,          0}, {0xE865AC7C,          0},
+  {0xE79372E3,          0}, {0xE6C2B449,          0}, {0xE5F36CB0, 0xE5F36CB0},
+  {0xE525982B,          0}, {0xE45932D8,          0}, {0xE38E38E4,          0},
+  {0xE2C4A689,          0}, {0xE1FC780F,          0}, {0xE135A9CA,          0},
+  {0xE070381C, 0xE070381C}, {0xDFAC1F75,          0}, {0xDEE95C4D,          0},
+  {0xDE27EB2D,          0}, {0xDD67C8A6, 0xDD67C8A6}, {0xDCA8F159,          0},
+  {0xDBEB61EF,          0}, {0xDB2F171E,          0}, {0xDA740DA8,          0},
+  {0xD9BA4257,          0}, {0xD901B204,          0}, {0xD84A598F,          0},
+  {0xD79435E5, 0xD79435E5}, {0xD6DF43FD,          0}, {0xD62B80D7,          0},
+  {0xD578E97D,          0}, {0xD4C77B04,          0}, {0xD417328A,          0},
+  {0xD3680D37,          0}, {0xD2BA083C,          0}, {0xD20D20D2, 0xD20D20D2},
+  {0xD161543E, 0xD161543E}, {0xD0B69FCC,          0}, {0xD00D00D0, 0xD00D00D0},
+  {0xCF6474A9,          0}, {0xCEBCF8BC,          0}, {0xCE168A77, 0xCE168A77},
+  {0xCD712753,          0}, {0xCCCCCCCD,          0}, {0xCC29786D,          0},
+  {0xCB8727C1,          0}, {0xCAE5D85F, 0xCAE5D85F}, {0xCA4587E7,          0},
+  {0xC9A633FD,          0}, {0xC907DA4F,          0}, {0xC86A7890, 0xC86A7890},
+  {0xC7CE0C7D,          0}, {0xC73293D8,          0}, {0xC6980C6A,          0},
+  {0xC5FE7403, 0xC5FE7403}, {0xC565C87C,          0}, {0xC4CE07B0, 0xC4CE07B0},
+  {0xC4372F86,          0}, {0xC3A13DE6, 0xC3A13DE6}, {0xC30C30C3, 0xC30C30C3},
+  {0xC2780614,          0}, {0xC1E4BBD6,          0}, {0xC152500C, 0xC152500C},
+  {0xC0C0C0C1,          0}, {0xC0300C03, 0xC0300C03}, {0xBFA02FE8, 0xBFA02FE8},
+  {0xBF112A8B,          0}, {0xBE82FA0C,          0}, {0xBDF59C92,          0},
+  {0xBD691047, 0xBD691047}, {0xBCDD535E,          0}, {0xBC52640C,          0},
+  {0xBBC8408D,          0}, {0xBB3EE722,          0}, {0xBAB65610, 0xBAB65610},
+  {0xBA2E8BA3,          0}, {0xB9A7862A, 0xB9A7862A}, {0xB92143FA, 0xB92143FA},
+  {0xB89BC36D,          0}, {0xB81702E1,          0}, {0xB79300B8,          0},
+  {0xB70FBB5A, 0xB70FBB5A}, {0xB68D3134, 0xB68D3134}, {0xB60B60B6, 0xB60B60B6},
+  {0xB58A4855, 0xB58A4855}, {0xB509E68B,          0}, {0xB48A39D4, 0xB48A39D4},
+  {0xB40B40B4, 0xB40B40B4}, {0xB38CF9B0, 0xB38CF9B0}, {0xB30F6353,          0},
+  {0xB2927C2A,          0}, {0xB21642C9,          0}, {0xB19AB5C5,          0},
+  {0xB11FD3B8, 0xB11FD3B8}, {0xB0A59B42,          0}, {0xB02C0B03,          0},
+  {0xAFB321A1, 0xAFB321A1}, {0xAF3ADDC7,          0}, {0xAEC33E20,          0},
+  {0xAE4C415D,          0}, {0xADD5E632, 0xADD5E632}, {0xAD602B58, 0xAD602B58},
+  {0xACEB0F89, 0xACEB0F89}, {0xAC769184, 0xAC769184}, {0xAC02B00B,          0},
+  {0xAB8F69E3,          0}, {0xAB1CBDD4,          0}, {0xAAAAAAAB,          0},
+  {0xAA392F36,          0}, {0xA9C84A48,          0}, {0xA957FAB5, 0xA957FAB5},
+  {0xA8E83F57, 0xA8E83F57}, {0xA8791709,          0}, {0xA80A80A8, 0xA80A80A8},
+  {0xA79C7B17,          0}, {0xA72F053A,          0}, {0xA6C21DF7,          0},
+  {0xA655C439, 0xA655C439}, {0xA5E9F6ED, 0xA5E9F6ED}, {0xA57EB503,          0},
+  {0xA513FD6C,          0}, {0xA4A9CF1E,          0}, {0xA4402910, 0xA4402910},
+  {0xA3D70A3E,          0}, {0xA36E71A3,          0}, {0xA3065E40,          0},
+  {0xA29ECF16, 0xA29ECF16}, {0xA237C32B, 0xA237C32B}, {0xA1D13986,          0},
+  {0xA16B312F,          0}, {0xA105A933,          0}, {0xA0A0A0A1,          0},
+  {0xA03C1689,          0}, {0x9FD809FE,          0}, {0x9F747A15, 0x9F747A15},
+  {0x9F1165E7, 0x9F1165E7}, {0x9EAECC8D, 0x9EAECC8D}, {0x9E4CAD24,          0},
+  {0x9DEB06C9, 0x9DEB06C9}, {0x9D89D89E,          0}, {0x9D2921C4,          0},
+  {0x9CC8E161,          0}, {0x9C69169B, 0x9C69169B}, {0x9C09C09C, 0x9C09C09C},
+  {0x9BAADE8E, 0x9BAADE8E}, {0x9B4C6F9F,          0}, {0x9AEE72FD,          0},
+  {0x9A90E7D9, 0x9A90E7D9}, {0x9A33CD67, 0x9A33CD67}, {0x99D722DB,          0},
+  {0x997AE76B, 0x997AE76B}, {0x991F1A51, 0x991F1A51}, {0x98C3BAC7, 0x98C3BAC7},
+  {0x9868C80A,          0}, {0x980E4156, 0x980E4156}, {0x97B425ED, 0x97B425ED},
+  {0x975A7510,          0}, {0x97012E02, 0x97012E02}, {0x96A8500A,          0},
+  {0x964FDA6C, 0x964FDA6C}, {0x95F7CC73,          0}, {0x95A02568, 0x95A02568},
+  {0x9548E498,          0}, {0x94F2094F, 0x94F2094F}, {0x949B92DE,          0},
+  {0x94458094, 0x94458094}, {0x93EFD1C5, 0x93EFD1C5}, {0x939A85C4, 0x939A85C4},
+  {0x93459BE7,          0}, {0x92F11384, 0x92F11384}, {0x929CEBF5,          0},
+  {0x92492492, 0x92492492}, {0x91F5BCB9,          0}, {0x91A2B3C5,          0},
+  {0x91500915, 0x91500915}, {0x90FDBC09, 0x90FDBC09}, {0x90ABCC02, 0x90ABCC02},
+  {0x905A3863, 0x905A3863}, {0x90090090, 0x90090090}, {0x8FB823EE, 0x8FB823EE},
+  {0x8F67A1E4,          0}, {0x8F1779DA,          0}, {0x8EC7AB3A,          0},
+  {0x8E78356D, 0x8E78356D}, {0x8E2917E1,          0}, {0x8DDA5202, 0x8DDA5202},
+  {0x8D8BE340,          0}, {0x8D3DCB09,          0}, {0x8CF008CF, 0x8CF008CF},
+  {0x8CA29C04, 0x8CA29C04}, {0x8C55841D,          0}, {0x8C08C08C, 0x8C08C08C},
+  {0x8BBC50C9,          0}, {0x8B70344A, 0x8B70344A}, {0x8B246A88,          0},
+  {0x8AD8F2FC,          0}, {0x8A8DCD20,          0}, {0x8A42F870, 0x8A42F870},
+  {0x89F8746A,          0}, {0x89AE408A,          0}, {0x89645C4F, 0x89645C4F},
+  {0x891AC73B,          0}, {0x88D180CD, 0x88D180CD}, {0x88888889,          0},
+  {0x883FDDF0, 0x883FDDF0}, {0x87F78088,          0}, {0x87AF6FD6,          0},
+  {0x8767AB5F, 0x8767AB5F}, {0x872032AC, 0x872032AC}, {0x86D90545,          0},
+  {0x869222B2,          0}, {0x864B8A7E,          0}, {0x86053C34, 0x86053C34},
+  {0x85BF3761, 0x85BF3761}, {0x85797B91, 0x85797B91}, {0x85340853, 0x85340853},
+  {0x84EEDD36,          0}, {0x84A9F9C8, 0x84A9F9C8}, {0x84655D9C,          0},
+  {0x84210842, 0x84210842}, {0x83DCF94E,          0}, {0x83993052, 0x83993052},
+  {0x8355ACE4,          0}, {0x83126E98,          0}, {0x82CF7504,          0},
+  {0x828CBFBF,          0}, {0x824A4E61,          0}, {0x82082082, 0x82082082},
+  {0x81C635BC, 0x81C635BC}, {0x81848DA9,          0}, {0x814327E4,          0},
+  {0x81020408, 0x81020408}, {0x80C121B3,          0}, {0x80808081,          0},
+  {0x80402010, 0x80402010}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xFF803FE1,          0},
+  {0xFF00FF01,          0}, {0xFE823CA6,          0}, {0xFE03F810,          0},
+  {0xFD863087,          0}, {0xFD08E551,          0}, {0xFC8C15B5,          0},
+  {0xFC0FC0FD,          0}, {0xFB93E673,          0}, {0xFB188566,          0},
+  {0xFA9D9D20,          0}, {0xFA232CF3,          0}, {0xF9A9342D,          0},
+  {0xF92FB222,          0}, {0xF8B6A622, 0xF8B6A622}, {0xF83E0F84,          0},
+  {0xF7C5ED9D,          0}, {0xF74E3FC3,          0}, {0xF6D7054E,          0},
+  {0xF6603D99,          0}, {0xF5E9E7FD,          0}, {0xF57403D6,          0},
+  {0xF4FE9083,          0}, {0xF4898D60,          0}, {0xF414F9CE,          0},
+  {0xF3A0D52D,          0}, {0xF32D1EE0,          0}, {0xF2B9D649,          0},
+  {0xF246FACC,          0}, {0xF1D48BCF,          0}, {0xF16288B9,          0},
+  {0xF0F0F0F1,          0}, {0xF07FC3E0, 0xF07FC3E0}, {0xF00F00F0, 0xF00F00F0},
+  {0xEF9EA78C,          0}, {0xEF2EB720,          0}, {0xEEBF2F19,          0},
+  {0xEE500EE5, 0xEE500EE5}, {0xEDE155F4,          0}, {0xED7303B6,          0},
+  {0xED05179C, 0xED05179C}, {0xEC979119,          0}, {0xEC2A6FA0, 0xEC2A6FA0},
+  {0xEBBDB2A6,          0}, {0xEB5159A0,          0}, {0xEAE56404,          0},
+  {0xEA79D14A,          0}, {0xEA0EA0EA, 0xEA0EA0EA}, {0xE9A3D25E, 0xE9A3D25E},
+  {0xE9396520,          0}, {0xE8CF58AB,          0}, {0xE865AC7C,          0},
+  {0xE7FC600F,          0}, {0xE79372E3,          0}, {0xE72AE476,          0},
+  {0xE6C2B449,          0}, {0xE65AE1DC,          0}, {0xE5F36CB0, 0xE5F36CB0},
+  {0xE58C544A,          0}, {0xE525982B,          0}, {0xE4BF37D9,          0},
+  {0xE45932D8,          0}, {0xE3F388AF,          0}, {0xE38E38E4,          0},
+  {0xE32942FF,          0}, {0xE2C4A689,          0}, {0xE260630B,          0},
+  {0xE1FC780F,          0}, {0xE198E520,          0}, {0xE135A9CA,          0},
+  {0xE0D2C59A,          0}, {0xE070381C, 0xE070381C}, {0xE00E00E0, 0xE00E00E0},
+  {0xDFAC1F75,          0}, {0xDF4A9369,          0}, {0xDEE95C4D,          0},
+  {0xDE8879B3,          0}, {0xDE27EB2D,          0}, {0xDDC7B04D,          0},
+  {0xDD67C8A6, 0xDD67C8A6}, {0xDD0833CE,          0}, {0xDCA8F159,          0},
+  {0xDC4A00DD,          0}, {0xDBEB61EF,          0}, {0xDB8D1428,          0},
+  {0xDB2F171E,          0}, {0xDAD16A6B,          0}, {0xDA740DA8,          0},
+  {0xDA17006D, 0xDA17006D}, {0xD9BA4257,          0}, {0xD95DD300,          0},
+  {0xD901B204,          0}, {0xD8A5DEFF,          0}, {0xD84A598F,          0},
+  {0xD7EF2152,          0}, {0xD79435E5, 0xD79435E5}, {0xD73996E9,          0},
+  {0xD6DF43FD,          0}, {0xD6853CC1,          0}, {0xD62B80D7,          0},
+  {0xD5D20FDF,          0}, {0xD578E97D,          0}, {0xD5200D52, 0xD5200D52},
+  {0xD4C77B04,          0}, {0xD46F3235,          0}, {0xD417328A,          0},
+  {0xD3BF7BA9,          0}, {0xD3680D37,          0}, {0xD310E6DB,          0},
+  {0xD2BA083C,          0}, {0xD2637101,          0}, {0xD20D20D2, 0xD20D20D2},
+  {0xD1B71759,          0}, {0xD161543E, 0xD161543E}, {0xD10BD72C,          0},
+  {0xD0B69FCC,          0}, {0xD061ADCA,          0}, {0xD00D00D0, 0xD00D00D0},
+  {0xCFB8988C,          0}, {0xCF6474A9,          0}, {0xCF1094D4,          0},
+  {0xCEBCF8BC,          0}, {0xCE69A00D,          0}, {0xCE168A77, 0xCE168A77},
+  {0xCDC3B7A9, 0xCDC3B7A9}, {0xCD712753,          0}, {0xCD1ED924,          0},
+  {0xCCCCCCCD,          0}, {0xCC7B0200,          0}, {0xCC29786D,          0},
+  {0xCBD82FC7,          0}, {0xCB8727C1,          0}, {0xCB36600D,          0},
+  {0xCAE5D85F, 0xCAE5D85F}, {0xCA95906C,          0}, {0xCA4587E7,          0},
+  {0xC9F5BE86,          0}, {0xC9A633FD,          0}, {0xC956E803, 0xC956E803},
+  {0xC907DA4F,          0}, {0xC8B90A96,          0}, {0xC86A7890, 0xC86A7890},
+  {0xC81C23F5, 0xC81C23F5}, {0xC7CE0C7D,          0}, {0xC78031E0, 0xC78031E0},
+  {0xC73293D8,          0}, {0xC6E5321D,          0}, {0xC6980C6A,          0},
+  {0xC64B2278, 0xC64B2278}, {0xC5FE7403, 0xC5FE7403}, {0xC5B200C6,          0},
+  {0xC565C87C,          0}, {0xC519CAE0, 0xC519CAE0}, {0xC4CE07B0, 0xC4CE07B0},
+  {0xC4827EA8, 0xC4827EA8}, {0xC4372F86,          0}, {0xC3EC1A06,          0},
+  {0xC3A13DE6, 0xC3A13DE6}, {0xC3569AE6,          0}, {0xC30C30C3, 0xC30C30C3},
+  {0xC2C1FF3E,          0}, {0xC2780614,          0}, {0xC22E4507,          0},
+  {0xC1E4BBD6,          0}, {0xC19B6A42,          0}, {0xC152500C, 0xC152500C},
+  {0xC1096CF6,          0}, {0xC0C0C0C1,          0}, {0xC0784B2F,          0},
+  {0xC0300C03, 0xC0300C03}, {0xBFE80300,          0}, {0xBFA02FE8, 0xBFA02FE8},
+  {0xBF589280,          0}, {0xBF112A8B,          0}, {0xBEC9F7CE,          0},
+  {0xBE82FA0C,          0}, {0xBE3C310C,          0}, {0xBDF59C92,          0},
+  {0xBDAF3C64,          0}, {0xBD691047, 0xBD691047}, {0xBD231803,          0},
+  {0xBCDD535E,          0}, {0xBC97C21E, 0xBC97C21E}, {0xBC52640C,          0},
+  {0xBC0D38EE, 0xBC0D38EE}, {0xBBC8408D,          0}, {0xBB837AB1,          0},
+  {0xBB3EE722,          0}, {0xBAFA85A9, 0xBAFA85A9}, {0xBAB65610, 0xBAB65610},
+  {0xBA725820, 0xBA725820}, {0xBA2E8BA3,          0}, {0xB9EAF063,          0},
+  {0xB9A7862A, 0xB9A7862A}, {0xB9644CC4,          0}, {0xB92143FA, 0xB92143FA},
+  {0xB8DE6B9A,          0}, {0xB89BC36D,          0}, {0xB8594B41,          0},
+  {0xB81702E1,          0}, {0xB7D4EA19, 0xB7D4EA19}, {0xB79300B8,          0},
+  {0xB7514689,          0}, {0xB70FBB5A, 0xB70FBB5A}, {0xB6CE5EF9, 0xB6CE5EF9},
+  {0xB68D3134, 0xB68D3134}, {0xB64C31D9,          0}, {0xB60B60B6, 0xB60B60B6},
+  {0xB5CABD9B,          0}, {0xB58A4855, 0xB58A4855}, {0xB54A00B5, 0xB54A00B5},
+  {0xB509E68B,          0}, {0xB4C9F9A5,          0}, {0xB48A39D4, 0xB48A39D4},
+  {0xB44AA6E9, 0xB44AA6E9}, {0xB40B40B4, 0xB40B40B4}, {0xB3CC0706,          0},
+  {0xB38CF9B0, 0xB38CF9B0}, {0xB34E1884,          0}, {0xB30F6353,          0},
+  {0xB2D0D9EF,          0}, {0xB2927C2A,          0}, {0xB25449D7,          0},
+  {0xB21642C9,          0}, {0xB1D866D1, 0xB1D866D1}, {0xB19AB5C5,          0},
+  {0xB15D2F76,          0}, {0xB11FD3B8, 0xB11FD3B8}, {0xB0E2A260, 0xB0E2A260},
+  {0xB0A59B42,          0}, {0xB068BE31,          0}, {0xB02C0B03,          0},
+  {0xAFEF818C,          0}, {0xAFB321A1, 0xAFB321A1}, {0xAF76EB19,          0},
+  {0xAF3ADDC7,          0}, {0xAEFEF982,          0}, {0xAEC33E20,          0},
+  {0xAE87AB76, 0xAE87AB76}, {0xAE4C415D,          0}, {0xAE10FFA9,          0},
+  {0xADD5E632, 0xADD5E632}, {0xAD9AF4D0,          0}, {0xAD602B58, 0xAD602B58},
+  {0xAD2589A4,          0}, {0xACEB0F89, 0xACEB0F89}, {0xACB0BCE1, 0xACB0BCE1},
+  {0xAC769184, 0xAC769184}, {0xAC3C8D4A,          0}, {0xAC02B00B,          0},
+  {0xABC8F9A0, 0xABC8F9A0}, {0xAB8F69E3,          0}, {0xAB5600AC,          0},
+  {0xAB1CBDD4,          0}, {0xAAE3A136,          0}, {0xAAAAAAAB,          0},
+  {0xAA71DA0D,          0}, {0xAA392F36,          0}, {0xAA00AA01,          0},
+  {0xA9C84A48,          0}, {0xA9900FE6,          0}, {0xA957FAB5, 0xA957FAB5},
+  {0xA9200A92, 0xA9200A92}, {0xA8E83F57, 0xA8E83F57}, {0xA8B098E0, 0xA8B098E0},
+  {0xA8791709,          0}, {0xA841B9AD,          0}, {0xA80A80A8, 0xA80A80A8},
+  {0xA7D36BD8,          0}, {0xA79C7B17,          0}, {0xA765AE44,          0},
+  {0xA72F053A,          0}, {0xA6F87FD6, 0xA6F87FD6}, {0xA6C21DF7,          0},
+  {0xA68BDF79,          0}, {0xA655C439, 0xA655C439}, {0xA61FCC16, 0xA61FCC16},
+  {0xA5E9F6ED, 0xA5E9F6ED}, {0xA5B4449D,          0}, {0xA57EB503,          0},
+  {0xA54947FE,          0}, {0xA513FD6C,          0}, {0xA4DED52C, 0xA4DED52C},
+  {0xA4A9CF1E,          0}, {0xA474EB1F, 0xA474EB1F}, {0xA4402910, 0xA4402910},
+  {0xA40B88D0,          0}, {0xA3D70A3E,          0}, {0xA3A2AD39, 0xA3A2AD39},
+  {0xA36E71A3,          0}, {0xA33A575A, 0xA33A575A}, {0xA3065E40,          0},
+  {0xA2D28634,          0}, {0xA29ECF16, 0xA29ECF16}, {0xA26B38C9,          0},
+  {0xA237C32B, 0xA237C32B}, {0xA2046E1F, 0xA2046E1F}, {0xA1D13986,          0},
+  {0xA19E2540,          0}, {0xA16B312F,          0}, {0xA1385D35,          0},
+  {0xA105A933,          0}, {0xA0D3150C,          0}, {0xA0A0A0A1,          0},
+  {0xA06E4BD4, 0xA06E4BD4}, {0xA03C1689,          0}, {0xA00A00A0, 0xA00A00A0},
+  {0x9FD809FE,          0}, {0x9FA63284,          0}, {0x9F747A15, 0x9F747A15},
+  {0x9F42E095, 0x9F42E095}, {0x9F1165E7, 0x9F1165E7}, {0x9EE009EE, 0x9EE009EE},
+  {0x9EAECC8D, 0x9EAECC8D}, {0x9E7DADA9,          0}, {0x9E4CAD24,          0},
+  {0x9E1BCAE3,          0}, {0x9DEB06C9, 0x9DEB06C9}, {0x9DBA60BB, 0x9DBA60BB},
+  {0x9D89D89E,          0}, {0x9D596E54, 0x9D596E54}, {0x9D2921C4,          0},
+  {0x9CF8F2D1, 0x9CF8F2D1}, {0x9CC8E161,          0}, {0x9C98ED58,          0},
+  {0x9C69169B, 0x9C69169B}, {0x9C395D10, 0x9C395D10}, {0x9C09C09C, 0x9C09C09C},
+  {0x9BDA4124, 0x9BDA4124}, {0x9BAADE8E, 0x9BAADE8E}, {0x9B7B98C0,          0},
+  {0x9B4C6F9F,          0}, {0x9B1D6311, 0x9B1D6311}, {0x9AEE72FD,          0},
+  {0x9ABF9F48, 0x9ABF9F48}, {0x9A90E7D9, 0x9A90E7D9}, {0x9A624C97,          0},
+  {0x9A33CD67, 0x9A33CD67}, {0x9A056A31,          0}, {0x99D722DB,          0},
+  {0x99A8F74C,          0}, {0x997AE76B, 0x997AE76B}, {0x994CF320, 0x994CF320},
+  {0x991F1A51, 0x991F1A51}, {0x98F15CE7,          0}, {0x98C3BAC7, 0x98C3BAC7},
+  {0x989633DB, 0x989633DB}, {0x9868C80A,          0}, {0x983B773B,          0},
+  {0x980E4156, 0x980E4156}, {0x97E12644, 0x97E12644}, {0x97B425ED, 0x97B425ED},
+  {0x97874039,          0}, {0x975A7510,          0}, {0x972DC45B,          0},
+  {0x97012E02, 0x97012E02}, {0x96D4B1EF,          0}, {0x96A8500A,          0},
+  {0x967C083B,          0}, {0x964FDA6C, 0x964FDA6C}, {0x9623C686, 0x9623C686},
+  {0x95F7CC73,          0}, {0x95CBEC1B,          0}, {0x95A02568, 0x95A02568},
+  {0x95747844,          0}, {0x9548E498,          0}, {0x951D6A4E,          0},
+  {0x94F2094F, 0x94F2094F}, {0x94C6C187,          0}, {0x949B92DE,          0},
+  {0x94707D3F,          0}, {0x94458094, 0x94458094}, {0x941A9CC8, 0x941A9CC8},
+  {0x93EFD1C5, 0x93EFD1C5}, {0x93C51F76,          0}, {0x939A85C4, 0x939A85C4},
+  {0x9370049C,          0}, {0x93459BE7,          0}, {0x931B4B91,          0},
+  {0x92F11384, 0x92F11384}, {0x92C6F3AC, 0x92C6F3AC}, {0x929CEBF5,          0},
+  {0x9272FC48, 0x9272FC48}, {0x92492492, 0x92492492}, {0x921F64BF,          0},
+  {0x91F5BCB9,          0}, {0x91CC2C6C, 0x91CC2C6C}, {0x91A2B3C5,          0},
+  {0x917952AF,          0}, {0x91500915, 0x91500915}, {0x9126D6E5,          0},
+  {0x90FDBC09, 0x90FDBC09}, {0x90D4B86F,          0}, {0x90ABCC02, 0x90ABCC02},
+  {0x9082F6B0,          0}, {0x905A3863, 0x905A3863}, {0x9031910A,          0},
+  {0x90090090, 0x90090090}, {0x8FE086E3,          0}, {0x8FB823EE, 0x8FB823EE},
+  {0x8F8FD7A0,          0}, {0x8F67A1E4,          0}, {0x8F3F82A8, 0x8F3F82A8},
+  {0x8F1779DA,          0}, {0x8EEF8766,          0}, {0x8EC7AB3A,          0},
+  {0x8E9FE542, 0x8E9FE542}, {0x8E78356D, 0x8E78356D}, {0x8E509BA8, 0x8E509BA8},
+  {0x8E2917E1,          0}, {0x8E01AA05,          0}, {0x8DDA5202, 0x8DDA5202},
+  {0x8DB30FC6, 0x8DB30FC6}, {0x8D8BE340,          0}, {0x8D64CC5C,          0},
+  {0x8D3DCB09,          0}, {0x8D16DF35, 0x8D16DF35}, {0x8CF008CF, 0x8CF008CF},
+  {0x8CC947C5,          0}, {0x8CA29C04, 0x8CA29C04}, {0x8C7C057D,          0},
+  {0x8C55841D,          0}, {0x8C2F17D2, 0x8C2F17D2}, {0x8C08C08C, 0x8C08C08C},
+  {0x8BE27E39, 0x8BE27E39}, {0x8BBC50C9,          0}, {0x8B963829, 0x8B963829},
+  {0x8B70344A, 0x8B70344A}, {0x8B4A451A,          0}, {0x8B246A88,          0},
+  {0x8AFEA483, 0x8AFEA483}, {0x8AD8F2FC,          0}, {0x8AB355E0, 0x8AB355E0},
+  {0x8A8DCD20,          0}, {0x8A6858AB,          0}, {0x8A42F870, 0x8A42F870},
+  {0x8A1DAC60, 0x8A1DAC60}, {0x89F8746A,          0}, {0x89D3507D,          0},
+  {0x89AE408A,          0}, {0x89894480,          0}, {0x89645C4F, 0x89645C4F},
+  {0x893F87E8, 0x893F87E8}, {0x891AC73B,          0}, {0x88F61A37, 0x88F61A37},
+  {0x88D180CD, 0x88D180CD}, {0x88ACFAEE,          0}, {0x88888889,          0},
+  {0x8864298F,          0}, {0x883FDDF0, 0x883FDDF0}, {0x881BA59E,          0},
+  {0x87F78088,          0}, {0x87D36EA0,          0}, {0x87AF6FD6,          0},
+  {0x878B841B,          0}, {0x8767AB5F, 0x8767AB5F}, {0x8743E595,          0},
+  {0x872032AC, 0x872032AC}, {0x86FC9296, 0x86FC9296}, {0x86D90545,          0},
+  {0x86B58AA8,          0}, {0x869222B2,          0}, {0x866ECD53, 0x866ECD53},
+  {0x864B8A7E,          0}, {0x86285A23, 0x86285A23}, {0x86053C34, 0x86053C34},
+  {0x85E230A3, 0x85E230A3}, {0x85BF3761, 0x85BF3761}, {0x859C5060, 0x859C5060},
+  {0x85797B91, 0x85797B91}, {0x8556B8E7, 0x8556B8E7}, {0x85340853, 0x85340853},
+  {0x851169C7, 0x851169C7}, {0x84EEDD36,          0}, {0x84CC6290,          0},
+  {0x84A9F9C8, 0x84A9F9C8}, {0x8487A2D1,          0}, {0x84655D9C,          0},
+  {0x84432A1B, 0x84432A1B}, {0x84210842, 0x84210842}, {0x83FEF802, 0x83FEF802},
+  {0x83DCF94E,          0}, {0x83BB0C18,          0}, {0x83993052, 0x83993052},
+  {0x837765F0, 0x837765F0}, {0x8355ACE4,          0}, {0x83340520, 0x83340520},
+  {0x83126E98,          0}, {0x82F0E93D, 0x82F0E93D}, {0x82CF7504,          0},
+  {0x82AE11DE,          0}, {0x828CBFBF,          0}, {0x826B7E99, 0x826B7E99},
+  {0x824A4E61,          0}, {0x82292F08,          0}, {0x82082082, 0x82082082},
+  {0x81E722C2, 0x81E722C2}, {0x81C635BC, 0x81C635BC}, {0x81A55963,          0},
+  {0x81848DA9,          0}, {0x8163D283,          0}, {0x814327E4,          0},
+  {0x81228DBF,          0}, {0x81020408, 0x81020408}, {0x80E18AB3,          0},
+  {0x80C121B3,          0}, {0x80A0C8FB, 0x80A0C8FB}, {0x80808081,          0},
+  {0x80604836, 0x80604836}, {0x80402010, 0x80402010}, {0x80200802, 0x80200802},
+  {0xFFFFFFFF, 0xFFFFFFFF}
+};
diff --git a/vp10/common/odintrin.h b/vp10/common/odintrin.h
new file mode 100644
index 0000000..c96f8a7
--- /dev/null
+++ b/vp10/common/odintrin.h
@@ -0,0 +1,47 @@
+#ifndef VP10_COMMON_ODINTRIN_H_
+#define VP10_COMMON_ODINTRIN_H_
+
+#include "vp10/common/enums.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_ports/bitops.h"
+
+/*Smallest blocks are 4x4, so the log2 of the smallest block side is 2.*/
+# define OD_LOG_BSIZE0 (2)
+/*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/
+# define OD_NBSIZES    (5)
+/*The log2 of the maximum length of the side of a block (2 + 5 - 1 = 6).*/
+# define OD_LOG_BSIZE_MAX (OD_LOG_BSIZE0 + OD_NBSIZES - 1)
+/*The maximum length of the side of a block (1 << 6 = 64).*/
+# define OD_BSIZE_MAX     (1 << OD_LOG_BSIZE_MAX)
+
+typedef int od_coeff;
+
+typedef int16_t od_dering_in; /*Element type of the x input of od_dering().*/
+
+# define OD_DIVU_DMAX (1024) /*Number of rows in OD_DIVU_SMALL_CONSTS.*/
+
+extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
+/*Translate unsigned division by small divisors into multiplications.
+  Row (_d)-1 supplies {multiplier, addend}; _d must be in [1, OD_DIVU_DMAX].*/
+# define OD_DIVU_SMALL(_x, _d) \
+  ((uint32_t)((OD_DIVU_SMALL_CONSTS[(_d)-1][0]* \
+  (uint64_t)(_x)+OD_DIVU_SMALL_CONSTS[(_d)-1][1])>>32)>> \
+  (OD_ILOG(_d)-1))
+/*Uses the table when _d < OD_DIVU_DMAX, else divides; _d expands repeatedly.*/
+# define OD_DIVU(_x, _d) \
+  (((_d) < OD_DIVU_DMAX)?(OD_DIVU_SMALL((_x), (_d))):((_x)/(_d)))
+/*OD_CLAMPI takes (min, val, max) but forwards to clamp() as (val, min, max).*/
+#define OD_MINI VPXMIN
+#define OD_CLAMPI(min, val, max) clamp((val), (min), (max))
+
+# define OD_CLZ0 (1)
+# define OD_CLZ(x) (-get_msb(x))
+# define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x)) /*Expands to 1 + get_msb(x).*/
+/*Note that __builtin_clz is not defined when x == 0, according to the gcc
+   documentation (and that of the x86 BSR instruction that implements it), so
+   OD_ILOG_NZ() must not see zero (presumably get_msb() has the same limit).
+  OD_ILOG() is the variant to use when x can be zero; it yields 0 for x == 0.*/
+# define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
+
+#endif
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index 1f0c1bf..dfa04b5 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -368,6 +368,9 @@
   BLOCK_SIZE sb_size;  // Size of the superblock used for this frame
   int mib_size;        // Size of the superblock in units of MI blocks
   int mib_size_log2;   // Log 2 of above.
+#if CONFIG_DERING
+  int dering_level;
+#endif
 } VP10_COMMON;
 
 // TODO(hkuang): Don't need to lock the whole pool after implementing atomic
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h
index a767352..12dc691 100644
--- a/vp10/common/vp10_txfm.h
+++ b/vp10/common/vp10_txfm.h
@@ -95,7 +95,7 @@
     printf(
         "%s overflow result_32: %d result_64: %lld w0: %d in0: %d w1: %d in1: "
         "%d\n",
-        __func__, result_32, (long long int)result_64, w0, in0, w1, in1);
+        __func__, result_32, ((long long int)result_64), w0, in0, w1, in1);
     assert(0 && "half_btf overflow");
   }
 #endif
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index c1eaed7..f2f8ebb 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -14,6 +14,7 @@
 #include "./vp10_rtcd.h"
 #include "./vpx_dsp_rtcd.h"
 #include "./vpx_scale_rtcd.h"
+#include "./vpx_config.h"
 
 #include "vpx_dsp/bitreader_buffer.h"
 #include "vp10/decoder/bitreader.h"
@@ -29,6 +30,9 @@
 #include "vp10/common/clpf.h"
 #endif
 #include "vp10/common/common.h"
+#if CONFIG_DERING
+#include "vp10/common/dering.h"
+#endif  // CONFIG_DERING
 #include "vp10/common/entropy.h"
 #include "vp10/common/entropymode.h"
 #include "vp10/common/idct.h"
@@ -1776,6 +1780,16 @@
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
     dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
+#if DERING_REFINEMENT
+  if (bsize == BLOCK_64X64) {
+    if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
+      cm->mi_grid_visible[mi_row*cm->mi_stride + mi_col]->mbmi.dering_gain =
+          vpx_read_literal(r, DERING_REFINEMENT_BITS);
+    } else {
+      cm->mi_grid_visible[mi_row*cm->mi_stride + mi_col]->mbmi.dering_gain = 0;
+    }
+  }
+#endif  // DERING_REFINEMENT
 #endif  // CONFIG_EXT_PARTITION_TYPES
 }
 
@@ -1951,6 +1965,12 @@
 }
 #endif
 
+#if CONFIG_DERING  // Reads cm->dering_level (DERING_LEVEL_BITS bits) from rb.
+static void setup_dering(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+  cm->dering_level = vpx_rb_read_literal(rb,  DERING_LEVEL_BITS);
+}
+#endif  // CONFIG_DERING
+
 static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
   return vpx_rb_read_bit(rb) ? vpx_rb_read_inv_signed_literal(rb, 6) : 0;
 }
@@ -2706,6 +2726,11 @@
   if (cm->clpf && !cm->skip_loop_filter)
     vp10_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
 #endif
+#if CONFIG_DERING
+  if (cm->dering_level && !cm->skip_loop_filter) {
+    vp10_dering_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->dering_level);
+  }
+#endif  // CONFIG_DERING
 
   if (cm->frame_parallel_decode)
     vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX);
@@ -3242,6 +3267,9 @@
 #if CONFIG_CLPF
   setup_clpf(cm, rb);
 #endif
+#if CONFIG_DERING
+  setup_dering(cm, rb);
+#endif
 #if CONFIG_LOOP_RESTORATION
   setup_restoration(cm, rb);
 #endif  // CONFIG_LOOP_RESTORATION
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index fb101af..d5bf02c 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -23,6 +23,9 @@
 #if CONFIG_CLPF
 #include "vp10/common/clpf.h"
 #endif
+#if CONFIG_DERING
+#include "vp10/common/dering.h"
+#endif  // CONFIG_DERING
 #include "vp10/common/entropy.h"
 #include "vp10/common/entropymode.h"
 #include "vp10/common/entropymv.h"
@@ -1875,6 +1878,15 @@
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
     update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+
+#if DERING_REFINEMENT
+  if (bsize == BLOCK_64X64 && cm->dering_level != 0 &&
+      !sb_all_skip(cm, mi_row, mi_col)) {
+    vpx_write_literal(
+        w, cm->mi_grid_visible[mi_row*cm->mi_stride + mi_col]->mbmi.dering_gain,
+        DERING_REFINEMENT_BITS);
+  }
+#endif
 #endif  // CONFIG_EXT_PARTITION_TYPES
 }
 
@@ -2447,6 +2459,12 @@
 }
 #endif
 
+#if CONFIG_DERING  // Writes level to wb using DERING_LEVEL_BITS bits.
+static void encode_dering(int level, struct vpx_write_bit_buffer *wb) {
+  vpx_wb_write_literal(wb, level, DERING_LEVEL_BITS);
+}
+#endif  // CONFIG_DERING
+
 static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
   if (delta_q != 0) {
     vpx_wb_write_bit(wb, 1);
@@ -3103,6 +3121,9 @@
 #if CONFIG_CLPF
   encode_clpf(cm, wb);
 #endif
+#if CONFIG_DERING
+  encode_dering(cm->dering_level, wb);
+#endif  // CONFIG_DERING
 #if CONFIG_LOOP_RESTORATION
   encode_restoration(cm, wb);
 #endif  // CONFIG_LOOP_RESTORATION
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index afe8dc5..823c861 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -18,6 +18,9 @@
 #if CONFIG_CLPF
 #include "vp10/common/clpf.h"
 #endif
+#if CONFIG_DERING
+#include "vp10/common/dering.h"
+#endif  // CONFIG_DERING
 #include "vp10/common/filter.h"
 #include "vp10/common/idct.h"
 #include "vp10/common/reconinter.h"
@@ -382,7 +385,6 @@
     vp10_init_me_luts();
     vp10_rc_init_minq_luts();
     vp10_entropy_mv_init();
-    vp10_temporal_filter_init();
     vp10_encode_token_init();
 #if CONFIG_EXT_INTER
     vp10_init_wedge_masks();
@@ -3343,6 +3345,15 @@
       vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
 #endif
   }
+#if CONFIG_DERING
+  if (is_lossless_requested(&cpi->oxcf)) {
+    cm->dering_level = 0;
+  } else {
+    cm->dering_level = vp10_dering_search(cm->frame_to_show, cpi->Source, cm,
+                                          xd);
+    vp10_dering_frame(cm->frame_to_show, cm, xd, cm->dering_level);
+  }
+#endif  // CONFIG_DERING
 
 #if CONFIG_CLPF
   cm->clpf = 0;
diff --git a/vp10/encoder/pickdering.c b/vp10/encoder/pickdering.c
new file mode 100644
index 0000000..5a185ee
--- /dev/null
+++ b/vp10/encoder/pickdering.c
@@ -0,0 +1,180 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+
+#include "./vpx_scale_rtcd.h"
+#include "vp10/common/dering.h"
+#include "vp10/common/onyxc_int.h"
+#include "vp10/common/reconinter.h"
+#include "vp10/encoder/encoder.h"
+#include "vpx/vpx_integer.h"
+
+/* Sum of squared differences between x and y over an (8*nvb) x (8*nhb) area,
+   scaled down by 2^(2*coeff_shift). */
+static double compute_dist(int16_t *x, int xstride, int16_t *y, int ystride,
+    int nhb, int nvb, int coeff_shift) {
+  double sum = 0;
+  int i, j;
+  for (i = 0; i < nvb << 3; i++) {
+    for (j = 0; j < nhb << 3; j++) {
+      const double diff = x[i*xstride + j] - y[i*ystride + j];
+      sum += diff*diff;
+    }
+  }
+  return sum/(double)(1 << 2*coeff_shift);
+}
+
+/* Searches for the frame-level deringing level that minimizes the luma
+   squared error between the deringed frame and ref; with DERING_REFINEMENT
+   it also stores each superblock's refinement index in mbmi.dering_gain.
+   Returns the chosen level, or 0 if a work buffer cannot be allocated. */
+int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
+                       VP10_COMMON *cm, MACROBLOCKD *xd) {
+  int r, c, sbr, sbc, nhsb, nvsb;
+  od_dering_in *src;
+  int16_t *ref_coeff;
+  unsigned char *bskip;
+  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = {{0}};
+  int bsize[3], dec[3];
+  int stride, pli, level, best_level, global_level;
+  int (*mse)[MAX_DERING_LEVEL];
+  int best_count[MAX_DERING_LEVEL] = {0};
+  double tot_mse[MAX_DERING_LEVEL] = {0};
+  double best_tot_mse = 1e15;
+  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
+  nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1)/MAX_MIB_SIZE;
+  nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1)/MAX_MIB_SIZE;
+  src = vpx_malloc(sizeof(*src)*cm->mi_rows*cm->mi_cols*64);
+  ref_coeff = vpx_malloc(sizeof(*ref_coeff)*cm->mi_rows*cm->mi_cols*64);
+  bskip = vpx_malloc(sizeof(*bskip)*cm->mi_rows*cm->mi_cols);
+  mse = vpx_malloc(nvsb*nhsb*sizeof(*mse));
+  /* Bail out with deringing disabled rather than dereference NULL. */
+  if (!src || !ref_coeff || !bskip || !mse) {
+    vpx_free(src);
+    vpx_free(ref_coeff);
+    vpx_free(bskip);
+    vpx_free(mse);
+    return 0;
+  }
+  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
+  for (pli = 0; pli < 3; pli++) {
+    dec[pli] = xd->plane[pli].subsampling_x;
+    bsize[pli] = 8 >> dec[pli];
+  }
+  stride = bsize[0]*cm->mi_cols;
+  /* Gather luma of the reconstruction and of ref as 16-bit samples. */
+  for (r = 0; r < bsize[0]*cm->mi_rows; ++r) {
+    for (c = 0; c < bsize[0]*cm->mi_cols; ++c) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (cm->use_highbitdepth) {
+        src[r * stride + c] =
+            CONVERT_TO_SHORTPTR(xd->plane[0].dst.buf)
+            [r*xd->plane[0].dst.stride + c];
+        ref_coeff[r * stride + c] =
+            CONVERT_TO_SHORTPTR(ref->y_buffer)[r * ref->y_stride + c];
+      } else {
+#endif
+        src[r * stride + c] =
+            xd->plane[0].dst.buf[r*xd->plane[0].dst.stride + c];
+        ref_coeff[r * stride + c] = ref->y_buffer[r * ref->y_stride + c];
+#if CONFIG_VP9_HIGHBITDEPTH
+      }
+#endif
+    }
+  }
+  for (r = 0; r < cm->mi_rows; ++r) {
+    for (c = 0; c < cm->mi_cols; ++c) {
+      const MB_MODE_INFO *mbmi =
+          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
+      bskip[r * cm->mi_cols + c] = mbmi->skip;
+    }
+  }
+  for (sbr = 0; sbr < nvsb; sbr++) {
+    for (sbc = 0; sbc < nhsb; sbc++) {
+      int best_mse = 1000000000;
+      int nvb, nhb;
+      int16_t dst[MAX_MIB_SIZE*MAX_MIB_SIZE*8*8];
+      best_level = 0;
+      nhb = VPXMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE*sbc);
+      nvb = VPXMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE*sbr);
+      /* Every mse row holds MAX_DERING_LEVEL entries, so bound by it. */
+      for (level = 0; level < MAX_DERING_LEVEL; level++) {
+        const int threshold = level << coeff_shift;
+        od_dering(&OD_DERING_VTBL_C, dst, MAX_MIB_SIZE*bsize[0],
+            &src[sbr*stride*bsize[0]*MAX_MIB_SIZE + sbc*bsize[0]*MAX_MIB_SIZE],
+            cm->mi_cols*bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0, dir, 0,
+            &bskip[MAX_MIB_SIZE*sbr*cm->mi_cols + MAX_MIB_SIZE*sbc],
+            cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift);
+        mse[nhsb*sbr+sbc][level] = (int)compute_dist(
+            dst, MAX_MIB_SIZE*bsize[0],
+            &ref_coeff[sbr*stride*bsize[0]*MAX_MIB_SIZE +
+            sbc*bsize[0]*MAX_MIB_SIZE],
+            stride, nhb, nvb, coeff_shift);
+        tot_mse[level] += mse[nhsb*sbr+sbc][level];
+        if (mse[nhsb*sbr+sbc][level] < best_mse) {
+          best_mse = mse[nhsb*sbr+sbc][level];
+          best_level = level;
+        }
+      }
+      best_count[best_level]++;
+    }
+  }
+#if DERING_REFINEMENT
+  best_level = 0;
+  /* Search for the best global level one value at a time. */
+  for (global_level = 2; global_level < MAX_DERING_LEVEL; global_level++) {
+    double level_tot_mse = 0;
+    for (sbr = 0; sbr < nvsb; sbr++) {
+      for (sbc = 0; sbc < nhsb; sbc++) {
+        int gi;
+        int best_mse = mse[nhsb*sbr+sbc][0];
+        for (gi = 1; gi < DERING_REFINEMENT_LEVELS; gi++) {
+          level = compute_level_from_index(global_level, gi);
+          if (mse[nhsb*sbr+sbc][level] < best_mse) {
+            best_mse = mse[nhsb*sbr+sbc][level];
+          }
+        }
+        level_tot_mse += best_mse;
+      }
+    }
+    if (level_tot_mse < best_tot_mse) {
+      best_level = global_level;
+      best_tot_mse = level_tot_mse;
+    }
+  }
+  for (sbr = 0; sbr < nvsb; sbr++) {
+    for (sbc = 0; sbc < nhsb; sbc++) {
+      int gi;
+      int best_gi = 0;
+      int best_mse = mse[nhsb*sbr+sbc][0];
+      for (gi = 1; gi < DERING_REFINEMENT_LEVELS; gi++) {
+        level = compute_level_from_index(best_level, gi);
+        if (mse[nhsb*sbr+sbc][level] < best_mse) {
+          best_gi = gi;
+          best_mse = mse[nhsb*sbr+sbc][level];
+        }
+      }
+      cm->mi_grid_visible[MAX_MIB_SIZE*sbr*cm->mi_stride + MAX_MIB_SIZE*sbc]->
+          mbmi.dering_gain = best_gi;
+    }
+  }
+#else
+  best_level = 0;
+  for (level = 0; level < MAX_DERING_LEVEL; level++) {
+    if (tot_mse[level] < tot_mse[best_level]) best_level = level;
+  }
+#endif
+  vpx_free(src);
+  vpx_free(ref_coeff);
+  vpx_free(bskip);
+  vpx_free(mse);
+  return best_level;
+}
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c
index a22f3b5..cc484a8 100644
--- a/vp10/encoder/temporal_filter.c
+++ b/vp10/encoder/temporal_filter.c
@@ -15,6 +15,7 @@
 #include "vp10/common/onyxc_int.h"
 #include "vp10/common/quant_common.h"
 #include "vp10/common/reconinter.h"
+#include "vp10/common/odintrin.h"
 #include "vp10/encoder/extend.h"
 #include "vp10/encoder/firstpass.h"
 #include "vp10/encoder/mcomp.h"
@@ -29,8 +30,6 @@
 #include "vpx_ports/vpx_timer.h"
 #include "vpx_scale/vpx_scale.h"
 
-static int fixed_divide[512];
-
 static void temporal_filter_predictors_mb_c(
     MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
     int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
@@ -92,13 +91,6 @@
                              which_mv, interp_filter, mv_precision_uv, x, y);
 }
 
-void vp10_temporal_filter_init(void) {
-  int i;
-
-  fixed_divide[0] = 0;
-  for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
-}
-
 void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
                                   uint8_t *frame2, unsigned int block_width,
                                   unsigned int block_height, int strength,
@@ -443,11 +435,8 @@
         byte = mb_y_offset;
         for (i = 0, k = 0; i < 16; i++) {
           for (j = 0; j < 16; j++, k++) {
-            unsigned int pval = accumulator[k] + (count[k] >> 1);
-            pval *= fixed_divide[count[k]];
-            pval >>= 19;
-
-            dst1_16[byte] = (uint16_t)pval;
+            dst1_16[byte] =
+                (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
 
             // move to next pixel
             byte++;
@@ -467,16 +456,12 @@
             int m = k + 256;
 
             // U
-            unsigned int pval = accumulator[k] + (count[k] >> 1);
-            pval *= fixed_divide[count[k]];
-            pval >>= 19;
-            dst1_16[byte] = (uint16_t)pval;
+            dst1_16[byte] =
+                (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
 
             // V
-            pval = accumulator[m] + (count[m] >> 1);
-            pval *= fixed_divide[count[m]];
-            pval >>= 19;
-            dst2_16[byte] = (uint16_t)pval;
+            dst2_16[byte] =
+                (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
 
             // move to next pixel
             byte++;
@@ -491,11 +476,8 @@
         byte = mb_y_offset;
         for (i = 0, k = 0; i < 16; i++) {
           for (j = 0; j < 16; j++, k++) {
-            unsigned int pval = accumulator[k] + (count[k] >> 1);
-            pval *= fixed_divide[count[k]];
-            pval >>= 19;
-
-            dst1[byte] = (uint8_t)pval;
+            dst1[byte] =
+                (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
 
             // move to next pixel
             byte++;
@@ -512,16 +494,12 @@
             int m = k + 256;
 
             // U
-            unsigned int pval = accumulator[k] + (count[k] >> 1);
-            pval *= fixed_divide[count[k]];
-            pval >>= 19;
-            dst1[byte] = (uint8_t)pval;
+            dst1[byte] =
+                (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
 
             // V
-            pval = accumulator[m] + (count[m] >> 1);
-            pval *= fixed_divide[count[m]];
-            pval >>= 19;
-            dst2[byte] = (uint8_t)pval;
+            dst2[byte] =
+                (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
 
             // move to next pixel
             byte++;
@@ -536,11 +514,8 @@
       byte = mb_y_offset;
       for (i = 0, k = 0; i < 16; i++) {
         for (j = 0; j < 16; j++, k++) {
-          unsigned int pval = accumulator[k] + (count[k] >> 1);
-          pval *= fixed_divide[count[k]];
-          pval >>= 19;
-
-          dst1[byte] = (uint8_t)pval;
+          dst1[byte] =
+              (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
 
           // move to next pixel
           byte++;
@@ -557,16 +532,12 @@
           int m = k + 256;
 
           // U
-          unsigned int pval = accumulator[k] + (count[k] >> 1);
-          pval *= fixed_divide[count[k]];
-          pval >>= 19;
-          dst1[byte] = (uint8_t)pval;
+          dst1[byte] =
+              (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
 
           // V
-          pval = accumulator[m] + (count[m] >> 1);
-          pval *= fixed_divide[count[m]];
-          pval >>= 19;
-          dst2[byte] = (uint8_t)pval;
+          dst2[byte] =
+              (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
 
           // move to next pixel
           byte++;
diff --git a/vp10/encoder/temporal_filter.h b/vp10/encoder/temporal_filter.h
index 6e331e6..ce5291a 100644
--- a/vp10/encoder/temporal_filter.h
+++ b/vp10/encoder/temporal_filter.h
@@ -15,7 +15,6 @@
 extern "C" {
 #endif
 
-void vp10_temporal_filter_init(void);
 void vp10_temporal_filter(VP10_COMP *cpi, int distance);
 
 #ifdef __cplusplus
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 19fe6ca..e25cdcf 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -91,6 +91,14 @@
 endif
 VP10_COMMON_SRCS-yes += common/clpf.c
 VP10_COMMON_SRCS-yes += common/clpf.h
+ifeq ($(CONFIG_DERING),yes)
+VP10_COMMON_SRCS-yes += common/od_dering.c
+VP10_COMMON_SRCS-yes += common/od_dering.h
+VP10_COMMON_SRCS-yes += common/dering.c
+VP10_COMMON_SRCS-yes += common/dering.h
+endif
+VP10_COMMON_SRCS-yes += common/odintrin.c
+VP10_COMMON_SRCS-yes += common/odintrin.h
 
 ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 VP10_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/itrans4_dspr2.c
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk
index cb9e108..6764ac8 100644
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@ -90,7 +90,9 @@
 VP10_CX_SRCS-yes += encoder/temporal_filter.h
 VP10_CX_SRCS-yes += encoder/mbgraph.c
 VP10_CX_SRCS-yes += encoder/mbgraph.h
-
+ifeq ($(CONFIG_DERING),yes)
+VP10_CX_SRCS-yes += encoder/pickdering.c
+endif
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)